author | Stephen Hines <srhines@google.com> | 2014-05-29 02:49:00 -0700
---|---|---
committer | Stephen Hines <srhines@google.com> | 2014-05-29 02:49:00 -0700
commit | dce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch)
tree | dcebc53f2b182f145a2e659393bf9a0472cedf23 /lib/Target/ARM64
parent | 220b921aed042f9e520c26cffd8282a94c66c3d5 (diff)
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'lib/Target/ARM64')
84 files changed, 0 insertions, 58692 deletions
diff --git a/lib/Target/ARM64/ARM64.h b/lib/Target/ARM64/ARM64.h deleted file mode 100644 index f2c5e60..0000000 --- a/lib/Target/ARM64/ARM64.h +++ /dev/null @@ -1,48 +0,0 @@ -//===-- ARM64.h - Top-level interface for ARM64 representation --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the entry points for global functions defined in the LLVM -// ARM64 back-end. -// -//===----------------------------------------------------------------------===// - -#ifndef TARGET_ARM64_H -#define TARGET_ARM64_H - -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64MCTargetDesc.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/DataTypes.h" - -namespace llvm { - -class ARM64TargetMachine; -class FunctionPass; -class MachineFunctionPass; - -FunctionPass *createARM64DeadRegisterDefinitions(); -FunctionPass *createARM64ConditionalCompares(); -FunctionPass *createARM64AdvSIMDScalar(); -FunctionPass *createARM64BranchRelaxation(); -FunctionPass *createARM64ISelDag(ARM64TargetMachine &TM, - CodeGenOpt::Level OptLevel); -FunctionPass *createARM64StorePairSuppressPass(); -FunctionPass *createARM64ExpandPseudoPass(); -FunctionPass *createARM64LoadStoreOptimizationPass(); -ModulePass *createARM64PromoteConstantPass(); -FunctionPass *createARM64AddressTypePromotionPass(); -/// \brief Creates an ARM-specific Target Transformation Info pass. -ImmutablePass *createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM); - -FunctionPass *createARM64CleanupLocalDynamicTLSPass(); - -FunctionPass *createARM64CollectLOHPass(); -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/ARM64.td b/lib/Target/ARM64/ARM64.td deleted file mode 100644 index 3eef8b2..0000000 --- a/lib/Target/ARM64/ARM64.td +++ /dev/null @@ -1,95 +0,0 @@ -//===- ARM64.td - Describe the ARM64 Target Machine --------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Target-independent interfaces which we are implementing -//===----------------------------------------------------------------------===// - -include "llvm/Target/Target.td" - -//===----------------------------------------------------------------------===// -// ARM64 Subtarget features. -// - -/// Cyclone has register move instructions which are "free". -def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", - "Has zereo-cycle register moves">; - -/// Cyclone has instructions which zero registers for "free". 
-def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", - "Has zero-cycle zeroing instructions">; - -//===----------------------------------------------------------------------===// -// Register File Description -//===----------------------------------------------------------------------===// - -include "ARM64RegisterInfo.td" -include "ARM64CallingConvention.td" - -//===----------------------------------------------------------------------===// -// Instruction Descriptions -//===----------------------------------------------------------------------===// - -include "ARM64Schedule.td" -include "ARM64InstrInfo.td" - -def ARM64InstrInfo : InstrInfo; - -//===----------------------------------------------------------------------===// -// ARM64 Processors supported. -// -include "ARM64SchedCyclone.td" - -def : ProcessorModel<"arm64-generic", NoSchedModel, []>; - -def : ProcessorModel<"cyclone", CycloneModel, [FeatureZCRegMove, FeatureZCZeroing]>; - -//===----------------------------------------------------------------------===// -// Assembly parser -//===----------------------------------------------------------------------===// - -def GenericAsmParserVariant : AsmParserVariant { - int Variant = 0; - string Name = "generic"; -} - -def AppleAsmParserVariant : AsmParserVariant { - int Variant = 1; - string Name = "apple-neon"; -} - -//===----------------------------------------------------------------------===// -// Assembly printer -//===----------------------------------------------------------------------===// -// ARM64 Uses the MC printer for asm output, so make sure the TableGen -// AsmWriter bits get associated with the correct class. -def GenericAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - int Variant = 0; - bit isMCAsmWriter = 1; -} - -def AppleAsmWriter : AsmWriter { - let AsmWriterClassName = "AppleInstPrinter"; - int Variant = 1; - int isMCAsmWriter = 1; -} - -//===----------------------------------------------------------------------===// -// Target Declaration -//===----------------------------------------------------------------------===// - -def ARM64 : Target { - let InstructionSet = ARM64InstrInfo; - let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant]; - let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter]; -} diff --git a/lib/Target/ARM64/ARM64AddressTypePromotion.cpp b/lib/Target/ARM64/ARM64AddressTypePromotion.cpp deleted file mode 100644 index 72fa6af..0000000 --- a/lib/Target/ARM64/ARM64AddressTypePromotion.cpp +++ /dev/null @@ -1,496 +0,0 @@ - -//===-- ARM64AddressTypePromotion.cpp --- Promote type for addr accesses -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass tries to promote the computations use to obtained a sign extended -// value used into memory accesses. -// E.g. -// a = add nsw i32 b, 3 -// d = sext i32 a to i64 -// e = getelementptr ..., i64 d -// -// => -// f = sext i32 b to i64 -// a = add nsw i64 f, 3 -// e = getelementptr ..., i64 a -// -// This is legal to do so if the computations are markers with either nsw or nuw -// markers. -// Moreover, the current heuristic is simple: it does not create new sext -// operations, i.e., it gives up when a sext would have forked (e.g., if -// a = add i32 b, c, two sexts are required to promote the computation). 
-// -// FIXME: This pass may be useful for other targets too. -// ===---------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-type-promotion" -#include "ARM64.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" - -using namespace llvm; - -static cl::opt<bool> -EnableAddressTypePromotion("arm64-type-promotion", cl::Hidden, - cl::desc("Enable the type promotion pass"), - cl::init(true)); -static cl::opt<bool> -EnableMerge("arm64-type-promotion-merge", cl::Hidden, - cl::desc("Enable merging of redundant sexts when one is dominating" - " the other."), - cl::init(true)); - -//===----------------------------------------------------------------------===// -// ARM64AddressTypePromotion -//===----------------------------------------------------------------------===// - -namespace llvm { -void initializeARM64AddressTypePromotionPass(PassRegistry &); -} - -namespace { -class ARM64AddressTypePromotion : public FunctionPass { - -public: - static char ID; - ARM64AddressTypePromotion() - : FunctionPass(ID), Func(NULL), ConsideredSExtType(NULL) { - initializeARM64AddressTypePromotionPass(*PassRegistry::getPassRegistry()); - } - - virtual const char *getPassName() const { - return "ARM64 Address Type Promotion"; - } - - /// Iterate over the functions and promote the computation of interesting - // sext instructions. - bool runOnFunction(Function &F); - -private: - /// The current function. - Function *Func; - /// Filter out all sexts that does not have this type. - /// Currently initialized with Int64Ty. - Type *ConsideredSExtType; - - // This transformation requires dominator info. - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - FunctionPass::getAnalysisUsage(AU); - } - - typedef SmallPtrSet<Instruction *, 32> SetOfInstructions; - typedef SmallVector<Instruction *, 16> Instructions; - typedef DenseMap<Value *, Instructions> ValueToInsts; - - /// Check if it is profitable to move a sext through this instruction. - /// Currently, we consider it is profitable if: - /// - Inst is used only once (no need to insert truncate). - /// - Inst has only one operand that will require a sext operation (we do - /// do not create new sext operation). - bool shouldGetThrough(const Instruction *Inst); - - /// Check if it is possible and legal to move a sext through this - /// instruction. - /// Current heuristic considers that we can get through: - /// - Arithmetic operation marked with the nsw or nuw flag. - /// - Other sext operation. - /// - Truncate operation if it was just dropping sign extended bits. - bool canGetThrough(const Instruction *Inst); - - /// Move sext operations through safe to sext instructions. - bool propagateSignExtension(Instructions &SExtInsts); - - /// Is this sext should be considered for code motion. - /// We look for sext with ConsideredSExtType and uses in at least one - // GetElementPtrInst. - bool shouldConsiderSExt(const Instruction *SExt) const; - - /// Collect all interesting sext operations, i.e., the ones with the right - /// type and used in memory accesses. 
- /// More precisely, a sext instruction is considered as interesting if it - /// is used in a "complex" getelementptr or it exits at least another - /// sext instruction that sign extended the same initial value. - /// A getelementptr is considered as "complex" if it has more than 2 - // operands. - void analyzeSExtension(Instructions &SExtInsts); - - /// Merge redundant sign extension operations in common dominator. - void mergeSExts(ValueToInsts &ValToSExtendedUses, - SetOfInstructions &ToRemove); -}; -} // end anonymous namespace. - -char ARM64AddressTypePromotion::ID = 0; - -INITIALIZE_PASS_BEGIN(ARM64AddressTypePromotion, "arm64-type-promotion", - "ARM64 Type Promotion Pass", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(ARM64AddressTypePromotion, "arm64-type-promotion", - "ARM64 Type Promotion Pass", false, false) - -FunctionPass *llvm::createARM64AddressTypePromotionPass() { - return new ARM64AddressTypePromotion(); -} - -bool ARM64AddressTypePromotion::canGetThrough(const Instruction *Inst) { - if (isa<SExtInst>(Inst)) - return true; - - const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); - if (BinOp && isa<OverflowingBinaryOperator>(BinOp) && - (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap())) - return true; - - // sext(trunc(sext)) --> sext - if (isa<TruncInst>(Inst) && isa<SExtInst>(Inst->getOperand(0))) { - const Instruction *Opnd = cast<Instruction>(Inst->getOperand(0)); - // Check that the truncate just drop sign extended bits. - if (Inst->getType()->getIntegerBitWidth() >= - Opnd->getOperand(0)->getType()->getIntegerBitWidth() && - Inst->getOperand(0)->getType()->getIntegerBitWidth() <= - ConsideredSExtType->getIntegerBitWidth()) - return true; - } - - return false; -} - -bool ARM64AddressTypePromotion::shouldGetThrough(const Instruction *Inst) { - // If the type of the sext is the same as the considered one, this sext - // will become useless. - // Otherwise, we will have to do something to preserve the original value, - // unless it is used once. - if (isa<SExtInst>(Inst) && - (Inst->getType() == ConsideredSExtType || Inst->hasOneUse())) - return true; - - // If the Inst is used more that once, we may need to insert truncate - // operations and we don't do that at the moment. - if (!Inst->hasOneUse()) - return false; - - // This truncate is used only once, thus if we can get thourgh, it will become - // useless. - if (isa<TruncInst>(Inst)) - return true; - - // If both operands are not constant, a new sext will be created here. - // Current heuristic is: each step should be profitable. - // Therefore we don't allow to increase the number of sext even if it may - // be profitable later on. - if (isa<BinaryOperator>(Inst) && isa<ConstantInt>(Inst->getOperand(1))) - return true; - - return false; -} - -static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) { - if (isa<SelectInst>(Inst) && OpIdx == 0) - return false; - return true; -} - -bool -ARM64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const { - if (SExt->getType() != ConsideredSExtType) - return false; - - for (Value::const_use_iterator UseIt = SExt->use_begin(), - EndUseIt = SExt->use_end(); - UseIt != EndUseIt; ++UseIt) { - if (isa<GetElementPtrInst>(*UseIt)) - return true; - } - - return false; -} - -// Input: -// - SExtInsts contains all the sext instructions that are use direclty in -// GetElementPtrInst, i.e., access to memory. -// Algorithm: -// - For each sext operation in SExtInsts: -// Let var be the operand of sext. 
-// while it is profitable (see shouldGetThrough), legal, and safe -// (see canGetThrough) to move sext through var's definition: -// * promote the type of var's definition. -// * fold var into sext uses. -// * move sext above var's definition. -// * update sext operand to use the operand of var that should be sign -// extended (by construction there is only one). -// -// E.g., -// a = ... i32 c, 3 -// b = sext i32 a to i64 <- is it legal/safe/profitable to get through 'a' -// ... -// = b -// => Yes, update the code -// b = sext i32 c to i64 -// a = ... i64 b, 3 -// ... -// = a -// Iterate on 'c'. -bool -ARM64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) { - DEBUG(dbgs() << "*** Propagate Sign Extension ***\n"); - - bool LocalChange = false; - SetOfInstructions ToRemove; - ValueToInsts ValToSExtendedUses; - while (!SExtInsts.empty()) { - // Get through simple chain. - Instruction *SExt = SExtInsts.pop_back_val(); - - DEBUG(dbgs() << "Consider:\n" << *SExt << '\n'); - - // If this SExt has already been merged continue. - if (SExt->use_empty() && ToRemove.count(SExt)) { - DEBUG(dbgs() << "No uses => marked as delete\n"); - continue; - } - - // Now try to get through the chain of definitions. - while (isa<Instruction>(SExt->getOperand(0))) { - Instruction *Inst = dyn_cast<Instruction>(SExt->getOperand(0)); - DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n'); - if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) { - // We cannot get through something that is not an Instruction - // or not safe to SExt. - DEBUG(dbgs() << "Cannot get through\n"); - break; - } - - LocalChange = true; - // If this is a sign extend, it becomes useless. - if (isa<SExtInst>(Inst) || isa<TruncInst>(Inst)) { - DEBUG(dbgs() << "SExt or trunc, mark it as to remove\n"); - // We cannot use replaceAllUsesWith here because we may trigger some - // assertion on the type as all involved sext operation may have not - // been moved yet. - while (!Inst->use_empty()) { - Value::use_iterator UseIt = Inst->use_begin(); - Instruction *UseInst = dyn_cast<Instruction>(*UseIt); - assert(UseInst && "Use of sext is not an Instruction!"); - UseInst->setOperand(UseIt->getOperandNo(), SExt); - } - ToRemove.insert(Inst); - SExt->setOperand(0, Inst->getOperand(0)); - SExt->moveBefore(Inst); - continue; - } - - // Get through the Instruction: - // 1. Update its type. - // 2. Replace the uses of SExt by Inst. - // 3. Sign extend each operand that needs to be sign extended. - - // Step #1. - Inst->mutateType(SExt->getType()); - // Step #2. - SExt->replaceAllUsesWith(Inst); - // Step #3. - Instruction *SExtForOpnd = SExt; - - DEBUG(dbgs() << "Propagate SExt to operands\n"); - for (int OpIdx = 0, EndOpIdx = Inst->getNumOperands(); OpIdx != EndOpIdx; - ++OpIdx) { - DEBUG(dbgs() << "Operand:\n" << *(Inst->getOperand(OpIdx)) << '\n'); - if (Inst->getOperand(OpIdx)->getType() == SExt->getType() || - !shouldSExtOperand(Inst, OpIdx)) { - DEBUG(dbgs() << "No need to propagate\n"); - continue; - } - // Check if we can statically sign extend the operand. - Value *Opnd = Inst->getOperand(OpIdx); - if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) { - DEBUG(dbgs() << "Statically sign extend\n"); - Inst->setOperand(OpIdx, ConstantInt::getSigned(SExt->getType(), - Cst->getSExtValue())); - continue; - } - // UndefValue are typed, so we have to statically sign extend them. 
- if (isa<UndefValue>(Opnd)) { - DEBUG(dbgs() << "Statically sign extend\n"); - Inst->setOperand(OpIdx, UndefValue::get(SExt->getType())); - continue; - } - - // Otherwise we have to explicity sign extend it. - assert(SExtForOpnd && - "Only one operand should have been sign extended"); - - SExtForOpnd->setOperand(0, Opnd); - - DEBUG(dbgs() << "Move before:\n" << *Inst << "\nSign extend\n"); - // Move the sign extension before the insertion point. - SExtForOpnd->moveBefore(Inst); - Inst->setOperand(OpIdx, SExtForOpnd); - // If more sext are required, new instructions will have to be created. - SExtForOpnd = NULL; - } - if (SExtForOpnd == SExt) { - DEBUG(dbgs() << "Sign extension is useless now\n"); - ToRemove.insert(SExt); - break; - } - } - - // If the use is already of the right type, connect its uses to its argument - // and delete it. - // This can happen for an Instruction which all uses are sign extended. - if (!ToRemove.count(SExt) && - SExt->getType() == SExt->getOperand(0)->getType()) { - DEBUG(dbgs() << "Sign extension is useless, attach its use to " - "its argument\n"); - SExt->replaceAllUsesWith(SExt->getOperand(0)); - ToRemove.insert(SExt); - } else - ValToSExtendedUses[SExt->getOperand(0)].push_back(SExt); - } - - if (EnableMerge) - mergeSExts(ValToSExtendedUses, ToRemove); - - // Remove all instructions marked as ToRemove. - for (Instruction *I: ToRemove) - I->eraseFromParent(); - return LocalChange; -} - -void ARM64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses, - SetOfInstructions &ToRemove) { - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - - for (auto &Entry: ValToSExtendedUses) { - Instructions &Insts = Entry.second; - Instructions CurPts; - for (Instruction *Inst : Insts) { - if (ToRemove.count(Inst)) - continue; - bool inserted = false; - for (auto Pt : CurPts) { - if (DT.dominates(Inst, Pt)) { - DEBUG(dbgs() << "Replace all uses of:\n" << *Pt << "\nwith:\n" - << *Inst << '\n'); - (Pt)->replaceAllUsesWith(Inst); - ToRemove.insert(Pt); - Pt = Inst; - inserted = true; - break; - } - if (!DT.dominates(Pt, Inst)) - // Give up if we need to merge in a common dominator as the - // expermients show it is not profitable. - continue; - - DEBUG(dbgs() << "Replace all uses of:\n" << *Inst << "\nwith:\n" - << *Pt << '\n'); - Inst->replaceAllUsesWith(Pt); - ToRemove.insert(Inst); - inserted = true; - break; - } - if (!inserted) - CurPts.push_back(Inst); - } - } -} - -void ARM64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) { - DEBUG(dbgs() << "*** Analyze Sign Extensions ***\n"); - - DenseMap<Value *, Instruction *> SeenChains; - - for (auto &BB : *Func) { - for (auto &II: BB) { - Instruction *SExt = &II; - - // Collect all sext operation per type. - if (!isa<SExtInst>(SExt) || !shouldConsiderSExt(SExt)) - continue; - - DEBUG(dbgs() << "Found:\n" << (*SExt) << '\n'); - - // Cases where we actually perform the optimization: - // 1. SExt is used in a getelementptr with more than 2 operand => - // likely we can merge some computation if they are done on 64 bits. - // 2. The beginning of the SExt chain is SExt several time. => - // code sharing is possible. - - bool insert = false; - // #1. 
- for (Value::use_iterator UseIt = SExt->use_begin(), - EndUseIt = SExt->use_end(); - UseIt != EndUseIt; ++UseIt) { - const Instruction *Inst = dyn_cast<GetElementPtrInst>(*UseIt); - if (Inst && Inst->getNumOperands() > 2) { - DEBUG(dbgs() << "Interesting use in GetElementPtrInst\n" << *Inst - << '\n'); - insert = true; - break; - } - } - - // #2. - // Check the head of the chain. - Instruction *Inst = SExt; - Value *Last; - do { - int OpdIdx = 0; - const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); - if (BinOp && isa<ConstantInt>(BinOp->getOperand(0))) - OpdIdx = 1; - Last = Inst->getOperand(OpdIdx); - Inst = dyn_cast<Instruction>(Last); - } while (Inst && canGetThrough(Inst) && shouldGetThrough(Inst)); - - DEBUG(dbgs() << "Head of the chain:\n" << *Last << '\n'); - DenseMap<Value *, Instruction *>::iterator AlreadySeen = - SeenChains.find(Last); - if (insert || AlreadySeen != SeenChains.end()) { - DEBUG(dbgs() << "Insert\n"); - SExtInsts.push_back(SExt); - if (AlreadySeen != SeenChains.end() && AlreadySeen->second != NULL) { - DEBUG(dbgs() << "Insert chain member\n"); - SExtInsts.push_back(AlreadySeen->second); - SeenChains[Last] = NULL; - } - } else { - DEBUG(dbgs() << "Record its chain membership\n"); - SeenChains[Last] = SExt; - } - } - } -} - -bool ARM64AddressTypePromotion::runOnFunction(Function &F) { - if (!EnableAddressTypePromotion || F.isDeclaration()) - return false; - Func = &F; - ConsideredSExtType = Type::getInt64Ty(Func->getContext()); - - DEBUG(dbgs() << "*** " << getPassName() << ": " << Func->getName() << '\n'); - - Instructions SExtInsts; - analyzeSExtension(SExtInsts); - return propagateSignExtension(SExtInsts); -} diff --git a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp b/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp deleted file mode 100644 index 83f8cda..0000000 --- a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp +++ /dev/null @@ -1,392 +0,0 @@ -//===-- ARM64AdvSIMDScalar.cpp - Replace dead defs w/ zero reg --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// When profitable, replace GPR targeting i64 instructions with their -// AdvSIMD scalar equivalents. Generally speaking, "profitable" is defined -// as minimizing the number of cross-class register copies. -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// TODO: Graph based predicate heuristics. -// Walking the instruction list linearly will get many, perhaps most, of -// the cases, but to do a truly throrough job of this, we need a more -// wholistic approach. -// -// This optimization is very similar in spirit to the register allocator's -// spill placement, only here we're determining where to place cross-class -// register copies rather than spills. As such, a similar approach is -// called for. -// -// We want to build up a set of graphs of all instructions which are candidates -// for transformation along with instructions which generate their inputs and -// consume their outputs. For each edge in the graph, we assign a weight -// based on whether there is a copy required there (weight zero if not) and -// the block frequency of the block containing the defining or using -// instruction, whichever is less. 
Our optimization is then a graph problem -// to minimize the total weight of all the graphs, then transform instructions -// and add or remove copy instructions as called for to implement the -// solution. -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-simd-scalar" -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64RegisterInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -static cl::opt<bool> -AdvSIMDScalar("arm64-simd-scalar", - cl::desc("enable use of AdvSIMD scalar integer instructions"), - cl::init(false), cl::Hidden); -// Allow forcing all i64 operations with equivalent SIMD instructions to use -// them. For stress-testing the transformation function. -static cl::opt<bool> -TransformAll("arm64-simd-scalar-force-all", - cl::desc("Force use of AdvSIMD scalar instructions everywhere"), - cl::init(false), cl::Hidden); - -STATISTIC(NumScalarInsnsUsed, "Number of scalar instructions used"); -STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted"); -STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted"); - -namespace { -class ARM64AdvSIMDScalar : public MachineFunctionPass { - MachineRegisterInfo *MRI; - const ARM64InstrInfo *TII; - -private: - // isProfitableToTransform - Predicate function to determine whether an - // instruction should be transformed to its equivalent AdvSIMD scalar - // instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example. - bool isProfitableToTransform(const MachineInstr *MI) const; - - // tranformInstruction - Perform the transformation of an instruction - // to its equivalant AdvSIMD scalar instruction. Update inputs and outputs - // to be the correct register class, minimizing cross-class copies. - void transformInstruction(MachineInstr *MI); - - // processMachineBasicBlock - Main optimzation loop. - bool processMachineBasicBlock(MachineBasicBlock *MBB); - -public: - static char ID; // Pass identification, replacement for typeid. - explicit ARM64AdvSIMDScalar() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &F); - - const char *getPassName() const { - return "AdvSIMD scalar operation optimization"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } -}; -char ARM64AdvSIMDScalar::ID = 0; -} // end anonymous namespace - -static bool isGPR64(unsigned Reg, unsigned SubReg, - const MachineRegisterInfo *MRI) { - if (SubReg) - return false; - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::GPR64RegClass); - return ARM64::GPR64RegClass.contains(Reg); -} - -static bool isFPR64(unsigned Reg, unsigned SubReg, - const MachineRegisterInfo *MRI) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR64RegClass) && - SubReg == 0) || - (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR128RegClass) && - SubReg == ARM64::dsub); - // Physical register references just check the regist class directly. 
- return (ARM64::FPR64RegClass.contains(Reg) && SubReg == 0) || - (ARM64::FPR128RegClass.contains(Reg) && SubReg == ARM64::dsub); -} - -// getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64 -// copy instruction. Return zero_reg if the instruction is not a copy. -static unsigned getSrcFromCopy(const MachineInstr *MI, - const MachineRegisterInfo *MRI, - unsigned &SubReg) { - SubReg = 0; - // The "FMOV Xd, Dn" instruction is the typical form. - if (MI->getOpcode() == ARM64::FMOVDXr || MI->getOpcode() == ARM64::FMOVXDr) - return MI->getOperand(1).getReg(); - // A lane zero extract "UMOV.d Xd, Vn[0]" is equivalent. We shouldn't see - // these at this stage, but it's easy to check for. - if (MI->getOpcode() == ARM64::UMOVvi64 && MI->getOperand(2).getImm() == 0) { - SubReg = ARM64::dsub; - return MI->getOperand(1).getReg(); - } - // Or just a plain COPY instruction. This can be directly to/from FPR64, - // or it can be a dsub subreg reference to an FPR128. - if (MI->getOpcode() == ARM64::COPY) { - if (isFPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(), - MRI) && - isGPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), MRI)) - return MI->getOperand(1).getReg(); - if (isGPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(), - MRI) && - isFPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), - MRI)) { - SubReg = ARM64::dsub; - return MI->getOperand(1).getReg(); - } - } - - // Otherwise, this is some other kind of instruction. - return 0; -} - -// getTransformOpcode - For any opcode for which there is an AdvSIMD equivalent -// that we're considering transforming to, return that AdvSIMD opcode. For all -// others, return the original opcode. -static int getTransformOpcode(unsigned Opc) { - switch (Opc) { - default: - break; - // FIXME: Lots more possibilities. - case ARM64::ADDXrr: - return ARM64::ADDv1i64; - case ARM64::SUBXrr: - return ARM64::SUBv1i64; - } - // No AdvSIMD equivalent, so just return the original opcode. - return Opc; -} - -static bool isTransformable(const MachineInstr *MI) { - int Opc = MI->getOpcode(); - return Opc != getTransformOpcode(Opc); -} - -// isProfitableToTransform - Predicate function to determine whether an -// instruction should be transformed to its equivalent AdvSIMD scalar -// instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example. -bool ARM64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const { - // If this instruction isn't eligible to be transformed (no SIMD equivalent), - // early exit since that's the common case. - if (!isTransformable(MI)) - return false; - - // Count the number of copies we'll need to add and approximate the number - // of copies that a transform will enable us to remove. - unsigned NumNewCopies = 3; - unsigned NumRemovableCopies = 0; - - unsigned OrigSrc0 = MI->getOperand(1).getReg(); - unsigned OrigSrc1 = MI->getOperand(2).getReg(); - unsigned Src0 = 0, SubReg0; - unsigned Src1 = 0, SubReg1; - if (!MRI->def_empty(OrigSrc0)) { - MachineRegisterInfo::def_instr_iterator Def = - MRI->def_instr_begin(OrigSrc0); - assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); - Src0 = getSrcFromCopy(&*Def, MRI, SubReg0); - // If the source was from a copy, we don't need to insert a new copy. - if (Src0) - --NumNewCopies; - // If there are no other users of the original source, we can delete - // that instruction. 
- if (Src0 && MRI->hasOneNonDBGUse(OrigSrc0)) - ++NumRemovableCopies; - } - if (!MRI->def_empty(OrigSrc1)) { - MachineRegisterInfo::def_instr_iterator Def = - MRI->def_instr_begin(OrigSrc1); - assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); - Src1 = getSrcFromCopy(&*Def, MRI, SubReg1); - if (Src1) - --NumNewCopies; - // If there are no other users of the original source, we can delete - // that instruction. - if (Src1 && MRI->hasOneNonDBGUse(OrigSrc1)) - ++NumRemovableCopies; - } - - // If any of the uses of the original instructions is a cross class copy, - // that's a copy that will be removable if we transform. Likewise, if - // any of the uses is a transformable instruction, it's likely the tranforms - // will chain, enabling us to save a copy there, too. This is an aggressive - // heuristic that approximates the graph based cost analysis described above. - unsigned Dst = MI->getOperand(0).getReg(); - bool AllUsesAreCopies = true; - for (MachineRegisterInfo::use_instr_nodbg_iterator - Use = MRI->use_instr_nodbg_begin(Dst), - E = MRI->use_instr_nodbg_end(); - Use != E; ++Use) { - unsigned SubReg; - if (getSrcFromCopy(&*Use, MRI, SubReg) || isTransformable(&*Use)) - ++NumRemovableCopies; - // If the use is an INSERT_SUBREG, that's still something that can - // directly use the FPR64, so we don't invalidate AllUsesAreCopies. It's - // preferable to have it use the FPR64 in most cases, as if the source - // vector is an IMPLICIT_DEF, the INSERT_SUBREG just goes away entirely. - // Ditto for a lane insert. - else if (Use->getOpcode() == ARM64::INSERT_SUBREG || - Use->getOpcode() == ARM64::INSvi64gpr) - ; - else - AllUsesAreCopies = false; - } - // If all of the uses of the original destination register are copies to - // FPR64, then we won't end up having a new copy back to GPR64 either. - if (AllUsesAreCopies) - --NumNewCopies; - - // If a tranform will not increase the number of cross-class copies required, - // return true. - if (NumNewCopies <= NumRemovableCopies) - return true; - - // Finally, even if we otherwise wouldn't transform, check if we're forcing - // transformation of everything. - return TransformAll; -} - -static MachineInstr *insertCopy(const ARM64InstrInfo *TII, MachineInstr *MI, - unsigned Dst, unsigned Src, bool IsKill) { - MachineInstrBuilder MIB = - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(ARM64::COPY), - Dst) - .addReg(Src, getKillRegState(IsKill)); - DEBUG(dbgs() << " adding copy: " << *MIB); - ++NumCopiesInserted; - return MIB; -} - -// tranformInstruction - Perform the transformation of an instruction -// to its equivalant AdvSIMD scalar instruction. Update inputs and outputs -// to be the correct register class, minimizing cross-class copies. -void ARM64AdvSIMDScalar::transformInstruction(MachineInstr *MI) { - DEBUG(dbgs() << "Scalar transform: " << *MI); - - MachineBasicBlock *MBB = MI->getParent(); - int OldOpc = MI->getOpcode(); - int NewOpc = getTransformOpcode(OldOpc); - assert(OldOpc != NewOpc && "transform an instruction to itself?!"); - - // Check if we need a copy for the source registers. 
- unsigned OrigSrc0 = MI->getOperand(1).getReg(); - unsigned OrigSrc1 = MI->getOperand(2).getReg(); - unsigned Src0 = 0, SubReg0; - unsigned Src1 = 0, SubReg1; - if (!MRI->def_empty(OrigSrc0)) { - MachineRegisterInfo::def_instr_iterator Def = - MRI->def_instr_begin(OrigSrc0); - assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); - Src0 = getSrcFromCopy(&*Def, MRI, SubReg0); - // If there are no other users of the original source, we can delete - // that instruction. - if (Src0 && MRI->hasOneNonDBGUse(OrigSrc0)) { - assert(Src0 && "Can't delete copy w/o a valid original source!"); - Def->eraseFromParent(); - ++NumCopiesDeleted; - } - } - if (!MRI->def_empty(OrigSrc1)) { - MachineRegisterInfo::def_instr_iterator Def = - MRI->def_instr_begin(OrigSrc1); - assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); - Src1 = getSrcFromCopy(&*Def, MRI, SubReg1); - // If there are no other users of the original source, we can delete - // that instruction. - if (Src1 && MRI->hasOneNonDBGUse(OrigSrc1)) { - assert(Src1 && "Can't delete copy w/o a valid original source!"); - Def->eraseFromParent(); - ++NumCopiesDeleted; - } - } - // If we weren't able to reference the original source directly, create a - // copy. - if (!Src0) { - SubReg0 = 0; - Src0 = MRI->createVirtualRegister(&ARM64::FPR64RegClass); - insertCopy(TII, MI, Src0, OrigSrc0, true); - } - if (!Src1) { - SubReg1 = 0; - Src1 = MRI->createVirtualRegister(&ARM64::FPR64RegClass); - insertCopy(TII, MI, Src1, OrigSrc1, true); - } - - // Create a vreg for the destination. - // FIXME: No need to do this if the ultimate user expects an FPR64. - // Check for that and avoid the copy if possible. - unsigned Dst = MRI->createVirtualRegister(&ARM64::FPR64RegClass); - - // For now, all of the new instructions have the same simple three-register - // form, so no need to special case based on what instruction we're - // building. - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(NewOpc), Dst) - .addReg(Src0, getKillRegState(true), SubReg0) - .addReg(Src1, getKillRegState(true), SubReg1); - - // Now copy the result back out to a GPR. - // FIXME: Try to avoid this if all uses could actually just use the FPR64 - // directly. - insertCopy(TII, MI, MI->getOperand(0).getReg(), Dst, true); - - // Erase the old instruction. - MI->eraseFromParent(); - - ++NumScalarInsnsUsed; -} - -// processMachineBasicBlock - Main optimzation loop. -bool ARM64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) { - bool Changed = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) { - MachineInstr *MI = I; - ++I; - if (isProfitableToTransform(MI)) { - transformInstruction(MI); - Changed = true; - } - } - return Changed; -} - -// runOnMachineFunction - Pass entry point from PassManager. -bool ARM64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) { - // Early exit if pass disabled. - if (!AdvSIMDScalar) - return false; - - bool Changed = false; - DEBUG(dbgs() << "***** ARM64AdvSIMDScalar *****\n"); - - const TargetMachine &TM = mf.getTarget(); - MRI = &mf.getRegInfo(); - TII = static_cast<const ARM64InstrInfo *>(TM.getInstrInfo()); - - // Just check things on a one-block-at-a-time basis. - for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) - if (processMachineBasicBlock(I)) - Changed = true; - return Changed; -} - -// createARM64AdvSIMDScalar - Factory function used by ARM64TargetMachine -// to add the pass to the PassManager. 
-FunctionPass *llvm::createARM64AdvSIMDScalar() { - return new ARM64AdvSIMDScalar(); -} diff --git a/lib/Target/ARM64/ARM64AsmPrinter.cpp b/lib/Target/ARM64/ARM64AsmPrinter.cpp deleted file mode 100644 index d0aa6af..0000000 --- a/lib/Target/ARM64/ARM64AsmPrinter.cpp +++ /dev/null @@ -1,563 +0,0 @@ -//===-- ARM64AsmPrinter.cpp - ARM64 LLVM assembly writer ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to the ARM64 assembly language. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "ARM64.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64MCInstLower.h" -#include "ARM64RegisterInfo.h" -#include "InstPrinter/ARM64InstPrinter.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/StackMaps.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstBuilder.h" -#include "llvm/MC/MCLinkerOptimizationHint.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -namespace { - -class ARM64AsmPrinter : public AsmPrinter { - ARM64MCInstLower MCInstLowering; - StackMaps SM; - -public: - ARM64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), MCInstLowering(OutContext, *Mang, *this), - SM(*this), ARM64FI(NULL), LOHLabelCounter(0) {} - - virtual const char *getPassName() const { return "ARM64 Assembly Printer"; } - - /// \brief Wrapper for MCInstLowering.lowerOperand() for the - /// tblgen'erated pseudo lowering. - bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const { - return MCInstLowering.lowerOperand(MO, MCOp); - } - - void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI); - void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI); - /// \brief tblgen'erated driver function for lowering simple MI->MC - /// pseudo instructions. 
- bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, - const MachineInstr *MI); - - void EmitInstruction(const MachineInstr *MI); - - void getAnalysisUsage(AnalysisUsage &AU) const { - AsmPrinter::getAnalysisUsage(AU); - AU.setPreservesAll(); - } - - bool runOnMachineFunction(MachineFunction &F) { - ARM64FI = F.getInfo<ARM64FunctionInfo>(); - return AsmPrinter::runOnMachineFunction(F); - } - -private: - MachineLocation getDebugValueLocation(const MachineInstr *MI) const; - void printOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O); - bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O); - bool printAsmRegInClass(const MachineOperand &MO, - const TargetRegisterClass *RC, bool isVector, - raw_ostream &O); - - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - - void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); - - void EmitFunctionBodyEnd(); - - MCSymbol *GetCPISymbol(unsigned CPID) const; - void EmitEndOfAsmFile(Module &M); - ARM64FunctionInfo *ARM64FI; - - /// \brief Emit the LOHs contained in ARM64FI. - void EmitLOHs(); - - typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol; - MInstToMCSymbol LOHInstToLabel; - unsigned LOHLabelCounter; -}; - -} // end of anonymous namespace - -//===----------------------------------------------------------------------===// - -void ARM64AsmPrinter::EmitEndOfAsmFile(Module &M) { - // Funny Darwin hack: This flag tells the linker that no global symbols - // contain code that falls through to other global symbols (e.g. the obvious - // implementation of multiple entry points). If this doesn't occur, the - // linker can safely perform dead code stripping. Since LLVM never - // generates code that does this, it is always safe to set. - OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); - SM.serializeToStackMapSection(); -} - -MachineLocation -ARM64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { - MachineLocation Location; - assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); - // Frame address. Currently handles register +- offset only. - if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) - Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); - else { - DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); - } - return Location; -} - -void ARM64AsmPrinter::EmitLOHs() { - SmallVector<MCSymbol *, 3> MCArgs; - - for (const auto &D : ARM64FI->getLOHContainer()) { - for (const MachineInstr *MI : D.getArgs()) { - MInstToMCSymbol::iterator LabelIt = LOHInstToLabel.find(MI); - assert(LabelIt != LOHInstToLabel.end() && - "Label hasn't been inserted for LOH related instruction"); - MCArgs.push_back(LabelIt->second); - } - OutStreamer.EmitLOHDirective(D.getKind(), MCArgs); - MCArgs.clear(); - } -} - -void ARM64AsmPrinter::EmitFunctionBodyEnd() { - if (!ARM64FI->getLOHRelated().empty()) - EmitLOHs(); -} - -/// GetCPISymbol - Return the symbol for the specified constant pool entry. -MCSymbol *ARM64AsmPrinter::GetCPISymbol(unsigned CPID) const { - // Darwin uses a linker-private symbol name for constant-pools (to - // avoid addends on the relocation?), ELF has no such concept and - // uses a normal private symbol. 
- if (getDataLayout().getLinkerPrivateGlobalPrefix()[0]) - return OutContext.GetOrCreateSymbol( - Twine(getDataLayout().getLinkerPrivateGlobalPrefix()) + "CPI" + - Twine(getFunctionNumber()) + "_" + Twine(CPID)); - - return OutContext.GetOrCreateSymbol( - Twine(getDataLayout().getPrivateGlobalPrefix()) + "CPI" + - Twine(getFunctionNumber()) + "_" + Twine(CPID)); -} - -void ARM64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNum); - switch (MO.getType()) { - default: - assert(0 && "<unknown operand type>"); - case MachineOperand::MO_Register: { - unsigned Reg = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); - assert(!MO.getSubReg() && "Subregs should be eliminated!"); - O << ARM64InstPrinter::getRegisterName(Reg); - break; - } - case MachineOperand::MO_Immediate: { - int64_t Imm = MO.getImm(); - O << '#' << Imm; - break; - } - } -} - -bool ARM64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, - raw_ostream &O) { - unsigned Reg = MO.getReg(); - switch (Mode) { - default: - return true; // Unknown mode. - case 'w': - Reg = getWRegFromXReg(Reg); - break; - case 'x': - Reg = getXRegFromWReg(Reg); - break; - } - - O << ARM64InstPrinter::getRegisterName(Reg); - return false; -} - -// Prints the register in MO using class RC using the offset in the -// new register class. This should not be used for cross class -// printing. -bool ARM64AsmPrinter::printAsmRegInClass(const MachineOperand &MO, - const TargetRegisterClass *RC, - bool isVector, raw_ostream &O) { - assert(MO.isReg() && "Should only get here with a register!"); - const ARM64RegisterInfo *RI = - static_cast<const ARM64RegisterInfo *>(TM.getRegisterInfo()); - unsigned Reg = MO.getReg(); - unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg)); - assert(RI->regsOverlap(RegToPrint, Reg)); - O << ARM64InstPrinter::getRegisterName( - RegToPrint, isVector ? ARM64::vreg : ARM64::NoRegAltName); - return false; -} - -bool ARM64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNum); - // Does this asm operand have a single letter operand modifier? - if (ExtraCode && ExtraCode[0]) { - if (ExtraCode[1] != 0) - return true; // Unknown modifier. - - switch (ExtraCode[0]) { - default: - return true; // Unknown modifier. - case 'w': // Print W register - case 'x': // Print X register - if (MO.isReg()) - return printAsmMRegister(MO, ExtraCode[0], O); - if (MO.isImm() && MO.getImm() == 0) { - unsigned Reg = ExtraCode[0] == 'w' ? ARM64::WZR : ARM64::XZR; - O << ARM64InstPrinter::getRegisterName(Reg); - return false; - } - printOperand(MI, OpNum, O); - return false; - case 'b': // Print B register. - case 'h': // Print H register. - case 's': // Print S register. - case 'd': // Print D register. - case 'q': // Print Q register. - if (MO.isReg()) { - const TargetRegisterClass *RC; - switch (ExtraCode[0]) { - case 'b': - RC = &ARM64::FPR8RegClass; - break; - case 'h': - RC = &ARM64::FPR16RegClass; - break; - case 's': - RC = &ARM64::FPR32RegClass; - break; - case 'd': - RC = &ARM64::FPR64RegClass; - break; - case 'q': - RC = &ARM64::FPR128RegClass; - break; - default: - return true; - } - return printAsmRegInClass(MO, RC, false /* vector */, O); - } - printOperand(MI, OpNum, O); - return false; - } - } - - // According to ARM, we should emit x and v registers unless we have a - // modifier. 
- if (MO.isReg()) { - unsigned Reg = MO.getReg(); - - // If this is a w or x register, print an x register. - if (ARM64::GPR32allRegClass.contains(Reg) || - ARM64::GPR64allRegClass.contains(Reg)) - return printAsmMRegister(MO, 'x', O); - - // If this is a b, h, s, d, or q register, print it as a v register. - return printAsmRegInClass(MO, &ARM64::FPR128RegClass, true /* vector */, O); - } - - printOperand(MI, OpNum, O); - return false; -} - -bool ARM64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNum, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. - - const MachineOperand &MO = MI->getOperand(OpNum); - assert(MO.isReg() && "unexpected inline asm memory operand"); - O << "[" << ARM64InstPrinter::getRegisterName(MO.getReg()) << "]"; - return false; -} - -void ARM64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, - raw_ostream &OS) { - unsigned NOps = MI->getNumOperands(); - assert(NOps == 4); - OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata())); - OS << V.getName(); - OS << " <- "; - // Frame address. Currently handles register +- offset only. - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); - OS << '['; - printOperand(MI, 0, OS); - OS << '+'; - printOperand(MI, 1, OS); - OS << ']'; - OS << "+"; - printOperand(MI, NOps - 2, OS); -} - -void ARM64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI) { - unsigned NumNOPBytes = MI.getOperand(1).getImm(); - - SM.recordStackMap(MI); - // Emit padding. - assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); - for (unsigned i = 0; i < NumNOPBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0)); -} - -// Lower a patchpoint of the form: -// [<def>], <id>, <numBytes>, <target>, <numArgs> -void ARM64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI) { - SM.recordPatchPoint(MI); - - PatchPointOpers Opers(&MI); - - int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm(); - unsigned EncodedBytes = 0; - if (CallTarget) { - assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget && - "High 16 bits of call target should be zero."); - unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); - EncodedBytes = 16; - // Materialize the jump address: - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVZWi) - .addReg(ScratchReg) - .addImm((CallTarget >> 32) & 0xFFFF) - .addImm(32)); - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi) - .addReg(ScratchReg) - .addReg(ScratchReg) - .addImm((CallTarget >> 16) & 0xFFFF) - .addImm(16)); - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi) - .addReg(ScratchReg) - .addReg(ScratchReg) - .addImm(CallTarget & 0xFFFF) - .addImm(0)); - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::BLR).addReg(ScratchReg)); - } - // Emit padding. 
- unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm(); - assert(NumBytes >= EncodedBytes && - "Patchpoint can't request size less than the length of a call."); - assert((NumBytes - EncodedBytes) % 4 == 0 && - "Invalid number of NOP bytes requested!"); - for (unsigned i = EncodedBytes; i < NumBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0)); -} - -// Simple pseudo-instructions have their lowering (with expansion to real -// instructions) auto-generated. -#include "ARM64GenMCPseudoLowering.inc" - -static unsigned getRealIndexedOpcode(unsigned Opc) { - switch (Opc) { - case ARM64::LDRXpre_isel: return ARM64::LDRXpre; - case ARM64::LDRWpre_isel: return ARM64::LDRWpre; - case ARM64::LDRDpre_isel: return ARM64::LDRDpre; - case ARM64::LDRSpre_isel: return ARM64::LDRSpre; - case ARM64::LDRBBpre_isel: return ARM64::LDRBBpre; - case ARM64::LDRHHpre_isel: return ARM64::LDRHHpre; - case ARM64::LDRSBWpre_isel: return ARM64::LDRSBWpre; - case ARM64::LDRSBXpre_isel: return ARM64::LDRSBXpre; - case ARM64::LDRSHWpre_isel: return ARM64::LDRSHWpre; - case ARM64::LDRSHXpre_isel: return ARM64::LDRSHXpre; - case ARM64::LDRSWpre_isel: return ARM64::LDRSWpre; - - case ARM64::LDRDpost_isel: return ARM64::LDRDpost; - case ARM64::LDRSpost_isel: return ARM64::LDRSpost; - case ARM64::LDRXpost_isel: return ARM64::LDRXpost; - case ARM64::LDRWpost_isel: return ARM64::LDRWpost; - case ARM64::LDRHHpost_isel: return ARM64::LDRHHpost; - case ARM64::LDRBBpost_isel: return ARM64::LDRBBpost; - case ARM64::LDRSWpost_isel: return ARM64::LDRSWpost; - case ARM64::LDRSHWpost_isel: return ARM64::LDRSHWpost; - case ARM64::LDRSHXpost_isel: return ARM64::LDRSHXpost; - case ARM64::LDRSBWpost_isel: return ARM64::LDRSBWpost; - case ARM64::LDRSBXpost_isel: return ARM64::LDRSBXpost; - - case ARM64::STRXpre_isel: return ARM64::STRXpre; - case ARM64::STRWpre_isel: return ARM64::STRWpre; - case ARM64::STRHHpre_isel: return ARM64::STRHHpre; - case ARM64::STRBBpre_isel: return ARM64::STRBBpre; - case ARM64::STRDpre_isel: return ARM64::STRDpre; - case ARM64::STRSpre_isel: return ARM64::STRSpre; - } - llvm_unreachable("Unexpected pre-indexed opcode!"); -} - -void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { - // Do any auto-generated pseudo lowerings. - if (emitPseudoExpansionLowering(OutStreamer, MI)) - return; - - if (ARM64FI->getLOHRelated().count(MI)) { - // Generate a label for LOH related instruction - MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++); - // Associate the instruction with the label - LOHInstToLabel[MI] = LOHLabel; - OutStreamer.EmitLabel(LOHLabel); - } - - // Do any manual lowerings. - switch (MI->getOpcode()) { - default: - break; - case ARM64::DBG_VALUE: { - if (isVerbose() && OutStreamer.hasRawTextSupport()) { - SmallString<128> TmpStr; - raw_svector_ostream OS(TmpStr); - PrintDebugValueComment(MI, OS); - OutStreamer.EmitRawText(StringRef(OS.str())); - } - return; - } - // Indexed loads and stores use a pseudo to handle complex operand - // tricks and writeback to the base register. We strip off the writeback - // operand and switch the opcode here. Post-indexed stores were handled by the - // tablegen'erated pseudos above. (The complex operand <--> simple - // operand isel is beyond tablegen's ability, so we do these manually). 
- case ARM64::LDRHHpre_isel: - case ARM64::LDRBBpre_isel: - case ARM64::LDRXpre_isel: - case ARM64::LDRWpre_isel: - case ARM64::LDRDpre_isel: - case ARM64::LDRSpre_isel: - case ARM64::LDRSBWpre_isel: - case ARM64::LDRSBXpre_isel: - case ARM64::LDRSHWpre_isel: - case ARM64::LDRSHXpre_isel: - case ARM64::LDRSWpre_isel: - case ARM64::LDRDpost_isel: - case ARM64::LDRSpost_isel: - case ARM64::LDRXpost_isel: - case ARM64::LDRWpost_isel: - case ARM64::LDRHHpost_isel: - case ARM64::LDRBBpost_isel: - case ARM64::LDRSWpost_isel: - case ARM64::LDRSHWpost_isel: - case ARM64::LDRSHXpost_isel: - case ARM64::LDRSBWpost_isel: - case ARM64::LDRSBXpost_isel: { - MCInst TmpInst; - // For loads, the writeback operand to be skipped is the second. - TmpInst.setOpcode(getRealIndexedOpcode(MI->getOpcode())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(2).getReg())); - TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm())); - EmitToStreamer(OutStreamer, TmpInst); - return; - } - case ARM64::STRXpre_isel: - case ARM64::STRWpre_isel: - case ARM64::STRHHpre_isel: - case ARM64::STRBBpre_isel: - case ARM64::STRDpre_isel: - case ARM64::STRSpre_isel: { - MCInst TmpInst; - // For loads, the writeback operand to be skipped is the first. - TmpInst.setOpcode(getRealIndexedOpcode(MI->getOpcode())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(2).getReg())); - TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm())); - EmitToStreamer(OutStreamer, TmpInst); - return; - } - - // Tail calls use pseudo instructions so they have the proper code-gen - // attributes (isCall, isReturn, etc.). We lower them to the real - // instruction here. - case ARM64::TCRETURNri: { - MCInst TmpInst; - TmpInst.setOpcode(ARM64::BR); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - EmitToStreamer(OutStreamer, TmpInst); - return; - } - case ARM64::TCRETURNdi: { - MCOperand Dest; - MCInstLowering.lowerOperand(MI->getOperand(0), Dest); - MCInst TmpInst; - TmpInst.setOpcode(ARM64::B); - TmpInst.addOperand(Dest); - EmitToStreamer(OutStreamer, TmpInst); - return; - } - case ARM64::TLSDESC_BLR: { - MCOperand Callee, Sym; - MCInstLowering.lowerOperand(MI->getOperand(0), Callee); - MCInstLowering.lowerOperand(MI->getOperand(1), Sym); - - // First emit a relocation-annotation. This expands to no code, but requests - // the following instruction gets an R_AARCH64_TLSDESC_CALL. - MCInst TLSDescCall; - TLSDescCall.setOpcode(ARM64::TLSDESCCALL); - TLSDescCall.addOperand(Sym); - EmitToStreamer(OutStreamer, TLSDescCall); - - // Other than that it's just a normal indirect call to the function loaded - // from the descriptor. - MCInst BLR; - BLR.setOpcode(ARM64::BLR); - BLR.addOperand(Callee); - EmitToStreamer(OutStreamer, BLR); - - return; - } - - case TargetOpcode::STACKMAP: - return LowerSTACKMAP(OutStreamer, SM, *MI); - - case TargetOpcode::PATCHPOINT: - return LowerPATCHPOINT(OutStreamer, SM, *MI); - } - - // Finally, do the automated lowerings for everything else. - MCInst TmpInst; - MCInstLowering.Lower(MI, TmpInst); - EmitToStreamer(OutStreamer, TmpInst); -} - -// Force static initialization. 
-extern "C" void LLVMInitializeARM64AsmPrinter() { - RegisterAsmPrinter<ARM64AsmPrinter> X(TheARM64Target); -} diff --git a/lib/Target/ARM64/ARM64BranchRelaxation.cpp b/lib/Target/ARM64/ARM64BranchRelaxation.cpp deleted file mode 100644 index a9bbef5..0000000 --- a/lib/Target/ARM64/ARM64BranchRelaxation.cpp +++ /dev/null @@ -1,505 +0,0 @@ -//===-- ARM64BranchRelaxation.cpp - ARM64 branch relaxation ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-branch-relax" -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" -using namespace llvm; - -static cl::opt<bool> -BranchRelaxation("arm64-branch-relax", cl::Hidden, cl::init(true), - cl::desc("Relax out of range conditional branches")); - -static cl::opt<unsigned> -TBZDisplacementBits("arm64-tbz-offset-bits", cl::Hidden, cl::init(14), - cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); - -static cl::opt<unsigned> -CBZDisplacementBits("arm64-cbz-offset-bits", cl::Hidden, cl::init(19), - cl::desc("Restrict range of CB[N]Z instructions (DEBUG)")); - -static cl::opt<unsigned> -BCCDisplacementBits("arm64-bcc-offset-bits", cl::Hidden, cl::init(19), - cl::desc("Restrict range of Bcc instructions (DEBUG)")); - -STATISTIC(NumSplit, "Number of basic blocks split"); -STATISTIC(NumRelaxed, "Number of conditional branches relaxed"); - -namespace { -class ARM64BranchRelaxation : public MachineFunctionPass { - /// BasicBlockInfo - Information about the offset and size of a single - /// basic block. - struct BasicBlockInfo { - /// Offset - Distance from the beginning of the function to the beginning - /// of this basic block. - /// - /// The offset is always aligned as required by the basic block. - unsigned Offset; - - /// Size - Size of the basic block in bytes. If the block contains - /// inline assembly, this is a worst case estimate. - /// - /// The size does not include any alignment padding whether from the - /// beginning of the block, or from an aligned jump table at the end. - unsigned Size; - - BasicBlockInfo() : Offset(0), Size(0) {} - - /// Compute the offset immediately following this block. If LogAlign is - /// specified, return the offset the successor block will get if it has - /// this alignment. 
- unsigned postOffset(unsigned LogAlign = 0) const { - unsigned PO = Offset + Size; - unsigned Align = 1 << LogAlign; - return (PO + Align - 1) / Align * Align; - } - }; - - SmallVector<BasicBlockInfo, 16> BlockInfo; - - MachineFunction *MF; - const ARM64InstrInfo *TII; - - bool relaxBranchInstructions(); - void scanFunction(); - MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); - void adjustBlockOffsets(MachineBasicBlock *BB); - bool isBlockInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); - bool fixupConditionalBranch(MachineInstr *MI); - void computeBlockSize(MachineBasicBlock *MBB); - unsigned getInstrOffset(MachineInstr *MI) const; - void dumpBBs(); - void verify(); - -public: - static char ID; - ARM64BranchRelaxation() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual const char *getPassName() const { - return "ARM64 branch relaxation pass"; - } -}; -char ARM64BranchRelaxation::ID = 0; -} - -/// verify - check BBOffsets, BBSizes, alignment of islands -void ARM64BranchRelaxation::verify() { -#ifndef NDEBUG - unsigned PrevNum = MF->begin()->getNumber(); - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; - ++MBBI) { - MachineBasicBlock *MBB = MBBI; - unsigned Align = MBB->getAlignment(); - unsigned Num = MBB->getNumber(); - assert(BlockInfo[Num].Offset % (1u << Align) == 0); - assert(!Num || BlockInfo[PrevNum].postOffset() <= BlockInfo[Num].Offset); - PrevNum = Num; - } -#endif -} - -/// print block size and offset information - debugging -void ARM64BranchRelaxation::dumpBBs() { - for (auto &MBB: *MF) { - const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()]; - dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset) - << format("size=%#x\n", BBI.Size); - } -} - -/// BBHasFallthrough - Return true if the specified basic block can fallthrough -/// into the block immediately after it. -static bool BBHasFallthrough(MachineBasicBlock *MBB) { - // Get the next machine basic block in the function. - MachineFunction::iterator MBBI = MBB; - // Can't fall off end of function. - if (std::next(MBBI) == MBB->getParent()->end()) - return false; - - MachineBasicBlock *NextBB = std::next(MBBI); - for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); - I != E; ++I) - if (*I == NextBB) - return true; - - return false; -} - -/// scanFunction - Do the initial scan of the function, building up -/// information about each block. -void ARM64BranchRelaxation::scanFunction() { - BlockInfo.clear(); - BlockInfo.resize(MF->getNumBlockIDs()); - - // First thing, compute the size of all basic blocks, and see if the function - // has any inline assembly in it. If so, we have to be conservative about - // alignment assumptions, as we don't know for sure the size of any - // instructions in the inline assembly. - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - computeBlockSize(I); - - // Compute block offsets and known bits. - adjustBlockOffsets(MF->begin()); -} - -/// computeBlockSize - Compute the size for MBB. -/// This function updates BlockInfo directly. -void ARM64BranchRelaxation::computeBlockSize(MachineBasicBlock *MBB) { - unsigned Size = 0; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) - Size += TII->GetInstSizeInBytes(I); - BlockInfo[MBB->getNumber()].Size = Size; -} - -/// getInstrOffset - Return the current offset of the specified machine -/// instruction from the start of the function. 
This offset changes as stuff is -/// moved around inside the function. -unsigned ARM64BranchRelaxation::getInstrOffset(MachineInstr *MI) const { - MachineBasicBlock *MBB = MI->getParent(); - - // The offset is composed of two things: the sum of the sizes of all MBB's - // before this instruction's block, and the offset from the start of the block - // it is in. - unsigned Offset = BlockInfo[MBB->getNumber()].Offset; - - // Sum instructions before MI in MBB. - for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { - assert(I != MBB->end() && "Didn't find MI in its own basic block?"); - Offset += TII->GetInstSizeInBytes(I); - } - return Offset; -} - -void ARM64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock *Start) { - unsigned PrevNum = Start->getNumber(); - MachineFunction::iterator MBBI = Start, E = MF->end(); - for (++MBBI; MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = MBBI; - unsigned Num = MBB->getNumber(); - if (!Num) // block zero is never changed from offset zero. - continue; - // Get the offset and known bits at the end of the layout predecessor. - // Include the alignment of the current block. - unsigned LogAlign = MBBI->getAlignment(); - BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(LogAlign); - PrevNum = Num; - } -} - -/// Split the basic block containing MI into two blocks, which are joined by -/// an unconditional branch. Update data structures and renumber blocks to -/// account for this change and returns the newly created block. -/// NOTE: Successor list of the original BB is out of date after this function, -/// and must be updated by the caller! Other transforms follow using this -/// utility function, so no point updating now rather than waiting. -MachineBasicBlock * -ARM64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) { - MachineBasicBlock *OrigBB = MI->getParent(); - - // Create a new MBB for the code after the OrigBB. - MachineBasicBlock *NewBB = - MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); - MachineFunction::iterator MBBI = OrigBB; - ++MBBI; - MF->insert(MBBI, NewBB); - - // Splice the instructions starting with MI over to NewBB. - NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); - - // Add an unconditional branch from OrigBB to NewBB. - // Note the new unconditional branch is not being recorded. - // There doesn't seem to be meaningful DebugInfo available; this doesn't - // correspond to anything in the source. - BuildMI(OrigBB, DebugLoc(), TII->get(ARM64::B)).addMBB(NewBB); - - // Insert an entry into BlockInfo to align it properly with the block numbers. - BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); - - // Figure out how large the OrigBB is. As the first half of the original - // block, it cannot contain a tablejump. The size includes - // the new jump we added. (It should be possible to do this without - // recounting everything, but it's very confusing, and this is rarely - // executed.) - computeBlockSize(OrigBB); - - // Figure out how large the NewMBB is. As the second half of the original - // block, it may contain a tablejump. - computeBlockSize(NewBB); - - // All BBOffsets following these blocks must be modified. - adjustBlockOffsets(OrigBB); - - ++NumSplit; - - return NewBB; -} - -/// isBlockInRange - Returns true if the distance between specific MI and -/// specific BB can fit in MI's displacement field. 
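isBlockInRange below reduces to a single formula, so it may help to see the displacement limit computed stand-alone. The conditional-branch immediate counts 4-byte words and one bit is the sign, which is where ((1 << (Bits - 1)) - 1) << 2 comes from; with the defaults above (14 bits for TB(N)Z, 19 for CB(N)Z and Bcc) that gives roughly +/-32 KiB and +/-1 MiB of reach. A minimal sketch (illustrative helper name, plain C++):

#include <cstdio>

// Largest byte displacement encodable by a conditional branch with the
// given number of immediate bits: drop the sign bit, scale by 4.
static unsigned maxBranchOffset(unsigned Bits) {
  return ((1u << (Bits - 1)) - 1) << 2;
}

int main() {
  printf("TB(N)Z     (14 bits): %u bytes\n", maxBranchOffset(14)); // 32764
  printf("CB(N)Z/Bcc (19 bits): %u bytes\n", maxBranchOffset(19)); // 1048572
}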
-bool ARM64BranchRelaxation::isBlockInRange(MachineInstr *MI, - MachineBasicBlock *DestBB, - unsigned Bits) { - unsigned MaxOffs = ((1 << (Bits - 1)) - 1) << 2; - unsigned BrOffset = getInstrOffset(MI); - unsigned DestOffset = BlockInfo[DestBB->getNumber()].Offset; - - DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() - << " from BB#" << MI->getParent()->getNumber() - << " max delta=" << MaxOffs << " from " << getInstrOffset(MI) - << " to " << DestOffset << " offset " - << int(DestOffset - BrOffset) << "\t" << *MI); - - // Branch before the Dest. - if (BrOffset <= DestOffset) - return (DestOffset - BrOffset <= MaxOffs); - return (BrOffset - DestOffset <= MaxOffs); -} - -static bool isConditionalBranch(unsigned Opc) { - switch (Opc) { - default: - return false; - case ARM64::TBZ: - case ARM64::TBNZ: - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: - case ARM64::Bcc: - return true; - } -} - -static MachineBasicBlock *getDestBlock(MachineInstr *MI) { - switch (MI->getOpcode()) { - default: - assert(0 && "unexpected opcode!"); - case ARM64::TBZ: - case ARM64::TBNZ: - return MI->getOperand(2).getMBB(); - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: - case ARM64::Bcc: - return MI->getOperand(1).getMBB(); - } -} - -static unsigned getOppositeConditionOpcode(unsigned Opc) { - switch (Opc) { - default: - assert(0 && "unexpected opcode!"); - case ARM64::TBNZ: return ARM64::TBZ; - case ARM64::TBZ: return ARM64::TBNZ; - case ARM64::CBNZW: return ARM64::CBZW; - case ARM64::CBNZX: return ARM64::CBZX; - case ARM64::CBZW: return ARM64::CBNZW; - case ARM64::CBZX: return ARM64::CBNZX; - case ARM64::Bcc: return ARM64::Bcc; // Condition is an operand for Bcc. - } -} - -static unsigned getBranchDisplacementBits(unsigned Opc) { - switch (Opc) { - default: - assert(0 && "unexpected opcode!"); - case ARM64::TBNZ: - case ARM64::TBZ: - return TBZDisplacementBits; - case ARM64::CBNZW: - case ARM64::CBZW: - case ARM64::CBNZX: - case ARM64::CBZX: - return CBZDisplacementBits; - case ARM64::Bcc: - return BCCDisplacementBits; - } -} - -static inline void invertBccCondition(MachineInstr *MI) { - assert(MI->getOpcode() == ARM64::Bcc && "Unexpected opcode!"); - ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(0).getImm(); - CC = ARM64CC::getInvertedCondCode(CC); - MI->getOperand(0).setImm((int64_t)CC); -} - -/// fixupConditionalBranch - Fix up a conditional branch whose destination is -/// too far away to fit in its displacement field. It is converted to an inverse -/// conditional branch + an unconditional branch to the destination. -bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { - MachineBasicBlock *DestBB = getDestBlock(MI); - - // Add an unconditional branch to the destination and invert the branch - // condition to jump over it: - // tbz L1 - // => - // tbnz L2 - // b L1 - // L2: - - // If the branch is at the end of its MBB and that has a fall-through block, - // direct the updated conditional branch to the fall-through block. Otherwise, - // split the MBB before the next instruction. - MachineBasicBlock *MBB = MI->getParent(); - MachineInstr *BMI = &MBB->back(); - bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); - - if (BMI != MI) { - if (std::next(MachineBasicBlock::iterator(MI)) == - std::prev(MBB->getLastNonDebugInstr()) && - BMI->getOpcode() == ARM64::B) { - // Last MI in the BB is an unconditional branch. 
Can we simply invert the - // condition and swap destinations: - // beq L1 - // b L2 - // => - // bne L2 - // b L1 - MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); - if (isBlockInRange(MI, NewDest, - getBranchDisplacementBits(MI->getOpcode()))) { - DEBUG(dbgs() << " Invert condition and swap its destination with " - << *BMI); - BMI->getOperand(0).setMBB(DestBB); - unsigned OpNum = - (MI->getOpcode() == ARM64::TBZ || MI->getOpcode() == ARM64::TBNZ) - ? 2 - : 1; - MI->getOperand(OpNum).setMBB(NewDest); - MI->setDesc(TII->get(getOppositeConditionOpcode(MI->getOpcode()))); - if (MI->getOpcode() == ARM64::Bcc) - invertBccCondition(MI); - return true; - } - } - } - - if (NeedSplit) { - // Analyze the branch so we know how to update the successor lists. - MachineBasicBlock *TBB, *FBB; - SmallVector<MachineOperand, 2> Cond; - TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, false); - - MachineBasicBlock *NewBB = splitBlockBeforeInstr(MI); - // No need for the branch to the next block. We're adding an unconditional - // branch to the destination. - int delta = TII->GetInstSizeInBytes(&MBB->back()); - BlockInfo[MBB->getNumber()].Size -= delta; - MBB->back().eraseFromParent(); - // BlockInfo[SplitBB].Offset is wrong temporarily, fixed below - - // Update the successor lists according to the transformation to follow. - // Do it here since if there's no split, no update is needed. - MBB->replaceSuccessor(FBB, NewBB); - NewBB->addSuccessor(FBB); - } - MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB)); - - DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() - << ", invert condition and change dest. to BB#" - << NextBB->getNumber() << "\n"); - - // Insert a new conditional branch and a new unconditional branch. - MachineInstrBuilder MIB = BuildMI( - MBB, DebugLoc(), TII->get(getOppositeConditionOpcode(MI->getOpcode()))) - .addOperand(MI->getOperand(0)); - if (MI->getOpcode() == ARM64::TBZ || MI->getOpcode() == ARM64::TBNZ) - MIB.addOperand(MI->getOperand(1)); - if (MI->getOpcode() == ARM64::Bcc) - invertBccCondition(MIB); - MIB.addMBB(NextBB); - BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); - BuildMI(MBB, DebugLoc(), TII->get(ARM64::B)).addMBB(DestBB); - BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); - - // Remove the old conditional branch. It may or may not still be in MBB. - BlockInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI); - MI->eraseFromParent(); - - // Finally, keep the block offsets up to date. - adjustBlockOffsets(MBB); - return true; -} - -bool ARM64BranchRelaxation::relaxBranchInstructions() { - bool Changed = false; - // Relaxing branches involves creating new basic blocks, so re-eval - // end() for termination. - for (auto &MBB : *MF) { - MachineInstr *MI = MBB.getFirstTerminator(); - if (isConditionalBranch(MI->getOpcode()) && - !isBlockInRange(MI, getDestBlock(MI), - getBranchDisplacementBits(MI->getOpcode()))) { - fixupConditionalBranch(MI); - ++NumRelaxed; - Changed = true; - } - } - return Changed; -} - -bool ARM64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) { - MF = &mf; - - // If the pass is disabled, just bail early. - if (!BranchRelaxation) - return false; - - DEBUG(dbgs() << "***** ARM64BranchRelaxation *****\n"); - - TII = (const ARM64InstrInfo *)MF->getTarget().getInstrInfo(); - - // Renumber all of the machine basic blocks in the function, guaranteeing that - // the numbers agree with the position of the block in the function. 
- MF->RenumberBlocks(); - - // Do the initial scan of the function, building up information about the - // sizes of each block. - scanFunction(); - - DEBUG(dbgs() << " Basic blocks before relaxation\n"); - DEBUG(dumpBBs()); - - bool MadeChange = false; - while (relaxBranchInstructions()) - MadeChange = true; - - // After a while, this might be made debug-only, but it is not expensive. - verify(); - - DEBUG(dbgs() << " Basic blocks after relaxation\n"); - DEBUG(dbgs() << '\n'; dumpBBs()); - - BlockInfo.clear(); - - return MadeChange; -} - -/// createARM64BranchRelaxation - returns an instance of the constpool -/// island pass. -FunctionPass *llvm::createARM64BranchRelaxation() { - return new ARM64BranchRelaxation(); -} diff --git a/lib/Target/ARM64/ARM64CallingConv.h b/lib/Target/ARM64/ARM64CallingConv.h deleted file mode 100644 index 0128236..0000000 --- a/lib/Target/ARM64/ARM64CallingConv.h +++ /dev/null @@ -1,94 +0,0 @@ -//=== ARM64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the custom routines for the ARM64 Calling Convention that -// aren't done by tablegen. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64CALLINGCONV_H -#define ARM64CALLINGCONV_H - -#include "ARM64InstrInfo.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/Target/TargetInstrInfo.h" - -namespace llvm { - -/// CC_ARM64_Custom_i1i8i16_Reg - customized handling of passing i1/i8/i16 via -/// register. Here, ValVT can be i1/i8/i16 or i32 depending on whether the -/// argument is already promoted and LocVT is i1/i8/i16. We only promote the -/// argument to i32 if we are sure this argument will be passed in register. -static bool CC_ARM64_Custom_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, - CCState &State, - bool IsWebKitJS = false) { - static const uint16_t RegList1[] = { ARM64::W0, ARM64::W1, ARM64::W2, - ARM64::W3, ARM64::W4, ARM64::W5, - ARM64::W6, ARM64::W7 }; - static const uint16_t RegList2[] = { ARM64::X0, ARM64::X1, ARM64::X2, - ARM64::X3, ARM64::X4, ARM64::X5, - ARM64::X6, ARM64::X7 }; - static const uint16_t WebKitRegList1[] = { ARM64::W0 }; - static const uint16_t WebKitRegList2[] = { ARM64::X0 }; - - const uint16_t *List1 = IsWebKitJS ? WebKitRegList1 : RegList1; - const uint16_t *List2 = IsWebKitJS ? WebKitRegList2 : RegList2; - - if (unsigned Reg = State.AllocateReg(List1, List2, 8)) { - // Customized extra section for handling i1/i8/i16: - // We need to promote the argument to i32 if it is not done already. - if (ValVT != MVT::i32) { - if (ArgFlags.isSExt()) - LocInfo = CCValAssign::SExt; - else if (ArgFlags.isZExt()) - LocInfo = CCValAssign::ZExt; - else - LocInfo = CCValAssign::AExt; - ValVT = MVT::i32; - } - // Set LocVT to i32 as well if passing via register. - LocVT = MVT::i32; - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return true; - } - return false; -} - -/// CC_ARM64_WebKit_JS_i1i8i16_Reg - customized handling of passing i1/i8/i16 -/// via register. This behaves the same as CC_ARM64_Custom_i1i8i16_Reg, but only -/// uses the first register. 
-static bool CC_ARM64_WebKit_JS_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, - CCState &State) { - return CC_ARM64_Custom_i1i8i16_Reg(ValNo, ValVT, LocVT, LocInfo, ArgFlags, - State, true); -} - -/// CC_ARM64_Custom_i1i8i16_Stack: customized handling of passing i1/i8/i16 on -/// stack. Here, ValVT can be i1/i8/i16 or i32 depending on whether the argument -/// is already promoted and LocVT is i1/i8/i16. If ValVT is already promoted, -/// it will be truncated back to i1/i8/i16. -static bool CC_ARM64_Custom_i1i8i16_Stack(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, - CCState &State) { - unsigned Space = ((LocVT == MVT::i1 || LocVT == MVT::i8) ? 1 : 2); - unsigned Offset12 = State.AllocateStack(Space, Space); - ValVT = LocVT; - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset12, LocVT, LocInfo)); - return true; -} - -} // End llvm namespace - -#endif diff --git a/lib/Target/ARM64/ARM64CallingConvention.td b/lib/Target/ARM64/ARM64CallingConvention.td deleted file mode 100644 index 9ac888f..0000000 --- a/lib/Target/ARM64/ARM64CallingConvention.td +++ /dev/null @@ -1,210 +0,0 @@ -//===- ARM64CallingConv.td - Calling Conventions for ARM64 -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This describes the calling conventions for ARM64 architecture. -// -//===----------------------------------------------------------------------===// - -/// CCIfAlign - Match of the original alignment of the arg -class CCIfAlign<string Align, CCAction A> : - CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>; - -//===----------------------------------------------------------------------===// -// ARM AAPCS64 Calling Convention -//===----------------------------------------------------------------------===// - -def CC_ARM64_AAPCS : CallingConv<[ - CCIfType<[v2f32], CCBitConvertToType<v2i32>>, - CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>, - - // An SRet is passed in X8, not X0 like a normal pointer parameter. - CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>, - - // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, - // up to eight each of GPR and FPR. - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Reg">>, - CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], - [X0, X1, X2, X3, X4, X5, X6, X7]>>, - // i128 is split to two i64s, we can't fit half to register X7. - CCIfType<[i64], CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], - [X0, X1, X3, X5]>>>, - - // i128 is split to two i64s, and its stack alignment is 16 bytes. 
- CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>, - - CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], - [W0, W1, W2, W3, W4, W5, W6, W7]>>, - CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], - CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], - CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - - // If more than will fit in registers, pass them on the stack instead. - CCIfType<[i1, i8, i16], CCAssignToStack<8, 8>>, - CCIfType<[i32, f32], CCAssignToStack<8, 8>>, - CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8], - CCAssignToStack<8, 8>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>> -]>; - -def RetCC_ARM64_AAPCS : CallingConv<[ - CCIfType<[v2f32], CCBitConvertToType<v2i32>>, - CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>, - - CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], - [X0, X1, X2, X3, X4, X5, X6, X7]>>, - CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], - [W0, W1, W2, W3, W4, W5, W6, W7]>>, - CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], - CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], - CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> -]>; - - -// Darwin uses a calling convention which differs in only two ways -// from the standard one at this level: -// + i128s (i.e. split i64s) don't need even registers. -// + Stack slots are sized as needed rather than being at least 64-bit. -def CC_ARM64_DarwinPCS : CallingConv<[ - CCIfType<[v2f32], CCBitConvertToType<v2i32>>, - CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>, - - // An SRet is passed in X8, not X0 like a normal pointer parameter. - CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>, - - // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, - // up to eight each of GPR and FPR. - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Reg">>, - CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], - [X0, X1, X2, X3, X4, X5, X6, X7]>>, - // i128 is split to two i64s, we can't fit half to register X7. - CCIfType<[i64], - CCIfSplit<CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6], - [W0, W1, W2, W3, W4, W5, W6]>>>, - // i128 is split to two i64s, and its stack alignment is 16 bytes. 
- CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>, - - CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], - [W0, W1, W2, W3, W4, W5, W6, W7]>>, - CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], - CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], - CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - - // If more than will fit in registers, pass them on the stack instead. - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Stack">>, - CCIfType<[i32, f32], CCAssignToStack<4, 4>>, - CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8], - CCAssignToStack<8, 8>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>> -]>; - -def CC_ARM64_DarwinPCS_VarArg : CallingConv<[ - CCIfType<[v2f32], CCBitConvertToType<v2i32>>, - CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>, - - // Handle all scalar types as either i64 or f64. - CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, - CCIfType<[f32], CCPromoteToType<f64>>, - - // Everything is on the stack. - // i128 is split to two i64s, and its stack alignment is 16 bytes. - CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>, - CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], CCAssignToStack<8, 8>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>> -]>; - -// The WebKit_JS calling convention only passes the first argument (the callee) -// in register and the remaining arguments on stack. We allow 32bit stack slots, -// so that WebKit can write partial values in the stack and define the other -// 32bit quantity as undef. -def CC_ARM64_WebKit_JS : CallingConv<[ - // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_WebKit_JS_i1i8i16_Reg">>, - CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>, - CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>, - - // Pass the remaining arguments on the stack instead. - CCIfType<[i1, i8, i16], CCAssignToStack<4, 4>>, - CCIfType<[i32, f32], CCAssignToStack<4, 4>>, - CCIfType<[i64, f64], CCAssignToStack<8, 8>> -]>; - -def RetCC_ARM64_WebKit_JS : CallingConv<[ - CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], - [X0, X1, X2, X3, X4, X5, X6, X7]>>, - CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], - [W0, W1, W2, W3, W4, W5, W6, W7]>>, - CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> -]>; - -// FIXME: LR is only callee-saved in the sense that *we* preserve it and are -// presumably a callee to someone. External functions may not do so, but this -// is currently safe since BL has LR as an implicit-def and what happens after a -// tail call doesn't matter. -// -// It would be better to model its preservation semantics properly (create a -// vreg on entry, use it in RET & tail call generation; make that vreg def if we -// end up saving LR as part of a call frame). Watch this space... 
-def CSR_ARM64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, - X23, X24, X25, X26, X27, X28, - D8, D9, D10, D11, - D12, D13, D14, D15)>; - -// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since -// 'this' and the pointer return value are both passed in X0 in these cases, -// this can be partially modelled by treating X0 as a callee-saved register; -// only the resulting RegMask is used; the SaveList is ignored -// -// (For generic ARM 64-bit ABI code, clang will not generate constructors or -// destructors with 'this' returns, so this RegMask will not be used in that -// case) -def CSR_ARM64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_ARM64_AAPCS, X0)>; - -// The function used by Darwin to obtain the address of a thread-local variable -// guarantees more than a normal AAPCS function. x16 and x17 are used on the -// fast path for calculation, but other registers except X0 (argument/return) -// and LR (it is a call, after all) are preserved. -def CSR_ARM64_TLS_Darwin - : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17), - FP, - (sequence "Q%u", 0, 31))>; - -// The ELF stub used for TLS-descriptor access saves every feasible -// register. Only X0 and LR are clobbered. -def CSR_ARM64_TLS_ELF - : CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP, - (sequence "Q%u", 0, 31))>; - -def CSR_ARM64_AllRegs - : CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP, - (sequence "X%u", 0, 28), FP, LR, SP, - (sequence "B%u", 0, 31), (sequence "H%u", 0, 31), - (sequence "S%u", 0, 31), (sequence "D%u", 0, 31), - (sequence "Q%u", 0, 31))>; - diff --git a/lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp b/lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp deleted file mode 100644 index e3f8248..0000000 --- a/lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp +++ /dev/null @@ -1,147 +0,0 @@ -//===-- ARM64CleanupLocalDynamicTLSPass.cpp -----------------------*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Local-dynamic access to thread-local variables proceeds in three stages. -// -// 1. The offset of this Module's thread-local area from TPIDR_EL0 is calculated -// in much the same way as a general-dynamic TLS-descriptor access against -// the special symbol _TLS_MODULE_BASE. -// 2. The variable's offset from _TLS_MODULE_BASE_ is calculated using -// instructions with "dtprel" modifiers. -// 3. These two are added, together with TPIDR_EL0, to obtain the variable's -// true address. -// -// This is only better than general-dynamic access to the variable if two or -// more of the first stage TLS-descriptor calculations can be combined. This -// pass looks through a function and performs such combinations. 
-// -//===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64TargetMachine.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -using namespace llvm; - -namespace { -struct LDTLSCleanup : public MachineFunctionPass { - static char ID; - LDTLSCleanup() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF) { - ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>(); - if (AFI->getNumLocalDynamicTLSAccesses() < 2) { - // No point folding accesses if there isn't at least two. - return false; - } - - MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>(); - return VisitNode(DT->getRootNode(), 0); - } - - // Visit the dominator subtree rooted at Node in pre-order. - // If TLSBaseAddrReg is non-null, then use that to replace any - // TLS_base_addr instructions. Otherwise, create the register - // when the first such instruction is seen, and then use it - // as we encounter more instructions. - bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) { - MachineBasicBlock *BB = Node->getBlock(); - bool Changed = false; - - // Traverse the current block. - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; - ++I) { - switch (I->getOpcode()) { - case ARM64::TLSDESC_BLR: - // Make sure it's a local dynamic access. - if (!I->getOperand(1).isSymbol() || - strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_")) - break; - - if (TLSBaseAddrReg) - I = replaceTLSBaseAddrCall(I, TLSBaseAddrReg); - else - I = setRegister(I, &TLSBaseAddrReg); - Changed = true; - break; - default: - break; - } - } - - // Visit the children of this block in the dominator tree. - for (MachineDomTreeNode *N : *Node) { - Changed |= VisitNode(N, TLSBaseAddrReg); - } - - return Changed; - } - - // Replace the TLS_base_addr instruction I with a copy from - // TLSBaseAddrReg, returning the new instruction. - MachineInstr *replaceTLSBaseAddrCall(MachineInstr *I, - unsigned TLSBaseAddrReg) { - MachineFunction *MF = I->getParent()->getParent(); - const ARM64TargetMachine *TM = - static_cast<const ARM64TargetMachine *>(&MF->getTarget()); - const ARM64InstrInfo *TII = TM->getInstrInfo(); - - // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the - // code sequence assumes the address will be. - MachineInstr *Copy = - BuildMI(*I->getParent(), I, I->getDebugLoc(), - TII->get(TargetOpcode::COPY), ARM64::X0).addReg(TLSBaseAddrReg); - - // Erase the TLS_base_addr instruction. - I->eraseFromParent(); - - return Copy; - } - - // Create a virtal register in *TLSBaseAddrReg, and populate it by - // inserting a copy instruction after I. Returns the new instruction. - MachineInstr *setRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) { - MachineFunction *MF = I->getParent()->getParent(); - const ARM64TargetMachine *TM = - static_cast<const ARM64TargetMachine *>(&MF->getTarget()); - const ARM64InstrInfo *TII = TM->getInstrInfo(); - - // Create a virtual register for the TLS base address. - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - *TLSBaseAddrReg = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); - - // Insert a copy from X0 to TLSBaseAddrReg for later. 
- MachineInstr *Next = I->getNextNode(); - MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), - TII->get(TargetOpcode::COPY), - *TLSBaseAddrReg).addReg(ARM64::X0); - - return Copy; - } - - virtual const char *getPassName() const { - return "Local Dynamic TLS Access Clean-up"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<MachineDominatorTree>(); - MachineFunctionPass::getAnalysisUsage(AU); - } -}; -} - -char LDTLSCleanup::ID = 0; -FunctionPass *llvm::createARM64CleanupLocalDynamicTLSPass() { - return new LDTLSCleanup(); -} diff --git a/lib/Target/ARM64/ARM64CollectLOH.cpp b/lib/Target/ARM64/ARM64CollectLOH.cpp deleted file mode 100644 index f52778f..0000000 --- a/lib/Target/ARM64/ARM64CollectLOH.cpp +++ /dev/null @@ -1,1157 +0,0 @@ -//===-------------- ARM64CollectLOH.cpp - ARM64 collect LOH pass --*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass that collect the Linker Optimization Hint (LOH). -// This pass should be run at the very end of the compilation flow, just before -// assembly printer. -// To be useful for the linker, the LOH must be printed into the assembly file. -// -// A LOH describes a sequence of instructions that may be optimized by the -// linker. -// This same sequence cannot be optimized by the compiler because some of -// the information will be known at link time. -// For instance, consider the following sequence: -// L1: adrp xA, sym@PAGE -// L2: add xB, xA, sym@PAGEOFF -// L3: ldr xC, [xB, #imm] -// This sequence can be turned into: -// A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB: -// L3: ldr xC, sym+#imm -// It may also be turned into either the following more efficient -// code sequences: -// - If sym@PAGEOFF + #imm fits the encoding space of L3. -// L1: adrp xA, sym@PAGE -// L3: ldr xC, [xB, sym@PAGEOFF + #imm] -// - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB: -// L1: adr xA, sym -// L3: ldr xC, [xB, #imm] -// -// To be valid a LOH must meet all the requirements needed by all the related -// possible linker transformations. -// For instance, using the running example, the constraints to emit -// ".loh AdrpAddLdr" are: -// - L1, L2, and L3 instructions are of the expected type, i.e., -// respectively ADRP, ADD (immediate), and LD. -// - The result of L1 is used only by L2. -// - The register argument (xA) used in the ADD instruction is defined -// only by L1. -// - The result of L2 is used only by L3. -// - The base address (xB) in L3 is defined only L2. 
-// - The ADRP in L1 and the ADD in L2 must reference the same symbol using -// @PAGE/@PAGEOFF with no additional constants -// -// Currently supported LOHs are: -// * So called non-ADRP-related: -// - .loh AdrpAddLdr L1, L2, L3: -// L1: adrp xA, sym@PAGE -// L2: add xB, xA, sym@PAGEOFF -// L3: ldr xC, [xB, #imm] -// - .loh AdrpLdrGotLdr L1, L2, L3: -// L1: adrp xA, sym@GOTPAGE -// L2: ldr xB, [xA, sym@GOTPAGEOFF] -// L3: ldr xC, [xB, #imm] -// - .loh AdrpLdr L1, L3: -// L1: adrp xA, sym@PAGE -// L3: ldr xC, [xA, sym@PAGEOFF] -// - .loh AdrpAddStr L1, L2, L3: -// L1: adrp xA, sym@PAGE -// L2: add xB, xA, sym@PAGEOFF -// L3: str xC, [xB, #imm] -// - .loh AdrpLdrGotStr L1, L2, L3: -// L1: adrp xA, sym@GOTPAGE -// L2: ldr xB, [xA, sym@GOTPAGEOFF] -// L3: str xC, [xB, #imm] -// - .loh AdrpAdd L1, L2: -// L1: adrp xA, sym@PAGE -// L2: add xB, xA, sym@PAGEOFF -// For all these LOHs, L1, L2, L3 form a simple chain: -// L1 result is used only by L2 and L2 result by L3. -// L3 LOH-related argument is defined only by L2 and L2 LOH-related argument -// by L1. -// All these LOHs aim at using more efficient load/store patterns by folding -// some instructions used to compute the address directly into the load/store. -// -// * So called ADRP-related: -// - .loh AdrpAdrp L2, L1: -// L2: ADRP xA, sym1@PAGE -// L1: ADRP xA, sym2@PAGE -// L2 dominates L1 and xA is not redifined between L2 and L1 -// This LOH aims at getting rid of redundant ADRP instructions. -// -// The overall design for emitting the LOHs is: -// 1. ARM64CollectLOH (this pass) records the LOHs in the ARM64FunctionInfo. -// 2. ARM64AsmPrinter reads the LOHs from ARM64FunctionInfo and it: -// 1. Associates them a label. -// 2. Emits them in a MCStreamer (EmitLOHDirective). -// - The MCMachOStreamer records them into the MCAssembler. -// - The MCAsmStreamer prints them. -// - Other MCStreamers ignore them. -// 3. Closes the MCStreamer: -// - The MachObjectWriter gets them from the MCAssembler and writes -// them in the object file. -// - Other ObjectWriters ignore them. 
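To make the hand-off between the two steps concrete: once a chain such as AdrpAddLdr has been validated, the pass records the involved instructions in ARM64FunctionInfo, in the same way computeADRP does later in this file for the AdrpAdrp case. A minimal sketch of that recording step (a fragment only; the helper name is invented, and it assumes the pass's existing includes such as ARM64MachineFunctionInfo.h):

// Record an AdrpAddLdr hint for a validated L1 (ADRP), L2 (ADD), L3 (LDR)
// chain; the AsmPrinter later attaches labels and emits ".loh AdrpAddLdr".
static void recordAdrpAddLdr(ARM64FunctionInfo &ARM64FI,
                             const MachineInstr *L1, const MachineInstr *L2,
                             const MachineInstr *L3) {
  SmallVector<const MachineInstr *, 3> Args;
  Args.push_back(L1);
  Args.push_back(L2);
  Args.push_back(L3);
  ARM64FI.addLOHDirective(MCLOH_AdrpAddLdr, Args);
}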
-//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-collect-loh" -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -static cl::opt<bool> -PreCollectRegister("arm64-collect-loh-pre-collect-register", cl::Hidden, - cl::desc("Restrict analysis to registers invovled" - " in LOHs"), - cl::init(true)); - -static cl::opt<bool> -BasicBlockScopeOnly("arm64-collect-loh-bb-only", cl::Hidden, - cl::desc("Restrict analysis at basic block scope"), - cl::init(true)); - -STATISTIC(NumADRPSimpleCandidate, - "Number of simplifiable ADRP dominate by another"); -STATISTIC(NumADRPComplexCandidate2, - "Number of simplifiable ADRP reachable by 2 defs"); -STATISTIC(NumADRPComplexCandidate3, - "Number of simplifiable ADRP reachable by 3 defs"); -STATISTIC(NumADRPComplexCandidateOther, - "Number of simplifiable ADRP reachable by 4 or more defs"); -STATISTIC(NumADDToSTRWithImm, - "Number of simplifiable STR with imm reachable by ADD"); -STATISTIC(NumLDRToSTRWithImm, - "Number of simplifiable STR with imm reachable by LDR"); -STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD"); -STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR"); -STATISTIC(NumADDToLDRWithImm, - "Number of simplifiable LDR with imm reachable by ADD"); -STATISTIC(NumLDRToLDRWithImm, - "Number of simplifiable LDR with imm reachable by LDR"); -STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD"); -STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR"); -STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP"); -STATISTIC(NumCplxLvl1, "Number of complex case of level 1"); -STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1"); -STATISTIC(NumCplxLvl2, "Number of complex case of level 2"); -STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2"); -STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD"); -STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD"); - -namespace llvm { -void initializeARM64CollectLOHPass(PassRegistry &); -} - -namespace { -struct ARM64CollectLOH : public MachineFunctionPass { - static char ID; - ARM64CollectLOH() : MachineFunctionPass(ID) { - initializeARM64CollectLOHPass(*PassRegistry::getPassRegistry()); - } - - virtual bool runOnMachineFunction(MachineFunction &Fn); - - virtual const char *getPassName() const { - return "ARM64 Collect Linker Optimization Hint (LOH)"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired<MachineDominatorTree>(); - } - -private: -}; - -/// A set of MachineInstruction. 
-typedef SetVector<const MachineInstr *> SetOfMachineInstr; -/// Map a basic block to a set of instructions per register. -/// This is used to represent the exposed uses of a basic block -/// per register. -typedef MapVector<const MachineBasicBlock *, SetOfMachineInstr *> -BlockToSetOfInstrsPerColor; -/// Map a basic block to an instruction per register. -/// This is used to represent the live-out definitions of a basic block -/// per register. -typedef MapVector<const MachineBasicBlock *, const MachineInstr **> -BlockToInstrPerColor; -/// Map an instruction to a set of instructions. Used to represent the -/// mapping def to reachable uses or use to definitions. -typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs; -/// Map a basic block to a BitVector. -/// This is used to record the kill registers per basic block. -typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet; - -/// Map a register to a dense id. -typedef DenseMap<unsigned, unsigned> MapRegToId; -/// Map a dense id to a register. Used for debug purposes. -typedef SmallVector<unsigned, 32> MapIdToReg; -} // end anonymous namespace. - -char ARM64CollectLOH::ID = 0; - -INITIALIZE_PASS_BEGIN(ARM64CollectLOH, "arm64-collect-loh", - "ARM64 Collect Linker Optimization Hint (LOH)", false, - false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_END(ARM64CollectLOH, "arm64-collect-loh", - "ARM64 Collect Linker Optimization Hint (LOH)", false, - false) - -/// Given a couple (MBB, reg) get the corresponding set of instruction from -/// the given "sets". -/// If this couple does not reference any set, an empty set is added to "sets" -/// for this couple and returned. -/// \param nbRegs is used internally allocate some memory. It must be consistent -/// with the way sets is used. -static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets, - const MachineBasicBlock *MBB, unsigned reg, - unsigned nbRegs) { - SetOfMachineInstr *result; - BlockToSetOfInstrsPerColor::iterator it = sets.find(MBB); - if (it != sets.end()) { - result = it->second; - } else { - result = sets[MBB] = new SetOfMachineInstr[nbRegs]; - } - - return result[reg]; -} - -/// Given a couple (reg, MI) get the corresponding set of instructions from the -/// the given "sets". -/// This is used to get the uses record in sets of a definition identified by -/// MI and reg, i.e., MI defines reg. -/// If the couple does not reference anything, an empty set is added to -/// "sets[reg]". -/// \pre set[reg] is valid. -static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg, - const MachineInstr *MI) { - return sets[reg][MI]; -} - -/// Same as getUses but does not modify the input map: sets. -/// \return NULL if the couple (reg, MI) is not in sets. -static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg, - const MachineInstr *MI) { - InstrToInstrs::const_iterator Res = sets[reg].find(MI); - if (Res != sets[reg].end()) - return &(Res->second); - return NULL; -} - -/// Initialize the reaching definition algorithm: -/// For each basic block BB in MF, record: -/// - its kill set. -/// - its reachable uses (uses that are exposed to BB's predecessors). -/// - its the generated definitions. -/// \param DummyOp if not NULL, specifies a Dummy Operation to be added to -/// the list of uses of exposed defintions. -/// \param ADRPMode specifies to only consider ADRP instructions for generated -/// definition. It also consider definitions of ADRP instructions as uses and -/// ignore other uses. 
The ADRPMode is used to collect the information for LHO -/// that involve ADRP operation only. -static void initReachingDef(MachineFunction *MF, - InstrToInstrs *ColorOpToReachedUses, - BlockToInstrPerColor &Gen, BlockToRegSet &Kill, - BlockToSetOfInstrsPerColor &ReachableUses, - const MapRegToId &RegToId, - const MachineInstr *DummyOp, bool ADRPMode) { - const TargetMachine &TM = MF->getTarget(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - - unsigned NbReg = RegToId.size(); - - for (MachineFunction::const_iterator IMBB = MF->begin(), IMBBEnd = MF->end(); - IMBB != IMBBEnd; ++IMBB) { - const MachineBasicBlock *MBB = &(*IMBB); - const MachineInstr **&BBGen = Gen[MBB]; - BBGen = new const MachineInstr *[NbReg]; - memset(BBGen, 0, sizeof(const MachineInstr *) * NbReg); - - BitVector &BBKillSet = Kill[MBB]; - BBKillSet.resize(NbReg); - for (MachineBasicBlock::const_iterator II = MBB->begin(), IEnd = MBB->end(); - II != IEnd; ++II) { - bool IsADRP = II->getOpcode() == ARM64::ADRP; - - // Process uses first. - if (IsADRP || !ADRPMode) - for (MachineInstr::const_mop_iterator IO = II->operands_begin(), - IOEnd = II->operands_end(); - IO != IOEnd; ++IO) { - // Treat ADRP def as use, as the goal of the analysis is to find - // ADRP defs reached by other ADRP defs. - if (!IO->isReg() || (!ADRPMode && !IO->isUse()) || - (ADRPMode && (!IsADRP || !IO->isDef()))) - continue; - unsigned CurReg = IO->getReg(); - MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); - if (ItCurRegId == RegToId.end()) - continue; - CurReg = ItCurRegId->second; - - // if CurReg has not been defined, this use is reachable. - if (!BBGen[CurReg] && !BBKillSet.test(CurReg)) - getSet(ReachableUses, MBB, CurReg, NbReg).insert(&(*II)); - // current basic block definition for this color, if any, is in Gen. - if (BBGen[CurReg]) - getUses(ColorOpToReachedUses, CurReg, BBGen[CurReg]).insert(&(*II)); - } - - // Process clobbers. - for (MachineInstr::const_mop_iterator IO = II->operands_begin(), - IOEnd = II->operands_end(); - IO != IOEnd; ++IO) { - if (!IO->isRegMask()) - continue; - // Clobbers kill the related colors. - const uint32_t *PreservedRegs = IO->getRegMask(); - - // Set generated regs. - for (const auto Entry : RegToId) { - unsigned Reg = Entry.second; - // Use the global register ID when querying APIs external to this - // pass. - if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) { - // Do not register clobbered definition for no ADRP. - // This definition is not used anyway (otherwise register - // allocation is wrong). - BBGen[Reg] = ADRPMode ? II : NULL; - BBKillSet.set(Reg); - } - } - } - - // Process defs - for (MachineInstr::const_mop_iterator IO = II->operands_begin(), - IOEnd = II->operands_end(); - IO != IOEnd; ++IO) { - if (!IO->isReg() || !IO->isDef()) - continue; - unsigned CurReg = IO->getReg(); - MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); - if (ItCurRegId == RegToId.end()) - continue; - - for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) { - MapRegToId::const_iterator ItRegId = RegToId.find(*AI); - assert(ItRegId != RegToId.end() && - "Sub-register of an " - "involved register, not recorded as involved!"); - BBKillSet.set(ItRegId->second); - BBGen[ItRegId->second] = &(*II); - } - BBGen[ItCurRegId->second] = &(*II); - } - } - - // If we restrict our analysis to basic block scope, conservatively add a - // dummy - // use for each generated value. 
- if (!ADRPMode && DummyOp && !MBB->succ_empty()) - for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) - if (BBGen[CurReg]) - getUses(ColorOpToReachedUses, CurReg, BBGen[CurReg]).insert(DummyOp); - } -} - -/// Reaching def core algorithm: -/// while an Out has changed -/// for each bb -/// for each color -/// In[bb][color] = U Out[bb.predecessors][color] -/// insert reachableUses[bb][color] in each in[bb][color] -/// op.reachedUses -/// -/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) -static void reachingDefAlgorithm(MachineFunction *MF, - InstrToInstrs *ColorOpToReachedUses, - BlockToSetOfInstrsPerColor &In, - BlockToSetOfInstrsPerColor &Out, - BlockToInstrPerColor &Gen, BlockToRegSet &Kill, - BlockToSetOfInstrsPerColor &ReachableUses, - unsigned NbReg) { - bool HasChanged; - do { - HasChanged = false; - for (MachineFunction::const_iterator IMBB = MF->begin(), - IMBBEnd = MF->end(); - IMBB != IMBBEnd; ++IMBB) { - const MachineBasicBlock *MBB = &(*IMBB); - unsigned CurReg; - for (CurReg = 0; CurReg < NbReg; ++CurReg) { - SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg); - SetOfMachineInstr &BBReachableUses = - getSet(ReachableUses, MBB, CurReg, NbReg); - SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg); - unsigned Size = BBOutSet.size(); - // In[bb][color] = U Out[bb.predecessors][color] - for (MachineBasicBlock::const_pred_iterator - PredMBB = MBB->pred_begin(), - EndPredMBB = MBB->pred_end(); - PredMBB != EndPredMBB; ++PredMBB) { - SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg); - BBInSet.insert(PredOutSet.begin(), PredOutSet.end()); - } - // insert reachableUses[bb][color] in each in[bb][color] op.reachedses - for (const MachineInstr *MI: BBInSet) { - SetOfMachineInstr &OpReachedUses = - getUses(ColorOpToReachedUses, CurReg, MI); - OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end()); - } - // Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) - if (!Kill[MBB].test(CurReg)) - BBOutSet.insert(BBInSet.begin(), BBInSet.end()); - if (Gen[MBB][CurReg]) - BBOutSet.insert(Gen[MBB][CurReg]); - HasChanged |= BBOutSet.size() != Size; - } - } - } while (HasChanged); -} - -/// Release all memory dynamically allocated during the reaching -/// definition algorithm. -static void finitReachingDef(BlockToSetOfInstrsPerColor &In, - BlockToSetOfInstrsPerColor &Out, - BlockToInstrPerColor &Gen, - BlockToSetOfInstrsPerColor &ReachableUses) { - for (BlockToSetOfInstrsPerColor::const_iterator IT = Out.begin(), - End = Out.end(); - IT != End; ++IT) - delete[] IT->second; - for (BlockToSetOfInstrsPerColor::const_iterator IT = In.begin(), - End = In.end(); - IT != End; ++IT) - delete[] IT->second; - for (BlockToSetOfInstrsPerColor::const_iterator IT = ReachableUses.begin(), - End = ReachableUses.end(); - IT != End; ++IT) - delete[] IT->second; - for (BlockToInstrPerColor::const_iterator IT = Gen.begin(), End = Gen.end(); - IT != End; ++IT) - delete[] IT->second; -} - -/// Reaching definiton algorithm. -/// \param MF function on which the algorithm will operate. -/// \param[out] ColorOpToReachedUses will contain the result of the reaching -/// def algorithm. -/// \param ADRPMode specify whether the reaching def algorithm should be tuned -/// for ADRP optimization. \see initReachingDef for more details. -/// \param DummyOp if not NULL, the algorithm will work at -/// basic block scope and will set for every exposed defintion a use to -/// @p DummyOp. -/// \pre ColorOpToReachedUses is an array of at least number of registers of -/// InstrToInstrs. 
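Stripped of the per-register arrays and MachineInstr bookkeeping, the fixed-point iteration spelled out in the comment above (In[bb] = union of the predecessors' Out; Out[bb] = Gen[bb] U (In[bb] - Kill[bb])) looks like the self-contained sketch below. Block and definition ids are invented for illustration; the real pass tracks sets of MachineInstr* per register and also propagates ReachableUses into each reached definition's use set.

#include <set>
#include <vector>

// One register ("color") only: defs are small integers, and each block knows
// its predecessors, the defs it generates, and whether it kills the color.
struct Block {
  std::vector<int> Preds;
  std::set<int> Gen;
  bool Kill = false;
};

// Iterate until no Out set changes, as in reachingDefAlgorithm above.
static std::vector<std::set<int>> reachingDefs(const std::vector<Block> &CFG) {
  std::vector<std::set<int>> Out(CFG.size());
  bool Changed = true;
  while (Changed) {
    Changed = false;
    for (size_t BB = 0; BB < CFG.size(); ++BB) {
      std::set<int> In;
      for (int P : CFG[BB].Preds)
        In.insert(Out[P].begin(), Out[P].end());
      std::set<int> NewOut = CFG[BB].Kill ? std::set<int>() : In;
      NewOut.insert(CFG[BB].Gen.begin(), CFG[BB].Gen.end());
      if (NewOut != Out[BB]) {
        Out[BB] = NewOut;
        Changed = true;
      }
    }
  }
  return Out;
}

int main() {
  // bb0 defines d0; bb1 (successor of bb0) kills the color and defines d1;
  // bb2 joins bb0 and bb1, so both d0 and d1 reach it.
  std::vector<Block> CFG(3);
  CFG[0].Gen = {0};
  CFG[1].Preds = {0}; CFG[1].Gen = {1}; CFG[1].Kill = true;
  CFG[2].Preds = {0, 1};
  return reachingDefs(CFG)[2] == std::set<int>({0, 1}) ? 0 : 1;
}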
-static void reachingDef(MachineFunction *MF, - InstrToInstrs *ColorOpToReachedUses, - const MapRegToId &RegToId, bool ADRPMode = false, - const MachineInstr *DummyOp = NULL) { - // structures: - // For each basic block. - // Out: a set per color of definitions that reach the - // out boundary of this block. - // In: Same as Out but for in boundary. - // Gen: generated color in this block (one operation per color). - // Kill: register set of killed color in this block. - // ReachableUses: a set per color of uses (operation) reachable - // for "In" definitions. - BlockToSetOfInstrsPerColor Out, In, ReachableUses; - BlockToInstrPerColor Gen; - BlockToRegSet Kill; - - // Initialize Gen, kill and reachableUses. - initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId, - DummyOp, ADRPMode); - - // Algo. - if (!DummyOp) - reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill, - ReachableUses, RegToId.size()); - - // finit. - finitReachingDef(In, Out, Gen, ReachableUses); -} - -#ifndef NDEBUG -/// print the result of the reaching definition algorithm. -static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses, - unsigned NbReg, const TargetRegisterInfo *TRI, - const MapIdToReg &IdToReg) { - unsigned CurReg; - for (CurReg = 0; CurReg < NbReg; ++CurReg) { - if (ColorOpToReachedUses[CurReg].empty()) - continue; - DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n"); - - InstrToInstrs::const_iterator DefsIt = ColorOpToReachedUses[CurReg].begin(); - InstrToInstrs::const_iterator DefsItEnd = - ColorOpToReachedUses[CurReg].end(); - for (; DefsIt != DefsItEnd; ++DefsIt) { - DEBUG(dbgs() << "Def:\n"); - DEBUG(DefsIt->first->print(dbgs())); - DEBUG(dbgs() << "Reachable uses:\n"); - for (SetOfMachineInstr::const_iterator UsesIt = DefsIt->second.begin(), - UsesItEnd = DefsIt->second.end(); - UsesIt != UsesItEnd; ++UsesIt) { - DEBUG((*UsesIt)->print(dbgs())); - } - } - } -} -#endif // NDEBUG - -/// Answer the following question: Can Def be one of the definition -/// involved in a part of a LOH? -static bool canDefBePartOfLOH(const MachineInstr *Def) { - unsigned Opc = Def->getOpcode(); - // Accept ADRP, ADDLow and LOADGot. - switch (Opc) { - default: - return false; - case ARM64::ADRP: - return true; - case ARM64::ADDXri: - // Check immediate to see if the immediate is an address. - switch (Def->getOperand(2).getType()) { - default: - return false; - case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_JumpTableIndex: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_BlockAddress: - return true; - } - case ARM64::LDRXui: - // Check immediate to see if the immediate is an address. - switch (Def->getOperand(2).getType()) { - default: - return false; - case MachineOperand::MO_GlobalAddress: - return true; - } - } - // Unreachable. - return false; -} - -/// Check whether the given instruction can the end of a LOH chain involving a -/// store. -static bool isCandidateStore(const MachineInstr *Instr) { - switch (Instr->getOpcode()) { - default: - return false; - case ARM64::STRBui: - case ARM64::STRHui: - case ARM64::STRWui: - case ARM64::STRXui: - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - // In case we have str xA, [xA, #imm], this is two different uses - // of xA and we cannot fold, otherwise the xA stored may be wrong, - // even if #imm == 0. 
- if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg()) - return true; - } - return false; -} - -/// Given the result of a reaching defintion algorithm in ColorOpToReachedUses, -/// Build the Use to Defs information and filter out obvious non-LOH candidates. -/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions. -/// In non-ADRPMode, non-LOH candidates are "uses" with several definition, -/// i.e., no simple chain. -/// \param ADRPMode -- \see initReachingDef. -static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs, - const InstrToInstrs *ColorOpToReachedUses, - const MapRegToId &RegToId, - bool ADRPMode = false) { - - SetOfMachineInstr NotCandidate; - unsigned NbReg = RegToId.size(); - MapRegToId::const_iterator EndIt = RegToId.end(); - for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) { - // If this color is never defined, continue. - if (ColorOpToReachedUses[CurReg].empty()) - continue; - - InstrToInstrs::const_iterator DefsIt = ColorOpToReachedUses[CurReg].begin(); - InstrToInstrs::const_iterator DefsItEnd = - ColorOpToReachedUses[CurReg].end(); - for (; DefsIt != DefsItEnd; ++DefsIt) { - for (SetOfMachineInstr::const_iterator UsesIt = DefsIt->second.begin(), - UsesItEnd = DefsIt->second.end(); - UsesIt != UsesItEnd; ++UsesIt) { - const MachineInstr *Def = DefsIt->first; - MapRegToId::const_iterator It; - // if all the reaching defs are not adrp, this use will not be - // simplifiable. - if ((ADRPMode && Def->getOpcode() != ARM64::ADRP) || - (!ADRPMode && !canDefBePartOfLOH(Def)) || - (!ADRPMode && isCandidateStore(*UsesIt) && - // store are LOH candidate iff the end of the chain is used as - // base. - ((It = RegToId.find((*UsesIt)->getOperand(1).getReg())) == EndIt || - It->second != CurReg))) { - NotCandidate.insert(*UsesIt); - continue; - } - // Do not consider self reaching as a simplifiable case for ADRP. - if (!ADRPMode || *UsesIt != DefsIt->first) { - UseToReachingDefs[*UsesIt].insert(DefsIt->first); - // If UsesIt has several reaching definitions, it is not - // candidate for simplificaton in non-ADRPMode. - if (!ADRPMode && UseToReachingDefs[*UsesIt].size() > 1) - NotCandidate.insert(*UsesIt); - } - } - } - } - for (const MachineInstr *Elem : NotCandidate) { - DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n"); - // It would have been better if we could just remove the entry - // from the map. Because of that, we have to filter the garbage - // (second.empty) in the subsequence analysis. - UseToReachingDefs[Elem].clear(); - } -} - -/// Based on the use to defs information (in ADRPMode), compute the -/// opportunities of LOH ADRP-related. 
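[Editor's aside] reachedUsesToDefs above inverts the def-to-reached-uses map produced by the dataflow pass into a use-to-reaching-defs map and disqualifies uses with more than one reaching definition. Before computeADRP below consumes that map, here is a standalone sketch of the inversion with plain STL containers and illustrative names:

    #include <cstdio>
    #include <map>
    #include <set>
    #include <string>

    int main() {
      // Def -> set of uses it reaches (what the reaching-def pass produces).
      std::map<std::string, std::set<std::string>> DefToUses = {
          {"adrp1", {"add1", "ldr1"}}, {"adrp2", {"ldr1"}}};

      // Invert into Use -> set of reaching defs.
      std::map<std::string, std::set<std::string>> UseToDefs;
      for (const auto &Entry : DefToUses)
        for (const std::string &Use : Entry.second)
          UseToDefs[Use].insert(Entry.first);

      // A use with several reaching defs has no simple chain: not a candidate.
      for (auto &Entry : UseToDefs)
        if (Entry.second.size() > 1)
          Entry.second.clear(); // keep the key, like the pass keeps "garbage" entries

      for (const auto &Entry : UseToDefs)
        std::printf("%s: %zu reaching def(s)\n", Entry.first.c_str(),
                    Entry.second.size());
    }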
-static void computeADRP(const InstrToInstrs &UseToDefs, - ARM64FunctionInfo &ARM64FI, - const MachineDominatorTree *MDT) { - DEBUG(dbgs() << "*** Compute LOH for ADRP\n"); - for (const auto &Entry: UseToDefs) { - unsigned Size = Entry.second.size(); - if (Size == 0) - continue; - if (Size == 1) { - const MachineInstr *L2 = *Entry.second.begin(); - const MachineInstr *L1 = Entry.first; - if (!MDT->dominates(L2, L1)) { - DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1 - << '\n'); - continue; - } - DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n'); - SmallVector<const MachineInstr *, 2> Args; - Args.push_back(L2); - Args.push_back(L1); - ARM64FI.addLOHDirective(MCLOH_AdrpAdrp, Args); - ++NumADRPSimpleCandidate; - } -#ifdef DEBUG - else if (Size == 2) - ++NumADRPComplexCandidate2; - else if (Size == 3) - ++NumADRPComplexCandidate3; - else - ++NumADRPComplexCandidateOther; -#endif - // if Size < 1, the use should have been removed from the candidates - assert(Size >= 1 && "No reaching defs for that use!"); - } -} - -/// Check whether the given instruction can be the end of a LOH chain -/// involving a load. -static bool isCandidateLoad(const MachineInstr *Instr) { - switch (Instr->getOpcode()) { - default: - return false; - case ARM64::LDRSBWui: - case ARM64::LDRSBXui: - case ARM64::LDRSHWui: - case ARM64::LDRSHXui: - case ARM64::LDRSWui: - case ARM64::LDRBui: - case ARM64::LDRHui: - case ARM64::LDRWui: - case ARM64::LDRXui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - if (Instr->getOperand(2).getTargetFlags() & ARM64II::MO_GOT) - return false; - return true; - } - // Unreachable. - return false; -} - -/// Check whether the given instruction can load a litteral. -static bool supportLoadFromLiteral(const MachineInstr *Instr) { - switch (Instr->getOpcode()) { - default: - return false; - case ARM64::LDRSWui: - case ARM64::LDRWui: - case ARM64::LDRXui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - return true; - } - // Unreachable. - return false; -} - -/// Check whether the given instruction is a LOH candidate. -/// \param UseToDefs is used to check that Instr is at the end of LOH supported -/// chain. -/// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are -/// already been filtered out. -static bool isCandidate(const MachineInstr *Instr, - const InstrToInstrs &UseToDefs, - const MachineDominatorTree *MDT) { - if (!isCandidateLoad(Instr) && !isCandidateStore(Instr)) - return false; - - const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin(); - if (Def->getOpcode() != ARM64::ADRP) { - // At this point, Def is ADDXri or LDRXui of the right type of - // symbol, because we filtered out the uses that were not defined - // by these kind of instructions (+ ADRP). - - // Check if this forms a simple chain: each intermediate node must - // dominates the next one. - if (!MDT->dominates(Def, Instr)) - return false; - // Move one node up in the simple chain. - if (UseToDefs.find(Def) == UseToDefs.end() - // The map may contain garbage we have to ignore. - || - UseToDefs.find(Def)->second.empty()) - return false; - Instr = Def; - Def = *UseToDefs.find(Def)->second.begin(); - } - // Check if we reached the top of the simple chain: - // - top is ADRP. - // - check the simple chain property: each intermediate node must - // dominates the next one. 
- if (Def->getOpcode() == ARM64::ADRP) - return MDT->dominates(Def, Instr); - return false; -} - -static bool registerADRCandidate(const MachineInstr *Use, - const InstrToInstrs &UseToDefs, - const InstrToInstrs *DefsPerColorToUses, - ARM64FunctionInfo &ARM64FI, - SetOfMachineInstr *InvolvedInLOHs, - const MapRegToId &RegToId) { - // Look for opportunities to turn ADRP -> ADD or - // ADRP -> LDR GOTPAGEOFF into ADR. - // If ADRP has more than one use. Give up. - if (Use->getOpcode() != ARM64::ADDXri && - (Use->getOpcode() != ARM64::LDRXui || - !(Use->getOperand(2).getTargetFlags() & ARM64II::MO_GOT))) - return false; - InstrToInstrs::const_iterator It = UseToDefs.find(Use); - // The map may contain garbage that we need to ignore. - if (It == UseToDefs.end() || It->second.empty()) - return false; - const MachineInstr *Def = *It->second.begin(); - if (Def->getOpcode() != ARM64::ADRP) - return false; - // Check the number of users of ADRP. - const SetOfMachineInstr *Users = - getUses(DefsPerColorToUses, - RegToId.find(Def->getOperand(0).getReg())->second, Def); - if (Users->size() > 1) { - ++NumADRComplexCandidate; - return false; - } - ++NumADRSimpleCandidate; - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Def)) && - "ADRP already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Use)) && - "ADD already involved in LOH."); - DEBUG(dbgs() << "Record AdrpAdd\n" << *Def << '\n' << *Use << '\n'); - - SmallVector<const MachineInstr *, 2> Args; - Args.push_back(Def); - Args.push_back(Use); - - ARM64FI.addLOHDirective(Use->getOpcode() == ARM64::ADDXri ? MCLOH_AdrpAdd - : MCLOH_AdrpLdrGot, - Args); - return true; -} - -/// Based on the use to defs information (in non-ADRPMode), compute the -/// opportunities of LOH non-ADRP-related -static void computeOthers(const InstrToInstrs &UseToDefs, - const InstrToInstrs *DefsPerColorToUses, - ARM64FunctionInfo &ARM64FI, const MapRegToId &RegToId, - const MachineDominatorTree *MDT) { - SetOfMachineInstr *InvolvedInLOHs = NULL; -#ifdef DEBUG - SetOfMachineInstr InvolvedInLOHsStorage; - InvolvedInLOHs = &InvolvedInLOHsStorage; -#endif // DEBUG - DEBUG(dbgs() << "*** Compute LOH for Others\n"); - // ADRP -> ADD/LDR -> LDR/STR pattern. - // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern. - - // FIXME: When the statistics are not important, - // This initial filtering loop can be merged into the next loop. - // Currently, we didn't do it to have the same code for both DEBUG and - // NDEBUG builds. Indeed, the iterator of the second loop would need - // to be changed. - SetOfMachineInstr PotentialCandidates; - SetOfMachineInstr PotentialADROpportunities; - for (InstrToInstrs::const_iterator UseIt = UseToDefs.begin(), - EndUseIt = UseToDefs.end(); - UseIt != EndUseIt; ++UseIt) { - // If no definition is available, this is a non candidate. - if (UseIt->second.empty()) - continue; - // Keep only instructions that are load or store and at the end of - // a ADRP -> ADD/LDR/Nothing chain. - // We already filtered out the no-chain cases. - if (!isCandidate(UseIt->first, UseToDefs, MDT)) { - PotentialADROpportunities.insert(UseIt->first); - continue; - } - PotentialCandidates.insert(UseIt->first); - } - - // Make the following distinctions for statistics as the linker does - // know how to decode instructions: - // - ADD/LDR/Nothing make there different patterns. - // - LDR/STR make two different patterns. - // Hence, 6 - 1 base patterns. 
- // (because ADRP-> Nothing -> STR is not simplifiable) - - // The linker is only able to have a simple semantic, i.e., if pattern A - // do B. - // However, we want to see the opportunity we may miss if we were able to - // catch more complex cases. - - // PotentialCandidates are result of a chain ADRP -> ADD/LDR -> - // A potential candidate becomes a candidate, if its current immediate - // operand is zero and all nodes of the chain have respectively only one user - SetOfMachineInstr::const_iterator CandidateIt, EndCandidateIt; -#ifdef DEBUG - SetOfMachineInstr DefsOfPotentialCandidates; -#endif - for (CandidateIt = PotentialCandidates.begin(), - EndCandidateIt = PotentialCandidates.end(); - CandidateIt != EndCandidateIt; ++CandidateIt) { - const MachineInstr *Candidate = *CandidateIt; - // Get the definition of the candidate i.e., ADD or LDR. - const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin(); - // Record the elements of the chain. - const MachineInstr *L1 = Def; - const MachineInstr *L2 = NULL; - unsigned ImmediateDefOpc = Def->getOpcode(); - if (Def->getOpcode() != ARM64::ADRP) { - // Check the number of users of this node. - const SetOfMachineInstr *Users = - getUses(DefsPerColorToUses, - RegToId.find(Def->getOperand(0).getReg())->second, Def); - if (Users->size() > 1) { -#ifdef DEBUG - // if all the uses of this def are in potential candidate, this is - // a complex candidate of level 2. - SetOfMachineInstr::const_iterator UseIt = Users->begin(); - SetOfMachineInstr::const_iterator EndUseIt = Users->end(); - for (; UseIt != EndUseIt; ++UseIt) { - if (!PotentialCandidates.count(*UseIt)) { - ++NumTooCplxLvl2; - break; - } - } - if (UseIt == EndUseIt) - ++NumCplxLvl2; -#endif // DEBUG - PotentialADROpportunities.insert(Def); - continue; - } - L2 = Def; - Def = *UseToDefs.find(Def)->second.begin(); - L1 = Def; - } // else the element in the middle of the chain is nothing, thus - // Def already contains the first element of the chain. - - // Check the number of users of the first node in the chain, i.e., ADRP - const SetOfMachineInstr *Users = - getUses(DefsPerColorToUses, - RegToId.find(Def->getOperand(0).getReg())->second, Def); - if (Users->size() > 1) { -#ifdef DEBUG - // if all the uses of this def are in the defs of the potential candidate, - // this is a complex candidate of level 1 - if (DefsOfPotentialCandidates.empty()) { - // lazy init - DefsOfPotentialCandidates = PotentialCandidates; - for (const MachineInstr *Candidate : PotentialCandidates) { - if (!UseToDefs.find(Candidate)->second.empty()) - DefsOfPotentialCandidates.insert( - *UseToDefs.find(Candidate)->second.begin()); - } - } - bool Found = false; - for (auto &Use: *Users) { - if (!DefsOfPotentialCandidates.count(Use)) { - ++NumTooCplxLvl1; - Found = true; - break; - } - } - if (!Found) - ++NumCplxLvl1; -#endif // DEBUG - continue; - } - - bool IsL2Add = (ImmediateDefOpc == ARM64::ADDXri); - // If the chain is three instructions long and ldr is the second element, - // then this ldr must load form GOT, otherwise this is not a correct chain. - if (L2 && !IsL2Add && L2->getOperand(2).getTargetFlags() != ARM64II::MO_GOT) - continue; - SmallVector<const MachineInstr *, 3> Args; - MCLOHType Kind; - if (isCandidateLoad(Candidate)) { - if (L2 == NULL) { - // At this point, the candidate LOH indicates that the ldr instruction - // may use a direct access to the symbol. There is not such encoding - // for loads of byte and half. 
- if (!supportLoadFromLiteral(Candidate)) - continue; - - DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate - << '\n'); - Kind = MCLOH_AdrpLdr; - Args.push_back(L1); - Args.push_back(Candidate); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && - "L1 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && - "Candidate already involved in LOH."); - ++NumADRPToLDR; - } else { - DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot") - << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate - << '\n'); - - Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr; - Args.push_back(L1); - Args.push_back(L2); - Args.push_back(Candidate); - - PotentialADROpportunities.remove(L2); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && - "L1 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) && - "L2 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && - "Candidate already involved in LOH."); -#ifdef DEBUG - // get the immediate of the load - if (Candidate->getOperand(2).getImm() == 0) - if (ImmediateDefOpc == ARM64::ADDXri) - ++NumADDToLDR; - else - ++NumLDRToLDR; - else if (ImmediateDefOpc == ARM64::ADDXri) - ++NumADDToLDRWithImm; - else - ++NumLDRToLDRWithImm; -#endif // DEBUG - } - } else { - if (ImmediateDefOpc == ARM64::ADRP) - continue; - else { - - DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot") - << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate - << '\n'); - - Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr; - Args.push_back(L1); - Args.push_back(L2); - Args.push_back(Candidate); - - PotentialADROpportunities.remove(L2); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && - "L1 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) && - "L2 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && - "Candidate already involved in LOH."); -#ifdef DEBUG - // get the immediate of the store - if (Candidate->getOperand(2).getImm() == 0) - if (ImmediateDefOpc == ARM64::ADDXri) - ++NumADDToSTR; - else - ++NumLDRToSTR; - else if (ImmediateDefOpc == ARM64::ADDXri) - ++NumADDToSTRWithImm; - else - ++NumLDRToSTRWithImm; -#endif // DEBUG - } - } - ARM64FI.addLOHDirective(Kind, Args); - } - - // Now, we grabbed all the big patterns, check ADR opportunities. - for (const MachineInstr *Candidate: PotentialADROpportunities) - registerADRCandidate(Candidate, UseToDefs, DefsPerColorToUses, ARM64FI, - InvolvedInLOHs, RegToId); -} - -/// Look for every register defined by potential LOHs candidates. -/// Map these registers with dense id in @p RegToId and vice-versa in -/// @p IdToReg. @p IdToReg is populated only in DEBUG mode. 
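[Editor's aside] collectInvolvedReg below assigns a dense index to every register (and register alias) that an LOH candidate can define, so the per-color dataflow arrays are sized by the handful of involved registers instead of the whole register file. A standalone sketch of the dense-numbering idea, without alias iteration and with made-up register encodings:

    #include <cstdio>
    #include <unordered_map>
    #include <vector>

    int main() {
      // Registers named by their (sparse) target encoding.
      std::vector<unsigned> DefinedRegs = {34, 7, 34, 129, 7};

      std::unordered_map<unsigned, unsigned> RegToId; // sparse reg -> dense id
      std::vector<unsigned> IdToReg;                  // dense id  -> sparse reg
      for (unsigned Reg : DefinedRegs)
        if (RegToId.find(Reg) == RegToId.end()) {
          RegToId[Reg] = IdToReg.size();
          IdToReg.push_back(Reg);
        }

      // Dataflow arrays can now be sized by IdToReg.size() rather than by
      // the total number of target registers.
      for (unsigned Id = 0; Id < IdToReg.size(); ++Id)
        std::printf("id %u -> reg %u\n", Id, IdToReg[Id]);
    }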
-static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId, - MapIdToReg &IdToReg, - const TargetRegisterInfo *TRI) { - unsigned CurRegId = 0; - if (!PreCollectRegister) { - unsigned NbReg = TRI->getNumRegs(); - for (; CurRegId < NbReg; ++CurRegId) { - RegToId[CurRegId] = CurRegId; - DEBUG(IdToReg.push_back(CurRegId)); - DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches")); - } - return; - } - - DEBUG(dbgs() << "** Collect Involved Register\n"); - for (MachineFunction::const_iterator IMBB = MF.begin(), IMBBEnd = MF.end(); - IMBB != IMBBEnd; ++IMBB) - for (MachineBasicBlock::const_iterator II = IMBB->begin(), - IEnd = IMBB->end(); - II != IEnd; ++II) { - - if (!canDefBePartOfLOH(II)) - continue; - - // Process defs - for (MachineInstr::const_mop_iterator IO = II->operands_begin(), - IOEnd = II->operands_end(); - IO != IOEnd; ++IO) { - if (!IO->isReg() || !IO->isDef()) - continue; - unsigned CurReg = IO->getReg(); - for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) - if (RegToId.find(*AI) == RegToId.end()) { - DEBUG(IdToReg.push_back(*AI); - assert(IdToReg[CurRegId] == *AI && - "Reg index mismatches insertion index.")); - RegToId[*AI] = CurRegId++; - DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n'); - } - } - } -} - -bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &Fn) { - const TargetMachine &TM = Fn.getTarget(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>(); - - MapRegToId RegToId; - MapIdToReg IdToReg; - ARM64FunctionInfo *ARM64FI = Fn.getInfo<ARM64FunctionInfo>(); - assert(ARM64FI && "No MachineFunctionInfo for this function!"); - - DEBUG(dbgs() << "Looking for LOH in " << Fn.getName() << '\n'); - - collectInvolvedReg(Fn, RegToId, IdToReg, TRI); - if (RegToId.empty()) - return false; - - MachineInstr *DummyOp = NULL; - if (BasicBlockScopeOnly) { - const ARM64InstrInfo *TII = - static_cast<const ARM64InstrInfo *>(TM.getInstrInfo()); - // For local analysis, create a dummy operation to record uses that are not - // local. - DummyOp = Fn.CreateMachineInstr(TII->get(ARM64::COPY), DebugLoc()); - } - - unsigned NbReg = RegToId.size(); - bool Modified = false; - - // Start with ADRP. - InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg]; - - // Compute the reaching def in ADRP mode, meaning ADRP definitions - // are first considered as uses. - reachingDef(&Fn, ColorOpToReachedUses, RegToId, true, DummyOp); - DEBUG(dbgs() << "ADRP reaching defs\n"); - DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); - - // Translate the definition to uses map into a use to definitions map to ease - // statistic computation. - InstrToInstrs ADRPToReachingDefs; - reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true); - - // Compute LOH for ADRP. - computeADRP(ADRPToReachingDefs, *ARM64FI, MDT); - delete[] ColorOpToReachedUses; - - // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern. - ColorOpToReachedUses = new InstrToInstrs[NbReg]; - - // first perform a regular reaching def analysis. - reachingDef(&Fn, ColorOpToReachedUses, RegToId, false, DummyOp); - DEBUG(dbgs() << "All reaching defs\n"); - DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); - - // Turn that into a use to defs to ease statistic computation. - InstrToInstrs UsesToReachingDefs; - reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false); - - // Compute other than AdrpAdrp LOH. 
- computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *ARM64FI, RegToId, - MDT); - delete[] ColorOpToReachedUses; - - if (BasicBlockScopeOnly) - Fn.DeleteMachineInstr(DummyOp); - - return Modified; -} - -/// createARM64CollectLOHPass - returns an instance of the Statistic for -/// linker optimization pass. -FunctionPass *llvm::createARM64CollectLOHPass() { - return new ARM64CollectLOH(); -} diff --git a/lib/Target/ARM64/ARM64ConditionalCompares.cpp b/lib/Target/ARM64/ARM64ConditionalCompares.cpp deleted file mode 100644 index b495afa..0000000 --- a/lib/Target/ARM64/ARM64ConditionalCompares.cpp +++ /dev/null @@ -1,918 +0,0 @@ -//===-- ARM64ConditionalCompares.cpp --- CCMP formation for ARM64 ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ARM64ConditionalCompares pass which reduces -// branching and code size by using the conditional compare instructions CCMP, -// CCMN, and FCMP. -// -// The CFG transformations for forming conditional compares are very similar to -// if-conversion, and this pass should run immediately before the early -// if-conversion pass. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-ccmp" -#include "ARM64.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SparseSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineTraceMetrics.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" - -using namespace llvm; - -// Absolute maximum number of instructions allowed per speculated block. -// This bypasses all other heuristics, so it should be set fairly high. -static cl::opt<unsigned> BlockInstrLimit( - "arm64-ccmp-limit", cl::init(30), cl::Hidden, - cl::desc("Maximum number of instructions per speculated block.")); - -// Stress testing mode - disable heuristics. 
-static cl::opt<bool> Stress("arm64-stress-ccmp", cl::Hidden, - cl::desc("Turn all knobs to 11")); - -STATISTIC(NumConsidered, "Number of ccmps considered"); -STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)"); -STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)"); -STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)"); -STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)"); -STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)"); -STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)"); -STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)"); -STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)"); -STATISTIC(NumMultCPSRUses, "Number of ccmps rejected (CPSR used)"); -STATISTIC(NumUnknCPSRDefs, "Number of ccmps rejected (CPSR def unknown)"); - -STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)"); - -STATISTIC(NumConverted, "Number of ccmp instructions created"); -STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted"); - -//===----------------------------------------------------------------------===// -// SSACCmpConv -//===----------------------------------------------------------------------===// -// -// The SSACCmpConv class performs ccmp-conversion on SSA form machine code -// after determining if it is possible. The class contains no heuristics; -// external code should be used to determine when ccmp-conversion is a good -// idea. -// -// CCmp-formation works on a CFG representing chained conditions, typically -// from C's short-circuit || and && operators: -// -// From: Head To: Head -// / | CmpBB -// / | / | -// | CmpBB / | -// | / | Tail | -// | / | | | -// Tail | | | -// | | | | -// ... ... ... ... -// -// The Head block is terminated by a br.cond instruction, and the CmpBB block -// contains compare + br.cond. Tail must be a successor of both. -// -// The cmp-conversion turns the compare instruction in CmpBB into a conditional -// compare, and merges CmpBB into Head, speculatively executing its -// instructions. The ARM64 conditional compare instructions have an immediate -// operand that specifies the NZCV flag values when the condition is false and -// the compare isn't executed. This makes it possible to chain compares with -// different condition codes. -// -// Example: -// -// if (a == 5 || b == 17) -// foo(); -// -// Head: -// cmp w0, #5 -// b.eq Tail -// CmpBB: -// cmp w1, #17 -// b.eq Tail -// ... -// Tail: -// bl _foo -// -// Becomes: -// -// Head: -// cmp w0, #5 -// ccmp w1, #17, 4, ne ; 4 = nZcv -// b.eq Tail -// ... -// Tail: -// bl _foo -// -// The ccmp condition code is the one that would cause the Head terminator to -// branch to CmpBB. -// -// FIXME: It should also be possible to speculate a block on the critical edge -// between Head and Tail, just like if-converting a diamond. -// -// FIXME: Handle PHIs in Tail by turning them into selects (if-conversion). - -namespace { -class SSACCmpConv { - MachineFunction *MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - MachineRegisterInfo *MRI; - -public: - /// The first block containing a conditional branch, dominating everything - /// else. - MachineBasicBlock *Head; - - /// The block containing cmp+br.cond with a sucessor shared with Head. - MachineBasicBlock *CmpBB; - - /// The common successor for Head and CmpBB. - MachineBasicBlock *Tail; - - /// The compare instruction in CmpBB that can be converted to a ccmp. 
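[Editor's aside] The worked example above ("ccmp w1, #17, 4, ne ; 4 = nZcv") relies on choosing an NZCV immediate whose flag values satisfy the CmpBB-to-Tail condition when the ccmp condition fails. The sketch below illustrates that mapping for a few unambiguous condition codes only (N, Z, C, V occupy bits 3..0 of the immediate); it is an illustration, not the pass's full getNZCVToSatisfyCondCode table:

    #include <cstdio>

    enum CondCode { EQ, NE, HS, LO, MI, PL, VS, VC };

    // Return an NZCV immediate (N=bit3, Z=bit2, C=bit1, V=bit0) whose flag
    // values make the given condition evaluate to true.
    unsigned nzcvToSatisfy(CondCode CC) {
      switch (CC) {
      case EQ: return 0b0100; // Z set
      case NE: return 0b0000; // Z clear
      case HS: return 0b0010; // C set
      case LO: return 0b0000; // C clear
      case MI: return 0b1000; // N set
      case PL: return 0b0000; // N clear
      case VS: return 0b0001; // V set
      case VC: return 0b0000; // V clear
      }
      return 0;
    }

    int main() {
      // For "b.eq Tail" after the ccmp, the immediate must set Z, i.e. 4,
      // matching the "4 = nZcv" annotation in the example above.
      std::printf("EQ -> %u\n", nzcvToSatisfy(EQ));
    }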
- MachineInstr *CmpMI; - -private: - /// The branch condition in Head as determined by AnalyzeBranch. - SmallVector<MachineOperand, 4> HeadCond; - - /// The condition code that makes Head branch to CmpBB. - ARM64CC::CondCode HeadCmpBBCC; - - /// The branch condition in CmpBB. - SmallVector<MachineOperand, 4> CmpBBCond; - - /// The condition code that makes CmpBB branch to Tail. - ARM64CC::CondCode CmpBBTailCC; - - /// Check if the Tail PHIs are trivially convertible. - bool trivialTailPHIs(); - - /// Remove CmpBB from the Tail PHIs. - void updateTailPHIs(); - - /// Check if an operand defining DstReg is dead. - bool isDeadDef(unsigned DstReg); - - /// Find the compare instruction in MBB that controls the conditional branch. - /// Return NULL if a convertible instruction can't be found. - MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB); - - /// Return true if all non-terminator instructions in MBB can be safely - /// speculated. - bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI); - -public: - /// runOnMachineFunction - Initialize per-function data structures. - void runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); - MRI = &MF.getRegInfo(); - } - - /// If the sub-CFG headed by MBB can be cmp-converted, initialize the - /// internal state, and return true. - bool canConvert(MachineBasicBlock *MBB); - - /// Cmo-convert the last block passed to canConvertCmp(), assuming - /// it is possible. Add any erased blocks to RemovedBlocks. - void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks); - - /// Return the expected code size delta if the conversion into a - /// conditional compare is performed. - int expectedCodeSizeDelta() const; -}; -} // end anonymous namespace - -// Check that all PHIs in Tail are selecting the same value from Head and CmpBB. -// This means that no if-conversion is required when merging CmpBB into Head. -bool SSACCmpConv::trivialTailPHIs() { - for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); - I != E && I->isPHI(); ++I) { - unsigned HeadReg = 0, CmpBBReg = 0; - // PHI operands come in (VReg, MBB) pairs. - for (unsigned oi = 1, oe = I->getNumOperands(); oi != oe; oi += 2) { - MachineBasicBlock *MBB = I->getOperand(oi + 1).getMBB(); - unsigned Reg = I->getOperand(oi).getReg(); - if (MBB == Head) { - assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands"); - HeadReg = Reg; - } - if (MBB == CmpBB) { - assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands"); - CmpBBReg = Reg; - } - } - if (HeadReg != CmpBBReg) - return false; - } - return true; -} - -// Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply -// removing the CmpBB operands. The Head operands will be identical. -void SSACCmpConv::updateTailPHIs() { - for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); - I != E && I->isPHI(); ++I) { - // I is a PHI. It can have multiple entries for CmpBB. - for (unsigned oi = I->getNumOperands(); oi > 2; oi -= 2) { - // PHI operands are (Reg, MBB) at (oi-2, oi-1). - if (I->getOperand(oi - 1).getMBB() == CmpBB) { - I->RemoveOperand(oi - 1); - I->RemoveOperand(oi - 2); - } - } - } -} - -// This pass runs before the ARM64DeadRegisterDefinitions pass, so compares are -// still writing virtual registers without any uses. -bool SSACCmpConv::isDeadDef(unsigned DstReg) { - // Writes to the zero register are dead. 
- if (DstReg == ARM64::WZR || DstReg == ARM64::XZR) - return true; - if (!TargetRegisterInfo::isVirtualRegister(DstReg)) - return false; - // A virtual register def without any uses will be marked dead later, and - // eventually replaced by the zero register. - return MRI->use_nodbg_empty(DstReg); -} - -// Parse a condition code returned by AnalyzeBranch, and compute the CondCode -// corresponding to TBB. -// Return -static bool parseCond(ArrayRef<MachineOperand> Cond, ARM64CC::CondCode &CC) { - // A normal br.cond simply has the condition code. - if (Cond[0].getImm() != -1) { - assert(Cond.size() == 1 && "Unknown Cond array format"); - CC = (ARM64CC::CondCode)(int)Cond[0].getImm(); - return true; - } - // For tbz and cbz instruction, the opcode is next. - switch (Cond[1].getImm()) { - default: - // This includes tbz / tbnz branches which can't be converted to - // ccmp + br.cond. - return false; - case ARM64::CBZW: - case ARM64::CBZX: - assert(Cond.size() == 3 && "Unknown Cond array format"); - CC = ARM64CC::EQ; - return true; - case ARM64::CBNZW: - case ARM64::CBNZX: - assert(Cond.size() == 3 && "Unknown Cond array format"); - CC = ARM64CC::NE; - return true; - } -} - -MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { - MachineBasicBlock::iterator I = MBB->getFirstTerminator(); - if (I == MBB->end()) - return 0; - // The terminator must be controlled by the flags. - if (!I->readsRegister(ARM64::CPSR)) { - switch (I->getOpcode()) { - case ARM64::CBZW: - case ARM64::CBZX: - case ARM64::CBNZW: - case ARM64::CBNZX: - // These can be converted into a ccmp against #0. - return I; - } - ++NumCmpTermRejs; - DEBUG(dbgs() << "Flags not used by terminator: " << *I); - return 0; - } - - // Now find the instruction controlling the terminator. - for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) { - --I; - assert(!I->isTerminator() && "Spurious terminator"); - switch (I->getOpcode()) { - // cmp is an alias for subs with a dead destination register. - case ARM64::SUBSWri: - case ARM64::SUBSXri: - // cmn is an alias for adds with a dead destination register. - case ARM64::ADDSWri: - case ARM64::ADDSXri: - // Check that the immediate operand is within range, ccmp wants a uimm5. - // Rd = SUBSri Rn, imm, shift - if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) { - DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I); - ++NumImmRangeRejs; - return 0; - } - // Fall through. - case ARM64::SUBSWrr: - case ARM64::SUBSXrr: - case ARM64::ADDSWrr: - case ARM64::ADDSXrr: - if (isDeadDef(I->getOperand(0).getReg())) - return I; - DEBUG(dbgs() << "Can't convert compare with live destination: " << *I); - ++NumLiveDstRejs; - return 0; - case ARM64::FCMPSrr: - case ARM64::FCMPDrr: - case ARM64::FCMPESrr: - case ARM64::FCMPEDrr: - return I; - } - - // Check for flag reads and clobbers. - MIOperands::PhysRegInfo PRI = - MIOperands(I).analyzePhysReg(ARM64::CPSR, TRI); - - if (PRI.Reads) { - // The ccmp doesn't produce exactly the same flags as the original - // compare, so reject the transform if there are uses of the flags - // besides the terminators. - DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I); - ++NumMultCPSRUses; - return 0; - } - - if (PRI.Clobbers) { - DEBUG(dbgs() << "Not convertible compare: " << *I); - ++NumUnknCPSRDefs; - return 0; - } - } - DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n'); - return 0; -} - -/// Determine if all the instructions in MBB can safely -/// be speculated. 
The terminators are not considered. -/// -/// Only CmpMI is allowed to clobber the flags. -/// -bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB, - const MachineInstr *CmpMI) { - // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to - // get right. - if (!MBB->livein_empty()) { - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n"); - return false; - } - - unsigned InstrCount = 0; - - // Check all instructions, except the terminators. It is assumed that - // terminators never have side effects or define any used register values. - for (MachineBasicBlock::iterator I = MBB->begin(), - E = MBB->getFirstTerminator(); - I != E; ++I) { - if (I->isDebugValue()) - continue; - - if (++InstrCount > BlockInstrLimit && !Stress) { - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than " - << BlockInstrLimit << " instructions.\n"); - return false; - } - - // There shouldn't normally be any phis in a single-predecessor block. - if (I->isPHI()) { - DEBUG(dbgs() << "Can't hoist: " << *I); - return false; - } - - // Don't speculate loads. Note that it may be possible and desirable to - // speculate GOT or constant pool loads that are guaranteed not to trap, - // but we don't support that for now. - if (I->mayLoad()) { - DEBUG(dbgs() << "Won't speculate load: " << *I); - return false; - } - - // We never speculate stores, so an AA pointer isn't necessary. - bool DontMoveAcrossStore = true; - if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) { - DEBUG(dbgs() << "Can't speculate: " << *I); - return false; - } - - // Only CmpMI is alowed to clobber the flags. - if (&*I != CmpMI && I->modifiesRegister(ARM64::CPSR, TRI)) { - DEBUG(dbgs() << "Clobbers flags: " << *I); - return false; - } - } - return true; -} - -/// Analyze the sub-cfg rooted in MBB, and return true if it is a potential -/// candidate for cmp-conversion. Fill out the internal state. -/// -bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) { - Head = MBB; - Tail = CmpBB = 0; - - if (Head->succ_size() != 2) - return false; - MachineBasicBlock *Succ0 = Head->succ_begin()[0]; - MachineBasicBlock *Succ1 = Head->succ_begin()[1]; - - // CmpBB can only have a single predecessor. Tail is allowed many. - if (Succ0->pred_size() != 1) - std::swap(Succ0, Succ1); - - // Succ0 is our candidate for CmpBB. - if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2) - return false; - - CmpBB = Succ0; - Tail = Succ1; - - if (!CmpBB->isSuccessor(Tail)) - return false; - - // The CFG topology checks out. - DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() << " -> BB#" - << CmpBB->getNumber() << " -> BB#" << Tail->getNumber() << '\n'); - ++NumConsidered; - - // Tail is allowed to have many predecessors, but we can't handle PHIs yet. - // - // FIXME: Real PHIs could be if-converted as long as the CmpBB values are - // defined before The CmpBB cmp clobbers the flags. Alternatively, it should - // always be safe to sink the ccmp down to immediately before the CmpBB - // terminators. - if (!trivialTailPHIs()) { - DEBUG(dbgs() << "Can't handle phis in Tail.\n"); - ++NumPhiRejs; - return false; - } - - if (!Tail->livein_empty()) { - DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n"); - ++NumPhysRejs; - return false; - } - - // CmpBB should never have PHIs since Head is its only predecessor. - // FIXME: Clean them up if it happens. 
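[Editor's aside] The topology test at the top of canConvert above corresponds to the triangle drawn in the earlier comment block: Head has exactly two successors, one of them (CmpBB) has Head as its only predecessor and shares a successor (Tail) with Head. A toy version of that check over plain adjacency lists, with made-up block numbers:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Succs[b] / Preds[b]: successor and predecessor lists of block b.
    bool isCcmpTriangle(const std::vector<std::vector<int>> &Succs,
                        const std::vector<std::vector<int>> &Preds, int Head,
                        int &CmpBB, int &Tail) {
      if (Succs[Head].size() != 2)
        return false;
      int S0 = Succs[Head][0], S1 = Succs[Head][1];
      if (Preds[S0].size() != 1)       // CmpBB must have Head as its only pred
        std::swap(S0, S1);
      if (Preds[S0].size() != 1 || Succs[S0].size() != 2)
        return false;
      CmpBB = S0;
      Tail = S1;
      // Tail must also be a successor of CmpBB.
      return std::find(Succs[CmpBB].begin(), Succs[CmpBB].end(), Tail) !=
             Succs[CmpBB].end();
    }

    int main() {
      // Head=0 branches to CmpBB=1 and Tail=2; CmpBB branches to Tail and 3.
      std::vector<std::vector<int>> Succs = {{1, 2}, {2, 3}, {}, {}};
      std::vector<std::vector<int>> Preds = {{}, {0}, {0, 1}, {1}};
      int CmpBB, Tail;
      std::printf("triangle: %d\n", isCcmpTriangle(Succs, Preds, 0, CmpBB, Tail));
    }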
- if (!CmpBB->empty() && CmpBB->front().isPHI()) { - DEBUG(dbgs() << "Can't handle phis in CmpBB.\n"); - ++NumPhi2Rejs; - return false; - } - - if (!CmpBB->livein_empty()) { - DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n"); - ++NumPhysRejs; - return false; - } - - // The branch we're looking to eliminate must be analyzable. - HeadCond.clear(); - MachineBasicBlock *TBB = 0, *FBB = 0; - if (TII->AnalyzeBranch(*Head, TBB, FBB, HeadCond)) { - DEBUG(dbgs() << "Head branch not analyzable.\n"); - ++NumHeadBranchRejs; - return false; - } - - // This is weird, probably some sort of degenerate CFG, or an edge to a - // landing pad. - if (!TBB || HeadCond.empty()) { - DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in Head.\n"); - ++NumHeadBranchRejs; - return false; - } - - if (!parseCond(HeadCond, HeadCmpBBCC)) { - DEBUG(dbgs() << "Unsupported branch type on Head\n"); - ++NumHeadBranchRejs; - return false; - } - - // Make sure the branch direction is right. - if (TBB != CmpBB) { - assert(TBB == Tail && "Unexpected TBB"); - HeadCmpBBCC = ARM64CC::getInvertedCondCode(HeadCmpBBCC); - } - - CmpBBCond.clear(); - TBB = FBB = 0; - if (TII->AnalyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) { - DEBUG(dbgs() << "CmpBB branch not analyzable.\n"); - ++NumCmpBranchRejs; - return false; - } - - if (!TBB || CmpBBCond.empty()) { - DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in CmpBB.\n"); - ++NumCmpBranchRejs; - return false; - } - - if (!parseCond(CmpBBCond, CmpBBTailCC)) { - DEBUG(dbgs() << "Unsupported branch type on CmpBB\n"); - ++NumCmpBranchRejs; - return false; - } - - if (TBB != Tail) - CmpBBTailCC = ARM64CC::getInvertedCondCode(CmpBBTailCC); - - DEBUG(dbgs() << "Head->CmpBB on " << ARM64CC::getCondCodeName(HeadCmpBBCC) - << ", CmpBB->Tail on " << ARM64CC::getCondCodeName(CmpBBTailCC) - << '\n'); - - CmpMI = findConvertibleCompare(CmpBB); - if (!CmpMI) - return false; - - if (!canSpeculateInstrs(CmpBB, CmpMI)) { - ++NumSpeculateRejs; - return false; - } - return true; -} - -void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) { - DEBUG(dbgs() << "Merging BB#" << CmpBB->getNumber() << " into BB#" - << Head->getNumber() << ":\n" << *CmpBB); - - // All CmpBB instructions are moved into Head, and CmpBB is deleted. - // Update the CFG first. - updateTailPHIs(); - Head->removeSuccessor(CmpBB); - CmpBB->removeSuccessor(Tail); - Head->transferSuccessorsAndUpdatePHIs(CmpBB); - DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc(); - TII->RemoveBranch(*Head); - - // If the Head terminator was one of the cbz / tbz branches with built-in - // compare, we need to insert an explicit compare instruction in its place. - if (HeadCond[0].getImm() == -1) { - ++NumCompBranches; - unsigned Opc = 0; - switch (HeadCond[1].getImm()) { - case ARM64::CBZW: - case ARM64::CBNZW: - Opc = ARM64::SUBSWri; - break; - case ARM64::CBZX: - case ARM64::CBNZX: - Opc = ARM64::SUBSXri; - break; - default: - llvm_unreachable("Cannot convert Head branch"); - } - const MCInstrDesc &MCID = TII->get(Opc); - // Create a dummy virtual register for the SUBS def. - unsigned DestReg = - MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF)); - // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz. - BuildMI(*Head, Head->end(), TermDL, MCID) - .addReg(DestReg, RegState::Define | RegState::Dead) - .addOperand(HeadCond[2]) - .addImm(0) - .addImm(0); - // SUBS uses the GPR*sp register classes. 
- MRI->constrainRegClass(HeadCond[2].getReg(), - TII->getRegClass(MCID, 1, TRI, *MF)); - } - - Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end()); - - // Now replace CmpMI with a ccmp instruction that also considers the incoming - // flags. - unsigned Opc = 0; - unsigned FirstOp = 1; // First CmpMI operand to copy. - bool isZBranch = false; // CmpMI is a cbz/cbnz instruction. - switch (CmpMI->getOpcode()) { - default: - llvm_unreachable("Unknown compare opcode"); - case ARM64::SUBSWri: Opc = ARM64::CCMPWi; break; - case ARM64::SUBSWrr: Opc = ARM64::CCMPWr; break; - case ARM64::SUBSXri: Opc = ARM64::CCMPXi; break; - case ARM64::SUBSXrr: Opc = ARM64::CCMPXr; break; - case ARM64::ADDSWri: Opc = ARM64::CCMNWi; break; - case ARM64::ADDSWrr: Opc = ARM64::CCMNWr; break; - case ARM64::ADDSXri: Opc = ARM64::CCMNXi; break; - case ARM64::ADDSXrr: Opc = ARM64::CCMNXr; break; - case ARM64::FCMPSrr: Opc = ARM64::FCCMPSrr; FirstOp = 0; break; - case ARM64::FCMPDrr: Opc = ARM64::FCCMPDrr; FirstOp = 0; break; - case ARM64::FCMPESrr: Opc = ARM64::FCCMPESrr; FirstOp = 0; break; - case ARM64::FCMPEDrr: Opc = ARM64::FCCMPEDrr; FirstOp = 0; break; - case ARM64::CBZW: - case ARM64::CBNZW: - Opc = ARM64::CCMPWi; - FirstOp = 0; - isZBranch = true; - break; - case ARM64::CBZX: - case ARM64::CBNZX: - Opc = ARM64::CCMPXi; - FirstOp = 0; - isZBranch = true; - break; - } - - // The ccmp instruction should set the flags according to the comparison when - // Head would have branched to CmpBB. - // The NZCV immediate operand should provide flags for the case where Head - // would have branched to Tail. These flags should cause the new Head - // terminator to branch to tail. - unsigned NZCV = ARM64CC::getNZCVToSatisfyCondCode(CmpBBTailCC); - const MCInstrDesc &MCID = TII->get(Opc); - MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(), - TII->getRegClass(MCID, 0, TRI, *MF)); - if (CmpMI->getOperand(FirstOp + 1).isReg()) - MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(), - TII->getRegClass(MCID, 1, TRI, *MF)); - MachineInstrBuilder MIB = - BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID) - .addOperand(CmpMI->getOperand(FirstOp)); // Register Rn - if (isZBranch) - MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0 - else - MIB.addOperand(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate - MIB.addImm(NZCV).addImm(HeadCmpBBCC); - - // If CmpMI was a terminator, we need a new conditional branch to replace it. - // This now becomes a Head terminator. - if (isZBranch) { - bool isNZ = CmpMI->getOpcode() == ARM64::CBNZW || - CmpMI->getOpcode() == ARM64::CBNZX; - BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(ARM64::Bcc)) - .addImm(isNZ ? ARM64CC::NE : ARM64CC::EQ) - .addOperand(CmpMI->getOperand(1)); // Branch target. - } - CmpMI->eraseFromParent(); - Head->updateTerminator(); - - RemovedBlocks.push_back(CmpBB); - CmpBB->eraseFromParent(); - DEBUG(dbgs() << "Result:\n" << *Head); - ++NumConverted; -} - -int SSACCmpConv::expectedCodeSizeDelta() const { - int delta = 0; - // If the Head terminator was one of the cbz / tbz branches with built-in - // compare, we need to insert an explicit compare instruction in its place - // plus a branch instruction. 
- if (HeadCond[0].getImm() == -1) { - switch (HeadCond[1].getImm()) { - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: - // Therefore delta += 1 - delta = 1; - break; - default: - llvm_unreachable("Cannot convert Head branch"); - } - } - // If the Cmp terminator was one of the cbz / tbz branches with - // built-in compare, it will be turned into a compare instruction - // into Head, but we do not save any instruction. - // Otherwise, we save the branch instruction. - switch (CmpMI->getOpcode()) { - default: - --delta; - break; - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: - break; - } - return delta; -} - -//===----------------------------------------------------------------------===// -// ARM64ConditionalCompares Pass -//===----------------------------------------------------------------------===// - -namespace { -class ARM64ConditionalCompares : public MachineFunctionPass { - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - const MCSchedModel *SchedModel; - // Does the proceeded function has Oz attribute. - bool MinSize; - MachineRegisterInfo *MRI; - MachineDominatorTree *DomTree; - MachineLoopInfo *Loops; - MachineTraceMetrics *Traces; - MachineTraceMetrics::Ensemble *MinInstr; - SSACCmpConv CmpConv; - -public: - static char ID; - ARM64ConditionalCompares() : MachineFunctionPass(ID) {} - void getAnalysisUsage(AnalysisUsage &AU) const; - bool runOnMachineFunction(MachineFunction &MF); - const char *getPassName() const { return "ARM64 Conditional Compares"; } - -private: - bool tryConvert(MachineBasicBlock *); - void updateDomTree(ArrayRef<MachineBasicBlock *> Removed); - void updateLoops(ArrayRef<MachineBasicBlock *> Removed); - void invalidateTraces(); - bool shouldConvert(); -}; -} // end anonymous namespace - -char ARM64ConditionalCompares::ID = 0; - -namespace llvm { -void initializeARM64ConditionalComparesPass(PassRegistry &); -} - -INITIALIZE_PASS_BEGIN(ARM64ConditionalCompares, "arm64-ccmp", "ARM64 CCMP Pass", - false, false) -INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) -INITIALIZE_PASS_END(ARM64ConditionalCompares, "arm64-ccmp", "ARM64 CCMP Pass", - false, false) - -FunctionPass *llvm::createARM64ConditionalCompares() { - return new ARM64ConditionalCompares(); -} - -void ARM64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineBranchProbabilityInfo>(); - AU.addRequired<MachineDominatorTree>(); - AU.addPreserved<MachineDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); - AU.addRequired<MachineTraceMetrics>(); - AU.addPreserved<MachineTraceMetrics>(); - MachineFunctionPass::getAnalysisUsage(AU); -} - -/// Update the dominator tree after if-conversion erased some blocks. -void -ARM64ConditionalCompares::updateDomTree(ArrayRef<MachineBasicBlock *> Removed) { - // convert() removes CmpBB which was previously dominated by Head. - // CmpBB children should be transferred to Head. 
- MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head); - for (unsigned i = 0, e = Removed.size(); i != e; ++i) { - MachineDomTreeNode *Node = DomTree->getNode(Removed[i]); - assert(Node != HeadNode && "Cannot erase the head node"); - assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head"); - while (Node->getNumChildren()) - DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); - DomTree->eraseNode(Removed[i]); - } -} - -/// Update LoopInfo after if-conversion. -void -ARM64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) { - if (!Loops) - return; - for (unsigned i = 0, e = Removed.size(); i != e; ++i) - Loops->removeBlock(Removed[i]); -} - -/// Invalidate MachineTraceMetrics before if-conversion. -void ARM64ConditionalCompares::invalidateTraces() { - Traces->invalidate(CmpConv.Head); - Traces->invalidate(CmpConv.CmpBB); -} - -/// Apply cost model and heuristics to the if-conversion in IfConv. -/// Return true if the conversion is a good idea. -/// -bool ARM64ConditionalCompares::shouldConvert() { - // Stress testing mode disables all cost considerations. - if (Stress) - return true; - if (!MinInstr) - MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); - - // Head dominates CmpBB, so it is always included in its trace. - MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB); - - // If code size is the main concern - if (MinSize) { - int CodeSizeDelta = CmpConv.expectedCodeSizeDelta(); - DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n'); - // If we are minimizing the code size, do the conversion whatever - // the cost is. - if (CodeSizeDelta < 0) - return true; - if (CodeSizeDelta > 0) { - DEBUG(dbgs() << "Code size is increasing, give up on this one.\n"); - return false; - } - // CodeSizeDelta == 0, continue with the regular heuristics - } - - // Heuristic: The compare conversion delays the execution of the branch - // instruction because we must wait for the inputs to the second compare as - // well. The branch has no dependent instructions, but delaying it increases - // the cost of a misprediction. - // - // Set a limit on the delay we will accept. - unsigned DelayLimit = SchedModel->MispredictPenalty * 3 / 4; - - // Instruction depths can be computed for all trace instructions above CmpBB. - unsigned HeadDepth = - Trace.getInstrCycles(CmpConv.Head->getFirstTerminator()).Depth; - unsigned CmpBBDepth = - Trace.getInstrCycles(CmpConv.CmpBB->getFirstTerminator()).Depth; - DEBUG(dbgs() << "Head depth: " << HeadDepth - << "\nCmpBB depth: " << CmpBBDepth << '\n'); - if (CmpBBDepth > HeadDepth + DelayLimit) { - DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit - << " cycles.\n"); - return false; - } - - // Check the resource depth at the bottom of CmpBB - these instructions will - // be speculated. - unsigned ResDepth = Trace.getResourceDepth(true); - DEBUG(dbgs() << "Resources: " << ResDepth << '\n'); - - // Heuristic: The speculatively executed instructions must all be able to - // merge into the Head block. The Head critical path should dominate the - // resource cost of the speculated instructions. 
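[Editor's aside] shouldConvert, above and continuing below, combines two trace-based tests: the merged compare may delay the branch by at most three quarters of the misprediction penalty, and the speculated instructions must fit under Head's critical path. A small numeric sketch of both tests; the cycle counts are made up:

    #include <cstdio>

    int main() {
      unsigned MispredictPenalty = 16; // cycles, illustrative value
      unsigned HeadDepth = 10, CmpBBDepth = 14, ResDepth = 8;

      unsigned DelayLimit = MispredictPenalty * 3 / 4; // 12 cycles
      bool TooMuchDelay  = CmpBBDepth > HeadDepth + DelayLimit;
      bool TooManyInstrs = ResDepth > HeadDepth;

      std::printf("delay ok: %d, resources ok: %d, convert: %d\n",
                  !TooMuchDelay, !TooManyInstrs, !TooMuchDelay && !TooManyInstrs);
    }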
- if (ResDepth > HeadDepth) { - DEBUG(dbgs() << "Too many instructions to speculate.\n"); - return false; - } - return true; -} - -bool ARM64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) { - bool Changed = false; - while (CmpConv.canConvert(MBB) && shouldConvert()) { - invalidateTraces(); - SmallVector<MachineBasicBlock *, 4> RemovedBlocks; - CmpConv.convert(RemovedBlocks); - Changed = true; - updateDomTree(RemovedBlocks); - updateLoops(RemovedBlocks); - } - return Changed; -} - -bool ARM64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "********** ARM64 Conditional Compares **********\n" - << "********** Function: " << MF.getName() << '\n'); - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); - SchedModel = - MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel(); - MRI = &MF.getRegInfo(); - DomTree = &getAnalysis<MachineDominatorTree>(); - Loops = getAnalysisIfAvailable<MachineLoopInfo>(); - Traces = &getAnalysis<MachineTraceMetrics>(); - MinInstr = 0; - MinSize = MF.getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::MinSize); - - bool Changed = false; - CmpConv.runOnMachineFunction(MF); - - // Visit blocks in dominator tree pre-order. The pre-order enables multiple - // cmp-conversions from the same head block. - // Note that updateDomTree() modifies the children of the DomTree node - // currently being visited. The df_iterator supports that, it doesn't look at - // child_begin() / child_end() until after a node has been visited. - for (df_iterator<MachineDominatorTree *> I = df_begin(DomTree), - E = df_end(DomTree); - I != E; ++I) - if (tryConvert(I->getBlock())) - Changed = true; - - return Changed; -} diff --git a/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp b/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp deleted file mode 100644 index 3e410e5..0000000 --- a/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp +++ /dev/null @@ -1,104 +0,0 @@ -//===-- ARM64DeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// When allowed by the instruction, replace a dead definition of a GPR with -// the zero register. This makes the code a bit friendlier towards the -// hardware's register renamer. -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-dead-defs" -#include "ARM64.h" -#include "ARM64RegisterInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced"); - -namespace { -class ARM64DeadRegisterDefinitions : public MachineFunctionPass { -private: - bool processMachineBasicBlock(MachineBasicBlock *MBB); - -public: - static char ID; // Pass identification, replacement for typeid. 
- explicit ARM64DeadRegisterDefinitions() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &F); - - const char *getPassName() const { return "Dead register definitions"; } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } -}; -char ARM64DeadRegisterDefinitions::ID = 0; -} // end anonymous namespace - -bool -ARM64DeadRegisterDefinitions::processMachineBasicBlock(MachineBasicBlock *MBB) { - bool Changed = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) { - MachineInstr *MI = I; - for (int i = 0, e = MI->getDesc().getNumDefs(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDead() && MO.isDef()) { - assert(!MO.isImplicit() && "Unexpected implicit def!"); - DEBUG(dbgs() << " Dead def operand #" << i << " in:\n "; - MI->print(dbgs())); - // Be careful not to change the register if it's a tied operand. - if (MI->isRegTiedToUseOperand(i)) { - DEBUG(dbgs() << " Ignoring, def is tied operand.\n"); - continue; - } - // Make sure the instruction take a register class that contains - // the zero register and replace it if so. - unsigned NewReg; - switch (MI->getDesc().OpInfo[i].RegClass) { - default: - DEBUG(dbgs() << " Ignoring, register is not a GPR.\n"); - continue; - case ARM64::GPR32RegClassID: - NewReg = ARM64::WZR; - break; - case ARM64::GPR64RegClassID: - NewReg = ARM64::XZR; - break; - } - DEBUG(dbgs() << " Replacing with zero register. New:\n "); - MO.setReg(NewReg); - DEBUG(MI->print(dbgs())); - ++NumDeadDefsReplaced; - } - } - } - return Changed; -} - -// Scan the function for instructions that have a dead definition of a -// register. Replace that register with the zero register when possible. -bool ARM64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &mf) { - MachineFunction *MF = &mf; - bool Changed = false; - DEBUG(dbgs() << "***** ARM64DeadRegisterDefinitions *****\n"); - - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - if (processMachineBasicBlock(I)) - Changed = true; - return Changed; -} - -FunctionPass *llvm::createARM64DeadRegisterDefinitions() { - return new ARM64DeadRegisterDefinitions(); -} diff --git a/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp b/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp deleted file mode 100644 index e082baf..0000000 --- a/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp +++ /dev/null @@ -1,737 +0,0 @@ -//===-- ARM64ExpandPseudoInsts.cpp - Expand pseudo instructions ---*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass that expands pseudo instructions into target -// instructions to allow proper scheduling and other late optimizations. This -// pass should be run after register allocation but before the post-regalloc -// scheduling pass. 
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "ARM64InstrInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Support/MathExtras.h" -using namespace llvm; - -namespace { -class ARM64ExpandPseudo : public MachineFunctionPass { -public: - static char ID; - ARM64ExpandPseudo() : MachineFunctionPass(ID) {} - - const ARM64InstrInfo *TII; - - virtual bool runOnMachineFunction(MachineFunction &Fn); - - virtual const char *getPassName() const { - return "ARM64 pseudo instruction expansion pass"; - } - -private: - bool expandMBB(MachineBasicBlock &MBB); - bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); - bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - unsigned BitSize); -}; -char ARM64ExpandPseudo::ID = 0; -} - -/// \brief Transfer implicit operands on the pseudo instruction to the -/// instructions created from the expansion. -static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, - MachineInstrBuilder &DefMI) { - const MCInstrDesc &Desc = OldMI.getDesc(); - for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e; - ++i) { - const MachineOperand &MO = OldMI.getOperand(i); - assert(MO.isReg() && MO.getReg()); - if (MO.isUse()) - UseMI.addOperand(MO); - else - DefMI.addOperand(MO); - } -} - -/// \brief Helper function which extracts the specified 16-bit chunk from a -/// 64-bit value. -static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) { - assert(ChunkIdx < 4 && "Out of range chunk index specified!"); - - return (Imm >> (ChunkIdx * 16)) & 0xFFFF; -} - -/// \brief Helper function which replicates a 16-bit chunk within a 64-bit -/// value. Indices correspond to element numbers in a v4i16. -static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) { - assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!"); - const unsigned ShiftAmt = ToIdx * 16; - - // Replicate the source chunk to the destination position. - const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt; - // Clear the destination chunk. - Imm &= ~(0xFFFFLL << ShiftAmt); - // Insert the replicated chunk. - return Imm | Chunk; -} - -/// \brief Helper function which tries to materialize a 64-bit value with an -/// ORR + MOVK instruction sequence. -static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const ARM64InstrInfo *TII, unsigned ChunkIdx) { - assert(ChunkIdx < 4 && "Out of range chunk index specified!"); - const unsigned ShiftAmt = ChunkIdx * 16; - - uint64_t Encoding; - if (ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) { - // Create the ORR-immediate instruction. - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri)) - .addOperand(MI.getOperand(0)) - .addReg(ARM64::XZR) - .addImm(Encoding); - - // Create the MOVK instruction. 
- const unsigned Imm16 = getChunk(UImm, ChunkIdx); - const unsigned DstReg = MI.getOperand(0).getReg(); - const bool DstIsDead = MI.getOperand(0).isDead(); - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg) - .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt)); - - transferImpOps(MI, MIB, MIB1); - MI.eraseFromParent(); - return true; - } - - return false; -} - -/// \brief Check whether the given 16-bit chunk replicated to full 64-bit width -/// can be materialized with an ORR instruction. -static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) { - Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk; - - return ARM64_AM::processLogicalImmediate(Chunk, 64, Encoding); -} - -/// \brief Check for identical 16-bit chunks within the constant and if so -/// materialize them with a single ORR instruction. The remaining one or two -/// 16-bit chunks will be materialized with MOVK instructions. -/// -/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order -/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with -/// an ORR instruction. -/// -static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const ARM64InstrInfo *TII) { - typedef DenseMap<uint64_t, unsigned> CountMap; - CountMap Counts; - - // Scan the constant and count how often every chunk occurs. - for (unsigned Idx = 0; Idx < 4; ++Idx) - ++Counts[getChunk(UImm, Idx)]; - - // Traverse the chunks to find one which occurs more than once. - for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end(); - Chunk != End; ++Chunk) { - const uint64_t ChunkVal = Chunk->first; - const unsigned Count = Chunk->second; - - uint64_t Encoding = 0; - - // We are looking for chunks which have two or three instances and can be - // materialized with an ORR instruction. - if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding)) - continue; - - const bool CountThree = Count == 3; - // Create the ORR-immediate instruction. - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri)) - .addOperand(MI.getOperand(0)) - .addReg(ARM64::XZR) - .addImm(Encoding); - - const unsigned DstReg = MI.getOperand(0).getReg(); - const bool DstIsDead = MI.getOperand(0).isDead(); - - unsigned ShiftAmt = 0; - uint64_t Imm16 = 0; - // Find the first chunk not materialized with the ORR instruction. - for (; ShiftAmt < 64; ShiftAmt += 16) { - Imm16 = (UImm >> ShiftAmt) & 0xFFFF; - - if (Imm16 != ChunkVal) - break; - } - - // Create the first MOVK instruction. - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) - .addReg(DstReg, - RegState::Define | getDeadRegState(DstIsDead && CountThree)) - .addReg(DstReg) - .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt)); - - // In case we have three instances the whole constant is now materialized - // and we can exit. - if (CountThree) { - transferImpOps(MI, MIB, MIB1); - MI.eraseFromParent(); - return true; - } - - // Find the remaining chunk which needs to be materialized. - for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) { - Imm16 = (UImm >> ShiftAmt) & 0xFFFF; - - if (Imm16 != ChunkVal) - break; - } - - // Create the second MOVK instruction. 
- MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg) - .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt)); - - transferImpOps(MI, MIB, MIB2); - MI.eraseFromParent(); - return true; - } - - return false; -} - -/// \brief Check whether this chunk matches the pattern '1...0...'. This pattern -/// starts a contiguous sequence of ones if we look at the bits from the LSB -/// towards the MSB. -static bool isStartChunk(uint64_t Chunk) { - if (Chunk == 0 || Chunk == UINT64_MAX) - return false; - - return (CountLeadingOnes_64(Chunk) + countTrailingZeros(Chunk)) == 64; -} - -/// \brief Check whether this chunk matches the pattern '0...1...' This pattern -/// ends a contiguous sequence of ones if we look at the bits from the LSB -/// towards the MSB. -static bool isEndChunk(uint64_t Chunk) { - if (Chunk == 0 || Chunk == UINT64_MAX) - return false; - - return (countLeadingZeros(Chunk) + CountTrailingOnes_64(Chunk)) == 64; -} - -/// \brief Clear or set all bits in the chunk at the given index. -static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) { - const uint64_t Mask = 0xFFFF; - - if (Clear) - // Clear chunk in the immediate. - Imm &= ~(Mask << (Idx * 16)); - else - // Set all bits in the immediate for the particular chunk. - Imm |= Mask << (Idx * 16); - - return Imm; -} - -/// \brief Check whether the constant contains a sequence of contiguous ones, -/// which might be interrupted by one or two chunks. If so, materialize the -/// sequence of contiguous ones with an ORR instruction. -/// Materialize the chunks which are either interrupting the sequence or outside -/// of the sequence with a MOVK instruction. -/// -/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk -/// which ends the sequence (0...1...). Then we are looking for constants which -/// contain at least one S and E chunk. -/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|. -/// -/// We are also looking for constants like |S|A|B|E| where the contiguous -/// sequence of ones wraps around the MSB into the LSB. -/// -static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const ARM64InstrInfo *TII) { - const int NotSet = -1; - const uint64_t Mask = 0xFFFF; - - int StartIdx = NotSet; - int EndIdx = NotSet; - // Try to find the chunks which start/end a contiguous sequence of ones. - for (int Idx = 0; Idx < 4; ++Idx) { - int64_t Chunk = getChunk(UImm, Idx); - // Sign extend the 16-bit chunk to 64-bit. - Chunk = (Chunk << 48) >> 48; - - if (isStartChunk(Chunk)) - StartIdx = Idx; - else if (isEndChunk(Chunk)) - EndIdx = Idx; - } - - // Early exit in case we can't find a start/end chunk. - if (StartIdx == NotSet || EndIdx == NotSet) - return false; - - // Outside of the contiguous sequence of ones everything needs to be zero. - uint64_t Outside = 0; - // Chunks between the start and end chunk need to have all their bits set. - uint64_t Inside = Mask; - - // If our contiguous sequence of ones wraps around from the MSB into the LSB, - // just swap indices and pretend we are materializing a contiguous sequence - // of zeros surrounded by a contiguous sequence of ones. 
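As a worked illustration of the non-wrapping case (the constant is hypothetical, not taken from the original source): for 0x00FF1234FFFFF000, chunk 0 (0xF000) starts the run of ones, chunk 3 (0x00FF) ends it, chunk 1 (0xFFFF) lies fully inside it, and chunk 2 (0x1234) interrupts it. This path would therefore emit an ORR with the contiguous-ones logical immediate 0x00FFFFFFFFFFF000 followed by a single MOVK of 0x1234 at LSL #32.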
- if (StartIdx > EndIdx) { - std::swap(StartIdx, EndIdx); - std::swap(Outside, Inside); - } - - uint64_t OrrImm = UImm; - int FirstMovkIdx = NotSet; - int SecondMovkIdx = NotSet; - - // Find out which chunks we need to patch up to obtain a contiguous sequence - // of ones. - for (int Idx = 0; Idx < 4; ++Idx) { - const uint64_t Chunk = getChunk(UImm, Idx); - - // Check whether we are looking at a chunk which is not part of the - // contiguous sequence of ones. - if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) { - OrrImm = updateImm(OrrImm, Idx, Outside == 0); - - // Remember the index we need to patch. - if (FirstMovkIdx == NotSet) - FirstMovkIdx = Idx; - else - SecondMovkIdx = Idx; - - // Check whether we are looking a chunk which is part of the contiguous - // sequence of ones. - } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) { - OrrImm = updateImm(OrrImm, Idx, Inside != Mask); - - // Remember the index we need to patch. - if (FirstMovkIdx == NotSet) - FirstMovkIdx = Idx; - else - SecondMovkIdx = Idx; - } - } - assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!"); - - // Create the ORR-immediate instruction. - uint64_t Encoding = 0; - ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding); - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri)) - .addOperand(MI.getOperand(0)) - .addReg(ARM64::XZR) - .addImm(Encoding); - - const unsigned DstReg = MI.getOperand(0).getReg(); - const bool DstIsDead = MI.getOperand(0).isDead(); - - const bool SingleMovk = SecondMovkIdx == NotSet; - // Create the first MOVK instruction. - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) - .addReg(DstReg, - RegState::Define | getDeadRegState(DstIsDead && SingleMovk)) - .addReg(DstReg) - .addImm(getChunk(UImm, FirstMovkIdx)) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, FirstMovkIdx * 16)); - - // Early exit in case we only need to emit a single MOVK instruction. - if (SingleMovk) { - transferImpOps(MI, MIB, MIB1); - MI.eraseFromParent(); - return true; - } - - // Create the second MOVK instruction. - MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg) - .addImm(getChunk(UImm, SecondMovkIdx)) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, SecondMovkIdx * 16)); - - transferImpOps(MI, MIB, MIB2); - MI.eraseFromParent(); - return true; -} - -/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more -/// real move-immediate instructions to synthesize the immediate. -bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned BitSize) { - MachineInstr &MI = *MBBI; - uint64_t Imm = MI.getOperand(1).getImm(); - const unsigned Mask = 0xFFFF; - - // Try a MOVI instruction (aka ORR-immediate with the zero register). - uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize); - uint64_t Encoding; - if (ARM64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { - unsigned Opc = (BitSize == 32 ? ARM64::ORRWri : ARM64::ORRXri); - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)) - .addOperand(MI.getOperand(0)) - .addReg(BitSize == 32 ? ARM64::WZR : ARM64::XZR) - .addImm(Encoding); - transferImpOps(MI, MIB, MIB); - MI.eraseFromParent(); - return true; - } - - // Scan the immediate and count the number of 16-bit chunks which are either - // all ones or all zeros. 
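A rough standalone sketch of the chunk scan performed next (not part of the original tree; the immediate value is a hypothetical example):

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t Imm = 0x0000123400005678ULL; // hypothetical immediate
  unsigned OneChunks = 0, ZeroChunks = 0;
  for (unsigned Shift = 0; Shift < 64; Shift += 16) {
    const uint64_t Chunk = (Imm >> Shift) & 0xFFFF;
    if (Chunk == 0xFFFF)
      ++OneChunks;
    else if (Chunk == 0)
      ++ZeroChunks;
  }
  // Two all-zero chunks here, so the expansion ends up as a MOVZ plus a
  // single MOVK for the two non-zero chunks.
  std::printf("one chunks: %u, zero chunks: %u\n", OneChunks, ZeroChunks);
  return 0;
}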
- unsigned OneChunks = 0; - unsigned ZeroChunks = 0; - for (unsigned Shift = 0; Shift < BitSize; Shift += 16) { - const unsigned Chunk = (Imm >> Shift) & Mask; - if (Chunk == Mask) - OneChunks++; - else if (Chunk == 0) - ZeroChunks++; - } - - // Since we can't materialize the constant with a single ORR instruction, - // let's see whether we can materialize 3/4 of the constant with an ORR - // instruction and use an additional MOVK instruction to materialize the - // remaining 1/4. - // - // We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|. - // - // E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR, - // we would create the following instruction sequence: - // - // ORR x0, xzr, |A|X|A|X| - // MOVK x0, |B|, LSL #16 - // - // Only look at 64-bit constants which can't be materialized with a single - // instruction e.g. which have less than either three all zero or all one - // chunks. - // - // Ignore 32-bit constants here, they always can be materialized with a - // MOVZ/MOVN + MOVK pair. Since the 32-bit constant can't be materialized - // with a single ORR, the best sequence we can achieve is a ORR + MOVK pair. - // Thus we fall back to the default code below which in the best case creates - // a single MOVZ/MOVN instruction (in case one chunk is all zero or all one). - // - if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) { - // If we interpret the 64-bit constant as a v4i16, are elements 0 and 2 - // identical? - if (getChunk(UImm, 0) == getChunk(UImm, 2)) { - // See if we can come up with a constant which can be materialized with - // ORR-immediate by replicating element 3 into element 1. - uint64_t OrrImm = replicateChunk(UImm, 3, 1); - if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1)) - return true; - - // See if we can come up with a constant which can be materialized with - // ORR-immediate by replicating element 1 into element 3. - OrrImm = replicateChunk(UImm, 1, 3); - if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3)) - return true; - - // If we interpret the 64-bit constant as a v4i16, are elements 1 and 3 - // identical? - } else if (getChunk(UImm, 1) == getChunk(UImm, 3)) { - // See if we can come up with a constant which can be materialized with - // ORR-immediate by replicating element 2 into element 0. - uint64_t OrrImm = replicateChunk(UImm, 2, 0); - if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0)) - return true; - - // See if we can come up with a constant which can be materialized with - // ORR-immediate by replicating element 1 into element 3. - OrrImm = replicateChunk(UImm, 0, 2); - if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2)) - return true; - } - } - - // Check for identical 16-bit chunks within the constant and if so materialize - // them with a single ORR instruction. The remaining one or two 16-bit chunks - // will be materialized with MOVK instructions. - if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII)) - return true; - - // Check whether the constant contains a sequence of contiguous ones, which - // might be interrupted by one or two chunks. If so, materialize the sequence - // of contiguous ones with an ORR instruction. Materialize the chunks which - // are either interrupting the sequence or outside of the sequence with a - // MOVK instruction. 
- if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII)) - return true; - - // Use a MOVZ or MOVN instruction to set the high bits, followed by one or - // more MOVK instructions to insert additional 16-bit portions into the - // lower bits. - bool isNeg = false; - - // Use MOVN to materialize the high bits if we have more all one chunks - // than all zero chunks. - if (OneChunks > ZeroChunks) { - isNeg = true; - Imm = ~Imm; - } - - unsigned FirstOpc; - if (BitSize == 32) { - Imm &= (1LL << 32) - 1; - FirstOpc = (isNeg ? ARM64::MOVNWi : ARM64::MOVZWi); - } else { - FirstOpc = (isNeg ? ARM64::MOVNXi : ARM64::MOVZXi); - } - unsigned Shift = 0; // LSL amount for high bits with MOVZ/MOVN - unsigned LastShift = 0; // LSL amount for last MOVK - if (Imm != 0) { - unsigned LZ = countLeadingZeros(Imm); - unsigned TZ = countTrailingZeros(Imm); - Shift = ((63 - LZ) / 16) * 16; - LastShift = (TZ / 16) * 16; - } - unsigned Imm16 = (Imm >> Shift) & Mask; - unsigned DstReg = MI.getOperand(0).getReg(); - bool DstIsDead = MI.getOperand(0).isDead(); - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc)) - .addReg(DstReg, RegState::Define | - getDeadRegState(DstIsDead && Shift == LastShift)) - .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift)); - - // If a MOVN was used for the high bits of a negative value, flip the rest - // of the bits back for use with MOVK. - if (isNeg) - Imm = ~Imm; - - if (Shift == LastShift) { - transferImpOps(MI, MIB1, MIB1); - MI.eraseFromParent(); - return true; - } - - MachineInstrBuilder MIB2; - unsigned Opc = (BitSize == 32 ? ARM64::MOVKWi : ARM64::MOVKXi); - while (Shift != LastShift) { - Shift -= 16; - Imm16 = (Imm >> Shift) & Mask; - if (Imm16 == (isNeg ? Mask : 0)) - continue; // This 16-bit portion is already set correctly. - MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(DstIsDead && Shift == LastShift)) - .addReg(DstReg) - .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift)); - } - - transferImpOps(MI, MIB1, MIB2); - MI.eraseFromParent(); - return true; -} - -/// \brief If MBBI references a pseudo instruction that should be expanded here, -/// do the expansion and return true. Otherwise return false. 
-bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI) { - MachineInstr &MI = *MBBI; - unsigned Opcode = MI.getOpcode(); - switch (Opcode) { - default: - break; - - case ARM64::ADDWrr: - case ARM64::SUBWrr: - case ARM64::ADDXrr: - case ARM64::SUBXrr: - case ARM64::ADDSWrr: - case ARM64::SUBSWrr: - case ARM64::ADDSXrr: - case ARM64::SUBSXrr: - case ARM64::ANDWrr: - case ARM64::ANDXrr: - case ARM64::BICWrr: - case ARM64::BICXrr: - case ARM64::EONWrr: - case ARM64::EONXrr: - case ARM64::EORWrr: - case ARM64::EORXrr: - case ARM64::ORNWrr: - case ARM64::ORNXrr: - case ARM64::ORRWrr: - case ARM64::ORRXrr: { - unsigned Opcode; - switch (MI.getOpcode()) { - default: - return false; - case ARM64::ADDWrr: Opcode = ARM64::ADDWrs; break; - case ARM64::SUBWrr: Opcode = ARM64::SUBWrs; break; - case ARM64::ADDXrr: Opcode = ARM64::ADDXrs; break; - case ARM64::SUBXrr: Opcode = ARM64::SUBXrs; break; - case ARM64::ADDSWrr: Opcode = ARM64::ADDSWrs; break; - case ARM64::SUBSWrr: Opcode = ARM64::SUBSWrs; break; - case ARM64::ADDSXrr: Opcode = ARM64::ADDSXrs; break; - case ARM64::SUBSXrr: Opcode = ARM64::SUBSXrs; break; - case ARM64::ANDWrr: Opcode = ARM64::ANDWrs; break; - case ARM64::ANDXrr: Opcode = ARM64::ANDXrs; break; - case ARM64::BICWrr: Opcode = ARM64::BICWrs; break; - case ARM64::BICXrr: Opcode = ARM64::BICXrs; break; - case ARM64::EONWrr: Opcode = ARM64::EONWrs; break; - case ARM64::EONXrr: Opcode = ARM64::EONXrs; break; - case ARM64::EORWrr: Opcode = ARM64::EORWrs; break; - case ARM64::EORXrr: Opcode = ARM64::EORXrs; break; - case ARM64::ORNWrr: Opcode = ARM64::ORNWrs; break; - case ARM64::ORNXrr: Opcode = ARM64::ORNXrs; break; - case ARM64::ORRWrr: Opcode = ARM64::ORRWrs; break; - case ARM64::ORRXrr: Opcode = ARM64::ORRXrs; break; - } - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode), - MI.getOperand(0).getReg()) - .addOperand(MI.getOperand(1)) - .addOperand(MI.getOperand(2)) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - transferImpOps(MI, MIB1, MIB1); - MI.eraseFromParent(); - return true; - } - - case ARM64::FCVTSHpseudo: { - MachineOperand Src = MI.getOperand(1); - Src.setImplicit(); - unsigned SrcH = TII->getRegisterInfo().getSubReg(Src.getReg(), ARM64::hsub); - auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::FCVTSHr)) - .addOperand(MI.getOperand(0)) - .addReg(SrcH, RegState::Undef) - .addOperand(Src); - transferImpOps(MI, MIB, MIB); - MI.eraseFromParent(); - return true; - } - case ARM64::LOADgot: { - // Expand into ADRP + LDR. 
- unsigned DstReg = MI.getOperand(0).getReg(); - const MachineOperand &MO1 = MI.getOperand(1); - unsigned Flags = MO1.getTargetFlags(); - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg); - MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::LDRXui)) - .addOperand(MI.getOperand(0)) - .addReg(DstReg); - - if (MO1.isGlobal()) { - MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | ARM64II::MO_PAGE); - MIB2.addGlobalAddress(MO1.getGlobal(), 0, - Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - } else if (MO1.isSymbol()) { - MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | ARM64II::MO_PAGE); - MIB2.addExternalSymbol(MO1.getSymbolName(), - Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - } else { - assert(MO1.isCPI() && - "Only expect globals, externalsymbols, or constant pools"); - MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), - Flags | ARM64II::MO_PAGE); - MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), - Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - } - - transferImpOps(MI, MIB1, MIB2); - MI.eraseFromParent(); - return true; - } - - case ARM64::MOVaddr: - case ARM64::MOVaddrJT: - case ARM64::MOVaddrCP: - case ARM64::MOVaddrBA: - case ARM64::MOVaddrTLS: - case ARM64::MOVaddrEXT: { - // Expand into ADRP + ADD. - unsigned DstReg = MI.getOperand(0).getReg(); - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg) - .addOperand(MI.getOperand(1)); - - MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADDXri)) - .addOperand(MI.getOperand(0)) - .addReg(DstReg) - .addOperand(MI.getOperand(2)) - .addImm(0); - - transferImpOps(MI, MIB1, MIB2); - MI.eraseFromParent(); - return true; - } - - case ARM64::MOVi32imm: - return expandMOVImm(MBB, MBBI, 32); - case ARM64::MOVi64imm: - return expandMOVImm(MBB, MBBI, 64); - case ARM64::RET_ReallyLR: - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::RET)) - .addReg(ARM64::LR); - MI.eraseFromParent(); - return true; - } - return false; -} - -/// \brief Iterate over the instructions in basic block MBB and expand any -/// pseudo instructions. Return true if anything was modified. -bool ARM64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) { - bool Modified = false; - - MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - while (MBBI != E) { - MachineBasicBlock::iterator NMBBI = std::next(MBBI); - Modified |= expandMI(MBB, MBBI); - MBBI = NMBBI; - } - - return Modified; -} - -bool ARM64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - TII = static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo()); - - bool Modified = false; - for (auto &MBB : MF) - Modified |= expandMBB(MBB); - return Modified; -} - -/// \brief Returns an instance of the pseudo instruction expansion pass. -FunctionPass *llvm::createARM64ExpandPseudoPass() { - return new ARM64ExpandPseudo(); -} diff --git a/lib/Target/ARM64/ARM64FastISel.cpp b/lib/Target/ARM64/ARM64FastISel.cpp deleted file mode 100644 index 51b0f76..0000000 --- a/lib/Target/ARM64/ARM64FastISel.cpp +++ /dev/null @@ -1,1929 +0,0 @@ -//===-- ARM6464FastISel.cpp - ARM64 FastISel implementation ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file defines the ARM64-specific support for the FastISel class. Some -// of the target-specific code is generated by tablegen in the file -// ARM64GenFastISel.inc, which is #included here. -// -//===----------------------------------------------------------------------===// - -#include "ARM64.h" -#include "ARM64TargetMachine.h" -#include "ARM64Subtarget.h" -#include "ARM64CallingConv.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/FastISel.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Operator.h" -#include "llvm/Support/CommandLine.h" -using namespace llvm; - -namespace { - -class ARM64FastISel : public FastISel { - - class Address { - public: - typedef enum { - RegBase, - FrameIndexBase - } BaseKind; - - private: - BaseKind Kind; - union { - unsigned Reg; - int FI; - } Base; - int64_t Offset; - - public: - Address() : Kind(RegBase), Offset(0) { Base.Reg = 0; } - void setKind(BaseKind K) { Kind = K; } - BaseKind getKind() const { return Kind; } - bool isRegBase() const { return Kind == RegBase; } - bool isFIBase() const { return Kind == FrameIndexBase; } - void setReg(unsigned Reg) { - assert(isRegBase() && "Invalid base register access!"); - Base.Reg = Reg; - } - unsigned getReg() const { - assert(isRegBase() && "Invalid base register access!"); - return Base.Reg; - } - void setFI(unsigned FI) { - assert(isFIBase() && "Invalid base frame index access!"); - Base.FI = FI; - } - unsigned getFI() const { - assert(isFIBase() && "Invalid base frame index access!"); - return Base.FI; - } - void setOffset(int64_t O) { Offset = O; } - int64_t getOffset() { return Offset; } - - bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); } - }; - - /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can - /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; - LLVMContext *Context; - -private: - // Selection routines. - bool SelectLoad(const Instruction *I); - bool SelectStore(const Instruction *I); - bool SelectBranch(const Instruction *I); - bool SelectIndirectBr(const Instruction *I); - bool SelectCmp(const Instruction *I); - bool SelectSelect(const Instruction *I); - bool SelectFPExt(const Instruction *I); - bool SelectFPTrunc(const Instruction *I); - bool SelectFPToInt(const Instruction *I, bool Signed); - bool SelectIntToFP(const Instruction *I, bool Signed); - bool SelectRem(const Instruction *I, unsigned ISDOpcode); - bool SelectCall(const Instruction *I, const char *IntrMemName); - bool SelectIntrinsicCall(const IntrinsicInst &I); - bool SelectRet(const Instruction *I); - bool SelectTrunc(const Instruction *I); - bool SelectIntExt(const Instruction *I); - bool SelectMul(const Instruction *I); - - // Utility helper routines. 
- bool isTypeLegal(Type *Ty, MVT &VT); - bool isLoadStoreTypeLegal(Type *Ty, MVT &VT); - bool ComputeAddress(const Value *Obj, Address &Addr); - bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor, - bool UseUnscaled); - void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB, - unsigned Flags, bool UseUnscaled); - bool IsMemCpySmall(uint64_t Len, unsigned Alignment); - bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, - unsigned Alignment); - // Emit functions. - bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt); - bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr, - bool UseUnscaled = false); - bool EmitStore(MVT VT, unsigned SrcReg, Address Addr, - bool UseUnscaled = false); - unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); - unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt); - - unsigned ARM64MaterializeFP(const ConstantFP *CFP, MVT VT); - unsigned ARM64MaterializeGV(const GlobalValue *GV); - - // Call handling routines. -private: - CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const; - bool ProcessCallArgs(SmallVectorImpl<Value *> &Args, - SmallVectorImpl<unsigned> &ArgRegs, - SmallVectorImpl<MVT> &ArgVTs, - SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, - SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC, - unsigned &NumBytes); - bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, - const Instruction *I, CallingConv::ID CC, unsigned &NumBytes); - -public: - // Backend specific FastISel code. - virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); - virtual unsigned TargetMaterializeConstant(const Constant *C); - - explicit ARM64FastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) - : FastISel(funcInfo, libInfo) { - Subtarget = &TM.getSubtarget<ARM64Subtarget>(); - Context = &funcInfo.Fn->getContext(); - } - - virtual bool TargetSelectInstruction(const Instruction *I); - -#include "ARM64GenFastISel.inc" -}; - -} // end anonymous namespace - -#include "ARM64GenCallingConv.inc" - -CCAssignFn *ARM64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { - if (CC == CallingConv::WebKit_JS) - return CC_ARM64_WebKit_JS; - return Subtarget->isTargetDarwin() ? CC_ARM64_DarwinPCS : CC_ARM64_AAPCS; -} - -unsigned ARM64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) { - assert(TLI.getValueType(AI->getType(), true) == MVT::i64 && - "Alloca should always return a pointer."); - - // Don't handle dynamic allocas. - if (!FuncInfo.StaticAllocaMap.count(AI)) - return 0; - - DenseMap<const AllocaInst *, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - - if (SI != FuncInfo.StaticAllocaMap.end()) { - unsigned ResultReg = createResultReg(&ARM64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADDXri), - ResultReg) - .addFrameIndex(SI->second) - .addImm(0) - .addImm(0); - return ResultReg; - } - - return 0; -} - -unsigned ARM64FastISel::ARM64MaterializeFP(const ConstantFP *CFP, MVT VT) { - const APFloat Val = CFP->getValueAPF(); - bool is64bit = (VT == MVT::f64); - - // This checks to see if we can use FMOV instructions to materialize - // a constant, otherwise we have to materialize via the constant pool. 
- if (TLI.isFPImmLegal(Val, VT)) { - int Imm; - unsigned Opc; - if (is64bit) { - Imm = ARM64_AM::getFP64Imm(Val); - Opc = ARM64::FMOVDi; - } else { - Imm = ARM64_AM::getFP32Imm(Val); - Opc = ARM64::FMOVSi; - } - unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addImm(Imm); - return ResultReg; - } - - // Materialize via constant pool. MachineConstantPool wants an explicit - // alignment. - unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); - if (Align == 0) - Align = DL.getTypeAllocSize(CFP->getType()); - - unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); - unsigned ADRPReg = createResultReg(&ARM64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP), - ADRPReg).addConstantPoolIndex(Idx, 0, ARM64II::MO_PAGE); - - unsigned Opc = is64bit ? ARM64::LDRDui : ARM64::LDRSui; - unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(ADRPReg) - .addConstantPoolIndex(Idx, 0, ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - return ResultReg; -} - -unsigned ARM64FastISel::ARM64MaterializeGV(const GlobalValue *GV) { - // We can't handle thread-local variables quickly yet. Unfortunately we have - // to peer through any aliases to find out if that rule applies. - const GlobalValue *TLSGV = GV; - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - TLSGV = GA->getAliasedGlobal(); - - if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(TLSGV)) - if (GVar->isThreadLocal()) - return 0; - - unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); - - EVT DestEVT = TLI.getValueType(GV->getType(), true); - if (!DestEVT.isSimple()) - return 0; - MVT DestVT = DestEVT.getSimpleVT(); - - unsigned ADRPReg = createResultReg(&ARM64::GPR64RegClass); - unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); - - if (OpFlags & ARM64II::MO_GOT) { - // ADRP + LDRX - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP), - ADRPReg) - .addGlobalAddress(GV, 0, ARM64II::MO_GOT | ARM64II::MO_PAGE); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::LDRXui), - ResultReg) - .addReg(ADRPReg) - .addGlobalAddress(GV, 0, ARM64II::MO_GOT | ARM64II::MO_PAGEOFF | - ARM64II::MO_NC); - } else { - // ADRP + ADDX - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP), - ADRPReg).addGlobalAddress(GV, 0, ARM64II::MO_PAGE); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADDXri), - ResultReg) - .addReg(ADRPReg) - .addGlobalAddress(GV, 0, ARM64II::MO_PAGEOFF | ARM64II::MO_NC) - .addImm(0); - } - return ResultReg; -} - -unsigned ARM64FastISel::TargetMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); - - // Only handle simple types. - if (!CEVT.isSimple()) - return 0; - MVT VT = CEVT.getSimpleVT(); - - // FIXME: Handle ConstantInt. - if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) - return ARM64MaterializeFP(CFP, VT); - else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) - return ARM64MaterializeGV(GV); - - return 0; -} - -// Computes the address to get to an object. 
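A minimal standalone sketch of the constant-index folding the GetElementPtr case of ComputeAddress performs below, for sequential types only (struct fields use StructLayout offsets instead); the indices and element sizes are hypothetical stand-ins for DataLayout queries:

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical GEP: constant indices paired with the byte size of each
  // indexed type.
  const int64_t IdxAndSize[][2] = {{3, 8}, {2, 4}};
  int64_t Offset = 0;
  for (const auto &IS : IdxAndSize)
    Offset += IS[0] * IS[1]; // constant index folded into the running offset
  std::printf("folded byte offset: %lld\n", (long long)Offset); // prints 32
  return 0;
}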
-bool ARM64FastISel::ComputeAddress(const Value *Obj, Address &Addr) { - const User *U = NULL; - unsigned Opcode = Instruction::UserOp1; - if (const Instruction *I = dyn_cast<Instruction>(Obj)) { - // Don't walk into other basic blocks unless the object is an alloca from - // another block, otherwise it may not have a virtual register assigned. - if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || - FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { - Opcode = I->getOpcode(); - U = I; - } - } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { - Opcode = C->getOpcode(); - U = C; - } - - if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType())) - if (Ty->getAddressSpace() > 255) - // Fast instruction selection doesn't support the special - // address spaces. - return false; - - switch (Opcode) { - default: - break; - case Instruction::BitCast: { - // Look through bitcasts. - return ComputeAddress(U->getOperand(0), Addr); - } - case Instruction::IntToPtr: { - // Look past no-op inttoptrs. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) - return ComputeAddress(U->getOperand(0), Addr); - break; - } - case Instruction::PtrToInt: { - // Look past no-op ptrtoints. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) - return ComputeAddress(U->getOperand(0), Addr); - break; - } - case Instruction::GetElementPtr: { - Address SavedAddr = Addr; - uint64_t TmpOffset = Addr.getOffset(); - - // Iterate through the GEP folding the constants into offsets where - // we can. - gep_type_iterator GTI = gep_type_begin(U); - for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; - ++i, ++GTI) { - const Value *Op = *i; - if (StructType *STy = dyn_cast<StructType>(*GTI)) { - const StructLayout *SL = DL.getStructLayout(STy); - unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); - TmpOffset += SL->getElementOffset(Idx); - } else { - uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); - for (;;) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { - // Constant-offset addressing. - TmpOffset += CI->getSExtValue() * S; - break; - } - if (canFoldAddIntoGEP(U, Op)) { - // A compatible add with a constant operand. Fold the constant. - ConstantInt *CI = - cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); - TmpOffset += CI->getSExtValue() * S; - // Iterate on the other operand. - Op = cast<AddOperator>(Op)->getOperand(0); - continue; - } - // Unsupported - goto unsupported_gep; - } - } - } - - // Try to grab the base operand now. - Addr.setOffset(TmpOffset); - if (ComputeAddress(U->getOperand(0), Addr)) - return true; - - // We failed, restore everything and try the other options. - Addr = SavedAddr; - - unsupported_gep: - break; - } - case Instruction::Alloca: { - const AllocaInst *AI = cast<AllocaInst>(Obj); - DenseMap<const AllocaInst *, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI != FuncInfo.StaticAllocaMap.end()) { - Addr.setKind(Address::FrameIndexBase); - Addr.setFI(SI->second); - return true; - } - break; - } - } - - // Try to get this in a register if nothing else has worked. - if (!Addr.isValid()) - Addr.setReg(getRegForValue(Obj)); - return Addr.isValid(); -} - -bool ARM64FastISel::isTypeLegal(Type *Ty, MVT &VT) { - EVT evt = TLI.getValueType(Ty, true); - - // Only handle simple types. - if (evt == MVT::Other || !evt.isSimple()) - return false; - VT = evt.getSimpleVT(); - - // Handle all legal types, i.e. a register that will directly hold this - // value. 
- return TLI.isTypeLegal(VT); -} - -bool ARM64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) { - if (isTypeLegal(Ty, VT)) - return true; - - // If this is a type than can be sign or zero-extended to a basic operation - // go ahead and accept it now. For stores, this reflects truncation. - if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) - return true; - - return false; -} - -bool ARM64FastISel::SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor, - bool UseUnscaled) { - bool needsLowering = false; - int64_t Offset = Addr.getOffset(); - switch (VT.SimpleTy) { - default: - return false; - case MVT::i1: - case MVT::i8: - case MVT::i16: - case MVT::i32: - case MVT::i64: - case MVT::f32: - case MVT::f64: - if (!UseUnscaled) - // Using scaled, 12-bit, unsigned immediate offsets. - needsLowering = ((Offset & 0xfff) != Offset); - else - // Using unscaled, 9-bit, signed immediate offsets. - needsLowering = (Offset > 256 || Offset < -256); - break; - } - - // FIXME: If this is a stack pointer and the offset needs to be simplified - // then put the alloca address into a register, set the base type back to - // register and continue. This should almost never happen. - if (needsLowering && Addr.getKind() == Address::FrameIndexBase) { - return false; - } - - // Since the offset is too large for the load/store instruction get the - // reg+offset into a register. - if (needsLowering) { - uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor; - unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false, - UnscaledOffset, MVT::i64); - if (ResultReg == 0) - return false; - Addr.setReg(ResultReg); - Addr.setOffset(0); - } - return true; -} - -void ARM64FastISel::AddLoadStoreOperands(Address &Addr, - const MachineInstrBuilder &MIB, - unsigned Flags, bool UseUnscaled) { - int64_t Offset = Addr.getOffset(); - // Frame base works a bit differently. Handle it separately. - if (Addr.getKind() == Address::FrameIndexBase) { - int FI = Addr.getFI(); - // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size - // and alignment should be based on the VT. - MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI, Offset), Flags, - MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); - // Now add the rest of the operands. - MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO); - } else { - // Now add the rest of the operands. - MIB.addReg(Addr.getReg()); - MIB.addImm(Offset); - } -} - -bool ARM64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr, - bool UseUnscaled) { - // Negative offsets require unscaled, 9-bit, signed immediate offsets. - // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. - if (!UseUnscaled && Addr.getOffset() < 0) - UseUnscaled = true; - - unsigned Opc; - const TargetRegisterClass *RC; - bool VTIsi1 = false; - int64_t ScaleFactor = 0; - switch (VT.SimpleTy) { - default: - return false; - case MVT::i1: - VTIsi1 = true; - // Intentional fall-through. - case MVT::i8: - Opc = UseUnscaled ? ARM64::LDURBBi : ARM64::LDRBBui; - RC = &ARM64::GPR32RegClass; - ScaleFactor = 1; - break; - case MVT::i16: - Opc = UseUnscaled ? ARM64::LDURHHi : ARM64::LDRHHui; - RC = &ARM64::GPR32RegClass; - ScaleFactor = 2; - break; - case MVT::i32: - Opc = UseUnscaled ? ARM64::LDURWi : ARM64::LDRWui; - RC = &ARM64::GPR32RegClass; - ScaleFactor = 4; - break; - case MVT::i64: - Opc = UseUnscaled ? 
ARM64::LDURXi : ARM64::LDRXui; - RC = &ARM64::GPR64RegClass; - ScaleFactor = 8; - break; - case MVT::f32: - Opc = UseUnscaled ? ARM64::LDURSi : ARM64::LDRSui; - RC = TLI.getRegClassFor(VT); - ScaleFactor = 4; - break; - case MVT::f64: - Opc = UseUnscaled ? ARM64::LDURDi : ARM64::LDRDui; - RC = TLI.getRegClassFor(VT); - ScaleFactor = 8; - break; - } - // Scale the offset. - if (!UseUnscaled) { - int64_t Offset = Addr.getOffset(); - if (Offset & (ScaleFactor - 1)) - // Retry using an unscaled, 9-bit, signed immediate offset. - return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true); - - Addr.setOffset(Offset / ScaleFactor); - } - - // Simplify this down to something we can handle. - if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled)) - return false; - - // Create the base instruction, then add the operands. - ResultReg = createResultReg(RC); - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Opc), ResultReg); - AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled); - - // Loading an i1 requires special handling. - if (VTIsi1) { - unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ANDReg) - .addReg(ResultReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); - ResultReg = ANDReg; - } - return true; -} - -bool ARM64FastISel::SelectLoad(const Instruction *I) { - MVT VT; - // Verify we have a legal type before going any further. Currently, we handle - // simple types that will directly fit in a register (i32/f32/i64/f64) or - // those that can be sign or zero-extended to a basic operation (i1/i8/i16). - if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic()) - return false; - - // See if we can handle this address. - Address Addr; - if (!ComputeAddress(I->getOperand(0), Addr)) - return false; - - unsigned ResultReg; - if (!EmitLoad(VT, ResultReg, Addr)) - return false; - - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr, - bool UseUnscaled) { - // Negative offsets require unscaled, 9-bit, signed immediate offsets. - // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. - if (!UseUnscaled && Addr.getOffset() < 0) - UseUnscaled = true; - - unsigned StrOpc; - bool VTIsi1 = false; - int64_t ScaleFactor = 0; - // Using scaled, 12-bit, unsigned immediate offsets. - switch (VT.SimpleTy) { - default: - return false; - case MVT::i1: - VTIsi1 = true; - case MVT::i8: - StrOpc = UseUnscaled ? ARM64::STURBBi : ARM64::STRBBui; - ScaleFactor = 1; - break; - case MVT::i16: - StrOpc = UseUnscaled ? ARM64::STURHHi : ARM64::STRHHui; - ScaleFactor = 2; - break; - case MVT::i32: - StrOpc = UseUnscaled ? ARM64::STURWi : ARM64::STRWui; - ScaleFactor = 4; - break; - case MVT::i64: - StrOpc = UseUnscaled ? ARM64::STURXi : ARM64::STRXui; - ScaleFactor = 8; - break; - case MVT::f32: - StrOpc = UseUnscaled ? ARM64::STURSi : ARM64::STRSui; - ScaleFactor = 4; - break; - case MVT::f64: - StrOpc = UseUnscaled ? ARM64::STURDi : ARM64::STRDui; - ScaleFactor = 8; - break; - } - // Scale the offset. - if (!UseUnscaled) { - int64_t Offset = Addr.getOffset(); - if (Offset & (ScaleFactor - 1)) - // Retry using an unscaled, 9-bit, signed immediate offset. - return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true); - - Addr.setOffset(Offset / ScaleFactor); - } - - // Simplify this down to something we can handle. - if (!SimplifyAddress(Addr, VT, UseUnscaled ? 
1 : ScaleFactor, UseUnscaled)) - return false; - - // Storing an i1 requires special handling. - if (VTIsi1) { - unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ANDReg) - .addReg(SrcReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); - SrcReg = ANDReg; - } - // Create the base instruction, then add the operands. - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(StrOpc)).addReg(SrcReg); - AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled); - return true; -} - -bool ARM64FastISel::SelectStore(const Instruction *I) { - MVT VT; - Value *Op0 = I->getOperand(0); - // Verify we have a legal type before going any further. Currently, we handle - // simple types that will directly fit in a register (i32/f32/i64/f64) or - // those that can be sign or zero-extended to a basic operation (i1/i8/i16). - if (!isLoadStoreTypeLegal(Op0->getType(), VT) || - cast<StoreInst>(I)->isAtomic()) - return false; - - // Get the value to be stored into a register. - unsigned SrcReg = getRegForValue(Op0); - if (SrcReg == 0) - return false; - - // See if we can handle this address. - Address Addr; - if (!ComputeAddress(I->getOperand(1), Addr)) - return false; - - if (!EmitStore(VT, SrcReg, Addr)) - return false; - return true; -} - -static ARM64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { - switch (Pred) { - case CmpInst::FCMP_ONE: - case CmpInst::FCMP_UEQ: - default: - // AL is our "false" for now. The other two need more compares. - return ARM64CC::AL; - case CmpInst::ICMP_EQ: - case CmpInst::FCMP_OEQ: - return ARM64CC::EQ; - case CmpInst::ICMP_SGT: - case CmpInst::FCMP_OGT: - return ARM64CC::GT; - case CmpInst::ICMP_SGE: - case CmpInst::FCMP_OGE: - return ARM64CC::GE; - case CmpInst::ICMP_UGT: - case CmpInst::FCMP_UGT: - return ARM64CC::HI; - case CmpInst::FCMP_OLT: - return ARM64CC::MI; - case CmpInst::ICMP_ULE: - case CmpInst::FCMP_OLE: - return ARM64CC::LS; - case CmpInst::FCMP_ORD: - return ARM64CC::VC; - case CmpInst::FCMP_UNO: - return ARM64CC::VS; - case CmpInst::FCMP_UGE: - return ARM64CC::PL; - case CmpInst::ICMP_SLT: - case CmpInst::FCMP_ULT: - return ARM64CC::LT; - case CmpInst::ICMP_SLE: - case CmpInst::FCMP_ULE: - return ARM64CC::LE; - case CmpInst::FCMP_UNE: - case CmpInst::ICMP_NE: - return ARM64CC::NE; - case CmpInst::ICMP_UGE: - return ARM64CC::CS; - case CmpInst::ICMP_ULT: - return ARM64CC::CC; - } -} - -bool ARM64FastISel::SelectBranch(const Instruction *I) { - const BranchInst *BI = cast<BranchInst>(I); - MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; - MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; - - if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { - if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { - // We may not handle every CC for now. - ARM64CC::CondCode CC = getCompareCC(CI->getPredicate()); - if (CC == ARM64CC::AL) - return false; - - // Emit the cmp. - if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) - return false; - - // Emit the branch. 
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc)) - .addImm(CC) - .addMBB(TBB); - FuncInfo.MBB->addSuccessor(TBB); - - FastEmitBranch(FBB, DbgLoc); - return true; - } - } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) { - MVT SrcVT; - if (TI->hasOneUse() && TI->getParent() == I->getParent() && - (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) { - unsigned CondReg = getRegForValue(TI->getOperand(0)); - if (CondReg == 0) - return false; - - // Issue an extract_subreg to get the lower 32-bits. - if (SrcVT == MVT::i64) - CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true, - ARM64::sub_32); - - unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ANDReg) - .addReg(CondReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri)) - .addReg(ANDReg) - .addReg(ANDReg) - .addImm(0) - .addImm(0); - - unsigned CC = ARM64CC::NE; - if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { - std::swap(TBB, FBB); - CC = ARM64CC::EQ; - } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc)) - .addImm(CC) - .addMBB(TBB); - FuncInfo.MBB->addSuccessor(TBB); - FastEmitBranch(FBB, DbgLoc); - return true; - } - } else if (const ConstantInt *CI = - dyn_cast<ConstantInt>(BI->getCondition())) { - uint64_t Imm = CI->getZExtValue(); - MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::B)) - .addMBB(Target); - FuncInfo.MBB->addSuccessor(Target); - return true; - } - - unsigned CondReg = getRegForValue(BI->getCondition()); - if (CondReg == 0) - return false; - - // We've been divorced from our compare! Our block was split, and - // now our compare lives in a predecessor block. We musn't - // re-compare here, as the children of the compare aren't guaranteed - // live across the block boundary (we *could* check for this). - // Regardless, the compare has been done in the predecessor block, - // and it left a value for us in a virtual register. Ergo, we test - // the one-bit value left in the virtual register. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri), - ARM64::WZR) - .addReg(CondReg) - .addImm(0) - .addImm(0); - - unsigned CC = ARM64CC::NE; - if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { - std::swap(TBB, FBB); - CC = ARM64CC::EQ; - } - - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc)) - .addImm(CC) - .addMBB(TBB); - FuncInfo.MBB->addSuccessor(TBB); - FastEmitBranch(FBB, DbgLoc); - return true; -} - -bool ARM64FastISel::SelectIndirectBr(const Instruction *I) { - const IndirectBrInst *BI = cast<IndirectBrInst>(I); - unsigned AddrReg = getRegForValue(BI->getOperand(0)); - if (AddrReg == 0) - return false; - - // Emit the indirect branch. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BR)) - .addReg(AddrReg); - - // Make sure the CFG is up-to-date. - for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i) - FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]); - - return true; -} - -bool ARM64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) { - Type *Ty = Src1Value->getType(); - EVT SrcEVT = TLI.getValueType(Ty, true); - if (!SrcEVT.isSimple()) - return false; - MVT SrcVT = SrcEVT.getSimpleVT(); - - // Check to see if the 2nd operand is a constant that we can encode directly - // in the compare. 
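A minimal standalone sketch of the immediate screening that follows (shift-encodable forms, noted in the FIXME below, are ignored here as well); the helper name is hypothetical:

#include <cstdint>

// Returns true when Val can be used directly as the 12-bit unsigned
// immediate of a SUBS/ADDS compare; negative values are negated first and
// the caller would switch SUBS to ADDS, mirroring the logic that follows.
static bool fitsCmpImmediate(int64_t Val, bool &UseAdds) {
  uint64_t Imm = static_cast<uint64_t>(Val);
  UseAdds = false;
  if (Val < 0) {
    UseAdds = true;
    Imm = static_cast<uint64_t>(-Val);
  }
  return (Imm & 0xfff) == Imm;
}

int main() {
  bool UseAdds;
  return fitsCmpImmediate(-42, UseAdds) && UseAdds ? 0 : 1;
}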
- uint64_t Imm; - bool UseImm = false; - bool isNegativeImm = false; - if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) { - if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 || - SrcVT == MVT::i8 || SrcVT == MVT::i1) { - const APInt &CIVal = ConstInt->getValue(); - - Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue(); - if (CIVal.isNegative()) { - isNegativeImm = true; - Imm = -Imm; - } - // FIXME: We can handle more immediates using shifts. - UseImm = ((Imm & 0xfff) == Imm); - } - } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) { - if (SrcVT == MVT::f32 || SrcVT == MVT::f64) - if (ConstFP->isZero() && !ConstFP->isNegative()) - UseImm = true; - } - - unsigned ZReg; - unsigned CmpOpc; - bool isICmp = true; - bool needsExt = false; - switch (SrcVT.SimpleTy) { - default: - return false; - case MVT::i1: - case MVT::i8: - case MVT::i16: - needsExt = true; - // Intentional fall-through. - case MVT::i32: - ZReg = ARM64::WZR; - if (UseImm) - CmpOpc = isNegativeImm ? ARM64::ADDSWri : ARM64::SUBSWri; - else - CmpOpc = ARM64::SUBSWrr; - break; - case MVT::i64: - ZReg = ARM64::XZR; - if (UseImm) - CmpOpc = isNegativeImm ? ARM64::ADDSXri : ARM64::SUBSXri; - else - CmpOpc = ARM64::SUBSXrr; - break; - case MVT::f32: - isICmp = false; - CmpOpc = UseImm ? ARM64::FCMPSri : ARM64::FCMPSrr; - break; - case MVT::f64: - isICmp = false; - CmpOpc = UseImm ? ARM64::FCMPDri : ARM64::FCMPDrr; - break; - } - - unsigned SrcReg1 = getRegForValue(Src1Value); - if (SrcReg1 == 0) - return false; - - unsigned SrcReg2; - if (!UseImm) { - SrcReg2 = getRegForValue(Src2Value); - if (SrcReg2 == 0) - return false; - } - - // We have i1, i8, or i16, we need to either zero extend or sign extend. - if (needsExt) { - SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt); - if (SrcReg1 == 0) - return false; - if (!UseImm) { - SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt); - if (SrcReg2 == 0) - return false; - } - } - - if (isICmp) { - if (UseImm) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) - .addReg(ZReg) - .addReg(SrcReg1) - .addImm(Imm) - .addImm(0); - else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) - .addReg(ZReg) - .addReg(SrcReg1) - .addReg(SrcReg2); - } else { - if (UseImm) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) - .addReg(SrcReg1); - else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) - .addReg(SrcReg1) - .addReg(SrcReg2); - } - return true; -} - -bool ARM64FastISel::SelectCmp(const Instruction *I) { - const CmpInst *CI = cast<CmpInst>(I); - - // We may not handle every CC for now. - ARM64CC::CondCode CC = getCompareCC(CI->getPredicate()); - if (CC == ARM64CC::AL) - return false; - - // Emit the cmp. - if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) - return false; - - // Now set a register based on the comparison. 
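The sequence that follows is the standard CSET idiom: CSINC with both sources wired to the zero register and the inverted condition yields 1 when the original condition holds and 0 otherwise.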
- ARM64CC::CondCode invertedCC = getInvertedCondCode(CC); - unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::CSINCWr), - ResultReg) - .addReg(ARM64::WZR) - .addReg(ARM64::WZR) - .addImm(invertedCC); - - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectSelect(const Instruction *I) { - const SelectInst *SI = cast<SelectInst>(I); - - EVT DestEVT = TLI.getValueType(SI->getType(), true); - if (!DestEVT.isSimple()) - return false; - - MVT DestVT = DestEVT.getSimpleVT(); - if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 && - DestVT != MVT::f64) - return false; - - unsigned CondReg = getRegForValue(SI->getCondition()); - if (CondReg == 0) - return false; - unsigned TrueReg = getRegForValue(SI->getTrueValue()); - if (TrueReg == 0) - return false; - unsigned FalseReg = getRegForValue(SI->getFalseValue()); - if (FalseReg == 0) - return false; - - unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ANDReg) - .addReg(CondReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); - - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri)) - .addReg(ANDReg) - .addReg(ANDReg) - .addImm(0) - .addImm(0); - - unsigned SelectOpc; - switch (DestVT.SimpleTy) { - default: - return false; - case MVT::i32: - SelectOpc = ARM64::CSELWr; - break; - case MVT::i64: - SelectOpc = ARM64::CSELXr; - break; - case MVT::f32: - SelectOpc = ARM64::FCSELSrrr; - break; - case MVT::f64: - SelectOpc = ARM64::FCSELDrrr; - break; - } - - unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc), - ResultReg) - .addReg(TrueReg) - .addReg(FalseReg) - .addImm(ARM64CC::NE); - - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectFPExt(const Instruction *I) { - Value *V = I->getOperand(0); - if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) - return false; - - unsigned Op = getRegForValue(V); - if (Op == 0) - return false; - - unsigned ResultReg = createResultReg(&ARM64::FPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::FCVTDSr), - ResultReg).addReg(Op); - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectFPTrunc(const Instruction *I) { - Value *V = I->getOperand(0); - if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) - return false; - - unsigned Op = getRegForValue(V); - if (Op == 0) - return false; - - unsigned ResultReg = createResultReg(&ARM64::FPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::FCVTSDr), - ResultReg).addReg(Op); - UpdateValueMap(I, ResultReg); - return true; -} - -// FPToUI and FPToSI -bool ARM64FastISel::SelectFPToInt(const Instruction *I, bool Signed) { - MVT DestVT; - if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) - return false; - - unsigned SrcReg = getRegForValue(I->getOperand(0)); - if (SrcReg == 0) - return false; - - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); - - unsigned Opc; - if (SrcVT == MVT::f64) { - if (Signed) - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZSUWDr : ARM64::FCVTZSUXDr; - else - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZUUWDr : ARM64::FCVTZUUXDr; - } else { - if (Signed) - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZSUWSr : ARM64::FCVTZSUXSr; - else - Opc = (DestVT == MVT::i32) ? 
ARM64::FCVTZUUWSr : ARM64::FCVTZUUXSr; - } - unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(SrcReg); - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectIntToFP(const Instruction *I, bool Signed) { - MVT DestVT; - if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) - return false; - - unsigned SrcReg = getRegForValue(I->getOperand(0)); - if (SrcReg == 0) - return false; - - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); - - // Handle sign-extension. - if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { - SrcReg = - EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); - if (SrcReg == 0) - return false; - } - - unsigned Opc; - if (SrcVT == MVT::i64) { - if (Signed) - Opc = (DestVT == MVT::f32) ? ARM64::SCVTFUXSri : ARM64::SCVTFUXDri; - else - Opc = (DestVT == MVT::f32) ? ARM64::UCVTFUXSri : ARM64::UCVTFUXDri; - } else { - if (Signed) - Opc = (DestVT == MVT::f32) ? ARM64::SCVTFUWSri : ARM64::SCVTFUWDri; - else - Opc = (DestVT == MVT::f32) ? ARM64::UCVTFUWSri : ARM64::UCVTFUWDri; - } - - unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(SrcReg); - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::ProcessCallArgs(SmallVectorImpl<Value *> &Args, - SmallVectorImpl<unsigned> &ArgRegs, - SmallVectorImpl<MVT> &ArgVTs, - SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, - SmallVectorImpl<unsigned> &RegArgs, - CallingConv::ID CC, unsigned &NumBytes) { - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context); - CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC)); - - // Get a count of how many bytes are to be pushed on the stack. - NumBytes = CCInfo.getNextStackOffset(); - - // Issue CALLSEQ_START - unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) - .addImm(NumBytes); - - // Process the args. - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - unsigned Arg = ArgRegs[VA.getValNo()]; - MVT ArgVT = ArgVTs[VA.getValNo()]; - - // Handle arg promotion: SExt, ZExt, AExt. - switch (VA.getLocInfo()) { - case CCValAssign::Full: - break; - case CCValAssign::SExt: { - MVT DestVT = VA.getLocVT(); - MVT SrcVT = ArgVT; - Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false); - if (Arg == 0) - return false; - ArgVT = DestVT; - break; - } - case CCValAssign::AExt: - // Intentional fall-through. - case CCValAssign::ZExt: { - MVT DestVT = VA.getLocVT(); - MVT SrcVT = ArgVT; - Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true); - if (Arg == 0) - return false; - ArgVT = DestVT; - break; - } - default: - llvm_unreachable("Unknown arg promotion!"); - } - - // Now copy/store arg to correct locations. - if (VA.isRegLoc() && !VA.needsCustom()) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg); - RegArgs.push_back(VA.getLocReg()); - } else if (VA.needsCustom()) { - // FIXME: Handle custom args. - return false; - } else { - assert(VA.isMemLoc() && "Assuming store on stack."); - - // Need to store on the stack. 
- Address Addr; - Addr.setKind(Address::RegBase); - Addr.setReg(ARM64::SP); - Addr.setOffset(VA.getLocMemOffset()); - - if (!EmitStore(ArgVT, Arg, Addr)) - return false; - } - } - return true; -} - -bool ARM64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, - const Instruction *I, CallingConv::ID CC, - unsigned &NumBytes) { - // Issue CALLSEQ_END - unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) - .addImm(NumBytes) - .addImm(0); - - // Now the return value. - if (RetVT != MVT::isVoid) { - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context); - CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC)); - - // Only handle a single return value. - if (RVLocs.size() != 1) - return false; - - // Copy all of the result registers out of their specified physreg. - MVT CopyVT = RVLocs[0].getValVT(); - unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), - ResultReg).addReg(RVLocs[0].getLocReg()); - UsedRegs.push_back(RVLocs[0].getLocReg()); - - // Finally update the result. - UpdateValueMap(I, ResultReg); - } - - return true; -} - -bool ARM64FastISel::SelectCall(const Instruction *I, - const char *IntrMemName = 0) { - const CallInst *CI = cast<CallInst>(I); - const Value *Callee = CI->getCalledValue(); - - // Don't handle inline asm or intrinsics. - if (isa<InlineAsm>(Callee)) - return false; - - // Only handle global variable Callees. - const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); - if (!GV) - return false; - - // Check the calling convention. - ImmutableCallSite CS(CI); - CallingConv::ID CC = CS.getCallingConv(); - - // Let SDISel handle vararg functions. - PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - FunctionType *FTy = cast<FunctionType>(PT->getElementType()); - if (FTy->isVarArg()) - return false; - - // Handle *simple* calls for now. - MVT RetVT; - Type *RetTy = I->getType(); - if (RetTy->isVoidTy()) - RetVT = MVT::isVoid; - else if (!isTypeLegal(RetTy, RetVT)) - return false; - - // Set up the argument vectors. - SmallVector<Value *, 8> Args; - SmallVector<unsigned, 8> ArgRegs; - SmallVector<MVT, 8> ArgVTs; - SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; - Args.reserve(CS.arg_size()); - ArgRegs.reserve(CS.arg_size()); - ArgVTs.reserve(CS.arg_size()); - ArgFlags.reserve(CS.arg_size()); - - for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); - i != e; ++i) { - // If we're lowering a memory intrinsic instead of a regular call, skip the - // last two arguments, which shouldn't be passed to the underlying function. - if (IntrMemName && e - i <= 2) - break; - - unsigned Arg = getRegForValue(*i); - if (Arg == 0) - return false; - - ISD::ArgFlagsTy Flags; - unsigned AttrInd = i - CS.arg_begin() + 1; - if (CS.paramHasAttr(AttrInd, Attribute::SExt)) - Flags.setSExt(); - if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) - Flags.setZExt(); - - // FIXME: Only handle *easy* calls for now. - if (CS.paramHasAttr(AttrInd, Attribute::InReg) || - CS.paramHasAttr(AttrInd, Attribute::StructRet) || - CS.paramHasAttr(AttrInd, Attribute::Nest) || - CS.paramHasAttr(AttrInd, Attribute::ByVal)) - return false; - - MVT ArgVT; - Type *ArgTy = (*i)->getType(); - if (!isTypeLegal(ArgTy, ArgVT) && - !(ArgVT == MVT::i1 || ArgVT == MVT::i8 || ArgVT == MVT::i16)) - return false; - - // We don't handle vector parameters yet. 
- if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64) - return false; - - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - Flags.setOrigAlign(OriginalAlignment); - - Args.push_back(*i); - ArgRegs.push_back(Arg); - ArgVTs.push_back(ArgVT); - ArgFlags.push_back(Flags); - } - - // Handle the arguments now that we've gotten them. - SmallVector<unsigned, 4> RegArgs; - unsigned NumBytes; - if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) - return false; - - // Issue the call. - MachineInstrBuilder MIB; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BL)); - if (!IntrMemName) - MIB.addGlobalAddress(GV, 0, 0); - else - MIB.addExternalSymbol(IntrMemName, 0); - - // Add implicit physical register uses to the call. - for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i], RegState::Implicit); - - // Add a register mask with the call-preserved registers. - // Proper defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv())); - - // Finish off the call including any return values. - SmallVector<unsigned, 4> UsedRegs; - if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) - return false; - - // Set all unused physreg defs as dead. - static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); - - return true; -} - -bool ARM64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) { - if (Alignment) - return Len / Alignment <= 4; - else - return Len < 32; -} - -bool ARM64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, - unsigned Alignment) { - // Make sure we don't bloat code by inlining very large memcpy's. - if (!IsMemCpySmall(Len, Alignment)) - return false; - - int64_t UnscaledOffset = 0; - Address OrigDest = Dest; - Address OrigSrc = Src; - - while (Len) { - MVT VT; - if (!Alignment || Alignment >= 8) { - if (Len >= 8) - VT = MVT::i64; - else if (Len >= 4) - VT = MVT::i32; - else if (Len >= 2) - VT = MVT::i16; - else { - VT = MVT::i8; - } - } else { - // Bound based on alignment. - if (Len >= 4 && Alignment == 4) - VT = MVT::i32; - else if (Len >= 2 && Alignment == 2) - VT = MVT::i16; - else { - VT = MVT::i8; - } - } - - bool RV; - unsigned ResultReg; - RV = EmitLoad(VT, ResultReg, Src); - assert(RV == true && "Should be able to handle this load."); - RV = EmitStore(VT, ResultReg, Dest); - assert(RV == true && "Should be able to handle this store."); - (void)RV; - - int64_t Size = VT.getSizeInBits() / 8; - Len -= Size; - UnscaledOffset += Size; - - // We need to recompute the unscaled offset for each iteration. - Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); - Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); - } - - return true; -} - -bool ARM64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) { - // FIXME: Handle more intrinsics. - switch (I.getIntrinsicID()) { - default: - return false; - case Intrinsic::memcpy: - case Intrinsic::memmove: { - const MemTransferInst &MTI = cast<MemTransferInst>(I); - // Don't handle volatile. - if (MTI.isVolatile()) - return false; - - // Disable inlining for memmove before calls to ComputeAddress. Otherwise, - // we would emit dead code because we don't currently handle memmoves. - bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy); - if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) { - // Small memcpy's are common enough that we want to do them without a call - // if possible. 
- uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue(); - unsigned Alignment = MTI.getAlignment(); - if (IsMemCpySmall(Len, Alignment)) { - Address Dest, Src; - if (!ComputeAddress(MTI.getRawDest(), Dest) || - !ComputeAddress(MTI.getRawSource(), Src)) - return false; - if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment)) - return true; - } - } - - if (!MTI.getLength()->getType()->isIntegerTy(64)) - return false; - - if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255) - // Fast instruction selection doesn't support the special - // address spaces. - return false; - - const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove"; - return SelectCall(&I, IntrMemName); - } - case Intrinsic::memset: { - const MemSetInst &MSI = cast<MemSetInst>(I); - // Don't handle volatile. - if (MSI.isVolatile()) - return false; - - if (!MSI.getLength()->getType()->isIntegerTy(64)) - return false; - - if (MSI.getDestAddressSpace() > 255) - // Fast instruction selection doesn't support the special - // address spaces. - return false; - - return SelectCall(&I, "memset"); - } - case Intrinsic::trap: { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BRK)) - .addImm(1); - return true; - } - } - return false; -} - -bool ARM64FastISel::SelectRet(const Instruction *I) { - const ReturnInst *Ret = cast<ReturnInst>(I); - const Function &F = *I->getParent()->getParent(); - - if (!FuncInfo.CanLowerReturn) - return false; - - if (F.isVarArg()) - return false; - - // Build a list of return value registers. - SmallVector<unsigned, 4> RetRegs; - - if (Ret->getNumOperands() > 0) { - CallingConv::ID CC = F.getCallingConv(); - SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); - - // Analyze operands of the call, assigning locations to each operand. - SmallVector<CCValAssign, 16> ValLocs; - CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, - I->getContext()); - CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; - CCInfo.AnalyzeReturn(Outs, RetCC); - - // Only handle a single return value for now. - if (ValLocs.size() != 1) - return false; - - CCValAssign &VA = ValLocs[0]; - const Value *RV = Ret->getOperand(0); - - // Don't bother handling odd stuff for now. - if (VA.getLocInfo() != CCValAssign::Full) - return false; - // Only handle register returns for now. - if (!VA.isRegLoc()) - return false; - unsigned Reg = getRegForValue(RV); - if (Reg == 0) - return false; - - unsigned SrcReg = Reg + VA.getValNo(); - unsigned DestReg = VA.getLocReg(); - // Avoid a cross-class copy. This is very unlikely. - if (!MRI.getRegClass(SrcReg)->contains(DestReg)) - return false; - - EVT RVEVT = TLI.getValueType(RV->getType()); - if (!RVEVT.isSimple()) - return false; - MVT RVVT = RVEVT.getSimpleVT(); - MVT DestVT = VA.getValVT(); - // Special handling for extended integers. - if (RVVT != DestVT) { - if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) - return false; - - if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) - return false; - - bool isZExt = Outs[0].Flags.isZExt(); - SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt); - if (SrcReg == 0) - return false; - } - - // Make the copy. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); - - // Add register to return instruction. 
- RetRegs.push_back(VA.getLocReg()); - } - - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM64::RET_ReallyLR)); - for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) - MIB.addReg(RetRegs[i], RegState::Implicit); - return true; -} - -bool ARM64FastISel::SelectTrunc(const Instruction *I) { - Type *DestTy = I->getType(); - Value *Op = I->getOperand(0); - Type *SrcTy = Op->getType(); - - EVT SrcEVT = TLI.getValueType(SrcTy, true); - EVT DestEVT = TLI.getValueType(DestTy, true); - if (!SrcEVT.isSimple()) - return false; - if (!DestEVT.isSimple()) - return false; - - MVT SrcVT = SrcEVT.getSimpleVT(); - MVT DestVT = DestEVT.getSimpleVT(); - - if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && - SrcVT != MVT::i8) - return false; - if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && - DestVT != MVT::i1) - return false; - - unsigned SrcReg = getRegForValue(Op); - if (!SrcReg) - return false; - - // If we're truncating from i64 to a smaller non-legal type then generate an - // AND. Otherwise, we know the high bits are undefined and a truncate doesn't - // generate any code. - if (SrcVT == MVT::i64) { - uint64_t Mask = 0; - switch (DestVT.SimpleTy) { - default: - // Trunc i64 to i32 is handled by the target-independent fast-isel. - return false; - case MVT::i1: - Mask = 0x1; - break; - case MVT::i8: - Mask = 0xff; - break; - case MVT::i16: - Mask = 0xffff; - break; - } - // Issue an extract_subreg to get the lower 32-bits. - unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true, - ARM64::sub_32); - // Create the AND instruction which performs the actual truncation. - unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ANDReg) - .addReg(Reg32) - .addImm(ARM64_AM::encodeLogicalImmediate(Mask, 32)); - SrcReg = ANDReg; - } - - UpdateValueMap(I, SrcReg); - return true; -} - -unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) { - assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || - DestVT == MVT::i64) && - "Unexpected value type."); - // Handle i8 and i16 as i32. - if (DestVT == MVT::i8 || DestVT == MVT::i16) - DestVT = MVT::i32; - - if (isZExt) { - unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ResultReg) - .addReg(SrcReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); - - if (DestVT == MVT::i64) { - // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the - // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. - unsigned Reg64 = MRI.createVirtualRegister(&ARM64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM64::SUBREG_TO_REG), Reg64) - .addImm(0) - .addReg(ResultReg) - .addImm(ARM64::sub_32); - ResultReg = Reg64; - } - return ResultReg; - } else { - if (DestVT == MVT::i64) { - // FIXME: We're SExt i1 to i64. 
- return 0; - } - unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SBFMWri), - ResultReg) - .addReg(SrcReg) - .addImm(0) - .addImm(0); - return ResultReg; - } -} - -unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, - bool isZExt) { - assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); - unsigned Opc; - unsigned Imm = 0; - - switch (SrcVT.SimpleTy) { - default: - return 0; - case MVT::i1: - return Emiti1Ext(SrcReg, DestVT, isZExt); - case MVT::i8: - if (DestVT == MVT::i64) - Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri; - else - Opc = isZExt ? ARM64::UBFMWri : ARM64::SBFMWri; - Imm = 7; - break; - case MVT::i16: - if (DestVT == MVT::i64) - Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri; - else - Opc = isZExt ? ARM64::UBFMWri : ARM64::SBFMWri; - Imm = 15; - break; - case MVT::i32: - assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); - Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri; - Imm = 31; - break; - } - - // Handle i8 and i16 as i32. - if (DestVT == MVT::i8 || DestVT == MVT::i16) - DestVT = MVT::i32; - - unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(SrcReg) - .addImm(0) - .addImm(Imm); - - return ResultReg; -} - -bool ARM64FastISel::SelectIntExt(const Instruction *I) { - // On ARM, in general, integer casts don't involve legal types; this code - // handles promotable integers. The high bits for a type smaller than - // the register size are assumed to be undefined. - Type *DestTy = I->getType(); - Value *Src = I->getOperand(0); - Type *SrcTy = Src->getType(); - - bool isZExt = isa<ZExtInst>(I); - unsigned SrcReg = getRegForValue(Src); - if (!SrcReg) - return false; - - EVT SrcEVT = TLI.getValueType(SrcTy, true); - EVT DestEVT = TLI.getValueType(DestTy, true); - if (!SrcEVT.isSimple()) - return false; - if (!DestEVT.isSimple()) - return false; - - MVT SrcVT = SrcEVT.getSimpleVT(); - MVT DestVT = DestEVT.getSimpleVT(); - unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt); - if (ResultReg == 0) - return false; - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) { - EVT DestEVT = TLI.getValueType(I->getType(), true); - if (!DestEVT.isSimple()) - return false; - - MVT DestVT = DestEVT.getSimpleVT(); - if (DestVT != MVT::i64 && DestVT != MVT::i32) - return false; - - unsigned DivOpc; - bool is64bit = (DestVT == MVT::i64); - switch (ISDOpcode) { - default: - return false; - case ISD::SREM: - DivOpc = is64bit ? ARM64::SDIVXr : ARM64::SDIVWr; - break; - case ISD::UREM: - DivOpc = is64bit ? ARM64::UDIVXr : ARM64::UDIVWr; - break; - } - unsigned MSubOpc = is64bit ? ARM64::MSUBXrrr : ARM64::MSUBWrrr; - unsigned Src0Reg = getRegForValue(I->getOperand(0)); - if (!Src0Reg) - return false; - - unsigned Src1Reg = getRegForValue(I->getOperand(1)); - if (!Src1Reg) - return false; - - unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), ResultReg) - .addReg(Src0Reg) - .addReg(Src1Reg); - // The remainder is computed as numerator – (quotient * denominator) using the - // MSUB instruction. 
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg) - .addReg(ResultReg) - .addReg(Src1Reg) - .addReg(Src0Reg); - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectMul(const Instruction *I) { - EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true); - if (!SrcEVT.isSimple()) - return false; - MVT SrcVT = SrcEVT.getSimpleVT(); - - // Must be simple value type. Don't handle vectors. - if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && - SrcVT != MVT::i8) - return false; - - unsigned Opc; - unsigned ZReg; - switch (SrcVT.SimpleTy) { - default: - return false; - case MVT::i8: - case MVT::i16: - case MVT::i32: - ZReg = ARM64::WZR; - Opc = ARM64::MADDWrrr; - break; - case MVT::i64: - ZReg = ARM64::XZR; - Opc = ARM64::MADDXrrr; - break; - } - - unsigned Src0Reg = getRegForValue(I->getOperand(0)); - if (!Src0Reg) - return false; - - unsigned Src1Reg = getRegForValue(I->getOperand(1)); - if (!Src1Reg) - return false; - - // Create the base instruction, then add the operands. - unsigned ResultReg = createResultReg(TLI.getRegClassFor(SrcVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(Src0Reg) - .addReg(Src1Reg) - .addReg(ZReg); - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::TargetSelectInstruction(const Instruction *I) { - switch (I->getOpcode()) { - default: - break; - case Instruction::Load: - return SelectLoad(I); - case Instruction::Store: - return SelectStore(I); - case Instruction::Br: - return SelectBranch(I); - case Instruction::IndirectBr: - return SelectIndirectBr(I); - case Instruction::FCmp: - case Instruction::ICmp: - return SelectCmp(I); - case Instruction::Select: - return SelectSelect(I); - case Instruction::FPExt: - return SelectFPExt(I); - case Instruction::FPTrunc: - return SelectFPTrunc(I); - case Instruction::FPToSI: - return SelectFPToInt(I, /*Signed=*/true); - case Instruction::FPToUI: - return SelectFPToInt(I, /*Signed=*/false); - case Instruction::SIToFP: - return SelectIntToFP(I, /*Signed=*/true); - case Instruction::UIToFP: - return SelectIntToFP(I, /*Signed=*/false); - case Instruction::SRem: - return SelectRem(I, ISD::SREM); - case Instruction::URem: - return SelectRem(I, ISD::UREM); - case Instruction::Call: - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) - return SelectIntrinsicCall(*II); - return SelectCall(I); - case Instruction::Ret: - return SelectRet(I); - case Instruction::Trunc: - return SelectTrunc(I); - case Instruction::ZExt: - case Instruction::SExt: - return SelectIntExt(I); - case Instruction::Mul: - // FIXME: This really should be handled by the target-independent selector. - return SelectMul(I); - } - return false; - // Silence warnings. - (void)&CC_ARM64_DarwinPCS_VarArg; -} - -namespace llvm { -llvm::FastISel *ARM64::createFastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) { - return new ARM64FastISel(funcInfo, libInfo); -} -} diff --git a/lib/Target/ARM64/ARM64FrameLowering.cpp b/lib/Target/ARM64/ARM64FrameLowering.cpp deleted file mode 100644 index 798986c..0000000 --- a/lib/Target/ARM64/ARM64FrameLowering.cpp +++ /dev/null @@ -1,816 +0,0 @@ -//===- ARM64FrameLowering.cpp - ARM64 Frame Lowering -----------*- C++ -*-====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM64 implementation of TargetFrameLowering class. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "frame-info" -#include "ARM64FrameLowering.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64Subtarget.h" -#include "ARM64TargetMachine.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Function.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -static cl::opt<bool> EnableRedZone("arm64-redzone", - cl::desc("enable use of redzone on ARM64"), - cl::init(false), cl::Hidden); - -STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); - -static unsigned estimateStackSize(MachineFunction &MF) { - const MachineFrameInfo *FFI = MF.getFrameInfo(); - int Offset = 0; - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -FFI->getObjectOffset(i); - if (FixedOff > Offset) - Offset = FixedOff; - } - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { - if (FFI->isDeadObjectIndex(i)) - continue; - Offset += FFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset + Align - 1) / Align * Align; - } - // This does not include the 16 bytes used for fp and lr. - return (unsigned)Offset; -} - -bool ARM64FrameLowering::canUseRedZone(const MachineFunction &MF) const { - if (!EnableRedZone) - return false; - // Don't use the red zone if the function explicitly asks us not to. - // This is typically used for kernel code. - if (MF.getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::NoRedZone)) - return false; - - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>(); - unsigned NumBytes = AFI->getLocalStackSize(); - - // Note: currently hasFP() is always true for hasCalls(), but that's an - // implementation detail of the current code, not a strict requirement, - // so stay safe here and check both. - if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128) - return false; - return true; -} - -/// hasFP - Return true if the specified function should have a dedicated frame -/// pointer register. -bool ARM64FrameLowering::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - -#ifndef NDEBUG - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - assert(!RegInfo->needsStackRealignment(MF) && - "No stack realignment on ARM64!"); -#endif - - return (MFI->hasCalls() || MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken()); -} - -/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is -/// not required, we reserve argument space for call sites in the function -/// immediately on entry to the current function. This eliminates the need for -/// add/sub sp brackets around call sites. Returns true if the call frame is -/// included as part of the stack frame. 
-bool ARM64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { - return !MF.getFrameInfo()->hasVarSizedObjects(); -} - -void ARM64FrameLowering::eliminateCallFramePseudoInstr( - MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const ARM64InstrInfo *TII = - static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo()); - if (!TFI->hasReservedCallFrame(MF)) { - // If we have alloca, convert as follows: - // ADJCALLSTACKDOWN -> sub, sp, sp, amount - // ADJCALLSTACKUP -> add, sp, sp, amount - MachineInstr *Old = I; - DebugLoc DL = Old->getDebugLoc(); - unsigned Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = TFI->getStackAlignment(); - Amount = (Amount + Align - 1) / Align * Align; - - // Replace the pseudo instruction with a new instruction... - unsigned Opc = Old->getOpcode(); - if (Opc == ARM64::ADJCALLSTACKDOWN) { - emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, -Amount, TII); - } else { - assert(Opc == ARM64::ADJCALLSTACKUP && "expected ADJCALLSTACKUP"); - emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, Amount, TII); - } - } - } - MBB.erase(I); -} - -void -ARM64FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned FramePtr) const { - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo &MMI = MF.getMMI(); - const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - const ARM64InstrInfo *TII = TM.getInstrInfo(); - DebugLoc DL = MBB.findDebugLoc(MBBI); - - // Add callee saved registers to move list. - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - if (CSI.empty()) - return; - - const DataLayout *TD = MF.getTarget().getDataLayout(); - bool HasFP = hasFP(MF); - - // Calculate amount of bytes used for return address storing. - int stackGrowth = -TD->getPointerSize(0); - - // Calculate offsets. - int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth; - unsigned TotalSkipped = 0; - for (const auto &Info : CSI) { - unsigned Reg = Info.getReg(); - int64_t Offset = MFI->getObjectOffset(Info.getFrameIdx()) - - getOffsetOfLocalArea() + saveAreaOffset; - - // Don't output a new CFI directive if we're re-saving the frame pointer or - // link register. This happens when the PrologEpilogInserter has inserted an - // extra "STP" of the frame pointer and link register -- the "emitPrologue" - // method automatically generates the directives when frame pointers are - // used. If we generate CFI directives for the extra "STP"s, the linker will - // lose track of the correct values for the frame pointer and link register. - if (HasFP && (FramePtr == Reg || Reg == ARM64::LR)) { - TotalSkipped += stackGrowth; - continue; - } - - unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); - unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( - nullptr, DwarfReg, Offset - TotalSkipped)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } -} - -void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. 
- MachineBasicBlock::iterator MBBI = MBB.begin(); - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const Function *Fn = MF.getFunction(); - const ARM64RegisterInfo *RegInfo = TM.getRegisterInfo(); - const ARM64InstrInfo *TII = TM.getInstrInfo(); - MachineModuleInfo &MMI = MF.getMMI(); - ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>(); - bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); - bool HasFP = hasFP(MF); - DebugLoc DL = MBB.findDebugLoc(MBBI); - - int NumBytes = (int)MFI->getStackSize(); - if (!AFI->hasStackFrame()) { - assert(!HasFP && "unexpected function without stack frame but with FP"); - - // All of the stack allocation is for locals. - AFI->setLocalStackSize(NumBytes); - - // Label used to tie together the PROLOG_LABEL and the MachineMoves. - MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - - // REDZONE: If the stack size is less than 128 bytes, we don't need - // to actually allocate. - if (NumBytes && !canUseRedZone(MF)) { - emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); - - // Encode the stack size of the leaf function. - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } else if (NumBytes) { - ++NumRedZoneFunctions; - } - - return; - } - - // Only set up FP if we actually need to. - int FPOffset = 0; - if (HasFP) { - // First instruction must a) allocate the stack and b) have an immediate - // that is a multiple of -2. - assert((MBBI->getOpcode() == ARM64::STPXpre || - MBBI->getOpcode() == ARM64::STPDpre) && - MBBI->getOperand(2).getReg() == ARM64::SP && - MBBI->getOperand(3).getImm() < 0 && - (MBBI->getOperand(3).getImm() & 1) == 0); - - // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space - // required for the callee saved register area we get the frame pointer - // by adding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8. - FPOffset = -(MBBI->getOperand(3).getImm() + 2) * 8; - assert(FPOffset >= 0 && "Bad Framepointer Offset"); - } - - // Move past the saves of the callee-saved registers. - while (MBBI->getOpcode() == ARM64::STPXi || - MBBI->getOpcode() == ARM64::STPDi || - MBBI->getOpcode() == ARM64::STPXpre || - MBBI->getOpcode() == ARM64::STPDpre) { - ++MBBI; - NumBytes -= 16; - } - assert(NumBytes >= 0 && "Negative stack allocation size!?"); - if (HasFP) { - // Issue sub fp, sp, FPOffset or - // mov fp,sp when FPOffset is zero. - // Note: All stores of callee-saved registers are marked as "FrameSetup". - // This code marks the instruction(s) that set the FP also. - emitFrameOffset(MBB, MBBI, DL, ARM64::FP, ARM64::SP, FPOffset, TII, - MachineInstr::FrameSetup); - } - - // All of the remaining stack allocations are for locals. - AFI->setLocalStackSize(NumBytes); - - // Allocate space for the rest of the frame. - if (NumBytes) { - // If we're a leaf function, try using the red zone. - if (!canUseRedZone(MF)) - emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); - } - - // If we need a base pointer, set it up here. It's whatever the value of the - // stack pointer is at this point. Any variable size objects will be allocated - // after this, so we can still use the base pointer to reference locals. - // - // FIXME: Clarify FrameSetup flags here. - // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is - // needed.
- // - if (RegInfo->hasBasePointer(MF)) - TII->copyPhysReg(MBB, MBBI, DL, ARM64::X19, ARM64::SP, false); - - if (needsFrameMoves) { - const DataLayout *TD = MF.getTarget().getDataLayout(); - const int StackGrowth = -TD->getPointerSize(0); - unsigned FramePtr = RegInfo->getFrameRegister(MF); - - // An example of the prologue: - // - // .globl __foo - // .align 2 - // __foo: - // Ltmp0: - // .cfi_startproc - // .cfi_personality 155, ___gxx_personality_v0 - // Leh_func_begin: - // .cfi_lsda 16, Lexception33 - // - // stp xa,bx, [sp, -#offset]! - // ... - // stp x28, x27, [sp, #offset-32] - // stp fp, lr, [sp, #offset-16] - // add fp, sp, #offset - 16 - // sub sp, sp, #1360 - // - // The Stack: - // +-------------------------------------------+ - // 10000 | ........ | ........ | ........ | ........ | - // 10004 | ........ | ........ | ........ | ........ | - // +-------------------------------------------+ - // 10008 | ........ | ........ | ........ | ........ | - // 1000c | ........ | ........ | ........ | ........ | - // +===========================================+ - // 10010 | X28 Register | - // 10014 | X28 Register | - // +-------------------------------------------+ - // 10018 | X27 Register | - // 1001c | X27 Register | - // +===========================================+ - // 10020 | Frame Pointer | - // 10024 | Frame Pointer | - // +-------------------------------------------+ - // 10028 | Link Register | - // 1002c | Link Register | - // +===========================================+ - // 10030 | ........ | ........ | ........ | ........ | - // 10034 | ........ | ........ | ........ | ........ | - // +-------------------------------------------+ - // 10038 | ........ | ........ | ........ | ........ | - // 1003c | ........ | ........ | ........ | ........ | - // +-------------------------------------------+ - // - // [sp] = 10030 :: >>initial value<< - // sp = 10020 :: stp fp, lr, [sp, #-16]! - // fp = sp == 10020 :: mov fp, sp - // [sp] == 10020 :: stp x28, x27, [sp, #-16]! - // sp == 10010 :: >>final value<< - // - // The frame pointer (w29) points to address 10020. If we use an offset of - // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24 - // for w27, and -32 for w28: - // - // Ltmp1: - // .cfi_def_cfa w29, 16 - // Ltmp2: - // .cfi_offset w30, -8 - // Ltmp3: - // .cfi_offset w29, -16 - // Ltmp4: - // .cfi_offset w27, -24 - // Ltmp5: - // .cfi_offset w28, -32 - - if (HasFP) { - // Define the current CFA rule to use the provided FP. - unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - // Record the location of the stored LR - unsigned LR = RegInfo->getDwarfRegNum(ARM64::LR, true); - CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(nullptr, LR, StackGrowth)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - // Record the location of the stored FP - CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } else { - // Encode the stack size of the leaf function. 
- unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize())); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - - // Now emit the moves for whatever callee saved regs we have. - emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr); - } -} - -static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) { - for (unsigned i = 0; CSRegs[i]; ++i) - if (Reg == CSRegs[i]) - return true; - return false; -} - -static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) { - if (MI->getOpcode() == ARM64::LDPXpost || - MI->getOpcode() == ARM64::LDPDpost || MI->getOpcode() == ARM64::LDPXi || - MI->getOpcode() == ARM64::LDPDi) { - if (!isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) || - !isCalleeSavedRegister(MI->getOperand(1).getReg(), CSRegs) || - MI->getOperand(2).getReg() != ARM64::SP) - return false; - return true; - } - - return false; -} - -void ARM64FrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARM64InstrInfo *TII = - static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo()); - const ARM64RegisterInfo *RegInfo = - static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo()); - DebugLoc DL = MBBI->getDebugLoc(); - - int NumBytes = MFI->getStackSize(); - unsigned NumRestores = 0; - // Move past the restores of the callee-saved registers. - MachineBasicBlock::iterator LastPopI = MBBI; - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); - if (LastPopI != MBB.begin()) { - do { - ++NumRestores; - --LastPopI; - } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs)); - if (!isCSRestore(LastPopI, CSRegs)) { - ++LastPopI; - --NumRestores; - } - } - NumBytes -= NumRestores * 16; - assert(NumBytes >= 0 && "Negative stack allocation size!?"); - - if (!hasFP(MF)) { - // If this was a redzone leaf function, we don't need to restore the - // stack pointer. - if (!canUseRedZone(MF)) - emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::SP, NumBytes, TII); - return; - } - - // Restore the original stack pointer. - // FIXME: Rather than doing the math here, we should instead just use - // non-post-indexed loads for the restores if we aren't actually going to - // be able to save any instructions. - if (NumBytes || MFI->hasVarSizedObjects()) - emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::FP, - -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags); -} - -/// getFrameIndexOffset - Returns the displacement from the frame register to -/// the stack frame of the specified index. -int ARM64FrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - unsigned FrameReg; - return getFrameIndexReference(MF, FI, FrameReg); -} - -/// getFrameIndexReference - Provide a base+offset reference to an FI slot for -/// debug info. It's the same as what we use for resolving the code-gen -/// references for now. FIXME: This can go wrong when references are -/// SP-relative and simple call frames aren't used. 
-int ARM64FrameLowering::getFrameIndexReference(const MachineFunction &MF, - int FI, - unsigned &FrameReg) const { - return resolveFrameIndexReference(MF, FI, FrameReg); -} - -int ARM64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, - int FI, unsigned &FrameReg, - bool PreferFP) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARM64RegisterInfo *RegInfo = - static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo()); - const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>(); - int FPOffset = MFI->getObjectOffset(FI) + 16; - int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); - bool isFixed = MFI->isFixedObjectIndex(FI); - - // Use frame pointer to reference fixed objects. Use it for locals if - // there are VLAs (and thus the SP isn't reliable as a base). - // Make sure useFPForScavengingIndex() does the right thing for the emergency - // spill slot. - bool UseFP = false; - if (AFI->hasStackFrame()) { - // Note: Keeping the following as multiple 'if' statements rather than - // merging to a single expression for readability. - // - // Argument access should always use the FP. - if (isFixed) { - UseFP = hasFP(MF); - } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) { - // Use SP or FP, whichever gives us the best chance of the offset - // being in range for direct access. If the FPOffset is positive, - // that'll always be best, as the SP will be even further away. - // If the FPOffset is negative, we have to keep in mind that the - // available offset range for negative offsets is smaller than for - // positive ones. If we have variable sized objects, we're stuck with - // using the FP regardless, though, as the SP offset is unknown - // and we don't have a base pointer available. If an offset is - // available via the FP and the SP, use whichever is closest. - if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 || - (FPOffset >= -256 && Offset > -FPOffset)) - UseFP = true; - } - } - - if (UseFP) { - FrameReg = RegInfo->getFrameRegister(MF); - return FPOffset; - } - - // Use the base pointer if we have one. - if (RegInfo->hasBasePointer(MF)) - FrameReg = RegInfo->getBaseRegister(); - else { - FrameReg = ARM64::SP; - // If we're using the red zone for this function, the SP won't actually - // be adjusted, so the offsets will be negative. They're also all - // within range of the signed 9-bit immediate instructions. - if (canUseRedZone(MF)) - Offset -= AFI->getLocalStackSize(); - } - - return Offset; -} - -static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { - if (Reg != ARM64::LR) - return getKillRegState(true); - - // LR may be referred to later by an @llvm.returnaddress intrinsic.
- bool LRLiveIn = MF.getRegInfo().isLiveIn(ARM64::LR); - bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken()); - return getKillRegState(LRKill); -} - -bool ARM64FrameLowering::spillCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - unsigned Count = CSI.size(); - DebugLoc DL; - assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); - - if (MI != MBB.end()) - DL = MI->getDebugLoc(); - - for (unsigned i = 0; i < Count; i += 2) { - unsigned idx = Count - i - 2; - unsigned Reg1 = CSI[idx].getReg(); - unsigned Reg2 = CSI[idx + 1].getReg(); - // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI - // list to come in sorted by frame index so that we can issue the store - // pair instructions directly. Assert if we see anything otherwise. - // - // The order of the registers in the list is controlled by - // getCalleeSavedRegs(), so they will always be in-order, as well. - assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() && - "Out of order callee saved regs!"); - unsigned StrOpc; - assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); - assert((i & 1) == 0 && "Odd index for callee-saved reg spill!"); - // Issue sequence of non-sp increment and pi sp spills for cs regs. The - // first spill is a pre-increment that allocates the stack. - // For example: - // stp x22, x21, [sp, #-48]! // addImm(-6) - // stp x20, x19, [sp, #16] // addImm(+2) - // stp fp, lr, [sp, #32] // addImm(+4) - // Rationale: This sequence saves uop updates compared to a sequence of - // pre-increment spills like stp xi,xj,[sp,#-16]! - // Note: Similar rationale and sequence for restores in epilog. - if (ARM64::GPR64RegClass.contains(Reg1)) { - assert(ARM64::GPR64RegClass.contains(Reg2) && - "Expected GPR64 callee-saved register pair!"); - // For first spill use pre-increment store. - if (i == 0) - StrOpc = ARM64::STPXpre; - else - StrOpc = ARM64::STPXi; - } else if (ARM64::FPR64RegClass.contains(Reg1)) { - assert(ARM64::FPR64RegClass.contains(Reg2) && - "Expected FPR64 callee-saved register pair!"); - // For first spill use pre-increment store. - if (i == 0) - StrOpc = ARM64::STPDpre; - else - StrOpc = ARM64::STPDi; - } else - llvm_unreachable("Unexpected callee saved register!"); - DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", " - << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx() - << ", " << CSI[idx + 1].getFrameIdx() << ")\n"); - // Compute offset: i = 0 => offset = -Count; - // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc. - const int Offset = (i == 0) ?
-Count : i; - assert((Offset >= -64 && Offset <= 63) && - "Offset out of bounds for STP immediate"); - BuildMI(MBB, MI, DL, TII.get(StrOpc)) - .addReg(Reg2, getPrologueDeath(MF, Reg2)) - .addReg(Reg1, getPrologueDeath(MF, Reg1)) - .addReg(ARM64::SP) - .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit - .setMIFlag(MachineInstr::FrameSetup); - } - return true; -} - -bool ARM64FrameLowering::restoreCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - unsigned Count = CSI.size(); - DebugLoc DL; - assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); - - if (MI != MBB.end()) - DL = MI->getDebugLoc(); - - for (unsigned i = 0; i < Count; i += 2) { - unsigned Reg1 = CSI[i].getReg(); - unsigned Reg2 = CSI[i + 1].getReg(); - // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI - // list to come in sorted by frame index so that we can issue the store - // pair instructions directly. Assert if we see anything otherwise. - assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() && - "Out of order callee saved regs!"); - // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only - // the last load is sp-pi post-increment and de-allocates the stack: - // For example: - // ldp fp, lr, [sp, #32] // addImm(+4) - // ldp x20, x19, [sp, #16] // addImm(+2) - // ldp x22, x21, [sp], #48 // addImm(+6) - // Note: see comment in spillCalleeSavedRegisters() - unsigned LdrOpc; - - assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); - assert((i & 1) == 0 && "Odd index for callee-saved reg spill!"); - if (ARM64::GPR64RegClass.contains(Reg1)) { - assert(ARM64::GPR64RegClass.contains(Reg2) && - "Expected GPR64 callee-saved register pair!"); - if (i == Count - 2) - LdrOpc = ARM64::LDPXpost; - else - LdrOpc = ARM64::LDPXi; - } else if (ARM64::FPR64RegClass.contains(Reg1)) { - assert(ARM64::FPR64RegClass.contains(Reg2) && - "Expected FPR64 callee-saved register pair!"); - if (i == Count - 2) - LdrOpc = ARM64::LDPDpost; - else - LdrOpc = ARM64::LDPDi; - } else - llvm_unreachable("Unexpected callee saved register!"); - DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", " - << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx() - << ", " << CSI[i + 1].getFrameIdx() << ")\n"); - - // Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4; - // etc. - const int Offset = (i == Count - 2) ? 
Count : Count - i - 2; - assert((Offset >= -64 && Offset <= 63) && - "Offset out of bounds for LDP immediate"); - BuildMI(MBB, MI, DL, TII.get(LdrOpc)) - .addReg(Reg2, getDefRegState(true)) - .addReg(Reg1, getDefRegState(true)) - .addReg(ARM64::SP) - .addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8] - // where the factor * 8 is implicit - } - return true; -} - -void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan( - MachineFunction &MF, RegScavenger *RS) const { - const ARM64RegisterInfo *RegInfo = - static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo()); - ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>(); - MachineRegisterInfo *MRI = &MF.getRegInfo(); - SmallVector<unsigned, 4> UnspilledCSGPRs; - SmallVector<unsigned, 4> UnspilledCSFPRs; - - // The frame record needs to be created by saving the appropriate registers - if (hasFP(MF)) { - MRI->setPhysRegUsed(ARM64::FP); - MRI->setPhysRegUsed(ARM64::LR); - } - - // Spill the BasePtr if it's used. Do this first thing so that the - // getCalleeSavedRegs() below will get the right answer. - if (RegInfo->hasBasePointer(MF)) - MRI->setPhysRegUsed(RegInfo->getBaseRegister()); - - // If any callee-saved registers are used, the frame cannot be eliminated. - unsigned NumGPRSpilled = 0; - unsigned NumFPRSpilled = 0; - bool ExtraCSSpill = false; - bool CanEliminateFrame = true; - DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:"); - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); - - // Check pairs of consecutive callee-saved registers. - for (unsigned i = 0; CSRegs[i]; i += 2) { - assert(CSRegs[i + 1] && "Odd number of callee-saved registers!"); - - const unsigned OddReg = CSRegs[i]; - const unsigned EvenReg = CSRegs[i + 1]; - assert((ARM64::GPR64RegClass.contains(OddReg) && - ARM64::GPR64RegClass.contains(EvenReg)) ^ - (ARM64::FPR64RegClass.contains(OddReg) && - ARM64::FPR64RegClass.contains(EvenReg)) && - "Register class mismatch!"); - - const bool OddRegUsed = MRI->isPhysRegUsed(OddReg); - const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg); - - // Early exit if none of the registers in the register pair is actually - // used. - if (!OddRegUsed && !EvenRegUsed) { - if (ARM64::GPR64RegClass.contains(OddReg)) { - UnspilledCSGPRs.push_back(OddReg); - UnspilledCSGPRs.push_back(EvenReg); - } else { - UnspilledCSFPRs.push_back(OddReg); - UnspilledCSFPRs.push_back(EvenReg); - } - continue; - } - - unsigned Reg = ARM64::NoRegister; - // If only one of the registers of the register pair is used, make sure to - // mark the other one as used as well. - if (OddRegUsed ^ EvenRegUsed) { - // Find out which register is the additional spill. - Reg = OddRegUsed ? EvenReg : OddReg; - MRI->setPhysRegUsed(Reg); - } - - DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo)); - DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo)); - - assert(((OddReg == ARM64::LR && EvenReg == ARM64::FP) || - (RegInfo->getEncodingValue(OddReg) + 1 == - RegInfo->getEncodingValue(EvenReg))) && - "Register pair of non-adjacent registers!"); - if (ARM64::GPR64RegClass.contains(OddReg)) { - NumGPRSpilled += 2; - // If it's not a reserved register, we can use it in lieu of an - // emergency spill slot for the register scavenger. - // FIXME: It would be better to instead keep looking and choose another - // unspilled register that isn't reserved, if there is one. 
- if (Reg != ARM64::NoRegister && !RegInfo->isReservedReg(MF, Reg)) - ExtraCSSpill = true; - } else - NumFPRSpilled += 2; - - CanEliminateFrame = false; - } - - // FIXME: Set BigStack if any stack slot references may be out of range. - // For now, just conservatively guestimate based on unscaled indexing - // range. We'll end up allocating an unnecessary spill slot a lot, but - // realistically that's not a big deal at this stage of the game. - // The CSR spill slots have not been allocated yet, so estimateStackSize - // won't include them. - MachineFrameInfo *MFI = MF.getFrameInfo(); - unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); - DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); - bool BigStack = (CFSize >= 256); - if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) - AFI->setHasStackFrame(true); - - // Estimate if we might need to scavenge a register at some point in order - // to materialize a stack offset. If so, either spill one additional - // callee-saved register or reserve a special spill slot to facilitate - // register scavenging. If we already spilled an extra callee-saved register - // above to keep the number of spills even, we don't need to do anything else - // here. - if (BigStack && !ExtraCSSpill) { - - // If we're adding a register to spill here, we have to add two of them - // to keep the number of regs to spill even. - assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!"); - unsigned Count = 0; - while (!UnspilledCSGPRs.empty() && Count < 2) { - unsigned Reg = UnspilledCSGPRs.back(); - UnspilledCSGPRs.pop_back(); - DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo) - << " to get a scratch register.\n"); - MRI->setPhysRegUsed(Reg); - ExtraCSSpill = true; - ++Count; - } - - // If we didn't find an extra callee-saved register to spill, create - // an emergency spill slot. - if (!ExtraCSSpill) { - const TargetRegisterClass *RC = &ARM64::GPR64RegClass; - int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false); - RS->addScavengingFrameIndex(FI); - DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI - << " as the emergency spill slot.\n"); - } - } -} diff --git a/lib/Target/ARM64/ARM64FrameLowering.h b/lib/Target/ARM64/ARM64FrameLowering.h deleted file mode 100644 index 02edcdb..0000000 --- a/lib/Target/ARM64/ARM64FrameLowering.h +++ /dev/null @@ -1,75 +0,0 @@ -//===-- ARM64FrameLowering.h - TargetFrameLowering for ARM64 ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64_FRAMELOWERING_H -#define ARM64_FRAMELOWERING_H - -#include "llvm/Target/TargetFrameLowering.h" - -namespace llvm { - -class ARM64Subtarget; -class ARM64TargetMachine; - -class ARM64FrameLowering : public TargetFrameLowering { - const ARM64TargetMachine &TM; - -public: - explicit ARM64FrameLowering(const ARM64TargetMachine &TM, - const ARM64Subtarget &STI) - : TargetFrameLowering(StackGrowsDown, 16, 0, 16, - false /*StackRealignable*/), - TM(TM) {} - - void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned FramePtr) const; - - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into - /// the function. - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - - int getFrameIndexOffset(const MachineFunction &MF, int FI) const; - int getFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg) const; - int resolveFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg, - bool PreferFP = false) const; - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; - - bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; - - /// \brief Can this function use the red zone for local allocations. - bool canUseRedZone(const MachineFunction &MF) const; - - bool hasFP(const MachineFunction &MF) const; - bool hasReservedCallFrame(const MachineFunction &MF) const; - - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const; -}; - -} // End llvm namespace - -#endif diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp deleted file mode 100644 index 2e234c9..0000000 --- a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp +++ /dev/null @@ -1,2381 +0,0 @@ -//===-- ARM64ISelDAGToDAG.cpp - A dag to dag inst selector for ARM64 ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines an instruction selector for the ARM64 target. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-isel" -#include "ARM64TargetMachine.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/IR/Function.h" // To access function attributes. -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -//===--------------------------------------------------------------------===// -/// ARM64DAGToDAGISel - ARM64 specific code to select ARM64 machine -/// instructions for SelectionDAG operations. 
-/// -namespace { - -class ARM64DAGToDAGISel : public SelectionDAGISel { - ARM64TargetMachine &TM; - - /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can - /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; - - bool ForCodeSize; - -public: - explicit ARM64DAGToDAGISel(ARM64TargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), TM(tm), - Subtarget(&TM.getSubtarget<ARM64Subtarget>()), ForCodeSize(false) {} - - virtual const char *getPassName() const { - return "ARM64 Instruction Selection"; - } - - virtual bool runOnMachineFunction(MachineFunction &MF) { - AttributeSet FnAttrs = MF.getFunction()->getAttributes(); - ForCodeSize = - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize) || - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); - return SelectionDAGISel::runOnMachineFunction(MF); - } - - SDNode *Select(SDNode *Node); - - /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for - /// inline asm expressions. - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps); - - SDNode *SelectMLAV64LaneV128(SDNode *N); - SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N); - bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); - bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); - bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); - bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { - return SelectShiftedRegister(N, false, Reg, Shift); - } - bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { - return SelectShiftedRegister(N, true, Reg, Shift); - } - bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeIndexed(N, 1, Base, OffImm); - } - bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeIndexed(N, 2, Base, OffImm); - } - bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeIndexed(N, 4, Base, OffImm); - } - bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeIndexed(N, 8, Base, OffImm); - } - bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeIndexed(N, 16, Base, OffImm); - } - bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeUnscaled(N, 1, Base, OffImm); - } - bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeUnscaled(N, 2, Base, OffImm); - } - bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeUnscaled(N, 4, Base, OffImm); - } - bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeUnscaled(N, 8, Base, OffImm); - } - bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeUnscaled(N, 16, Base, OffImm); - } - - bool SelectAddrModeRO8(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 1, Base, Offset, Imm); - } - bool SelectAddrModeRO16(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 2, Base, Offset, Imm); - } - bool SelectAddrModeRO32(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 4, Base, 
Offset, Imm); - } - bool SelectAddrModeRO64(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 8, Base, Offset, Imm); - } - bool SelectAddrModeRO128(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 16, Base, Offset, Imm); - } - bool SelectAddrModeNoIndex(SDValue N, SDValue &Val); - - /// Form sequences of consecutive 64/128-bit registers for use in NEON - /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have - /// between 1 and 4 elements. If it contains a single element that is returned - /// unchanged; otherwise a REG_SEQUENCE value is returned. - SDValue createDTuple(ArrayRef<SDValue> Vecs); - SDValue createQTuple(ArrayRef<SDValue> Vecs); - - /// Generic helper for the createDTuple/createQTuple - /// functions. Those should almost always be called instead. - SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[], - unsigned SubRegs[]); - - SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); - - SDNode *SelectIndexedLoad(SDNode *N, bool &Done); - - SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, - unsigned SubRegIdx); - SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); - - SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); - SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); - - SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node); - SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node); - - SDNode *SelectAtomic(SDNode *Node, unsigned Op8, unsigned Op16, unsigned Op32, - unsigned Op64); - - SDNode *SelectBitfieldExtractOp(SDNode *N); - SDNode *SelectBitfieldInsertOp(SDNode *N); - - SDNode *SelectLIBM(SDNode *N); - -// Include the pieces autogenerated from the target description. -#include "ARM64GenDAGISel.inc" - -private: - bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, - SDValue &Shift); - bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, - SDValue &OffImm); - bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, - SDValue &OffImm); - bool SelectAddrModeRO(SDValue N, unsigned Size, SDValue &Base, - SDValue &Offset, SDValue &Imm); - bool isWorthFolding(SDValue V) const; - bool SelectExtendedSHL(SDValue N, unsigned Size, SDValue &Offset, - SDValue &Imm); -}; -} // end anonymous namespace - -/// isIntImmediate - This method tests to see if the node is a constant -/// operand. If so Imm will receive the 32-bit value. -static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { - if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) { - Imm = C->getZExtValue(); - return true; - } - return false; -} - -// isIntImmediate - This method tests to see if a constant operand. -// If so Imm will receive the value. -static bool isIntImmediate(SDValue N, uint64_t &Imm) { - return isIntImmediate(N.getNode(), Imm); -} - -// isOpcWithIntImmediate - This method tests to see if the node is a specific -// opcode and that it has a immediate integer right operand. -// If so Imm will receive the 32 bit value. 
-static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, - uint64_t &Imm) { - return N->getOpcode() == Opc && - isIntImmediate(N->getOperand(1).getNode(), Imm); -} - -bool ARM64DAGToDAGISel::SelectAddrModeNoIndex(SDValue N, SDValue &Val) { - EVT ValTy = N.getValueType(); - if (ValTy != MVT::i64) - return false; - Val = N; - return true; -} - -bool ARM64DAGToDAGISel::SelectInlineAsmMemoryOperand( - const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { - assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); - // Require the address to be in a register. That is safe for all ARM64 - // variants and it is hard to do anything much smarter without knowing - // how the operand is used. - OutOps.push_back(Op); - return false; -} - -/// SelectArithImmed - Select an immediate value that can be represented as -/// a 12-bit value shifted left by either 0 or 12. If so, return true with -/// Val set to the 12-bit value and Shift set to the shifter operand. -bool ARM64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, - SDValue &Shift) { - // This function is called from the addsub_shifted_imm ComplexPattern, - // which lists [imm] as the list of opcode it's interested in, however - // we still need to check whether the operand is actually an immediate - // here because the ComplexPattern opcode list is only used in - // root-level opcode matching. - if (!isa<ConstantSDNode>(N.getNode())) - return false; - - uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); - unsigned ShiftAmt; - - if (Immed >> 12 == 0) { - ShiftAmt = 0; - } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { - ShiftAmt = 12; - Immed = Immed >> 12; - } else - return false; - - unsigned ShVal = ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt); - Val = CurDAG->getTargetConstant(Immed, MVT::i32); - Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); - return true; -} - -/// SelectNegArithImmed - As above, but negates the value before trying to -/// select it. -bool ARM64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, - SDValue &Shift) { - // This function is called from the addsub_shifted_imm ComplexPattern, - // which lists [imm] as the list of opcode it's interested in, however - // we still need to check whether the operand is actually an immediate - // here because the ComplexPattern opcode list is only used in - // root-level opcode matching. - if (!isa<ConstantSDNode>(N.getNode())) - return false; - - // The immediate operand must be a 24-bit zero-extended immediate. - uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); - - // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" - // have the opposite effect on the C flag, so this pattern mustn't match under - // those circumstances. - if (Immed == 0) - return false; - - if (N.getValueType() == MVT::i32) - Immed = ~((uint32_t)Immed) + 1; - else - Immed = ~Immed + 1ULL; - if (Immed & 0xFFFFFFFFFF000000ULL) - return false; - - Immed &= 0xFFFFFFULL; - return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift); -} - -/// getShiftTypeForNode - Translate a shift node to the corresponding -/// ShiftType value. 
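The arithmetic-immediate selection above accepts only values expressible as a 12-bit quantity optionally shifted left by 12, which is what ADD/SUB (immediate) can encode. A self-contained restatement of that predicate, independent of the SelectionDAG machinery (the function name is illustrative):

  #include <cstdint>

  // Returns true if Imm fits the AArch64 add/sub immediate form: a 12-bit
  // value shifted left by 0 or by 12.  On success, Encoded holds the 12-bit
  // payload and ShiftAmt the shift amount (0 or 12).
  static bool isAddSubImmediate(uint64_t Imm, unsigned &Encoded,
                                unsigned &ShiftAmt) {
    if ((Imm >> 12) == 0) {                       // fits in the low 12 bits
      Encoded = static_cast<unsigned>(Imm);
      ShiftAmt = 0;
      return true;
    }
    if ((Imm & 0xfff) == 0 && (Imm >> 24) == 0) { // 12 bits, shifted by 12
      Encoded = static_cast<unsigned>(Imm >> 12);
      ShiftAmt = 12;
      return true;
    }
    return false;            // needs a MOV/MOVK sequence or negated form
  }

For instance, 0x456000 passes as #0x456 with LSL #12, while 0x456001 fails both checks and falls back to materializing the constant.
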
-static ARM64_AM::ShiftType getShiftTypeForNode(SDValue N) { - switch (N.getOpcode()) { - default: - return ARM64_AM::InvalidShift; - case ISD::SHL: - return ARM64_AM::LSL; - case ISD::SRL: - return ARM64_AM::LSR; - case ISD::SRA: - return ARM64_AM::ASR; - case ISD::ROTR: - return ARM64_AM::ROR; - } -} - -/// \brief Determine wether it is worth to fold V into an extended register. -bool ARM64DAGToDAGISel::isWorthFolding(SDValue V) const { - // it hurts if the a value is used at least twice, unless we are optimizing - // for code size. - if (ForCodeSize || V.hasOneUse()) - return true; - return false; -} - -/// SelectShiftedRegister - Select a "shifted register" operand. If the value -/// is not shifted, set the Shift operand to default of "LSL 0". The logical -/// instructions allow the shifted register to be rotated, but the arithmetic -/// instructions do not. The AllowROR parameter specifies whether ROR is -/// supported. -bool ARM64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, - SDValue &Reg, SDValue &Shift) { - ARM64_AM::ShiftType ShType = getShiftTypeForNode(N); - if (ShType == ARM64_AM::InvalidShift) - return false; - if (!AllowROR && ShType == ARM64_AM::ROR) - return false; - - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - unsigned BitSize = N.getValueType().getSizeInBits(); - unsigned Val = RHS->getZExtValue() & (BitSize - 1); - unsigned ShVal = ARM64_AM::getShifterImm(ShType, Val); - - Reg = N.getOperand(0); - Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); - return isWorthFolding(N); - } - - return false; -} - -/// getExtendTypeForNode - Translate an extend node to the corresponding -/// ExtendType value. -static ARM64_AM::ExtendType getExtendTypeForNode(SDValue N, - bool IsLoadStore = false) { - if (N.getOpcode() == ISD::SIGN_EXTEND || - N.getOpcode() == ISD::SIGN_EXTEND_INREG) { - EVT SrcVT; - if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) - SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); - else - SrcVT = N.getOperand(0).getValueType(); - - if (!IsLoadStore && SrcVT == MVT::i8) - return ARM64_AM::SXTB; - else if (!IsLoadStore && SrcVT == MVT::i16) - return ARM64_AM::SXTH; - else if (SrcVT == MVT::i32) - return ARM64_AM::SXTW; - else if (SrcVT == MVT::i64) - return ARM64_AM::SXTX; - - return ARM64_AM::InvalidExtend; - } else if (N.getOpcode() == ISD::ZERO_EXTEND || - N.getOpcode() == ISD::ANY_EXTEND) { - EVT SrcVT = N.getOperand(0).getValueType(); - if (!IsLoadStore && SrcVT == MVT::i8) - return ARM64_AM::UXTB; - else if (!IsLoadStore && SrcVT == MVT::i16) - return ARM64_AM::UXTH; - else if (SrcVT == MVT::i32) - return ARM64_AM::UXTW; - else if (SrcVT == MVT::i64) - return ARM64_AM::UXTX; - - return ARM64_AM::InvalidExtend; - } else if (N.getOpcode() == ISD::AND) { - ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); - if (!CSD) - return ARM64_AM::InvalidExtend; - uint64_t AndMask = CSD->getZExtValue(); - - switch (AndMask) { - default: - return ARM64_AM::InvalidExtend; - case 0xFF: - return !IsLoadStore ? ARM64_AM::UXTB : ARM64_AM::InvalidExtend; - case 0xFFFF: - return !IsLoadStore ? ARM64_AM::UXTH : ARM64_AM::InvalidExtend; - case 0xFFFFFFFF: - return ARM64_AM::UXTW; - } - } - - return ARM64_AM::InvalidExtend; -} - -// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. 
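Note that getExtendTypeForNode above also treats an AND with certain masks as an implicit zero-extend, since ANDing with 0xFF, 0xFFFF, or 0xFFFFFFFF is equivalent to UXTB, UXTH, or UXTW. A standalone sketch of that mask-to-extend mapping, restating the checks shown above with illustrative names:

  #include <cstdint>

  enum class ExtendKind { Invalid, UXTB, UXTH, UXTW };

  // An AND whose mask is a contiguous low-bit run of 8, 16, or 32 ones behaves
  // like the corresponding zero-extend, so it can fold into the extended-
  // register operand form.  For load/store addressing the 8- and 16-bit forms
  // are rejected, mirroring the IsLoadStore cases above.
  static ExtendKind extendFromAndMask(uint64_t Mask, bool IsLoadStore) {
    switch (Mask) {
    case 0xFFu:       return IsLoadStore ? ExtendKind::Invalid : ExtendKind::UXTB;
    case 0xFFFFu:     return IsLoadStore ? ExtendKind::Invalid : ExtendKind::UXTH;
    case 0xFFFFFFFFu: return ExtendKind::UXTW;
    default:          return ExtendKind::Invalid;
    }
  }
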
-static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { - if (DL->getOpcode() != ARM64ISD::DUPLANE16 && - DL->getOpcode() != ARM64ISD::DUPLANE32) - return false; - - SDValue SV = DL->getOperand(0); - if (SV.getOpcode() != ISD::INSERT_SUBVECTOR) - return false; - - SDValue EV = SV.getOperand(1); - if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR) - return false; - - ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode()); - ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode()); - LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue(); - LaneOp = EV.getOperand(0); - - return true; -} - -// Helper for SelectOpcV64LaneV128 - Recogzine operatinos where one operand is a -// high lane extract. -static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, - SDValue &LaneOp, int &LaneIdx) { - - if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) { - std::swap(Op0, Op1); - if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) - return false; - } - StdOp = Op1; - return true; -} - -/// SelectMLAV64LaneV128 - ARM64 supports 64-bit vector MLAs (v4i16 and v2i32) -/// where one multiplicand is a lane in the upper half of a 128-bit vector. -/// Recognize and select this so that we don't emit unnecessary lane extracts. -SDNode *ARM64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); - SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. - SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA. - int LaneIdx = -1; // Will hold the lane index. - - if (Op1.getOpcode() != ISD::MUL || - !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, - LaneIdx)) { - std::swap(Op0, Op1); - if (Op1.getOpcode() != ISD::MUL || - !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, - LaneIdx)) - return 0; - } - - SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64); - - SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; - - unsigned MLAOpc = ~0U; - - switch (N->getSimpleValueType(0).SimpleTy) { - default: - llvm_unreachable("Unrecognized MLA."); - case MVT::v4i16: - MLAOpc = ARM64::MLAv4i16_indexed; - break; - case MVT::v2i32: - MLAOpc = ARM64::MLAv2i32_indexed; - break; - } - - return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops); -} - -SDNode *ARM64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { - SDValue SMULLOp0; - SDValue SMULLOp1; - int LaneIdx; - - if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1, - LaneIdx)) - return 0; - - SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64); - - SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; - - unsigned SMULLOpc = ~0U; - - if (IntNo == Intrinsic::arm64_neon_smull) { - switch (N->getSimpleValueType(0).SimpleTy) { - default: - llvm_unreachable("Unrecognized SMULL."); - case MVT::v4i32: - SMULLOpc = ARM64::SMULLv4i16_indexed; - break; - case MVT::v2i64: - SMULLOpc = ARM64::SMULLv2i32_indexed; - break; - } - } else if (IntNo == Intrinsic::arm64_neon_umull) { - switch (N->getSimpleValueType(0).SimpleTy) { - default: - llvm_unreachable("Unrecognized SMULL."); - case MVT::v4i32: - SMULLOpc = ARM64::UMULLv4i16_indexed; - break; - case MVT::v2i64: - SMULLOpc = ARM64::UMULLv2i32_indexed; - break; - } - } else - llvm_unreachable("Unrecognized intrinsic."); - - return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops); -} - -/// SelectArithExtendedRegister - Select a "extended register" 
operand. This -/// operand folds in an extend followed by an optional left shift. -bool ARM64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, - SDValue &Shift) { - unsigned ShiftVal = 0; - ARM64_AM::ExtendType Ext; - - if (N.getOpcode() == ISD::SHL) { - ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); - if (!CSD) - return false; - ShiftVal = CSD->getZExtValue(); - if ((ShiftVal & 0x3) != ShiftVal) - return false; - - Ext = getExtendTypeForNode(N.getOperand(0)); - if (Ext == ARM64_AM::InvalidExtend) - return false; - - Reg = N.getOperand(0).getOperand(0); - } else { - Ext = getExtendTypeForNode(N); - if (Ext == ARM64_AM::InvalidExtend) - return false; - - Reg = N.getOperand(0); - } - - // ARM64 mandates that the RHS of the operation must use the smallest - // register classs that could contain the size being extended from. Thus, - // if we're folding a (sext i8), we need the RHS to be a GPR32, even though - // there might not be an actual 32-bit value in the program. We can - // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. - if (Reg.getValueType() == MVT::i64 && Ext != ARM64_AM::UXTX && - Ext != ARM64_AM::SXTX) { - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); - MachineSDNode *Node = CurDAG->getMachineNode( - TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, Reg, SubReg); - Reg = SDValue(Node, 0); - } - - Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32); - return isWorthFolding(N); -} - -/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit -/// immediate" address. The "Size" argument is the size in bytes of the memory -/// reference, which determines the scale. -bool ARM64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, - SDValue &Base, SDValue &OffImm) { - const TargetLowering *TLI = getTargetLowering(); - if (N.getOpcode() == ISD::FrameIndex) { - int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); - OffImm = CurDAG->getTargetConstant(0, MVT::i64); - return true; - } - - if (N.getOpcode() == ARM64ISD::ADDlow) { - GlobalAddressSDNode *GAN = - dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); - Base = N.getOperand(0); - OffImm = N.getOperand(1); - if (!GAN) - return true; - - const GlobalValue *GV = GAN->getGlobal(); - unsigned Alignment = GV->getAlignment(); - const DataLayout *DL = TLI->getDataLayout(); - if (Alignment == 0 && !Subtarget->isTargetDarwin()) - Alignment = DL->getABITypeAlignment(GV->getType()->getElementType()); - - if (Alignment >= Size) - return true; - } - - if (CurDAG->isBaseWithConstantOffset(N)) { - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - int64_t RHSC = (int64_t)RHS->getZExtValue(); - unsigned Scale = Log2_32(Size); - if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { - Base = N.getOperand(0); - if (Base.getOpcode() == ISD::FrameIndex) { - int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); - } - OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64); - return true; - } - } - } - - // Before falling back to our general case, check if the unscaled - // instructions can handle this. If so, that's preferable. - if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) - return false; - - // Base only. The address will be materialized into a register before - // the memory is accessed. 
- // add x0, Xbase, #offset - // ldr x0, [x0] - Base = N; - OffImm = CurDAG->getTargetConstant(0, MVT::i64); - return true; -} - -/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit -/// immediate" address. This should only match when there is an offset that -/// is not valid for a scaled immediate addressing mode. The "Size" argument -/// is the size in bytes of the memory reference, which is needed here to know -/// what is valid for a scaled immediate. -bool ARM64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, - SDValue &Base, SDValue &OffImm) { - if (!CurDAG->isBaseWithConstantOffset(N)) - return false; - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - int64_t RHSC = RHS->getSExtValue(); - // If the offset is valid as a scaled immediate, don't match here. - if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && - RHSC < (0x1000 << Log2_32(Size))) - return false; - if (RHSC >= -256 && RHSC < 256) { - Base = N.getOperand(0); - if (Base.getOpcode() == ISD::FrameIndex) { - int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - const TargetLowering *TLI = getTargetLowering(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); - } - OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64); - return true; - } - } - return false; -} - -static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); - SDValue ImpDef = SDValue( - CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64), - 0); - MachineSDNode *Node = CurDAG->getMachineNode( - TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg); - return SDValue(Node, 0); -} - -static SDValue WidenIfNeeded(SelectionDAG *CurDAG, SDValue N) { - if (N.getValueType() == MVT::i32) { - return Widen(CurDAG, N); - } - - return N; -} - -/// \brief Check if the given SHL node (\p N), can be used to form an -/// extended register for an addressing mode. -bool ARM64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, - SDValue &Offset, SDValue &Imm) { - assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); - ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); - if (CSD && (CSD->getZExtValue() & 0x7) == CSD->getZExtValue()) { - - ARM64_AM::ExtendType Ext = getExtendTypeForNode(N.getOperand(0), true); - if (Ext == ARM64_AM::InvalidExtend) { - Ext = ARM64_AM::UXTX; - Offset = WidenIfNeeded(CurDAG, N.getOperand(0)); - } else { - Offset = WidenIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); - } - - unsigned LegalShiftVal = Log2_32(Size); - unsigned ShiftVal = CSD->getZExtValue(); - - if (ShiftVal != 0 && ShiftVal != LegalShiftVal) - return false; - - Imm = CurDAG->getTargetConstant( - ARM64_AM::getMemExtendImm(Ext, ShiftVal != 0), MVT::i32); - if (isWorthFolding(N)) - return true; - } - return false; -} - -bool ARM64DAGToDAGISel::SelectAddrModeRO(SDValue N, unsigned Size, - SDValue &Base, SDValue &Offset, - SDValue &Imm) { - if (N.getOpcode() != ISD::ADD) - return false; - SDValue LHS = N.getOperand(0); - SDValue RHS = N.getOperand(1); - - // We don't want to match immediate adds here, because they are better lowered - // to the register-immediate addressing modes. - if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) - return false; - - // Check if this particular node is reused in any non-memory related - // operation. If yes, do not try to fold this node into the address - // computation, since the computation will be kept. 
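The two immediate addressing routines above split offsets between the LDR/STR unsigned-immediate form, whose offset is scaled by the access size and held in a 12-bit field, and the LDUR/STUR unscaled form, which takes a signed 9-bit byte offset. A sketch of that classification for a byte offset and access size, with ranges taken from the checks above (names are illustrative):

  #include <cstdint>

  enum class AddrForm { ScaledImm, UnscaledImm, BaseOnly };

  // Size is the access width in bytes (1, 2, 4, 8 or 16).  The scaled form
  // needs a non-negative offset that is a multiple of Size whose scaled value
  // fits in 12 bits; the unscaled form takes any byte offset in [-256, 256).
  static AddrForm classifyOffset(int64_t Offset, unsigned Size) {
    if (Offset >= 0 && (Offset % Size) == 0 && (Offset / Size) < 4096)
      return AddrForm::ScaledImm;    // e.g. ldr x0, [xN, #Offset]
    if (Offset >= -256 && Offset < 256)
      return AddrForm::UnscaledImm;  // e.g. ldur x0, [xN, #Offset]
    return AddrForm::BaseOnly;       // materialize the address first
  }
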
- const SDNode *Node = N.getNode(); - for (SDNode::use_iterator UI = Node->use_begin(), UE = Node->use_end(); - UI != UE; ++UI) { - if (!isa<MemSDNode>(*UI)) - return false; - } - - // Remember if it is worth folding N when it produces extended register. - bool IsExtendedRegisterWorthFolding = isWorthFolding(N); - - // Try to match a shifted extend on the RHS. - if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && - SelectExtendedSHL(RHS, Size, Offset, Imm)) { - Base = LHS; - return true; - } - - // Try to match a shifted extend on the LHS. - if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && - SelectExtendedSHL(LHS, Size, Offset, Imm)) { - Base = RHS; - return true; - } - - ARM64_AM::ExtendType Ext = ARM64_AM::UXTX; - // Try to match an unshifted extend on the LHS. - if (IsExtendedRegisterWorthFolding && - (Ext = getExtendTypeForNode(LHS, true)) != ARM64_AM::InvalidExtend) { - Base = RHS; - Offset = WidenIfNeeded(CurDAG, LHS.getOperand(0)); - Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false), - MVT::i32); - if (isWorthFolding(LHS)) - return true; - } - - // Try to match an unshifted extend on the RHS. - if (IsExtendedRegisterWorthFolding && - (Ext = getExtendTypeForNode(RHS, true)) != ARM64_AM::InvalidExtend) { - Base = LHS; - Offset = WidenIfNeeded(CurDAG, RHS.getOperand(0)); - Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false), - MVT::i32); - if (isWorthFolding(RHS)) - return true; - } - - // Match any non-shifted, non-extend, non-immediate add expression. - Base = LHS; - Offset = WidenIfNeeded(CurDAG, RHS); - Ext = ARM64_AM::UXTX; - Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false), - MVT::i32); - // Reg1 + Reg2 is free: no check needed. - return true; -} - -SDValue ARM64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { - static unsigned RegClassIDs[] = { ARM64::DDRegClassID, ARM64::DDDRegClassID, - ARM64::DDDDRegClassID }; - static unsigned SubRegs[] = { ARM64::dsub0, ARM64::dsub1, - ARM64::dsub2, ARM64::dsub3 }; - - return createTuple(Regs, RegClassIDs, SubRegs); -} - -SDValue ARM64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { - static unsigned RegClassIDs[] = { ARM64::QQRegClassID, ARM64::QQQRegClassID, - ARM64::QQQQRegClassID }; - static unsigned SubRegs[] = { ARM64::qsub0, ARM64::qsub1, - ARM64::qsub2, ARM64::qsub3 }; - - return createTuple(Regs, RegClassIDs, SubRegs); -} - -SDValue ARM64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, - unsigned RegClassIDs[], - unsigned SubRegs[]) { - // There's no special register-class for a vector-list of 1 element: it's just - // a vector. - if (Regs.size() == 1) - return Regs[0]; - - assert(Regs.size() >= 2 && Regs.size() <= 4); - - SDLoc DL(Regs[0].getNode()); - - SmallVector<SDValue, 4> Ops; - - // First operand of REG_SEQUENCE is the desired RegClass. - Ops.push_back( - CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32)); - - // Then we get pairs of source & subregister-position for the components. - for (unsigned i = 0; i < Regs.size(); ++i) { - Ops.push_back(Regs[i]); - Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32)); - } - - SDNode *N = - CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); - return SDValue(N, 0); -} - -SDNode *ARM64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, - unsigned Opc, bool isExt) { - SDLoc dl(N); - EVT VT = N->getValueType(0); - - unsigned ExtOff = isExt; - - // Form a REG_SEQUENCE to force register allocation. 
- unsigned Vec0Off = ExtOff + 1; - SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off, - N->op_begin() + Vec0Off + NumVecs); - SDValue RegSeq = createQTuple(Regs); - - SmallVector<SDValue, 6> Ops; - if (isExt) - Ops.push_back(N->getOperand(1)); - Ops.push_back(RegSeq); - Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); - return CurDAG->getMachineNode(Opc, dl, VT, Ops); -} - -SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { - LoadSDNode *LD = cast<LoadSDNode>(N); - if (LD->isUnindexed()) - return NULL; - EVT VT = LD->getMemoryVT(); - EVT DstVT = N->getValueType(0); - ISD::MemIndexedMode AM = LD->getAddressingMode(); - bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; - - // We're not doing validity checking here. That was done when checking - // if we should mark the load as indexed or not. We're just selecting - // the right instruction. - unsigned Opcode = 0; - - ISD::LoadExtType ExtType = LD->getExtensionType(); - bool InsertTo64 = false; - if (VT == MVT::i64) - Opcode = IsPre ? ARM64::LDRXpre_isel : ARM64::LDRXpost_isel; - else if (VT == MVT::i32) { - if (ExtType == ISD::NON_EXTLOAD) - Opcode = IsPre ? ARM64::LDRWpre_isel : ARM64::LDRWpost_isel; - else if (ExtType == ISD::SEXTLOAD) - Opcode = IsPre ? ARM64::LDRSWpre_isel : ARM64::LDRSWpost_isel; - else { - Opcode = IsPre ? ARM64::LDRWpre_isel : ARM64::LDRWpost_isel; - InsertTo64 = true; - // The result of the load is only i32. It's the subreg_to_reg that makes - // it into an i64. - DstVT = MVT::i32; - } - } else if (VT == MVT::i16) { - if (ExtType == ISD::SEXTLOAD) { - if (DstVT == MVT::i64) - Opcode = IsPre ? ARM64::LDRSHXpre_isel : ARM64::LDRSHXpost_isel; - else - Opcode = IsPre ? ARM64::LDRSHWpre_isel : ARM64::LDRSHWpost_isel; - } else { - Opcode = IsPre ? ARM64::LDRHHpre_isel : ARM64::LDRHHpost_isel; - InsertTo64 = DstVT == MVT::i64; - // The result of the load is only i32. It's the subreg_to_reg that makes - // it into an i64. - DstVT = MVT::i32; - } - } else if (VT == MVT::i8) { - if (ExtType == ISD::SEXTLOAD) { - if (DstVT == MVT::i64) - Opcode = IsPre ? ARM64::LDRSBXpre_isel : ARM64::LDRSBXpost_isel; - else - Opcode = IsPre ? ARM64::LDRSBWpre_isel : ARM64::LDRSBWpost_isel; - } else { - Opcode = IsPre ? ARM64::LDRBBpre_isel : ARM64::LDRBBpost_isel; - InsertTo64 = DstVT == MVT::i64; - // The result of the load is only i32. It's the subreg_to_reg that makes - // it into an i64. - DstVT = MVT::i32; - } - } else if (VT == MVT::f32) { - Opcode = IsPre ? ARM64::LDRSpre_isel : ARM64::LDRSpost_isel; - } else if (VT == MVT::f64) { - Opcode = IsPre ? ARM64::LDRDpre_isel : ARM64::LDRDpost_isel; - } else - return NULL; - SDValue Chain = LD->getChain(); - SDValue Base = LD->getBasePtr(); - ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); - int OffsetVal = (int)OffsetOp->getZExtValue(); - SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64); - SDValue Ops[] = { Base, Offset, Chain }; - SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), DstVT, MVT::i64, - MVT::Other, Ops); - // Either way, we're replacing the node, so tell the caller that. 
- Done = true; - if (InsertTo64) { - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); - SDNode *Sub = CurDAG->getMachineNode( - ARM64::SUBREG_TO_REG, SDLoc(N), MVT::i64, - CurDAG->getTargetConstant(0, MVT::i64), SDValue(Res, 0), SubReg); - ReplaceUses(SDValue(N, 0), SDValue(Sub, 0)); - ReplaceUses(SDValue(N, 1), SDValue(Res, 1)); - ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); - return 0; - } - return Res; -} - -SDNode *ARM64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, - unsigned SubRegIdx) { - SDLoc dl(N); - EVT VT = N->getValueType(0); - SDValue Chain = N->getOperand(0); - - SmallVector<SDValue, 6> Ops; - Ops.push_back(N->getOperand(2)); // Mem operand; - Ops.push_back(Chain); - - std::vector<EVT> ResTys; - ResTys.push_back(MVT::Untyped); - ResTys.push_back(MVT::Other); - - SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); - SDValue SuperReg = SDValue(Ld, 0); - - // MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - // MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); - // cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); - - switch (NumVecs) { - case 4: - ReplaceUses(SDValue(N, 3), CurDAG->getTargetExtractSubreg(SubRegIdx + 3, dl, - VT, SuperReg)); - // FALLTHROUGH - case 3: - ReplaceUses(SDValue(N, 2), CurDAG->getTargetExtractSubreg(SubRegIdx + 2, dl, - VT, SuperReg)); - // FALLTHROUGH - case 2: - ReplaceUses(SDValue(N, 1), CurDAG->getTargetExtractSubreg(SubRegIdx + 1, dl, - VT, SuperReg)); - ReplaceUses(SDValue(N, 0), - CurDAG->getTargetExtractSubreg(SubRegIdx, dl, VT, SuperReg)); - break; - case 1: - ReplaceUses(SDValue(N, 0), SuperReg); - break; - } - - ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); - - return 0; -} - -SDNode *ARM64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, - unsigned Opc) { - SDLoc dl(N); - EVT VT = N->getOperand(2)->getValueType(0); - - // Form a REG_SEQUENCE to force register allocation. - bool Is128Bit = VT.getSizeInBits() == 128; - SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); - SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); - - SmallVector<SDValue, 6> Ops; - Ops.push_back(RegSeq); - Ops.push_back(N->getOperand(NumVecs + 2)); - Ops.push_back(N->getOperand(0)); - SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); - - return St; -} - -/// WidenVector - Given a value in the V64 register class, produce the -/// equivalent value in the V128 register class. -class WidenVector { - SelectionDAG &DAG; - -public: - WidenVector(SelectionDAG &DAG) : DAG(DAG) {} - - SDValue operator()(SDValue V64Reg) { - EVT VT = V64Reg.getValueType(); - unsigned NarrowSize = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType().getSimpleVT(); - MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); - SDLoc DL(V64Reg); - - SDValue Undef = - SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); - return DAG.getTargetInsertSubreg(ARM64::dsub, DL, WideTy, Undef, V64Reg); - } -}; - -/// NarrowVector - Given a value in the V128 register class, produce the -/// equivalent value in the V64 register class. 
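WidenVector above and NarrowVector below move values between the 64-bit D and 128-bit Q views of the same vector register: widening inserts the value into the low half (dsub) of an undefined 128-bit register, narrowing extracts that half back out. The type arithmetic is just doubling or halving the lane count; a minimal sketch of that outside the DAG, with an illustrative stand-in for the value type:

  #include <cassert>

  // Minimal stand-in for a fixed vector type: element width in bits and lane
  // count.  v4i16 is {16, 4} (64 bits total); v8i16 is {16, 8} (128 bits).
  struct VecTy {
    unsigned EltBits;
    unsigned NumElts;
    unsigned totalBits() const { return EltBits * NumElts; }
  };

  static VecTy widenTo128(VecTy V64) {
    assert(V64.totalBits() == 64 && "expected a 64-bit vector");
    return {V64.EltBits, V64.NumElts * 2};   // same element type, twice the lanes
  }

  static VecTy narrowTo64(VecTy V128) {
    assert(V128.totalBits() == 128 && "expected a 128-bit vector");
    return {V128.EltBits, V128.NumElts / 2}; // keep the low half (dsub)
  }
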
-static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { - EVT VT = V128Reg.getValueType(); - unsigned WideSize = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType().getSimpleVT(); - MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); - - return DAG.getTargetExtractSubreg(ARM64::dsub, SDLoc(V128Reg), NarrowTy, - V128Reg); -} - -SDNode *ARM64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, - unsigned Opc) { - SDLoc dl(N); - EVT VT = N->getValueType(0); - bool Narrow = VT.getSizeInBits() == 64; - - // Form a REG_SEQUENCE to force register allocation. - SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); - - if (Narrow) - std::transform(Regs.begin(), Regs.end(), Regs.begin(), - WidenVector(*CurDAG)); - - SDValue RegSeq = createQTuple(Regs); - - std::vector<EVT> ResTys; - ResTys.push_back(MVT::Untyped); - ResTys.push_back(MVT::Other); - - unsigned LaneNo = - cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); - - SmallVector<SDValue, 6> Ops; - Ops.push_back(RegSeq); - Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); - Ops.push_back(N->getOperand(NumVecs + 3)); - Ops.push_back(N->getOperand(0)); - SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); - SDValue SuperReg = SDValue(Ld, 0); - - EVT WideVT = RegSeq.getOperand(1)->getValueType(0); - switch (NumVecs) { - case 4: { - SDValue NV3 = - CurDAG->getTargetExtractSubreg(ARM64::qsub3, dl, WideVT, SuperReg); - if (Narrow) - ReplaceUses(SDValue(N, 3), NarrowVector(NV3, *CurDAG)); - else - ReplaceUses(SDValue(N, 3), NV3); - } - // FALLTHROUGH - case 3: { - SDValue NV2 = - CurDAG->getTargetExtractSubreg(ARM64::qsub2, dl, WideVT, SuperReg); - if (Narrow) - ReplaceUses(SDValue(N, 2), NarrowVector(NV2, *CurDAG)); - else - ReplaceUses(SDValue(N, 2), NV2); - } - // FALLTHROUGH - case 2: { - SDValue NV1 = - CurDAG->getTargetExtractSubreg(ARM64::qsub1, dl, WideVT, SuperReg); - SDValue NV0 = - CurDAG->getTargetExtractSubreg(ARM64::qsub0, dl, WideVT, SuperReg); - if (Narrow) { - ReplaceUses(SDValue(N, 1), NarrowVector(NV1, *CurDAG)); - ReplaceUses(SDValue(N, 0), NarrowVector(NV0, *CurDAG)); - } else { - ReplaceUses(SDValue(N, 1), NV1); - ReplaceUses(SDValue(N, 0), NV0); - } - break; - } - } - - ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); - - return Ld; -} - -SDNode *ARM64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, - unsigned Opc) { - SDLoc dl(N); - EVT VT = N->getOperand(2)->getValueType(0); - bool Narrow = VT.getSizeInBits() == 64; - - // Form a REG_SEQUENCE to force register allocation. - SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); - - if (Narrow) - std::transform(Regs.begin(), Regs.end(), Regs.begin(), - WidenVector(*CurDAG)); - - SDValue RegSeq = createQTuple(Regs); - - unsigned LaneNo = - cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); - - SmallVector<SDValue, 6> Ops; - Ops.push_back(RegSeq); - Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); - Ops.push_back(N->getOperand(NumVecs + 3)); - Ops.push_back(N->getOperand(0)); - SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); - - // Transfer memoperands. 
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); - cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); - - return St; -} - -SDNode *ARM64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, - unsigned Op16, unsigned Op32, - unsigned Op64) { - // Mostly direct translation to the given operations, except that we preserve - // the AtomicOrdering for use later on. - AtomicSDNode *AN = cast<AtomicSDNode>(Node); - EVT VT = AN->getMemoryVT(); - - unsigned Op; - if (VT == MVT::i8) - Op = Op8; - else if (VT == MVT::i16) - Op = Op16; - else if (VT == MVT::i32) - Op = Op32; - else if (VT == MVT::i64) - Op = Op64; - else - llvm_unreachable("Unexpected atomic operation"); - - SmallVector<SDValue, 4> Ops; - for (unsigned i = 1; i < AN->getNumOperands(); ++i) - Ops.push_back(AN->getOperand(i)); - - Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32)); - Ops.push_back(AN->getOperand(0)); // Chain moves to the end - - return CurDAG->SelectNodeTo(Node, Op, AN->getValueType(0), MVT::Other, - &Ops[0], Ops.size()); -} - -static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, - unsigned &Opc, SDValue &Opd0, - unsigned &LSB, unsigned &MSB, - unsigned NumberOfIgnoredLowBits, - bool BiggerPattern) { - assert(N->getOpcode() == ISD::AND && - "N must be a AND operation to call this function"); - - EVT VT = N->getValueType(0); - - // Here we can test the type of VT and return false when the type does not - // match, but since it is done prior to that call in the current context - // we turned that into an assert to avoid redundant code. - assert((VT == MVT::i32 || VT == MVT::i64) && - "Type checking must have been done before calling this function"); - - // FIXME: simplify-demanded-bits in DAGCombine will probably have - // changed the AND node to a 32-bit mask operation. We'll have to - // undo that as part of the transform here if we want to catch all - // the opportunities. - // Currently the NumberOfIgnoredLowBits argument helps to recover - // form these situations when matching bigger pattern (bitfield insert). - - // For unsigned extracts, check for a shift right and mask - uint64_t And_imm = 0; - if (!isOpcWithIntImmediate(N, ISD::AND, And_imm)) - return false; - - const SDNode *Op0 = N->getOperand(0).getNode(); - - // Because of simplify-demanded-bits in DAGCombine, the mask may have been - // simplified. Try to undo that - And_imm |= (1 << NumberOfIgnoredLowBits) - 1; - - // The immediate is a mask of the low bits iff imm & (imm+1) == 0 - if (And_imm & (And_imm + 1)) - return false; - - bool ClampMSB = false; - uint64_t Srl_imm = 0; - // Handle the SRL + ANY_EXTEND case. - if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && - isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) { - // Extend the incoming operand of the SRL to 64-bit. - Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); - // Make sure to clamp the MSB so that we preserve the semantics of the - // original operations. - ClampMSB = true; - } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) { - Opd0 = Op0->getOperand(0); - } else if (BiggerPattern) { - // Let's pretend a 0 shift right has been performed. - // The resulting code will be at least as good as the original one - // plus it may expose more opportunities for bitfield insert pattern. 
- // FIXME: Currently we limit this to the bigger pattern, because - // some optimizations expect AND and not UBFM - Opd0 = N->getOperand(0); - } else - return false; - - assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) && - "bad amount in shift node!"); - - LSB = Srl_imm; - MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm) - : CountTrailingOnes_64(And_imm)) - - 1; - if (ClampMSB) - // Since we're moving the extend before the right shift operation, we need - // to clamp the MSB to make sure we don't shift in undefined bits instead of - // the zeros which would get shifted in with the original right shift - // operation. - MSB = MSB > 31 ? 31 : MSB; - - Opc = VT == MVT::i32 ? ARM64::UBFMWri : ARM64::UBFMXri; - return true; -} - -static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, - unsigned &LSB, unsigned &MSB) { - // We are looking for the following pattern which basically extracts a single - // bit from the source value and places it in the LSB of the destination - // value, all other bits of the destination value or set to zero: - // - // Value2 = AND Value, MaskImm - // SRL Value2, ShiftImm - // - // with MaskImm >> ShiftImm == 1. - // - // This gets selected into a single UBFM: - // - // UBFM Value, ShiftImm, ShiftImm - // - - if (N->getOpcode() != ISD::SRL) - return false; - - uint64_t And_mask = 0; - if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask)) - return false; - - Opd0 = N->getOperand(0).getOperand(0); - - uint64_t Srl_imm = 0; - if (!isIntImmediate(N->getOperand(1), Srl_imm)) - return false; - - // Check whether we really have a one bit extract here. - if (And_mask >> Srl_imm == 0x1) { - if (N->getValueType(0) == MVT::i32) - Opc = ARM64::UBFMWri; - else - Opc = ARM64::UBFMXri; - - LSB = MSB = Srl_imm; - - return true; - } - - return false; -} - -static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, - unsigned &LSB, unsigned &MSB, - bool BiggerPattern) { - assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && - "N must be a SHR/SRA operation to call this function"); - - EVT VT = N->getValueType(0); - - // Here we can test the type of VT and return false when the type does not - // match, but since it is done prior to that call in the current context - // we turned that into an assert to avoid redundant code. - assert((VT == MVT::i32 || VT == MVT::i64) && - "Type checking must have been done before calling this function"); - - // Check for AND + SRL doing a one bit extract. - if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB)) - return true; - - // we're looking for a shift of a shift - uint64_t Shl_imm = 0; - if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { - Opd0 = N->getOperand(0).getOperand(0); - } else if (BiggerPattern) { - // Let's pretend a 0 shift left has been performed. - // FIXME: Currently we limit this to the bigger pattern case, - // because some optimizations expect AND and not UBFM - Opd0 = N->getOperand(0); - } else - return false; - - assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!"); - uint64_t Srl_imm = 0; - if (!isIntImmediate(N->getOperand(1), Srl_imm)) - return false; - - assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && - "bad amount in shift node!"); - // Note: The width operand is encoded as width-1. 
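The one-bit extract recognized above matches (X & Mask) >> Shift where Mask >> Shift == 1, i.e. the shift isolates exactly one surviving bit, and selects it as UBFM with lsb == msb == Shift. A quick standalone check of that predicate and the bit-test it implements (helper names are illustrative):

  #include <cassert>
  #include <cstdint>

  // (X & Mask) >> Shift extracts a single bit exactly when Mask >> Shift == 1;
  // that is the value UBFM Rd, Rn, #Shift, #Shift produces.
  static bool isOneBitExtract(uint64_t Mask, unsigned Shift) {
    return (Mask >> Shift) == 1;
  }

  // Reference semantics: the selected UBFM yields bit number Shift of X.
  static uint64_t extractBit(uint64_t X, unsigned Shift) {
    return (X >> Shift) & 1;
  }

  int main() {
    // 0x4 >> 2 == 1, so "and #0x4; lsr #2" is a one-bit extract of bit 2.
    assert(isOneBitExtract(0x4, 2));
    assert(extractBit(0xF5, 2) == 1 && ((0xF5 & 0x4) >> 2) == 1);
    // 0xC >> 2 == 3: more than one bit survives, so the pattern is rejected.
    assert(!isOneBitExtract(0xC, 2));
    return 0;
  }
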
- unsigned Width = VT.getSizeInBits() - Srl_imm - 1; - int sLSB = Srl_imm - Shl_imm; - if (sLSB < 0) - return false; - LSB = sLSB; - MSB = LSB + Width; - // SRA requires a signed extraction - if (VT == MVT::i32) - Opc = N->getOpcode() == ISD::SRA ? ARM64::SBFMWri : ARM64::UBFMWri; - else - Opc = N->getOpcode() == ISD::SRA ? ARM64::SBFMXri : ARM64::UBFMXri; - return true; -} - -static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, - SDValue &Opd0, unsigned &LSB, unsigned &MSB, - unsigned NumberOfIgnoredLowBits = 0, - bool BiggerPattern = false) { - if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) - return false; - - switch (N->getOpcode()) { - default: - if (!N->isMachineOpcode()) - return false; - break; - case ISD::AND: - return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB, - NumberOfIgnoredLowBits, BiggerPattern); - case ISD::SRL: - case ISD::SRA: - return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern); - } - - unsigned NOpc = N->getMachineOpcode(); - switch (NOpc) { - default: - return false; - case ARM64::SBFMWri: - case ARM64::UBFMWri: - case ARM64::SBFMXri: - case ARM64::UBFMXri: - Opc = NOpc; - Opd0 = N->getOperand(0); - LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); - MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); - return true; - } - // Unreachable - return false; -} - -SDNode *ARM64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { - unsigned Opc, LSB, MSB; - SDValue Opd0; - if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB)) - return NULL; - - EVT VT = N->getValueType(0); - SDValue Ops[] = { Opd0, CurDAG->getTargetConstant(LSB, VT), - CurDAG->getTargetConstant(MSB, VT) }; - return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 3); -} - -// Is mask a i32 or i64 binary sequence 1..10..0 and -// CountTrailingZeros(mask) == ExpectedTrailingZeros -static bool isHighMask(uint64_t Mask, unsigned ExpectedTrailingZeros, - unsigned NumberOfIgnoredHighBits, EVT VT) { - assert((VT == MVT::i32 || VT == MVT::i64) && - "i32 or i64 mask type expected!"); - - uint64_t ExpectedMask; - if (VT == MVT::i32) { - uint32_t ExpectedMaski32 = ~0 << ExpectedTrailingZeros; - ExpectedMask = ExpectedMaski32; - if (NumberOfIgnoredHighBits) { - uint32_t highMask = ~0 << (32 - NumberOfIgnoredHighBits); - Mask |= highMask; - } - } else { - ExpectedMask = ((uint64_t) ~0) << ExpectedTrailingZeros; - if (NumberOfIgnoredHighBits) - Mask |= ((uint64_t) ~0) << (64 - NumberOfIgnoredHighBits); - } - - return Mask == ExpectedMask; -} - -// Look for bits that will be useful for later uses. -// A bit is consider useless as soon as it is dropped and never used -// before it as been dropped. -// E.g., looking for useful bit of x -// 1. y = x & 0x7 -// 2. z = y >> 2 -// After #1, x useful bits are 0x7, then the useful bits of x, live through -// y. -// After #2, the useful bits of x are 0x4. -// However, if x is used on an unpredicatable instruction, then all its bits -// are useful. -// E.g. -// 1. y = x & 0x7 -// 2. z = y >> 2 -// 3. 
str x, [@x] -static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); - -static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, - unsigned Depth) { - uint64_t Imm = - cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); - Imm = ARM64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); - UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); - getUsefulBits(Op, UsefulBits, Depth + 1); -} - -static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, - uint64_t Imm, uint64_t MSB, - unsigned Depth) { - // inherit the bitwidth value - APInt OpUsefulBits(UsefulBits); - OpUsefulBits = 1; - - if (MSB >= Imm) { - OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); - --OpUsefulBits; - // The interesting part will be in the lower part of the result - getUsefulBits(Op, OpUsefulBits, Depth + 1); - // The interesting part was starting at Imm in the argument - OpUsefulBits = OpUsefulBits.shl(Imm); - } else { - OpUsefulBits = OpUsefulBits.shl(MSB + 1); - --OpUsefulBits; - // The interesting part will be shifted in the result - OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm); - getUsefulBits(Op, OpUsefulBits, Depth + 1); - // The interesting part was at zero in the argument - OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm); - } - - UsefulBits &= OpUsefulBits; -} - -static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, - unsigned Depth) { - uint64_t Imm = - cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); - uint64_t MSB = - cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); - - getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); -} - -static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, - unsigned Depth) { - uint64_t ShiftTypeAndValue = - cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); - APInt Mask(UsefulBits); - Mask.clearAllBits(); - Mask.flipAllBits(); - - if (ARM64_AM::getShiftType(ShiftTypeAndValue) == ARM64_AM::LSL) { - // Shift Left - uint64_t ShiftAmt = ARM64_AM::getShiftValue(ShiftTypeAndValue); - Mask = Mask.shl(ShiftAmt); - getUsefulBits(Op, Mask, Depth + 1); - Mask = Mask.lshr(ShiftAmt); - } else if (ARM64_AM::getShiftType(ShiftTypeAndValue) == ARM64_AM::LSR) { - // Shift Right - // We do not handle ARM64_AM::ASR, because the sign will change the - // number of useful bits - uint64_t ShiftAmt = ARM64_AM::getShiftValue(ShiftTypeAndValue); - Mask = Mask.lshr(ShiftAmt); - getUsefulBits(Op, Mask, Depth + 1); - Mask = Mask.shl(ShiftAmt); - } else - return; - - UsefulBits &= Mask; -} - -static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, - unsigned Depth) { - uint64_t Imm = - cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); - uint64_t MSB = - cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); - - if (Op.getOperand(1) == Orig) - return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); - - APInt OpUsefulBits(UsefulBits); - OpUsefulBits = 1; - - if (MSB >= Imm) { - OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); - --OpUsefulBits; - UsefulBits &= ~OpUsefulBits; - getUsefulBits(Op, UsefulBits, Depth + 1); - } else { - OpUsefulBits = OpUsefulBits.shl(MSB + 1); - --OpUsefulBits; - UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm)); - getUsefulBits(Op, UsefulBits, Depth + 1); - } -} - -static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, - SDValue Orig, unsigned 
Depth) { - - // Users of this node should have already been instruction selected - // FIXME: Can we turn that into an assert? - if (!UserNode->isMachineOpcode()) - return; - - switch (UserNode->getMachineOpcode()) { - default: - return; - case ARM64::ANDSWri: - case ARM64::ANDSXri: - case ARM64::ANDWri: - case ARM64::ANDXri: - // We increment Depth only when we call the getUsefulBits - return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, - Depth); - case ARM64::UBFMWri: - case ARM64::UBFMXri: - return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); - - case ARM64::ORRWrs: - case ARM64::ORRXrs: - if (UserNode->getOperand(1) != Orig) - return; - return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, - Depth); - case ARM64::BFMWri: - case ARM64::BFMXri: - return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); - } -} - -static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { - if (Depth >= 6) - return; - // Initialize UsefulBits - if (!Depth) { - unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits(); - // At the beginning, assume every produced bits is useful - UsefulBits = APInt(Bitwidth, 0); - UsefulBits.flipAllBits(); - } - APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); - - for (SDNode::use_iterator UseIt = Op.getNode()->use_begin(), - UseEnd = Op.getNode()->use_end(); - UseIt != UseEnd; ++UseIt) { - // A use cannot produce useful bits - APInt UsefulBitsForUse = APInt(UsefulBits); - getUsefulBitsForUse(*UseIt, UsefulBitsForUse, Op, Depth); - UsersUsefulBits |= UsefulBitsForUse; - } - // UsefulBits contains the produced bits that are meaningful for the - // current definition, thus a user cannot make a bit meaningful at - // this point - UsefulBits &= UsersUsefulBits; -} - -// Given a OR operation, check if we have the following pattern -// ubfm c, b, imm, imm2 (or something that does the same jobs, see -// isBitfieldExtractOp) -// d = e & mask2 ; where mask is a binary sequence of 1..10..0 and -// countTrailingZeros(mask2) == imm2 - imm + 1 -// f = d | c -// if yes, given reference arguments will be update so that one can replace -// the OR instruction with: -// f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2 -static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Opd0, - SDValue &Opd1, unsigned &LSB, - unsigned &MSB, SelectionDAG *CurDAG) { - assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); - - // Set Opc - EVT VT = N->getValueType(0); - if (VT == MVT::i32) - Opc = ARM64::BFMWri; - else if (VT == MVT::i64) - Opc = ARM64::BFMXri; - else - return false; - - // Because of simplify-demanded-bits in DAGCombine, involved masks may not - // have the expected shape. Try to undo that. - APInt UsefulBits; - getUsefulBits(SDValue(N, 0), UsefulBits); - - unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); - unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); - - // OR is commutative, check both possibilities (does llvm provide a - // way to do that directely, e.g., via code matcher?) 
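The BFM-from-OR pattern described above merges a value masked with a high 1..10..0 mask and a bitfield extracted into the low bits, so the OR as a whole behaves like a bitfield insert into the low lanes of the masked operand. A sketch of the reference semantics the selected BFM must reproduce, as plain arithmetic with illustrative helper names (this mirrors what a BFXIL-style insert computes):

  #include <cassert>
  #include <cstdint>

  // Extract bits [Lsb, Msb] of Src into the low bits of the result, the field
  // a UBFM/UBFX with those operands would produce (64-bit variant).
  static uint64_t extractField(uint64_t Src, unsigned Lsb, unsigned Msb) {
    unsigned Width = Msb - Lsb + 1;
    uint64_t Mask = (Width == 64) ? ~0ULL : ((1ULL << Width) - 1);
    return (Src >> Lsb) & Mask;
  }

  // The other OR operand keeps only bits above the field (a 1..10..0 mask),
  // so OR-ing them is an insert of the extracted field into the low Width
  // bits of Dst, leaving the high bits of Dst unchanged.
  static uint64_t bitfieldInsertLow(uint64_t Dst, uint64_t Src, unsigned Lsb,
                                    unsigned Msb) {
    unsigned Width = Msb - Lsb + 1;
    uint64_t LowMask = (Width == 64) ? ~0ULL : ((1ULL << Width) - 1);
    return (Dst & ~LowMask) | extractField(Src, Lsb, Msb);
  }

  int main() {
    // Insert bits [8,15] of 0xABCD into the low byte of 0xFFFF00FF.
    assert(bitfieldInsertLow(0xFFFF00FFULL, 0xABCDULL, 8, 15) == 0xFFFF00ABULL);
    return 0;
  }
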
- SDValue OrOpd1Val = N->getOperand(1); - SDNode *OrOpd0 = N->getOperand(0).getNode(); - SDNode *OrOpd1 = N->getOperand(1).getNode(); - for (int i = 0; i < 2; - ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) { - unsigned BFXOpc; - // Set Opd1, LSB and MSB arguments by looking for - // c = ubfm b, imm, imm2 - if (!isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Opd1, LSB, MSB, - NumberOfIgnoredLowBits, true)) - continue; - - // Check that the returned opcode is compatible with the pattern, - // i.e., same type and zero extended (U and not S) - if ((BFXOpc != ARM64::UBFMXri && VT == MVT::i64) || - (BFXOpc != ARM64::UBFMWri && VT == MVT::i32)) - continue; - - // Compute the width of the bitfield insertion - int sMSB = MSB - LSB + 1; - // FIXME: This constraints is to catch bitfield insertion we may - // want to widen the pattern if we want to grab general bitfied - // move case - if (sMSB <= 0) - continue; - - // Check the second part of the pattern - EVT VT = OrOpd1->getValueType(0); - if (VT != MVT::i32 && VT != MVT::i64) - continue; - - // Compute the Known Zero for the candidate of the first operand. - // This allows to catch more general case than just looking for - // AND with imm. Indeed, simplify-demanded-bits may have removed - // the AND instruction because it proves it was useless. - APInt KnownZero, KnownOne; - CurDAG->ComputeMaskedBits(OrOpd1Val, KnownZero, KnownOne); - - // Check if there is enough room for the second operand to appear - // in the first one - if (KnownZero.countTrailingOnes() < (unsigned)sMSB) - continue; - - // Set the first operand - uint64_t Imm; - if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && - isHighMask(Imm, sMSB, NumberOfIgnoredHighBits, VT)) - // In that case, we can eliminate the AND - Opd0 = OrOpd1->getOperand(0); - else - // Maybe the AND has been removed by simplify-demanded-bits - // or is useful because it discards more bits - Opd0 = OrOpd1Val; - - // both parts match - return true; - } - - return false; -} - -SDNode *ARM64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { - if (N->getOpcode() != ISD::OR) - return NULL; - - unsigned Opc; - unsigned LSB, MSB; - SDValue Opd0, Opd1; - - if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG)) - return NULL; - - EVT VT = N->getValueType(0); - SDValue Ops[] = { Opd0, - Opd1, - CurDAG->getTargetConstant(LSB, VT), - CurDAG->getTargetConstant(MSB, VT) }; - return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 4); -} - -SDNode *ARM64DAGToDAGISel::SelectLIBM(SDNode *N) { - EVT VT = N->getValueType(0); - unsigned Variant; - unsigned Opc; - unsigned FRINTXOpcs[] = { ARM64::FRINTXSr, ARM64::FRINTXDr }; - - if (VT == MVT::f32) { - Variant = 0; - } else if (VT == MVT::f64) { - Variant = 1; - } else - return 0; // Unrecognized argument type. Fall back on default codegen. - - // Pick the FRINTX variant needed to set the flags. - unsigned FRINTXOpc = FRINTXOpcs[Variant]; - - switch (N->getOpcode()) { - default: - return 0; // Unrecognized libm ISD node. Fall back on default codegen. 
- case ISD::FCEIL: { - unsigned FRINTPOpcs[] = { ARM64::FRINTPSr, ARM64::FRINTPDr }; - Opc = FRINTPOpcs[Variant]; - break; - } - case ISD::FFLOOR: { - unsigned FRINTMOpcs[] = { ARM64::FRINTMSr, ARM64::FRINTMDr }; - Opc = FRINTMOpcs[Variant]; - break; - } - case ISD::FTRUNC: { - unsigned FRINTZOpcs[] = { ARM64::FRINTZSr, ARM64::FRINTZDr }; - Opc = FRINTZOpcs[Variant]; - break; - } - case ISD::FROUND: { - unsigned FRINTAOpcs[] = { ARM64::FRINTASr, ARM64::FRINTADr }; - Opc = FRINTAOpcs[Variant]; - break; - } - } - - SDLoc dl(N); - SDValue In = N->getOperand(0); - SmallVector<SDValue, 2> Ops; - Ops.push_back(In); - - if (!TM.Options.UnsafeFPMath) { - SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In); - Ops.push_back(SDValue(FRINTX, 1)); - } - - return CurDAG->getMachineNode(Opc, dl, VT, Ops); -} - -SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { - // Dump information about the Node being selected - DEBUG(errs() << "Selecting: "); - DEBUG(Node->dump(CurDAG)); - DEBUG(errs() << "\n"); - - // If we have a custom node, we already have selected! - if (Node->isMachineOpcode()) { - DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); - Node->setNodeId(-1); - return NULL; - } - - // Few custom selection stuff. - SDNode *ResNode = 0; - EVT VT = Node->getValueType(0); - - switch (Node->getOpcode()) { - default: - break; - - case ISD::ADD: - if (SDNode *I = SelectMLAV64LaneV128(Node)) - return I; - break; - - case ISD::ATOMIC_LOAD_ADD: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_ADD_I8, - ARM64::ATOMIC_LOAD_ADD_I16, ARM64::ATOMIC_LOAD_ADD_I32, - ARM64::ATOMIC_LOAD_ADD_I64); - case ISD::ATOMIC_LOAD_SUB: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_SUB_I8, - ARM64::ATOMIC_LOAD_SUB_I16, ARM64::ATOMIC_LOAD_SUB_I32, - ARM64::ATOMIC_LOAD_SUB_I64); - case ISD::ATOMIC_LOAD_AND: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_AND_I8, - ARM64::ATOMIC_LOAD_AND_I16, ARM64::ATOMIC_LOAD_AND_I32, - ARM64::ATOMIC_LOAD_AND_I64); - case ISD::ATOMIC_LOAD_OR: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_OR_I8, - ARM64::ATOMIC_LOAD_OR_I16, ARM64::ATOMIC_LOAD_OR_I32, - ARM64::ATOMIC_LOAD_OR_I64); - case ISD::ATOMIC_LOAD_XOR: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_XOR_I8, - ARM64::ATOMIC_LOAD_XOR_I16, ARM64::ATOMIC_LOAD_XOR_I32, - ARM64::ATOMIC_LOAD_XOR_I64); - case ISD::ATOMIC_LOAD_NAND: - return SelectAtomic( - Node, ARM64::ATOMIC_LOAD_NAND_I8, ARM64::ATOMIC_LOAD_NAND_I16, - ARM64::ATOMIC_LOAD_NAND_I32, ARM64::ATOMIC_LOAD_NAND_I64); - case ISD::ATOMIC_LOAD_MIN: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_MIN_I8, - ARM64::ATOMIC_LOAD_MIN_I16, ARM64::ATOMIC_LOAD_MIN_I32, - ARM64::ATOMIC_LOAD_MIN_I64); - case ISD::ATOMIC_LOAD_MAX: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_MAX_I8, - ARM64::ATOMIC_LOAD_MAX_I16, ARM64::ATOMIC_LOAD_MAX_I32, - ARM64::ATOMIC_LOAD_MAX_I64); - case ISD::ATOMIC_LOAD_UMIN: - return SelectAtomic( - Node, ARM64::ATOMIC_LOAD_UMIN_I8, ARM64::ATOMIC_LOAD_UMIN_I16, - ARM64::ATOMIC_LOAD_UMIN_I32, ARM64::ATOMIC_LOAD_UMIN_I64); - case ISD::ATOMIC_LOAD_UMAX: - return SelectAtomic( - Node, ARM64::ATOMIC_LOAD_UMAX_I8, ARM64::ATOMIC_LOAD_UMAX_I16, - ARM64::ATOMIC_LOAD_UMAX_I32, ARM64::ATOMIC_LOAD_UMAX_I64); - case ISD::ATOMIC_SWAP: - return SelectAtomic(Node, ARM64::ATOMIC_SWAP_I8, ARM64::ATOMIC_SWAP_I16, - ARM64::ATOMIC_SWAP_I32, ARM64::ATOMIC_SWAP_I64); - case ISD::ATOMIC_CMP_SWAP: - return SelectAtomic(Node, ARM64::ATOMIC_CMP_SWAP_I8, - ARM64::ATOMIC_CMP_SWAP_I16, ARM64::ATOMIC_CMP_SWAP_I32, - ARM64::ATOMIC_CMP_SWAP_I64); - - case ISD::LOAD: { - // Try to 
select as an indexed load. Fall through to normal processing - // if we can't. - bool Done = false; - SDNode *I = SelectIndexedLoad(Node, Done); - if (Done) - return I; - break; - } - - case ISD::SRL: - case ISD::AND: - case ISD::SRA: - if (SDNode *I = SelectBitfieldExtractOp(Node)) - return I; - break; - - case ISD::OR: - if (SDNode *I = SelectBitfieldInsertOp(Node)) - return I; - break; - - case ISD::EXTRACT_VECTOR_ELT: { - // Extracting lane zero is a special case where we can just use a plain - // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for - // the rest of the compiler, especially the register allocator and copyi - // propagation, to reason about, so is preferred when it's possible to - // use it. - ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1)); - // Bail and use the default Select() for non-zero lanes. - if (LaneNode->getZExtValue() != 0) - break; - // If the element type is not the same as the result type, likewise - // bail and use the default Select(), as there's more to do than just - // a cross-class COPY. This catches extracts of i8 and i16 elements - // since they will need an explicit zext. - if (VT != Node->getOperand(0).getValueType().getVectorElementType()) - break; - unsigned SubReg; - switch (Node->getOperand(0) - .getValueType() - .getVectorElementType() - .getSizeInBits()) { - default: - assert(0 && "Unexpected vector element type!"); - case 64: - SubReg = ARM64::dsub; - break; - case 32: - SubReg = ARM64::ssub; - break; - case 16: // FALLTHROUGH - case 8: - llvm_unreachable("unexpected zext-requiring extract element!"); - } - SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT, - Node->getOperand(0)); - DEBUG(dbgs() << "ISEL: Custom selection!\n=> "); - DEBUG(Extract->dumpr(CurDAG)); - DEBUG(dbgs() << "\n"); - return Extract.getNode(); - } - case ISD::Constant: { - // Materialize zero constants as copies from WZR/XZR. This allows - // the coalescer to propagate these into other instructions. - ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); - if (ConstNode->isNullValue()) { - if (VT == MVT::i32) - return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), - ARM64::WZR, MVT::i32).getNode(); - else if (VT == MVT::i64) - return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), - ARM64::XZR, MVT::i64).getNode(); - } - break; - } - - case ISD::FrameIndex: { - // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. - int FI = cast<FrameIndexSDNode>(Node)->getIndex(); - unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0); - const TargetLowering *TLI = getTargetLowering(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); - SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), - CurDAG->getTargetConstant(Shifter, MVT::i32) }; - return CurDAG->SelectNodeTo(Node, ARM64::ADDXri, MVT::i64, Ops, 3); - } - case ISD::INTRINSIC_W_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); - switch (IntNo) { - default: - break; - case Intrinsic::arm64_ldxp: { - SDValue MemAddr = Node->getOperand(2); - SDLoc DL(Node); - SDValue Chain = Node->getOperand(0); - - SDNode *Ld = CurDAG->getMachineNode(ARM64::LDXPX, DL, MVT::i64, MVT::i64, - MVT::Other, MemAddr, Chain); - - // Transfer memoperands. 
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); - cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); - return Ld; - } - case Intrinsic::arm64_stxp: { - SDLoc DL(Node); - SDValue Chain = Node->getOperand(0); - SDValue ValLo = Node->getOperand(2); - SDValue ValHi = Node->getOperand(3); - SDValue MemAddr = Node->getOperand(4); - - // Place arguments in the right order. - SmallVector<SDValue, 7> Ops; - Ops.push_back(ValLo); - Ops.push_back(ValHi); - Ops.push_back(MemAddr); - Ops.push_back(Chain); - - SDNode *St = - CurDAG->getMachineNode(ARM64::STXPX, DL, MVT::i32, MVT::Other, Ops); - // Transfer memoperands. - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); - cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); - - return St; - } - case Intrinsic::arm64_neon_ld1x2: - if (VT == MVT::v8i8) - return SelectLoad(Node, 2, ARM64::LD1Twov8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 2, ARM64::LD1Twov16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 2, ARM64::LD1Twov4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 2, ARM64::LD1Twov8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 2, ARM64::LD1Twov2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 2, ARM64::LD1Twov4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 2, ARM64::LD1Twov1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 2, ARM64::LD1Twov2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld1x3: - if (VT == MVT::v8i8) - return SelectLoad(Node, 3, ARM64::LD1Threev8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 3, ARM64::LD1Threev16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 3, ARM64::LD1Threev4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 3, ARM64::LD1Threev8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 3, ARM64::LD1Threev2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 3, ARM64::LD1Threev4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 3, ARM64::LD1Threev1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 3, ARM64::LD1Threev2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld1x4: - if (VT == MVT::v8i8) - return SelectLoad(Node, 4, ARM64::LD1Fourv8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 4, ARM64::LD1Fourv16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 4, ARM64::LD1Fourv4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 4, ARM64::LD1Fourv8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 4, ARM64::LD1Fourv2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 4, ARM64::LD1Fourv4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 4, ARM64::LD1Fourv1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 4, ARM64::LD1Fourv2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld2: - if (VT == MVT::v8i8) - return SelectLoad(Node, 
2, ARM64::LD2Twov8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 2, ARM64::LD2Twov16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 2, ARM64::LD2Twov4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 2, ARM64::LD2Twov8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 2, ARM64::LD2Twov2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 2, ARM64::LD2Twov4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 2, ARM64::LD1Twov1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 2, ARM64::LD2Twov2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld3: - if (VT == MVT::v8i8) - return SelectLoad(Node, 3, ARM64::LD3Threev8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 3, ARM64::LD3Threev16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 3, ARM64::LD3Threev4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 3, ARM64::LD3Threev8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 3, ARM64::LD3Threev2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 3, ARM64::LD3Threev4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 3, ARM64::LD1Threev1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 3, ARM64::LD3Threev2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld4: - if (VT == MVT::v8i8) - return SelectLoad(Node, 4, ARM64::LD4Fourv8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 4, ARM64::LD4Fourv16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 4, ARM64::LD4Fourv4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 4, ARM64::LD4Fourv8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 4, ARM64::LD4Fourv2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 4, ARM64::LD4Fourv4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 4, ARM64::LD1Fourv1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 4, ARM64::LD4Fourv2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld2r: - if (VT == MVT::v8i8) - return SelectLoad(Node, 2, ARM64::LD2Rv8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 2, ARM64::LD2Rv16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 2, ARM64::LD2Rv4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 2, ARM64::LD2Rv8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 2, ARM64::LD2Rv2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 2, ARM64::LD2Rv4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 2, ARM64::LD2Rv1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 2, ARM64::LD2Rv2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld3r: - if (VT == MVT::v8i8) - return SelectLoad(Node, 3, ARM64::LD3Rv8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 3, ARM64::LD3Rv16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - 
return SelectLoad(Node, 3, ARM64::LD3Rv4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 3, ARM64::LD3Rv8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 3, ARM64::LD3Rv2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 3, ARM64::LD3Rv4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 3, ARM64::LD3Rv1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 3, ARM64::LD3Rv2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld4r: - if (VT == MVT::v8i8) - return SelectLoad(Node, 4, ARM64::LD4Rv8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 4, ARM64::LD4Rv16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 4, ARM64::LD4Rv4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 4, ARM64::LD4Rv8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 4, ARM64::LD4Rv2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 4, ARM64::LD4Rv4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 4, ARM64::LD4Rv1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 4, ARM64::LD4Rv2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld2lane: - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectLoadLane(Node, 2, ARM64::LD2i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectLoadLane(Node, 2, ARM64::LD2i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectLoadLane(Node, 2, ARM64::LD2i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectLoadLane(Node, 2, ARM64::LD2i64); - break; - case Intrinsic::arm64_neon_ld3lane: - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectLoadLane(Node, 3, ARM64::LD3i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectLoadLane(Node, 3, ARM64::LD3i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectLoadLane(Node, 3, ARM64::LD3i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectLoadLane(Node, 3, ARM64::LD3i64); - break; - case Intrinsic::arm64_neon_ld4lane: - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectLoadLane(Node, 4, ARM64::LD4i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectLoadLane(Node, 4, ARM64::LD4i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectLoadLane(Node, 4, ARM64::LD4i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectLoadLane(Node, 4, ARM64::LD4i64); - break; - } - } break; - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); - switch (IntNo) { - default: - break; - case Intrinsic::arm64_neon_tbl2: - return SelectTable(Node, 2, VT == MVT::v8i8 ? ARM64::TBLv8i8Two - : ARM64::TBLv16i8Two, - false); - case Intrinsic::arm64_neon_tbl3: - return SelectTable(Node, 3, VT == MVT::v8i8 ? ARM64::TBLv8i8Three - : ARM64::TBLv16i8Three, - false); - case Intrinsic::arm64_neon_tbl4: - return SelectTable(Node, 4, VT == MVT::v8i8 ? 
ARM64::TBLv8i8Four - : ARM64::TBLv16i8Four, - false); - case Intrinsic::arm64_neon_tbx2: - return SelectTable(Node, 2, VT == MVT::v8i8 ? ARM64::TBXv8i8Two - : ARM64::TBXv16i8Two, - true); - case Intrinsic::arm64_neon_tbx3: - return SelectTable(Node, 3, VT == MVT::v8i8 ? ARM64::TBXv8i8Three - : ARM64::TBXv16i8Three, - true); - case Intrinsic::arm64_neon_tbx4: - return SelectTable(Node, 4, VT == MVT::v8i8 ? ARM64::TBXv8i8Four - : ARM64::TBXv16i8Four, - true); - case Intrinsic::arm64_neon_smull: - case Intrinsic::arm64_neon_umull: - if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node)) - return N; - break; - } - break; - } - case ISD::INTRINSIC_VOID: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); - if (Node->getNumOperands() >= 3) - VT = Node->getOperand(2)->getValueType(0); - switch (IntNo) { - default: - break; - case Intrinsic::arm64_neon_st1x2: { - if (VT == MVT::v8i8) - return SelectStore(Node, 2, ARM64::ST1Twov8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 2, ARM64::ST1Twov16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 2, ARM64::ST1Twov4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 2, ARM64::ST1Twov8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 2, ARM64::ST1Twov2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 2, ARM64::ST1Twov4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 2, ARM64::ST1Twov2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 2, ARM64::ST1Twov1d); - break; - } - case Intrinsic::arm64_neon_st1x3: { - if (VT == MVT::v8i8) - return SelectStore(Node, 3, ARM64::ST1Threev8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 3, ARM64::ST1Threev16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 3, ARM64::ST1Threev4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 3, ARM64::ST1Threev8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 3, ARM64::ST1Threev2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 3, ARM64::ST1Threev4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 3, ARM64::ST1Threev2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 3, ARM64::ST1Threev1d); - break; - } - case Intrinsic::arm64_neon_st1x4: { - if (VT == MVT::v8i8) - return SelectStore(Node, 4, ARM64::ST1Fourv8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 4, ARM64::ST1Fourv16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 4, ARM64::ST1Fourv4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 4, ARM64::ST1Fourv8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 4, ARM64::ST1Fourv2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 4, ARM64::ST1Fourv4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 4, ARM64::ST1Fourv2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 4, ARM64::ST1Fourv1d); - break; - } - case Intrinsic::arm64_neon_st2: { - if (VT == MVT::v8i8) - return SelectStore(Node, 2, ARM64::ST2Twov8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 2, ARM64::ST2Twov16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 2, ARM64::ST2Twov4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 2, ARM64::ST2Twov8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 2, 
ARM64::ST2Twov2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 2, ARM64::ST2Twov4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 2, ARM64::ST2Twov2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 2, ARM64::ST1Twov1d); - break; - } - case Intrinsic::arm64_neon_st3: { - if (VT == MVT::v8i8) - return SelectStore(Node, 3, ARM64::ST3Threev8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 3, ARM64::ST3Threev16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 3, ARM64::ST3Threev4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 3, ARM64::ST3Threev8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 3, ARM64::ST3Threev2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 3, ARM64::ST3Threev4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 3, ARM64::ST3Threev2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 3, ARM64::ST1Threev1d); - break; - } - case Intrinsic::arm64_neon_st4: { - if (VT == MVT::v8i8) - return SelectStore(Node, 4, ARM64::ST4Fourv8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 4, ARM64::ST4Fourv16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 4, ARM64::ST4Fourv4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 4, ARM64::ST4Fourv8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 4, ARM64::ST4Fourv2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 4, ARM64::ST4Fourv4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 4, ARM64::ST4Fourv2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 4, ARM64::ST1Fourv1d); - break; - } - case Intrinsic::arm64_neon_st2lane: { - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectStoreLane(Node, 2, ARM64::ST2i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectStoreLane(Node, 2, ARM64::ST2i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectStoreLane(Node, 2, ARM64::ST2i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectStoreLane(Node, 2, ARM64::ST2i64); - break; - } - case Intrinsic::arm64_neon_st3lane: { - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectStoreLane(Node, 3, ARM64::ST3i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectStoreLane(Node, 3, ARM64::ST3i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectStoreLane(Node, 3, ARM64::ST3i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectStoreLane(Node, 3, ARM64::ST3i64); - break; - } - case Intrinsic::arm64_neon_st4lane: { - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectStoreLane(Node, 4, ARM64::ST4i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectStoreLane(Node, 4, ARM64::ST4i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectStoreLane(Node, 4, ARM64::ST4i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectStoreLane(Node, 4, ARM64::ST4i64); - break; - } - } - } - - case ISD::FCEIL: - case ISD::FFLOOR: - case ISD::FTRUNC: - case ISD::FROUND: - if (SDNode *I = SelectLIBM(Node)) - 
return I; - break; - } - - // Select the default instruction - ResNode = SelectCode(Node); - - DEBUG(errs() << "=> "); - if (ResNode == NULL || ResNode == Node) - DEBUG(Node->dump(CurDAG)); - else - DEBUG(ResNode->dump(CurDAG)); - DEBUG(errs() << "\n"); - - return ResNode; -} - -/// createARM64ISelDag - This pass converts a legalized DAG into a -/// ARM64-specific DAG, ready for instruction scheduling. -FunctionPass *llvm::createARM64ISelDag(ARM64TargetMachine &TM, - CodeGenOpt::Level OptLevel) { - return new ARM64DAGToDAGISel(TM, OptLevel); -} diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp deleted file mode 100644 index 641f591..0000000 --- a/lib/Target/ARM64/ARM64ISelLowering.cpp +++ /dev/null @@ -1,7551 +0,0 @@ -//===-- ARM64ISelLowering.cpp - ARM64 DAG Lowering Implementation --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ARM64TargetLowering class. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-lower" - -#include "ARM64ISelLowering.h" -#include "ARM64PerfectShuffle.h" -#include "ARM64Subtarget.h" -#include "ARM64CallingConv.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64TargetMachine.h" -#include "ARM64TargetObjectFile.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Type.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetOptions.h" -using namespace llvm; - -STATISTIC(NumTailCalls, "Number of tail calls"); -STATISTIC(NumShiftInserts, "Number of vector shift inserts"); - -// This option should go away when tail calls fully work. -static cl::opt<bool> -EnableARM64TailCalls("arm64-tail-calls", cl::Hidden, - cl::desc("Generate ARM64 tail calls (TEMPORARY OPTION)."), - cl::init(true)); - -static cl::opt<bool> -StrictAlign("arm64-strict-align", cl::Hidden, - cl::desc("Disallow all unaligned memory accesses")); - -// Place holder until extr generation is tested fully. -static cl::opt<bool> -EnableARM64ExtrGeneration("arm64-extr-generation", cl::Hidden, - cl::desc("Allow ARM64 (or (shift)(shift))->extract"), - cl::init(true)); - -static cl::opt<bool> -EnableARM64SlrGeneration("arm64-shift-insert-generation", cl::Hidden, - cl::desc("Allow ARM64 SLI/SRI formation"), - cl::init(false)); - -//===----------------------------------------------------------------------===// -// ARM64 Lowering public interface. 
-//===----------------------------------------------------------------------===// -static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { - if (TM.getSubtarget<ARM64Subtarget>().isTargetDarwin()) - return new ARM64_MachoTargetObjectFile(); - - return new ARM64_ELFTargetObjectFile(); -} - -ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)) { - Subtarget = &TM.getSubtarget<ARM64Subtarget>(); - - // ARM64 doesn't have comparisons which set GPRs or setcc instructions, so - // we have to make something up. Arbitrarily, choose ZeroOrOne. - setBooleanContents(ZeroOrOneBooleanContent); - // When comparing vectors the result sets the different elements in the - // vector to all-one or all-zero. - setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - - // Set up the register classes. - addRegisterClass(MVT::i32, &ARM64::GPR32allRegClass); - addRegisterClass(MVT::i64, &ARM64::GPR64allRegClass); - addRegisterClass(MVT::f32, &ARM64::FPR32RegClass); - addRegisterClass(MVT::f64, &ARM64::FPR64RegClass); - addRegisterClass(MVT::f128, &ARM64::FPR128RegClass); - addRegisterClass(MVT::v16i8, &ARM64::FPR8RegClass); - addRegisterClass(MVT::v8i16, &ARM64::FPR16RegClass); - - // Someone set us up the NEON. - addDRTypeForNEON(MVT::v2f32); - addDRTypeForNEON(MVT::v8i8); - addDRTypeForNEON(MVT::v4i16); - addDRTypeForNEON(MVT::v2i32); - addDRTypeForNEON(MVT::v1i64); - addDRTypeForNEON(MVT::v1f64); - - addQRTypeForNEON(MVT::v4f32); - addQRTypeForNEON(MVT::v2f64); - addQRTypeForNEON(MVT::v16i8); - addQRTypeForNEON(MVT::v8i16); - addQRTypeForNEON(MVT::v4i32); - addQRTypeForNEON(MVT::v2i64); - - // Compute derived properties from the register classes - computeRegisterProperties(); - - // Provide all sorts of operation actions - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); - setOperationAction(ISD::SETCC, MVT::i32, Custom); - setOperationAction(ISD::SETCC, MVT::i64, Custom); - setOperationAction(ISD::SETCC, MVT::f32, Custom); - setOperationAction(ISD::SETCC, MVT::f64, Custom); - setOperationAction(ISD::BRCOND, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::i32, Custom); - setOperationAction(ISD::BR_CC, MVT::i64, Custom); - setOperationAction(ISD::BR_CC, MVT::f32, Custom); - setOperationAction(ISD::BR_CC, MVT::f64, Custom); - setOperationAction(ISD::SELECT, MVT::i32, Custom); - setOperationAction(ISD::SELECT, MVT::i64, Custom); - setOperationAction(ISD::SELECT, MVT::f32, Custom); - setOperationAction(ISD::SELECT, MVT::f64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::JumpTable, MVT::i64, Custom); - - setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); - setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); - setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); - - setOperationAction(ISD::FREM, MVT::f32, Expand); - setOperationAction(ISD::FREM, MVT::f64, Expand); - setOperationAction(ISD::FREM, MVT::f80, Expand); - - // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to - // silliness like this: - setOperationAction(ISD::FABS, MVT::v1f64, Expand); - setOperationAction(ISD::FADD, MVT::v1f64, Expand); - setOperationAction(ISD::FCEIL, MVT::v1f64, Expand); - 
setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand); - setOperationAction(ISD::FCOS, MVT::v1f64, Expand); - setOperationAction(ISD::FDIV, MVT::v1f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand); - setOperationAction(ISD::FMA, MVT::v1f64, Expand); - setOperationAction(ISD::FMUL, MVT::v1f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand); - setOperationAction(ISD::FNEG, MVT::v1f64, Expand); - setOperationAction(ISD::FPOW, MVT::v1f64, Expand); - setOperationAction(ISD::FREM, MVT::v1f64, Expand); - setOperationAction(ISD::FROUND, MVT::v1f64, Expand); - setOperationAction(ISD::FRINT, MVT::v1f64, Expand); - setOperationAction(ISD::FSIN, MVT::v1f64, Expand); - setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand); - setOperationAction(ISD::FSQRT, MVT::v1f64, Expand); - setOperationAction(ISD::FSUB, MVT::v1f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand); - setOperationAction(ISD::SETCC, MVT::v1f64, Expand); - setOperationAction(ISD::BR_CC, MVT::v1f64, Expand); - setOperationAction(ISD::SELECT, MVT::v1f64, Expand); - setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand); - setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand); - - setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand); - setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand); - setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand); - - // Custom lowering hooks are needed for XOR - // to fold it into CSINC/CSINV. - setOperationAction(ISD::XOR, MVT::i32, Custom); - setOperationAction(ISD::XOR, MVT::i64, Custom); - - // Virtually no operation on f128 is legal, but LLVM can't expand them when - // there's a valid register class, so we need custom operations in most cases. - setOperationAction(ISD::FABS, MVT::f128, Expand); - setOperationAction(ISD::FADD, MVT::f128, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); - setOperationAction(ISD::FCOS, MVT::f128, Expand); - setOperationAction(ISD::FDIV, MVT::f128, Custom); - setOperationAction(ISD::FMA, MVT::f128, Expand); - setOperationAction(ISD::FMUL, MVT::f128, Custom); - setOperationAction(ISD::FNEG, MVT::f128, Expand); - setOperationAction(ISD::FPOW, MVT::f128, Expand); - setOperationAction(ISD::FREM, MVT::f128, Expand); - setOperationAction(ISD::FRINT, MVT::f128, Expand); - setOperationAction(ISD::FSIN, MVT::f128, Expand); - setOperationAction(ISD::FSINCOS, MVT::f128, Expand); - setOperationAction(ISD::FSQRT, MVT::f128, Expand); - setOperationAction(ISD::FSUB, MVT::f128, Custom); - setOperationAction(ISD::FTRUNC, MVT::f128, Expand); - setOperationAction(ISD::SETCC, MVT::f128, Custom); - setOperationAction(ISD::BR_CC, MVT::f128, Custom); - setOperationAction(ISD::SELECT, MVT::f128, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); - setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); - - // Lowering for many of the conversions is actually specified by the non-f128 - // type. The LowerXXX function will be trivial when f128 isn't involved. 
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); - - // 128-bit atomics - setOperationAction(ISD::ATOMIC_SWAP, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i128, Custom); - // These are surprisingly difficult. The only single-copy atomic 128-bit - // instruction on AArch64 is stxp (when it succeeds). So a store can safely - // become a simple swap, but a load can only be determined to have been atomic - // if storing the same value back succeeds. - setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Expand); - - // Variable arguments. - setOperationAction(ISD::VASTART, MVT::Other, Custom); - setOperationAction(ISD::VAARG, MVT::Other, Custom); - setOperationAction(ISD::VACOPY, MVT::Other, Custom); - setOperationAction(ISD::VAEND, MVT::Other, Expand); - - // Variable-sized objects. - setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); - setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - - // Exception handling. - // FIXME: These are guesses. Has this been defined yet? - setExceptionPointerRegister(ARM64::X0); - setExceptionSelectorRegister(ARM64::X1); - - // Constant pool entries - setOperationAction(ISD::ConstantPool, MVT::i64, Custom); - - // BlockAddress - setOperationAction(ISD::BlockAddress, MVT::i64, Custom); - - // Add/Sub overflow ops with MVT::Glues are lowered to CPSR dependences. - setOperationAction(ISD::ADDC, MVT::i32, Custom); - setOperationAction(ISD::ADDE, MVT::i32, Custom); - setOperationAction(ISD::SUBC, MVT::i32, Custom); - setOperationAction(ISD::SUBE, MVT::i32, Custom); - setOperationAction(ISD::ADDC, MVT::i64, Custom); - setOperationAction(ISD::ADDE, MVT::i64, Custom); - setOperationAction(ISD::SUBC, MVT::i64, Custom); - setOperationAction(ISD::SUBE, MVT::i64, Custom); - - // ARM64 lacks both left-rotate and popcount instructions. 
- setOperationAction(ISD::ROTL, MVT::i32, Expand); - setOperationAction(ISD::ROTL, MVT::i64, Expand); - - // ARM64 doesn't have a direct vector ->f32 conversion instructions for - // elements smaller than i32, so promote the input to i32 first. - setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote); - // Similarly, there is no direct i32 -> f64 vector conversion instruction. - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); - - // ARM64 doesn't have {U|S}MUL_LOHI. - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - - // ARM64 doesn't have MUL.2d: - setOperationAction(ISD::MUL, MVT::v2i64, Expand); - - // Expand the undefined-at-zero variants to cttz/ctlz to their defined-at-zero - // counterparts, which ARM64 supports directly. - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); - - setOperationAction(ISD::CTPOP, MVT::i32, Custom); - setOperationAction(ISD::CTPOP, MVT::i64, Custom); - - setOperationAction(ISD::SDIVREM, MVT::i32, Expand); - setOperationAction(ISD::SDIVREM, MVT::i64, Expand); - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); - - // Custom lower Add/Sub/Mul with overflow. - setOperationAction(ISD::SADDO, MVT::i32, Custom); - setOperationAction(ISD::SADDO, MVT::i64, Custom); - setOperationAction(ISD::UADDO, MVT::i32, Custom); - setOperationAction(ISD::UADDO, MVT::i64, Custom); - setOperationAction(ISD::SSUBO, MVT::i32, Custom); - setOperationAction(ISD::SSUBO, MVT::i64, Custom); - setOperationAction(ISD::USUBO, MVT::i32, Custom); - setOperationAction(ISD::USUBO, MVT::i64, Custom); - setOperationAction(ISD::SMULO, MVT::i32, Custom); - setOperationAction(ISD::SMULO, MVT::i64, Custom); - setOperationAction(ISD::UMULO, MVT::i32, Custom); - setOperationAction(ISD::UMULO, MVT::i64, Custom); - - setOperationAction(ISD::FSIN, MVT::f32, Expand); - setOperationAction(ISD::FSIN, MVT::f64, Expand); - setOperationAction(ISD::FCOS, MVT::f32, Expand); - setOperationAction(ISD::FCOS, MVT::f64, Expand); - setOperationAction(ISD::FPOW, MVT::f32, Expand); - setOperationAction(ISD::FPOW, MVT::f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); - - // ARM64 has implementations of a lot of rounding-like FP operations. 
- static MVT RoundingTypes[] = { MVT::f32, MVT::f64, MVT::v2f32, - MVT::v4f32, MVT::v2f64 }; - for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) { - MVT Ty = RoundingTypes[I]; - setOperationAction(ISD::FFLOOR, Ty, Legal); - setOperationAction(ISD::FNEARBYINT, Ty, Legal); - setOperationAction(ISD::FCEIL, Ty, Legal); - setOperationAction(ISD::FRINT, Ty, Legal); - setOperationAction(ISD::FTRUNC, Ty, Legal); - setOperationAction(ISD::FROUND, Ty, Legal); - } - - setOperationAction(ISD::PREFETCH, MVT::Other, Custom); - - if (Subtarget->isTargetMachO()) { - // For iOS, we don't want to the normal expansion of a libcall to - // sincos. We want to issue a libcall to __sincos_stret to avoid memory - // traffic. - setOperationAction(ISD::FSINCOS, MVT::f64, Custom); - setOperationAction(ISD::FSINCOS, MVT::f32, Custom); - } else { - setOperationAction(ISD::FSINCOS, MVT::f64, Expand); - setOperationAction(ISD::FSINCOS, MVT::f32, Expand); - } - - // ARM64 does not have floating-point extending loads, i1 sign-extending load, - // floating-point truncating stores, or v2i32->v2i16 truncating store. - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand); - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand); - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - setTruncStoreAction(MVT::f128, MVT::f80, Expand); - setTruncStoreAction(MVT::f128, MVT::f64, Expand); - setTruncStoreAction(MVT::f128, MVT::f32, Expand); - setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); - // Indexed loads and stores are supported. - for (unsigned im = (unsigned)ISD::PRE_INC; - im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { - setIndexedLoadAction(im, MVT::i8, Legal); - setIndexedLoadAction(im, MVT::i16, Legal); - setIndexedLoadAction(im, MVT::i32, Legal); - setIndexedLoadAction(im, MVT::i64, Legal); - setIndexedLoadAction(im, MVT::f64, Legal); - setIndexedLoadAction(im, MVT::f32, Legal); - setIndexedStoreAction(im, MVT::i8, Legal); - setIndexedStoreAction(im, MVT::i16, Legal); - setIndexedStoreAction(im, MVT::i32, Legal); - setIndexedStoreAction(im, MVT::i64, Legal); - setIndexedStoreAction(im, MVT::f64, Legal); - setIndexedStoreAction(im, MVT::f32, Legal); - } - - // Likewise, narrowing and extending vector loads/stores aren't handled - // directly. - for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { - - setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, - Expand); - - for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) - setTruncStoreAction((MVT::SimpleValueType)VT, - (MVT::SimpleValueType)InnerVT, Expand); - setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); - setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); - setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); - } - - // Trap. - setOperationAction(ISD::TRAP, MVT::Other, Legal); - setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal); - - // We combine OR nodes for bitfield operations. - setTargetDAGCombine(ISD::OR); - - // Vector add and sub nodes may conceal a high-half opportunity. - // Also, try to fold ADD into CSINC/CSINV.. 
- setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::SUB); - - setTargetDAGCombine(ISD::XOR); - setTargetDAGCombine(ISD::SINT_TO_FP); - setTargetDAGCombine(ISD::UINT_TO_FP); - - setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); - - setTargetDAGCombine(ISD::ANY_EXTEND); - setTargetDAGCombine(ISD::ZERO_EXTEND); - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::BITCAST); - setTargetDAGCombine(ISD::CONCAT_VECTORS); - setTargetDAGCombine(ISD::STORE); - - setTargetDAGCombine(ISD::MUL); - - MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8; - MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4; - MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4; - - setStackPointerRegisterToSaveRestore(ARM64::SP); - - setSchedulingPreference(Sched::Hybrid); - - // Enable TBZ/TBNZ - MaskAndBranchFoldingIsLegal = true; - - setMinFunctionAlignment(2); - - RequireStrictAlign = StrictAlign; -} - -void ARM64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { - if (VT == MVT::v2f32) { - setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); - AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i32); - - setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); - AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i32); - } else if (VT == MVT::v2f64 || VT == MVT::v4f32) { - setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); - AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i64); - - setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); - AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i64); - } - - // Mark vector float intrinsics as expand. - if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) { - setOperationAction(ISD::FSIN, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FCOS, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FPOWI, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FPOW, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FLOG, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FLOG2, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FLOG10, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FEXP, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FEXP2, VT.getSimpleVT(), Expand); - } - - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT.getSimpleVT(), Custom); - setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); - setOperationAction(ISD::AND, VT.getSimpleVT(), Custom); - setOperationAction(ISD::OR, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); - - setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); - setOperationAction(ISD::VSELECT, VT.getSimpleVT(), Expand); - setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand); - - setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand); - 
setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand); - - setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom); -} - -void ARM64TargetLowering::addDRTypeForNEON(MVT VT) { - addRegisterClass(VT, &ARM64::FPR64RegClass); - addTypeForNEON(VT, MVT::v2i32); -} - -void ARM64TargetLowering::addQRTypeForNEON(MVT VT) { - addRegisterClass(VT, &ARM64::FPR128RegClass); - addTypeForNEON(VT, MVT::v4i32); -} - -EVT ARM64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { - if (!VT.isVector()) - return MVT::i32; - return VT.changeVectorElementTypeToInteger(); -} - -/// computeMaskedBitsForTargetNode - Determine which of the bits specified in -/// Mask are known to be either zero or one and return them in the -/// KnownZero/KnownOne bitsets. -void ARM64TargetLowering::computeMaskedBitsForTargetNode( - const SDValue Op, APInt &KnownZero, APInt &KnownOne, - const SelectionDAG &DAG, unsigned Depth) const { - switch (Op.getOpcode()) { - default: - break; - case ARM64ISD::CSEL: { - APInt KnownZero2, KnownOne2; - DAG.ComputeMaskedBits(Op->getOperand(0), KnownZero, KnownOne, Depth + 1); - DAG.ComputeMaskedBits(Op->getOperand(1), KnownZero2, KnownOne2, Depth + 1); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; - break; - } - case ISD::INTRINSIC_W_CHAIN: - break; - case ISD::INTRINSIC_WO_CHAIN: - case ISD::INTRINSIC_VOID: { - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - switch (IntNo) { - default: - break; - case Intrinsic::arm64_neon_umaxv: - case Intrinsic::arm64_neon_uminv: { - // Figure out the datatype of the vector operand. The UMINV instruction - // will zero extend the result, so we can mark as known zero all the - // bits larger than the element datatype. 32-bit or larger doesn't need - // this as those are legal types and will be handled by isel directly. - MVT VT = Op.getOperand(1).getValueType().getSimpleVT(); - unsigned BitWidth = KnownZero.getBitWidth(); - if (VT == MVT::v8i8 || VT == MVT::v16i8) { - assert(BitWidth >= 8 && "Unexpected width!"); - APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8); - KnownZero |= Mask; - } else if (VT == MVT::v4i16 || VT == MVT::v8i16) { - assert(BitWidth >= 16 && "Unexpected width!"); - APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16); - KnownZero |= Mask; - } - break; - } break; - } - } - } -} - -MVT ARM64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const { - return MVT::i64; -} - -unsigned ARM64TargetLowering::getMaximalGlobalOffset() const { - // FIXME: On ARM64, this depends on the type. - // Basically, the addressable offsets are o to 4095 * Ty.getSizeInBytes(). - // and the offset has to be a multiple of the related size in bytes. 
- return 4095; -} - -FastISel * -ARM64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) const { - return ARM64::createFastISel(funcInfo, libInfo); -} - -const char *ARM64TargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: - return 0; - case ARM64ISD::CALL: return "ARM64ISD::CALL"; - case ARM64ISD::ADRP: return "ARM64ISD::ADRP"; - case ARM64ISD::ADDlow: return "ARM64ISD::ADDlow"; - case ARM64ISD::LOADgot: return "ARM64ISD::LOADgot"; - case ARM64ISD::RET_FLAG: return "ARM64ISD::RET_FLAG"; - case ARM64ISD::BRCOND: return "ARM64ISD::BRCOND"; - case ARM64ISD::CSEL: return "ARM64ISD::CSEL"; - case ARM64ISD::FCSEL: return "ARM64ISD::FCSEL"; - case ARM64ISD::CSINV: return "ARM64ISD::CSINV"; - case ARM64ISD::CSNEG: return "ARM64ISD::CSNEG"; - case ARM64ISD::CSINC: return "ARM64ISD::CSINC"; - case ARM64ISD::THREAD_POINTER: return "ARM64ISD::THREAD_POINTER"; - case ARM64ISD::TLSDESC_CALL: return "ARM64ISD::TLSDESC_CALL"; - case ARM64ISD::ADC: return "ARM64ISD::ADC"; - case ARM64ISD::SBC: return "ARM64ISD::SBC"; - case ARM64ISD::ADDS: return "ARM64ISD::ADDS"; - case ARM64ISD::SUBS: return "ARM64ISD::SUBS"; - case ARM64ISD::ADCS: return "ARM64ISD::ADCS"; - case ARM64ISD::SBCS: return "ARM64ISD::SBCS"; - case ARM64ISD::ANDS: return "ARM64ISD::ANDS"; - case ARM64ISD::FCMP: return "ARM64ISD::FCMP"; - case ARM64ISD::FMIN: return "ARM64ISD::FMIN"; - case ARM64ISD::FMAX: return "ARM64ISD::FMAX"; - case ARM64ISD::DUP: return "ARM64ISD::DUP"; - case ARM64ISD::DUPLANE8: return "ARM64ISD::DUPLANE8"; - case ARM64ISD::DUPLANE16: return "ARM64ISD::DUPLANE16"; - case ARM64ISD::DUPLANE32: return "ARM64ISD::DUPLANE32"; - case ARM64ISD::DUPLANE64: return "ARM64ISD::DUPLANE64"; - case ARM64ISD::MOVI: return "ARM64ISD::MOVI"; - case ARM64ISD::MOVIshift: return "ARM64ISD::MOVIshift"; - case ARM64ISD::MOVIedit: return "ARM64ISD::MOVIedit"; - case ARM64ISD::MOVImsl: return "ARM64ISD::MOVImsl"; - case ARM64ISD::FMOV: return "ARM64ISD::FMOV"; - case ARM64ISD::MVNIshift: return "ARM64ISD::MVNIshift"; - case ARM64ISD::MVNImsl: return "ARM64ISD::MVNImsl"; - case ARM64ISD::BICi: return "ARM64ISD::BICi"; - case ARM64ISD::ORRi: return "ARM64ISD::ORRi"; - case ARM64ISD::NEG: return "ARM64ISD::NEG"; - case ARM64ISD::EXTR: return "ARM64ISD::EXTR"; - case ARM64ISD::ZIP1: return "ARM64ISD::ZIP1"; - case ARM64ISD::ZIP2: return "ARM64ISD::ZIP2"; - case ARM64ISD::UZP1: return "ARM64ISD::UZP1"; - case ARM64ISD::UZP2: return "ARM64ISD::UZP2"; - case ARM64ISD::TRN1: return "ARM64ISD::TRN1"; - case ARM64ISD::TRN2: return "ARM64ISD::TRN2"; - case ARM64ISD::REV16: return "ARM64ISD::REV16"; - case ARM64ISD::REV32: return "ARM64ISD::REV32"; - case ARM64ISD::REV64: return "ARM64ISD::REV64"; - case ARM64ISD::EXT: return "ARM64ISD::EXT"; - case ARM64ISD::VSHL: return "ARM64ISD::VSHL"; - case ARM64ISD::VLSHR: return "ARM64ISD::VLSHR"; - case ARM64ISD::VASHR: return "ARM64ISD::VASHR"; - case ARM64ISD::CMEQ: return "ARM64ISD::CMEQ"; - case ARM64ISD::CMGE: return "ARM64ISD::CMGE"; - case ARM64ISD::CMGT: return "ARM64ISD::CMGT"; - case ARM64ISD::CMHI: return "ARM64ISD::CMHI"; - case ARM64ISD::CMHS: return "ARM64ISD::CMHS"; - case ARM64ISD::FCMEQ: return "ARM64ISD::FCMEQ"; - case ARM64ISD::FCMGE: return "ARM64ISD::FCMGE"; - case ARM64ISD::FCMGT: return "ARM64ISD::FCMGT"; - case ARM64ISD::CMEQz: return "ARM64ISD::CMEQz"; - case ARM64ISD::CMGEz: return "ARM64ISD::CMGEz"; - case ARM64ISD::CMGTz: return "ARM64ISD::CMGTz"; - case ARM64ISD::CMLEz: return "ARM64ISD::CMLEz"; - 
case ARM64ISD::CMLTz: return "ARM64ISD::CMLTz"; - case ARM64ISD::FCMEQz: return "ARM64ISD::FCMEQz"; - case ARM64ISD::FCMGEz: return "ARM64ISD::FCMGEz"; - case ARM64ISD::FCMGTz: return "ARM64ISD::FCMGTz"; - case ARM64ISD::FCMLEz: return "ARM64ISD::FCMLEz"; - case ARM64ISD::FCMLTz: return "ARM64ISD::FCMLTz"; - case ARM64ISD::NOT: return "ARM64ISD::NOT"; - case ARM64ISD::BIT: return "ARM64ISD::BIT"; - case ARM64ISD::CBZ: return "ARM64ISD::CBZ"; - case ARM64ISD::CBNZ: return "ARM64ISD::CBNZ"; - case ARM64ISD::TBZ: return "ARM64ISD::TBZ"; - case ARM64ISD::TBNZ: return "ARM64ISD::TBNZ"; - case ARM64ISD::TC_RETURN: return "ARM64ISD::TC_RETURN"; - case ARM64ISD::SITOF: return "ARM64ISD::SITOF"; - case ARM64ISD::UITOF: return "ARM64ISD::UITOF"; - case ARM64ISD::SQSHL_I: return "ARM64ISD::SQSHL_I"; - case ARM64ISD::UQSHL_I: return "ARM64ISD::UQSHL_I"; - case ARM64ISD::SRSHR_I: return "ARM64ISD::SRSHR_I"; - case ARM64ISD::URSHR_I: return "ARM64ISD::URSHR_I"; - case ARM64ISD::SQSHLU_I: return "ARM64ISD::SQSHLU_I"; - case ARM64ISD::WrapperLarge: return "ARM64ISD::WrapperLarge"; - } -} - -static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, - unsigned &LdrOpc, unsigned &StrOpc) { - static unsigned LoadBares[] = { ARM64::LDXRB, ARM64::LDXRH, ARM64::LDXRW, - ARM64::LDXRX, ARM64::LDXPX }; - static unsigned LoadAcqs[] = { ARM64::LDAXRB, ARM64::LDAXRH, ARM64::LDAXRW, - ARM64::LDAXRX, ARM64::LDAXPX }; - static unsigned StoreBares[] = { ARM64::STXRB, ARM64::STXRH, ARM64::STXRW, - ARM64::STXRX, ARM64::STXPX }; - static unsigned StoreRels[] = { ARM64::STLXRB, ARM64::STLXRH, ARM64::STLXRW, - ARM64::STLXRX, ARM64::STLXPX }; - - unsigned *LoadOps, *StoreOps; - if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) - LoadOps = LoadAcqs; - else - LoadOps = LoadBares; - - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) - StoreOps = StoreRels; - else - StoreOps = StoreBares; - - assert(isPowerOf2_32(Size) && Size <= 16 && - "unsupported size for atomic binary op!"); - - LdrOpc = LoadOps[Log2_32(Size)]; - StrOpc = StoreOps[Log2_32(Size)]; -} - -MachineBasicBlock *ARM64TargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size) const { - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned oldval = MI->getOperand(2).getReg(); - unsigned newval = MI->getOperand(3).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm()); - unsigned scratch = BB->getParent()->getRegInfo().createVirtualRegister( - &ARM64::GPR32RegClass); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); - - // FIXME: We currently always generate a seq_cst operation; we should - // be able to relax this in some cases. - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); - - MachineFunction *MF = BB->getParent(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; // insert the new blocks after the current block - - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loop1MBB); - MF->insert(It, loop2MBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. 
- exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // thisMBB: - // ... - // fallthrough --> loop1MBB - BB->addSuccessor(loop1MBB); - - // loop1MBB: - // ldrex dest, [ptr] - // cmp dest, oldval - // bne exitMBB - BB = loop1MBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - BuildMI(BB, dl, TII->get(Size == 8 ? ARM64::SUBSXrr : ARM64::SUBSWrr)) - .addReg(Size == 8 ? ARM64::XZR : ARM64::WZR, RegState::Define) - .addReg(dest) - .addReg(oldval); - BuildMI(BB, dl, TII->get(ARM64::Bcc)).addImm(ARM64CC::NE).addMBB(exitMBB); - BB->addSuccessor(loop2MBB); - BB->addSuccessor(exitMBB); - - // loop2MBB: - // strex scratch, newval, [ptr] - // cmp scratch, #0 - // bne loop1MBB - BB = loop2MBB; - BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); - BuildMI(BB, dl, TII->get(ARM64::CBNZW)).addReg(scratch).addMBB(loop1MBB); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -ARM64TargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size, unsigned BinOpcode) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - unsigned scratch = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass); - unsigned scratch2 = - (!BinOpcode) - ? incr - : RegInfo.createVirtualRegister(Size == 8 ? &ARM64::GPR64RegClass - : &ARM64::GPR32RegClass); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldxr dest, ptr - // <binop> scratch2, dest, incr - // stxr scratch, scratch2, ptr - // cbnz scratch, loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (BinOpcode) { - // operand order needs to go the other way for NAND - if (BinOpcode == ARM64::BICWrr || BinOpcode == ARM64::BICXrr) - BuildMI(BB, dl, TII->get(BinOpcode), scratch2).addReg(incr).addReg(dest); - else - BuildMI(BB, dl, TII->get(BinOpcode), scratch2).addReg(dest).addReg(incr); - } - - BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); - BuildMI(BB, dl, TII->get(ARM64::CBNZW)).addReg(scratch).addMBB(loopMBB); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. 
- - return BB; -} - -MachineBasicBlock *ARM64TargetLowering::EmitAtomicBinary128( - MachineInstr *MI, MachineBasicBlock *BB, unsigned BinOpcodeLo, - unsigned BinOpcodeHi) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned DestLo = MI->getOperand(0).getReg(); - unsigned DestHi = MI->getOperand(1).getReg(); - unsigned Ptr = MI->getOperand(2).getReg(); - unsigned IncrLo = MI->getOperand(3).getReg(); - unsigned IncrHi = MI->getOperand(4).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(5).getImm()); - DebugLoc DL = MI->getDebugLoc(); - - unsigned LdrOpc, StrOpc; - getExclusiveOperation(16, Ord, LdrOpc, StrOpc); - - MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, LoopMBB); - MF->insert(It, ExitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - ExitMBB->splice(ExitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(BB); - - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - unsigned ScratchRes = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass); - unsigned ScratchLo = IncrLo, ScratchHi = IncrHi; - if (BinOpcodeLo) { - assert(BinOpcodeHi && "Expect neither or both opcodes to be defined"); - ScratchLo = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); - ScratchHi = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); - } - - // ThisMBB: - // ... - // fallthrough --> LoopMBB - BB->addSuccessor(LoopMBB); - - // LoopMBB: - // ldxp DestLo, DestHi, Ptr - // <binoplo> ScratchLo, DestLo, IncrLo - // <binophi> ScratchHi, DestHi, IncrHi - // stxp ScratchRes, ScratchLo, ScratchHi, ptr - // cbnz ScratchRes, LoopMBB - // fallthrough --> ExitMBB - BB = LoopMBB; - BuildMI(BB, DL, TII->get(LdrOpc), DestLo) - .addReg(DestHi, RegState::Define) - .addReg(Ptr); - if (BinOpcodeLo) { - // operand order needs to go the other way for NAND - if (BinOpcodeLo == ARM64::BICXrr) { - std::swap(IncrLo, DestLo); - std::swap(IncrHi, DestHi); - } - - BuildMI(BB, DL, TII->get(BinOpcodeLo), ScratchLo).addReg(DestLo).addReg( - IncrLo); - BuildMI(BB, DL, TII->get(BinOpcodeHi), ScratchHi).addReg(DestHi).addReg( - IncrHi); - } - - BuildMI(BB, DL, TII->get(StrOpc), ScratchRes) - .addReg(ScratchLo) - .addReg(ScratchHi) - .addReg(Ptr); - BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(LoopMBB); - - BB->addSuccessor(LoopMBB); - BB->addSuccessor(ExitMBB); - - // ExitMBB: - // ... - BB = ExitMBB; - - MI->eraseFromParent(); // The instruction is gone now. 
- - return BB; -} - -MachineBasicBlock * -ARM64TargetLowering::EmitAtomicCmpSwap128(MachineInstr *MI, - MachineBasicBlock *BB) const { - unsigned DestLo = MI->getOperand(0).getReg(); - unsigned DestHi = MI->getOperand(1).getReg(); - unsigned Ptr = MI->getOperand(2).getReg(); - unsigned OldValLo = MI->getOperand(3).getReg(); - unsigned OldValHi = MI->getOperand(4).getReg(); - unsigned NewValLo = MI->getOperand(5).getReg(); - unsigned NewValHi = MI->getOperand(6).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(7).getImm()); - unsigned ScratchRes = BB->getParent()->getRegInfo().createVirtualRegister( - &ARM64::GPR32RegClass); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - - unsigned LdrOpc, StrOpc; - getExclusiveOperation(16, Ord, LdrOpc, StrOpc); - - MachineFunction *MF = BB->getParent(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; // insert the new blocks after the current block - - MachineBasicBlock *Loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *Loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, Loop1MBB); - MF->insert(It, Loop2MBB); - MF->insert(It, ExitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - ExitMBB->splice(ExitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // ThisMBB: - // ... - // fallthrough --> Loop1MBB - BB->addSuccessor(Loop1MBB); - - // Loop1MBB: - // ldxp DestLo, DestHi, [Ptr] - // cmp DestLo, OldValLo - // sbc xzr, DestHi, OldValHi - // bne ExitMBB - BB = Loop1MBB; - BuildMI(BB, DL, TII->get(LdrOpc), DestLo) - .addReg(DestHi, RegState::Define) - .addReg(Ptr); - BuildMI(BB, DL, TII->get(ARM64::SUBSXrr), ARM64::XZR).addReg(DestLo).addReg( - OldValLo); - BuildMI(BB, DL, TII->get(ARM64::SBCXr), ARM64::XZR).addReg(DestHi).addReg( - OldValHi); - - BuildMI(BB, DL, TII->get(ARM64::Bcc)).addImm(ARM64CC::NE).addMBB(ExitMBB); - BB->addSuccessor(Loop2MBB); - BB->addSuccessor(ExitMBB); - - // Loop2MBB: - // stxp ScratchRes, NewValLo, NewValHi, [Ptr] - // cbnz ScratchRes, Loop1MBB - BB = Loop2MBB; - BuildMI(BB, DL, TII->get(StrOpc), ScratchRes) - .addReg(NewValLo) - .addReg(NewValHi) - .addReg(Ptr); - BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(Loop1MBB); - BB->addSuccessor(Loop1MBB); - BB->addSuccessor(ExitMBB); - - // ExitMBB: - // ... - BB = ExitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock *ARM64TargetLowering::EmitAtomicMinMax128( - MachineInstr *MI, MachineBasicBlock *BB, unsigned CondCode) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned DestLo = MI->getOperand(0).getReg(); - unsigned DestHi = MI->getOperand(1).getReg(); - unsigned Ptr = MI->getOperand(2).getReg(); - unsigned IncrLo = MI->getOperand(3).getReg(); - unsigned IncrHi = MI->getOperand(4).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(5).getImm()); - DebugLoc DL = MI->getDebugLoc(); - - unsigned LdrOpc, StrOpc; - getExclusiveOperation(16, Ord, LdrOpc, StrOpc); - - MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, LoopMBB); - MF->insert(It, ExitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - ExitMBB->splice(ExitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(BB); - - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - unsigned ScratchRes = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass); - unsigned ScratchLo = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); - unsigned ScratchHi = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); - - // ThisMBB: - // ... - // fallthrough --> LoopMBB - BB->addSuccessor(LoopMBB); - - // LoopMBB: - // ldxp DestLo, DestHi, Ptr - // cmp ScratchLo, DestLo, IncrLo - // sbc xzr, ScratchHi, DestHi, IncrHi - // csel ScratchLo, DestLo, IncrLo, <cmp-op> - // csel ScratchHi, DestHi, IncrHi, <cmp-op> - // stxp ScratchRes, ScratchLo, ScratchHi, ptr - // cbnz ScratchRes, LoopMBB - // fallthrough --> ExitMBB - BB = LoopMBB; - BuildMI(BB, DL, TII->get(LdrOpc), DestLo) - .addReg(DestHi, RegState::Define) - .addReg(Ptr); - - BuildMI(BB, DL, TII->get(ARM64::SUBSXrr), ARM64::XZR).addReg(DestLo).addReg( - IncrLo); - BuildMI(BB, DL, TII->get(ARM64::SBCXr), ARM64::XZR).addReg(DestHi).addReg( - IncrHi); - - BuildMI(BB, DL, TII->get(ARM64::CSELXr), ScratchLo) - .addReg(DestLo) - .addReg(IncrLo) - .addImm(CondCode); - BuildMI(BB, DL, TII->get(ARM64::CSELXr), ScratchHi) - .addReg(DestHi) - .addReg(IncrHi) - .addImm(CondCode); - - BuildMI(BB, DL, TII->get(StrOpc), ScratchRes) - .addReg(ScratchLo) - .addReg(ScratchHi) - .addReg(Ptr); - BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(LoopMBB); - - BB->addSuccessor(LoopMBB); - BB->addSuccessor(ExitMBB); - - // ExitMBB: - // ... - BB = ExitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -ARM64TargetLowering::EmitF128CSEL(MachineInstr *MI, - MachineBasicBlock *MBB) const { - // We materialise the F128CSEL pseudo-instruction as some control flow and a - // phi node: - - // OrigBB: - // [... previous instrs leading to comparison ...] 
- // b.ne TrueBB - // b EndBB - // TrueBB: - // ; Fallthrough - // EndBB: - // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB] - - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - MachineFunction *MF = MBB->getParent(); - const BasicBlock *LLVM_BB = MBB->getBasicBlock(); - DebugLoc DL = MI->getDebugLoc(); - MachineFunction::iterator It = MBB; - ++It; - - unsigned DestReg = MI->getOperand(0).getReg(); - unsigned IfTrueReg = MI->getOperand(1).getReg(); - unsigned IfFalseReg = MI->getOperand(2).getReg(); - unsigned CondCode = MI->getOperand(3).getImm(); - bool CPSRKilled = MI->getOperand(4).isKill(); - - MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, TrueBB); - MF->insert(It, EndBB); - - // Transfer rest of current basic-block to EndBB - EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), - MBB->end()); - EndBB->transferSuccessorsAndUpdatePHIs(MBB); - - BuildMI(MBB, DL, TII->get(ARM64::Bcc)).addImm(CondCode).addMBB(TrueBB); - BuildMI(MBB, DL, TII->get(ARM64::B)).addMBB(EndBB); - MBB->addSuccessor(TrueBB); - MBB->addSuccessor(EndBB); - - // TrueBB falls through to the end. - TrueBB->addSuccessor(EndBB); - - if (!CPSRKilled) { - TrueBB->addLiveIn(ARM64::CPSR); - EndBB->addLiveIn(ARM64::CPSR); - } - - BuildMI(*EndBB, EndBB->begin(), DL, TII->get(ARM64::PHI), DestReg) - .addReg(IfTrueReg) - .addMBB(TrueBB) - .addReg(IfFalseReg) - .addMBB(MBB); - - MI->eraseFromParent(); - return EndBB; -} - -MachineBasicBlock * -ARM64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const { - switch (MI->getOpcode()) { - default: -#ifndef NDEBUG - MI->dump(); -#endif - assert(0 && "Unexpected instruction for custom inserter!"); - break; - - case ARM64::ATOMIC_LOAD_ADD_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::ADDWrr); - case ARM64::ATOMIC_LOAD_ADD_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::ADDWrr); - case ARM64::ATOMIC_LOAD_ADD_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::ADDWrr); - case ARM64::ATOMIC_LOAD_ADD_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::ADDXrr); - case ARM64::ATOMIC_LOAD_ADD_I128: - return EmitAtomicBinary128(MI, BB, ARM64::ADDSXrr, ARM64::ADCXr); - - case ARM64::ATOMIC_LOAD_AND_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::ANDWrr); - case ARM64::ATOMIC_LOAD_AND_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::ANDWrr); - case ARM64::ATOMIC_LOAD_AND_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::ANDWrr); - case ARM64::ATOMIC_LOAD_AND_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::ANDXrr); - case ARM64::ATOMIC_LOAD_AND_I128: - return EmitAtomicBinary128(MI, BB, ARM64::ANDXrr, ARM64::ANDXrr); - - case ARM64::ATOMIC_LOAD_OR_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::ORRWrr); - case ARM64::ATOMIC_LOAD_OR_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::ORRWrr); - case ARM64::ATOMIC_LOAD_OR_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::ORRWrr); - case ARM64::ATOMIC_LOAD_OR_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::ORRXrr); - case ARM64::ATOMIC_LOAD_OR_I128: - return EmitAtomicBinary128(MI, BB, ARM64::ORRXrr, ARM64::ORRXrr); - - case ARM64::ATOMIC_LOAD_XOR_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::EORWrr); - case ARM64::ATOMIC_LOAD_XOR_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::EORWrr); - case ARM64::ATOMIC_LOAD_XOR_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::EORWrr); - case ARM64::ATOMIC_LOAD_XOR_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::EORXrr); - case 
ARM64::ATOMIC_LOAD_XOR_I128: - return EmitAtomicBinary128(MI, BB, ARM64::EORXrr, ARM64::EORXrr); - - case ARM64::ATOMIC_LOAD_NAND_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::BICWrr); - case ARM64::ATOMIC_LOAD_NAND_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::BICWrr); - case ARM64::ATOMIC_LOAD_NAND_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::BICWrr); - case ARM64::ATOMIC_LOAD_NAND_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::BICXrr); - case ARM64::ATOMIC_LOAD_NAND_I128: - return EmitAtomicBinary128(MI, BB, ARM64::BICXrr, ARM64::BICXrr); - - case ARM64::ATOMIC_LOAD_SUB_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::SUBWrr); - case ARM64::ATOMIC_LOAD_SUB_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::SUBWrr); - case ARM64::ATOMIC_LOAD_SUB_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::SUBWrr); - case ARM64::ATOMIC_LOAD_SUB_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::SUBXrr); - case ARM64::ATOMIC_LOAD_SUB_I128: - return EmitAtomicBinary128(MI, BB, ARM64::SUBSXrr, ARM64::SBCXr); - - case ARM64::ATOMIC_LOAD_MIN_I128: - return EmitAtomicMinMax128(MI, BB, ARM64CC::LT); - - case ARM64::ATOMIC_LOAD_MAX_I128: - return EmitAtomicMinMax128(MI, BB, ARM64CC::GT); - - case ARM64::ATOMIC_LOAD_UMIN_I128: - return EmitAtomicMinMax128(MI, BB, ARM64CC::CC); - - case ARM64::ATOMIC_LOAD_UMAX_I128: - return EmitAtomicMinMax128(MI, BB, ARM64CC::HI); - - case ARM64::ATOMIC_SWAP_I8: - return EmitAtomicBinary(MI, BB, 1, 0); - case ARM64::ATOMIC_SWAP_I16: - return EmitAtomicBinary(MI, BB, 2, 0); - case ARM64::ATOMIC_SWAP_I32: - return EmitAtomicBinary(MI, BB, 4, 0); - case ARM64::ATOMIC_SWAP_I64: - return EmitAtomicBinary(MI, BB, 8, 0); - case ARM64::ATOMIC_SWAP_I128: - return EmitAtomicBinary128(MI, BB, 0, 0); - - case ARM64::ATOMIC_CMP_SWAP_I8: - return EmitAtomicCmpSwap(MI, BB, 1); - case ARM64::ATOMIC_CMP_SWAP_I16: - return EmitAtomicCmpSwap(MI, BB, 2); - case ARM64::ATOMIC_CMP_SWAP_I32: - return EmitAtomicCmpSwap(MI, BB, 4); - case ARM64::ATOMIC_CMP_SWAP_I64: - return EmitAtomicCmpSwap(MI, BB, 8); - case ARM64::ATOMIC_CMP_SWAP_I128: - return EmitAtomicCmpSwap128(MI, BB); - - case ARM64::F128CSEL: - return EmitF128CSEL(MI, BB); - - case TargetOpcode::STACKMAP: - case TargetOpcode::PATCHPOINT: - return emitPatchPoint(MI, BB); - } - llvm_unreachable("Unexpected instruction for custom inserter!"); -} - -//===----------------------------------------------------------------------===// -// ARM64 Lowering private implementation. -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Lowering Code -//===----------------------------------------------------------------------===// - -/// changeIntCCToARM64CC - Convert a DAG integer condition code to an ARM64 CC -static ARM64CC::CondCode changeIntCCToARM64CC(ISD::CondCode CC) { - switch (CC) { - default: - llvm_unreachable("Unknown condition code!"); - case ISD::SETNE: - return ARM64CC::NE; - case ISD::SETEQ: - return ARM64CC::EQ; - case ISD::SETGT: - return ARM64CC::GT; - case ISD::SETGE: - return ARM64CC::GE; - case ISD::SETLT: - return ARM64CC::LT; - case ISD::SETLE: - return ARM64CC::LE; - case ISD::SETUGT: - return ARM64CC::HI; - case ISD::SETUGE: - return ARM64CC::CS; - case ISD::SETULT: - return ARM64CC::CC; - case ISD::SETULE: - return ARM64CC::LS; - } -} - -/// changeFPCCToARM64CC - Convert a DAG fp condition code to an ARM64 CC. 
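The integer mapping above sends the unsigned predicates to CS, CC, HI and LS, which follows from how SUBS sets the carry flag for a subtraction (carry set means no borrow occurred). A small self-contained check of that correspondence; the flag-model names below are made up for illustration:

#include <cassert>
#include <cstdint>

// NZCV flags as AArch64's SUBS would set them for "a - b" (carry = no borrow).
struct Flags { bool n, z, c, v; };

Flags subsFlags(uint32_t a, uint32_t b) {
  uint32_t r = a - b;
  bool n = (int32_t)r < 0;
  bool z = r == 0;
  bool c = a >= b;                                // carry set <=> no borrow
  bool v = (((a ^ b) & (a ^ r)) >> 31) != 0;      // signed overflow of the subtract
  return {n, z, c, v};
}

int main() {
  // The unsigned rows of the table: SETUGE -> CS, SETULT -> CC,
  // SETUGT -> HI, SETULE -> LS.
  for (uint32_t a : {0u, 1u, 7u, 0xFFFFFFFFu})
    for (uint32_t b : {0u, 1u, 7u, 0xFFFFFFFFu}) {
      Flags f = subsFlags(a, b);
      assert((a >= b) == f.c);             // CS
      assert((a <  b) == !f.c);            // CC
      assert((a >  b) == (f.c && !f.z));   // HI
      assert((a <= b) == (!f.c || f.z));   // LS
    }
}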
-static void changeFPCCToARM64CC(ISD::CondCode CC, ARM64CC::CondCode &CondCode, - ARM64CC::CondCode &CondCode2) { - CondCode2 = ARM64CC::AL; - switch (CC) { - default: - llvm_unreachable("Unknown FP condition!"); - case ISD::SETEQ: - case ISD::SETOEQ: - CondCode = ARM64CC::EQ; - break; - case ISD::SETGT: - case ISD::SETOGT: - CondCode = ARM64CC::GT; - break; - case ISD::SETGE: - case ISD::SETOGE: - CondCode = ARM64CC::GE; - break; - case ISD::SETOLT: - CondCode = ARM64CC::MI; - break; - case ISD::SETOLE: - CondCode = ARM64CC::LS; - break; - case ISD::SETONE: - CondCode = ARM64CC::MI; - CondCode2 = ARM64CC::GT; - break; - case ISD::SETO: - CondCode = ARM64CC::VC; - break; - case ISD::SETUO: - CondCode = ARM64CC::VS; - break; - case ISD::SETUEQ: - CondCode = ARM64CC::EQ; - CondCode2 = ARM64CC::VS; - break; - case ISD::SETUGT: - CondCode = ARM64CC::HI; - break; - case ISD::SETUGE: - CondCode = ARM64CC::PL; - break; - case ISD::SETLT: - case ISD::SETULT: - CondCode = ARM64CC::LT; - break; - case ISD::SETLE: - case ISD::SETULE: - CondCode = ARM64CC::LE; - break; - case ISD::SETNE: - case ISD::SETUNE: - CondCode = ARM64CC::NE; - break; - } -} - -static bool isLegalArithImmed(uint64_t C) { - // Matches ARM64DAGToDAGISel::SelectArithImmed(). - return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); -} - -static SDValue emitComparison(SDValue LHS, SDValue RHS, SDLoc dl, - SelectionDAG &DAG) { - EVT VT = LHS.getValueType(); - - if (VT.isFloatingPoint()) - return DAG.getNode(ARM64ISD::FCMP, dl, VT, LHS, RHS); - - // The CMP instruction is just an alias for SUBS, and representing it as - // SUBS means that it's possible to get CSE with subtract operations. - // A later phase can perform the optimization of setting the destination - // register to WZR/XZR if it ends up being unused. - return DAG.getNode(ARM64ISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS) - .getValue(1); -} - -static SDValue getARM64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARM64cc, SelectionDAG &DAG, SDLoc dl) { - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { - EVT VT = RHS.getValueType(); - uint64_t C = RHSC->getZExtValue(); - if (!isLegalArithImmed(C)) { - // Constant does not fit, try adjusting it by one? - switch (CC) { - default: - break; - case ISD::SETLT: - case ISD::SETGE: - if ((VT == MVT::i32 && C != 0x80000000 && - isLegalArithImmed((uint32_t)(C - 1))) || - (VT == MVT::i64 && C != 0x80000000ULL && - isLegalArithImmed(C - 1ULL))) { - CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; - C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; - RHS = DAG.getConstant(C, VT); - } - break; - case ISD::SETULT: - case ISD::SETUGE: - if ((VT == MVT::i32 && C != 0 && - isLegalArithImmed((uint32_t)(C - 1))) || - (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) { - CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; - C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; - RHS = DAG.getConstant(C, VT); - } - break; - case ISD::SETLE: - case ISD::SETGT: - if ((VT == MVT::i32 && C != 0x7fffffff && - isLegalArithImmed((uint32_t)(C + 1))) || - (VT == MVT::i64 && C != 0x7ffffffffffffffULL && - isLegalArithImmed(C + 1ULL))) { - CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; - C = (VT == MVT::i32) ? 
(uint32_t)(C + 1) : C + 1; - RHS = DAG.getConstant(C, VT); - } - break; - case ISD::SETULE: - case ISD::SETUGT: - if ((VT == MVT::i32 && C != 0xffffffff && - isLegalArithImmed((uint32_t)(C + 1))) || - (VT == MVT::i64 && C != 0xfffffffffffffffULL && - isLegalArithImmed(C + 1ULL))) { - CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; - C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1; - RHS = DAG.getConstant(C, VT); - } - break; - } - } - } - - SDValue Cmp = emitComparison(LHS, RHS, dl, DAG); - ARM64CC::CondCode ARM64CC = changeIntCCToARM64CC(CC); - ARM64cc = DAG.getConstant(ARM64CC, MVT::i32); - return Cmp; -} - -static std::pair<SDValue, SDValue> -getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { - assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) && - "Unsupported value type"); - SDValue Value, Overflow; - SDLoc DL(Op); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - unsigned Opc = 0; - switch (Op.getOpcode()) { - default: - llvm_unreachable("Unknown overflow instruction!"); - case ISD::SADDO: - Opc = ARM64ISD::ADDS; - CC = ARM64CC::VS; - break; - case ISD::UADDO: - Opc = ARM64ISD::ADDS; - CC = ARM64CC::CS; - break; - case ISD::SSUBO: - Opc = ARM64ISD::SUBS; - CC = ARM64CC::VS; - break; - case ISD::USUBO: - Opc = ARM64ISD::SUBS; - CC = ARM64CC::CC; - break; - // Multiply needs a little bit extra work. - case ISD::SMULO: - case ISD::UMULO: { - CC = ARM64CC::NE; - bool IsSigned = (Op.getOpcode() == ISD::SMULO) ? true : false; - if (Op.getValueType() == MVT::i32) { - unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - // For a 32 bit multiply with overflow check we want the instruction - // selector to generate a widening multiply (SMADDL/UMADDL). For that we - // need to generate the following pattern: - // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b)) - LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS); - RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS); - SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); - SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul, - DAG.getConstant(0, MVT::i64)); - // On ARM64 the upper 32 bits are always zero extended for a 32 bit - // operation. We need to clear out the upper 32 bits, because we used a - // widening multiply that wrote all 64 bits. In the end this should be a - // noop. - Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add); - if (IsSigned) { - // The signed overflow check requires more than just a simple check for - // any bit set in the upper 32 bits of the result. These bits could be - // just the sign bits of a negative number. To perform the overflow - // check we have to arithmetic shift right the 32nd bit of the result by - // 31 bits. Then we compare the result to the upper 32 bits. - SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add, - DAG.getConstant(32, MVT::i64)); - UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits); - SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value, - DAG.getConstant(31, MVT::i64)); - // It is important that LowerBits is last, otherwise the arithmetic - // shift will not be folded into the compare (SUBS). - SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32); - Overflow = DAG.getNode(ARM64ISD::SUBS, DL, VTs, UpperBits, LowerBits) - .getValue(1); - } else { - // The overflow check for unsigned multiply is easy. We only need to - // check if any of the upper 32 bits are set. This can be done with a - // CMP (shifted register). 
For that we need to generate the following - // pattern: - // (i64 ARM64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32) - SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, - DAG.getConstant(32, MVT::i64)); - SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); - Overflow = - DAG.getNode(ARM64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), - UpperBits).getValue(1); - } - break; - } - assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type"); - // For the 64 bit multiply - Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); - if (IsSigned) { - SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS); - SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value, - DAG.getConstant(63, MVT::i64)); - // It is important that LowerBits is last, otherwise the arithmetic - // shift will not be folded into the compare (SUBS). - SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); - Overflow = DAG.getNode(ARM64ISD::SUBS, DL, VTs, UpperBits, LowerBits) - .getValue(1); - } else { - SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS); - SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); - Overflow = - DAG.getNode(ARM64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), - UpperBits).getValue(1); - } - break; - } - } // switch (...) - - if (Opc) { - SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32); - - // Emit the ARM64 operation with overflow check. - Value = DAG.getNode(Opc, DL, VTs, LHS, RHS); - Overflow = Value.getValue(1); - } - return std::make_pair(Value, Overflow); -} - -SDValue ARM64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, - RTLIB::Libcall Call) const { - SmallVector<SDValue, 2> Ops; - for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) - Ops.push_back(Op.getOperand(i)); - - return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false, - SDLoc(Op)).first; -} - -static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) { - SDValue Sel = Op.getOperand(0); - SDValue Other = Op.getOperand(1); - - // If neither operand is a SELECT_CC, give up. - if (Sel.getOpcode() != ISD::SELECT_CC) - std::swap(Sel, Other); - if (Sel.getOpcode() != ISD::SELECT_CC) - return Op; - - // The folding we want to perform is: - // (xor x, (select_cc a, b, cc, 0, -1) ) - // --> - // (csel x, (xor x, -1), cc ...) - // - // The latter will get matched to a CSINV instruction. - - ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get(); - SDValue LHS = Sel.getOperand(0); - SDValue RHS = Sel.getOperand(1); - SDValue TVal = Sel.getOperand(2); - SDValue FVal = Sel.getOperand(3); - SDLoc dl(Sel); - - // FIXME: This could be generalized to non-integer comparisons. - if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64) - return Op; - - ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal); - ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal); - - // The the values aren't constants, this isn't the pattern we're looking for. - if (!CFVal || !CTVal) - return Op; - - // We can commute the SELECT_CC by inverting the condition. This - // might be needed to make this fit into a CSINV pattern. - if (CTVal->isAllOnesValue() && CFVal->isNullValue()) { - std::swap(TVal, FVal); - std::swap(CTVal, CFVal); - CC = ISD::getSetCCInverse(CC, true); - } - - // If the constants line up, perform the transform! 
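For 32-bit SMULO/UMULO, the code above widens both operands to 64 bits, multiplies once, and inspects the upper 32 bits of the product: any set bit means unsigned overflow, while signed overflow means the upper half differs from the sign-extension of bit 31 of the low half. The same check restated as standalone C++ (function names are illustrative):

#include <cassert>
#include <cstdint>

// Unsigned 32-bit multiply overflow: any of the upper 32 bits set.
bool umulOverflows(uint32_t a, uint32_t b) {
  uint64_t wide = (uint64_t)a * (uint64_t)b;
  return (wide >> 32) != 0;
}

// Signed 32-bit multiply overflow: the upper 32 bits must equal the
// sign-extension of bit 31 of the low 32 bits (what the SRA #31 produces).
bool smulOverflows(int32_t a, int32_t b) {
  int64_t wide = (int64_t)a * (int64_t)b;
  int32_t lo = (int32_t)wide;
  return (wide >> 32) != (int64_t)(lo >> 31);
}

int main() {
  assert(!umulOverflows(65535, 65537) && umulOverflows(65536, 65536));
  assert(!smulOverflows(-46341, 46340) && smulOverflows(46341, 46341));
  assert(smulOverflows(INT32_MIN, -1));
}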
- if (CTVal->isNullValue() && CFVal->isAllOnesValue()) { - SDValue CCVal; - SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl); - - FVal = Other; - TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other, - DAG.getConstant(-1ULL, Other.getValueType())); - - return DAG.getNode(ARM64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal, - CCVal, Cmp); - } - - return Op; -} - -static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - - // Let legalize expand this if it isn't a legal type yet. - if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) - return SDValue(); - - SDVTList VTs = DAG.getVTList(VT, MVT::i32); - - unsigned Opc; - bool ExtraOp = false; - switch (Op.getOpcode()) { - default: - assert(0 && "Invalid code"); - case ISD::ADDC: - Opc = ARM64ISD::ADDS; - break; - case ISD::SUBC: - Opc = ARM64ISD::SUBS; - break; - case ISD::ADDE: - Opc = ARM64ISD::ADCS; - ExtraOp = true; - break; - case ISD::SUBE: - Opc = ARM64ISD::SBCS; - ExtraOp = true; - break; - } - - if (!ExtraOp) - return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1)); - return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1), - Op.getOperand(2)); -} - -static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { - // Let legalize expand this if it isn't a legal type yet. - if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) - return SDValue(); - - ARM64CC::CondCode CC; - // The actual operation that sets the overflow or carry flag. - SDValue Value, Overflow; - std::tie(Value, Overflow) = getARM64XALUOOp(CC, Op, DAG); - - // We use 0 and 1 as false and true values. - SDValue TVal = DAG.getConstant(1, MVT::i32); - SDValue FVal = DAG.getConstant(0, MVT::i32); - - // We use an inverted condition, because the conditional select is inverted - // too. This will allow it to be selected to a single instruction: - // CSINC Wd, WZR, WZR, invert(cond). - SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), MVT::i32); - Overflow = DAG.getNode(ARM64ISD::CSEL, SDLoc(Op), MVT::i32, FVal, TVal, CCVal, - Overflow); - - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); - return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow); -} - -// Prefetch operands are: -// 1: Address to prefetch -// 2: bool isWrite -// 3: int locality (0 = no locality ... 3 = extreme locality) -// 4: bool isDataCache -static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) { - SDLoc DL(Op); - unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); - unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); - // The data thing is not used. - // unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); - - bool IsStream = !Locality; - // When the locality number is set - if (Locality) { - // The front-end should have filtered out the out-of-range values - assert(Locality <= 3 && "Prefetch locality out-of-range"); - // The locality degree is the opposite of the cache speed. - // Put the number the other way around. - // The encoding starts at 0 for level 1 - Locality = 3 - Locality; - } - - // built the mask value encoding the expected behavior. 
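LowerXOR above rewrites xor(x, select_cc(a, b, cc, 0, -1)) into a conditional select between x and its complement, which the selector can then match to CSINV. The rewrite rests on a simple identity; a quick standalone check (names are illustrative, and no claim is made about the exact CSEL operand order used by the deleted code):

#include <cassert>
#include <cstdint>

// xor with an all-ones/zero mask chosen by a condition is the same as
// selecting between the value and its bitwise complement.
uint64_t beforeFold(bool cond, uint64_t x) {
  uint64_t mask = cond ? 0 : ~0ULL;   // select_cc ..., 0, -1
  return x ^ mask;
}

uint64_t afterFold(bool cond, uint64_t x) {
  return cond ? x : ~x;               // the form CSINV computes
}

int main() {
  for (uint64_t x : {0ULL, 1ULL, 0xDEADBEEFULL, ~0ULL})
    for (bool cond : {false, true})
      assert(beforeFold(cond, x) == afterFold(cond, x));
}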
- unsigned PrfOp = (IsWrite << 4) | // Load/Store bit - (Locality << 1) | // Cache level bits - (unsigned)IsStream; // Stream bit - return DAG.getNode(ARM64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0), - DAG.getConstant(PrfOp, MVT::i32), Op.getOperand(1)); -} - -SDValue ARM64TargetLowering::LowerFP_EXTEND(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); - - RTLIB::Libcall LC; - LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); - - return LowerF128Call(Op, DAG, LC); -} - -SDValue ARM64TargetLowering::LowerFP_ROUND(SDValue Op, - SelectionDAG &DAG) const { - if (Op.getOperand(0).getValueType() != MVT::f128) { - // It's legal except when f128 is involved - return Op; - } - - RTLIB::Libcall LC; - LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); - - // FP_ROUND node has a second operand indicating whether it is known to be - // precise. That doesn't take part in the LibCall so we can't directly use - // LowerF128Call. - SDValue SrcVal = Op.getOperand(0); - return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, - /*isSigned*/ false, SDLoc(Op)).first; -} - -static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { - // Warning: We maintain cost tables in ARM64TargetTransformInfo.cpp. - // Any additional optimization in this function should be recorded - // in the cost tables. - EVT InVT = Op.getOperand(0).getValueType(); - EVT VT = Op.getValueType(); - - // FP_TO_XINT conversion from the same type are legal. - if (VT.getSizeInBits() == InVT.getSizeInBits()) - return Op; - - if (InVT == MVT::v2f64) { - SDLoc dl(Op); - SDValue Cv = DAG.getNode(Op.getOpcode(), dl, MVT::v2i64, Op.getOperand(0)); - return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv); - } - - // Type changing conversions are illegal. - return SDValue(); -} - -SDValue ARM64TargetLowering::LowerFP_TO_INT(SDValue Op, - SelectionDAG &DAG) const { - if (Op.getOperand(0).getValueType().isVector()) - return LowerVectorFP_TO_INT(Op, DAG); - - if (Op.getOperand(0).getValueType() != MVT::f128) { - // It's legal except when f128 is involved - return Op; - } - - RTLIB::Libcall LC; - if (Op.getOpcode() == ISD::FP_TO_SINT) - LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType()); - else - LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); - - SmallVector<SDValue, 2> Ops; - for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) - Ops.push_back(Op.getOperand(i)); - - return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false, - SDLoc(Op)).first; -} - -static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { - // Warning: We maintain cost tables in ARM64TargetTransformInfo.cpp. - // Any additional optimization in this function should be recorded - // in the cost tables. - EVT VT = Op.getValueType(); - SDLoc dl(Op); - SDValue In = Op.getOperand(0); - EVT InVT = In.getValueType(); - - // v2i32 to v2f32 is legal. - if (VT == MVT::v2f32 && InVT == MVT::v2i32) - return Op; - - // This function only handles v2f64 outputs. - if (VT == MVT::v2f64) { - // Extend the input argument to a v2i64 that we can feed into the - // floating point conversion. Zero or sign extend based on whether - // we're doing a signed or unsigned float conversion. - unsigned Opc = - Op.getOpcode() == ISD::UINT_TO_FP ? 
ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; - assert(Op.getNumOperands() == 1 && "FP conversions take one argument"); - SDValue Promoted = DAG.getNode(Opc, dl, MVT::v2i64, Op.getOperand(0)); - return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Promoted); - } - - // Scalarize v2i64 to v2f32 conversions. - std::vector<SDValue> BuildVectorOps; - for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { - SDValue Sclr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, In, - DAG.getConstant(i, MVT::i64)); - Sclr = DAG.getNode(Op->getOpcode(), dl, MVT::f32, Sclr); - BuildVectorOps.push_back(Sclr); - } - - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &BuildVectorOps[0], - BuildVectorOps.size()); -} - -SDValue ARM64TargetLowering::LowerINT_TO_FP(SDValue Op, - SelectionDAG &DAG) const { - if (Op.getValueType().isVector()) - return LowerVectorINT_TO_FP(Op, DAG); - - // i128 conversions are libcalls. - if (Op.getOperand(0).getValueType() == MVT::i128) - return SDValue(); - - // Other conversions are legal, unless it's to the completely software-based - // fp128. - if (Op.getValueType() != MVT::f128) - return Op; - - RTLIB::Libcall LC; - if (Op.getOpcode() == ISD::SINT_TO_FP) - LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); - else - LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); - - return LowerF128Call(Op, DAG, LC); -} - -SDValue ARM64TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { - // For iOS, we want to call an alternative entry point: __sincos_stret, - // which returns the values in two S / D registers. - SDLoc dl(Op); - SDValue Arg = Op.getOperand(0); - EVT ArgVT = Arg.getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - - ArgListTy Args; - ArgListEntry Entry; - - Entry.Node = Arg; - Entry.Ty = ArgTy; - Entry.isSExt = false; - Entry.isZExt = false; - Args.push_back(Entry); - - const char *LibcallName = - (ArgVT == MVT::f64) ? 
"__sincos_stret" : "__sincosf_stret"; - SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); - - StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); - TargetLowering::CallLoweringInfo CLI( - DAG.getEntryNode(), RetTy, false, false, false, false, 0, - CallingConv::Fast, /*isTaillCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed*/ true, Callee, Args, DAG, dl); - std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); - return CallResult.first; -} - -SDValue ARM64TargetLowering::LowerOperation(SDValue Op, - SelectionDAG &DAG) const { - switch (Op.getOpcode()) { - default: - llvm_unreachable("unimplemented operand"); - return SDValue(); - case ISD::GlobalAddress: - return LowerGlobalAddress(Op, DAG); - case ISD::GlobalTLSAddress: - return LowerGlobalTLSAddress(Op, DAG); - case ISD::SETCC: - return LowerSETCC(Op, DAG); - case ISD::BR_CC: - return LowerBR_CC(Op, DAG); - case ISD::SELECT: - return LowerSELECT(Op, DAG); - case ISD::SELECT_CC: - return LowerSELECT_CC(Op, DAG); - case ISD::JumpTable: - return LowerJumpTable(Op, DAG); - case ISD::ConstantPool: - return LowerConstantPool(Op, DAG); - case ISD::BlockAddress: - return LowerBlockAddress(Op, DAG); - case ISD::VASTART: - return LowerVASTART(Op, DAG); - case ISD::VACOPY: - return LowerVACOPY(Op, DAG); - case ISD::VAARG: - return LowerVAARG(Op, DAG); - case ISD::ADDC: - case ISD::ADDE: - case ISD::SUBC: - case ISD::SUBE: - return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); - case ISD::SADDO: - case ISD::UADDO: - case ISD::SSUBO: - case ISD::USUBO: - case ISD::SMULO: - case ISD::UMULO: - return LowerXALUO(Op, DAG); - case ISD::FADD: - return LowerF128Call(Op, DAG, RTLIB::ADD_F128); - case ISD::FSUB: - return LowerF128Call(Op, DAG, RTLIB::SUB_F128); - case ISD::FMUL: - return LowerF128Call(Op, DAG, RTLIB::MUL_F128); - case ISD::FDIV: - return LowerF128Call(Op, DAG, RTLIB::DIV_F128); - case ISD::FP_ROUND: - return LowerFP_ROUND(Op, DAG); - case ISD::FP_EXTEND: - return LowerFP_EXTEND(Op, DAG); - case ISD::FRAMEADDR: - return LowerFRAMEADDR(Op, DAG); - case ISD::RETURNADDR: - return LowerRETURNADDR(Op, DAG); - case ISD::INSERT_VECTOR_ELT: - return LowerINSERT_VECTOR_ELT(Op, DAG); - case ISD::EXTRACT_VECTOR_ELT: - return LowerEXTRACT_VECTOR_ELT(Op, DAG); - case ISD::SCALAR_TO_VECTOR: - return LowerSCALAR_TO_VECTOR(Op, DAG); - case ISD::BUILD_VECTOR: - return LowerBUILD_VECTOR(Op, DAG); - case ISD::VECTOR_SHUFFLE: - return LowerVECTOR_SHUFFLE(Op, DAG); - case ISD::EXTRACT_SUBVECTOR: - return LowerEXTRACT_SUBVECTOR(Op, DAG); - case ISD::SRA: - case ISD::SRL: - case ISD::SHL: - return LowerVectorSRA_SRL_SHL(Op, DAG); - case ISD::SHL_PARTS: - return LowerShiftLeftParts(Op, DAG); - case ISD::SRL_PARTS: - case ISD::SRA_PARTS: - return LowerShiftRightParts(Op, DAG); - case ISD::CTPOP: - return LowerCTPOP(Op, DAG); - case ISD::FCOPYSIGN: - return LowerFCOPYSIGN(Op, DAG); - case ISD::AND: - return LowerVectorAND(Op, DAG); - case ISD::OR: - return LowerVectorOR(Op, DAG); - case ISD::XOR: - return LowerXOR(Op, DAG); - case ISD::PREFETCH: - return LowerPREFETCH(Op, DAG); - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - return LowerINT_TO_FP(Op, DAG); - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - return LowerFP_TO_INT(Op, DAG); - case ISD::FSINCOS: - return LowerFSINCOS(Op, DAG); - } -} - -/// getFunctionAlignment - Return the Log2 alignment of this function. 
-unsigned ARM64TargetLowering::getFunctionAlignment(const Function *F) const { - return 2; -} - -//===----------------------------------------------------------------------===// -// Calling Convention Implementation -//===----------------------------------------------------------------------===// - -#include "ARM64GenCallingConv.inc" - -/// Selects the correct CCAssignFn for a the given CallingConvention -/// value. -CCAssignFn *ARM64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, - bool IsVarArg) const { - switch (CC) { - default: - llvm_unreachable("Unsupported calling convention."); - case CallingConv::WebKit_JS: - return CC_ARM64_WebKit_JS; - case CallingConv::C: - case CallingConv::Fast: - if (!Subtarget->isTargetDarwin()) - return CC_ARM64_AAPCS; - return IsVarArg ? CC_ARM64_DarwinPCS_VarArg : CC_ARM64_DarwinPCS; - } -} - -SDValue ARM64TargetLowering::LowerFormalArguments( - SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - - // Assign locations to all of the incoming arguments. - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - - // At this point, Ins[].VT may already be promoted to i32. To correctly - // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and - // i8 to CC_ARM64_AAPCS with i32 being ValVT and i8 being LocVT. - // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here - // we use a special version of AnalyzeFormalArguments to pass in ValVT and - // LocVT. - unsigned NumArgs = Ins.size(); - Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); - unsigned CurArgIdx = 0; - for (unsigned i = 0; i != NumArgs; ++i) { - MVT ValVT = Ins[i].VT; - std::advance(CurOrigArg, Ins[i].OrigArgIndex - CurArgIdx); - CurArgIdx = Ins[i].OrigArgIndex; - - // Get type of the original argument. - EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true); - MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other; - // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. - MVT LocVT = ValVT; - if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) - LocVT = MVT::i8; - else if (ActualMVT == MVT::i16) - LocVT = MVT::i16; - - CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false); - bool Res = - AssignFn(i, ValVT, LocVT, CCValAssign::Full, Ins[i].Flags, CCInfo); - assert(!Res && "Call operand has unhandled type"); - (void)Res; - } - - SmallVector<SDValue, 16> ArgValues; - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - - // Arguments stored in registers. 
- if (VA.isRegLoc()) { - EVT RegVT = VA.getLocVT(); - - SDValue ArgValue; - const TargetRegisterClass *RC; - - if (RegVT == MVT::i32) - RC = &ARM64::GPR32RegClass; - else if (RegVT == MVT::i64) - RC = &ARM64::GPR64RegClass; - else if (RegVT == MVT::f32) - RC = &ARM64::FPR32RegClass; - else if (RegVT == MVT::f64 || RegVT == MVT::v1i64 || - RegVT == MVT::v1f64 || RegVT == MVT::v2i32 || - RegVT == MVT::v4i16 || RegVT == MVT::v8i8) - RC = &ARM64::FPR64RegClass; - else if (RegVT == MVT::v2i64 || RegVT == MVT::v4i32 || - RegVT == MVT::v8i16 || RegVT == MVT::v16i8) - RC = &ARM64::FPR128RegClass; - else - llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); - - // Transform the arguments in physical registers into virtual ones. - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); - ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); - - // If this is an 8, 16 or 32-bit value, it is really passed promoted - // to 64 bits. Insert an assert[sz]ext to capture this, then - // truncate to the right size. - switch (VA.getLocInfo()) { - default: - llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: - break; - case CCValAssign::BCvt: - ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue); - break; - case CCValAssign::SExt: - ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue, - DAG.getValueType(VA.getValVT())); - ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue); - break; - case CCValAssign::ZExt: - ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue, - DAG.getValueType(VA.getValVT())); - ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue); - break; - } - - InVals.push_back(ArgValue); - - } else { // VA.isRegLoc() - assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem"); - unsigned ArgOffset = VA.getLocMemOffset(); - unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; - int FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true); - - // Create load nodes to retrieve arguments from the stack. - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), false, - false, false, 0)); - } - } - - // varargs - if (isVarArg) { - if (!Subtarget->isTargetDarwin()) { - // The AAPCS variadic function ABI is identical to the non-variadic - // one. As a result there may be more arguments in registers and we should - // save them for future reference. - saveVarArgRegisters(CCInfo, DAG, DL, Chain); - } - - ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>(); - // This will point to the next argument passed via stack. - unsigned StackOffset = CCInfo.getNextStackOffset(); - // We currently pass all varargs at 8-byte alignment. 
- StackOffset = ((StackOffset + 7) & ~7); - AFI->setVarArgsStackIndex(MFI->CreateFixedObject(4, StackOffset, true)); - } - - return Chain; -} - -void ARM64TargetLowering::saveVarArgRegisters(CCState &CCInfo, - SelectionDAG &DAG, SDLoc DL, - SDValue &Chain) const { - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - ARM64FunctionInfo *FuncInfo = MF.getInfo<ARM64FunctionInfo>(); - - SmallVector<SDValue, 8> MemOps; - - static const uint16_t GPRArgRegs[] = { ARM64::X0, ARM64::X1, ARM64::X2, - ARM64::X3, ARM64::X4, ARM64::X5, - ARM64::X6, ARM64::X7 }; - static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs); - unsigned FirstVariadicGPR = - CCInfo.getFirstUnallocated(GPRArgRegs, NumGPRArgRegs); - - static const uint16_t FPRArgRegs[] = { ARM64::Q0, ARM64::Q1, ARM64::Q2, - ARM64::Q3, ARM64::Q4, ARM64::Q5, - ARM64::Q6, ARM64::Q7 }; - static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs); - unsigned FirstVariadicFPR = - CCInfo.getFirstUnallocated(FPRArgRegs, NumFPRArgRegs); - - unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR); - int GPRIdx = 0; - if (GPRSaveSize != 0) { - GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false); - - SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy()); - - for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) { - unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &ARM64::GPR64RegClass); - SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); - SDValue Store = - DAG.getStore(Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(i * 8), false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(8, getPointerTy())); - } - } - - unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR); - int FPRIdx = 0; - if (FPRSaveSize != 0) { - FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false); - - SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy()); - - for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { - unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &ARM64::FPR128RegClass); - SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::v2i64); - SDValue Store = - DAG.getStore(Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(i * 16), false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(16, getPointerTy())); - } - } - - FuncInfo->setVarArgsGPRIndex(GPRIdx); - FuncInfo->setVarArgsGPRSize(GPRSaveSize); - FuncInfo->setVarArgsFPRIndex(FPRIdx); - FuncInfo->setVarArgsFPRSize(FPRSaveSize); - - if (!MemOps.empty()) { - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], - MemOps.size()); - } -} - -/// LowerCallResult - Lower the result values of a call into the -/// appropriate copies out of appropriate physical registers. -SDValue ARM64TargetLowering::LowerCallResult( - SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals, bool isThisReturn, - SDValue ThisVal) const { - CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; - // Assign locations to each value returned by this call. - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, RetCC); - - // Copy all of the result registers out of their specified physreg. 
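saveVarArgRegisters above spills the still-unallocated argument registers X0-X7 and Q0-Q7 into fixed stack slots so that va_arg can later walk them, and the next variadic stack offset is rounded up to 8-byte alignment. Nothing backend-specific is needed to exercise that machinery from source; a minimal variadic consumer, assuming only the usual C varargs rules:

#include <cassert>
#include <cstdarg>

// Sums 'n' further argument pairs passed as int and double.
// On AArch64 the ints come out of the spilled GPR save area and the
// doubles out of the FPR save area that the deleted code sets up.
double sumMixed(int n, ...) {
  va_list ap;
  va_start(ap, n);
  double total = 0;
  for (int i = 0; i < n; ++i) {
    total += va_arg(ap, int);
    total += va_arg(ap, double);
  }
  va_end(ap);
  return total;
}

int main() {
  assert(sumMixed(2, 1, 2.5, 3, 4.5) == 11.0);
}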
- for (unsigned i = 0; i != RVLocs.size(); ++i) { - CCValAssign VA = RVLocs[i]; - - // Pass 'this' value directly from the argument to return value, to avoid - // reg unit interference - if (i == 0 && isThisReturn) { - assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 && - "unexpected return calling convention register assignment"); - InVals.push_back(ThisVal); - continue; - } - - SDValue Val = - DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag); - Chain = Val.getValue(1); - InFlag = Val.getValue(2); - - switch (VA.getLocInfo()) { - default: - llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: - break; - case CCValAssign::BCvt: - Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); - break; - } - - InVals.push_back(Val); - } - - return Chain; -} - -bool ARM64TargetLowering::isEligibleForTailCallOptimization( - SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, - bool isCalleeStructRet, bool isCallerStructRet, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const { - // Look for obvious safe cases to perform tail call optimization that do not - // require ABI changes. This is what gcc calls sibcall. - - // Do not sibcall optimize vararg calls unless the call site is not passing - // any arguments. - if (isVarArg && !Outs.empty()) - return false; - - // Also avoid sibcall optimization if either caller or callee uses struct - // return semantics. - if (isCalleeStructRet || isCallerStructRet) - return false; - - // Note that currently ARM64 "C" calling convention and "Fast" calling - // convention are compatible. If/when that ever changes, we'll need to - // add checks here to make sure any interactions are OK. - - // If the callee takes no arguments then go on to check the results of the - // call. - if (!Outs.empty()) { - // Check if stack adjustment is needed. For now, do not do this if any - // argument is passed on the stack. - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - CCAssignFn *AssignFn = CCAssignFnForCall(CalleeCC, /*IsVarArg=*/false); - CCInfo.AnalyzeCallOperands(Outs, AssignFn); - if (CCInfo.getNextStackOffset()) { - // Check if the arguments are already laid out in the right way as - // the caller's fixed stack objects. - for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; - ++i, ++realArgIdx) { - CCValAssign &VA = ArgLocs[i]; - if (VA.getLocInfo() == CCValAssign::Indirect) - return false; - if (VA.needsCustom()) { - // Just don't handle anything that needs custom adjustments for now. - // If need be, we can revisit later, but we shouldn't ever end up - // here. - return false; - } else if (!VA.isRegLoc()) { - // Likewise, don't try to handle stack based arguments for the - // time being. - return false; - } - } - } - } - - return true; -} -/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain, -/// and add input and output parameter nodes. 
-SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const { - SelectionDAG &DAG = CLI.DAG; - SDLoc &DL = CLI.DL; - SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; - SmallVector<SDValue, 32> &OutVals = CLI.OutVals; - SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; - SDValue Chain = CLI.Chain; - SDValue Callee = CLI.Callee; - bool &IsTailCall = CLI.IsTailCall; - CallingConv::ID CallConv = CLI.CallConv; - bool IsVarArg = CLI.IsVarArg; - - MachineFunction &MF = DAG.getMachineFunction(); - bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); - bool IsThisReturn = false; - - // If tail calls are explicitly disabled, make sure not to use them. - if (!EnableARM64TailCalls) - IsTailCall = false; - - if (IsTailCall) { - // Check if it's really possible to do a tail call. - IsTailCall = isEligibleForTailCallOptimization( - Callee, CallConv, IsVarArg, IsStructRet, - MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG); - // We don't support GuaranteedTailCallOpt, only automatically - // detected sibcalls. - // FIXME: Re-evaluate. Is this true? Should it be true? - if (IsTailCall) - ++NumTailCalls; - } - - // Analyze operands of the call, assigning locations to each operand. - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - - if (IsVarArg) { - // Handle fixed and variable vector arguments differently. - // Variable vector arguments always go into memory. - unsigned NumArgs = Outs.size(); - - for (unsigned i = 0; i != NumArgs; ++i) { - MVT ArgVT = Outs[i].VT; - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, - /*IsVarArg=*/ !Outs[i].IsFixed); - bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); - assert(!Res && "Call operand has unhandled type"); - (void)Res; - } - } else { - // At this point, Outs[].VT may already be promoted to i32. To correctly - // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and - // i8 to CC_ARM64_AAPCS with i32 being ValVT and i8 being LocVT. - // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here - // we use a special version of AnalyzeCallOperands to pass in ValVT and - // LocVT. - unsigned NumArgs = Outs.size(); - for (unsigned i = 0; i != NumArgs; ++i) { - MVT ValVT = Outs[i].VT; - // Get type of the original argument. - EVT ActualVT = getValueType(CLI.Args[Outs[i].OrigArgIndex].Ty, - /*AllowUnknown*/ true); - MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT; - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. - MVT LocVT = ValVT; - if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) - LocVT = MVT::i8; - else if (ActualMVT == MVT::i16) - LocVT = MVT::i16; - - CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false); - bool Res = AssignFn(i, ValVT, LocVT, CCValAssign::Full, ArgFlags, CCInfo); - assert(!Res && "Call operand has unhandled type"); - (void)Res; - } - } - - // Get a count of how many bytes are to be pushed on the stack. - unsigned NumBytes = CCInfo.getNextStackOffset(); - - // Adjust the stack pointer for the new arguments... 
- // These operations are automatically eliminated by the prolog/epilog pass - if (!IsTailCall) - Chain = - DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), DL); - - SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, ARM64::SP, getPointerTy()); - - SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; - SmallVector<SDValue, 8> MemOpChains; - - // Walk the register/memloc assignments, inserting copies/loads. - for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; - ++i, ++realArgIdx) { - CCValAssign &VA = ArgLocs[i]; - SDValue Arg = OutVals[realArgIdx]; - ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; - - // Promote the value if needed. - switch (VA.getLocInfo()) { - default: - llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: - break; - case CCValAssign::SExt: - Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg); - break; - case CCValAssign::ZExt: - Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); - break; - case CCValAssign::AExt: - Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); - break; - case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); - break; - case CCValAssign::FPExt: - Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg); - break; - } - - if (VA.isRegLoc()) { - if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i64) { - assert(VA.getLocVT() == MVT::i64 && - "unexpected calling convention register assignment"); - assert(!Ins.empty() && Ins[0].VT == MVT::i64 && - "unexpected use of 'returned'"); - IsThisReturn = true; - } - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } else { - assert(VA.isMemLoc()); - // There's no reason we can't support stack args w/ tailcall, but - // we currently don't, so assert if we see one. - assert(!IsTailCall && "stack argument with tail call!?"); - unsigned LocMemOffset = VA.getLocMemOffset(); - SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); - PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff); - - // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already - // promoted to a legal register type i32, we should truncate Arg back to - // i1/i8/i16. - if (Arg.getValueType().isSimple() && - Arg.getValueType().getSimpleVT() == MVT::i32 && - (VA.getLocVT() == MVT::i1 || VA.getLocVT() == MVT::i8 || - VA.getLocVT() == MVT::i16)) - Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getLocVT(), Arg); - - SDValue Store = DAG.getStore(Chain, DL, Arg, PtrOff, - MachinePointerInfo::getStack(LocMemOffset), - false, false, 0); - MemOpChains.push_back(Store); - } - } - - if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOpChains[0], - MemOpChains.size()); - - // Build a sequence of copy-to-reg nodes chained together with token chain - // and flag operands which copy the outgoing args into the appropriate regs. - SDValue InFlag; - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every - // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol - // node so that legalize doesn't hack it. 
- if (getTargetMachine().getCodeModel() == CodeModel::Large && - Subtarget->isTargetMachO()) { - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - const GlobalValue *GV = G->getGlobal(); - bool InternalLinkage = GV->hasInternalLinkage(); - if (InternalLinkage) - Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0); - else { - Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, - ARM64II::MO_GOT); - Callee = DAG.getNode(ARM64ISD::LOADgot, DL, getPointerTy(), Callee); - } - } else if (ExternalSymbolSDNode *S = - dyn_cast<ExternalSymbolSDNode>(Callee)) { - const char *Sym = S->getSymbol(); - Callee = - DAG.getTargetExternalSymbol(Sym, getPointerTy(), ARM64II::MO_GOT); - Callee = DAG.getNode(ARM64ISD::LOADgot, DL, getPointerTy(), Callee); - } - } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - const GlobalValue *GV = G->getGlobal(); - Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0); - } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - const char *Sym = S->getSymbol(); - Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), 0); - } - - std::vector<SDValue> Ops; - Ops.push_back(Chain); - Ops.push_back(Callee); - - // Add argument registers to the end of the list so that they are known live - // into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - // Add a register mask operand representing the call-preserved registers. - const uint32_t *Mask; - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const ARM64RegisterInfo *ARI = static_cast<const ARM64RegisterInfo *>(TRI); - if (IsThisReturn) { - // For 'this' returns, use the X0-preserving mask if applicable - Mask = ARI->getThisReturnPreservedMask(CallConv); - if (!Mask) { - IsThisReturn = false; - Mask = ARI->getCallPreservedMask(CallConv); - } - } else - Mask = ARI->getCallPreservedMask(CallConv); - - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); - - if (InFlag.getNode()) - Ops.push_back(InFlag); - - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - - // If we're doing a tall call, use a TC_RETURN here rather than an - // actual call instruction. - if (IsTailCall) - return DAG.getNode(ARM64ISD::TC_RETURN, DL, NodeTys, &Ops[0], Ops.size()); - - // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(ARM64ISD::CALL, DL, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); - - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag, DL); - if (!Ins.empty()) - InFlag = Chain.getValue(1); - - // Handle result values, copying them out of physregs into vregs that we - // return. - return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG, - InVals, IsThisReturn, - IsThisReturn ? OutVals[0] : SDValue()); -} - -bool ARM64TargetLowering::CanLowerReturn( - CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { - CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? 
RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context); - return CCInfo.CheckReturn(Outs, RetCC); -} - -SDValue -ARM64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - SDLoc DL, SelectionDAG &DAG) const { - CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeReturn(Outs, RetCC); - - // Copy the result values into the output registers. - SDValue Flag; - SmallVector<SDValue, 4> RetOps(1, Chain); - for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); - ++i, ++realRVLocIdx) { - CCValAssign &VA = RVLocs[i]; - assert(VA.isRegLoc() && "Can only return in registers!"); - SDValue Arg = OutVals[realRVLocIdx]; - - switch (VA.getLocInfo()) { - default: - llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: - break; - case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); - break; - } - - Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag); - Flag = Chain.getValue(1); - RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); - } - - RetOps[0] = Chain; // Update chain. - - // Add the flag if we have it. - if (Flag.getNode()) - RetOps.push_back(Flag); - - return DAG.getNode(ARM64ISD::RET_FLAG, DL, MVT::Other, &RetOps[0], - RetOps.size()); -} - -//===----------------------------------------------------------------------===// -// Other Lowering Code -//===----------------------------------------------------------------------===// - -SDValue ARM64TargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); - SDLoc DL(Op); - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - unsigned char OpFlags = - Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); - - assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 && - "unexpected offset in global node"); - - // This also catched the large code model case for Darwin. - if ((OpFlags & ARM64II::MO_GOT) != 0) { - SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); - // FIXME: Once remat is capable of dealing with instructions with register - // operands, expand this into two nodes instead of using a wrapper node. - return DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, GotAddr); - } - - if (getTargetMachine().getCodeModel() == CodeModel::Large) { - const unsigned char MO_NC = ARM64II::MO_NC; - return DAG.getNode( - ARM64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G3), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G2 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G1 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G0 | MO_NC)); - } else { - // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and - // the only correct model on Darwin. 
- SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - OpFlags | ARM64II::MO_PAGE); - unsigned char LoFlags = OpFlags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC; - SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags); - - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); - } -} - -/// \brief Convert a TLS address reference into the correct sequence of loads -/// and calls to compute the variable's address (for Darwin, currently) and -/// return an SDValue containing the final node. - -/// Darwin only has one TLS scheme which must be capable of dealing with the -/// fully general situation, in the worst case. This means: -/// + "extern __thread" declaration. -/// + Defined in a possibly unknown dynamic library. -/// -/// The general system is that each __thread variable has a [3 x i64] descriptor -/// which contains information used by the runtime to calculate the address. The -/// only part of this the compiler needs to know about is the first xword, which -/// contains a function pointer that must be called with the address of the -/// entire descriptor in "x0". -/// -/// Since this descriptor may be in a different unit, in general even the -/// descriptor must be accessed via an indirect load. The "ideal" code sequence -/// is: -/// adrp x0, _var@TLVPPAGE -/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor -/// ldr x1, [x0] ; x1 contains 1st entry of descriptor, -/// ; the function pointer -/// blr x1 ; Uses descriptor address in x0 -/// ; Address of _var is now in x0. -/// -/// If the address of _var's descriptor *is* known to the linker, then it can -/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for -/// a slight efficiency gain. -SDValue -ARM64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { - assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin"); - - SDLoc DL(Op); - MVT PtrVT = getPointerTy(); - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - - SDValue TLVPAddr = - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS); - SDValue DescAddr = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, TLVPAddr); - - // The first entry in the descriptor is a function pointer that we must call - // to obtain the address of the variable. - SDValue Chain = DAG.getEntryNode(); - SDValue FuncTLVGet = - DAG.getLoad(MVT::i64, DL, Chain, DescAddr, MachinePointerInfo::getGOT(), - false, true, true, 8); - Chain = FuncTLVGet.getValue(1); - - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - MFI->setAdjustsStack(true); - - // TLS calls preserve all registers except those that absolutely must be - // trashed: X0 (it takes an argument), LR (it's a call) and CPSR (let's not be - // silly). - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const ARM64RegisterInfo *ARI = static_cast<const ARM64RegisterInfo *>(TRI); - const uint32_t *Mask = ARI->getTLSCallPreservedMask(); - - // Finally, we can make the call. This is just a degenerate version of a - // normal ARM64 call node: x0 takes the address of the descriptor, and returns - // the address of the variable in this thread. 
- Chain = DAG.getCopyToReg(Chain, DL, ARM64::X0, DescAddr, SDValue()); - Chain = DAG.getNode(ARM64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue), - Chain, FuncTLVGet, DAG.getRegister(ARM64::X0, MVT::i64), - DAG.getRegisterMask(Mask), Chain.getValue(1)); - return DAG.getCopyFromReg(Chain, DL, ARM64::X0, PtrVT, Chain.getValue(1)); -} - -/// When accessing thread-local variables under either the general-dynamic or -/// local-dynamic system, we make a "TLS-descriptor" call. The variable will -/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry -/// is a function pointer to carry out the resolution. This function takes the -/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All -/// other registers (except LR, CPSR) are preserved. -/// -/// Thus, the ideal call sequence on AArch64 is: -/// -/// adrp x0, :tlsdesc:thread_var -/// ldr x8, [x0, :tlsdesc_lo12:thread_var] -/// add x0, x0, :tlsdesc_lo12:thread_var -/// .tlsdesccall thread_var -/// blr x8 -/// (TPIDR_EL0 offset now in x0). -/// -/// The ".tlsdesccall" directive instructs the assembler to insert a particular -/// relocation to help the linker relax this sequence if it turns out to be too -/// conservative. -/// -/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this -/// is harmless. -SDValue ARM64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr, - SDValue DescAddr, SDLoc DL, - SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); - - // The function we need to call is simply the first entry in the GOT for this - // descriptor, load it in preparation. - SDValue Func = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, SymAddr); - - // TLS calls preserve all registers except those that absolutely must be - // trashed: X0 (it takes an argument), LR (it's a call) and CPSR (let's not be - // silly). - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const ARM64RegisterInfo *ARI = static_cast<const ARM64RegisterInfo *>(TRI); - const uint32_t *Mask = ARI->getTLSCallPreservedMask(); - - // The function takes only one argument: the address of the descriptor itself - // in X0. 
- SDValue Glue, Chain; - Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM64::X0, DescAddr, Glue); - Glue = Chain.getValue(1); - - // We're now ready to populate the argument list, as with a normal call: - SmallVector<SDValue, 6> Ops; - Ops.push_back(Chain); - Ops.push_back(Func); - Ops.push_back(SymAddr); - Ops.push_back(DAG.getRegister(ARM64::X0, PtrVT)); - Ops.push_back(DAG.getRegisterMask(Mask)); - Ops.push_back(Glue); - - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(ARM64ISD::TLSDESC_CALL, DL, NodeTys, &Ops[0], Ops.size()); - Glue = Chain.getValue(1); - - return DAG.getCopyFromReg(Chain, DL, ARM64::X0, PtrVT, Glue); -} - -SDValue ARM64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { - assert(Subtarget->isTargetELF() && "This function expects an ELF target"); - assert(getTargetMachine().getCodeModel() == CodeModel::Small && - "ELF TLS only supported in small memory model"); - const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); - - TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); - - SDValue TPOff; - EVT PtrVT = getPointerTy(); - SDLoc DL(Op); - const GlobalValue *GV = GA->getGlobal(); - - SDValue ThreadBase = DAG.getNode(ARM64ISD::THREAD_POINTER, DL, PtrVT); - - if (Model == TLSModel::LocalExec) { - SDValue HiVar = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_G1); - SDValue LoVar = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_G0 | ARM64II::MO_NC); - - TPOff = SDValue(DAG.getMachineNode(ARM64::MOVZXi, DL, PtrVT, HiVar, - DAG.getTargetConstant(16, MVT::i32)), - 0); - TPOff = SDValue(DAG.getMachineNode(ARM64::MOVKXi, DL, PtrVT, TPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), - 0); - } else if (Model == TLSModel::InitialExec) { - TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS); - TPOff = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, TPOff); - } else if (Model == TLSModel::LocalDynamic) { - // Local-dynamic accesses proceed in two phases. A general-dynamic TLS - // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate - // the beginning of the module's TLS region, followed by a DTPREL offset - // calculation. - - // These accesses will need deduplicating if there's more than one. - ARM64FunctionInfo *MFI = - DAG.getMachineFunction().getInfo<ARM64FunctionInfo>(); - MFI->incNumLocalDynamicTLSAccesses(); - - // Accesses used in this sequence go via the TLS descriptor which lives in - // the GOT. Prepare an address we can use to handle this. - SDValue HiDesc = DAG.getTargetExternalSymbol( - "_TLS_MODULE_BASE_", PtrVT, ARM64II::MO_TLS | ARM64II::MO_PAGE); - SDValue LoDesc = DAG.getTargetExternalSymbol( - "_TLS_MODULE_BASE_", PtrVT, - ARM64II::MO_TLS | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - - // First argument to the descriptor call is the address of the descriptor - // itself. - SDValue DescAddr = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, HiDesc); - DescAddr = DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); - - // The call needs a relocation too for linker relaxation. It doesn't make - // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of - // the address. - SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, - ARM64II::MO_TLS); - - // Now we can calculate the offset from TPIDR_EL0 to this module's - // thread-local area. 
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG); - - // Now use :dtprel_whatever: operations to calculate this variable's offset - // in its thread-storage area. - SDValue HiVar = DAG.getTargetGlobalAddress( - GV, DL, MVT::i64, 0, ARM64II::MO_TLS | ARM64II::MO_G1); - SDValue LoVar = DAG.getTargetGlobalAddress( - GV, DL, MVT::i64, 0, ARM64II::MO_TLS | ARM64II::MO_G0 | ARM64II::MO_NC); - - SDValue DTPOff = - SDValue(DAG.getMachineNode(ARM64::MOVZXi, DL, PtrVT, HiVar, - DAG.getTargetConstant(16, MVT::i32)), - 0); - DTPOff = SDValue(DAG.getMachineNode(ARM64::MOVKXi, DL, PtrVT, DTPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), - 0); - - TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff); - } else if (Model == TLSModel::GeneralDynamic) { - // Accesses used in this sequence go via the TLS descriptor which lives in - // the GOT. Prepare an address we can use to handle this. - SDValue HiDesc = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_PAGE); - SDValue LoDesc = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, - ARM64II::MO_TLS | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - - // First argument to the descriptor call is the address of the descriptor - // itself. - SDValue DescAddr = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, HiDesc); - DescAddr = DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); - - // The call needs a relocation too for linker relaxation. It doesn't make - // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of - // the address. - SDValue SymAddr = - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS); - - // Finally we can make a call to calculate the offset from tpidr_el0. - TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG); - } else - llvm_unreachable("Unsupported ELF TLS access model"); - - return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff); -} - -SDValue ARM64TargetLowering::LowerGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { - if (Subtarget->isTargetDarwin()) - return LowerDarwinGlobalTLSAddress(Op, DAG); - else if (Subtarget->isTargetELF()) - return LowerELFGlobalTLSAddress(Op, DAG); - - llvm_unreachable("Unexpected platform trying to use TLS"); -} -SDValue ARM64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); - SDValue LHS = Op.getOperand(2); - SDValue RHS = Op.getOperand(3); - SDValue Dest = Op.getOperand(4); - SDLoc dl(Op); - - // Handle f128 first, since lowering it will result in comparing the return - // value of a libcall against zero, which is just what the rest of LowerBR_CC - // is expecting to deal with. - if (LHS.getValueType() == MVT::f128) { - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); - - // If softenSetCCOperands returned a scalar, we need to compare the result - // against zero to select between true and false values. - if (RHS.getNode() == 0) { - RHS = DAG.getConstant(0, LHS.getValueType()); - CC = ISD::SETNE; - } - } - - // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch - // instruction. - unsigned Opc = LHS.getOpcode(); - if (LHS.getResNo() == 1 && isa<ConstantSDNode>(RHS) && - cast<ConstantSDNode>(RHS)->isOne() && - (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || - Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { - assert((CC == ISD::SETEQ || CC == ISD::SETNE) && - "Unexpected condition code."); - // Only lower legal XALUO ops. 
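// (Illustrative source that reaches this path, names being examples only:
//   int r;
//   if (__builtin_sadd_overflow(a, b, &r))   // llvm.sadd.with.overflow
//     ...
// which is typically selected as an ADDS followed by a single B.VS, rather
// than materializing the overflow bit and branching on it.)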
- if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0))) - return SDValue(); - - // The actual operation with overflow check. - ARM64CC::CondCode OFCC; - SDValue Value, Overflow; - std::tie(Value, Overflow) = getARM64XALUOOp(OFCC, LHS.getValue(0), DAG); - - if (CC == ISD::SETNE) - OFCC = getInvertedCondCode(OFCC); - SDValue CCVal = DAG.getConstant(OFCC, MVT::i32); - - return DAG.getNode(ARM64ISD::BRCOND, SDLoc(LHS), MVT::Other, Chain, Dest, - CCVal, Overflow); - } - - if (LHS.getValueType().isInteger()) { - assert((LHS.getValueType() == RHS.getValueType()) && - (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64)); - - // If the RHS of the comparison is zero, we can potentially fold this - // to a specialized branch. - const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS); - if (RHSC && RHSC->getZExtValue() == 0) { - if (CC == ISD::SETEQ) { - // See if we can use a TBZ to fold in an AND as well. - // TBZ has a smaller branch displacement than CBZ. If the offset is - // out of bounds, a late MI-layer pass rewrites branches. - // 403.gcc is an example that hits this case. - if (LHS.getOpcode() == ISD::AND && - isa<ConstantSDNode>(LHS.getOperand(1)) && - isPowerOf2_64(LHS.getConstantOperandVal(1))) { - SDValue Test = LHS.getOperand(0); - uint64_t Mask = LHS.getConstantOperandVal(1); - - // TBZ only operates on i64's, but the ext should be free. - if (Test.getValueType() == MVT::i32) - Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64); - - return DAG.getNode(ARM64ISD::TBZ, dl, MVT::Other, Chain, Test, - DAG.getConstant(Log2_64(Mask), MVT::i64), Dest); - } - - return DAG.getNode(ARM64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest); - } else if (CC == ISD::SETNE) { - // See if we can use a TBZ to fold in an AND as well. - // TBZ has a smaller branch displacement than CBZ. If the offset is - // out of bounds, a late MI-layer pass rewrites branches. - // 403.gcc is an example that hits this case. - if (LHS.getOpcode() == ISD::AND && - isa<ConstantSDNode>(LHS.getOperand(1)) && - isPowerOf2_64(LHS.getConstantOperandVal(1))) { - SDValue Test = LHS.getOperand(0); - uint64_t Mask = LHS.getConstantOperandVal(1); - - // TBNZ only operates on i64's, but the ext should be free. - if (Test.getValueType() == MVT::i32) - Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64); - - return DAG.getNode(ARM64ISD::TBNZ, dl, MVT::Other, Chain, Test, - DAG.getConstant(Log2_64(Mask), MVT::i64), Dest); - } - - return DAG.getNode(ARM64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest); - } - } - - SDValue CCVal; - SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl); - return DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, - Cmp); - } - - assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); - - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally - // clean. Some of them require two branches to implement. 
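// (For example, SETUEQ, "unordered or equal", has no single ARM64 condition
// code, so it is emitted as two branches to the same destination, roughly
// "fcmp; b.eq dest; b.vs dest", where VS covers the NaN case. This sketch is
// illustrative only.)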
- SDValue Cmp = emitComparison(LHS, RHS, dl, DAG);
- ARM64CC::CondCode CC1, CC2;
- changeFPCCToARM64CC(CC, CC1, CC2);
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
- SDValue BR1 =
- DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
- if (CC2 != ARM64CC::AL) {
- SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
- return DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
- Cmp);
- }
-
- return BR1;
-}
-
-SDValue ARM64TargetLowering::LowerFCOPYSIGN(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
-
- SDValue In1 = Op.getOperand(0);
- SDValue In2 = Op.getOperand(1);
- EVT SrcVT = In2.getValueType();
- if (SrcVT != VT) {
- if (SrcVT == MVT::f32 && VT == MVT::f64)
- In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
- else if (SrcVT == MVT::f64 && VT == MVT::f32)
- In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0));
- else
- // FIXME: Src type is different, bail out for now. Can VT really be a
- // vector type?
- return SDValue();
- }
-
- EVT VecVT;
- EVT EltVT;
- SDValue EltMask, VecVal1, VecVal2;
- if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
- EltVT = MVT::i32;
- VecVT = MVT::v4i32;
- EltMask = DAG.getConstant(0x80000000ULL, EltVT);
-
- if (!VT.isVector()) {
- VecVal1 = DAG.getTargetInsertSubreg(ARM64::ssub, DL, VecVT,
- DAG.getUNDEF(VecVT), In1);
- VecVal2 = DAG.getTargetInsertSubreg(ARM64::ssub, DL, VecVT,
- DAG.getUNDEF(VecVT), In2);
- } else {
- VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
- VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
- }
- } else if (VT == MVT::f64 || VT == MVT::v2f64) {
- EltVT = MVT::i64;
- VecVT = MVT::v2i64;
-
- // We want to materialize a mask with the high bit set, but the AdvSIMD
- // immediate moves cannot materialize that in a single instruction for
- // 64-bit elements. Instead, materialize zero and then negate it.
- EltMask = DAG.getConstant(0, EltVT);
-
- if (!VT.isVector()) {
- VecVal1 = DAG.getTargetInsertSubreg(ARM64::dsub, DL, VecVT,
- DAG.getUNDEF(VecVT), In1);
- VecVal2 = DAG.getTargetInsertSubreg(ARM64::dsub, DL, VecVT,
- DAG.getUNDEF(VecVT), In2);
- } else {
- VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
- VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
- }
- } else {
- llvm_unreachable("Invalid type for copysign!");
- }
-
- std::vector<SDValue> BuildVectorOps;
- for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i)
- BuildVectorOps.push_back(EltMask);
-
- SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT,
- &BuildVectorOps[0], BuildVectorOps.size());
-
- // If we couldn't materialize the mask above, then the mask vector will be
- // the zero vector, and we need to negate it here.
- if (VT == MVT::f64 || VT == MVT::v2f64) { - BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec); - BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec); - BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec); - } - - SDValue Sel = - DAG.getNode(ARM64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec); - - if (VT == MVT::f32) - return DAG.getTargetExtractSubreg(ARM64::ssub, DL, VT, Sel); - else if (VT == MVT::f64) - return DAG.getTargetExtractSubreg(ARM64::dsub, DL, VT, Sel); - else - return DAG.getNode(ISD::BITCAST, DL, VT, Sel); -} - -SDValue ARM64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { - if (DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::NoImplicitFloat)) - return SDValue(); - - // While there is no integer popcount instruction, it can - // be more efficiently lowered to the following sequence that uses - // AdvSIMD registers/instructions as long as the copies to/from - // the AdvSIMD registers are cheap. - // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd - // CNT V0.8B, V0.8B // 8xbyte pop-counts - // ADDV B0, V0.8B // sum 8xbyte pop-counts - // UMOV X0, V0.B[0] // copy byte result back to integer reg - SDValue Val = Op.getOperand(0); - SDLoc DL(Op); - EVT VT = Op.getValueType(); - SDValue ZeroVec = DAG.getUNDEF(MVT::v8i8); - - SDValue VecVal; - if (VT == MVT::i32) { - VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); - VecVal = - DAG.getTargetInsertSubreg(ARM64::ssub, DL, MVT::v8i8, ZeroVec, VecVal); - } else { - VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val); - } - - SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, VecVal); - SDValue UaddLV = DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, - DAG.getConstant(Intrinsic::arm64_neon_uaddlv, MVT::i32), CtPop); - - if (VT == MVT::i64) - UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV); - return UaddLV; -} - -SDValue ARM64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - - if (Op.getValueType().isVector()) - return LowerVSETCC(Op, DAG); - - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - SDLoc dl(Op); - - // We chose ZeroOrOneBooleanContents, so use zero and one. - EVT VT = Op.getValueType(); - SDValue TVal = DAG.getConstant(1, VT); - SDValue FVal = DAG.getConstant(0, VT); - - // Handle f128 first, since one possible outcome is a normal integer - // comparison which gets picked up by the next if statement. - if (LHS.getValueType() == MVT::f128) { - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); - - // If softenSetCCOperands returned a scalar, use it. - if (RHS.getNode() == 0) { - assert(LHS.getValueType() == Op.getValueType() && - "Unexpected setcc expansion!"); - return LHS; - } - } - - if (LHS.getValueType().isInteger()) { - SDValue CCVal; - SDValue Cmp = - getARM64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl); - - // Note that we inverted the condition above, so we reverse the order of - // the true and false operands here. This will allow the setcc to be - // matched to a single CSINC instruction. - return DAG.getNode(ARM64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp); - } - - // Now we know we're dealing with FP values. - assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); - - // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead - // and do the comparison. 
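// (Illustrative result for a simple ordered compare such as "a < b" on f32,
// register names being examples only:
//   fcmp s0, s1
//   cset w0, mi
// CSET is the CSINC-with-WZR form that the inverted-condition CSEL below is
// intended to match; conditions needing two ARM64 condition codes take the
// two-CSEL path further down.)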
- SDValue Cmp = emitComparison(LHS, RHS, dl, DAG); - - ARM64CC::CondCode CC1, CC2; - changeFPCCToARM64CC(CC, CC1, CC2); - if (CC2 == ARM64CC::AL) { - changeFPCCToARM64CC(ISD::getSetCCInverse(CC, false), CC1, CC2); - SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); - - // Note that we inverted the condition above, so we reverse the order of - // the true and false operands here. This will allow the setcc to be - // matched to a single CSINC instruction. - return DAG.getNode(ARM64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp); - } else { - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally - // clean. Some of them require two CSELs to implement. As is in this case, - // we emit the first CSEL and then emit a second using the output of the - // first as the RHS. We're effectively OR'ing the two CC's together. - - // FIXME: It would be nice if we could match the two CSELs to two CSINCs. - SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); - SDValue CS1 = DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); - - SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); - return DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); - } -} - -/// A SELECT_CC operation is really some kind of max or min if both values being -/// compared are, in some sense, equal to the results in either case. However, -/// it is permissible to compare f32 values and produce directly extended f64 -/// values. -/// -/// Extending the comparison operands would also be allowed, but is less likely -/// to happen in practice since their use is right here. Note that truncate -/// operations would *not* be semantically equivalent. -static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) { - if (Cmp == Result) - return true; - - ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp); - ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result); - if (CCmp && CResult && Cmp.getValueType() == MVT::f32 && - Result.getValueType() == MVT::f64) { - bool Lossy; - APFloat CmpVal = CCmp->getValueAPF(); - CmpVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &Lossy); - return CResult->getValueAPF().bitwiseIsEqual(CmpVal); - } - - return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp; -} - -SDValue ARM64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { - SDValue CC = Op->getOperand(0); - SDValue TVal = Op->getOperand(1); - SDValue FVal = Op->getOperand(2); - SDLoc DL(Op); - - unsigned Opc = CC.getOpcode(); - // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select - // instruction. - if (CC.getResNo() == 1 && - (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || - Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { - // Only lower legal XALUO ops. 
- if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0)))
- return SDValue();
-
- ARM64CC::CondCode OFCC;
- SDValue Value, Overflow;
- std::tie(Value, Overflow) = getARM64XALUOOp(OFCC, CC.getValue(0), DAG);
- SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
-
- return DAG.getNode(ARM64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, CCVal,
- Overflow);
- }
-
- if (CC.getOpcode() == ISD::SETCC)
- return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal,
- cast<CondCodeSDNode>(CC.getOperand(2))->get());
- else
- return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal,
- FVal, ISD::SETNE);
-}
-
-SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op,
- SelectionDAG &DAG) const {
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue TVal = Op.getOperand(2);
- SDValue FVal = Op.getOperand(3);
- SDLoc dl(Op);
-
- // Handle f128 first, because it will result in a comparison of some RTLIB
- // call result against zero.
- if (LHS.getValueType() == MVT::f128) {
- softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
-
- // If softenSetCCOperands returned a scalar, we need to compare the result
- // against zero to select between true and false values.
- if (RHS.getNode() == 0) {
- RHS = DAG.getConstant(0, LHS.getValueType());
- CC = ISD::SETNE;
- }
- }
-
- // Handle integers first.
- if (LHS.getValueType().isInteger()) {
- assert((LHS.getValueType() == RHS.getValueType()) &&
- (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
-
- unsigned Opcode = ARM64ISD::CSEL;
-
- // If both the TVal and the FVal are constants, see if we can swap them in
- // order to form a CSINV or CSINC out of them.
- ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
- ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
-
- if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
- std::swap(TVal, FVal);
- std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
- } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
- std::swap(TVal, FVal);
- std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
- } else if (TVal.getOpcode() == ISD::XOR) {
- // If TVal is a NOT we want to swap TVal and FVal so that we can match
- // with a CSINV rather than a CSEL.
- ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(1));
-
- if (CVal && CVal->isAllOnesValue()) {
- std::swap(TVal, FVal);
- std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
- }
- } else if (TVal.getOpcode() == ISD::SUB) {
- // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
- // that we can match with a CSNEG rather than a CSEL.
- ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(0));
-
- if (CVal && CVal->isNullValue()) {
- std::swap(TVal, FVal);
- std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
- }
- } else if (CTVal && CFVal) {
- const int64_t TrueVal = CTVal->getSExtValue();
- const int64_t FalseVal = CFVal->getSExtValue();
- bool Swap = false;
-
- // If both TVal and FVal are constants, see if FVal is the
- // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
- // instead of a CSEL in that case.
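// (For reference, the conditional-select family targeted here computes, per
// the ARMv8 ISA:
//   CSEL  Rd = cond ? Rn : Rm
//   CSINV Rd = cond ? Rn : ~Rm
//   CSNEG Rd = cond ? Rn : -Rm
//   CSINC Rd = cond ? Rn : Rm + 1
// so, for example, "cond ? 1 : 0" needs only a CSINC of WZR with itself, the
// CSET alias, which is why constant TVal/FVal pairs are special-cased in the
// surrounding code.)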
- if (TrueVal == ~FalseVal) { - Opcode = ARM64ISD::CSINV; - } else if (TrueVal == -FalseVal) { - Opcode = ARM64ISD::CSNEG; - } else if (TVal.getValueType() == MVT::i32) { - // If our operands are only 32-bit wide, make sure we use 32-bit - // arithmetic for the check whether we can use CSINC. This ensures that - // the addition in the check will wrap around properly in case there is - // an overflow (which would not be the case if we do the check with - // 64-bit arithmetic). - const uint32_t TrueVal32 = CTVal->getZExtValue(); - const uint32_t FalseVal32 = CFVal->getZExtValue(); - - if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) { - Opcode = ARM64ISD::CSINC; - - if (TrueVal32 > FalseVal32) { - Swap = true; - } - } - // 64-bit check whether we can use CSINC. - } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) { - Opcode = ARM64ISD::CSINC; - - if (TrueVal > FalseVal) { - Swap = true; - } - } - - // Swap TVal and FVal if necessary. - if (Swap) { - std::swap(TVal, FVal); - std::swap(CTVal, CFVal); - CC = ISD::getSetCCInverse(CC, true); - } - - if (Opcode != ARM64ISD::CSEL) { - // Drop FVal since we can get its value by simply inverting/negating - // TVal. - FVal = TVal; - } - } - - SDValue CCVal; - SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl); - - EVT VT = Op.getValueType(); - return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp); - } - - // Now we know we're dealing with FP values. - assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); - assert(LHS.getValueType() == RHS.getValueType()); - EVT VT = Op.getValueType(); - - // Try to match this select into a max/min operation, which have dedicated - // opcode in the instruction set. - // NOTE: This is not correct in the presence of NaNs, so we only enable this - // in no-NaNs mode. - if (getTargetMachine().Options.NoNaNsFPMath) { - if (selectCCOpsAreFMaxCompatible(LHS, FVal) && - selectCCOpsAreFMaxCompatible(RHS, TVal)) { - CC = ISD::getSetCCSwappedOperands(CC); - std::swap(TVal, FVal); - } - - if (selectCCOpsAreFMaxCompatible(LHS, TVal) && - selectCCOpsAreFMaxCompatible(RHS, FVal)) { - switch (CC) { - default: - break; - case ISD::SETGT: - case ISD::SETGE: - case ISD::SETUGT: - case ISD::SETUGE: - case ISD::SETOGT: - case ISD::SETOGE: - return DAG.getNode(ARM64ISD::FMAX, dl, VT, TVal, FVal); - break; - case ISD::SETLT: - case ISD::SETLE: - case ISD::SETULT: - case ISD::SETULE: - case ISD::SETOLT: - case ISD::SETOLE: - return DAG.getNode(ARM64ISD::FMIN, dl, VT, TVal, FVal); - break; - } - } - } - - // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead - // and do the comparison. - SDValue Cmp = emitComparison(LHS, RHS, dl, DAG); - - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally - // clean. Some of them require two CSELs to implement. - ARM64CC::CondCode CC1, CC2; - changeFPCCToARM64CC(CC, CC1, CC2); - SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); - SDValue CS1 = DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); - - // If we need a second CSEL, emit it, using the output of the first as the - // RHS. We're effectively OR'ing the two CC's together. - if (CC2 != ARM64CC::AL) { - SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); - return DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); - } - - // Otherwise, return the output of the first CSEL. - return CS1; -} - -SDValue ARM64TargetLowering::LowerJumpTable(SDValue Op, - SelectionDAG &DAG) const { - // Jump table entries as PC relative offsets. 
No additional tweaking - // is necessary here. Just get the address of the jump table. - JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - EVT PtrVT = getPointerTy(); - SDLoc DL(Op); - - SDValue Hi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, ARM64II::MO_PAGE); - SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); -} - -SDValue ARM64TargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) const { - ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - EVT PtrVT = getPointerTy(); - SDLoc DL(Op); - - if (getTargetMachine().getCodeModel() == CodeModel::Large) { - // Use the GOT for the large code model on iOS. - if (Subtarget->isTargetMachO()) { - SDValue GotAddr = DAG.getTargetConstantPool( - CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - ARM64II::MO_GOT); - return DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, GotAddr); - } - - const unsigned char MO_NC = ARM64II::MO_NC; - return DAG.getNode( - ARM64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G3), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G2 | MO_NC), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G1 | MO_NC), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G0 | MO_NC)); - } else { - // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on - // ELF, the only valid one on Darwin. - SDValue Hi = - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_PAGE); - SDValue Lo = DAG.getTargetConstantPool( - CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); - } -} - -SDValue ARM64TargetLowering::LowerBlockAddress(SDValue Op, - SelectionDAG &DAG) const { - const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - EVT PtrVT = getPointerTy(); - SDLoc DL(Op); - if (getTargetMachine().getCodeModel() == CodeModel::Large && - !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = ARM64II::MO_NC; - return DAG.getNode( - ARM64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G3), - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G2 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G1 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G0 | MO_NC)); - } else { - SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_PAGE); - SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_PAGEOFF | - ARM64II::MO_NC); - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); - } -} - -SDValue ARM64TargetLowering::LowerDarwin_VASTART(SDValue Op, - SelectionDAG &DAG) const { - ARM64FunctionInfo *FuncInfo = - DAG.getMachineFunction().getInfo<ARM64FunctionInfo>(); - - SDLoc DL(Op); - SDValue FR = - DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy()); - const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), - 
MachinePointerInfo(SV), false, false, 0); -} - -SDValue ARM64TargetLowering::LowerAAPCS_VASTART(SDValue Op, - SelectionDAG &DAG) const { - // The layout of the va_list struct is specified in the AArch64 Procedure Call - // Standard, section B.3. - MachineFunction &MF = DAG.getMachineFunction(); - ARM64FunctionInfo *FuncInfo = MF.getInfo<ARM64FunctionInfo>(); - SDLoc DL(Op); - - SDValue Chain = Op.getOperand(0); - SDValue VAList = Op.getOperand(1); - const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - SmallVector<SDValue, 4> MemOps; - - // void *__stack at offset 0 - SDValue Stack = - DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy()); - MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList, - MachinePointerInfo(SV), false, false, 8)); - - // void *__gr_top at offset 8 - int GPRSize = FuncInfo->getVarArgsGPRSize(); - if (GPRSize > 0) { - SDValue GRTop, GRTopAddr; - - GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(8, getPointerTy())); - - GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), getPointerTy()); - GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop, - DAG.getConstant(GPRSize, getPointerTy())); - - MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr, - MachinePointerInfo(SV, 8), false, false, 8)); - } - - // void *__vr_top at offset 16 - int FPRSize = FuncInfo->getVarArgsFPRSize(); - if (FPRSize > 0) { - SDValue VRTop, VRTopAddr; - VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(16, getPointerTy())); - - VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), getPointerTy()); - VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop, - DAG.getConstant(FPRSize, getPointerTy())); - - MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr, - MachinePointerInfo(SV, 16), false, false, 8)); - } - - // int __gr_offs at offset 24 - SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(24, getPointerTy())); - MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32), - GROffsAddr, MachinePointerInfo(SV, 24), false, - false, 4)); - - // int __vr_offs at offset 28 - SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(28, getPointerTy())); - MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32), - VROffsAddr, MachinePointerInfo(SV, 28), false, - false, 4)); - - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], - MemOps.size()); -} - -SDValue ARM64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { - return Subtarget->isTargetDarwin() ? LowerDarwin_VASTART(Op, DAG) - : LowerAAPCS_VASTART(Op, DAG); -} - -SDValue ARM64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { - // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single - // pointer. - unsigned VaListSize = Subtarget->isTargetDarwin() ? 
8 : 32; - const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); - const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - - return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1), - Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32), - 8, false, false, MachinePointerInfo(DestSV), - MachinePointerInfo(SrcSV)); -} - -SDValue ARM64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { - assert(Subtarget->isTargetDarwin() && - "automatic va_arg instruction only works on Darwin"); - - const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - EVT VT = Op.getValueType(); - SDLoc DL(Op); - SDValue Chain = Op.getOperand(0); - SDValue Addr = Op.getOperand(1); - unsigned Align = Op.getConstantOperandVal(3); - - SDValue VAList = DAG.getLoad(getPointerTy(), DL, Chain, Addr, - MachinePointerInfo(V), false, false, false, 0); - Chain = VAList.getValue(1); - - if (Align > 8) { - assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2"); - VAList = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(Align - 1, getPointerTy())); - VAList = DAG.getNode(ISD::AND, DL, getPointerTy(), VAList, - DAG.getConstant(-(int64_t)Align, getPointerTy())); - } - - Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); - uint64_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy); - - // Scalar integer and FP values smaller than 64 bits are implicitly extended - // up to 64 bits. At the very least, we have to increase the striding of the - // vaargs list to match this, and for FP values we need to introduce - // FP_ROUND nodes as well. - if (VT.isInteger() && !VT.isVector()) - ArgSize = 8; - bool NeedFPTrunc = false; - if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) { - ArgSize = 8; - NeedFPTrunc = true; - } - - // Increment the pointer, VAList, to the next vaarg - SDValue VANext = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(ArgSize, getPointerTy())); - // Store the incremented VAList to the legalized pointer - SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V), - false, false, 0); - - // Load the actual argument out of the pointer VAList - if (NeedFPTrunc) { - // Load the value as an f64. - SDValue WideFP = DAG.getLoad(MVT::f64, DL, APStore, VAList, - MachinePointerInfo(), false, false, false, 0); - // Round the value down to an f32. - SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0), - DAG.getIntPtrConstant(1)); - SDValue Ops[] = { NarrowFP, WideFP.getValue(1) }; - // Merge the rounded value with the chain output of the load. 
- return DAG.getMergeValues(Ops, 2, DL); - } - - return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo(), false, - false, false, 0); -} - -SDValue ARM64TargetLowering::LowerFRAMEADDR(SDValue Op, - SelectionDAG &DAG) const { - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - MFI->setFrameAddressIsTaken(true); - - EVT VT = Op.getValueType(); - SDLoc DL(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, ARM64::FP, VT); - while (Depth--) - FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr, - MachinePointerInfo(), false, false, false, 0); - return FrameAddr; -} - -SDValue ARM64TargetLowering::LowerRETURNADDR(SDValue Op, - SelectionDAG &DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MFI->setReturnAddressIsTaken(true); - - EVT VT = Op.getValueType(); - SDLoc DL(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - if (Depth) { - SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); - SDValue Offset = DAG.getConstant(8, getPointerTy()); - return DAG.getLoad(VT, DL, DAG.getEntryNode(), - DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), - MachinePointerInfo(), false, false, false, 0); - } - - // Return LR, which contains the return address. Mark it an implicit live-in. - unsigned Reg = MF.addLiveIn(ARM64::LR, &ARM64::GPR64RegClass); - return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT); -} - -/// LowerShiftRightParts - Lower SRA_PARTS, which returns two -/// i64 values and take a 2 x i64 value to shift plus a shift amount. -SDValue ARM64TargetLowering::LowerShiftRightParts(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getNumOperands() == 3 && "Not a double-shift!"); - EVT VT = Op.getValueType(); - unsigned VTBits = VT.getSizeInBits(); - SDLoc dl(Op); - SDValue ShOpLo = Op.getOperand(0); - SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); - SDValue ARMcc; - unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; - - assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); - - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, - DAG.getConstant(VTBits, MVT::i64), ShAmt); - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, - DAG.getConstant(VTBits, MVT::i64)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); - - SDValue Cmp = - emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64), dl, DAG); - SDValue CCVal = DAG.getConstant(ARM64CC::GE, MVT::i32); - - SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); - SDValue Lo = - DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); - - // ARM64 shifts larger than the register width are wrapped rather than - // clamped, so we can't just emit "hi >> x". - SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); - SDValue TrueValHi = Opc == ISD::SRA - ? DAG.getNode(Opc, dl, VT, ShOpHi, - DAG.getConstant(VTBits - 1, MVT::i64)) - : DAG.getConstant(0, VT); - SDValue Hi = - DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp); - - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); -} - -/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two -/// i64 values and take a 2 x i64 value to shift plus a shift amount. 
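// (Conceptually, for a 128-bit value hi:lo shifted left by "a", 0 <= a < 128,
// the lowering below computes, as a rough sketch with the CSELs selecting
// between the two cases because hardware shifts wrap at 64:
//   if (a >= 64) { Hi = lo << (a - 64);               Lo = 0;       }
//   else         { Hi = (hi << a) | (lo >> (64 - a)); Lo = lo << a; } )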
-SDValue ARM64TargetLowering::LowerShiftLeftParts(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getNumOperands() == 3 && "Not a double-shift!"); - EVT VT = Op.getValueType(); - unsigned VTBits = VT.getSizeInBits(); - SDLoc dl(Op); - SDValue ShOpLo = Op.getOperand(0); - SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); - SDValue ARMcc; - - assert(Op.getOpcode() == ISD::SHL_PARTS); - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, - DAG.getConstant(VTBits, MVT::i64), ShAmt); - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, - DAG.getConstant(VTBits, MVT::i64)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); - SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); - - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - - SDValue Cmp = - emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64), dl, DAG); - SDValue CCVal = DAG.getConstant(ARM64CC::GE, MVT::i32); - SDValue Hi = DAG.getNode(ARM64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp); - - // ARM64 shifts of larger than register sizes are wrapped rather than clamped, - // so we can't just emit "lo << a" if a is too big. - SDValue TrueValLo = DAG.getConstant(0, VT); - SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); - SDValue Lo = - DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); - - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); -} - -bool -ARM64TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { - // The ARM64 target doesn't support folding offsets into global addresses. - return false; -} - -bool ARM64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { - // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases. - // FIXME: We should be able to handle f128 as well with a clever lowering. - if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32)) - return true; - - if (VT == MVT::f64) - return ARM64_AM::getFP64Imm(Imm) != -1; - else if (VT == MVT::f32) - return ARM64_AM::getFP32Imm(Imm) != -1; - return false; -} - -//===----------------------------------------------------------------------===// -// ARM64 Optimization Hooks -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// ARM64 Inline Assembly Support -//===----------------------------------------------------------------------===// - -// Table of Constraints -// TODO: This is the current set of constraints supported by ARM for the -// compiler, not all of them may make sense, e.g. S may be difficult to support. 
-// -// r - A general register -// w - An FP/SIMD register of some size in the range v0-v31 -// x - An FP/SIMD register of some size in the range v0-v15 -// I - Constant that can be used with an ADD instruction -// J - Constant that can be used with a SUB instruction -// K - Constant that can be used with a 32-bit logical instruction -// L - Constant that can be used with a 64-bit logical instruction -// M - Constant that can be used as a 32-bit MOV immediate -// N - Constant that can be used as a 64-bit MOV immediate -// Q - A memory reference with base register and no offset -// S - A symbolic address -// Y - Floating point constant zero -// Z - Integer constant zero -// -// Note that general register operands will be output using their 64-bit x -// register name, whatever the size of the variable, unless the asm operand -// is prefixed by the %w modifier. Floating-point and SIMD register operands -// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or -// %q modifier. - -/// getConstraintType - Given a constraint letter, return the type of -/// constraint it is for this target. -ARM64TargetLowering::ConstraintType -ARM64TargetLowering::getConstraintType(const std::string &Constraint) const { - if (Constraint.size() == 1) { - switch (Constraint[0]) { - default: - break; - case 'z': - return C_Other; - case 'x': - case 'w': - return C_RegisterClass; - // An address with a single base register. Due to the way we - // currently handle addresses it is the same as 'r'. - case 'Q': - return C_Memory; - } - } - return TargetLowering::getConstraintType(Constraint); -} - -/// Examine constraint type and operand type and determine a weight value. -/// This object must already have been set up with the operand type -/// and the current alternative constraint selected. -TargetLowering::ConstraintWeight -ARM64TargetLowering::getSingleConstraintMatchWeight( - AsmOperandInfo &info, const char *constraint) const { - ConstraintWeight weight = CW_Invalid; - Value *CallOperandVal = info.CallOperandVal; - // If we don't have a value, we can't do a match, - // but allow it at the lowest weight. - if (CallOperandVal == NULL) - return CW_Default; - Type *type = CallOperandVal->getType(); - // Look at the constraint type. - switch (*constraint) { - default: - weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); - break; - case 'x': - case 'w': - if (type->isFloatingPointTy() || type->isVectorTy()) - weight = CW_Register; - break; - case 'z': - weight = CW_Constant; - break; - } - return weight; -} - -std::pair<unsigned, const TargetRegisterClass *> -ARM64TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { - if (Constraint.size() == 1) { - switch (Constraint[0]) { - case 'r': - if (VT.getSizeInBits() == 64) - return std::make_pair(0U, &ARM64::GPR64commonRegClass); - return std::make_pair(0U, &ARM64::GPR32commonRegClass); - case 'w': - if (VT == MVT::f32) - return std::make_pair(0U, &ARM64::FPR32RegClass); - if (VT.getSizeInBits() == 64) - return std::make_pair(0U, &ARM64::FPR64RegClass); - if (VT.getSizeInBits() == 128) - return std::make_pair(0U, &ARM64::FPR128RegClass); - break; - // The instructions that this constraint is designed for can - // only take 128-bit registers so just use that regclass. 
- case 'x': - if (VT.getSizeInBits() == 128) - return std::make_pair(0U, &ARM64::FPR128_loRegClass); - break; - } - } - if (StringRef("{cc}").equals_lower(Constraint)) - return std::make_pair(unsigned(ARM64::CPSR), &ARM64::CCRRegClass); - - // Use the default implementation in TargetLowering to convert the register - // constraint into a member of a register class. - std::pair<unsigned, const TargetRegisterClass *> Res; - Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); - - // Not found as a standard register? - if (Res.second == 0) { - unsigned Size = Constraint.size(); - if ((Size == 4 || Size == 5) && Constraint[0] == '{' && - tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') { - const std::string Reg = - std::string(&Constraint[2], &Constraint[Size - 1]); - int RegNo = atoi(Reg.c_str()); - if (RegNo >= 0 && RegNo <= 31) { - // v0 - v31 are aliases of q0 - q31. - // By default we'll emit v0-v31 for this unless there's a modifier where - // we'll emit the correct register as well. - Res.first = ARM64::FPR128RegClass.getRegister(RegNo); - Res.second = &ARM64::FPR128RegClass; - } - } - } - - return Res; -} - -/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops -/// vector. If it is invalid, don't add anything to Ops. -void ARM64TargetLowering::LowerAsmOperandForConstraint( - SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, - SelectionDAG &DAG) const { - SDValue Result(0, 0); - - // Currently only support length 1 constraints. - if (Constraint.length() != 1) - return; - - char ConstraintLetter = Constraint[0]; - switch (ConstraintLetter) { - default: - break; - - // This set of constraints deal with valid constants for various instructions. - // Validate and return a target constant for them if we can. - case 'z': { - // 'z' maps to xzr or wzr so it needs an input of 0. - ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); - if (!C || C->getZExtValue() != 0) - return; - - if (Op.getValueType() == MVT::i64) - Result = DAG.getRegister(ARM64::XZR, MVT::i64); - else - Result = DAG.getRegister(ARM64::WZR, MVT::i32); - break; - } - - case 'I': - case 'J': - case 'K': - case 'L': - case 'M': - case 'N': - ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); - if (!C) - return; - - // Grab the value and do some validation. - uint64_t CVal = C->getZExtValue(); - switch (ConstraintLetter) { - // The I constraint applies only to simple ADD or SUB immediate operands: - // i.e. 0 to 4095 with optional shift by 12 - // The J constraint applies only to ADD or SUB immediates that would be - // valid when negated, i.e. if [an add pattern] were to be output as a SUB - // instruction [or vice versa], in other words -1 to -4095 with optional - // left shift by 12. - case 'I': - if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal)) - break; - return; - case 'J': { - uint64_t NVal = -C->getSExtValue(); - if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) - break; - return; - } - // The K and L constraints apply *only* to logical immediates, including - // what used to be the MOVI alias for ORR (though the MOVI alias has now - // been removed and MOV should be used). So these constraints have to - // distinguish between bit patterns that are valid 32-bit or 64-bit - // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but - // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice - // versa. 
- case 'K': - if (ARM64_AM::isLogicalImmediate(CVal, 32)) - break; - return; - case 'L': - if (ARM64_AM::isLogicalImmediate(CVal, 64)) - break; - return; - // The M and N constraints are a superset of K and L respectively, for use - // with the MOV (immediate) alias. As well as the logical immediates they - // also match 32 or 64-bit immediates that can be loaded either using a - // *single* MOVZ or MOVN , such as 32-bit 0x12340000, 0x00001234, 0xffffedca - // (M) or 64-bit 0x1234000000000000 (N) etc. - // As a note some of this code is liberally stolen from the asm parser. - case 'M': { - if (!isUInt<32>(CVal)) - return; - if (ARM64_AM::isLogicalImmediate(CVal, 32)) - break; - if ((CVal & 0xFFFF) == CVal) - break; - if ((CVal & 0xFFFF0000ULL) == CVal) - break; - uint64_t NCVal = ~(uint32_t)CVal; - if ((NCVal & 0xFFFFULL) == NCVal) - break; - if ((NCVal & 0xFFFF0000ULL) == NCVal) - break; - return; - } - case 'N': { - if (ARM64_AM::isLogicalImmediate(CVal, 64)) - break; - if ((CVal & 0xFFFFULL) == CVal) - break; - if ((CVal & 0xFFFF0000ULL) == CVal) - break; - if ((CVal & 0xFFFF00000000ULL) == CVal) - break; - if ((CVal & 0xFFFF000000000000ULL) == CVal) - break; - uint64_t NCVal = ~CVal; - if ((NCVal & 0xFFFFULL) == NCVal) - break; - if ((NCVal & 0xFFFF0000ULL) == NCVal) - break; - if ((NCVal & 0xFFFF00000000ULL) == NCVal) - break; - if ((NCVal & 0xFFFF000000000000ULL) == NCVal) - break; - return; - } - default: - return; - } - - // All assembler immediates are 64-bit integers. - Result = DAG.getTargetConstant(CVal, MVT::i64); - break; - } - - if (Result.getNode()) { - Ops.push_back(Result); - return; - } - - return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); -} - -//===----------------------------------------------------------------------===// -// ARM64 Advanced SIMD Support -//===----------------------------------------------------------------------===// - -/// WidenVector - Given a value in the V64 register class, produce the -/// equivalent value in the V128 register class. -static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) { - EVT VT = V64Reg.getValueType(); - unsigned NarrowSize = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType().getSimpleVT(); - MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); - SDLoc DL(V64Reg); - - return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy), - V64Reg, DAG.getConstant(0, MVT::i32)); -} - -/// getExtFactor - Determine the adjustment factor for the position when -/// generating an "extract from vector registers" instruction. -static unsigned getExtFactor(SDValue &V) { - EVT EltType = V.getValueType().getVectorElementType(); - return EltType.getSizeInBits() / 8; -} - -/// NarrowVector - Given a value in the V128 register class, produce the -/// equivalent value in the V64 register class. -static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { - EVT VT = V128Reg.getValueType(); - unsigned WideSize = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType().getSimpleVT(); - MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); - SDLoc DL(V128Reg); - - return DAG.getTargetExtractSubreg(ARM64::dsub, DL, NarrowTy, V128Reg); -} - -// Gather data to see if the operation can be modelled as a -// shuffle in combination with VEXTs. 
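// (For reference, the EXT instruction this relies on extracts a contiguous
// run of bytes from the concatenation of its two sources, starting at an
// immediate byte offset; illustratively,
//   ext v0.16b, v1.16b, v2.16b, #4
// yields bytes 4..15 of v1 followed by bytes 0..3 of v2, which is what lets
// a BUILD_VECTOR drawing from two halves of a wider source become an EXT
// plus a shuffle.)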
-SDValue ARM64TargetLowering::ReconstructShuffle(SDValue Op, - SelectionDAG &DAG) const { - SDLoc dl(Op); - EVT VT = Op.getValueType(); - unsigned NumElts = VT.getVectorNumElements(); - - SmallVector<SDValue, 2> SourceVecs; - SmallVector<unsigned, 2> MinElts; - SmallVector<unsigned, 2> MaxElts; - - for (unsigned i = 0; i < NumElts; ++i) { - SDValue V = Op.getOperand(i); - if (V.getOpcode() == ISD::UNDEF) - continue; - else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { - // A shuffle can only come from building a vector from various - // elements of other vectors. - return SDValue(); - } - - // Record this extraction against the appropriate vector if possible... - SDValue SourceVec = V.getOperand(0); - unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); - bool FoundSource = false; - for (unsigned j = 0; j < SourceVecs.size(); ++j) { - if (SourceVecs[j] == SourceVec) { - if (MinElts[j] > EltNo) - MinElts[j] = EltNo; - if (MaxElts[j] < EltNo) - MaxElts[j] = EltNo; - FoundSource = true; - break; - } - } - - // Or record a new source if not... - if (!FoundSource) { - SourceVecs.push_back(SourceVec); - MinElts.push_back(EltNo); - MaxElts.push_back(EltNo); - } - } - - // Currently only do something sane when at most two source vectors - // involved. - if (SourceVecs.size() > 2) - return SDValue(); - - SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; - int VEXTOffsets[2] = { 0, 0 }; - - // This loop extracts the usage patterns of the source vectors - // and prepares appropriate SDValues for a shuffle if possible. - for (unsigned i = 0; i < SourceVecs.size(); ++i) { - if (SourceVecs[i].getValueType() == VT) { - // No VEXT necessary - ShuffleSrcs[i] = SourceVecs[i]; - VEXTOffsets[i] = 0; - continue; - } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { - // It probably isn't worth padding out a smaller vector just to - // break it down again in a shuffle. - return SDValue(); - } - - // Don't attempt to extract subvectors from BUILD_VECTOR sources - // that expand or trunc the original value. - // TODO: We can try to bitcast and ANY_EXTEND the result but - // we need to consider the cost of vector ANY_EXTEND, and the - // legality of all the types. - if (SourceVecs[i].getValueType().getVectorElementType() != - VT.getVectorElementType()) - return SDValue(); - - // Since only 64-bit and 128-bit vectors are legal on ARM and - // we've eliminated the other cases... 
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts && - "unexpected vector sizes in ReconstructShuffle"); - - if (MaxElts[i] - MinElts[i] >= NumElts) { - // Span too large for a VEXT to cope - return SDValue(); - } - - if (MinElts[i] >= NumElts) { - // The extraction can just take the second half - VEXTOffsets[i] = NumElts; - ShuffleSrcs[i] = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], - DAG.getIntPtrConstant(NumElts)); - } else if (MaxElts[i] < NumElts) { - // The extraction can just take the first half - VEXTOffsets[i] = 0; - ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], DAG.getIntPtrConstant(0)); - } else { - // An actual VEXT is needed - VEXTOffsets[i] = MinElts[i]; - SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], DAG.getIntPtrConstant(0)); - SDValue VEXTSrc2 = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], - DAG.getIntPtrConstant(NumElts)); - unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1); - ShuffleSrcs[i] = DAG.getNode(ARM64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2, - DAG.getConstant(Imm, MVT::i32)); - } - } - - SmallVector<int, 8> Mask; - - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Entry = Op.getOperand(i); - if (Entry.getOpcode() == ISD::UNDEF) { - Mask.push_back(-1); - continue; - } - - SDValue ExtractVec = Entry.getOperand(0); - int ExtractElt = - cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue(); - if (ExtractVec == SourceVecs[0]) { - Mask.push_back(ExtractElt - VEXTOffsets[0]); - } else { - Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); - } - } - - // Final check before we try to produce nonsense... - if (isShuffleMaskLegal(Mask, VT)) - return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], - &Mask[0]); - - return SDValue(); -} - -// check if an EXT instruction can handle the shuffle mask when the -// vector sources of the shuffle are the same. -static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) { - unsigned NumElts = VT.getVectorNumElements(); - - // Assume that the first shuffle index is not UNDEF. Fail if it is. - if (M[0] < 0) - return false; - - Imm = M[0]; - - // If this is a VEXT shuffle, the immediate value is the index of the first - // element. The other shuffle indices must be the successive elements after - // the first one. - unsigned ExpectedElt = Imm; - for (unsigned i = 1; i < NumElts; ++i) { - // Increment the expected index. If it wraps around, just follow it - // back to index zero and keep going. - ++ExpectedElt; - if (ExpectedElt == NumElts) - ExpectedElt = 0; - - if (M[i] < 0) - continue; // ignore UNDEF indices - if (ExpectedElt != static_cast<unsigned>(M[i])) - return false; - } - - return true; -} - -// check if an EXT instruction can handle the shuffle mask when the -// vector sources of the shuffle are different. -static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT, - unsigned &Imm) { - unsigned NumElts = VT.getVectorNumElements(); - ReverseEXT = false; - - // Assume that the first shuffle index is not UNDEF. Fail if it is. - if (M[0] < 0) - return false; - - Imm = M[0]; - - // If this is a VEXT shuffle, the immediate value is the index of the first - // element. The other shuffle indices must be the successive elements after - // the first one. - unsigned ExpectedElt = Imm; - for (unsigned i = 1; i < NumElts; ++i) { - // Increment the expected index. If it wraps around, it may still be - // a VEXT but the source vectors must be swapped. 
- ExpectedElt += 1; - if (ExpectedElt == NumElts * 2) { - ExpectedElt = 0; - ReverseEXT = true; - } - - if (M[i] < 0) - continue; // ignore UNDEF indices - if (ExpectedElt != static_cast<unsigned>(M[i])) - return false; - } - - // Adjust the index value if the source operands will be swapped. - if (ReverseEXT) - Imm -= NumElts; - - return true; -} - -/// isREVMask - Check if a vector shuffle corresponds to a REV -/// instruction with the specified blocksize. (The order of the elements -/// within each block of the vector is reversed.) -static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { - assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && - "Only possible block sizes for REV are: 16, 32, 64"); - - unsigned EltSz = VT.getVectorElementType().getSizeInBits(); - if (EltSz == 64) - return false; - - unsigned NumElts = VT.getVectorNumElements(); - unsigned BlockElts = M[0] + 1; - // If the first shuffle index is UNDEF, be optimistic. - if (M[0] < 0) - BlockElts = BlockSize / EltSz; - - if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) - return false; - - for (unsigned i = 0; i < NumElts; ++i) { - if (M[i] < 0) - continue; // ignore UNDEF indices - if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) - return false; - } - - return true; -} - -static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { - unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - unsigned Idx = WhichResult * NumElts / 2; - for (unsigned i = 0; i != NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned)M[i] != Idx) || - (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts)) - return false; - Idx += 1; - } - - return true; -} - -static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { - unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i != NumElts; ++i) { - if (M[i] < 0) - continue; // ignore UNDEF indices - if ((unsigned)M[i] != 2 * i + WhichResult) - return false; - } - - return true; -} - -static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { - unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i < NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) || - (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult)) - return false; - } - return true; -} - -/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of -/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". -/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. -static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { - unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - unsigned Idx = WhichResult * NumElts / 2; - for (unsigned i = 0; i != NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned)M[i] != Idx) || - (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx)) - return false; - Idx += 1; - } - - return true; -} - -/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of -/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". -/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, -static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { - unsigned Half = VT.getVectorNumElements() / 2; - WhichResult = (M[0] == 0 ? 
0 : 1); - for (unsigned j = 0; j != 2; ++j) { - unsigned Idx = WhichResult; - for (unsigned i = 0; i != Half; ++i) { - int MIdx = M[i + j * Half]; - if (MIdx >= 0 && (unsigned)MIdx != Idx) - return false; - Idx += 2; - } - } - - return true; -} - -/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of -/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". -/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. -static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { - unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i < NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) || - (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult)) - return false; - } - return true; -} - -/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit -/// the specified operations to build the shuffle. -static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, - SDValue RHS, SelectionDAG &DAG, - SDLoc dl) { - unsigned OpNum = (PFEntry >> 26) & 0x0F; - unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1); - unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1); - - enum { - OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> - OP_VREV, - OP_VDUP0, - OP_VDUP1, - OP_VDUP2, - OP_VDUP3, - OP_VEXT1, - OP_VEXT2, - OP_VEXT3, - OP_VUZPL, // VUZP, left result - OP_VUZPR, // VUZP, right result - OP_VZIPL, // VZIP, left result - OP_VZIPR, // VZIP, right result - OP_VTRNL, // VTRN, left result - OP_VTRNR // VTRN, right result - }; - - if (OpNum == OP_COPY) { - if (LHSID == (1 * 9 + 2) * 9 + 3) - return LHS; - assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!"); - return RHS; - } - - SDValue OpLHS, OpRHS; - OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); - OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); - EVT VT = OpLHS.getValueType(); - - switch (OpNum) { - default: - llvm_unreachable("Unknown shuffle opcode!"); - case OP_VREV: - // VREV divides the vector in half and swaps within the half. 
- if (VT.getVectorElementType() == MVT::i32 || - VT.getVectorElementType() == MVT::f32) - return DAG.getNode(ARM64ISD::REV64, dl, VT, OpLHS); - // vrev <4 x i16> -> REV32 - if (VT.getVectorElementType() == MVT::i16) - return DAG.getNode(ARM64ISD::REV32, dl, VT, OpLHS); - // vrev <4 x i8> -> REV16 - assert(VT.getVectorElementType() == MVT::i8); - return DAG.getNode(ARM64ISD::REV16, dl, VT, OpLHS); - case OP_VDUP0: - case OP_VDUP1: - case OP_VDUP2: - case OP_VDUP3: { - EVT EltTy = VT.getVectorElementType(); - unsigned Opcode; - if (EltTy == MVT::i8) - Opcode = ARM64ISD::DUPLANE8; - else if (EltTy == MVT::i16) - Opcode = ARM64ISD::DUPLANE16; - else if (EltTy == MVT::i32 || EltTy == MVT::f32) - Opcode = ARM64ISD::DUPLANE32; - else if (EltTy == MVT::i64 || EltTy == MVT::f64) - Opcode = ARM64ISD::DUPLANE64; - else - llvm_unreachable("Invalid vector element type?"); - - if (VT.getSizeInBits() == 64) - OpLHS = WidenVector(OpLHS, DAG); - SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, MVT::i64); - return DAG.getNode(Opcode, dl, VT, OpLHS, Lane); - } - case OP_VEXT1: - case OP_VEXT2: - case OP_VEXT3: { - unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS); - return DAG.getNode(ARM64ISD::EXT, dl, VT, OpLHS, OpRHS, - DAG.getConstant(Imm, MVT::i32)); - } - case OP_VUZPL: - return DAG.getNode(ARM64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - case OP_VUZPR: - return DAG.getNode(ARM64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - case OP_VZIPL: - return DAG.getNode(ARM64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - case OP_VZIPR: - return DAG.getNode(ARM64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - case OP_VTRNL: - return DAG.getNode(ARM64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - case OP_VTRNR: - return DAG.getNode(ARM64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - } -} - -static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask, - SelectionDAG &DAG) { - // Check to see if we can use the TBL instruction. - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); - SDLoc DL(Op); - - EVT EltVT = Op.getValueType().getVectorElementType(); - unsigned BytesPerElt = EltVT.getSizeInBits() / 8; - - SmallVector<SDValue, 8> TBLMask; - for (int Val : ShuffleMask) { - for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { - unsigned Offset = Byte + Val * BytesPerElt; - TBLMask.push_back(DAG.getConstant(Offset, MVT::i32)); - } - } - - MVT IndexVT = MVT::v8i8; - unsigned IndexLen = 8; - if (Op.getValueType().getSizeInBits() == 128) { - IndexVT = MVT::v16i8; - IndexLen = 16; - } - - SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1); - SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2); - - SDValue Shuffle; - if (V2.getNode()->getOpcode() == ISD::UNDEF) { - if (IndexLen == 8) - V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst); - Shuffle = DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::arm64_neon_tbl1, MVT::i32), V1Cst, - DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, &TBLMask[0], IndexLen)); - } else { - if (IndexLen == 8) { - V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst); - Shuffle = DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::arm64_neon_tbl1, MVT::i32), V1Cst, - DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, &TBLMask[0], IndexLen)); - } else { - // FIXME: We cannot, for the moment, emit a TBL2 instruction because we - // cannot currently represent the register constraints on the input - // table registers. 
- // Shuffle = DAG.getNode(ARM64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst, - // DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, - // &TBLMask[0], IndexLen)); - Shuffle = DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::arm64_neon_tbl2, MVT::i32), V1Cst, V2Cst, - DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, &TBLMask[0], IndexLen)); - } - } - return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle); -} - -static unsigned getDUPLANEOp(EVT EltType) { - if (EltType == MVT::i8) - return ARM64ISD::DUPLANE8; - if (EltType == MVT::i16) - return ARM64ISD::DUPLANE16; - if (EltType == MVT::i32 || EltType == MVT::f32) - return ARM64ISD::DUPLANE32; - if (EltType == MVT::i64 || EltType == MVT::f64) - return ARM64ISD::DUPLANE64; - - llvm_unreachable("Invalid vector element type?"); -} - -SDValue ARM64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, - SelectionDAG &DAG) const { - SDLoc dl(Op); - EVT VT = Op.getValueType(); - - ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); - - // Convert shuffles that are directly supported on NEON to target-specific - // DAG nodes, instead of keeping them as shuffles and matching them again - // during code selection. This is more efficient and avoids the possibility - // of inconsistencies between legalization and selection. - ArrayRef<int> ShuffleMask = SVN->getMask(); - - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); - - if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], - V1.getValueType().getSimpleVT())) { - int Lane = SVN->getSplatIndex(); - // If this is undef splat, generate it via "just" vdup, if possible. - if (Lane == -1) - Lane = 0; - - if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) - return DAG.getNode(ARM64ISD::DUP, dl, V1.getValueType(), - V1.getOperand(0)); - // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non- - // constant. If so, we can just reference the lane's definition directly. - if (V1.getOpcode() == ISD::BUILD_VECTOR && - !isa<ConstantSDNode>(V1.getOperand(Lane))) - return DAG.getNode(ARM64ISD::DUP, dl, VT, V1.getOperand(Lane)); - - // Otherwise, duplicate from the lane of the input vector. - unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType()); - - // SelectionDAGBuilder may have "helpfully" already extracted or conatenated - // to make a vector of the same size as this SHUFFLE. We can ignore the - // extract entirely, and canonicalise the concat using WidenVector. 
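The lane adjustment described in the comment above and carried out in the next hunk remaps a splat lane through the EXTRACT_SUBVECTOR or CONCAT_VECTORS feeding the shuffle. A worked example of the CONCAT_VECTORS case (illustrative arithmetic only):

#include <cstdio>

int main() {
  // Splat of lane 5 from V1 = concat_vectors(A, B), where A and B are v4i16
  // and the shuffle result is v8i16 (NumElts = 8): the lane lives in B.
  unsigned NumElts = 8, Lane = 5;
  unsigned Idx = Lane >= NumElts / 2;    // 1 -> use concat operand B
  Lane -= Idx * (NumElts / 2);           // lane 1 of B
  printf("use concat operand %u, lane %u\n", Idx, Lane);  // operand 1, lane 1
}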
- if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) { - Lane += cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue(); - V1 = V1.getOperand(0); - } else if (V1.getOpcode() == ISD::CONCAT_VECTORS) { - unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2; - Lane -= Idx * VT.getVectorNumElements() / 2; - V1 = WidenVector(V1.getOperand(Idx), DAG); - } else if (VT.getSizeInBits() == 64) - V1 = WidenVector(V1, DAG); - - return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, MVT::i64)); - } - - if (isREVMask(ShuffleMask, VT, 64)) - return DAG.getNode(ARM64ISD::REV64, dl, V1.getValueType(), V1, V2); - if (isREVMask(ShuffleMask, VT, 32)) - return DAG.getNode(ARM64ISD::REV32, dl, V1.getValueType(), V1, V2); - if (isREVMask(ShuffleMask, VT, 16)) - return DAG.getNode(ARM64ISD::REV16, dl, V1.getValueType(), V1, V2); - - bool ReverseEXT = false; - unsigned Imm; - if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) { - if (ReverseEXT) - std::swap(V1, V2); - Imm *= getExtFactor(V1); - return DAG.getNode(ARM64ISD::EXT, dl, V1.getValueType(), V1, V2, - DAG.getConstant(Imm, MVT::i32)); - } else if (V2->getOpcode() == ISD::UNDEF && - isSingletonEXTMask(ShuffleMask, VT, Imm)) { - Imm *= getExtFactor(V1); - return DAG.getNode(ARM64ISD::EXT, dl, V1.getValueType(), V1, V1, - DAG.getConstant(Imm, MVT::i32)); - } - - unsigned WhichResult; - if (isZIPMask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::ZIP1 : ARM64ISD::ZIP2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); - } - if (isUZPMask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::UZP1 : ARM64ISD::UZP2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); - } - if (isTRNMask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::TRN1 : ARM64ISD::TRN2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); - } - - if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::ZIP1 : ARM64ISD::ZIP2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); - } - if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::UZP1 : ARM64ISD::UZP2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); - } - if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::TRN1 : ARM64ISD::TRN2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); - } - - // If the shuffle is not directly supported and it has 4 elements, use - // the PerfectShuffle-generated table to synthesize it from other shuffles. - unsigned NumElts = VT.getVectorNumElements(); - if (NumElts == 4) { - unsigned PFIndexes[4]; - for (unsigned i = 0; i != 4; ++i) { - if (ShuffleMask[i] < 0) - PFIndexes[i] = 8; - else - PFIndexes[i] = ShuffleMask[i]; - } - - // Compute the index in the perfect shuffle table. 
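The computation in the next hunk folds the four mask entries into a base-9 index (an undef lane is encoded as 8, hence radix 9) for PerfectShuffleTable, and GeneratePerfectShuffle earlier in this diff unpacks each entry into a cost, an opcode and two operand ids. A standalone sketch of the indexing and decoding (illustrative only; the real table is defined elsewhere in this backend):

#include <cstdio>

// Fold a 4-lane mask (undef encoded as 8) into the base-9 table index.
static unsigned pfIndex(const unsigned M[4]) {
  return M[0] * 9 * 9 * 9 + M[1] * 9 * 9 + M[2] * 9 + M[3];
}

// Pull a table entry apart the way GeneratePerfectShuffle does.
static void decodeEntry(unsigned PFEntry) {
  unsigned Cost = PFEntry >> 30;
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1u << 13) - 1);
  unsigned RHSID = PFEntry & ((1u << 13) - 1);
  printf("cost=%u op=%u lhs=%u rhs=%u\n", Cost, OpNum, LHSID, RHSID);
}

int main() {
  unsigned Mask[4] = {0, 8, 2, 3};        // lane 1 is undef
  printf("index=%u\n", pfIndex(Mask));    // 0*729 + 8*81 + 2*9 + 3 = 669
  decodeEntry((1u << 30) | (9u << 26) | (123u << 13) | 456u); // made-up entry
}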
- unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + - PFIndexes[2] * 9 + PFIndexes[3]; - unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; - unsigned Cost = (PFEntry >> 30); - - if (Cost <= 4) - return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); - } - - return GenerateTBL(Op, ShuffleMask, DAG); -} - -static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, - APInt &UndefBits) { - EVT VT = BVN->getValueType(0); - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { - unsigned NumSplats = VT.getSizeInBits() / SplatBitSize; - - for (unsigned i = 0; i < NumSplats; ++i) { - CnstBits <<= SplatBitSize; - UndefBits <<= SplatBitSize; - CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits()); - UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits()); - } - - return true; - } - - return false; -} - -SDValue ARM64TargetLowering::LowerVectorAND(SDValue Op, - SelectionDAG &DAG) const { - BuildVectorSDNode *BVN = - dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode()); - SDValue LHS = Op.getOperand(0); - SDLoc dl(Op); - EVT VT = Op.getValueType(); - - if (!BVN) - return Op; - - APInt CnstBits(VT.getSizeInBits(), 0); - APInt UndefBits(VT.getSizeInBits(), 0); - if (resolveBuildVector(BVN, CnstBits, UndefBits)) { - // We only have BIC vector immediate instruction, which is and-not. - CnstBits = ~CnstBits; - - // We make use of a little bit of goto ickiness in order to avoid having to - // duplicate the immediate matching logic for the undef toggled case. - bool SecondTry = false; - AttemptModImm: - - if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) { - CnstBits = CnstBits.zextOrTrunc(64); - uint64_t CnstVal = CnstBits.getZExtValue(); - - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(16, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(24, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - } - - if (SecondTry) - goto FailedModImm; - SecondTry = true; - CnstBits = ~UndefBits; - goto AttemptModImm; - } - -// We can always fall back to a non-immediate AND. -FailedModImm: - return Op; -} - -// Specialized code to quickly find if PotentialBVec is a BuildVector that -// consists of only the same constant int value, returned in reference arg -// ConstVal -static bool isAllConstantBuildVector(const SDValue &PotentialBVec, - uint64_t &ConstVal) { - BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec); - if (!Bvec) - return false; - ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0)); - if (!FirstElt) - return false; - EVT VT = Bvec->getValueType(0); - unsigned NumElts = VT.getVectorNumElements(); - for (unsigned i = 1; i < NumElts; ++i) - if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt) - return false; - ConstVal = FirstElt->getZExtValue(); - return true; -} - -static unsigned getIntrinsicID(const SDNode *N) { - unsigned Opcode = N->getOpcode(); - switch (Opcode) { - default: - return Intrinsic::not_intrinsic; - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); - if (IID < Intrinsic::num_intrinsics) - return IID; - return Intrinsic::not_intrinsic; - } - } -} - -// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)), -// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a -// BUILD_VECTORs with constant element C1, C2 is a constant, and C1 == ~C2. -// Also, logical shift right -> sri, with the same structure. -static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { - EVT VT = N->getValueType(0); - - if (!VT.isVector()) - return SDValue(); - - SDLoc DL(N); - - // Is the first op an AND? - const SDValue And = N->getOperand(0); - if (And.getOpcode() != ISD::AND) - return SDValue(); - - // Is the second op an shl or lshr? - SDValue Shift = N->getOperand(1); - // This will have been turned into: ARM64ISD::VSHL vector, #shift - // or ARM64ISD::VLSHR vector, #shift - unsigned ShiftOpc = Shift.getOpcode(); - if ((ShiftOpc != ARM64ISD::VSHL && ShiftOpc != ARM64ISD::VLSHR)) - return SDValue(); - bool IsShiftRight = ShiftOpc == ARM64ISD::VLSHR; - - // Is the shift amount constant? - ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); - if (!C2node) - return SDValue(); - - // Is the and mask vector all constant? - uint64_t C1; - if (!isAllConstantBuildVector(And.getOperand(1), C1)) - return SDValue(); - - // Is C1 == ~C2, taking into account how much one can shift elements of a - // particular size? 
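tryLowerToSLI above is matching an OR of "keep some bits of X" with "shift Y into the remaining bits", which is what SLI and SRI do per element; the constant check continues in the next hunk. A minimal sketch of the per-lane semantics being targeted (illustrative C++ modelling one 32-bit lane, not the DAG pattern match; assumes 0 < Shift < 32):

#include <cstdint>
#include <cstdio>

// One 32-bit lane of SLI: shift Src left and insert it into Dst, keeping
// only the low Shift bits of Dst.
static uint32_t sli32(uint32_t Dst, uint32_t Src, unsigned Shift) {
  uint32_t Keep = (1u << Shift) - 1;
  return (Dst & Keep) | (Src << Shift);
}

// One 32-bit lane of SRI: shift Src right and insert it into Dst, keeping
// only the high Shift bits of Dst.
static uint32_t sri32(uint32_t Dst, uint32_t Src, unsigned Shift) {
  uint32_t Keep = ~(~0u >> Shift);
  return (Dst & Keep) | (Src >> Shift);
}

int main() {
  printf("%08x\n", sli32(0xAAAAAAAA, 0x00000BCD, 16));  // 0bcdaaaa
  printf("%08x\n", sri32(0xAAAAAAAA, 0xBCD00000, 16));  // aaaabcd0
}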
- uint64_t C2 = C2node->getZExtValue(); - unsigned ElemSizeInBits = VT.getVectorElementType().getSizeInBits(); - if (C2 > ElemSizeInBits) - return SDValue(); - unsigned ElemMask = (1 << ElemSizeInBits) - 1; - if ((C1 & ElemMask) != (~C2 & ElemMask)) - return SDValue(); - - SDValue X = And.getOperand(0); - SDValue Y = Shift.getOperand(0); - - unsigned Intrin = - IsShiftRight ? Intrinsic::arm64_neon_vsri : Intrinsic::arm64_neon_vsli; - SDValue ResultSLI = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrin, MVT::i32), X, Y, Shift.getOperand(1)); - - DEBUG(dbgs() << "arm64-lower: transformed: \n"); - DEBUG(N->dump(&DAG)); - DEBUG(dbgs() << "into: \n"); - DEBUG(ResultSLI->dump(&DAG)); - - ++NumShiftInserts; - return ResultSLI; -} - -SDValue ARM64TargetLowering::LowerVectorOR(SDValue Op, - SelectionDAG &DAG) const { - // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2)) - if (EnableARM64SlrGeneration) { - SDValue Res = tryLowerToSLI(Op.getNode(), DAG); - if (Res.getNode()) - return Res; - } - - BuildVectorSDNode *BVN = - dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode()); - SDValue LHS = Op.getOperand(1); - SDLoc dl(Op); - EVT VT = Op.getValueType(); - - // OR commutes, so try swapping the operands. - if (!BVN) { - LHS = Op.getOperand(0); - BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode()); - } - if (!BVN) - return Op; - - APInt CnstBits(VT.getSizeInBits(), 0); - APInt UndefBits(VT.getSizeInBits(), 0); - if (resolveBuildVector(BVN, CnstBits, UndefBits)) { - // We make use of a little bit of goto ickiness in order to avoid having to - // duplicate the immediate matching logic for the undef toggled case. - bool SecondTry = false; - AttemptModImm: - - if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) { - CnstBits = CnstBits.zextOrTrunc(64); - uint64_t CnstVal = CnstBits.getZExtValue(); - - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(16, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(24, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - } - - if (SecondTry) - goto FailedModImm; - SecondTry = true; - CnstBits = UndefBits; - goto AttemptModImm; - } - -// We can always fall back to a non-immediate OR. -FailedModImm: - return Op; -} - -SDValue ARM64TargetLowering::LowerBUILD_VECTOR(SDValue Op, - SelectionDAG &DAG) const { - BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); - SDLoc dl(Op); - EVT VT = Op.getValueType(); - - APInt CnstBits(VT.getSizeInBits(), 0); - APInt UndefBits(VT.getSizeInBits(), 0); - if (resolveBuildVector(BVN, CnstBits, UndefBits)) { - // We make use of a little bit of goto ickiness in order to avoid having to - // duplicate the immediate matching logic for the undef toggled case. - bool SecondTry = false; - AttemptModImm: - - if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) { - CnstBits = CnstBits.zextOrTrunc(64); - uint64_t CnstVal = CnstBits.getZExtValue(); - - // Certain magic vector constants (used to express things like NOT - // and NEG) are passed through unmodified. This allows codegen patterns - // for these operations to match. Special-purpose patterns will lower - // these immediates to MOVIs if it proves necessary. - if (VT.isInteger() && (CnstVal == 0 || CnstVal == ~0ULL)) - return Op; - - // The many faces of MOVI... - if (ARM64_AM::isAdvSIMDModImmType10(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType10(CnstVal); - if (VT.getSizeInBits() == 128) { - SDValue Mov = DAG.getNode(ARM64ISD::MOVIedit, dl, MVT::v2i64, - DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - // Support the V64 version via subregister insertion. - SDValue Mov = DAG.getNode(ARM64ISD::MOVIedit, dl, MVT::f64, - DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(16, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(24, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType7(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType7(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVImsl, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(264, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType8(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType8(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVImsl, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(272, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType9(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType9(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8; - SDValue Mov = DAG.getNode(ARM64ISD::MOVI, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - // The few faces of FMOV... - if (ARM64_AM::isAdvSIMDModImmType11(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType11(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32; - SDValue Mov = DAG.getNode(ARM64ISD::FMOV, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType12(CnstVal) && - VT.getSizeInBits() == 128) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType12(CnstVal); - SDValue Mov = DAG.getNode(ARM64ISD::FMOV, dl, MVT::v2f64, - DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - // The many faces of MVNI... - CnstVal = ~CnstVal; - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(16, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(24, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType7(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType7(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNImsl, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(264, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType8(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType8(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNImsl, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(272, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - } - - if (SecondTry) - goto FailedModImm; - SecondTry = true; - CnstBits = UndefBits; - goto AttemptModImm; - } -FailedModImm: - - // Scan through the operands to find some interesting properties we can - // exploit: - // 1) If only one value is used, we can use a DUP, or - // 2) if only the low element is not undef, we can just insert that, or - // 3) if only one constant value is used (w/ some non-constant lanes), - // we can splat the constant value into the whole vector then fill - // in the non-constant lanes. - // 4) FIXME: If different constant values are used, but we can intelligently - // select the values we'll be overwriting for the non-constant - // lanes such that we can directly materialize the vector - // some other way (MOVI, e.g.), we can be sneaky. 
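The operand scan in the next hunk gathers exactly the properties that strategy comment lists: whether only lane 0 is defined, whether a single value (or a single constant value) is used, and how many lanes are constant. A standalone sketch of the same classification over plain integers, with undef lanes modelled as empty optionals (illustrative only):

#include <cstddef>
#include <cstdio>
#include <optional>
#include <vector>

struct BuildVecInfo {
  bool OnlyLowElement = true;
  bool OnlyOneValue = true;
  unsigned ConstantLanes = 0;
};

// Classify a BUILD_VECTOR whose lanes are undef (nullopt) or a constant int;
// non-constant lanes are not modelled in this sketch.
static BuildVecInfo classify(const std::vector<std::optional<int>> &Lanes) {
  BuildVecInfo Info;
  std::optional<int> Value;
  for (std::size_t i = 0; i < Lanes.size(); ++i) {
    if (!Lanes[i])
      continue;                        // undef lane
    if (i > 0)
      Info.OnlyLowElement = false;
    ++Info.ConstantLanes;
    if (!Value)
      Value = Lanes[i];
    else if (*Value != *Lanes[i])
      Info.OnlyOneValue = false;
  }
  return Info;
}

int main() {
  auto Info = classify({7, 7, std::nullopt, 7});   // a splat with one undef
  printf("low-only=%d one-value=%d const-lanes=%u\n", Info.OnlyLowElement,
         Info.OnlyOneValue, Info.ConstantLanes);   // 0, 1, 3
}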
- unsigned NumElts = VT.getVectorNumElements(); - bool isOnlyLowElement = true; - bool usesOnlyOneValue = true; - bool usesOnlyOneConstantValue = true; - bool isConstant = true; - unsigned NumConstantLanes = 0; - SDValue Value; - SDValue ConstantValue; - for (unsigned i = 0; i < NumElts; ++i) { - SDValue V = Op.getOperand(i); - if (V.getOpcode() == ISD::UNDEF) - continue; - if (i > 0) - isOnlyLowElement = false; - if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) - isConstant = false; - - if (isa<ConstantSDNode>(V)) { - ++NumConstantLanes; - if (!ConstantValue.getNode()) - ConstantValue = V; - else if (ConstantValue != V) - usesOnlyOneConstantValue = false; - } - - if (!Value.getNode()) - Value = V; - else if (V != Value) - usesOnlyOneValue = false; - } - - if (!Value.getNode()) - return DAG.getUNDEF(VT); - - if (isOnlyLowElement) - return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); - - // Use DUP for non-constant splats. For f32 constant splats, reduce to - // i32 and try again. - if (usesOnlyOneValue) { - if (!isConstant) { - if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - Value.getValueType() != VT) - return DAG.getNode(ARM64ISD::DUP, dl, VT, Value); - - // This is actually a DUPLANExx operation, which keeps everything vectory. - - // DUPLANE works on 128-bit vectors, widen it if necessary. - SDValue Lane = Value.getOperand(1); - Value = Value.getOperand(0); - if (Value.getValueType().getSizeInBits() == 64) - Value = WidenVector(Value, DAG); - - unsigned Opcode = getDUPLANEOp(VT.getVectorElementType()); - return DAG.getNode(Opcode, dl, VT, Value, Lane); - } - - if (VT.getVectorElementType().isFloatingPoint()) { - SmallVector<SDValue, 8> Ops; - MVT NewType = - (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; - for (unsigned i = 0; i < NumElts; ++i) - Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i))); - EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts); - SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); - Val = LowerBUILD_VECTOR(Val, DAG); - if (Val.getNode()) - return DAG.getNode(ISD::BITCAST, dl, VT, Val); - } - } - - // If there was only one constant value used and for more than one lane, - // start by splatting that value, then replace the non-constant lanes. This - // is better than the default, which will perform a separate initialization - // for each lane. - if (NumConstantLanes > 0 && usesOnlyOneConstantValue) { - SDValue Val = DAG.getNode(ARM64ISD::DUP, dl, VT, ConstantValue); - // Now insert the non-constant lanes. - for (unsigned i = 0; i < NumElts; ++i) { - SDValue V = Op.getOperand(i); - SDValue LaneIdx = DAG.getConstant(i, MVT::i64); - if (!isa<ConstantSDNode>(V)) { - // Note that type legalization likely mucked about with the VT of the - // source operand, so we may have to convert it here before inserting. - Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx); - } - } - return Val; - } - - // If all elements are constants and the case above didn't get hit, fall back - // to the default expansion, which will generate a load from the constant - // pool. - if (isConstant) - return SDValue(); - - // Empirical tests suggest this is rarely worth it for vectors of length <= 2. - if (NumElts >= 4) { - SDValue shuffle = ReconstructShuffle(Op, DAG); - if (shuffle != SDValue()) - return shuffle; - } - - // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we - // know the default expansion would otherwise fall back on something even - // worse. 
For a vector with one or two non-undef values, that's - // scalar_to_vector for the elements followed by a shuffle (provided the - // shuffle is valid for the target) and materialization element by element - // on the stack followed by a load for everything else. - if (!isConstant && !usesOnlyOneValue) { - SDValue Vec = DAG.getUNDEF(VT); - SDValue Op0 = Op.getOperand(0); - unsigned ElemSize = VT.getVectorElementType().getSizeInBits(); - unsigned i = 0; - // For 32 and 64 bit types, use INSERT_SUBREG for lane zero to - // a) Avoid a RMW dependency on the full vector register, and - // b) Allow the register coalescer to fold away the copy if the - // value is already in an S or D register. - if (Op0.getOpcode() != ISD::UNDEF && (ElemSize == 32 || ElemSize == 64)) { - unsigned SubIdx = ElemSize == 32 ? ARM64::ssub : ARM64::dsub; - MachineSDNode *N = - DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0, - DAG.getTargetConstant(SubIdx, MVT::i32)); - Vec = SDValue(N, 0); - ++i; - } - for (; i < NumElts; ++i) { - SDValue V = Op.getOperand(i); - if (V.getOpcode() == ISD::UNDEF) - continue; - SDValue LaneIdx = DAG.getConstant(i, MVT::i64); - Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); - } - return Vec; - } - - // Just use the default expansion. We failed to find a better alternative. - return SDValue(); -} - -SDValue ARM64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!"); - - // Check for non-constant lane. - if (!isa<ConstantSDNode>(Op.getOperand(2))) - return SDValue(); - - EVT VT = Op.getOperand(0).getValueType(); - - // Insertion/extraction are legal for V128 types. - if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || - VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64) - return Op; - - if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 && - VT != MVT::v1i64 && VT != MVT::v2f32) - return SDValue(); - - // For V64 types, we perform insertion by expanding the value - // to a V128 type and perform the insertion on that. - SDLoc DL(Op); - SDValue WideVec = WidenVector(Op.getOperand(0), DAG); - EVT WideTy = WideVec.getValueType(); - - SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec, - Op.getOperand(1), Op.getOperand(2)); - // Re-narrow the resultant vector. - return NarrowVector(Node, DAG); -} - -SDValue ARM64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!"); - - // Check for non-constant lane. - if (!isa<ConstantSDNode>(Op.getOperand(1))) - return SDValue(); - - EVT VT = Op.getOperand(0).getValueType(); - - // Insertion/extraction are legal for V128 types. - if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || - VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64) - return Op; - - if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 && - VT != MVT::v1i64 && VT != MVT::v2f32) - return SDValue(); - - // For V64 types, we perform extraction by expanding the value - // to a V128 type and perform the extraction on that. - SDLoc DL(Op); - SDValue WideVec = WidenVector(Op.getOperand(0), DAG); - EVT WideTy = WideVec.getValueType(); - - EVT ExtrTy = WideTy.getVectorElementType(); - if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8) - ExtrTy = MVT::i32; - - // For extractions, we just return the result directly. 
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec, - Op.getOperand(1)); -} - -SDValue ARM64TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getOpcode() == ISD::SCALAR_TO_VECTOR && "Unknown opcode!"); - // Some AdvSIMD intrinsics leave their results in the scalar B/H/S/D - // registers. The default lowering will copy those to a GPR then back - // to a vector register. Instead, just recognize those cases and reference - // the vector register they're already a subreg of. - SDValue Op0 = Op->getOperand(0); - if (Op0->getOpcode() != ISD::INTRINSIC_WO_CHAIN) - return Op; - unsigned IID = getIntrinsicID(Op0.getNode()); - // The below list of intrinsics isn't exhaustive. Add cases as-needed. - // FIXME: Even better would be if there were an attribute on the node - // that we could query and set in the intrinsics definition or something. - unsigned SubIdx; - switch (IID) { - default: - // Early exit if this isn't one of the intrinsics we handle. - return Op; - case Intrinsic::arm64_neon_uaddv: - case Intrinsic::arm64_neon_saddv: - case Intrinsic::arm64_neon_uaddlv: - case Intrinsic::arm64_neon_saddlv: - switch (Op0.getValueType().getSizeInBits()) { - default: - llvm_unreachable("Illegal result size from ARM64 vector intrinsic!"); - case 8: - SubIdx = ARM64::bsub; - break; - case 16: - SubIdx = ARM64::hsub; - break; - case 32: - SubIdx = ARM64::ssub; - break; - case 64: - SubIdx = ARM64::dsub; - break; - } - } - MachineSDNode *N = - DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(Op), - Op.getValueType(), DAG.getUNDEF(Op.getValueType()), - Op0, DAG.getTargetConstant(SubIdx, MVT::i32)); - return SDValue(N, 0); -} - -SDValue ARM64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op.getOperand(0).getValueType(); - SDLoc dl(Op); - // Just in case... - if (!VT.isVector()) - return SDValue(); - - ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1)); - if (!Cst) - return SDValue(); - unsigned Val = Cst->getZExtValue(); - - unsigned Size = Op.getValueType().getSizeInBits(); - if (Val == 0) { - switch (Size) { - case 8: - return DAG.getTargetExtractSubreg(ARM64::bsub, dl, Op.getValueType(), - Op.getOperand(0)); - case 16: - return DAG.getTargetExtractSubreg(ARM64::hsub, dl, Op.getValueType(), - Op.getOperand(0)); - case 32: - return DAG.getTargetExtractSubreg(ARM64::ssub, dl, Op.getValueType(), - Op.getOperand(0)); - case 64: - return DAG.getTargetExtractSubreg(ARM64::dsub, dl, Op.getValueType(), - Op.getOperand(0)); - default: - llvm_unreachable("Unexpected vector type in extract_subvector!"); - } - } - // If this is extracting the upper 64-bits of a 128-bit vector, we match - // that directly. - if (Size == 64 && Val * VT.getVectorElementType().getSizeInBits() == 64) - return Op; - - return SDValue(); -} - -bool ARM64TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, - EVT VT) const { - if (VT.getVectorNumElements() == 4 && - (VT.is128BitVector() || VT.is64BitVector())) { - unsigned PFIndexes[4]; - for (unsigned i = 0; i != 4; ++i) { - if (M[i] < 0) - PFIndexes[i] = 8; - else - PFIndexes[i] = M[i]; - } - - // Compute the index in the perfect shuffle table. 
- unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + - PFIndexes[2] * 9 + PFIndexes[3]; - unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; - unsigned Cost = (PFEntry >> 30); - - if (Cost <= 4) - return true; - } - - bool ReverseVEXT; - unsigned Imm, WhichResult; - - return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) || - isREVMask(M, VT, 32) || isREVMask(M, VT, 16) || - isEXTMask(M, VT, ReverseVEXT, Imm) || - // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM. - isTRNMask(M, VT, WhichResult) || isUZPMask(M, VT, WhichResult) || - isZIPMask(M, VT, WhichResult) || - isTRN_v_undef_Mask(M, VT, WhichResult) || - isUZP_v_undef_Mask(M, VT, WhichResult) || - isZIP_v_undef_Mask(M, VT, WhichResult)); -} - -/// getVShiftImm - Check if this is a valid build_vector for the immediate -/// operand of a vector shift operation, where all the elements of the -/// build_vector must have the same constant integer value. -static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { - // Ignore bit_converts. - while (Op.getOpcode() == ISD::BITCAST) - Op = Op.getOperand(0); - BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, - HasAnyUndefs, ElementBits) || - SplatBitSize > ElementBits) - return false; - Cnt = SplatBits.getSExtValue(); - return true; -} - -/// isVShiftLImm - Check if this is a valid build_vector for the immediate -/// operand of a vector shift left operation. That value must be in the range: -/// 0 <= Value < ElementBits for a left shift; or -/// 0 <= Value <= ElementBits for a long left shift. -static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { - assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); - if (!getVShiftImm(Op, ElementBits, Cnt)) - return false; - return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits); -} - -/// isVShiftRImm - Check if this is a valid build_vector for the immediate -/// operand of a vector shift right operation. For a shift opcode, the value -/// is positive, but for an intrinsic the value count must be negative. The -/// absolute value must be in the range: -/// 1 <= |Value| <= ElementBits for a right shift; or -/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. -static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, - int64_t &Cnt) { - assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); - if (!getVShiftImm(Op, ElementBits, Cnt)) - return false; - if (isIntrinsic) - Cnt = -Cnt; - return (Cnt >= 1 && Cnt <= (isNarrow ? 
ElementBits / 2 : ElementBits)); -} - -SDValue ARM64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - SDLoc DL(Op); - int64_t Cnt; - - if (!Op.getOperand(1).getValueType().isVector()) - return Op; - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - - switch (Op.getOpcode()) { - default: - llvm_unreachable("unexpected shift opcode"); - - case ISD::SHL: - if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) - return DAG.getNode(ARM64ISD::VSHL, SDLoc(Op), VT, Op.getOperand(0), - DAG.getConstant(Cnt, MVT::i32)); - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::arm64_neon_ushl, MVT::i32), - Op.getOperand(0), Op.getOperand(1)); - case ISD::SRA: - case ISD::SRL: - // Right shift immediate - if (isVShiftRImm(Op.getOperand(1), VT, false, false, Cnt) && - Cnt < EltSize) { - unsigned Opc = - (Op.getOpcode() == ISD::SRA) ? ARM64ISD::VASHR : ARM64ISD::VLSHR; - return DAG.getNode(Opc, SDLoc(Op), VT, Op.getOperand(0), - DAG.getConstant(Cnt, MVT::i32)); - } - - // Right shift register. Note, there is not a shift right register - // instruction, but the shift left register instruction takes a signed - // value, where negative numbers specify a right shift. - unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::arm64_neon_sshl - : Intrinsic::arm64_neon_ushl; - // negate the shift amount - SDValue NegShift = DAG.getNode(ARM64ISD::NEG, DL, VT, Op.getOperand(1)); - SDValue NegShiftLeft = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Opc, MVT::i32), Op.getOperand(0), NegShift); - return NegShiftLeft; - } - - return SDValue(); -} - -static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, - ARM64CC::CondCode CC, bool NoNans, EVT VT, - SDLoc dl, SelectionDAG &DAG) { - EVT SrcVT = LHS.getValueType(); - - BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode()); - APInt CnstBits(VT.getSizeInBits(), 0); - APInt UndefBits(VT.getSizeInBits(), 0); - bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits); - bool IsZero = IsCnst && (CnstBits == 0); - - if (SrcVT.getVectorElementType().isFloatingPoint()) { - switch (CC) { - default: - return SDValue(); - case ARM64CC::NE: { - SDValue Fcmeq; - if (IsZero) - Fcmeq = DAG.getNode(ARM64ISD::FCMEQz, dl, VT, LHS); - else - Fcmeq = DAG.getNode(ARM64ISD::FCMEQ, dl, VT, LHS, RHS); - return DAG.getNode(ARM64ISD::NOT, dl, VT, Fcmeq); - } - case ARM64CC::EQ: - if (IsZero) - return DAG.getNode(ARM64ISD::FCMEQz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMEQ, dl, VT, LHS, RHS); - case ARM64CC::GE: - if (IsZero) - return DAG.getNode(ARM64ISD::FCMGEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGE, dl, VT, LHS, RHS); - case ARM64CC::GT: - if (IsZero) - return DAG.getNode(ARM64ISD::FCMGTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGT, dl, VT, LHS, RHS); - case ARM64CC::LS: - if (IsZero) - return DAG.getNode(ARM64ISD::FCMLEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGE, dl, VT, RHS, LHS); - case ARM64CC::LT: - if (!NoNans) - return SDValue(); - // If we ignore NaNs then we can use to the MI implementation. - // Fallthrough. 
- case ARM64CC::MI: - if (IsZero) - return DAG.getNode(ARM64ISD::FCMLTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGT, dl, VT, RHS, LHS); - } - } - - switch (CC) { - default: - return SDValue(); - case ARM64CC::NE: { - SDValue Cmeq; - if (IsZero) - Cmeq = DAG.getNode(ARM64ISD::CMEQz, dl, VT, LHS); - else - Cmeq = DAG.getNode(ARM64ISD::CMEQ, dl, VT, LHS, RHS); - return DAG.getNode(ARM64ISD::NOT, dl, VT, Cmeq); - } - case ARM64CC::EQ: - if (IsZero) - return DAG.getNode(ARM64ISD::CMEQz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMEQ, dl, VT, LHS, RHS); - case ARM64CC::GE: - if (IsZero) - return DAG.getNode(ARM64ISD::CMGEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGE, dl, VT, LHS, RHS); - case ARM64CC::GT: - if (IsZero) - return DAG.getNode(ARM64ISD::CMGTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGT, dl, VT, LHS, RHS); - case ARM64CC::LE: - if (IsZero) - return DAG.getNode(ARM64ISD::CMLEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGE, dl, VT, RHS, LHS); - case ARM64CC::LS: - return DAG.getNode(ARM64ISD::CMHS, dl, VT, RHS, LHS); - case ARM64CC::CC: - return DAG.getNode(ARM64ISD::CMHI, dl, VT, RHS, LHS); - case ARM64CC::LT: - if (IsZero) - return DAG.getNode(ARM64ISD::CMLTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGT, dl, VT, RHS, LHS); - case ARM64CC::HI: - return DAG.getNode(ARM64ISD::CMHI, dl, VT, LHS, RHS); - case ARM64CC::CS: - return DAG.getNode(ARM64ISD::CMHS, dl, VT, LHS, RHS); - } -} - -SDValue ARM64TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDLoc dl(Op); - - if (LHS.getValueType().getVectorElementType().isInteger()) { - assert(LHS.getValueType() == RHS.getValueType()); - ARM64CC::CondCode ARM64CC = changeIntCCToARM64CC(CC); - return EmitVectorComparison(LHS, RHS, ARM64CC, false, Op.getValueType(), dl, - DAG); - } - - assert(LHS.getValueType().getVectorElementType() == MVT::f32 || - LHS.getValueType().getVectorElementType() == MVT::f64); - - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally - // clean. Some of them require two branches to implement. - ARM64CC::CondCode CC1, CC2; - changeFPCCToARM64CC(CC, CC1, CC2); - - bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath; - SDValue Cmp1 = - EmitVectorComparison(LHS, RHS, CC1, NoNaNs, Op.getValueType(), dl, DAG); - if (!Cmp1.getNode()) - return SDValue(); - - if (CC2 != ARM64CC::AL) { - SDValue Cmp2 = - EmitVectorComparison(LHS, RHS, CC2, NoNaNs, Op.getValueType(), dl, DAG); - if (!Cmp2.getNode()) - return SDValue(); - - return DAG.getNode(ISD::OR, dl, Cmp1.getValueType(), Cmp1, Cmp2); - } - - return Cmp1; -} - -/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as -/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment -/// specified in the intrinsic calls. -bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, - unsigned Intrinsic) const { - switch (Intrinsic) { - case Intrinsic::arm64_neon_ld2: - case Intrinsic::arm64_neon_ld3: - case Intrinsic::arm64_neon_ld4: - case Intrinsic::arm64_neon_ld2lane: - case Intrinsic::arm64_neon_ld3lane: - case Intrinsic::arm64_neon_ld4lane: - case Intrinsic::arm64_neon_ld2r: - case Intrinsic::arm64_neon_ld3r: - case Intrinsic::arm64_neon_ld4r: { - Info.opc = ISD::INTRINSIC_W_CHAIN; - // Conservatively set memVT to the entire set of vectors loaded. 
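For the structured loads, the next hunk sizes memVT as the whole multi-vector transfer: the allocated size in bytes divided by 8, expressed as that many i64 lanes. For example, an ld3 returning three <4 x i32> vectors covers 48 bytes, so memVT ends up as a 6-lane i64 vector (a small arithmetic sketch, illustrative only):

#include <cstdio>

int main() {
  // arm64.neon.ld3 returning { <4 x i32>, <4 x i32>, <4 x i32> }:
  unsigned TotalBytes = 3 * 16;           // three 128-bit vectors
  unsigned I64Lanes = TotalBytes / 8;     // memVT becomes a 6 x i64 vector
  printf("memVT lanes of i64: %u\n", I64Lanes);  // 6
}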
- uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; - Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); - Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); - Info.offset = 0; - Info.align = 0; - Info.vol = false; // volatile loads with NEON intrinsics not supported - Info.readMem = true; - Info.writeMem = false; - return true; - } - case Intrinsic::arm64_neon_st2: - case Intrinsic::arm64_neon_st3: - case Intrinsic::arm64_neon_st4: - case Intrinsic::arm64_neon_st2lane: - case Intrinsic::arm64_neon_st3lane: - case Intrinsic::arm64_neon_st4lane: { - Info.opc = ISD::INTRINSIC_VOID; - // Conservatively set memVT to the entire set of vectors stored. - unsigned NumElts = 0; - for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { - Type *ArgTy = I.getArgOperand(ArgI)->getType(); - if (!ArgTy->isVectorTy()) - break; - NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; - } - Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); - Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); - Info.offset = 0; - Info.align = 0; - Info.vol = false; // volatile stores with NEON intrinsics not supported - Info.readMem = false; - Info.writeMem = true; - return true; - } - case Intrinsic::arm64_ldxr: { - PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(PtrTy->getElementType()); - Info.ptrVal = I.getArgOperand(0); - Info.offset = 0; - Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); - Info.vol = true; - Info.readMem = true; - Info.writeMem = false; - return true; - } - case Intrinsic::arm64_stxr: { - PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType()); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(PtrTy->getElementType()); - Info.ptrVal = I.getArgOperand(1); - Info.offset = 0; - Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); - Info.vol = true; - Info.readMem = false; - Info.writeMem = true; - return true; - } - case Intrinsic::arm64_ldxp: { - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::i128; - Info.ptrVal = I.getArgOperand(0); - Info.offset = 0; - Info.align = 16; - Info.vol = true; - Info.readMem = true; - Info.writeMem = false; - return true; - } - case Intrinsic::arm64_stxp: { - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::i128; - Info.ptrVal = I.getArgOperand(2); - Info.offset = 0; - Info.align = 16; - Info.vol = true; - Info.readMem = false; - Info.writeMem = true; - return true; - } - default: - break; - } - - return false; -} - -// Truncations from 64-bit GPR to 32-bit GPR is free. -bool ARM64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { - if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) - return false; - unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); - unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; -} -bool ARM64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { - if (!VT1.isInteger() || !VT2.isInteger()) - return false; - unsigned NumBits1 = VT1.getSizeInBits(); - unsigned NumBits2 = VT2.getSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; -} - -// All 32-bit GPR operations implicitly zero the high-half of the corresponding -// 64-bit GPR. 
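The comment above is the whole justification for the isZExtFree overrides that follow: writing a W register clears bits 63:32, so a 32-to-64-bit zero extension costs nothing. A minimal source-level illustration (standalone sketch; the "no extra instruction" note is an expectation from the comment, not something this snippet verifies):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Any 32-bit operation already zeroes the upper half of the 64-bit
      // register, so this widening is expected to need no extra instruction.
      uint32_t w = 0xdeadbeefu;
      uint64_t x = w;              // free zero extension on this target
      assert((x >> 32) == 0);
      return 0;
    }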
-bool ARM64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { - if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) - return false; - unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); - unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 == 32 && NumBits2 == 64) - return true; - return false; -} -bool ARM64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { - if (!VT1.isInteger() || !VT2.isInteger()) - return false; - unsigned NumBits1 = VT1.getSizeInBits(); - unsigned NumBits2 = VT2.getSizeInBits(); - if (NumBits1 == 32 && NumBits2 == 64) - return true; - return false; -} - -bool ARM64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { - EVT VT1 = Val.getValueType(); - if (isZExtFree(VT1, VT2)) { - return true; - } - - if (Val.getOpcode() != ISD::LOAD) - return false; - - // 8-, 16-, and 32-bit integer loads all implicitly zero-extend. - return (VT1.isSimple() && VT1.isInteger() && VT2.isSimple() && - VT2.isInteger() && VT1.getSizeInBits() <= 32); -} - -bool ARM64TargetLowering::hasPairedLoad(Type *LoadedType, - unsigned &RequiredAligment) const { - if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy()) - return false; - // Cyclone supports unaligned accesses. - RequiredAligment = 0; - unsigned NumBits = LoadedType->getPrimitiveSizeInBits(); - return NumBits == 32 || NumBits == 64; -} - -bool ARM64TargetLowering::hasPairedLoad(EVT LoadedType, - unsigned &RequiredAligment) const { - if (!LoadedType.isSimple() || - (!LoadedType.isInteger() && !LoadedType.isFloatingPoint())) - return false; - // Cyclone supports unaligned accesses. - RequiredAligment = 0; - unsigned NumBits = LoadedType.getSizeInBits(); - return NumBits == 32 || NumBits == 64; -} - -static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, - unsigned AlignCheck) { - return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) && - (DstAlign == 0 || DstAlign % AlignCheck == 0)); -} - -EVT ARM64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, - unsigned SrcAlign, bool IsMemset, - bool ZeroMemset, bool MemcpyStrSrc, - MachineFunction &MF) const { - // Don't use AdvSIMD to implement 16-byte memset. It would have taken one - // instruction to materialize the v2i64 zero and one store (with restrictive - // addressing mode). Just do two i64 store of zero-registers. - bool Fast; - const Function *F = MF.getFunction(); - if (!IsMemset && Size >= 16 && - !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::NoImplicitFloat) && - (memOpAlign(SrcAlign, DstAlign, 16) || - (allowsUnalignedMemoryAccesses(MVT::v2i64, 0, &Fast) && Fast))) - return MVT::v2i64; - - return Size >= 8 ? MVT::i64 : MVT::i32; -} - -// 12-bit optionally shifted immediates are legal for adds. -bool ARM64TargetLowering::isLegalAddImmediate(int64_t Immed) const { - if ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0)) - return true; - return false; -} - -// Integer comparisons are implemented with ADDS/SUBS, so the range of valid -// immediates is the same as for an add or a sub. -bool ARM64TargetLowering::isLegalICmpImmediate(int64_t Immed) const { - if (Immed < 0) - Immed *= -1; - return isLegalAddImmediate(Immed); -} - -/// isLegalAddressingMode - Return true if the addressing mode represented -/// by AM is legal for this target, for a load/store of the specified type. 
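For reference, the "legal add immediate" predicate above accepts exactly the values ADD/SUB can encode: a 12-bit immediate, optionally shifted left by 12. A standalone mirror of that check (assumes a non-negative immediate, which is what the ICmp caller above arranges by negating first):

    #include <cassert>
    #include <cstdint>

    // Mirrors the check above: legal if it fits in 12 bits, or is a 12-bit
    // value shifted left by 12 bits.
    static bool legalAddImmediate(int64_t imm) {
      return (imm >> 12) == 0 || ((imm & 0xfff) == 0 && (imm >> 24) == 0);
    }

    int main() {
      assert(legalAddImmediate(0xfff));      // plain 12-bit immediate
      assert(legalAddImmediate(0xabc000));   // 12-bit immediate, LSL #12
      assert(!legalAddImmediate(0x1000001)); // needs more than one add
      assert(!legalAddImmediate(0x1001000)); // too wide even when shifted
      return 0;
    }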
-bool ARM64TargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { - // ARM64 has five basic addressing modes: - // reg - // reg + 9-bit signed offset - // reg + SIZE_IN_BYTES * 12-bit unsigned offset - // reg1 + reg2 - // reg + SIZE_IN_BYTES * reg - - // No global is ever allowed as a base. - if (AM.BaseGV) - return false; - - // No reg+reg+imm addressing. - if (AM.HasBaseReg && AM.BaseOffs && AM.Scale) - return false; - - // check reg + imm case: - // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12 - uint64_t NumBytes = 0; - if (Ty->isSized()) { - uint64_t NumBits = getDataLayout()->getTypeSizeInBits(Ty); - NumBytes = NumBits / 8; - if (!isPowerOf2_64(NumBits)) - NumBytes = 0; - } - - if (!AM.Scale) { - int64_t Offset = AM.BaseOffs; - - // 9-bit signed offset - if (Offset >= -(1LL << 9) && Offset <= (1LL << 9) - 1) - return true; - - // 12-bit unsigned offset - unsigned shift = Log2_64(NumBytes); - if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 && - // Must be a multiple of NumBytes (NumBytes is a power of 2) - (Offset >> shift) << shift == Offset) - return true; - return false; - } - - // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2 - - if (!AM.Scale || AM.Scale == 1 || - (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes)) - return true; - return false; -} - -int ARM64TargetLowering::getScalingFactorCost(const AddrMode &AM, - Type *Ty) const { - // Scaling factors are not free at all. - // Operands | Rt Latency - // ------------------------------------------- - // Rt, [Xn, Xm] | 4 - // ------------------------------------------- - // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5 - // Rt, [Xn, Wm, <extend> #imm] | - if (isLegalAddressingMode(AM, Ty)) - // Scale represents reg2 * scale, thus account for 1 if - // it is not equal to 0 or 1. - return AM.Scale != 0 && AM.Scale != 1; - return -1; -} - -bool ARM64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { - VT = VT.getScalarType(); - - if (!VT.isSimple()) - return false; - - switch (VT.getSimpleVT().SimpleTy) { - case MVT::f32: - case MVT::f64: - return true; - default: - break; - } - - return false; -} - -const uint16_t * -ARM64TargetLowering::getScratchRegisters(CallingConv::ID) const { - // LR is a callee-save register, but we must treat it as clobbered by any call - // site. Hence we include LR in the scratch registers, which are in turn added - // as implicit-defs for stackmaps and patchpoints. - static const uint16_t ScratchRegs[] = { - ARM64::X16, ARM64::X17, ARM64::LR, 0 - }; - return ScratchRegs; -} - -bool ARM64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, - Type *Ty) const { - assert(Ty->isIntegerTy()); - - unsigned BitSize = Ty->getPrimitiveSizeInBits(); - if (BitSize == 0) - return false; - - int64_t Val = Imm.getSExtValue(); - if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize)) - return true; - - if ((int64_t)Val < 0) - Val = ~Val; - if (BitSize == 32) - Val &= (1LL << 32) - 1; - - unsigned LZ = countLeadingZeros((uint64_t)Val); - unsigned Shift = (63 - LZ) / 16; - // MOVZ is free so return true for one or fewer MOVK. - return (Shift < 3) ? true : false; -} - -// Generate SUBS and CSEL for integer abs. -static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { - EVT VT = N->getValueType(0); - - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - SDLoc DL(N); - - // Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1) - // and change it to SUB and CSEL. 
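The comment above describes the classic branch-free abs idiom; written out at the source level, the pattern being matched is the following (standalone sketch; it relies on arithmetic right shift of a negative signed value, which is the behaviour on AArch64 and common compilers):

    #include <cassert>
    #include <cstdint>

    // y = x >> 31 is all sign bits (0 or -1); abs(x) = (x + y) ^ y.
    // The combine below rewrites the resulting XOR/ADD/SRA triangle into
    // SUBS + CSEL instead.
    static int32_t absIdiom(int32_t x) {
      int32_t y = x >> 31;
      return (x + y) ^ y;
    }

    int main() {
      assert(absIdiom(-7) == 7);
      assert(absIdiom(12) == 12);
      assert(absIdiom(0) == 0);
      return 0;
    }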
- if (VT.isInteger() && N->getOpcode() == ISD::XOR && - N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 && - N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0)) - if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1))) - if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) { - SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), - N0.getOperand(0)); - // Generate SUBS & CSEL. - SDValue Cmp = - DAG.getNode(ARM64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), - N0.getOperand(0), DAG.getConstant(0, VT)); - return DAG.getNode(ARM64ISD::CSEL, DL, VT, N0.getOperand(0), Neg, - DAG.getConstant(ARM64CC::PL, MVT::i32), - SDValue(Cmp.getNode(), 1)); - } - return SDValue(); -} - -// performXorCombine - Attempts to handle integer ABS. -static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const ARM64Subtarget *Subtarget) { - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - return performIntegerAbsCombine(N, DAG); -} - -static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const ARM64Subtarget *Subtarget) { - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - // Multiplication of a power of two plus/minus one can be done more - // cheaply as as shift+add/sub. For now, this is true unilaterally. If - // future CPUs have a cheaper MADD instruction, this may need to be - // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and - // 64-bit is 5 cycles, so this is always a win. - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { - APInt Value = C->getAPIntValue(); - EVT VT = N->getValueType(0); - APInt VP1 = Value + 1; - if (VP1.isPowerOf2()) { - // Multiplying by one less than a power of two, replace with a shift - // and a subtract. - SDValue ShiftedVal = - DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), - DAG.getConstant(VP1.logBase2(), MVT::i64)); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal, N->getOperand(0)); - } - APInt VM1 = Value - 1; - if (VM1.isPowerOf2()) { - // Multiplying by one more than a power of two, replace with a shift - // and an add. - SDValue ShiftedVal = - DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), - DAG.getConstant(VM1.logBase2(), MVT::i64)); - return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0)); - } - } - return SDValue(); -} - -static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) { - EVT VT = N->getValueType(0); - if (VT != MVT::f32 && VT != MVT::f64) - return SDValue(); - // Only optimize when the source and destination types have the same width. - if (VT.getSizeInBits() != N->getOperand(0).getValueType().getSizeInBits()) - return SDValue(); - - // If the result of an integer load is only used by an integer-to-float - // conversion, use a fp load instead and a AdvSIMD scalar {S|U}CVTF instead. - // This eliminates an "integer-to-vector-move UOP and improve throughput. - SDValue N0 = N->getOperand(0); - if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && - // Do not change the width of a volatile load. - !cast<LoadSDNode>(N0)->isVolatile()) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->isVolatile(), - LN0->isNonTemporal(), LN0->isInvariant(), - LN0->getAlignment()); - - // Make sure successors of the original load stay after it by updating them - // to use the new Chain. 
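The multiply combine above is easiest to see at the source level: multiplying by a power of two plus or minus one becomes a shift plus an add or subtract. A small standalone check of the two rewrites (the latency numbers quoted in the comment come from the source text, not from this snippet):

    #include <cassert>
    #include <cstdint>

    static uint64_t mulBy7(uint64_t x) { return (x << 3) - x; } // x * (2^3 - 1)
    static uint64_t mulBy9(uint64_t x) { return (x << 3) + x; } // x * (2^3 + 1)

    int main() {
      const uint64_t samples[] = {0, 1, 5, 123456789, 0xffffffffffffffffULL};
      for (uint64_t x : samples) {
        assert(mulBy7(x) == x * 7);
        assert(mulBy9(x) == x * 9);
      }
      return 0;
    }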
- DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1)); - - unsigned Opcode = - (N->getOpcode() == ISD::SINT_TO_FP) ? ARM64ISD::SITOF : ARM64ISD::UITOF; - return DAG.getNode(Opcode, SDLoc(N), VT, Load); - } - - return SDValue(); -} - -/// An EXTR instruction is made up of two shifts, ORed together. This helper -/// searches for and classifies those shifts. -static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, - bool &FromHi) { - if (N.getOpcode() == ISD::SHL) - FromHi = false; - else if (N.getOpcode() == ISD::SRL) - FromHi = true; - else - return false; - - if (!isa<ConstantSDNode>(N.getOperand(1))) - return false; - - ShiftAmount = N->getConstantOperandVal(1); - Src = N->getOperand(0); - return true; -} - -/// EXTR instruction extracts a contiguous chunk of bits from two existing -/// registers viewed as a high/low pair. This function looks for the pattern: -/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an -/// EXTR. Can't quite be done in TableGen because the two immediates aren't -/// independent. -static SDValue tryCombineToEXTR(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - - assert(N->getOpcode() == ISD::OR && "Unexpected root"); - - if (VT != MVT::i32 && VT != MVT::i64) - return SDValue(); - - SDValue LHS; - uint32_t ShiftLHS = 0; - bool LHSFromHi = 0; - if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi)) - return SDValue(); - - SDValue RHS; - uint32_t ShiftRHS = 0; - bool RHSFromHi = 0; - if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi)) - return SDValue(); - - // If they're both trying to come from the high part of the register, they're - // not really an EXTR. - if (LHSFromHi == RHSFromHi) - return SDValue(); - - if (ShiftLHS + ShiftRHS != VT.getSizeInBits()) - return SDValue(); - - if (LHSFromHi) { - std::swap(LHS, RHS); - std::swap(ShiftLHS, ShiftRHS); - } - - return DAG.getNode(ARM64ISD::EXTR, DL, VT, LHS, RHS, - DAG.getConstant(ShiftRHS, MVT::i64)); -} - -static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, - const ARM64Subtarget *Subtarget) { - // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) - if (!EnableARM64ExtrGeneration) - return SDValue(); - SelectionDAG &DAG = DCI.DAG; - EVT VT = N->getValueType(0); - - if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) - return SDValue(); - - SDValue Res = tryCombineToEXTR(N, DCI); - if (Res.getNode()) - return Res; - - return SDValue(); -} - -static SDValue performBitcastCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - // Wait 'til after everything is legalized to try this. That way we have - // legal vector types and such. - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - // Remove extraneous bitcasts around an extract_subvector. - // For example, - // (v4i16 (bitconvert - // (extract_subvector (v2i64 (bitconvert (v8i16 ...)), (i64 1))))) - // becomes - // (extract_subvector ((v8i16 ...), (i64 4))) - - // Only interested in 64-bit vectors as the ultimate result. - EVT VT = N->getValueType(0); - if (!VT.isVector()) - return SDValue(); - if (VT.getSimpleVT().getSizeInBits() != 64) - return SDValue(); - // Is the operand an extract_subvector starting at the beginning or halfway - // point of the vector? A low half may also come through as an - // EXTRACT_SUBREG, so look for that, too. 
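tryCombineToEXTR above looks for an OR of two opposite shifts whose amounts sum to the register width; in source form that is the usual funnel-shift idiom. A standalone example of the exact shape being matched (illustrative only):

    #include <cassert>
    #include <cstdint>

    // (or (shl hi, n), (srl lo, 32 - n)) with 0 < n < 32: the pattern that
    // becomes a single EXTR of the hi:lo register pair.
    static uint32_t extrLike(uint32_t hi, uint32_t lo, unsigned n) {
      return (hi << n) | (lo >> (32 - n));
    }

    int main() {
      assert(extrLike(0x0000deadu, 0xbeef0000u, 16) == 0xdeadbeefu);
      assert(extrLike(0x12345678u, 0x9abcdef0u, 8) == 0x3456789au);
      return 0;
    }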
- SDValue Op0 = N->getOperand(0); - if (Op0->getOpcode() != ISD::EXTRACT_SUBVECTOR && - !(Op0->isMachineOpcode() && - Op0->getMachineOpcode() == ARM64::EXTRACT_SUBREG)) - return SDValue(); - uint64_t idx = cast<ConstantSDNode>(Op0->getOperand(1))->getZExtValue(); - if (Op0->getOpcode() == ISD::EXTRACT_SUBVECTOR) { - if (Op0->getValueType(0).getVectorNumElements() != idx && idx != 0) - return SDValue(); - } else if (Op0->getMachineOpcode() == ARM64::EXTRACT_SUBREG) { - if (idx != ARM64::dsub) - return SDValue(); - // The dsub reference is equivalent to a lane zero subvector reference. - idx = 0; - } - // Look through the bitcast of the input to the extract. - if (Op0->getOperand(0)->getOpcode() != ISD::BITCAST) - return SDValue(); - SDValue Source = Op0->getOperand(0)->getOperand(0); - // If the source type has twice the number of elements as our destination - // type, we know this is an extract of the high or low half of the vector. - EVT SVT = Source->getValueType(0); - if (SVT.getVectorNumElements() != VT.getVectorNumElements() * 2) - return SDValue(); - - DEBUG(dbgs() << "arm64-lower: bitcast extract_subvector simplification\n"); - - // Create the simplified form to just extract the low or high half of the - // vector directly rather than bothering with the bitcasts. - SDLoc dl(N); - unsigned NumElements = VT.getVectorNumElements(); - if (idx) { - SDValue HalfIdx = DAG.getConstant(NumElements, MVT::i64); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx); - } else { - SDValue SubReg = DAG.getTargetConstant(ARM64::dsub, MVT::i32); - return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT, - Source, SubReg), - 0); - } -} - -static SDValue performConcatVectorsCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - // Wait 'til after everything is legalized to try this. That way we have - // legal vector types and such. - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - SDLoc dl(N); - EVT VT = N->getValueType(0); - - // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector - // splat. The indexed instructions are going to be expecting a DUPLANE64, so - // canonicalise to that. - if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) { - assert(VT.getVectorElementType().getSizeInBits() == 64); - return DAG.getNode(ARM64ISD::DUPLANE64, dl, VT, - WidenVector(N->getOperand(0), DAG), - DAG.getConstant(0, MVT::i64)); - } - - // Canonicalise concat_vectors so that the right-hand vector has as few - // bit-casts as possible before its real operation. The primary matching - // destination for these operations will be the narrowing "2" instructions, - // which depend on the operation being performed on this right-hand vector. - // For example, - // (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS)))) - // becomes - // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS)) - - SDValue Op1 = N->getOperand(1); - if (Op1->getOpcode() != ISD::BITCAST) - return SDValue(); - SDValue RHS = Op1->getOperand(0); - MVT RHSTy = RHS.getValueType().getSimpleVT(); - // If the RHS is not a vector, this is not the pattern we're looking for. 
- if (!RHSTy.isVector()) - return SDValue(); - - DEBUG(dbgs() << "arm64-lower: concat_vectors bitcast simplification\n"); - - MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(), - RHSTy.getVectorNumElements() * 2); - return DAG.getNode( - ISD::BITCAST, dl, VT, - DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy, - DAG.getNode(ISD::BITCAST, dl, RHSTy, N->getOperand(0)), RHS)); -} - -static SDValue tryCombineFixedPointConvert(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - // Wait 'til after everything is legalized to try this. That way we have - // legal vector types and such. - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - // Transform a scalar conversion of a value from a lane extract into a - // lane extract of a vector conversion. E.g., from foo1 to foo2: - // double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); } - // double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; } - // - // The second form interacts better with instruction selection and the - // register allocator to avoid cross-class register copies that aren't - // coalescable due to a lane reference. - - // Check the operand and see if it originates from a lane extract. - SDValue Op1 = N->getOperand(1); - if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { - // Yep, no additional predication needed. Perform the transform. - SDValue IID = N->getOperand(0); - SDValue Shift = N->getOperand(2); - SDValue Vec = Op1.getOperand(0); - SDValue Lane = Op1.getOperand(1); - EVT ResTy = N->getValueType(0); - EVT VecResTy; - SDLoc DL(N); - - // The vector width should be 128 bits by the time we get here, even - // if it started as 64 bits (the extract_vector handling will have - // done so). - assert(Vec.getValueType().getSizeInBits() == 128 && - "unexpected vector size on extract_vector_elt!"); - if (Vec.getValueType() == MVT::v4i32) - VecResTy = MVT::v4f32; - else if (Vec.getValueType() == MVT::v2i64) - VecResTy = MVT::v2f64; - else - assert(0 && "unexpected vector type!"); - - SDValue Convert = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane); - } - return SDValue(); -} - -// AArch64 high-vector "long" operations are formed by performing the non-high -// version on an extract_subvector of each operand which gets the high half: -// -// (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS)) -// -// However, there are cases which don't have an extract_high explicitly, but -// have another operation that can be made compatible with one for free. For -// example: -// -// (dupv64 scalar) --> (extract_high (dup128 scalar)) -// -// This routine does the actual conversion of such DUPs, once outer routines -// have determined that everything else is in order. -static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) { - // We can handle most types of duplicate, but the lane ones have an extra - // operand saying *which* lane, so we need to know. 
- bool IsDUPLANE; - switch (N.getOpcode()) { - case ARM64ISD::DUP: - IsDUPLANE = false; - break; - case ARM64ISD::DUPLANE8: - case ARM64ISD::DUPLANE16: - case ARM64ISD::DUPLANE32: - case ARM64ISD::DUPLANE64: - IsDUPLANE = true; - break; - default: - return SDValue(); - } - - MVT NarrowTy = N.getSimpleValueType(); - if (!NarrowTy.is64BitVector()) - return SDValue(); - - MVT ElementTy = NarrowTy.getVectorElementType(); - unsigned NumElems = NarrowTy.getVectorNumElements(); - MVT NewDUPVT = MVT::getVectorVT(ElementTy, NumElems * 2); - - SDValue NewDUP; - if (IsDUPLANE) - NewDUP = DAG.getNode(N.getOpcode(), SDLoc(N), NewDUPVT, N.getOperand(0), - N.getOperand(1)); - else - NewDUP = DAG.getNode(ARM64ISD::DUP, SDLoc(N), NewDUPVT, N.getOperand(0)); - - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N.getNode()), NarrowTy, - NewDUP, DAG.getConstant(NumElems, MVT::i64)); -} - -static bool isEssentiallyExtractSubvector(SDValue N) { - if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR) - return true; - - return N.getOpcode() == ISD::BITCAST && - N.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR; -} - -/// \brief Helper structure to keep track of ISD::SET_CC operands. -struct GenericSetCCInfo { - const SDValue *Opnd0; - const SDValue *Opnd1; - ISD::CondCode CC; -}; - -/// \brief Helper structure to keep track of a SET_CC lowered into ARM64 code. -struct ARM64SetCCInfo { - const SDValue *Cmp; - ARM64CC::CondCode CC; -}; - -/// \brief Helper structure to keep track of SetCC information. -union SetCCInfo { - GenericSetCCInfo Generic; - ARM64SetCCInfo ARM64; -}; - -/// \brief Helper structure to be able to read SetCC information. -/// If set to true, IsARM64 field, Info is a ARM64SetCCInfo, otherwise Info is -/// a GenericSetCCInfo. -struct SetCCInfoAndKind { - SetCCInfo Info; - bool IsARM64; -}; - -/// \brief Check whether or not \p Op is a SET_CC operation, either a generic or -/// an -/// ARM64 lowered one. -/// \p SetCCInfo is filled accordingly. -/// \post SetCCInfo is meanginfull only when this function returns true. -/// \return True when Op is a kind of SET_CC operation. -static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) { - // If this is a setcc, this is straight forward. - if (Op.getOpcode() == ISD::SETCC) { - SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0); - SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1); - SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - SetCCInfo.IsARM64 = false; - return true; - } - // Otherwise, check if this is a matching csel instruction. - // In other words: - // - csel 1, 0, cc - // - csel 0, 1, !cc - if (Op.getOpcode() != ARM64ISD::CSEL) - return false; - // Set the information about the operands. - // TODO: we want the operands of the Cmp not the csel - SetCCInfo.Info.ARM64.Cmp = &Op.getOperand(3); - SetCCInfo.IsARM64 = true; - SetCCInfo.Info.ARM64.CC = static_cast<ARM64CC::CondCode>( - cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue()); - - // Check that the operands matches the constraints: - // (1) Both operands must be constants. - // (2) One must be 1 and the other must be 0. - ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0)); - ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1)); - - // Check (1). - if (!TValue || !FValue) - return false; - - // Check (2). - if (!TValue->isOne()) { - // Update the comparison when we are interested in !cc. 
- std::swap(TValue, FValue); - SetCCInfo.Info.ARM64.CC = - ARM64CC::getInvertedCondCode(SetCCInfo.Info.ARM64.CC); - } - return TValue->isOne() && FValue->isNullValue(); -} - -// The folding we want to perform is: -// (add x, (setcc cc ...) ) -// --> -// (csel x, (add x, 1), !cc ...) -// -// The latter will get matched to a CSINC instruction. -static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) { - assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!"); - SDValue LHS = Op->getOperand(0); - SDValue RHS = Op->getOperand(1); - SetCCInfoAndKind InfoAndKind; - - // If neither operand is a SET_CC, give up. - if (!isSetCC(LHS, InfoAndKind)) { - std::swap(LHS, RHS); - if (!isSetCC(LHS, InfoAndKind)) - return SDValue(); - } - - // FIXME: This could be generatized to work for FP comparisons. - EVT CmpVT = InfoAndKind.IsARM64 - ? InfoAndKind.Info.ARM64.Cmp->getOperand(0).getValueType() - : InfoAndKind.Info.Generic.Opnd0->getValueType(); - if (CmpVT != MVT::i32 && CmpVT != MVT::i64) - return SDValue(); - - SDValue CCVal; - SDValue Cmp; - SDLoc dl(Op); - if (InfoAndKind.IsARM64) { - CCVal = DAG.getConstant( - ARM64CC::getInvertedCondCode(InfoAndKind.Info.ARM64.CC), MVT::i32); - Cmp = *InfoAndKind.Info.ARM64.Cmp; - } else - Cmp = getARM64Cmp(*InfoAndKind.Info.Generic.Opnd0, - *InfoAndKind.Info.Generic.Opnd1, - ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, true), - CCVal, DAG, dl); - - EVT VT = Op->getValueType(0); - LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, VT)); - return DAG.getNode(ARM64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp); -} - -// The basic add/sub long vector instructions have variants with "2" on the end -// which act on the high-half of their inputs. They are normally matched by -// patterns like: -// -// (add (zeroext (extract_high LHS)), -// (zeroext (extract_high RHS))) -// -> uaddl2 vD, vN, vM -// -// However, if one of the extracts is something like a duplicate, this -// instruction can still be used profitably. This function puts the DAG into a -// more appropriate form for those patterns to trigger. -static SDValue performAddSubLongCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - MVT VT = N->getSimpleValueType(0); - if (!VT.is128BitVector()) { - if (N->getOpcode() == ISD::ADD) - return performSetccAddFolding(N, DAG); - return SDValue(); - } - - // Make sure both branches are extended in the same way. - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - if ((LHS.getOpcode() != ISD::ZERO_EXTEND && - LHS.getOpcode() != ISD::SIGN_EXTEND) || - LHS.getOpcode() != RHS.getOpcode()) - return SDValue(); - - unsigned ExtType = LHS.getOpcode(); - - // It's not worth doing if at least one of the inputs isn't already an - // extract, but we don't know which it'll be so we have to try both. - if (isEssentiallyExtractSubvector(LHS.getOperand(0))) { - RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG); - if (!RHS.getNode()) - return SDValue(); - - RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS); - } else if (isEssentiallyExtractSubvector(RHS.getOperand(0))) { - LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG); - if (!LHS.getNode()) - return SDValue(); - - LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS); - } - - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS); -} - -// Massage DAGs which we can use the high-half "long" operations on into -// something isel will recognize better. E.g. 
-// -// (arm64_neon_umull (extract_high vec) (dupv64 scalar)) --> -// (arm64_neon_umull (extract_high (v2i64 vec))) -// (extract_high (v2i64 (dup128 scalar))))) -// -static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - SDValue LHS = N->getOperand(1); - SDValue RHS = N->getOperand(2); - assert(LHS.getValueType().is64BitVector() && - RHS.getValueType().is64BitVector() && - "unexpected shape for long operation"); - - // Either node could be a DUP, but it's not worth doing both of them (you'd - // just as well use the non-high version) so look for a corresponding extract - // operation on the other "wing". - if (isEssentiallyExtractSubvector(LHS)) { - RHS = tryExtendDUPToExtractHigh(RHS, DAG); - if (!RHS.getNode()) - return SDValue(); - } else if (isEssentiallyExtractSubvector(RHS)) { - LHS = tryExtendDUPToExtractHigh(LHS, DAG); - if (!LHS.getNode()) - return SDValue(); - } - - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0), - N->getOperand(0), LHS, RHS); -} - -static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) { - MVT ElemTy = N->getSimpleValueType(0).getScalarType(); - unsigned ElemBits = ElemTy.getSizeInBits(); - - int64_t ShiftAmount; - if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) { - APInt SplatValue, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, - HasAnyUndefs, ElemBits) || - SplatBitSize != ElemBits) - return SDValue(); - - ShiftAmount = SplatValue.getSExtValue(); - } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) { - ShiftAmount = CVN->getSExtValue(); - } else - return SDValue(); - - unsigned Opcode; - bool IsRightShift; - switch (IID) { - default: - llvm_unreachable("Unknown shift intrinsic"); - case Intrinsic::arm64_neon_sqshl: - Opcode = ARM64ISD::SQSHL_I; - IsRightShift = false; - break; - case Intrinsic::arm64_neon_uqshl: - Opcode = ARM64ISD::UQSHL_I; - IsRightShift = false; - break; - case Intrinsic::arm64_neon_srshl: - Opcode = ARM64ISD::SRSHR_I; - IsRightShift = true; - break; - case Intrinsic::arm64_neon_urshl: - Opcode = ARM64ISD::URSHR_I; - IsRightShift = true; - break; - case Intrinsic::arm64_neon_sqshlu: - Opcode = ARM64ISD::SQSHLU_I; - IsRightShift = false; - break; - } - - if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) - return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1), - DAG.getConstant(-ShiftAmount, MVT::i32)); - else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount <= ElemBits) - return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1), - DAG.getConstant(ShiftAmount, MVT::i32)); - - return SDValue(); -} - -// The CRC32[BH] instructions ignore the high bits of their data operand. Since -// the intrinsics must be legal and take an i32, this means there's almost -// certainly going to be a zext in the DAG which we can eliminate. 
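The comment above explains why the AND feeding the CRC32[BH] intrinsics can be folded away: only the low byte (or halfword) of the data operand participates. A standalone data-flow sketch using a plain software byte-CRC step with the reflected CRC-32 polynomial; it is meant to show the masking argument, not to be bit-exact with the hardware instruction:

    #include <cassert>
    #include <cstdint>

    // Only the low byte of 'data' is consumed, so masking it first (the
    // zext-of-AND seen in the DAG) changes nothing and can be eliminated.
    static uint32_t crc32ByteStep(uint32_t crc, uint32_t data) {
      crc ^= data & 0xff;
      for (int i = 0; i < 8; ++i)
        crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1u)));
      return crc;
    }

    int main() {
      assert(crc32ByteStep(0x12345678u, 0x000000ABu) ==
             crc32ByteStep(0x12345678u, 0xFFFFFFABu));
      return 0;
    }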
-static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) { - SDValue AndN = N->getOperand(2); - if (AndN.getOpcode() != ISD::AND) - return SDValue(); - - ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1)); - if (!CMask || CMask->getZExtValue() != Mask) - return SDValue(); - - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32, - N->getOperand(0), N->getOperand(1), AndN.getOperand(0)); -} - -static SDValue performIntrinsicCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const ARM64Subtarget *Subtarget) { - SelectionDAG &DAG = DCI.DAG; - unsigned IID = getIntrinsicID(N); - switch (IID) { - default: - break; - case Intrinsic::arm64_neon_vcvtfxs2fp: - case Intrinsic::arm64_neon_vcvtfxu2fp: - return tryCombineFixedPointConvert(N, DCI, DAG); - break; - case Intrinsic::arm64_neon_fmax: - return DAG.getNode(ARM64ISD::FMAX, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2)); - case Intrinsic::arm64_neon_fmin: - return DAG.getNode(ARM64ISD::FMIN, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2)); - case Intrinsic::arm64_neon_smull: - case Intrinsic::arm64_neon_umull: - case Intrinsic::arm64_neon_pmull: - case Intrinsic::arm64_neon_sqdmull: - return tryCombineLongOpWithDup(IID, N, DCI, DAG); - case Intrinsic::arm64_neon_sqshl: - case Intrinsic::arm64_neon_uqshl: - case Intrinsic::arm64_neon_sqshlu: - case Intrinsic::arm64_neon_srshl: - case Intrinsic::arm64_neon_urshl: - return tryCombineShiftImm(IID, N, DAG); - case Intrinsic::arm64_crc32b: - case Intrinsic::arm64_crc32cb: - return tryCombineCRC32(0xff, N, DAG); - case Intrinsic::arm64_crc32h: - case Intrinsic::arm64_crc32ch: - return tryCombineCRC32(0xffff, N, DAG); - } - return SDValue(); -} - -static SDValue performExtendCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then - // we can convert that DUP into another extract_high (of a bigger DUP), which - // helps the backend to decide that an sabdl2 would be useful, saving a real - // extract_high operation. - if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND && - N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) { - SDNode *ABDNode = N->getOperand(0).getNode(); - unsigned IID = getIntrinsicID(ABDNode); - if (IID == Intrinsic::arm64_neon_sabd || - IID == Intrinsic::arm64_neon_uabd) { - SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG); - if (!NewABD.getNode()) - return SDValue(); - - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), - NewABD); - } - } - - // This is effectively a custom type legalization for ARM64. - // - // Type legalization will split an extend of a small, legal, type to a larger - // illegal type by first splitting the destination type, often creating - // illegal source types, which then get legalized in isel-confusing ways, - // leading to really terrible codegen. E.g., - // %result = v8i32 sext v8i8 %value - // becomes - // %losrc = extract_subreg %value, ... - // %hisrc = extract_subreg %value, ... - // %lo = v4i32 sext v4i8 %losrc - // %hi = v4i32 sext v4i8 %hisrc - // Things go rapidly downhill from there. - // - // For ARM64, the [sz]ext vector instructions can only go up one element - // size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32 - // take two instructions. 
- // - // This implies that the most efficient way to do the extend from v8i8 - // to two v4i32 values is to first extend the v8i8 to v8i16, then do - // the normal splitting to happen for the v8i16->v8i32. - - // This is pre-legalization to catch some cases where the default - // type legalization will create ill-tempered code. - if (!DCI.isBeforeLegalizeOps()) - return SDValue(); - - // We're only interested in cleaning things up for non-legal vector types - // here. If both the source and destination are legal, things will just - // work naturally without any fiddling. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT ResVT = N->getValueType(0); - if (!ResVT.isVector() || TLI.isTypeLegal(ResVT)) - return SDValue(); - // If the vector type isn't a simple VT, it's beyond the scope of what - // we're worried about here. Let legalization do its thing and hope for - // the best. - if (!ResVT.isSimple()) - return SDValue(); - - SDValue Src = N->getOperand(0); - MVT SrcVT = Src->getValueType(0).getSimpleVT(); - // If the source VT is a 64-bit vector, we can play games and get the - // better results we want. - if (SrcVT.getSizeInBits() != 64) - return SDValue(); - - unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits(); - unsigned ElementCount = SrcVT.getVectorNumElements(); - SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), ElementCount); - SDLoc DL(N); - Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src); - - // Now split the rest of the operation into two halves, each with a 64 - // bit source. - EVT LoVT, HiVT; - SDValue Lo, Hi; - unsigned NumElements = ResVT.getVectorNumElements(); - assert(!(NumElements & 1) && "Splitting vector, but not in half!"); - LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), - ResVT.getVectorElementType(), NumElements / 2); - - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(), - LoVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo); - Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi); - - // Now combine the parts back together so we still have a single result - // like the combiner expects. - return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); -} - -/// Replace a splat of a scalar to a vector store by scalar stores of the scalar -/// value. The load store optimizer pass will merge them to store pair stores. -/// This has better performance than a splat of the scalar followed by a split -/// vector store. Even if the stores are not merged it is four stores vs a dup, -/// followed by an ext.b and two stores. -static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) { - SDValue StVal = St->getValue(); - EVT VT = StVal.getValueType(); - - // Don't replace floating point stores, they possibly won't be transformed to - // stp because of the store pair suppress pass. - if (VT.isFloatingPoint()) - return SDValue(); - - // Check for insert vector elements. - if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT) - return SDValue(); - - // We can express a splat as store pair(s) for 2 or 4 elements. - unsigned NumVecElts = VT.getVectorNumElements(); - if (NumVecElts != 4 && NumVecElts != 2) - return SDValue(); - SDValue SplatVal = StVal.getOperand(1); - unsigned RemainInsertElts = NumVecElts - 1; - - // Check that this is a splat. 
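The splitting strategy described in the comments above is: widen one element size first (v8i8 to v8i16), then let the normal v8i16 to two v4i32 split happen. A scalar, standalone check that the staged widening preserves values (the vector/instruction names in the comments are from the surrounding text; nothing here inspects generated code):

    #include <array>
    #include <cassert>
    #include <cstdint>

    int main() {
      std::array<int8_t, 8> src{-128, -1, 0, 1, 2, 63, 64, 127};
      std::array<int16_t, 8> mid{};       // one-step widening (v8i8 -> v8i16)
      for (unsigned i = 0; i < 8; ++i)
        mid[i] = src[i];
      std::array<int32_t, 4> lo{}, hi{};  // then the natural v8i16 -> v8i32 split
      for (unsigned i = 0; i < 4; ++i) {
        lo[i] = mid[i];
        hi[i] = mid[i + 4];
      }
      for (unsigned i = 0; i < 8; ++i)
        assert((i < 4 ? lo[i] : hi[i - 4]) == static_cast<int32_t>(src[i]));
      return 0;
    }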
- while (--RemainInsertElts) { - SDValue NextInsertElt = StVal.getOperand(0); - if (NextInsertElt.getOpcode() != ISD::INSERT_VECTOR_ELT) - return SDValue(); - if (NextInsertElt.getOperand(1) != SplatVal) - return SDValue(); - StVal = NextInsertElt; - } - unsigned OrigAlignment = St->getAlignment(); - unsigned EltOffset = NumVecElts == 4 ? 4 : 8; - unsigned Alignment = std::min(OrigAlignment, EltOffset); - - // Create scalar stores. This is at least as good as the code sequence for a - // split unaligned store wich is a dup.s, ext.b, and two stores. - // Most of the time the three stores should be replaced by store pair - // instructions (stp). - SDLoc DL(St); - SDValue BasePtr = St->getBasePtr(); - SDValue NewST1 = - DAG.getStore(St->getChain(), DL, SplatVal, BasePtr, St->getPointerInfo(), - St->isVolatile(), St->isNonTemporal(), St->getAlignment()); - - unsigned Offset = EltOffset; - while (--NumVecElts) { - SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, - DAG.getConstant(Offset, MVT::i64)); - NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr, - St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), Alignment); - Offset += EltOffset; - } - return NewST1; -} - -static SDValue performSTORECombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG, - const ARM64Subtarget *Subtarget) { - if (!DCI.isBeforeLegalize()) - return SDValue(); - - StoreSDNode *S = cast<StoreSDNode>(N); - if (S->isVolatile()) - return SDValue(); - - // Cyclone has bad performance on unaligned 16B stores when crossing line and - // page boundries. We want to split such stores. - if (!Subtarget->isCyclone()) - return SDValue(); - - // Don't split at Oz. - MachineFunction &MF = DAG.getMachineFunction(); - bool IsMinSize = MF.getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::MinSize); - if (IsMinSize) - return SDValue(); - - SDValue StVal = S->getValue(); - EVT VT = StVal.getValueType(); - - // Don't split v2i64 vectors. Memcpy lowering produces those and splitting - // those up regresses performance on micro-benchmarks and olden/bh. - if (!VT.isVector() || VT.getVectorNumElements() < 2 || VT == MVT::v2i64) - return SDValue(); - - // Split unaligned 16B stores. They are terrible for performance. - // Don't split stores with alignment of 1 or 2. Code that uses clang vector - // extensions can use this to mark that it does not want splitting to happen - // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of - // eliminating alignment hazards is only 1 in 8 for alignment of 2. - if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 || - S->getAlignment() <= 2) - return SDValue(); - - // If we get a splat of a scalar convert this vector store to a store of - // scalars. They will be merged into store pairs thereby removing two - // instructions. - SDValue ReplacedSplat = replaceSplatVectorStore(DAG, S); - if (ReplacedSplat != SDValue()) - return ReplacedSplat; - - SDLoc DL(S); - unsigned NumElts = VT.getVectorNumElements() / 2; - // Split VT into two. 
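replaceSplatVectorStore above trades a dup-plus-vector-store for scalar stores that the load/store optimizer can pair. At the source level the rewrite amounts to the following (standalone sketch; whether the stores are actually merged into stp is up to the later pass):

    #include <cassert>
    #include <cstdint>

    // Store a 4-lane splat as four scalar stores; adjacent pairs are
    // candidates for stp in the load/store optimizer.
    static void splatStore4(uint32_t *p, uint32_t v) {
      p[0] = v;
      p[1] = v;
      p[2] = v;
      p[3] = v;
    }

    int main() {
      uint32_t buf[4] = {0, 0, 0, 0};
      splatStore4(buf, 0x5a5a5a5au);
      for (uint32_t x : buf)
        assert(x == 0x5a5a5a5au);
      return 0;
    }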
- EVT HalfVT = - EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts); - SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal, - DAG.getIntPtrConstant(0)); - SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal, - DAG.getIntPtrConstant(NumElts)); - SDValue BasePtr = S->getBasePtr(); - SDValue NewST1 = - DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(), - S->isVolatile(), S->isNonTemporal(), S->getAlignment()); - SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, - DAG.getConstant(8, MVT::i64)); - return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr, - S->getPointerInfo(), S->isVolatile(), S->isNonTemporal(), - S->getAlignment()); -} - -// Optimize compare with zero and branch. -static SDValue performBRCONDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - SDValue Chain = N->getOperand(0); - SDValue Dest = N->getOperand(1); - SDValue CCVal = N->getOperand(2); - SDValue Cmp = N->getOperand(3); - - assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!"); - unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue(); - if (CC != ARM64CC::EQ && CC != ARM64CC::NE) - return SDValue(); - - unsigned CmpOpc = Cmp.getOpcode(); - if (CmpOpc != ARM64ISD::ADDS && CmpOpc != ARM64ISD::SUBS) - return SDValue(); - - // Only attempt folding if there is only one use of the flag and no use of the - // value. - if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1)) - return SDValue(); - - SDValue LHS = Cmp.getOperand(0); - SDValue RHS = Cmp.getOperand(1); - - assert(LHS.getValueType() == RHS.getValueType() && - "Expected the value type to be the same for both operands!"); - if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64) - return SDValue(); - - if (isa<ConstantSDNode>(LHS) && cast<ConstantSDNode>(LHS)->isNullValue()) - std::swap(LHS, RHS); - - if (!isa<ConstantSDNode>(RHS) || !cast<ConstantSDNode>(RHS)->isNullValue()) - return SDValue(); - - if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA || - LHS.getOpcode() == ISD::SRL) - return SDValue(); - - // Fold the compare into the branch instruction. - SDValue BR; - if (CC == ARM64CC::EQ) - BR = DAG.getNode(ARM64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest); - else - BR = DAG.getNode(ARM64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest); - - // Do not add new nodes to DAG combiner worklist. 
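performBRCONDCombine above folds a compare against zero whose flags feed only an EQ/NE branch into cbz/cbnz. The source-level shape it targets is simply this (standalone; the "cbz candidate" note is an expectation, not asserted by the snippet):

    #include <cassert>
    #include <cstdint>

    static int sign(int64_t x) {
      if (x == 0)          // compare with zero + EQ branch: cbz candidate
        return 0;
      return x < 0 ? -1 : 1;
    }

    int main() {
      assert(sign(0) == 0);
      assert(sign(-5) == -1);
      assert(sign(42) == 1);
      return 0;
    }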
- DCI.CombineTo(N, BR, false); - - return SDValue(); -} - -SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const { - SelectionDAG &DAG = DCI.DAG; - switch (N->getOpcode()) { - default: - break; - case ISD::ADD: - case ISD::SUB: - return performAddSubLongCombine(N, DCI, DAG); - case ISD::XOR: - return performXorCombine(N, DAG, DCI, Subtarget); - case ISD::MUL: - return performMulCombine(N, DAG, DCI, Subtarget); - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - return performIntToFpCombine(N, DAG); - case ISD::OR: - return performORCombine(N, DCI, Subtarget); - case ISD::INTRINSIC_WO_CHAIN: - return performIntrinsicCombine(N, DCI, Subtarget); - case ISD::ANY_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: - return performExtendCombine(N, DCI, DAG); - case ISD::BITCAST: - return performBitcastCombine(N, DCI, DAG); - case ISD::CONCAT_VECTORS: - return performConcatVectorsCombine(N, DCI, DAG); - case ISD::STORE: - return performSTORECombine(N, DCI, DAG, Subtarget); - case ARM64ISD::BRCOND: - return performBRCONDCombine(N, DCI, DAG); - } - return SDValue(); -} - -// Check if the return value is used as only a return value, as otherwise -// we can't perform a tail-call. In particular, we need to check for -// target ISD nodes that are returns and any other "odd" constructs -// that the generic analysis code won't necessarily catch. -bool ARM64TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { - if (N->getNumValues() != 1) - return false; - if (!N->hasNUsesOfValue(1, 0)) - return false; - - SDValue TCChain = Chain; - SDNode *Copy = *N->use_begin(); - if (Copy->getOpcode() == ISD::CopyToReg) { - // If the copy has a glue operand, we conservatively assume it isn't safe to - // perform a tail call. - if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == - MVT::Glue) - return false; - TCChain = Copy->getOperand(0); - } else if (Copy->getOpcode() != ISD::FP_EXTEND) - return false; - - bool HasRet = false; - for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); - UI != UE; ++UI) { - if (UI->getOpcode() != ARM64ISD::RET_FLAG) - return false; - HasRet = true; - } - - if (!HasRet) - return false; - - Chain = TCChain; - return true; -} - -// Return whether the an instruction can potentially be optimized to a tail -// call. This will cause the optimizers to attempt to move, or duplicate, -// return instructions to help enable tail call optimizations for this -// instruction. -bool ARM64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { - if (!EnableARM64TailCalls) - return false; - - if (!CI->isTailCall()) - return false; - - return true; -} - -bool ARM64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - bool &IsInc, - SelectionDAG &DAG) const { - if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) - return false; - - Base = Op->getOperand(0); - // All of the indexed addressing mode instructions take a signed - // 9 bit immediate offset. 
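getIndexedAddressParts above only forms pre/post-indexed loads and stores when the increment fits the signed 9-bit immediate field; note the code rejects offsets of 256 or more in magnitude. A standalone mirror of that acceptance check:

    #include <cassert>
    #include <cstdint>

    // Accepted exactly when -256 < offset < 256, matching the check above.
    static bool indexedOffsetAccepted(int64_t offset) {
      return offset > -256 && offset < 256;
    }

    int main() {
      assert(indexedOffsetAccepted(255) && indexedOffsetAccepted(-255));
      assert(!indexedOffsetAccepted(256) && !indexedOffsetAccepted(-256));
      return 0;
    }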
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) { - int64_t RHSC = (int64_t)RHS->getZExtValue(); - if (RHSC >= 256 || RHSC <= -256) - return false; - IsInc = (Op->getOpcode() == ISD::ADD); - Offset = Op->getOperand(1); - return true; - } - return false; -} - -bool ARM64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const { - EVT VT; - SDValue Ptr; - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { - VT = LD->getMemoryVT(); - Ptr = LD->getBasePtr(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { - VT = ST->getMemoryVT(); - Ptr = ST->getBasePtr(); - } else - return false; - - bool IsInc; - if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG)) - return false; - AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC; - return true; -} - -bool ARM64TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, - SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const { - EVT VT; - SDValue Ptr; - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { - VT = LD->getMemoryVT(); - Ptr = LD->getBasePtr(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { - VT = ST->getMemoryVT(); - Ptr = ST->getBasePtr(); - } else - return false; - - bool IsInc; - if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG)) - return false; - // Post-indexing updates the base, so it's not a valid transform - // if that's not the same as the load's pointer. - if (Ptr != Base) - return false; - AM = IsInc ? ISD::POST_INC : ISD::POST_DEC; - return true; -} - -/// The only 128-bit atomic operation is an stxp that succeeds. In particular -/// neither ldp nor ldxp are atomic. So the canonical sequence for an atomic -/// load is: -/// loop: -/// ldxp x0, x1, [x8] -/// stxp w2, x0, x1, [x8] -/// cbnz w2, loop -/// If the stxp succeeds then the ldxp managed to get both halves without an -/// intervening stxp from a different thread and the read was atomic. -static void ReplaceATOMIC_LOAD_128(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) { - SDLoc DL(N); - AtomicSDNode *AN = cast<AtomicSDNode>(N); - EVT VT = AN->getMemoryVT(); - SDValue Zero = DAG.getConstant(0, VT); - - // FIXME: Really want ATOMIC_LOAD_NOP but that doesn't fit into the existing - // scheme very well. Given the complexity of what we're already generating, an - // extra couple of ORRs probably won't make much difference. 
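The ldxp/stxp/cbnz loop described above is what a lock-free 16-byte atomic load has to turn into, since ldxp alone is not single-copy atomic. A standalone C++ sketch of the kind of access being expanded (whether it is lock-free, and whether libatomic is needed, depends on the build target):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    struct Pair128 {
      uint64_t lo, hi;
    };

    int main() {
      std::atomic<Pair128> p{Pair128{1, 2}};
      Pair128 v = p.load();   // the 128-bit atomic load the expansion handles
      assert(v.lo == 1 && v.hi == 2);
      return 0;
    }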
- SDValue Result = DAG.getAtomic(ISD::ATOMIC_LOAD_OR, DL, AN->getMemoryVT(), - N->getOperand(0), N->getOperand(1), Zero, - AN->getMemOperand(), AN->getOrdering(), - AN->getSynchScope()); - - Results.push_back(Result.getValue(0)); // Value - Results.push_back(Result.getValue(1)); // Chain -} - -static void ReplaceATOMIC_OP_128(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG, unsigned NewOp) { - SDLoc DL(N); - AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - assert(N->getValueType(0) == MVT::i128 && - "Only know how to expand i128 atomics"); - - SmallVector<SDValue, 6> Ops; - Ops.push_back(N->getOperand(1)); // Ptr - // Low part of Val1 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, - N->getOperand(2), DAG.getIntPtrConstant(0))); - // High part of Val1 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, - N->getOperand(2), DAG.getIntPtrConstant(1))); - if (NewOp == ARM64::ATOMIC_CMP_SWAP_I128) { - // Low part of Val2 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, - N->getOperand(3), DAG.getIntPtrConstant(0))); - // High part of Val2 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, - N->getOperand(3), DAG.getIntPtrConstant(1))); - } - - Ops.push_back(DAG.getTargetConstant(Ordering, MVT::i32)); - Ops.push_back(N->getOperand(0)); // Chain - - SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other); - SDNode *Result = DAG.getMachineNode(NewOp, DL, Tys, Ops); - SDValue OpsF[] = { SDValue(Result, 0), SDValue(Result, 1) }; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, OpsF, 2)); - Results.push_back(SDValue(Result, 2)); -} - -void ARM64TargetLowering::ReplaceNodeResults(SDNode *N, - SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) const { - switch (N->getOpcode()) { - default: - llvm_unreachable("Don't know how to custom expand this"); - case ISD::ATOMIC_LOAD: - ReplaceATOMIC_LOAD_128(N, Results, DAG); - return; - case ISD::ATOMIC_LOAD_ADD: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_ADD_I128); - return; - case ISD::ATOMIC_LOAD_SUB: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_SUB_I128); - return; - case ISD::ATOMIC_LOAD_AND: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_AND_I128); - return; - case ISD::ATOMIC_LOAD_OR: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_OR_I128); - return; - case ISD::ATOMIC_LOAD_XOR: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_XOR_I128); - return; - case ISD::ATOMIC_LOAD_NAND: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_NAND_I128); - return; - case ISD::ATOMIC_SWAP: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_SWAP_I128); - return; - case ISD::ATOMIC_LOAD_MIN: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_MIN_I128); - return; - case ISD::ATOMIC_LOAD_MAX: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_MAX_I128); - return; - case ISD::ATOMIC_LOAD_UMIN: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_UMIN_I128); - return; - case ISD::ATOMIC_LOAD_UMAX: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_UMAX_I128); - return; - case ISD::ATOMIC_CMP_SWAP: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_CMP_SWAP_I128); - return; - case ISD::FP_TO_UINT: - case ISD::FP_TO_SINT: - assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion"); - // Let normal code take care of it by not adding anything to Results. 
- return; - } -} diff --git a/lib/Target/ARM64/ARM64ISelLowering.h b/lib/Target/ARM64/ARM64ISelLowering.h deleted file mode 100644 index a4664ac..0000000 --- a/lib/Target/ARM64/ARM64ISelLowering.h +++ /dev/null @@ -1,422 +0,0 @@ -//==-- ARM64ISelLowering.h - ARM64 DAG Lowering Interface --------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interfaces that ARM64 uses to lower LLVM code into a -// selection DAG. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_ARM64_ISELLOWERING_H -#define LLVM_TARGET_ARM64_ISELLOWERING_H - -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/Target/TargetLowering.h" - -namespace llvm { - -namespace ARM64ISD { - -enum { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses. - CALL, // Function call. - - // Almost the same as a normal call node, except that a TLSDesc relocation is - // needed so the linker can relax it correctly if possible. - TLSDESC_CALL, - ADRP, // Page address of a TargetGlobalAddress operand. - ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand. - LOADgot, // Load from automatically generated descriptor (e.g. Global - // Offset Table, TLS record). - RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand. - BRCOND, // Conditional branch instruction; "b.cond". - CSEL, - FCSEL, // Conditional move instruction. - CSINV, // Conditional select invert. - CSNEG, // Conditional select negate. - CSINC, // Conditional select increment. - - // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on - // ELF. - THREAD_POINTER, - ADC, - SBC, // adc, sbc instructions - - // Arithmetic instructions which write flags. - ADDS, - SUBS, - ADCS, - SBCS, - ANDS, - - // Floating point comparison - FCMP, - - // Floating point max and min instructions. - FMAX, - FMIN, - - // Scalar extract - EXTR, - - // Scalar-to-vector duplication - DUP, - DUPLANE8, - DUPLANE16, - DUPLANE32, - DUPLANE64, - - // Vector immedate moves - MOVI, - MOVIshift, - MOVIedit, - MOVImsl, - FMOV, - MVNIshift, - MVNImsl, - - // Vector immediate ops - BICi, - ORRi, - - // Vector arithmetic negation - NEG, - - // Vector shuffles - ZIP1, - ZIP2, - UZP1, - UZP2, - TRN1, - TRN2, - REV16, - REV32, - REV64, - EXT, - - // Vector shift by scalar - VSHL, - VLSHR, - VASHR, - - // Vector shift by scalar (again) - SQSHL_I, - UQSHL_I, - SQSHLU_I, - SRSHR_I, - URSHR_I, - - // Vector comparisons - CMEQ, - CMGE, - CMGT, - CMHI, - CMHS, - FCMEQ, - FCMGE, - FCMGT, - - // Vector zero comparisons - CMEQz, - CMGEz, - CMGTz, - CMLEz, - CMLTz, - FCMEQz, - FCMGEz, - FCMGTz, - FCMLEz, - FCMLTz, - - // Vector bitwise negation - NOT, - - // Vector bitwise selection - BIT, - - // Compare-and-branch - CBZ, - CBNZ, - TBZ, - TBNZ, - - // Tail calls - TC_RETURN, - - // Custom prefetch handling - PREFETCH, - - // {s|u}int to FP within a FP register. 
- SITOF, - UITOF -}; - -} // end namespace ARM64ISD - -class ARM64Subtarget; -class ARM64TargetMachine; - -class ARM64TargetLowering : public TargetLowering { - bool RequireStrictAlign; - -public: - explicit ARM64TargetLowering(ARM64TargetMachine &TM); - - /// Selects the correct CCAssignFn for a the given CallingConvention - /// value. - CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; - - /// computeMaskedBitsForTargetNode - Determine which of the bits specified in - /// Mask are known to be either zero or one and return them in the - /// KnownZero/KnownOne bitsets. - void computeMaskedBitsForTargetNode(const SDValue Op, APInt &KnownZero, - APInt &KnownOne, const SelectionDAG &DAG, - unsigned Depth = 0) const; - - MVT getScalarShiftAmountTy(EVT LHSTy) const override; - - /// allowsUnalignedMemoryAccesses - Returns true if the target allows - /// unaligned memory accesses. of the specified type. - bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0, - bool *Fast = 0) const override { - if (RequireStrictAlign) - return false; - // FIXME: True for Cyclone, but not necessary others. - if (Fast) - *Fast = true; - return true; - } - - /// LowerOperation - Provide custom lowering hooks for some operations. - SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - - const char *getTargetNodeName(unsigned Opcode) const override; - - SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - - /// getFunctionAlignment - Return the Log2 alignment of this function. - unsigned getFunctionAlignment(const Function *F) const; - - /// getMaximalGlobalOffset - Returns the maximal possible offset which can - /// be used for loads / stores from the global. - unsigned getMaximalGlobalOffset() const override; - - /// Returns true if a cast between SrcAS and DestAS is a noop. - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { - // Addrspacecasts are always noops. - return true; - } - - /// createFastISel - This method returns a target specific FastISel object, - /// or null if the target does not support "fast" ISel. - FastISel *createFastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) const override; - - bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; - - bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; - - /// isShuffleMaskLegal - Return true if the given shuffle mask can be - /// codegen'd directly, or if it should be stack expanded. 
- bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override; - - /// getSetCCResultType - Return the ISD::SETCC ValueType - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; - - SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; - - MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size, unsigned BinOpcode) const; - MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size) const; - MachineBasicBlock *EmitAtomicBinary128(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned BinOpcodeLo, - unsigned BinOpcodeHi) const; - MachineBasicBlock *EmitAtomicCmpSwap128(MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *EmitAtomicMinMax128(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned CondCode) const; - MachineBasicBlock *EmitF128CSEL(MachineInstr *MI, - MachineBasicBlock *BB) const; - - MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const override; - - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, - unsigned Intrinsic) const override; - - bool isTruncateFree(Type *Ty1, Type *Ty2) const override; - bool isTruncateFree(EVT VT1, EVT VT2) const override; - - bool isZExtFree(Type *Ty1, Type *Ty2) const override; - bool isZExtFree(EVT VT1, EVT VT2) const override; - bool isZExtFree(SDValue Val, EVT VT2) const override; - - bool hasPairedLoad(Type *LoadedType, - unsigned &RequiredAligment) const override; - bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const override; - - bool isLegalAddImmediate(int64_t) const override; - bool isLegalICmpImmediate(int64_t) const override; - - EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, - MachineFunction &MF) const override; - - /// isLegalAddressingMode - Return true if the addressing mode represented - /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; - - /// \brief Return the cost of the scaling factor used in the addressing - /// mode represented by AM for this target, for a load/store - /// of the specified type. - /// If the AM is supported, the return value must be >= 0. - /// If the AM is not supported, it returns a negative value. - int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override; - - /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster - /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be - /// expanded to FMAs when this method returns true, otherwise fmuladd is - /// expanded to fmul + fadd. - bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; - - const uint16_t *getScratchRegisters(CallingConv::ID CC) const override; - - bool shouldConvertConstantLoadToIntImm(const APInt &Imm, - Type *Ty) const override; - -private: - /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can - /// make the right decision when generating code for different targets. 
- const ARM64Subtarget *Subtarget; - - void addTypeForNEON(EVT VT, EVT PromotedBitwiseVT); - void addDRTypeForNEON(MVT VT); - void addQRTypeForNEON(MVT VT); - - SDValue - LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, - SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const override; - - SDValue LowerCall(CallLoweringInfo & /*CLI*/, - SmallVectorImpl<SDValue> &InVals) const override; - - SDValue LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, - SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, - bool isThisReturn, SDValue ThisVal) const; - - bool isEligibleForTailCallOptimization( - SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, - bool isCalleeStructRet, bool isCallerStructRet, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const; - - void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL, - SDValue &Chain) const; - - bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, - bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - LLVMContext &Context) const override; - - SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, SDLoc DL, - SelectionDAG &DAG) const override; - - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL, - SelectionDAG &DAG) const; - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const; 
- SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG, - RTLIB::Libcall Call) const; - SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVectorAND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; - - ConstraintType getConstraintType(const std::string &Constraint) const; - - /// Examine constraint string and operand type and determine a weight value. - /// The operand object must already have been set up with the operand type. - ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, - const char *constraint) const; - - std::pair<unsigned, const TargetRegisterClass *> - getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const; - void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, - std::vector<SDValue> &Ops, - SelectionDAG &DAG) const; - - bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const; - bool mayBeEmittedAsTailCall(CallInst *CI) const; - bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, - ISD::MemIndexedMode &AM, bool &IsInc, - SelectionDAG &DAG) const; - bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const; - bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, - SDValue &Offset, ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const; - - void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) const; -}; - -namespace ARM64 { -FastISel *createFastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo); -} // end namespace ARM64 - -} // end namespace llvm - -#endif // LLVM_TARGET_ARM64_ISELLOWERING_H diff --git a/lib/Target/ARM64/ARM64InstrAtomics.td b/lib/Target/ARM64/ARM64InstrAtomics.td deleted file mode 100644 index 0d36e06..0000000 --- a/lib/Target/ARM64/ARM64InstrAtomics.td +++ /dev/null @@ -1,293 +0,0 @@ -//===- ARM64InstrAtomics.td - ARM64 Atomic codegen support -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// ARM64 Atomic operand code-gen constructs. -// -//===----------------------------------------------------------------------===// - -//===---------------------------------- -// Atomic fences -//===---------------------------------- -def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>; -def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>; - -//===---------------------------------- -// Atomic loads -//===---------------------------------- - -// When they're actually atomic, only one addressing mode (GPR64sp) is -// supported, but when they're relaxed and anything can be used, all the -// standard modes would be valid and may give efficiency gains. - -// A atomic load operation that actually needs acquire semantics. 
-class acquiring_load<PatFrag base> - : PatFrag<(ops node:$ptr), (base node:$ptr), [{ - AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - assert(Ordering != AcquireRelease && "unexpected load ordering"); - return Ordering == Acquire || Ordering == SequentiallyConsistent; -}]>; - -// An atomic load operation that does not need either acquire or release -// semantics. -class relaxed_load<PatFrag base> - : PatFrag<(ops node:$ptr), (base node:$ptr), [{ - AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - return Ordering == Monotonic || Ordering == Unordered; -}]>; - -// 8-bit loads -def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; -def : Pat<(relaxed_load<atomic_load_8> ro_indexed8:$addr), - (LDRBBro ro_indexed8:$addr)>; -def : Pat<(relaxed_load<atomic_load_8> am_indexed8:$addr), - (LDRBBui am_indexed8:$addr)>; -def : Pat<(relaxed_load<atomic_load_8> am_unscaled8:$addr), - (LDURBBi am_unscaled8:$addr)>; - -// 16-bit loads -def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>; -def : Pat<(relaxed_load<atomic_load_16> ro_indexed16:$addr), - (LDRHHro ro_indexed16:$addr)>; -def : Pat<(relaxed_load<atomic_load_16> am_indexed16:$addr), - (LDRHHui am_indexed16:$addr)>; -def : Pat<(relaxed_load<atomic_load_16> am_unscaled16:$addr), - (LDURHHi am_unscaled16:$addr)>; - -// 32-bit loads -def : Pat<(acquiring_load<atomic_load_32> GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>; -def : Pat<(relaxed_load<atomic_load_32> ro_indexed32:$addr), - (LDRWro ro_indexed32:$addr)>; -def : Pat<(relaxed_load<atomic_load_32> am_indexed32:$addr), - (LDRWui am_indexed32:$addr)>; -def : Pat<(relaxed_load<atomic_load_32> am_unscaled32:$addr), - (LDURWi am_unscaled32:$addr)>; - -// 64-bit loads -def : Pat<(acquiring_load<atomic_load_64> GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>; -def : Pat<(relaxed_load<atomic_load_64> ro_indexed64:$addr), - (LDRXro ro_indexed64:$addr)>; -def : Pat<(relaxed_load<atomic_load_64> am_indexed64:$addr), - (LDRXui am_indexed64:$addr)>; -def : Pat<(relaxed_load<atomic_load_64> am_unscaled64:$addr), - (LDURXi am_unscaled64:$addr)>; - -//===---------------------------------- -// Atomic stores -//===---------------------------------- - -// When they're actually atomic, only one addressing mode (GPR64sp) is -// supported, but when they're relaxed and anything can be used, all the -// standard modes would be valid and may give efficiency gains. - -// A store operation that actually needs release semantics. -class releasing_store<PatFrag base> - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ - AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - assert(Ordering != AcquireRelease && "unexpected store ordering"); - return Ordering == Release || Ordering == SequentiallyConsistent; -}]>; - -// An atomic store operation that doesn't actually need to be atomic on ARM64. 
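As a rough source-level illustration of the distinction these fragments draw (a sketch of the expected selection, assuming the usual C++11 memory-order mapping on AArch64, not a guarantee): acquire and seq_cst loads go through acquiring_load and should end up as load-acquire instructions, while monotonic/unordered loads go through relaxed_load and can use any of the ordinary addressing modes; the store fragments around here behave symmetrically.

#include <atomic>
#include <cstdint>

uint64_t atomic_load_examples(std::atomic<uint64_t> &A) {
  // Acquire (and seq_cst) loads match acquiring_load and are expected to
  // select a load-acquire (LDAR), which only supports the base-register form.
  uint64_t X = A.load(std::memory_order_acquire);
  // Relaxed loads map to Monotonic ordering and match relaxed_load; a plain
  // LDR/LDUR with any of the normal addressing modes is fine.
  uint64_t Y = A.load(std::memory_order_relaxed);
  return X + Y;
}

void atomic_store_examples(std::atomic<uint64_t> &A, uint64_t V) {
  A.store(V, std::memory_order_release);  // releasing_store -> STLR
  A.store(V, std::memory_order_relaxed);  // relaxed_store   -> plain STR/STUR
}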
-class relaxed_store<PatFrag base> - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ - AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - return Ordering == Monotonic || Ordering == Unordered; -}]>; - -// 8-bit stores -def : Pat<(releasing_store<atomic_store_8> GPR64sp:$ptr, GPR32:$val), - (STLRB GPR32:$val, GPR64sp:$ptr)>; -def : Pat<(relaxed_store<atomic_store_8> ro_indexed8:$ptr, GPR32:$val), - (STRBBro GPR32:$val, ro_indexed8:$ptr)>; -def : Pat<(relaxed_store<atomic_store_8> am_indexed8:$ptr, GPR32:$val), - (STRBBui GPR32:$val, am_indexed8:$ptr)>; -def : Pat<(relaxed_store<atomic_store_8> am_unscaled8:$ptr, GPR32:$val), - (STURBBi GPR32:$val, am_unscaled8:$ptr)>; - -// 16-bit stores -def : Pat<(releasing_store<atomic_store_16> GPR64sp:$ptr, GPR32:$val), - (STLRH GPR32:$val, GPR64sp:$ptr)>; -def : Pat<(relaxed_store<atomic_store_16> ro_indexed16:$ptr, GPR32:$val), - (STRHHro GPR32:$val, ro_indexed16:$ptr)>; -def : Pat<(relaxed_store<atomic_store_16> am_indexed16:$ptr, GPR32:$val), - (STRHHui GPR32:$val, am_indexed16:$ptr)>; -def : Pat<(relaxed_store<atomic_store_16> am_unscaled16:$ptr, GPR32:$val), - (STURHHi GPR32:$val, am_unscaled16:$ptr)>; - -// 32-bit stores -def : Pat<(releasing_store<atomic_store_32> GPR64sp:$ptr, GPR32:$val), - (STLRW GPR32:$val, GPR64sp:$ptr)>; -def : Pat<(relaxed_store<atomic_store_32> ro_indexed32:$ptr, GPR32:$val), - (STRWro GPR32:$val, ro_indexed32:$ptr)>; -def : Pat<(relaxed_store<atomic_store_32> am_indexed32:$ptr, GPR32:$val), - (STRWui GPR32:$val, am_indexed32:$ptr)>; -def : Pat<(relaxed_store<atomic_store_32> am_unscaled32:$ptr, GPR32:$val), - (STURWi GPR32:$val, am_unscaled32:$ptr)>; - -// 64-bit stores -def : Pat<(releasing_store<atomic_store_64> GPR64sp:$ptr, GPR64:$val), - (STLRX GPR64:$val, GPR64sp:$ptr)>; -def : Pat<(relaxed_store<atomic_store_64> ro_indexed64:$ptr, GPR64:$val), - (STRXro GPR64:$val, ro_indexed64:$ptr)>; -def : Pat<(relaxed_store<atomic_store_64> am_indexed64:$ptr, GPR64:$val), - (STRXui GPR64:$val, am_indexed64:$ptr)>; -def : Pat<(relaxed_store<atomic_store_64> am_unscaled64:$ptr, GPR64:$val), - (STURXi GPR64:$val, am_unscaled64:$ptr)>; - -//===---------------------------------- -// Atomic read-modify-write operations -//===---------------------------------- - -// More complicated operations need lots of C++ support, so we just create -// skeletons here for the C++ code to refer to. 
- -let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in { -multiclass AtomicSizes { - def _I8 : Pseudo<(outs GPR32:$dst), - (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>; - def _I16 : Pseudo<(outs GPR32:$dst), - (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>; - def _I32 : Pseudo<(outs GPR32:$dst), - (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>; - def _I64 : Pseudo<(outs GPR64:$dst), - (ins GPR64sp:$ptr, GPR64:$incr, i32imm:$ordering), []>; - def _I128 : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi), - (ins GPR64sp:$ptr, GPR64:$incrlo, GPR64:$incrhi, - i32imm:$ordering), []>; -} -} - -defm ATOMIC_LOAD_ADD : AtomicSizes; -defm ATOMIC_LOAD_SUB : AtomicSizes; -defm ATOMIC_LOAD_AND : AtomicSizes; -defm ATOMIC_LOAD_OR : AtomicSizes; -defm ATOMIC_LOAD_XOR : AtomicSizes; -defm ATOMIC_LOAD_NAND : AtomicSizes; -defm ATOMIC_SWAP : AtomicSizes; -let Defs = [CPSR] in { - // These operations need a CMP to calculate the correct value - defm ATOMIC_LOAD_MIN : AtomicSizes; - defm ATOMIC_LOAD_MAX : AtomicSizes; - defm ATOMIC_LOAD_UMIN : AtomicSizes; - defm ATOMIC_LOAD_UMAX : AtomicSizes; -} - -class AtomicCmpSwap<RegisterClass GPRData> - : Pseudo<(outs GPRData:$dst), - (ins GPR64sp:$ptr, GPRData:$old, GPRData:$new, - i32imm:$ordering), []> { - let usesCustomInserter = 1; - let hasCtrlDep = 1; - let mayLoad = 1; - let mayStore = 1; - let Defs = [CPSR]; -} - -def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>; -def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>; -def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>; -def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>; - -def ATOMIC_CMP_SWAP_I128 - : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi), - (ins GPR64sp:$ptr, GPR64:$oldlo, GPR64:$oldhi, - GPR64:$newlo, GPR64:$newhi, i32imm:$ordering), []> { - let usesCustomInserter = 1; - let hasCtrlDep = 1; - let mayLoad = 1; - let mayStore = 1; - let Defs = [CPSR]; -} - -//===---------------------------------- -// Low-level exclusive operations -//===---------------------------------- - -// Load-exclusives. - -def ldxr_1 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ - return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8; -}]>; - -def ldxr_2 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ - return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; -}]>; - -def ldxr_4 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ - return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; -}]>; - -def ldxr_8 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ - return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; -}]>; - -def : Pat<(ldxr_1 am_noindex:$addr), - (SUBREG_TO_REG (i64 0), (LDXRB am_noindex:$addr), sub_32)>; -def : Pat<(ldxr_2 am_noindex:$addr), - (SUBREG_TO_REG (i64 0), (LDXRH am_noindex:$addr), sub_32)>; -def : Pat<(ldxr_4 am_noindex:$addr), - (SUBREG_TO_REG (i64 0), (LDXRW am_noindex:$addr), sub_32)>; -def : Pat<(ldxr_8 am_noindex:$addr), (LDXRX am_noindex:$addr)>; - -def : Pat<(and (ldxr_1 am_noindex:$addr), 0xff), - (SUBREG_TO_REG (i64 0), (LDXRB am_noindex:$addr), sub_32)>; -def : Pat<(and (ldxr_2 am_noindex:$addr), 0xffff), - (SUBREG_TO_REG (i64 0), (LDXRH am_noindex:$addr), sub_32)>; -def : Pat<(and (ldxr_4 am_noindex:$addr), 0xffffffff), - (SUBREG_TO_REG (i64 0), (LDXRW am_noindex:$addr), sub_32)>; - -// Store-exclusives. 
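To make the role of these skeletons concrete: ARMv8.0 has no single-instruction atomic read-modify-write or compare-and-swap, so source-level operations like the ones below are expected to be matched to the ATOMIC_*_I64 pseudos above and then expanded by the custom inserter into a load-exclusive/store-exclusive retry loop built from the LDXR/STXR-style instructions this file also describes. A minimal sketch of the source constructs involved, not a statement of the exact expansion:

#include <atomic>
#include <cstdint>

uint64_t rmw_examples(std::atomic<uint64_t> &A, uint64_t V) {
  // Expected to go through ATOMIC_LOAD_ADD_I64 and become a
  // load-exclusive / ADD / store-exclusive loop that retries while the
  // exclusive store reports failure.
  uint64_t Old = A.fetch_add(V, std::memory_order_seq_cst);

  // Expected to go through ATOMIC_CMP_SWAP_I64: load-exclusive, compare
  // against the expected value, store-exclusive on match, branch back on
  // failure.
  uint64_t Expected = 0;
  A.compare_exchange_strong(Expected, V);
  return Old + Expected;
}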
- -def stxr_1 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stxr node:$val, node:$ptr), [{ - return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8; -}]>; - -def stxr_2 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stxr node:$val, node:$ptr), [{ - return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; -}]>; - -def stxr_4 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stxr node:$val, node:$ptr), [{ - return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; -}]>; - -def stxr_8 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stxr node:$val, node:$ptr), [{ - return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; -}]>; - -def : Pat<(stxr_1 GPR64:$val, am_noindex:$addr), - (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stxr_2 GPR64:$val, am_noindex:$addr), - (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stxr_4 GPR64:$val, am_noindex:$addr), - (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stxr_8 GPR64:$val, am_noindex:$addr), - (STXRX GPR64:$val, am_noindex:$addr)>; - -def : Pat<(stxr_1 (zext (and GPR32:$val, 0xff)), am_noindex:$addr), - (STXRB GPR32:$val, am_noindex:$addr)>; -def : Pat<(stxr_2 (zext (and GPR32:$val, 0xffff)), am_noindex:$addr), - (STXRH GPR32:$val, am_noindex:$addr)>; -def : Pat<(stxr_4 (zext GPR32:$val), am_noindex:$addr), - (STXRW GPR32:$val, am_noindex:$addr)>; - -def : Pat<(stxr_1 (and GPR64:$val, 0xff), am_noindex:$addr), - (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stxr_2 (and GPR64:$val, 0xffff), am_noindex:$addr), - (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), am_noindex:$addr), - (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; - - -// And clear exclusive. - -def : Pat<(int_arm64_clrex), (CLREX 0xf)>; diff --git a/lib/Target/ARM64/ARM64InstrFormats.td b/lib/Target/ARM64/ARM64InstrFormats.td deleted file mode 100644 index 38406f8..0000000 --- a/lib/Target/ARM64/ARM64InstrFormats.td +++ /dev/null @@ -1,8193 +0,0 @@ -//===- ARM64InstrFormats.td - ARM64 Instruction Formats ------*- tblgen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Describe ARM64 instructions format here -// - -// Format specifies the encoding used by the instruction. This is part of the -// ad-hoc solution used to emit machine instruction encodings by our machine -// code emitter. -class Format<bits<2> val> { - bits<2> Value = val; -} - -def PseudoFrm : Format<0>; -def NormalFrm : Format<1>; // Do we need any others? - -// ARM64 Instruction Format -class ARM64Inst<Format f, string cstr> : Instruction { - field bits<32> Inst; // Instruction encoding. - // Mask of bits that cause an encoding to be UNPREDICTABLE. - // If a bit is set, then if the corresponding bit in the - // target encoding differs from its value in the "Inst" field, - // the instruction is UNPREDICTABLE (SoftFail in abstract parlance). - field bits<32> Unpredictable = 0; - // SoftFail is the generic name for this field, but we alias it so - // as to make it more obvious what it means in ARM-land. 
- field bits<32> SoftFail = Unpredictable; - let Namespace = "ARM64"; - Format F = f; - bits<2> Form = F.Value; - let Pattern = []; - let Constraints = cstr; -} - -// Pseudo instructions (don't have encoding information) -class Pseudo<dag oops, dag iops, list<dag> pattern, string cstr = ""> - : ARM64Inst<PseudoFrm, cstr> { - dag OutOperandList = oops; - dag InOperandList = iops; - let Pattern = pattern; - let isCodeGenOnly = 1; -} - -// Real instructions (have encoding information) -class EncodedI<string cstr, list<dag> pattern> : ARM64Inst<NormalFrm, cstr> { - let Pattern = pattern; - let Size = 4; -} - -// Normal instructions -class I<dag oops, dag iops, string asm, string operands, string cstr, - list<dag> pattern> - : EncodedI<cstr, pattern> { - dag OutOperandList = oops; - dag InOperandList = iops; - let AsmString = !strconcat(asm, operands); -} - -class TriOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>; -class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; -class UnOpFrag<dag res> : PatFrag<(ops node:$LHS), res>; - -// Helper fragment for an extract of the high portion of a 128-bit vector. -def extract_high_v16i8 : - UnOpFrag<(extract_subvector (v16i8 node:$LHS), (i64 8))>; -def extract_high_v8i16 : - UnOpFrag<(extract_subvector (v8i16 node:$LHS), (i64 4))>; -def extract_high_v4i32 : - UnOpFrag<(extract_subvector (v4i32 node:$LHS), (i64 2))>; -def extract_high_v2i64 : - UnOpFrag<(extract_subvector (v2i64 node:$LHS), (i64 1))>; - -//===----------------------------------------------------------------------===// -// Asm Operand Classes. -// - -// Shifter operand for arithmetic shifted encodings. -def ShifterOperand : AsmOperandClass { - let Name = "Shifter"; -} - -// Shifter operand for mov immediate encodings. -def MovImm32ShifterOperand : AsmOperandClass { - let SuperClasses = [ShifterOperand]; - let Name = "MovImm32Shifter"; -} -def MovImm64ShifterOperand : AsmOperandClass { - let SuperClasses = [ShifterOperand]; - let Name = "MovImm64Shifter"; -} - -// Shifter operand for arithmetic register shifted encodings. -def ArithmeticShifterOperand : AsmOperandClass { - let SuperClasses = [ShifterOperand]; - let Name = "ArithmeticShifter"; -} - -// Shifter operand for arithmetic shifted encodings for ADD/SUB instructions. -def AddSubShifterOperand : AsmOperandClass { - let SuperClasses = [ArithmeticShifterOperand]; - let Name = "AddSubShifter"; -} - -// Shifter operand for logical vector 128/64-bit shifted encodings. -def LogicalVecShifterOperand : AsmOperandClass { - let SuperClasses = [ShifterOperand]; - let Name = "LogicalVecShifter"; -} -def LogicalVecHalfWordShifterOperand : AsmOperandClass { - let SuperClasses = [LogicalVecShifterOperand]; - let Name = "LogicalVecHalfWordShifter"; -} - -// The "MSL" shifter on the vector MOVI instruction. -def MoveVecShifterOperand : AsmOperandClass { - let SuperClasses = [ShifterOperand]; - let Name = "MoveVecShifter"; -} - -// Extend operand for arithmetic encodings. -def ExtendOperand : AsmOperandClass { let Name = "Extend"; } -def ExtendOperand64 : AsmOperandClass { - let SuperClasses = [ExtendOperand]; - let Name = "Extend64"; -} -// 'extend' that's a lsl of a 64-bit register. -def ExtendOperandLSL64 : AsmOperandClass { - let SuperClasses = [ExtendOperand]; - let Name = "ExtendLSL64"; -} - -// 8-bit floating-point immediate encodings. 
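This 8-bit encoding only reaches a small set of floating-point constants: values of the form +/-(16 + frac)/16 * 2^exp with frac in [0, 15] and exp in [-3, 4] (1.0, 0.5, 2.0, 31.0, ... but notably not 0.0, which is why a separate fpimm0 leaf exists further down). A hedged sketch of the expansion, assuming the standard AArch64 FMOV-immediate format:

#include <cmath>
#include <cstdint>

// Decode the 8 bits 'abcdefgh': a = sign, bcd = exponent, efgh = fraction.
static float decodeFPImm8(uint8_t Imm) {
  int Sign = (Imm >> 7) & 1;
  int BCD  = (Imm >> 4) & 0x7;
  int Frac = Imm & 0xF;
  int Exp  = (BCD & 4) ? (BCD & 3) - 3   // b == 1: exponents -3..0
                       : (BCD & 3) + 1;  // b == 0: exponents  1..4
  float Mant = 1.0f + Frac / 16.0f;      // 1.efgh in binary
  return (Sign ? -1.0f : 1.0f) * std::ldexp(Mant, Exp);
}
// e.g. decodeFPImm8(0x70) == 1.0f and decodeFPImm8(0x00) == 2.0f;
// getFP32Imm/getFP64Imm return -1 for anything outside this set.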
-def FPImmOperand : AsmOperandClass { - let Name = "FPImm"; - let ParserMethod = "tryParseFPImm"; -} - -// 8-bit immediate for AdvSIMD where 64-bit values of the form: -// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh -// are encoded as the eight bit value 'abcdefgh'. -def SIMDImmType10Operand : AsmOperandClass { let Name = "SIMDImmType10"; } - - -//===----------------------------------------------------------------------===// -// Operand Definitions. -// - -// ADR[P] instruction labels. -def AdrpOperand : AsmOperandClass { - let Name = "AdrpLabel"; - let ParserMethod = "tryParseAdrpLabel"; -} -def adrplabel : Operand<i64> { - let EncoderMethod = "getAdrLabelOpValue"; - let PrintMethod = "printAdrpLabel"; - let ParserMatchClass = AdrpOperand; -} - -def AdrOperand : AsmOperandClass { - let Name = "AdrLabel"; - let ParserMethod = "tryParseAdrLabel"; -} -def adrlabel : Operand<i64> { - let EncoderMethod = "getAdrLabelOpValue"; - let ParserMatchClass = AdrOperand; -} - -// simm9 predicate - True if the immediate is in the range [-256, 255]. -def SImm9Operand : AsmOperandClass { - let Name = "SImm9"; - let DiagnosticType = "InvalidMemoryIndexedSImm9"; -} -def simm9 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -256 && Imm < 256; }]> { - let ParserMatchClass = SImm9Operand; -} - -// simm7s4 predicate - True if the immediate is a multiple of 4 in the range -// [-256, 252]. -def SImm7s4Operand : AsmOperandClass { - let Name = "SImm7s4"; - let DiagnosticType = "InvalidMemoryIndexed32SImm7"; -} -def simm7s4 : Operand<i32> { - let ParserMatchClass = SImm7s4Operand; - let PrintMethod = "printImmScale4"; -} - -// simm7s8 predicate - True if the immediate is a multiple of 8 in the range -// [-512, 504]. -def SImm7s8Operand : AsmOperandClass { - let Name = "SImm7s8"; - let DiagnosticType = "InvalidMemoryIndexed64SImm7"; -} -def simm7s8 : Operand<i32> { - let ParserMatchClass = SImm7s8Operand; - let PrintMethod = "printImmScale8"; -} - -// simm7s16 predicate - True if the immediate is a multiple of 16 in the range -// [-1024, 1008]. -def SImm7s16Operand : AsmOperandClass { - let Name = "SImm7s16"; - let DiagnosticType = "InvalidMemoryIndexed64SImm7"; -} -def simm7s16 : Operand<i32> { - let ParserMatchClass = SImm7s16Operand; - let PrintMethod = "printImmScale16"; -} - -// imm0_65535 predicate - True if the immediate is in the range [0,65535]. 
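The simm7s4/simm7s8/simm7s16 operands above all follow the same shape: the underlying field is a signed 7-bit value scaled by the access size, which is where the quoted ranges come from ([-64*S, 63*S]). A small sketch of the shared predicate:

#include <cstdint>

static bool isSImm7Scaled(int64_t Imm, int64_t Scale) {
  return Imm % Scale == 0 && Imm / Scale >= -64 && Imm / Scale <= 63;
}
// isSImm7Scaled(Imm, 4)  -> multiples of 4  in [-256, 252]   (simm7s4)
// isSImm7Scaled(Imm, 8)  -> multiples of 8  in [-512, 504]   (simm7s8)
// isSImm7Scaled(Imm, 16) -> multiples of 16 in [-1024, 1008] (simm7s16)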
-def Imm0_65535Operand : AsmOperandClass { let Name = "Imm0_65535"; } -def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{ - return ((uint32_t)Imm) < 65536; -}]> { - let ParserMatchClass = Imm0_65535Operand; -} - -def Imm1_8Operand : AsmOperandClass { - let Name = "Imm1_8"; - let DiagnosticType = "InvalidImm1_8"; -} -def Imm1_16Operand : AsmOperandClass { - let Name = "Imm1_16"; - let DiagnosticType = "InvalidImm1_16"; -} -def Imm1_32Operand : AsmOperandClass { - let Name = "Imm1_32"; - let DiagnosticType = "InvalidImm1_32"; -} -def Imm1_64Operand : AsmOperandClass { - let Name = "Imm1_64"; - let DiagnosticType = "InvalidImm1_64"; -} - -def MovZSymbolG3AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG3"; - let RenderMethod = "addImmOperands"; -} - -def movz_symbol_g3 : Operand<i32> { - let ParserMatchClass = MovZSymbolG3AsmOperand; -} - -def MovZSymbolG2AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG2"; - let RenderMethod = "addImmOperands"; -} - -def movz_symbol_g2 : Operand<i32> { - let ParserMatchClass = MovZSymbolG2AsmOperand; -} - -def MovZSymbolG1AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG1"; - let RenderMethod = "addImmOperands"; -} - -def movz_symbol_g1 : Operand<i32> { - let ParserMatchClass = MovZSymbolG1AsmOperand; -} - -def MovZSymbolG0AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG0"; - let RenderMethod = "addImmOperands"; -} - -def movz_symbol_g0 : Operand<i32> { - let ParserMatchClass = MovZSymbolG0AsmOperand; -} - -def MovKSymbolG2AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG2"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g2 : Operand<i32> { - let ParserMatchClass = MovKSymbolG2AsmOperand; -} - -def MovKSymbolG1AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG1"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g1 : Operand<i32> { - let ParserMatchClass = MovKSymbolG1AsmOperand; -} - -def MovKSymbolG0AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG0"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g0 : Operand<i32> { - let ParserMatchClass = MovKSymbolG0AsmOperand; -} - -def fixedpoint32 : Operand<i32> { - let EncoderMethod = "getFixedPointScaleOpValue"; - let DecoderMethod = "DecodeFixedPointScaleImm"; - let ParserMatchClass = Imm1_32Operand; -} -def fixedpoint64 : Operand<i64> { - let EncoderMethod = "getFixedPointScaleOpValue"; - let DecoderMethod = "DecodeFixedPointScaleImm"; - let ParserMatchClass = Imm1_64Operand; -} - -def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9); -}]> { - let EncoderMethod = "getVecShiftR8OpValue"; - let DecoderMethod = "DecodeVecShiftR8Imm"; - let ParserMatchClass = Imm1_8Operand; -} -def vecshiftR16 : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17); -}]> { - let EncoderMethod = "getVecShiftR16OpValue"; - let DecoderMethod = "DecodeVecShiftR16Imm"; - let ParserMatchClass = Imm1_16Operand; -} -def vecshiftR16Narrow : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9); -}]> { - let EncoderMethod = "getVecShiftR16OpValue"; - let DecoderMethod = "DecodeVecShiftR16ImmNarrow"; - let ParserMatchClass = Imm1_8Operand; -} -def vecshiftR32 : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33); -}]> { - let EncoderMethod = "getVecShiftR32OpValue"; - let DecoderMethod = "DecodeVecShiftR32Imm"; - let ParserMatchClass = Imm1_32Operand; -} -def vecshiftR32Narrow : Operand<i32>, ImmLeaf<i32, [{ - 
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17); -}]> { - let EncoderMethod = "getVecShiftR32OpValue"; - let DecoderMethod = "DecodeVecShiftR32ImmNarrow"; - let ParserMatchClass = Imm1_16Operand; -} -def vecshiftR64 : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65); -}]> { - let EncoderMethod = "getVecShiftR64OpValue"; - let DecoderMethod = "DecodeVecShiftR64Imm"; - let ParserMatchClass = Imm1_64Operand; -} -def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33); -}]> { - let EncoderMethod = "getVecShiftR64OpValue"; - let DecoderMethod = "DecodeVecShiftR64ImmNarrow"; - let ParserMatchClass = Imm1_32Operand; -} - -def Imm0_7Operand : AsmOperandClass { let Name = "Imm0_7"; } -def Imm0_15Operand : AsmOperandClass { let Name = "Imm0_15"; } -def Imm0_31Operand : AsmOperandClass { let Name = "Imm0_31"; } -def Imm0_63Operand : AsmOperandClass { let Name = "Imm0_63"; } - -def vecshiftL8 : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) < 8); -}]> { - let EncoderMethod = "getVecShiftL8OpValue"; - let DecoderMethod = "DecodeVecShiftL8Imm"; - let ParserMatchClass = Imm0_7Operand; -} -def vecshiftL16 : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) < 16); -}]> { - let EncoderMethod = "getVecShiftL16OpValue"; - let DecoderMethod = "DecodeVecShiftL16Imm"; - let ParserMatchClass = Imm0_15Operand; -} -def vecshiftL32 : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) < 32); -}]> { - let EncoderMethod = "getVecShiftL32OpValue"; - let DecoderMethod = "DecodeVecShiftL32Imm"; - let ParserMatchClass = Imm0_31Operand; -} -def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) < 64); -}]> { - let EncoderMethod = "getVecShiftL64OpValue"; - let DecoderMethod = "DecodeVecShiftL64Imm"; - let ParserMatchClass = Imm0_63Operand; -} - - -// Crazy immediate formats used by 32-bit and 64-bit logical immediate -// instructions for splatting repeating bit patterns across the immediate. -def logical_imm32_XFORM : SDNodeXForm<imm, [{ - uint64_t enc = ARM64_AM::encodeLogicalImmediate(N->getZExtValue(), 32); - return CurDAG->getTargetConstant(enc, MVT::i32); -}]>; -def logical_imm64_XFORM : SDNodeXForm<imm, [{ - uint64_t enc = ARM64_AM::encodeLogicalImmediate(N->getZExtValue(), 64); - return CurDAG->getTargetConstant(enc, MVT::i32); -}]>; - -def LogicalImm32Operand : AsmOperandClass { let Name = "LogicalImm32"; } -def LogicalImm64Operand : AsmOperandClass { let Name = "LogicalImm64"; } -def logical_imm32 : Operand<i32>, PatLeaf<(imm), [{ - return ARM64_AM::isLogicalImmediate(N->getZExtValue(), 32); -}], logical_imm32_XFORM> { - let PrintMethod = "printLogicalImm32"; - let ParserMatchClass = LogicalImm32Operand; -} -def logical_imm64 : Operand<i64>, PatLeaf<(imm), [{ - return ARM64_AM::isLogicalImmediate(N->getZExtValue(), 64); -}], logical_imm64_XFORM> { - let PrintMethod = "printLogicalImm64"; - let ParserMatchClass = LogicalImm64Operand; -} - -// imm0_255 predicate - True if the immediate is in the range [0,255]. 
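The logical_imm32/logical_imm64 leaves above accept only AArch64 bitmask immediates. Setting aside the actual N:immr:imms packing that encodeLogicalImmediate produces, the accepted set can be described as: an element of 2, 4, 8, 16, 32 or 64 bits that is a contiguous run of ones rotated by some amount (neither all-zeros nor all-ones), replicated across the register. A rough stand-alone acceptance test in that spirit (not the ARM64_AM implementation):

#include <cstdint>

// Is Elt (in its low Size bits) a contiguous run of ones, possibly rotated,
// and neither all-zeros nor all-ones?
static bool isRotatedOnesRun(uint64_t Elt, unsigned Size) {
  uint64_t Mask = (Size == 64) ? ~0ULL : ((1ULL << Size) - 1);
  Elt &= Mask;
  if (Elt == 0 || Elt == Mask)
    return false;
  for (unsigned R = 0; R != Size; ++R) {
    uint64_t Rot = R ? (((Elt >> R) | (Elt << (Size - R))) & Mask) : Elt;
    if ((Rot & (Rot + 1)) == 0)     // ones are contiguous from bit 0 upwards
      return true;
  }
  return false;
}

// Rough equivalent of what isLogicalImmediate(Imm, 64) is expected to accept.
static bool looksLikeLogicalImm64(uint64_t Imm) {
  for (unsigned Size = 2; Size <= 64; Size *= 2) {
    uint64_t Mask = (Size == 64) ? ~0ULL : ((1ULL << Size) - 1);
    uint64_t Elt = Imm & Mask;
    bool Splat = true;
    for (unsigned Off = Size; Off != 64; Off += Size)
      Splat = Splat && (((Imm >> Off) & Mask) == Elt);
    if (Splat)                      // smallest repeating element found; test it
      return isRotatedOnesRun(Elt, Size);
  }
  return false;                     // not reached: Size == 64 always "splats"
}
// e.g. 0x5555555555555555 and 0x00FF00FF00FF00FF are accepted;
// 0, ~0ULL and 0x0123456789ABCDEF are rejected.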
-def Imm0_255Operand : AsmOperandClass { let Name = "Imm0_255"; } -def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ - return ((uint32_t)Imm) < 256; -}]> { - let ParserMatchClass = Imm0_255Operand; -} - -// imm0_127 predicate - True if the immediate is in the range [0,127] -def Imm0_127Operand : AsmOperandClass { let Name = "Imm0_127"; } -def imm0_127 : Operand<i32>, ImmLeaf<i32, [{ - return ((uint32_t)Imm) < 128; -}]> { - let ParserMatchClass = Imm0_127Operand; -} - -// NOTE: These imm0_N operands have to be of type i64 because i64 is the size -// for all shift-amounts. - -// imm0_63 predicate - True if the immediate is in the range [0,63] -def imm0_63 : Operand<i64>, ImmLeaf<i64, [{ - return ((uint64_t)Imm) < 64; -}]> { - let ParserMatchClass = Imm0_63Operand; -} - -// imm0_31 predicate - True if the immediate is in the range [0,31] -def imm0_31 : Operand<i64>, ImmLeaf<i64, [{ - return ((uint64_t)Imm) < 32; -}]> { - let ParserMatchClass = Imm0_31Operand; -} - -// imm0_15 predicate - True if the immediate is in the range [0,15] -def imm0_15 : Operand<i64>, ImmLeaf<i64, [{ - return ((uint64_t)Imm) < 16; -}]> { - let ParserMatchClass = Imm0_15Operand; -} - -// imm0_7 predicate - True if the immediate is in the range [0,7] -def imm0_7 : Operand<i64>, ImmLeaf<i64, [{ - return ((uint64_t)Imm) < 8; -}]> { - let ParserMatchClass = Imm0_7Operand; -} - -// An arithmetic shifter operand: -// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr -// {5-0} - imm6 -def arith_shift : Operand<i32> { - let PrintMethod = "printShifter"; - let ParserMatchClass = ArithmeticShifterOperand; -} - -class arith_shifted_reg<ValueType Ty, RegisterClass regclass> - : Operand<Ty>, - ComplexPattern<Ty, 2, "SelectArithShiftedRegister", []> { - let PrintMethod = "printShiftedRegister"; - let MIOperandInfo = (ops regclass, arith_shift); -} - -def arith_shifted_reg32 : arith_shifted_reg<i32, GPR32>; -def arith_shifted_reg64 : arith_shifted_reg<i64, GPR64>; - -// An arithmetic shifter operand: -// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr, 11 = ror -// {5-0} - imm6 -def logical_shift : Operand<i32> { - let PrintMethod = "printShifter"; - let ParserMatchClass = ShifterOperand; -} - -class logical_shifted_reg<ValueType Ty, RegisterClass regclass> - : Operand<Ty>, - ComplexPattern<Ty, 2, "SelectLogicalShiftedRegister", []> { - let PrintMethod = "printShiftedRegister"; - let MIOperandInfo = (ops regclass, logical_shift); -} - -def logical_shifted_reg32 : logical_shifted_reg<i32, GPR32>; -def logical_shifted_reg64 : logical_shifted_reg<i64, GPR64>; - -// A logical vector shifter operand: -// {7-6} - shift type: 00 = lsl -// {5-0} - imm6: #0, #8, #16, or #24 -def logical_vec_shift : Operand<i32> { - let PrintMethod = "printShifter"; - let EncoderMethod = "getVecShifterOpValue"; - let ParserMatchClass = LogicalVecShifterOperand; -} - -// A logical vector half-word shifter operand: -// {7-6} - shift type: 00 = lsl -// {5-0} - imm6: #0 or #8 -def logical_vec_hw_shift : Operand<i32> { - let PrintMethod = "printShifter"; - let EncoderMethod = "getVecShifterOpValue"; - let ParserMatchClass = LogicalVecHalfWordShifterOperand; -} - -// A vector move shifter operand: -// {0} - imm1: #8 or #16 -def move_vec_shift : Operand<i32> { - let PrintMethod = "printShifter"; - let EncoderMethod = "getMoveVecShifterOpValue"; - let ParserMatchClass = MoveVecShifterOperand; -} - -// An ADD/SUB immediate shifter operand: -// {7-6} - shift type: 00 = lsl -// {5-0} - imm6: #0 or #12 -def addsub_shift : Operand<i32> { - let ParserMatchClass = 
AddSubShifterOperand; -} - -class addsub_shifted_imm<ValueType Ty> - : Operand<Ty>, ComplexPattern<Ty, 2, "SelectArithImmed", [imm]> { - let PrintMethod = "printAddSubImm"; - let EncoderMethod = "getAddSubImmOpValue"; - let MIOperandInfo = (ops i32imm, addsub_shift); -} - -def addsub_shifted_imm32 : addsub_shifted_imm<i32>; -def addsub_shifted_imm64 : addsub_shifted_imm<i64>; - -class neg_addsub_shifted_imm<ValueType Ty> - : Operand<Ty>, ComplexPattern<Ty, 2, "SelectNegArithImmed", [imm]> { - let PrintMethod = "printAddSubImm"; - let EncoderMethod = "getAddSubImmOpValue"; - let MIOperandInfo = (ops i32imm, addsub_shift); -} - -def neg_addsub_shifted_imm32 : neg_addsub_shifted_imm<i32>; -def neg_addsub_shifted_imm64 : neg_addsub_shifted_imm<i64>; - -// An extend operand: -// {5-3} - extend type -// {2-0} - imm3 -def arith_extend : Operand<i32> { - let PrintMethod = "printExtend"; - let ParserMatchClass = ExtendOperand; -} -def arith_extend64 : Operand<i32> { - let PrintMethod = "printExtend"; - let ParserMatchClass = ExtendOperand64; -} - -// 'extend' that's a lsl of a 64-bit register. -def arith_extendlsl64 : Operand<i32> { - let PrintMethod = "printExtend"; - let ParserMatchClass = ExtendOperandLSL64; -} - -class arith_extended_reg32<ValueType Ty> : Operand<Ty>, - ComplexPattern<Ty, 2, "SelectArithExtendedRegister", []> { - let PrintMethod = "printExtendedRegister"; - let MIOperandInfo = (ops GPR32, arith_extend); -} - -class arith_extended_reg32to64<ValueType Ty> : Operand<Ty>, - ComplexPattern<Ty, 2, "SelectArithExtendedRegister", []> { - let PrintMethod = "printExtendedRegister"; - let MIOperandInfo = (ops GPR32, arith_extend64); -} - -// Floating-point immediate. -def fpimm32 : Operand<f32>, - PatLeaf<(f32 fpimm), [{ - return ARM64_AM::getFP32Imm(N->getValueAPF()) != -1; - }], SDNodeXForm<fpimm, [{ - APFloat InVal = N->getValueAPF(); - uint32_t enc = ARM64_AM::getFP32Imm(InVal); - return CurDAG->getTargetConstant(enc, MVT::i32); - }]>> { - let ParserMatchClass = FPImmOperand; - let PrintMethod = "printFPImmOperand"; -} -def fpimm64 : Operand<f64>, - PatLeaf<(f64 fpimm), [{ - return ARM64_AM::getFP64Imm(N->getValueAPF()) != -1; - }], SDNodeXForm<fpimm, [{ - APFloat InVal = N->getValueAPF(); - uint32_t enc = ARM64_AM::getFP64Imm(InVal); - return CurDAG->getTargetConstant(enc, MVT::i32); - }]>> { - let ParserMatchClass = FPImmOperand; - let PrintMethod = "printFPImmOperand"; -} - -def fpimm8 : Operand<i32> { - let ParserMatchClass = FPImmOperand; - let PrintMethod = "printFPImmOperand"; -} - -def fpimm0 : PatLeaf<(fpimm), [{ - return N->isExactlyValue(+0.0); -}]>; - -// 8-bit immediate for AdvSIMD where 64-bit values of the form: -// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh -// are encoded as the eight bit value 'abcdefgh'. -def simdimmtype10 : Operand<i32>, - PatLeaf<(f64 fpimm), [{ - return ARM64_AM::isAdvSIMDModImmType10(N->getValueAPF() - .bitcastToAPInt() - .getZExtValue()); - }], SDNodeXForm<fpimm, [{ - APFloat InVal = N->getValueAPF(); - uint32_t enc = ARM64_AM::encodeAdvSIMDModImmType10(N->getValueAPF() - .bitcastToAPInt() - .getZExtValue()); - return CurDAG->getTargetConstant(enc, MVT::i32); - }]>> { - let ParserMatchClass = SIMDImmType10Operand; - let PrintMethod = "printSIMDType10Operand"; -} - - -//--- -// Sytem management -//--- - -// Base encoding for system instruction operands. 
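Before the system-instruction formats, one note on simdimmtype10 above: the "type 10" AdvSIMD immediate replicates each of the eight bits abcdefgh into a full byte of the 64-bit result. A small sketch of that expansion, assumed to match the view of the format that isAdvSIMDModImmType10/encodeAdvSIMDModImmType10 implement:

#include <cstdint>

// Bit 7 ('a') controls the most significant byte, bit 0 ('h') the least.
static uint64_t expandSIMDImmType10(uint8_t Imm) {
  uint64_t Result = 0;
  for (int Byte = 0; Byte != 8; ++Byte)
    if (Imm & (1u << Byte))
      Result |= 0xFFULL << (8 * Byte);
  return Result;
}
// e.g. expandSIMDImmType10(0x81) == 0xFF000000000000FF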
-let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class BaseSystemI<bit L, dag oops, dag iops, string asm, string operands> - : I<oops, iops, asm, operands, "", []> { - let Inst{31-22} = 0b1101010100; - let Inst{21} = L; -} - -// System instructions which do not have an Rt register. -class SimpleSystemI<bit L, dag iops, string asm, string operands> - : BaseSystemI<L, (outs), iops, asm, operands> { - let Inst{4-0} = 0b11111; -} - -// System instructions which have an Rt register. -class RtSystemI<bit L, dag oops, dag iops, string asm, string operands> - : BaseSystemI<L, oops, iops, asm, operands>, - Sched<[WriteSys]> { - bits<5> Rt; - let Inst{4-0} = Rt; -} - -// Hint instructions that take both a CRm and a 3-bit immediate. -class HintI<string mnemonic> - : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#" $imm", "">, - Sched<[WriteHint]> { - bits <7> imm; - let Inst{20-12} = 0b000110010; - let Inst{11-5} = imm; -} - -// System instructions taking a single literal operand which encodes into -// CRm. op2 differentiates the opcodes. -def BarrierAsmOperand : AsmOperandClass { - let Name = "Barrier"; - let ParserMethod = "tryParseBarrierOperand"; -} -def barrier_op : Operand<i32> { - let PrintMethod = "printBarrierOption"; - let ParserMatchClass = BarrierAsmOperand; -} -class CRmSystemI<Operand crmtype, bits<3> opc, string asm> - : SimpleSystemI<0, (ins crmtype:$CRm), asm, "\t$CRm">, - Sched<[WriteBarrier]> { - bits<4> CRm; - let Inst{20-12} = 0b000110011; - let Inst{11-8} = CRm; - let Inst{7-5} = opc; -} - -// MRS/MSR system instructions. -def SystemRegisterOperand : AsmOperandClass { - let Name = "SystemRegister"; - let ParserMethod = "tryParseSystemRegister"; -} -// concatenation of 1, op0, op1, CRn, CRm, op2. 16-bit immediate. -def sysreg_op : Operand<i32> { - let ParserMatchClass = SystemRegisterOperand; - let DecoderMethod = "DecodeSystemRegister"; - let PrintMethod = "printSystemRegister"; -} - -class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins sysreg_op:$systemreg), - "mrs", "\t$Rt, $systemreg"> { - bits<15> systemreg; - let Inst{20} = 1; - let Inst{19-5} = systemreg; -} - -// FIXME: Some of these def CPSR, others don't. Best way to model that? -// Explicitly modeling each of the system register as a register class -// would do it, but feels like overkill at this point. -class MSRI : RtSystemI<0, (outs), (ins sysreg_op:$systemreg, GPR64:$Rt), - "msr", "\t$systemreg, $Rt"> { - bits<15> systemreg; - let Inst{20} = 1; - let Inst{19-5} = systemreg; -} - -def SystemCPSRFieldOperand : AsmOperandClass { - let Name = "SystemCPSRField"; - let ParserMethod = "tryParseCPSRField"; -} -def cpsrfield_op : Operand<i32> { - let ParserMatchClass = SystemCPSRFieldOperand; - let PrintMethod = "printSystemCPSRField"; -} - -let Defs = [CPSR] in -class MSRcpsrI : SimpleSystemI<0, (ins cpsrfield_op:$cpsr_field, imm0_15:$imm), - "msr", "\t$cpsr_field, $imm">, - Sched<[WriteSys]> { - bits<6> cpsrfield; - bits<4> imm; - let Inst{20-19} = 0b00; - let Inst{18-16} = cpsrfield{5-3}; - let Inst{15-12} = 0b0100; - let Inst{11-8} = imm; - let Inst{7-5} = cpsrfield{2-0}; - - let DecoderMethod = "DecodeSystemCPSRInstruction"; -} - -// SYS and SYSL generic system instructions. 
-def SysCRAsmOperand : AsmOperandClass { - let Name = "SysCR"; - let ParserMethod = "tryParseSysCROperand"; -} - -def sys_cr_op : Operand<i32> { - let PrintMethod = "printSysCROperand"; - let ParserMatchClass = SysCRAsmOperand; -} - -class SystemI<bit L, string asm> - : SimpleSystemI<L, - (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2), - asm, "\t$op1, $Cn, $Cm, $op2">, - Sched<[WriteSys]> { - bits<3> op1; - bits<4> Cn; - bits<4> Cm; - bits<3> op2; - let Inst{20-19} = 0b01; - let Inst{18-16} = op1; - let Inst{15-12} = Cn; - let Inst{11-8} = Cm; - let Inst{7-5} = op2; -} - -class SystemXtI<bit L, string asm> - : RtSystemI<L, (outs), - (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, GPR64:$Rt), - asm, "\t$op1, $Cn, $Cm, $op2, $Rt"> { - bits<3> op1; - bits<4> Cn; - bits<4> Cm; - bits<3> op2; - let Inst{20-19} = 0b01; - let Inst{18-16} = op1; - let Inst{15-12} = Cn; - let Inst{11-8} = Cm; - let Inst{7-5} = op2; -} - -class SystemLXtI<bit L, string asm> - : RtSystemI<L, (outs), - (ins GPR64:$Rt, imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2), - asm, "\t$Rt, $op1, $Cn, $Cm, $op2"> { - bits<3> op1; - bits<4> Cn; - bits<4> Cm; - bits<3> op2; - let Inst{20-19} = 0b01; - let Inst{18-16} = op1; - let Inst{15-12} = Cn; - let Inst{11-8} = Cm; - let Inst{7-5} = op2; -} - - -// Branch (register) instructions: -// -// case opc of -// 0001 blr -// 0000 br -// 0101 dret -// 0100 eret -// 0010 ret -// otherwise UNDEFINED -class BaseBranchReg<bits<4> opc, dag oops, dag iops, string asm, - string operands, list<dag> pattern> - : I<oops, iops, asm, operands, "", pattern>, Sched<[WriteBrReg]> { - let Inst{31-25} = 0b1101011; - let Inst{24-21} = opc; - let Inst{20-16} = 0b11111; - let Inst{15-10} = 0b000000; - let Inst{4-0} = 0b00000; -} - -class BranchReg<bits<4> opc, string asm, list<dag> pattern> - : BaseBranchReg<opc, (outs), (ins GPR64:$Rn), asm, "\t$Rn", pattern> { - bits<5> Rn; - let Inst{9-5} = Rn; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1, isReturn = 1 in -class SpecialReturn<bits<4> opc, string asm> - : BaseBranchReg<opc, (outs), (ins), asm, "", []> { - let Inst{9-5} = 0b11111; -} - -//--- -// Conditional branch instruction. -//--- -// Branch condition code. -// 4-bit immediate. Pretty-printed as .<cc> -def dotCcode : Operand<i32> { - let PrintMethod = "printDotCondCode"; -} - -// Conditional branch target. 19-bit immediate. The low two bits of the target -// offset are implied zero and so are not part of the immediate. -def BranchTarget19Operand : AsmOperandClass { - let Name = "BranchTarget19"; -} -def am_brcond : Operand<OtherVT> { - let EncoderMethod = "getCondBranchTargetOpValue"; - let DecoderMethod = "DecodeCondBranchTarget"; - let PrintMethod = "printAlignedBranchTarget"; - let ParserMatchClass = BranchTarget19Operand; -} - -class BranchCond : I<(outs), (ins dotCcode:$cond, am_brcond:$target), - "b", "$cond\t$target", "", - [(ARM64brcond bb:$target, imm:$cond, CPSR)]>, - Sched<[WriteBr]> { - let isBranch = 1; - let isTerminator = 1; - let Uses = [CPSR]; - - bits<4> cond; - bits<19> target; - let Inst{31-24} = 0b01010100; - let Inst{23-5} = target; - let Inst{4} = 0; - let Inst{3-0} = cond; -} - -//--- -// Compare-and-branch instructions. 
-//--- -class BaseCmpBranch<RegisterClass regtype, bit op, string asm, SDNode node> - : I<(outs), (ins regtype:$Rt, am_brcond:$target), - asm, "\t$Rt, $target", "", - [(node regtype:$Rt, bb:$target)]>, - Sched<[WriteBr]> { - let isBranch = 1; - let isTerminator = 1; - - bits<5> Rt; - bits<19> target; - let Inst{30-25} = 0b011010; - let Inst{24} = op; - let Inst{23-5} = target; - let Inst{4-0} = Rt; -} - -multiclass CmpBranch<bit op, string asm, SDNode node> { - def W : BaseCmpBranch<GPR32, op, asm, node> { - let Inst{31} = 0; - } - def X : BaseCmpBranch<GPR64, op, asm, node> { - let Inst{31} = 1; - } -} - -//--- -// Test-bit-and-branch instructions. -//--- -// Test-and-branch target. 14-bit sign-extended immediate. The low two bits of -// the target offset are implied zero and so are not part of the immediate. -def BranchTarget14Operand : AsmOperandClass { - let Name = "BranchTarget14"; -} -def am_tbrcond : Operand<OtherVT> { - let EncoderMethod = "getTestBranchTargetOpValue"; - let PrintMethod = "printAlignedBranchTarget"; - let ParserMatchClass = BranchTarget14Operand; -} - -class TestBranch<bit op, string asm, SDNode node> - : I<(outs), (ins GPR64:$Rt, imm0_63:$bit_off, am_tbrcond:$target), - asm, "\t$Rt, $bit_off, $target", "", - [(node GPR64:$Rt, imm0_63:$bit_off, bb:$target)]>, - Sched<[WriteBr]> { - let isBranch = 1; - let isTerminator = 1; - - bits<5> Rt; - bits<6> bit_off; - bits<14> target; - - let Inst{31} = bit_off{5}; - let Inst{30-25} = 0b011011; - let Inst{24} = op; - let Inst{23-19} = bit_off{4-0}; - let Inst{18-5} = target; - let Inst{4-0} = Rt; - - let DecoderMethod = "DecodeTestAndBranch"; -} - -//--- -// Unconditional branch (immediate) instructions. -//--- -def BranchTarget26Operand : AsmOperandClass { - let Name = "BranchTarget26"; -} -def am_b_target : Operand<OtherVT> { - let EncoderMethod = "getBranchTargetOpValue"; - let PrintMethod = "printAlignedBranchTarget"; - let ParserMatchClass = BranchTarget26Operand; -} -def am_bl_target : Operand<i64> { - let EncoderMethod = "getBranchTargetOpValue"; - let PrintMethod = "printAlignedBranchTarget"; - let ParserMatchClass = BranchTarget26Operand; -} - -class BImm<bit op, dag iops, string asm, list<dag> pattern> - : I<(outs), iops, asm, "\t$addr", "", pattern>, Sched<[WriteBr]> { - bits<26> addr; - let Inst{31} = op; - let Inst{30-26} = 0b00101; - let Inst{25-0} = addr; - - let DecoderMethod = "DecodeUnconditionalBranch"; -} - -class BranchImm<bit op, string asm, list<dag> pattern> - : BImm<op, (ins am_b_target:$addr), asm, pattern>; -class CallImm<bit op, string asm, list<dag> pattern> - : BImm<op, (ins am_bl_target:$addr), asm, pattern>; - -//--- -// Basic one-operand data processing instructions. 
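A quick sanity check on the reach implied by these branch-target operands: the 19-bit conditional and compare-and-branch targets, the 14-bit test-and-branch targets and the 26-bit unconditional B/BL targets are all signed, word-aligned offsets, giving roughly +/-1 MiB, +/-32 KiB and +/-128 MiB respectively. A small compile-time check of that arithmetic, assuming the usual signed, times-4 interpretation:

#include <cstdint>

constexpr int64_t branchRangeBytes(unsigned ImmBits) {
  // Signed immediate, low two bits implied zero => scale by 4.
  return (int64_t(1) << (ImmBits - 1)) * 4;
}
static_assert(branchRangeBytes(19) == 1 << 20, "b.cond/cbz/cbnz: +/-1 MiB");
static_assert(branchRangeBytes(14) == 1 << 15, "tbz/tbnz: +/-32 KiB");
static_assert(branchRangeBytes(26) == 1 << 27, "b/bl: +/-128 MiB");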
-//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseOneOperandData<bits<3> opc, RegisterClass regtype, string asm, - SDPatternOperator node> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "", - [(set regtype:$Rd, (node regtype:$Rn))]>, - Sched<[WriteI]> { - bits<5> Rd; - bits<5> Rn; - - let Inst{30-13} = 0b101101011000000000; - let Inst{12-10} = opc; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass OneOperandData<bits<3> opc, string asm, - SDPatternOperator node = null_frag> { - def Wr : BaseOneOperandData<opc, GPR32, asm, node> { - let Inst{31} = 0; - } - - def Xr : BaseOneOperandData<opc, GPR64, asm, node> { - let Inst{31} = 1; - } -} - -class OneWRegData<bits<3> opc, string asm, SDPatternOperator node> - : BaseOneOperandData<opc, GPR32, asm, node> { - let Inst{31} = 0; -} - -class OneXRegData<bits<3> opc, string asm, SDPatternOperator node> - : BaseOneOperandData<opc, GPR64, asm, node> { - let Inst{31} = 1; -} - -//--- -// Basic two-operand data processing instructions. -//--- -class BaseBaseAddSubCarry<bit isSub, RegisterClass regtype, string asm, - list<dag> pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", pattern>, - Sched<[WriteI]> { - let Uses = [CPSR]; - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{30} = isSub; - let Inst{28-21} = 0b11010000; - let Inst{20-16} = Rm; - let Inst{15-10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseAddSubCarry<bit isSub, RegisterClass regtype, string asm, - SDNode OpNode> - : BaseBaseAddSubCarry<isSub, regtype, asm, - [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm, CPSR))]>; - -class BaseAddSubCarrySetFlags<bit isSub, RegisterClass regtype, string asm, - SDNode OpNode> - : BaseBaseAddSubCarry<isSub, regtype, asm, - [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm, CPSR)), - (implicit CPSR)]> { - let Defs = [CPSR]; -} - -multiclass AddSubCarry<bit isSub, string asm, string asm_setflags, - SDNode OpNode, SDNode OpNode_setflags> { - def Wr : BaseAddSubCarry<isSub, GPR32, asm, OpNode> { - let Inst{31} = 0; - let Inst{29} = 0; - } - def Xr : BaseAddSubCarry<isSub, GPR64, asm, OpNode> { - let Inst{31} = 1; - let Inst{29} = 0; - } - - // Sets flags. 
- def SWr : BaseAddSubCarrySetFlags<isSub, GPR32, asm_setflags, - OpNode_setflags> { - let Inst{31} = 0; - let Inst{29} = 1; - } - def SXr : BaseAddSubCarrySetFlags<isSub, GPR64, asm_setflags, - OpNode_setflags> { - let Inst{31} = 1; - let Inst{29} = 1; - } -} - -class BaseTwoOperand<bits<4> opc, RegisterClass regtype, string asm, - SDPatternOperator OpNode> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{30-21} = 0b0011010110; - let Inst{20-16} = Rm; - let Inst{15-14} = 0b00; - let Inst{13-10} = opc; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseDiv<bit isSigned, RegisterClass regtype, string asm, - SDPatternOperator OpNode> - : BaseTwoOperand<{0,0,1,?}, regtype, asm, OpNode> { - let Inst{10} = isSigned; -} - -multiclass Div<bit isSigned, string asm, SDPatternOperator OpNode> { - def Wr : BaseDiv<isSigned, GPR32, asm, OpNode>, - Sched<[WriteID32]> { - let Inst{31} = 0; - } - def Xr : BaseDiv<isSigned, GPR64, asm, OpNode>, - Sched<[WriteID64]> { - let Inst{31} = 1; - } -} - -class BaseShift<bits<2> shift_type, RegisterClass regtype, string asm, - SDPatternOperator OpNode = null_frag> - : BaseTwoOperand<{1,0,?,?}, regtype, asm, OpNode>, - Sched<[WriteIS]> { - let Inst{11-10} = shift_type; -} - -multiclass Shift<bits<2> shift_type, string asm, SDNode OpNode> { - def Wr : BaseShift<shift_type, GPR32, asm> { - let Inst{31} = 0; - } - - def Xr : BaseShift<shift_type, GPR64, asm, OpNode> { - let Inst{31} = 1; - } - - def : Pat<(i32 (OpNode GPR32:$Rn, i64:$Rm)), - (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, - (EXTRACT_SUBREG i64:$Rm, sub_32))>; - - def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (zext GPR32:$Rm)))), - (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; - - def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (anyext GPR32:$Rm)))), - (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; - - def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (sext GPR32:$Rm)))), - (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; -} - -class ShiftAlias<string asm, Instruction inst, RegisterClass regtype> - : InstAlias<asm#" $dst, $src1, $src2", - (inst regtype:$dst, regtype:$src1, regtype:$src2)>; - -class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype, - RegisterClass addtype, string asm, - list<dag> pattern> - : I<(outs addtype:$Rd), (ins multype:$Rn, multype:$Rm, addtype:$Ra), - asm, "\t$Rd, $Rn, $Rm, $Ra", "", pattern> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<5> Ra; - let Inst{30-24} = 0b0011011; - let Inst{23-21} = opc; - let Inst{20-16} = Rm; - let Inst{15} = isSub; - let Inst{14-10} = Ra; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass MulAccum<bit isSub, string asm, SDNode AccNode> { - def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm, - [(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))]>, - Sched<[WriteIM32]> { - let Inst{31} = 0; - } - - def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm, - [(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))]>, - Sched<[WriteIM64]> { - let Inst{31} = 1; - } -} - -class WideMulAccum<bit isSub, bits<3> opc, string asm, - SDNode AccNode, SDNode ExtNode> - : BaseMulAccum<isSub, opc, GPR32, GPR64, asm, - [(set GPR64:$Rd, (AccNode GPR64:$Ra, - (mul (ExtNode GPR32:$Rn), (ExtNode GPR32:$Rm))))]>, - Sched<[WriteIM32]> { - let Inst{31} = 1; -} - -class MulHi<bits<3> opc, string asm, SDNode OpNode> - : I<(outs GPR64:$Rd), (ins GPR64:$Rn, 
GPR64:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set GPR64:$Rd, (OpNode GPR64:$Rn, GPR64:$Rm))]>, - Sched<[WriteIM64]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-24} = 0b10011011; - let Inst{23-21} = opc; - let Inst{20-16} = Rm; - let Inst{15-10} = 0b011111; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class MulAccumWAlias<string asm, Instruction inst> - : InstAlias<asm#" $dst, $src1, $src2", - (inst GPR32:$dst, GPR32:$src1, GPR32:$src2, WZR)>; -class MulAccumXAlias<string asm, Instruction inst> - : InstAlias<asm#" $dst, $src1, $src2", - (inst GPR64:$dst, GPR64:$src1, GPR64:$src2, XZR)>; -class WideMulAccumAlias<string asm, Instruction inst> - : InstAlias<asm#" $dst, $src1, $src2", - (inst GPR64:$dst, GPR32:$src1, GPR32:$src2, XZR)>; - -class BaseCRC32<bit sf, bits<2> sz, bit C, RegisterClass StreamReg, - SDPatternOperator OpNode, string asm> - : I<(outs GPR32:$Rd), (ins GPR32:$Rn, StreamReg:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set GPR32:$Rd, (OpNode GPR32:$Rn, StreamReg:$Rm))]>, - Sched<[WriteISReg]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - - let Inst{31} = sf; - let Inst{30-21} = 0b0011010110; - let Inst{20-16} = Rm; - let Inst{15-13} = 0b010; - let Inst{12} = C; - let Inst{11-10} = sz; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -//--- -// Address generation. -//--- - -class ADRI<bit page, string asm, Operand adr, list<dag> pattern> - : I<(outs GPR64:$Xd), (ins adr:$label), asm, "\t$Xd, $label", "", - pattern>, - Sched<[WriteI]> { - bits<5> Xd; - bits<21> label; - let Inst{31} = page; - let Inst{30-29} = label{1-0}; - let Inst{28-24} = 0b10000; - let Inst{23-5} = label{20-2}; - let Inst{4-0} = Xd; - - let DecoderMethod = "DecodeAdrInstruction"; -} - -//--- -// Move immediate. -//--- - -def movimm32_imm : Operand<i32> { - let ParserMatchClass = Imm0_65535Operand; - let EncoderMethod = "getMoveWideImmOpValue"; -} -def movimm32_shift : Operand<i32> { - let PrintMethod = "printShifter"; - let ParserMatchClass = MovImm32ShifterOperand; -} -def movimm64_shift : Operand<i32> { - let PrintMethod = "printShifter"; - let ParserMatchClass = MovImm64ShifterOperand; -} -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseMoveImmediate<bits<2> opc, RegisterClass regtype, Operand shifter, - string asm> - : I<(outs regtype:$Rd), (ins movimm32_imm:$imm, shifter:$shift), - asm, "\t$Rd, $imm$shift", "", []>, - Sched<[WriteImm]> { - bits<5> Rd; - bits<16> imm; - bits<6> shift; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100101; - let Inst{22-21} = shift{5-4}; - let Inst{20-5} = imm; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeMoveImmInstruction"; -} - -multiclass MoveImmediate<bits<2> opc, string asm> { - def Wi : BaseMoveImmediate<opc, GPR32, movimm32_shift, asm> { - let Inst{31} = 0; - } - - def Xi : BaseMoveImmediate<opc, GPR64, movimm64_shift, asm> { - let Inst{31} = 1; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseInsertImmediate<bits<2> opc, RegisterClass regtype, Operand shifter, - string asm> - : I<(outs regtype:$Rd), - (ins regtype:$src, movimm32_imm:$imm, shifter:$shift), - asm, "\t$Rd, $imm$shift", "$src = $Rd", []>, - Sched<[WriteI]> { - bits<5> Rd; - bits<16> imm; - bits<6> shift; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100101; - let Inst{22-21} = shift{5-4}; - let Inst{20-5} = imm; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeMoveImmInstruction"; -} - -multiclass InsertImmediate<bits<2> opc, string asm> { - def Wi : BaseInsertImmediate<opc, GPR32, movimm32_shift, asm> { - let Inst{31} = 0; - } - - 
def Xi : BaseInsertImmediate<opc, GPR64, movimm64_shift, asm> { - let Inst{31} = 1; - } -} - -//--- -// Add/Subtract -//--- - -class BaseAddSubImm<bit isSub, bit setFlags, RegisterClass dstRegtype, - RegisterClass srcRegtype, addsub_shifted_imm immtype, - string asm, SDPatternOperator OpNode> - : I<(outs dstRegtype:$Rd), (ins srcRegtype:$Rn, immtype:$imm), - asm, "\t$Rd, $Rn, $imm", "", - [(set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))]>, - Sched<[WriteI]> { - bits<5> Rd; - bits<5> Rn; - bits<14> imm; - let Inst{30} = isSub; - let Inst{29} = setFlags; - let Inst{28-24} = 0b10001; - let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12 - let Inst{21-10} = imm{11-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - let DecoderMethod = "DecodeBaseAddSubImm"; -} - -class BaseAddSubRegPseudo<RegisterClass regtype, - SDPatternOperator OpNode> - : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>, - Sched<[WriteI]>; - -class BaseAddSubSReg<bit isSub, bit setFlags, RegisterClass regtype, - arith_shifted_reg shifted_regtype, string asm, - SDPatternOperator OpNode> - : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set regtype:$Rd, (OpNode regtype:$Rn, shifted_regtype:$Rm))]>, - Sched<[WriteISReg]> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> src1; - bits<5> src2; - bits<8> shift; - let Inst{30} = isSub; - let Inst{29} = setFlags; - let Inst{28-24} = 0b01011; - let Inst{23-22} = shift{7-6}; - let Inst{21} = 0; - let Inst{20-16} = src2; - let Inst{15-10} = shift{5-0}; - let Inst{9-5} = src1; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeThreeAddrSRegInstruction"; -} - -class BaseAddSubEReg<bit isSub, bit setFlags, RegisterClass dstRegtype, - RegisterClass src1Regtype, Operand src2Regtype, - string asm, SDPatternOperator OpNode> - : I<(outs dstRegtype:$R1), - (ins src1Regtype:$R2, src2Regtype:$R3), - asm, "\t$R1, $R2, $R3", "", - [(set dstRegtype:$R1, (OpNode src1Regtype:$R2, src2Regtype:$R3))]>, - Sched<[WriteIEReg]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<6> ext; - let Inst{30} = isSub; - let Inst{29} = setFlags; - let Inst{28-24} = 0b01011; - let Inst{23-21} = 0b001; - let Inst{20-16} = Rm; - let Inst{15-13} = ext{5-3}; - let Inst{12-10} = ext{2-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeAddSubERegInstruction"; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseAddSubEReg64<bit isSub, bit setFlags, RegisterClass dstRegtype, - RegisterClass src1Regtype, RegisterClass src2Regtype, - Operand ext_op, string asm> - : I<(outs dstRegtype:$Rd), - (ins src1Regtype:$Rn, src2Regtype:$Rm, ext_op:$ext), - asm, "\t$Rd, $Rn, $Rm$ext", "", []>, - Sched<[WriteIEReg]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<6> ext; - let Inst{30} = isSub; - let Inst{29} = setFlags; - let Inst{28-24} = 0b01011; - let Inst{23-21} = 0b001; - let Inst{20-16} = Rm; - let Inst{15} = ext{5}; - let Inst{12-10} = ext{2-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeAddSubERegInstruction"; -} - -// Aliases for register+register add/subtract. 
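A note on the immediate form defined in BaseAddSubImm above: the 2-bit field in Inst{23-22} selects the optional left shift ('00' => lsl #0, '01' => lsl #12) and the 12-bit payload lands in Inst{21-10}. The following is a minimal standalone sketch of that field packing, written against the Inst{...} assignments shown above; it is not LLVM's MC encoder, and the helper name is made up for illustration.

// Illustrative sketch only: packs the ADD/SUB (immediate) fields laid out in
// BaseAddSubImm above into a 32-bit instruction word.
#include <cstdint>
#include <cstdio>

uint32_t encodeAddSubImm(bool is64, bool isSub, bool setFlags, unsigned rd,
                         unsigned rn, unsigned imm12, bool lsl12) {
  uint32_t inst = 0;
  inst |= (is64 ? 1u : 0u) << 31;        // Inst{31}: sf (register width)
  inst |= (isSub ? 1u : 0u) << 30;       // Inst{30} = isSub
  inst |= (setFlags ? 1u : 0u) << 29;    // Inst{29} = setFlags
  inst |= 0b10001u << 24;                // Inst{28-24}
  inst |= (lsl12 ? 0b01u : 0b00u) << 22; // Inst{23-22}: '00' lsl #0, '01' lsl #12
  inst |= (imm12 & 0xFFFu) << 10;        // Inst{21-10}: 12-bit immediate
  inst |= (rn & 0x1Fu) << 5;             // Inst{9-5}
  inst |= (rd & 0x1Fu);                  // Inst{4-0}
  return inst;
}

int main() {
  // add x0, x0, #1          -> expected 0x91000400
  std::printf("%08x\n", encodeAddSubImm(true, false, false, 0, 0, 1, false));
  // add x0, x1, #1, lsl #12 -> expected 0x91400420
  std::printf("%08x\n", encodeAddSubImm(true, false, false, 0, 1, 1, true));
}

The register+register aliases continue below.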
-class AddSubRegAlias<string asm, Instruction inst, RegisterClass dstRegtype, - RegisterClass src1Regtype, RegisterClass src2Regtype, - int shiftExt> - : InstAlias<asm#" $dst, $src1, $src2", - (inst dstRegtype:$dst, src1Regtype:$src1, src2Regtype:$src2, - shiftExt)>; - -multiclass AddSub<bit isSub, string mnemonic, - SDPatternOperator OpNode = null_frag> { - let hasSideEffects = 0 in { - // Add/Subtract immediate - def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32, - mnemonic, OpNode> { - let Inst{31} = 0; - } - def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64, - mnemonic, OpNode> { - let Inst{31} = 1; - } - - // Add/Subtract register - Only used for CodeGen - def Wrr : BaseAddSubRegPseudo<GPR32, OpNode>; - def Xrr : BaseAddSubRegPseudo<GPR64, OpNode>; - - // Add/Subtract shifted register - def Wrs : BaseAddSubSReg<isSub, 0, GPR32, arith_shifted_reg32, mnemonic, - OpNode> { - let Inst{31} = 0; - } - def Xrs : BaseAddSubSReg<isSub, 0, GPR64, arith_shifted_reg64, mnemonic, - OpNode> { - let Inst{31} = 1; - } - } - - // Add/Subtract extended register - let AddedComplexity = 1, hasSideEffects = 0 in { - def Wrx : BaseAddSubEReg<isSub, 0, GPR32sp, GPR32sp, - arith_extended_reg32<i32>, mnemonic, OpNode> { - let Inst{31} = 0; - } - def Xrx : BaseAddSubEReg<isSub, 0, GPR64sp, GPR64sp, - arith_extended_reg32to64<i64>, mnemonic, OpNode> { - let Inst{31} = 1; - } - } - - def Xrx64 : BaseAddSubEReg64<isSub, 0, GPR64sp, GPR64sp, GPR64, - arith_extendlsl64, mnemonic> { - // UXTX and SXTX only. - let Inst{14-13} = 0b11; - let Inst{31} = 1; - } - - // Register/register aliases with no shift when SP is not used. - def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"), - GPR32, GPR32, GPR32, 0>; - def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Xrs"), - GPR64, GPR64, GPR64, 0>; - - // Register/register aliases with no shift when either the destination or - // first source register is SP. This relies on the shifted register aliases - // above matching first in the case when SP is not used. - def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrx"), - GPR32sp, GPR32sp, GPR32, 16>; // UXTW #0 - def : AddSubRegAlias<mnemonic, - !cast<Instruction>(NAME#"Xrx64"), - GPR64sp, GPR64sp, GPR64, 24>; // UXTX #0 -} - -multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode> { - let isCompare = 1, Defs = [CPSR] in { - // Add/Subtract immediate - def Wri : BaseAddSubImm<isSub, 1, GPR32, GPR32sp, addsub_shifted_imm32, - mnemonic, OpNode> { - let Inst{31} = 0; - } - def Xri : BaseAddSubImm<isSub, 1, GPR64, GPR64sp, addsub_shifted_imm64, - mnemonic, OpNode> { - let Inst{31} = 1; - } - - // Add/Subtract register - def Wrr : BaseAddSubRegPseudo<GPR32, OpNode>; - def Xrr : BaseAddSubRegPseudo<GPR64, OpNode>; - - // Add/Subtract shifted register - def Wrs : BaseAddSubSReg<isSub, 1, GPR32, arith_shifted_reg32, mnemonic, - OpNode> { - let Inst{31} = 0; - } - def Xrs : BaseAddSubSReg<isSub, 1, GPR64, arith_shifted_reg64, mnemonic, - OpNode> { - let Inst{31} = 1; - } - - // Add/Subtract extended register - let AddedComplexity = 1 in { - def Wrx : BaseAddSubEReg<isSub, 1, GPR32, GPR32sp, - arith_extended_reg32<i32>, mnemonic, OpNode> { - let Inst{31} = 0; - } - def Xrx : BaseAddSubEReg<isSub, 1, GPR64, GPR64sp, - arith_extended_reg32<i64>, mnemonic, OpNode> { - let Inst{31} = 1; - } - } - - def Xrx64 : BaseAddSubEReg64<isSub, 1, GPR64, GPR64sp, GPR64, - arith_extendlsl64, mnemonic> { - // UXTX and SXTX only. 
- let Inst{14-13} = 0b11; - let Inst{31} = 1; - } - } // Defs = [CPSR] - - // Register/register aliases with no shift when SP is not used. - def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"), - GPR32, GPR32, GPR32, 0>; - def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Xrs"), - GPR64, GPR64, GPR64, 0>; - - // Register/register aliases with no shift when the first source register - // is SP. This relies on the shifted register aliases above matching first - // in the case when SP is not used. - def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrx"), - GPR32, GPR32sp, GPR32, 16>; // UXTW #0 - def : AddSubRegAlias<mnemonic, - !cast<Instruction>(NAME#"Xrx64"), - GPR64, GPR64sp, GPR64, 24>; // UXTX #0 -} - -//--- -// Extract -//--- -def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisPtrTy<3>]>; -def ARM64Extr : SDNode<"ARM64ISD::EXTR", SDTA64EXTR>; - -class BaseExtractImm<RegisterClass regtype, Operand imm_type, string asm, - list<dag> patterns> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, imm_type:$imm), - asm, "\t$Rd, $Rn, $Rm, $imm", "", patterns>, - Sched<[WriteExtr, ReadExtrHi]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<6> imm; - - let Inst{30-23} = 0b00100111; - let Inst{21} = 0; - let Inst{20-16} = Rm; - let Inst{15-10} = imm; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass ExtractImm<string asm> { - def Wrri : BaseExtractImm<GPR32, imm0_31, asm, - [(set GPR32:$Rd, - (ARM64Extr GPR32:$Rn, GPR32:$Rm, imm0_31:$imm))]> { - let Inst{31} = 0; - let Inst{22} = 0; - } - def Xrri : BaseExtractImm<GPR64, imm0_63, asm, - [(set GPR64:$Rd, - (ARM64Extr GPR64:$Rn, GPR64:$Rm, imm0_63:$imm))]> { - - let Inst{31} = 1; - let Inst{22} = 1; - } -} - -//--- -// Bitfield -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseBitfieldImm<bits<2> opc, - RegisterClass regtype, Operand imm_type, string asm> - : I<(outs regtype:$Rd), (ins regtype:$Rn, imm_type:$immr, imm_type:$imms), - asm, "\t$Rd, $Rn, $immr, $imms", "", []>, - Sched<[WriteIS]> { - bits<5> Rd; - bits<5> Rn; - bits<6> immr; - bits<6> imms; - - let Inst{30-29} = opc; - let Inst{28-23} = 0b100110; - let Inst{21-16} = immr; - let Inst{15-10} = imms; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass BitfieldImm<bits<2> opc, string asm> { - def Wri : BaseBitfieldImm<opc, GPR32, imm0_31, asm> { - let Inst{31} = 0; - let Inst{22} = 0; - } - def Xri : BaseBitfieldImm<opc, GPR64, imm0_63, asm> { - let Inst{31} = 1; - let Inst{22} = 1; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseBitfieldImmWith2RegArgs<bits<2> opc, - RegisterClass regtype, Operand imm_type, string asm> - : I<(outs regtype:$Rd), (ins regtype:$src, regtype:$Rn, imm_type:$immr, - imm_type:$imms), - asm, "\t$Rd, $Rn, $immr, $imms", "$src = $Rd", []>, - Sched<[WriteIS]> { - bits<5> Rd; - bits<5> Rn; - bits<6> immr; - bits<6> imms; - - let Inst{30-29} = opc; - let Inst{28-23} = 0b100110; - let Inst{21-16} = immr; - let Inst{15-10} = imms; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass BitfieldImmWith2RegArgs<bits<2> opc, string asm> { - def Wri : BaseBitfieldImmWith2RegArgs<opc, GPR32, imm0_31, asm> { - let Inst{31} = 0; - let Inst{22} = 0; - } - def Xri : BaseBitfieldImmWith2RegArgs<opc, GPR64, imm0_63, asm> { - let Inst{31} = 1; - let Inst{22} = 1; - } -} - -//--- -// Logical -//--- - -// Logical (immediate) -class BaseLogicalImm<bits<2> opc, RegisterClass dregtype, - RegisterClass sregtype, Operand imm_type, string asm, - 
list<dag> pattern> - : I<(outs dregtype:$Rd), (ins sregtype:$Rn, imm_type:$imm), - asm, "\t$Rd, $Rn, $imm", "", pattern>, - Sched<[WriteI]> { - bits<5> Rd; - bits<5> Rn; - bits<13> imm; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100100; - let Inst{22} = imm{12}; - let Inst{21-16} = imm{11-6}; - let Inst{15-10} = imm{5-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeLogicalImmInstruction"; -} - -// Logical (shifted register) -class BaseLogicalSReg<bits<2> opc, bit N, RegisterClass regtype, - logical_shifted_reg shifted_regtype, string asm, - list<dag> pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", pattern>, - Sched<[WriteISReg]> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> src1; - bits<5> src2; - bits<8> shift; - let Inst{30-29} = opc; - let Inst{28-24} = 0b01010; - let Inst{23-22} = shift{7-6}; - let Inst{21} = N; - let Inst{20-16} = src2; - let Inst{15-10} = shift{5-0}; - let Inst{9-5} = src1; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeThreeAddrSRegInstruction"; -} - -// Aliases for register+register logical instructions. -class LogicalRegAlias<string asm, Instruction inst, RegisterClass regtype> - : InstAlias<asm#" $dst, $src1, $src2", - (inst regtype:$dst, regtype:$src1, regtype:$src2, 0)>; - -let AddedComplexity = 6 in -multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode> { - def Wri : BaseLogicalImm<opc, GPR32sp, GPR32, logical_imm32, mnemonic, - [(set GPR32sp:$Rd, (OpNode GPR32:$Rn, - logical_imm32:$imm))]> { - let Inst{31} = 0; - let Inst{22} = 0; // 64-bit version has an additional bit of immediate. - } - def Xri : BaseLogicalImm<opc, GPR64sp, GPR64, logical_imm64, mnemonic, - [(set GPR64sp:$Rd, (OpNode GPR64:$Rn, - logical_imm64:$imm))]> { - let Inst{31} = 1; - } -} - -multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode> { - let isCompare = 1, Defs = [CPSR] in { - def Wri : BaseLogicalImm<opc, GPR32, GPR32, logical_imm32, mnemonic, - [(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_imm32:$imm))]> { - let Inst{31} = 0; - let Inst{22} = 0; // 64-bit version has an additional bit of immediate. - } - def Xri : BaseLogicalImm<opc, GPR64, GPR64, logical_imm64, mnemonic, - [(set GPR64:$Rd, (OpNode GPR64:$Rn, logical_imm64:$imm))]> { - let Inst{31} = 1; - } - } // end Defs = [CPSR] -} - -class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode> - : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>, - Sched<[WriteI]>; - -// Split from LogicalImm as not all instructions have both. 
-multiclass LogicalReg<bits<2> opc, bit N, string mnemonic, - SDPatternOperator OpNode> { - def Wrr : BaseLogicalRegPseudo<GPR32, OpNode>; - def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>; - - def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic, - [(set GPR32:$Rd, (OpNode GPR32:$Rn, - logical_shifted_reg32:$Rm))]> { - let Inst{31} = 0; - } - def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic, - [(set GPR64:$Rd, (OpNode GPR64:$Rn, - logical_shifted_reg64:$Rm))]> { - let Inst{31} = 1; - } - - def : LogicalRegAlias<mnemonic, - !cast<Instruction>(NAME#"Wrs"), GPR32>; - def : LogicalRegAlias<mnemonic, - !cast<Instruction>(NAME#"Xrs"), GPR64>; -} - -// Split from LogicalReg to allow setting CPSR Defs -multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic> { - let Defs = [CPSR], mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic, []>{ - let Inst{31} = 0; - } - def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic, []>{ - let Inst{31} = 1; - } - } // Defs = [CPSR] - - def : LogicalRegAlias<mnemonic, - !cast<Instruction>(NAME#"Wrs"), GPR32>; - def : LogicalRegAlias<mnemonic, - !cast<Instruction>(NAME#"Xrs"), GPR64>; -} - -//--- -// Conditionally set flags -//--- - -// Condition code. -// 4-bit immediate. Pretty-printed as <cc> -def ccode : Operand<i32> { - let PrintMethod = "printCondCode"; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm> - : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $imm, $nzcv, $cond", "", []>, - Sched<[WriteI]> { - let Uses = [CPSR]; - let Defs = [CPSR]; - - bits<5> Rn; - bits<5> imm; - bits<4> nzcv; - bits<4> cond; - - let Inst{30} = op; - let Inst{29-21} = 0b111010010; - let Inst{20-16} = imm; - let Inst{15-12} = cond; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4} = 0b0; - let Inst{3-0} = nzcv; -} - -multiclass CondSetFlagsImm<bit op, string asm> { - def Wi : BaseCondSetFlagsImm<op, GPR32, asm> { - let Inst{31} = 0; - } - def Xi : BaseCondSetFlagsImm<op, GPR64, asm> { - let Inst{31} = 1; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm> - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, - Sched<[WriteI]> { - let Uses = [CPSR]; - let Defs = [CPSR]; - - bits<5> Rn; - bits<5> Rm; - bits<4> nzcv; - bits<4> cond; - - let Inst{30} = op; - let Inst{29-21} = 0b111010010; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4} = 0b0; - let Inst{3-0} = nzcv; -} - -multiclass CondSetFlagsReg<bit op, string asm> { - def Wr : BaseCondSetFlagsReg<op, GPR32, asm> { - let Inst{31} = 0; - } - def Xr : BaseCondSetFlagsReg<op, GPR64, asm> { - let Inst{31} = 1; - } -} - -//--- -// Conditional select -//--- - -class BaseCondSelect<bit op, bits<2> op2, RegisterClass regtype, string asm> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), - asm, "\t$Rd, $Rn, $Rm, $cond", "", - [(set regtype:$Rd, - (ARM64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), CPSR))]>, - Sched<[WriteI]> { - let Uses = [CPSR]; - - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<4> cond; - - let Inst{30} = op; - let Inst{29-21} = 0b011010100; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = op2; - let Inst{9-5} 
= Rn; - let Inst{4-0} = Rd; -} - -multiclass CondSelect<bit op, bits<2> op2, string asm> { - def Wr : BaseCondSelect<op, op2, GPR32, asm> { - let Inst{31} = 0; - } - def Xr : BaseCondSelect<op, op2, GPR64, asm> { - let Inst{31} = 1; - } -} - -class BaseCondSelectOp<bit op, bits<2> op2, RegisterClass regtype, string asm, - PatFrag frag> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), - asm, "\t$Rd, $Rn, $Rm, $cond", "", - [(set regtype:$Rd, - (ARM64csel regtype:$Rn, (frag regtype:$Rm), - (i32 imm:$cond), CPSR))]>, - Sched<[WriteI]> { - let Uses = [CPSR]; - - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<4> cond; - - let Inst{30} = op; - let Inst{29-21} = 0b011010100; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = op2; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass CondSelectOp<bit op, bits<2> op2, string asm, PatFrag frag> { - def Wr : BaseCondSelectOp<op, op2, GPR32, asm, frag> { - let Inst{31} = 0; - } - def Xr : BaseCondSelectOp<op, op2, GPR64, asm, frag> { - let Inst{31} = 1; - } -} - -//--- -// Special Mask Value -//--- -def maski8_or_more : Operand<i32>, - ImmLeaf<i32, [{ return (Imm & 0xff) == 0xff; }]> { -} -def maski16_or_more : Operand<i32>, - ImmLeaf<i32, [{ return (Imm & 0xffff) == 0xffff; }]> { -} - - -//--- -// Load/store -//--- - -// (unsigned immediate) -// Indexed for 8-bit registers. offset is in range [0,4095]. -def MemoryIndexed8Operand : AsmOperandClass { - let Name = "MemoryIndexed8"; - let DiagnosticType = "InvalidMemoryIndexed8"; -} -def am_indexed8 : Operand<i64>, - ComplexPattern<i64, 2, "SelectAddrModeIndexed8", []> { - let PrintMethod = "printAMIndexed8"; - let EncoderMethod - = "getAMIndexed8OpValue<ARM64::fixup_arm64_ldst_imm12_scale1>"; - let ParserMatchClass = MemoryIndexed8Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// Indexed for 16-bit registers. offset is multiple of 2 in range [0,8190], -// stored as immval/2 (the 12-bit literal that encodes directly into the insn). -def MemoryIndexed16Operand : AsmOperandClass { - let Name = "MemoryIndexed16"; - let DiagnosticType = "InvalidMemoryIndexed16"; -} -def am_indexed16 : Operand<i64>, - ComplexPattern<i64, 2, "SelectAddrModeIndexed16", []> { - let PrintMethod = "printAMIndexed16"; - let EncoderMethod - = "getAMIndexed8OpValue<ARM64::fixup_arm64_ldst_imm12_scale2>"; - let ParserMatchClass = MemoryIndexed16Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// Indexed for 32-bit registers. offset is multiple of 4 in range [0,16380], -// stored as immval/4 (the 12-bit literal that encodes directly into the insn). -def MemoryIndexed32Operand : AsmOperandClass { - let Name = "MemoryIndexed32"; - let DiagnosticType = "InvalidMemoryIndexed32"; -} -def am_indexed32 : Operand<i64>, - ComplexPattern<i64, 2, "SelectAddrModeIndexed32", []> { - let PrintMethod = "printAMIndexed32"; - let EncoderMethod - = "getAMIndexed8OpValue<ARM64::fixup_arm64_ldst_imm12_scale4>"; - let ParserMatchClass = MemoryIndexed32Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// Indexed for 64-bit registers. offset is multiple of 8 in range [0,32760], -// stored as immval/8 (the 12-bit literal that encodes directly into the insn). 
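The am_indexed* comments above and below all describe the same scaled unsigned-offset rule: the byte offset must be a non-negative multiple of the access size, and what is stored in the 12-bit field is offset divided by that size. A short standalone sketch of that constraint follows; the helper name is hypothetical and this is not LLVM's addressing-mode selection code.

// Illustrative sketch only: models the "stored as immval/size" rule from the
// am_indexed* comments. Returns the 12-bit scaled immediate, or -1 when the
// offset instead needs the unscaled (LDUR/STUR-style) form defined later.
#include <cstdint>
#include <cstdio>

int encodeScaledOffset(int64_t byteOffset, unsigned accessSize) {
  if (byteOffset < 0 || byteOffset % accessSize != 0)
    return -1;                       // not a multiple of the access size
  int64_t scaled = byteOffset / accessSize;
  if (scaled > 4095)
    return -1;                       // outside the 12-bit range
  return static_cast<int>(scaled);
}

int main() {
  std::printf("%d\n", encodeScaledOffset(32, 8));    // ldr x0, [x1, #32] -> 4
  std::printf("%d\n", encodeScaledOffset(32760, 8)); // top of the range -> 4095
  std::printf("%d\n", encodeScaledOffset(4, 8));     // -1: needs unscaled form
}

The 64-bit and 128-bit operand definitions continue below.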
-def MemoryIndexed64Operand : AsmOperandClass { - let Name = "MemoryIndexed64"; - let DiagnosticType = "InvalidMemoryIndexed64"; -} -def am_indexed64 : Operand<i64>, - ComplexPattern<i64, 2, "SelectAddrModeIndexed64", []> { - let PrintMethod = "printAMIndexed64"; - let EncoderMethod - = "getAMIndexed8OpValue<ARM64::fixup_arm64_ldst_imm12_scale8>"; - let ParserMatchClass = MemoryIndexed64Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// Indexed for 128-bit registers. offset is multiple of 16 in range [0,65520], -// stored as immval/16 (the 12-bit literal that encodes directly into the insn). -def MemoryIndexed128Operand : AsmOperandClass { - let Name = "MemoryIndexed128"; - let DiagnosticType = "InvalidMemoryIndexed128"; -} -def am_indexed128 : Operand<i64>, - ComplexPattern<i64, 2, "SelectAddrModeIndexed128", []> { - let PrintMethod = "printAMIndexed128"; - let EncoderMethod - = "getAMIndexed8OpValue<ARM64::fixup_arm64_ldst_imm12_scale16>"; - let ParserMatchClass = MemoryIndexed128Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// No offset. -def MemoryNoIndexOperand : AsmOperandClass { let Name = "MemoryNoIndex"; } -def am_noindex : Operand<i64>, - ComplexPattern<i64, 1, "SelectAddrModeNoIndex", []> { - let PrintMethod = "printAMNoIndex"; - let ParserMatchClass = MemoryNoIndexOperand; - let MIOperandInfo = (ops GPR64sp:$base); -} - -class BaseLoadStoreUI<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, list<dag> pattern> - : I<oops, iops, asm, "\t$Rt, $addr", "", pattern> { - bits<5> dst; - - bits<17> addr; - bits<5> base = addr{4-0}; - bits<12> offset = addr{16-5}; - - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b01; - let Inst{23-22} = opc; - let Inst{21-10} = offset; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeUnsignedLdStInstruction"; -} - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class LoadUI<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - Operand indextype, string asm, list<dag> pattern> - : BaseLoadStoreUI<sz, V, opc, - (outs regtype:$Rt), (ins indextype:$addr), asm, pattern>, - Sched<[WriteLD]>; - -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -class StoreUI<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - Operand indextype, string asm, list<dag> pattern> - : BaseLoadStoreUI<sz, V, opc, - (outs), (ins regtype:$Rt, indextype:$addr), asm, pattern>, - Sched<[WriteST]>; - -def PrefetchOperand : AsmOperandClass { - let Name = "Prefetch"; - let ParserMethod = "tryParsePrefetch"; -} -def prfop : Operand<i32> { - let PrintMethod = "printPrefetchOp"; - let ParserMatchClass = PrefetchOperand; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class PrefetchUI<bits<2> sz, bit V, bits<2> opc, string asm, list<dag> pat> - : BaseLoadStoreUI<sz, V, opc, - (outs), (ins prfop:$Rt, am_indexed64:$addr), asm, pat>, - Sched<[WriteLD]>; - -//--- -// Load literal -//--- - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class LoadLiteral<bits<2> opc, bit V, RegisterClass regtype, string asm> - : I<(outs regtype:$Rt), (ins am_brcond:$label), - asm, "\t$Rt, $label", "", []>, - Sched<[WriteLD]> { - bits<5> Rt; - bits<19> label; - let Inst{31-30} = opc; - let Inst{29-27} = 0b011; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-5} = label; - let Inst{4-0} = Rt; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class PrefetchLiteral<bits<2> opc, bit V, string asm, list<dag> pat> - : 
I<(outs), (ins prfop:$Rt, am_brcond:$label), - asm, "\t$Rt, $label", "", pat>, - Sched<[WriteLD]> { - bits<5> Rt; - bits<19> label; - let Inst{31-30} = opc; - let Inst{29-27} = 0b011; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-5} = label; - let Inst{4-0} = Rt; -} - -//--- -// Load/store register offset -//--- - -class MemROAsmOperand<int sz> : AsmOperandClass { - let Name = "MemoryRegisterOffset"#sz; -} - -def MemROAsmOperand8 : MemROAsmOperand<8>; -def MemROAsmOperand16 : MemROAsmOperand<16>; -def MemROAsmOperand32 : MemROAsmOperand<32>; -def MemROAsmOperand64 : MemROAsmOperand<64>; -def MemROAsmOperand128 : MemROAsmOperand<128>; - -class ro_indexed<int sz> : Operand<i64> { // ComplexPattern<...> - let PrintMethod = "printMemoryRegOffset"#sz; - let MIOperandInfo = (ops GPR64sp:$base, GPR64:$offset, i32imm:$extend); -} - -def ro_indexed8 : ro_indexed<8>, ComplexPattern<i64, 3, "SelectAddrModeRO8", []> { - let ParserMatchClass = MemROAsmOperand8; -} - -def ro_indexed16 : ro_indexed<16>, ComplexPattern<i64, 3, "SelectAddrModeRO16", []> { - let ParserMatchClass = MemROAsmOperand16; -} - -def ro_indexed32 : ro_indexed<32>, ComplexPattern<i64, 3, "SelectAddrModeRO32", []> { - let ParserMatchClass = MemROAsmOperand32; -} - -def ro_indexed64 : ro_indexed<64>, ComplexPattern<i64, 3, "SelectAddrModeRO64", []> { - let ParserMatchClass = MemROAsmOperand64; -} - -def ro_indexed128 : ro_indexed<128>, ComplexPattern<i64, 3, "SelectAddrModeRO128", []> { - let ParserMatchClass = MemROAsmOperand128; -} - -class LoadStore8RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, dag ins, dag outs, list<dag> pat> - : I<ins, outs, asm, "\t$Rt, $addr", "", pat> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -class Load8RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore8RO<sz, V, opc, regtype, asm, - (outs regtype:$Rt), (ins ro_indexed8:$addr), pat>, - Sched<[WriteLDIdx, ReadAdrBase]>; - -class Store8RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore8RO<sz, V, opc, regtype, asm, - (outs), (ins regtype:$Rt, ro_indexed8:$addr), pat>, - Sched<[WriteSTIdx, ReadAdrBase]>; - -class LoadStore16RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, dag ins, dag outs, list<dag> pat> - : I<ins, outs, asm, "\t$Rt, $addr", "", pat> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -class Load16RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore16RO<sz, V, opc, regtype, asm, - (outs regtype:$Rt), (ins ro_indexed16:$addr), pat>, - Sched<[WriteLDIdx, ReadAdrBase]>; - -class Store16RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore16RO<sz, V, opc, regtype, asm, - (outs), (ins regtype:$Rt, ro_indexed16:$addr), pat>, - Sched<[WriteSTIdx, ReadAdrBase]>; - -class LoadStore32RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, dag ins, dag outs, list<dag> pat> - : I<ins, outs, asm, "\t$Rt, $addr", "", pat> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -class Load32RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore32RO<sz, V, opc, regtype, asm, - (outs regtype:$Rt), (ins ro_indexed32:$addr), pat>, - Sched<[WriteLDIdx, ReadAdrBase]>; - -class Store32RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore32RO<sz, V, opc, regtype, asm, - (outs), (ins regtype:$Rt, ro_indexed32:$addr), pat>, - Sched<[WriteSTIdx, ReadAdrBase]>; - -class LoadStore64RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, dag ins, dag outs, list<dag> pat> - : I<ins, outs, asm, "\t$Rt, $addr", "", pat> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class Load64RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore64RO<sz, V, opc, regtype, asm, - (outs regtype:$Rt), (ins ro_indexed64:$addr), pat>, - Sched<[WriteLDIdx, ReadAdrBase]>; - -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -class Store64RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore64RO<sz, V, opc, regtype, asm, - (outs), (ins regtype:$Rt, ro_indexed64:$addr), pat>, - Sched<[WriteSTIdx, ReadAdrBase]>; - - -class LoadStore128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, dag ins, dag outs, list<dag> pat> - : I<ins, outs, asm, "\t$Rt, $addr", "", pat> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class Load128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore128RO<sz, V, opc, regtype, asm, - (outs regtype:$Rt), (ins ro_indexed128:$addr), pat>, - Sched<[WriteLDIdx, ReadAdrBase]>; - -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -class Store128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore128RO<sz, V, opc, regtype, asm, - (outs), (ins regtype:$Rt, ro_indexed128:$addr), pat>, - Sched<[WriteSTIdx, ReadAdrBase]>; - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class PrefetchRO<bits<2> sz, bit V, bits<2> opc, string asm, list<dag> pat> - : I<(outs), (ins prfop:$Rt, ro_indexed64:$addr), asm, - "\t$Rt, $addr", "", pat>, - Sched<[WriteLD]> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -//--- -// Load/store unscaled immediate -//--- - -def MemoryUnscaledOperand : AsmOperandClass { - let Name = "MemoryUnscaled"; - let DiagnosticType = "InvalidMemoryIndexedSImm9"; -} -class am_unscaled_operand : Operand<i64> { - let PrintMethod = "printAMUnscaled"; - let ParserMatchClass = MemoryUnscaledOperand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} -def am_unscaled : am_unscaled_operand; -def am_unscaled8 : am_unscaled_operand, - ComplexPattern<i64, 2, "SelectAddrModeUnscaled8", []>; -def am_unscaled16 : am_unscaled_operand, - ComplexPattern<i64, 2, "SelectAddrModeUnscaled16", []>; -def am_unscaled32 : am_unscaled_operand, - ComplexPattern<i64, 2, "SelectAddrModeUnscaled32", []>; -def am_unscaled64 : am_unscaled_operand, - ComplexPattern<i64, 2, "SelectAddrModeUnscaled64", []>; -def am_unscaled128 : am_unscaled_operand, - ComplexPattern<i64, 2, "SelectAddrModeUnscaled128", []>; - -class BaseLoadStoreUnscale<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, list<dag> pattern> - : I<oops, iops, asm, "\t$Rt, $addr", "", pattern> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<9> offset; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0; - let Inst{20-12} = offset; - let Inst{11-10} = 0b00; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeSignedLdStInstruction"; -} - -let AddedComplexity = 1 in // try this before LoadUI -class LoadUnscaled<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - Operand amtype, string asm, list<dag> pattern> - : BaseLoadStoreUnscale<sz, V, opc, (outs regtype:$Rt), - (ins amtype:$addr), asm, pattern>, - Sched<[WriteLD]>; - -let AddedComplexity = 1 in // try this before StoreUI -class StoreUnscaled<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - Operand amtype, string asm, list<dag> pattern> - : BaseLoadStoreUnscale<sz, V, opc, (outs), - (ins regtype:$Rt, amtype:$addr), asm, pattern>, - Sched<[WriteST]>; - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class PrefetchUnscaled<bits<2> sz, bit V, bits<2> opc, string asm, list<dag> pat> - : BaseLoadStoreUnscale<sz, V, opc, (outs), - (ins prfop:$Rt, am_unscaled:$addr), asm, pat>, - Sched<[WriteLD]>; - -//--- -// Load/store unscaled immediate, unprivileged -//--- - -class BaseLoadStoreUnprivileged<bits<2> sz, bit V, bits<2> opc, - dag oops, dag iops, string asm> - : I<oops, iops, asm, "\t$Rt, $addr", "", []> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> base; - bits<9> offset; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0; - let Inst{20-12} = offset; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeSignedLdStInstruction"; -} - -let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in { -class LoadUnprivileged<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStoreUnprivileged<sz, V, opc, - (outs regtype:$Rt), (ins am_unscaled:$addr), asm>, - Sched<[WriteLD]>; -} - -let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { -class StoreUnprivileged<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStoreUnprivileged<sz, V, opc, - (outs), (ins regtype:$Rt, am_unscaled:$addr), asm>, - Sched<[WriteST]>; -} - -//--- -// Load/store pre-indexed -//--- - -class BaseLoadStorePreIdx<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, string cstr> - : I<oops, iops, asm, "\t$Rt, $addr!", cstr, []> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. - bits<5> dst; - bits<5> base; - bits<9> offset; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0; - let Inst{23-22} = opc; - let Inst{21} = 0; - let Inst{20-12} = offset; - let Inst{11-10} = 0b11; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeSignedLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -// FIXME: Modeling the write-back of these instructions for isel is tricky. -// we need the complex addressing mode for the memory reference, but -// we also need the write-back specified as a tied operand to the -// base register. That combination does not play nicely with -// the asm matcher and friends. -class LoadPreIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStorePreIdx<sz, V, opc, - (outs regtype:$Rt/*, GPR64sp:$wback*/), - (ins am_unscaled:$addr), asm, ""/*"$addr.base = $wback"*/>, - Sched<[WriteLD, WriteAdr]>; - -let mayStore = 1, mayLoad = 0 in -class StorePreIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStorePreIdx<sz, V, opc, - (outs/* GPR64sp:$wback*/), - (ins regtype:$Rt, am_unscaled:$addr), - asm, ""/*"$addr.base = $wback"*/>, - Sched<[WriteAdr, WriteST]>; -} // hasSideEffects = 0 - -// ISel pseudo-instructions which have the tied operands. When the MC lowering -// logic finally gets smart enough to strip off tied operands that are just -// for isel convenience, we can get rid of these pseudos and just reference -// the real instructions directly. -// -// Ironically, also because of the writeback operands, we can't put the -// matcher pattern directly on the instruction, but need to define it -// separately. -// -// Loads aren't matched with patterns here at all, but rather in C++ -// custom lowering. 
-let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in { -class LoadPreIdxPseudo<RegisterClass regtype> - : Pseudo<(outs regtype:$Rt, GPR64sp:$wback), - (ins am_noindex:$addr, simm9:$offset), [], - "$addr.base = $wback,@earlyclobber $wback">, - Sched<[WriteLD, WriteAdr]>; -class LoadPostIdxPseudo<RegisterClass regtype> - : Pseudo<(outs regtype:$Rt, GPR64sp:$wback), - (ins am_noindex:$addr, simm9:$offset), [], - "$addr.base = $wback,@earlyclobber $wback">, - Sched<[WriteLD, WriteI]>; -} -multiclass StorePreIdxPseudo<RegisterClass regtype, ValueType Ty, - SDPatternOperator OpNode> { - let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in - def _isel: Pseudo<(outs GPR64sp:$wback), - (ins regtype:$Rt, am_noindex:$addr, simm9:$offset), [], - "$addr.base = $wback,@earlyclobber $wback">, - Sched<[WriteAdr, WriteST]>; - - def : Pat<(OpNode (Ty regtype:$Rt), am_noindex:$addr, simm9:$offset), - (!cast<Instruction>(NAME#_isel) regtype:$Rt, am_noindex:$addr, - simm9:$offset)>; -} - -//--- -// Load/store post-indexed -//--- - -// (pre-index) load/stores. -class BaseLoadStorePostIdx<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, string cstr> - : I<oops, iops, asm, "\t$Rt, $addr, $idx", cstr, []> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. - bits<5> dst; - bits<5> base; - bits<9> offset; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0b0; - let Inst{20-12} = offset; - let Inst{11-10} = 0b01; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeSignedLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -// FIXME: Modeling the write-back of these instructions for isel is tricky. -// we need the complex addressing mode for the memory reference, but -// we also need the write-back specified as a tied operand to the -// base register. That combination does not play nicely with -// the asm matcher and friends. -class LoadPostIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStorePostIdx<sz, V, opc, - (outs regtype:$Rt/*, GPR64sp:$wback*/), - (ins am_noindex:$addr, simm9:$idx), - asm, ""/*"$addr.base = $wback"*/>, - Sched<[WriteLD, WriteI]>; - -let mayStore = 1, mayLoad = 0 in -class StorePostIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStorePostIdx<sz, V, opc, - (outs/* GPR64sp:$wback*/), - (ins regtype:$Rt, am_noindex:$addr, simm9:$idx), - asm, ""/*"$addr.base = $wback"*/>, - Sched<[WriteAdr, WriteST, ReadAdrBase]>; -} // hasSideEffects = 0 - -// ISel pseudo-instructions which have the tied operands. When the MC lowering -// logic finally gets smart enough to strip off tied operands that are just -// for isel convenience, we can get rid of these pseudos and just reference -// the real instructions directly. -// -// Ironically, also because of the writeback operands, we can't put the -// matcher pattern directly on the instruction, but need to define it -// separately. 
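The post-indexed forms above (and the StorePostIdxPseudo multiclass below) differ from the pre-indexed ones only in when the offset is applied: the access uses the original base address, and the offset is added to the base afterwards. A matching sketch, under the same assumptions as the pre-indexed example earlier:

// Illustrative sketch only: post-indexed semantics, e.g. "str w1, [x0], #8".
#include <cstdint>
#include <cstring>
#include <cstdio>

// Hypothetical helper: store 'val' at the original base, then return base+offset.
uint64_t storePostIndexed(uint8_t *mem, uint64_t base, int64_t offset,
                          uint32_t val) {
  std::memcpy(mem + base, &val, sizeof(val)); // the store uses the old address
  return base + offset;                       // write-back happens afterwards
}

int main() {
  uint8_t mem[64] = {};
  uint64_t x0 = 8;
  x0 = storePostIndexed(mem, x0, 8, 0x12345678u); // like: str w1, [x0], #8
  std::printf("x0 = %llu\n", (unsigned long long)x0); // 16, but data is at 8
}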
-multiclass StorePostIdxPseudo<RegisterClass regtype, ValueType Ty, - SDPatternOperator OpNode, Instruction Insn> { - let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in - def _isel: Pseudo<(outs GPR64sp:$wback), - (ins regtype:$Rt, am_noindex:$addr, simm9:$idx), [], - "$addr.base = $wback,@earlyclobber $wback">, - PseudoInstExpansion<(Insn regtype:$Rt, am_noindex:$addr, simm9:$idx)>, - Sched<[WriteAdr, WriteST, ReadAdrBase]>; - - def : Pat<(OpNode (Ty regtype:$Rt), am_noindex:$addr, simm9:$idx), - (!cast<Instruction>(NAME#_isel) regtype:$Rt, am_noindex:$addr, - simm9:$idx)>; -} - -//--- -// Load/store pair -//--- - -// (indexed, offset) - -class BaseLoadStorePairOffset<bits<2> opc, bit V, bit L, dag oops, dag iops, - string asm> - : I<oops, iops, asm, "\t$Rt, $Rt2, $addr", "", []> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> dst2; - bits<5> base; - bits<7> offset; - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = V; - let Inst{25-23} = 0b010; - let Inst{22} = L; - let Inst{21-15} = offset; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodePairLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPairOffset<bits<2> opc, bit V, RegisterClass regtype, - Operand indextype, string asm> - : BaseLoadStorePairOffset<opc, V, 1, - (outs regtype:$Rt, regtype:$Rt2), - (ins indextype:$addr), asm>, - Sched<[WriteLD, WriteLDHi]>; - -let mayLoad = 0, mayStore = 1 in -class StorePairOffset<bits<2> opc, bit V, RegisterClass regtype, - Operand indextype, string asm> - : BaseLoadStorePairOffset<opc, V, 0, (outs), - (ins regtype:$Rt, regtype:$Rt2, indextype:$addr), - asm>, - Sched<[WriteSTP]>; -} // hasSideEffects = 0 - -// (pre-indexed) - -def MemoryIndexed32SImm7 : AsmOperandClass { - let Name = "MemoryIndexed32SImm7"; - let DiagnosticType = "InvalidMemoryIndexed32SImm7"; -} -def am_indexed32simm7 : Operand<i32> { // ComplexPattern<...> - let PrintMethod = "printAMIndexed32"; - let ParserMatchClass = MemoryIndexed32SImm7; - let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset); -} - -def MemoryIndexed64SImm7 : AsmOperandClass { - let Name = "MemoryIndexed64SImm7"; - let DiagnosticType = "InvalidMemoryIndexed64SImm7"; -} -def am_indexed64simm7 : Operand<i32> { // ComplexPattern<...> - let PrintMethod = "printAMIndexed64"; - let ParserMatchClass = MemoryIndexed64SImm7; - let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset); -} - -def MemoryIndexed128SImm7 : AsmOperandClass { - let Name = "MemoryIndexed128SImm7"; - let DiagnosticType = "InvalidMemoryIndexed128SImm7"; -} -def am_indexed128simm7 : Operand<i32> { // ComplexPattern<...> - let PrintMethod = "printAMIndexed128"; - let ParserMatchClass = MemoryIndexed128SImm7; - let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset); -} - -class BaseLoadStorePairPreIdx<bits<2> opc, bit V, bit L, dag oops, dag iops, - string asm> - : I<oops, iops, asm, "\t$Rt, $Rt2, $addr!", "", []> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> dst2; - bits<5> base; - bits<7> offset; - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = V; - let Inst{25-23} = 0b011; - let Inst{22} = L; - let Inst{21-15} = offset; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodePairLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPairPreIdx<bits<2> opc, bit V, RegisterClass regtype, - Operand addrmode, string asm> - : BaseLoadStorePairPreIdx<opc, V, 1, - (outs regtype:$Rt, regtype:$Rt2), - (ins addrmode:$addr), asm>, - Sched<[WriteLD, WriteLDHi, WriteAdr]>; - -let mayStore = 1, mayLoad = 0 in -class StorePairPreIdx<bits<2> opc, bit V, RegisterClass regtype, - Operand addrmode, string asm> - : BaseLoadStorePairPreIdx<opc, V, 0, (outs), - (ins regtype:$Rt, regtype:$Rt2, addrmode:$addr), - asm>, - Sched<[WriteAdr, WriteSTP]>; -} // hasSideEffects = 0 - -// (post-indexed) - -class BaseLoadStorePairPostIdx<bits<2> opc, bit V, bit L, dag oops, dag iops, - string asm> - : I<oops, iops, asm, "\t$Rt, $Rt2, $addr, $idx", "", []> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> dst2; - bits<5> base; - bits<7> offset; - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = V; - let Inst{25-23} = 0b001; - let Inst{22} = L; - let Inst{21-15} = offset; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodePairLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPairPostIdx<bits<2> opc, bit V, RegisterClass regtype, - Operand idxtype, string asm> - : BaseLoadStorePairPostIdx<opc, V, 1, - (outs regtype:$Rt, regtype:$Rt2), - (ins am_noindex:$addr, idxtype:$idx), asm>, - Sched<[WriteLD, WriteLDHi, WriteAdr]>; - -let mayStore = 1, mayLoad = 0 in -class StorePairPostIdx<bits<2> opc, bit V, RegisterClass regtype, - Operand idxtype, string asm> - : BaseLoadStorePairPostIdx<opc, V, 0, (outs), - (ins regtype:$Rt, regtype:$Rt2, - am_noindex:$addr, idxtype:$idx), - asm>, - Sched<[WriteAdr, WriteSTP]>; -} // hasSideEffects = 0 - -// (no-allocate) - -class BaseLoadStorePairNoAlloc<bits<2> opc, bit V, bit L, dag oops, dag iops, - string asm> - : I<oops, iops, asm, "\t$Rt, $Rt2, $addr", "", []> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> dst2; - bits<5> base; - bits<7> offset; - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = V; - let Inst{25-23} = 0b000; - let Inst{22} = L; - let Inst{21-15} = offset; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodePairLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPairNoAlloc<bits<2> opc, bit V, RegisterClass regtype, - Operand indextype, string asm> - : BaseLoadStorePairNoAlloc<opc, V, 1, - (outs regtype:$Rt, regtype:$Rt2), - (ins indextype:$addr), asm>, - Sched<[WriteLD, WriteLDHi]>; - -let mayStore = 1, mayLoad = 0 in -class StorePairNoAlloc<bits<2> opc, bit V, RegisterClass regtype, - Operand indextype, string asm> - : BaseLoadStorePairNoAlloc<opc, V, 0, (outs), - (ins regtype:$Rt, regtype:$Rt2, indextype:$addr), - asm>, - Sched<[WriteSTP]>; -} // hasSideEffects = 0 - -//--- -// Load/store exclusive -//--- - -// True exclusive operations write to and/or read from the system's exclusive -// monitors, which as far as a compiler is concerned can be modelled as a -// random shared memory address. Hence LoadExclusive mayStore. -let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in -class BaseLoadStoreExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0, - dag oops, dag iops, string asm, string operands> - : I<oops, iops, asm, operands, "", []> { - let Inst{31-30} = sz; - let Inst{29-24} = 0b001000; - let Inst{23} = o2; - let Inst{22} = L; - let Inst{21} = o1; - let Inst{15} = o0; - - let DecoderMethod = "DecodeExclusiveLdStInstruction"; -} - -// Neither Rs nor Rt2 operands. -class LoadStoreExclusiveSimple<bits<2> sz, bit o2, bit L, bit o1, bit o0, - dag oops, dag iops, string asm, string operands> - : BaseLoadStoreExclusive<sz, o2, L, o1, o0, oops, iops, asm, operands> { - bits<5> reg; - bits<5> base; - let Inst{20-16} = 0b11111; - let Inst{14-10} = 0b11111; - let Inst{9-5} = base; - let Inst{4-0} = reg; -} - -// Simple load acquires don't set the exclusive monitor -let mayLoad = 1, mayStore = 0 in -class LoadAcquire<bits<2> sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs regtype:$Rt), - (ins am_noindex:$addr), asm, "\t$Rt, $addr">, - Sched<[WriteLD]>; - -class LoadExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs regtype:$Rt), - (ins am_noindex:$addr), asm, "\t$Rt, $addr">, - Sched<[WriteLD]>; - -class LoadExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : BaseLoadStoreExclusive<sz, o2, L, o1, o0, - (outs regtype:$Rt, regtype:$Rt2), - (ins am_noindex:$addr), asm, - "\t$Rt, $Rt2, $addr">, - Sched<[WriteLD, WriteLDHi]> { - bits<5> dst1; - bits<5> dst2; - bits<5> base; - let Inst{20-16} = 0b11111; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst1; -} - -// Simple store release operations do not check the exclusive monitor. 
-let mayLoad = 0, mayStore = 1 in -class StoreRelease<bits<2> sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs), - (ins regtype:$Rt, am_noindex:$addr), - asm, "\t$Rt, $addr">, - Sched<[WriteST]>; - -let mayLoad = 1, mayStore = 1 in -class StoreExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : BaseLoadStoreExclusive<sz, o2, L, o1, o0, (outs GPR32:$Ws), - (ins regtype:$Rt, am_noindex:$addr), - asm, "\t$Ws, $Rt, $addr">, - Sched<[WriteSTX]> { - bits<5> status; - bits<5> reg; - bits<5> base; - let Inst{20-16} = status; - let Inst{14-10} = 0b11111; - let Inst{9-5} = base; - let Inst{4-0} = reg; - - let Constraints = "@earlyclobber $Ws"; -} - -class StoreExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : BaseLoadStoreExclusive<sz, o2, L, o1, o0, - (outs GPR32:$Ws), - (ins regtype:$Rt, regtype:$Rt2, am_noindex:$addr), - asm, "\t$Ws, $Rt, $Rt2, $addr">, - Sched<[WriteSTX]> { - bits<5> status; - bits<5> dst1; - bits<5> dst2; - bits<5> base; - let Inst{20-16} = status; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst1; - - let Constraints = "@earlyclobber $Ws"; -} - -//--- -// Exception generation -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class ExceptionGeneration<bits<3> op1, bits<2> ll, string asm> - : I<(outs), (ins imm0_65535:$imm), asm, "\t$imm", "", []>, - Sched<[WriteSys]> { - bits<16> imm; - let Inst{31-24} = 0b11010100; - let Inst{23-21} = op1; - let Inst{20-5} = imm; - let Inst{4-2} = 0b000; - let Inst{1-0} = ll; -} - -//--- -// Floating point to integer conversion -//--- - -class BaseFPToIntegerUnscaled<bits<2> type, bits<2> rmode, bits<3> opcode, - RegisterClass srcType, RegisterClass dstType, - string asm, list<dag> pattern> - : I<(outs dstType:$Rd), (ins srcType:$Rn), - asm, "\t$Rd, $Rn", "", pattern>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - let Inst{30} = 0; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseFPToInteger<bits<2> type, bits<2> rmode, bits<3> opcode, - RegisterClass srcType, RegisterClass dstType, - Operand immType, string asm> - : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale), - asm, "\t$Rd, $Rn, $scale", "", []>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - bits<6> scale; - let Inst{30} = 0; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = scale; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass FPToInteger<bits<2> rmode, bits<3> opcode, string asm, SDPatternOperator OpN> { - // Unscaled single-precision to 32-bit - def UWSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR32, asm, - [(set GPR32:$Rd, (OpN FPR32:$Rn))]> { - let Inst{31} = 0; // 32-bit GPR flag - } - - // Unscaled single-precision to 64-bit - def UXSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR64, asm, - [(set GPR64:$Rd, (OpN FPR32:$Rn))]> { - let Inst{31} = 1; // 64-bit GPR flag - } - - // Unscaled double-precision to 32-bit - def UWDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR32, asm, - [(set GPR32:$Rd, (OpN (f64 FPR64:$Rn)))]> { - let Inst{31} = 0; // 32-bit GPR flag - } - - // Unscaled double-precision 
to 64-bit - def UXDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR64, asm, - [(set GPR64:$Rd, (OpN (f64 FPR64:$Rn)))]> { - let Inst{31} = 1; // 64-bit GPR flag - } - - // Scaled single-precision to 32-bit - def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32, - fixedpoint32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - } - - // Scaled single-precision to 64-bit - def SXSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR64, - fixedpoint64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - } - - // Scaled double-precision to 32-bit - def SWDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR32, - fixedpoint32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - } - - // Scaled double-precision to 64-bit - def SXDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR64, - fixedpoint64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - } -} - -//--- -// Integer to floating point conversion -//--- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseIntegerToFP<bit isUnsigned, - RegisterClass srcType, RegisterClass dstType, - Operand immType, string asm> - : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale), - asm, "\t$Rd, $Rn, $scale", "", []>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - bits<6> scale; - let Inst{30-23} = 0b00111100; - let Inst{21-17} = 0b00001; - let Inst{16} = isUnsigned; - let Inst{15-10} = scale; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseIntegerToFPUnscaled<bit isUnsigned, - RegisterClass srcType, RegisterClass dstType, - ValueType dvt, string asm, SDNode node> - : I<(outs dstType:$Rd), (ins srcType:$Rn), - asm, "\t$Rd, $Rn", "", [(set (dvt dstType:$Rd), (node srcType:$Rn))]>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - bits<6> scale; - let Inst{30-23} = 0b00111100; - let Inst{21-17} = 0b10001; - let Inst{16} = isUnsigned; - let Inst{15-10} = 0b000000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass IntegerToFP<bit isUnsigned, string asm, SDNode node> { - // Unscaled - def UWSri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR32, f32, asm, node> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def UWDri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR64, f64, asm, node> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } - - def UXSri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR32, f32, asm, node> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def UXDri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR64, f64, asm, node> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } - - // Scaled - def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } - - def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } -} - -//--- -// Unscaled integer <-> floating point conversion (i.e. 
FMOV) -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseUnscaledConversion<bits<2> rmode, bits<3> opcode, - RegisterClass srcType, RegisterClass dstType, - string asm> - : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", - // We use COPY_TO_REGCLASS for these bitconvert operations. - // copyPhysReg() expands the resultant COPY instructions after - // regalloc is done. This gives greater freedom for the allocator - // and related passes (coalescing, copy propagation, et. al.) to - // be more effective. - [/*(set (dvt dstType:$Rd), (bitconvert (svt srcType:$Rn)))*/]>, - Sched<[WriteFCopy]> { - bits<5> Rd; - bits<5> Rn; - let Inst{30-23} = 0b00111100; - let Inst{21} = 1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0b000000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseUnscaledConversionToHigh<bits<2> rmode, bits<3> opcode, - RegisterClass srcType, RegisterOperand dstType, string asm> - : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd[1], $Rn", "", []>, - Sched<[WriteFCopy]> { - bits<5> Rd; - bits<5> Rn; - let Inst{30-23} = 0b00111101; - let Inst{21} = 1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0b000000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseUnscaledConversionFromHigh<bits<2> rmode, bits<3> opcode, - RegisterOperand srcType, RegisterClass dstType, string asm> - : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn[1]", "", []>, - Sched<[WriteFCopy]> { - bits<5> Rd; - bits<5> Rn; - let Inst{30-23} = 0b00111101; - let Inst{21} = 1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0b000000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - - - -multiclass UnscaledConversion<string asm> { - def WSr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def XDr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } - - def SWr : BaseUnscaledConversion<0b00, 0b110, FPR32, GPR32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def DXr : BaseUnscaledConversion<0b00, 0b110, FPR64, GPR64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } - - def XDHighr : BaseUnscaledConversionToHigh<0b01, 0b111, GPR64, V128, - asm#".d"> { - let Inst{31} = 1; - let Inst{22} = 0; - } - - def DXHighr : BaseUnscaledConversionFromHigh<0b01, 0b110, V128, GPR64, - asm#".d"> { - let Inst{31} = 1; - let Inst{22} = 0; - } - - def : InstAlias<asm#"$Vd.d[1], $Rn", - (!cast<Instruction>(NAME#XDHighr) V128:$Vd, GPR64:$Rn), 0>; - def : InstAlias<asm#"$Rd, $Vn.d[1]", - (!cast<Instruction>(NAME#DXHighr) GPR64:$Rd, V128:$Vn), 0>; -} - -//--- -// Floating point conversion -//--- - -class BaseFPConversion<bits<2> type, bits<2> opcode, RegisterClass dstType, - RegisterClass srcType, string asm, list<dag> pattern> - : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", pattern>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-24} = 0b00011110; - let Inst{23-22} = type; - let Inst{21-17} = 0b10001; - let Inst{16-15} = opcode; - let Inst{14-10} = 0b10000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass FPConversion<string asm> { - // Double-precision to 
Half-precision - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in - def HDr : BaseFPConversion<0b01, 0b11, FPR16, FPR64, asm, []>; - - // Double-precision to Single-precision - def SDr : BaseFPConversion<0b01, 0b00, FPR32, FPR64, asm, - [(set FPR32:$Rd, (fround FPR64:$Rn))]>; - - // Half-precision to Double-precision - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in - def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm, []>; - - // Half-precision to Single-precision - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in - def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm, []>; - - // Single-precision to Double-precision - def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm, - [(set FPR64:$Rd, (fextend FPR32:$Rn))]>; - - // Single-precision to Half-precision - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in - def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm, []>; -} - -//--- -// Single operand floating point data processing -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype, - ValueType vt, string asm, SDPatternOperator node> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "", - [(set (vt regtype:$Rd), (node (vt regtype:$Rn)))]>, - Sched<[WriteF]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-23} = 0b000111100; - let Inst{21-19} = 0b100; - let Inst{18-15} = opcode; - let Inst{14-10} = 0b10000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SingleOperandFPData<bits<4> opcode, string asm, - SDPatternOperator node = null_frag> { - def Sr : BaseSingleOperandFPData<opcode, FPR32, f32, asm, node> { - let Inst{22} = 0; // 32-bit size flag - } - - def Dr : BaseSingleOperandFPData<opcode, FPR64, f64, asm, node> { - let Inst{22} = 1; // 64-bit size flag - } -} - -//--- -// Two operand floating point data processing -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseTwoOperandFPData<bits<4> opcode, RegisterClass regtype, - string asm, list<dag> pat> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", pat>, - Sched<[WriteF]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass TwoOperandFPData<bits<4> opcode, string asm, - SDPatternOperator node = null_frag> { - def Srr : BaseTwoOperandFPData<opcode, FPR32, asm, - [(set (f32 FPR32:$Rd), - (node (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]> { - let Inst{22} = 0; // 32-bit size flag - } - - def Drr : BaseTwoOperandFPData<opcode, FPR64, asm, - [(set (f64 FPR64:$Rd), - (node (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]> { - let Inst{22} = 1; // 64-bit size flag - } -} - -multiclass TwoOperandFPDataNeg<bits<4> opcode, string asm, SDNode node> { - def Srr : BaseTwoOperandFPData<opcode, FPR32, asm, - [(set FPR32:$Rd, (fneg (node FPR32:$Rn, (f32 FPR32:$Rm))))]> { - let Inst{22} = 0; // 32-bit size flag - } - - def Drr : BaseTwoOperandFPData<opcode, FPR64, asm, - [(set FPR64:$Rd, (fneg (node FPR64:$Rn, (f64 FPR64:$Rm))))]> { - let Inst{22} = 1; // 64-bit size flag - } -} - - -//--- -// Three operand floating point data processing -//--- - -class BaseThreeOperandFPData<bit isNegated, bit isSub, - RegisterClass regtype, string asm, list<dag> pat> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, regtype: $Ra), - asm, "\t$Rd, $Rn, $Rm, $Ra", "", pat>, - Sched<[WriteFMul]> { - bits<5> Rd; - 
bits<5> Rn; - bits<5> Rm; - bits<5> Ra; - let Inst{31-23} = 0b000111110; - let Inst{21} = isNegated; - let Inst{20-16} = Rm; - let Inst{15} = isSub; - let Inst{14-10} = Ra; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass ThreeOperandFPData<bit isNegated, bit isSub,string asm, - SDPatternOperator node> { - def Srrr : BaseThreeOperandFPData<isNegated, isSub, FPR32, asm, - [(set FPR32:$Rd, - (node (f32 FPR32:$Rn), (f32 FPR32:$Rm), (f32 FPR32:$Ra)))]> { - let Inst{22} = 0; // 32-bit size flag - } - - def Drrr : BaseThreeOperandFPData<isNegated, isSub, FPR64, asm, - [(set FPR64:$Rd, - (node (f64 FPR64:$Rn), (f64 FPR64:$Rm), (f64 FPR64:$Ra)))]> { - let Inst{22} = 1; // 64-bit size flag - } -} - -//--- -// Floating point data comparisons -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseOneOperandFPComparison<bit signalAllNans, - RegisterClass regtype, string asm, - list<dag> pat> - : I<(outs), (ins regtype:$Rn), asm, "\t$Rn, #0.0", "", pat>, - Sched<[WriteFCmp]> { - bits<5> Rn; - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - - let Inst{20-16} = 0b00000; - let Inst{15-10} = 0b001000; - let Inst{9-5} = Rn; - let Inst{4} = signalAllNans; - let Inst{3-0} = 0b1000; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseTwoOperandFPComparison<bit signalAllNans, RegisterClass regtype, - string asm, list<dag> pat> - : I<(outs), (ins regtype:$Rn, regtype:$Rm), asm, "\t$Rn, $Rm", "", pat>, - Sched<[WriteFCmp]> { - bits<5> Rm; - bits<5> Rn; - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-10} = 0b001000; - let Inst{9-5} = Rn; - let Inst{4} = signalAllNans; - let Inst{3-0} = 0b0000; -} - -multiclass FPComparison<bit signalAllNans, string asm, - SDPatternOperator OpNode = null_frag> { - let Defs = [CPSR] in { - def Srr : BaseTwoOperandFPComparison<signalAllNans, FPR32, asm, - [(OpNode FPR32:$Rn, (f32 FPR32:$Rm)), (implicit CPSR)]> { - let Inst{22} = 0; - } - - def Sri : BaseOneOperandFPComparison<signalAllNans, FPR32, asm, - [(OpNode (f32 FPR32:$Rn), fpimm0), (implicit CPSR)]> { - let Inst{22} = 0; - } - - def Drr : BaseTwoOperandFPComparison<signalAllNans, FPR64, asm, - [(OpNode FPR64:$Rn, (f64 FPR64:$Rm)), (implicit CPSR)]> { - let Inst{22} = 1; - } - - def Dri : BaseOneOperandFPComparison<signalAllNans, FPR64, asm, - [(OpNode (f64 FPR64:$Rn), fpimm0), (implicit CPSR)]> { - let Inst{22} = 1; - } - } // Defs = [CPSR] -} - -//--- -// Floating point conditional comparisons -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseFPCondComparison<bit signalAllNans, - RegisterClass regtype, string asm> - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, - Sched<[WriteFCmp]> { - bits<5> Rn; - bits<5> Rm; - bits<4> nzcv; - bits<4> cond; - - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = 0b01; - let Inst{9-5} = Rn; - let Inst{4} = signalAllNans; - let Inst{3-0} = nzcv; -} - -multiclass FPCondComparison<bit signalAllNans, string asm> { - let Defs = [CPSR], Uses = [CPSR] in { - def Srr : BaseFPCondComparison<signalAllNans, FPR32, asm> { - let Inst{22} = 0; - } - - def Drr : BaseFPCondComparison<signalAllNans, FPR64, asm> { - let Inst{22} = 1; - } - } // Defs = [CPSR], Uses = [CPSR] -} - -//--- -// Floating point conditional select -//--- - -class BaseFPCondSelect<RegisterClass regtype, ValueType vt, string asm> - : I<(outs regtype:$Rd), (ins regtype:$Rn, 
regtype:$Rm, ccode:$cond), - asm, "\t$Rd, $Rn, $Rm, $cond", "", - [(set regtype:$Rd, - (ARM64csel (vt regtype:$Rn), regtype:$Rm, - (i32 imm:$cond), CPSR))]>, - Sched<[WriteF]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<4> cond; - - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = 0b11; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass FPCondSelect<string asm> { - let Uses = [CPSR] in { - def Srrr : BaseFPCondSelect<FPR32, f32, asm> { - let Inst{22} = 0; - } - - def Drrr : BaseFPCondSelect<FPR64, f64, asm> { - let Inst{22} = 1; - } - } // Uses = [CPSR] -} - -//--- -// Floating move immediate -//--- - -class BaseFPMoveImmediate<RegisterClass regtype, Operand fpimmtype, string asm> - : I<(outs regtype:$Rd), (ins fpimmtype:$imm), asm, "\t$Rd, $imm", "", - [(set regtype:$Rd, fpimmtype:$imm)]>, - Sched<[WriteFImm]> { - bits<5> Rd; - bits<8> imm; - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - let Inst{20-13} = imm; - let Inst{12-5} = 0b10000000; - let Inst{4-0} = Rd; -} - -multiclass FPMoveImmediate<string asm> { - def Si : BaseFPMoveImmediate<FPR32, fpimm32, asm> { - let Inst{22} = 0; - } - - def Di : BaseFPMoveImmediate<FPR64, fpimm64, asm> { - let Inst{22} = 1; - } -} - -//---------------------------------------------------------------------------- -// AdvSIMD -//---------------------------------------------------------------------------- - -def VectorIndexBOperand : AsmOperandClass { let Name = "VectorIndexB"; } -def VectorIndexHOperand : AsmOperandClass { let Name = "VectorIndexH"; } -def VectorIndexSOperand : AsmOperandClass { let Name = "VectorIndexS"; } -def VectorIndexDOperand : AsmOperandClass { let Name = "VectorIndexD"; } -def VectorIndexB : Operand<i64>, ImmLeaf<i64, [{ - return ((uint64_t)Imm) < 16; -}]> { - let ParserMatchClass = VectorIndexBOperand; - let PrintMethod = "printVectorIndex"; - let MIOperandInfo = (ops i64imm); -} -def VectorIndexH : Operand<i64>, ImmLeaf<i64, [{ - return ((uint64_t)Imm) < 8; -}]> { - let ParserMatchClass = VectorIndexHOperand; - let PrintMethod = "printVectorIndex"; - let MIOperandInfo = (ops i64imm); -} -def VectorIndexS : Operand<i64>, ImmLeaf<i64, [{ - return ((uint64_t)Imm) < 4; -}]> { - let ParserMatchClass = VectorIndexSOperand; - let PrintMethod = "printVectorIndex"; - let MIOperandInfo = (ops i64imm); -} -def VectorIndexD : Operand<i64>, ImmLeaf<i64, [{ - return ((uint64_t)Imm) < 2; -}]> { - let ParserMatchClass = VectorIndexDOperand; - let PrintMethod = "printVectorIndex"; - let MIOperandInfo = (ops i64imm); -} - -//---------------------------------------------------------------------------- -// AdvSIMD three register vector instructions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVector<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand regtype, string asm, string kind, - list<dag> pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # - "|" # kind # "\t$Rd, $Rn, $Rm|}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in 
-class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand regtype, string asm, string kind, - list<dag> pattern> - : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # - "|" # kind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -// All operand sizes distinguished in the encoding. -multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, - asm, ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, - asm, ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, - asm, ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, - asm, ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, - asm, ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, - asm, ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; - def v2i64 : BaseSIMDThreeSameVector<1, U, 0b11, opc, V128, - asm, ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>; -} - -// As above, but D sized elements unsupported. 
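The SIMDThreeSameVector multiclass above is only the shared encoding and assembly scaffolding; the per-mnemonic instruction records live in ARM64InstrInfo.td (also deleted by this change), which instantiates the multiclass once per opcode. A minimal sketch of that usage, with opcode bit values shown for illustration rather than taken verbatim from the backend:

// Illustrative instantiations of the multiclass defined above; the
// opcode fields are placeholders, not authoritative encodings.
defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>;
defm SUB : SIMDThreeSameVector<1, 0b10000, "sub", sub>;

Each defm expands into .8b/.16b/.4h/.8h/.2s/.4s/.2d instruction definitions, with the Q, U and size fields filled in by the multiclass body; the size-restricted variants below (BHS, B-only, FP) are consumed the same way.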
-multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, - asm, ".8b", - [(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, - asm, ".16b", - [(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>; - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, - asm, ".4h", - [(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, - asm, ".8h", - [(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, - asm, ".2s", - [(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, - asm, ".4s", - [(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>; -} - -multiclass SIMDThreeSameVectorBHSTied<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b00, opc, V64, - asm, ".8b", - [(set (v8i8 V64:$dst), - (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b00, opc, V128, - asm, ".16b", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; - def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b01, opc, V64, - asm, ".4h", - [(set (v4i16 V64:$dst), - (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b01, opc, V128, - asm, ".8h", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b10, opc, V64, - asm, ".2s", - [(set (v2i32 V64:$dst), - (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b10, opc, V128, - asm, ".4s", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; -} - -// As above, but only B sized elements supported. -multiclass SIMDThreeSameVectorB<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, - asm, ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, - asm, ".16b", - [(set (v16i8 V128:$Rd), - (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; -} - -// As above, but only S and D sized floating point elements supported. 
-multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<5> opc, - string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64, - asm, ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128, - asm, ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128, - asm, ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; -} - -multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<5> opc, - string asm, - SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64, - asm, ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128, - asm, ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128, - asm, ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; -} - -multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<5> opc, - string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0}, opc, V64, - asm, ".2s", - [(set (v2f32 V64:$dst), - (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0}, opc, V128, - asm, ".4s", - [(set (v4f32 V128:$dst), - (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,1}, opc, V128, - asm, ".2d", - [(set (v2f64 V128:$dst), - (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; -} - -// As above, but D and B sized elements unsupported. -multiclass SIMDThreeSameVectorHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, - asm, ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, - asm, ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, - asm, ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, - asm, ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; -} - -// Logical three vector ops share opcode bits, and only use B sized elements. 
-multiclass SIMDLogicalThreeVector<bit U, bits<2> size, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8 : BaseSIMDThreeSameVector<0, U, size, 0b00011, V64, - asm, ".8b", - [(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, size, 0b00011, V128, - asm, ".16b", - [(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>; - - def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)), - (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>; - def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)), - (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>; - def : Pat<(v1i64 (OpNode V64:$LHS, V64:$RHS)), - (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>; - - def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)), - (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>; - def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)), - (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>; - def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)), - (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>; -} - -multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size, - string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorTied<0, U, size, 0b00011, V64, - asm, ".8b", - [(set (v8i8 V64:$dst), - (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVectorTied<1, U, size, 0b00011, V128, - asm, ".16b", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), - (v16i8 V128:$Rm)))]>; - - def : Pat<(v4i16 (OpNode (v4i16 V64:$LHS), (v4i16 V64:$MHS), - (v4i16 V64:$RHS))), - (!cast<Instruction>(NAME#"v8i8") - V64:$LHS, V64:$MHS, V64:$RHS)>; - def : Pat<(v2i32 (OpNode (v2i32 V64:$LHS), (v2i32 V64:$MHS), - (v2i32 V64:$RHS))), - (!cast<Instruction>(NAME#"v8i8") - V64:$LHS, V64:$MHS, V64:$RHS)>; - def : Pat<(v1i64 (OpNode (v1i64 V64:$LHS), (v1i64 V64:$MHS), - (v1i64 V64:$RHS))), - (!cast<Instruction>(NAME#"v8i8") - V64:$LHS, V64:$MHS, V64:$RHS)>; - - def : Pat<(v8i16 (OpNode (v8i16 V128:$LHS), (v8i16 V128:$MHS), - (v8i16 V128:$RHS))), - (!cast<Instruction>(NAME#"v16i8") - V128:$LHS, V128:$MHS, V128:$RHS)>; - def : Pat<(v4i32 (OpNode (v4i32 V128:$LHS), (v4i32 V128:$MHS), - (v4i32 V128:$RHS))), - (!cast<Instruction>(NAME#"v16i8") - V128:$LHS, V128:$MHS, V128:$RHS)>; - def : Pat<(v2i64 (OpNode (v2i64 V128:$LHS), (v2i64 V128:$MHS), - (v2i64 V128:$RHS))), - (!cast<Instruction>(NAME#"v16i8") - V128:$LHS, V128:$MHS, V128:$RHS)>; -} - - -//---------------------------------------------------------------------------- -// AdvSIMD two register vector instructions. 
-//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand regtype, string asm, string dstkind, - string srckind, list<dag> pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, - "{\t$Rd" # dstkind # ", $Rn" # srckind # - "|" # dstkind # "\t$Rd, $Rn}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand regtype, string asm, string dstkind, - string srckind, list<dag> pattern> - : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm, - "{\t$Rd" # dstkind # ", $Rn" # srckind # - "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -// Supports B, H, and S element sizes. -multiclass SIMDTwoVectorBHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; -} - -class BaseSIMDVectorLShiftLongBySize<bit Q, bits<2> size, - RegisterOperand regtype, string asm, string dstkind, - string srckind, string amount> - : I<(outs V128:$Rd), (ins regtype:$Rn), asm, - "{\t$Rd" # dstkind # ", $Rn" # srckind # ", #" # amount # - "|" # dstkind # "\t$Rd, $Rn, #" # amount # "}", "", []>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-24} = 0b101110; - let Inst{23-22} = size; - let Inst{21-10} = 0b100001001110; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDVectorLShiftLongBySizeBHS { - let neverHasSideEffects = 1 in { - def v8i8 : BaseSIMDVectorLShiftLongBySize<0, 0b00, V64, - "shll", ".8h", ".8b", "8">; - def v16i8 : BaseSIMDVectorLShiftLongBySize<1, 0b00, V128, - "shll2", ".8h", ".16b", "8">; - def v4i16 : BaseSIMDVectorLShiftLongBySize<0, 0b01, V64, - "shll", ".4s", ".4h", "16">; - def v8i16 : BaseSIMDVectorLShiftLongBySize<1, 0b01, V128, - "shll2", ".4s", ".8h", "16">; - def v2i32 : BaseSIMDVectorLShiftLongBySize<0, 0b10, V64, - "shll", ".2d", ".2s", "32">; - def v4i32 : BaseSIMDVectorLShiftLongBySize<1, 0b10, V128, - "shll2", ".2d", ".4s", "32">; - } -} - -// Supports all 
element sizes. -multiclass SIMDLongTwoVector<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, - asm, ".4h", ".8b", - [(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, - asm, ".8h", ".16b", - [(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, - asm, ".2s", ".4h", - [(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, - asm, ".4s", ".8h", - [(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, - asm, ".1d", ".2s", - [(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, - asm, ".2d", ".4s", - [(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; -} - -multiclass SIMDLongTwoVectorTied<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64, - asm, ".4h", ".8b", - [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), - (v8i8 V64:$Rn)))]>; - def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128, - asm, ".8h", ".16b", - [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), - (v16i8 V128:$Rn)))]>; - def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64, - asm, ".2s", ".4h", - [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), - (v4i16 V64:$Rn)))]>; - def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128, - asm, ".4s", ".8h", - [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), - (v8i16 V128:$Rn)))]>; - def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64, - asm, ".1d", ".2s", - [(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd), - (v2i32 V64:$Rn)))]>; - def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128, - asm, ".2d", ".4s", - [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), - (v4i32 V128:$Rn)))]>; -} - -// Supports all element sizes, except 1xD. 
-multiclass SIMDTwoVectorBHSDTied<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64, - asm, ".8b", ".8b", - [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128, - asm, ".16b", ".16b", - [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64, - asm, ".4h", ".4h", - [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128, - asm, ".8h", ".8h", - [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>; - def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, V128, - asm, ".2d", ".2d", - [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>; -} - -multiclass SIMDTwoVectorBHSD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, V128, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; -} - - -// Supports only B element sizes. -multiclass SIMDTwoVectorB<bit U, bits<2> size, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, V64, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, V128, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - -} - -// Supports only B and H element sizes. -multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>; -} - -// Supports only S and D element sizes, uses high bit of the size field -// as an extra opcode bit. 
-multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, - asm, ".2s", ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, - asm, ".4s", ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, - asm, ".2d", ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; -} - -// Supports only S element size. -multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; -} - - -multiclass SIMDTwoVectorFPToInt<bit U, bit S, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; -} - -multiclass SIMDTwoVectorIntToFP<bit U, bit S, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, - asm, ".2s", ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, - asm, ".4s", ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, - asm, ".2d", ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; -} - - -class BaseSIMDMixedTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand inreg, RegisterOperand outreg, - string asm, string outkind, string inkind, - list<dag> pattern> - : I<(outs outreg:$Rd), (ins inreg:$Rn), asm, - "{\t$Rd" # outkind # ", $Rn" # inkind # - "|" # outkind # "\t$Rd, $Rn}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseSIMDMixedTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand inreg, RegisterOperand outreg, - string asm, string outkind, string inkind, - list<dag> pattern> - : I<(outs outreg:$dst), (ins outreg:$Rd, inreg:$Rn), asm, - "{\t$Rd" # outkind # ", $Rn" # inkind # - "|" # outkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDMixedTwoVector<0, U, 0b00, opc, V128, V64, - asm, ".8b", ".8h", - [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v16i8 : BaseSIMDMixedTwoVectorTied<1, U, 0b00, opc, V128, V128, - asm#"2", ".16b", ".8h", []>; - 
def v4i16 : BaseSIMDMixedTwoVector<0, U, 0b01, opc, V128, V64, - asm, ".4h", ".4s", - [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v8i16 : BaseSIMDMixedTwoVectorTied<1, U, 0b01, opc, V128, V128, - asm#"2", ".8h", ".4s", []>; - def v2i32 : BaseSIMDMixedTwoVector<0, U, 0b10, opc, V128, V64, - asm, ".2s", ".2d", - [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn)))]>; - def v4i32 : BaseSIMDMixedTwoVectorTied<1, U, 0b10, opc, V128, V128, - asm#"2", ".4s", ".2d", []>; - - def : Pat<(concat_vectors (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn))), - (!cast<Instruction>(NAME # "v16i8") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; - def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn))), - (!cast<Instruction>(NAME # "v8i16") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; - def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn))), - (!cast<Instruction>(NAME # "v4i32") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -} - -class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand regtype, string asm, string kind, - ValueType dty, ValueType sty, SDNode OpNode> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", #0" # - "|" # kind # "\t$Rd, $Rn, #0}", "", - [(set (dty regtype:$Rd), (OpNode (sty regtype:$Rn)))]>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -// Comparisons support all element sizes, except 1xD. -multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm, - SDNode OpNode> { - def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, opc, V64, - asm, ".8b", - v8i8, v8i8, OpNode>; - def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, opc, V128, - asm, ".16b", - v16i8, v16i8, OpNode>; - def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, opc, V64, - asm, ".4h", - v4i16, v4i16, OpNode>; - def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, opc, V128, - asm, ".8h", - v8i16, v8i16, OpNode>; - def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, opc, V64, - asm, ".2s", - v2i32, v2i32, OpNode>; - def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, opc, V128, - asm, ".4s", - v4i32, v4i32, OpNode>; - def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, opc, V128, - asm, ".2d", - v2i64, v2i64, OpNode>; -} - -// FP Comparisons support only S and D element sizes. 
-multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc, - string asm, SDNode OpNode> { - def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, opc, V64, - asm, ".2s", - v2i32, v2f32, OpNode>; - def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, opc, V128, - asm, ".4s", - v4i32, v4f32, OpNode>; - def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, opc, V128, - asm, ".2d", - v2i64, v2f64, OpNode>; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDFPCvtTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand outtype, RegisterOperand intype, - string asm, string VdTy, string VnTy, - list<dag> pattern> - : I<(outs outtype:$Rd), (ins intype:$Rn), asm, - !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseSIMDFPCvtTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand outtype, RegisterOperand intype, - string asm, string VdTy, string VnTy, - list<dag> pattern> - : I<(outs outtype:$dst), (ins outtype:$Rd, intype:$Rn), asm, - !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDFPWidenTwoVector<bit U, bit S, bits<5> opc, string asm> { - def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V128, V64, - asm, ".4s", ".4h", []>; - def v8i16 : BaseSIMDFPCvtTwoVector<1, U, {S,0}, opc, V128, V128, - asm#"2", ".4s", ".8h", []>; - def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V128, V64, - asm, ".2d", ".2s", []>; - def v4i32 : BaseSIMDFPCvtTwoVector<1, U, {S,1}, opc, V128, V128, - asm#"2", ".2d", ".4s", []>; -} - -multiclass SIMDFPNarrowTwoVector<bit U, bit S, bits<5> opc, string asm> { - def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V64, V128, - asm, ".4h", ".4s", []>; - def v8i16 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,0}, opc, V128, V128, - asm#"2", ".8h", ".4s", []>; - def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128, - asm, ".2s", ".2d", []>; - def v4i32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128, - asm#"2", ".4s", ".2d", []>; -} - -multiclass SIMDFPInexactCvtTwoVector<bit U, bit S, bits<5> opc, string asm, - Intrinsic OpNode> { - def v2f32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128, - asm, ".2s", ".2d", - [(set (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn)))]>; - def v4f32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128, - asm#"2", ".4s", ".2d", []>; - - def : Pat<(concat_vectors (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn))), - (!cast<Instruction>(NAME # "v4f32") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD three register different-size vector instructions. 
-//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDDifferentThreeVector<bit U, bits<3> size, bits<4> opcode, - RegisterOperand outtype, RegisterOperand intype1, - RegisterOperand intype2, string asm, - string outkind, string inkind1, string inkind2, - list<dag> pattern> - : I<(outs outtype:$Rd), (ins intype1:$Rn, intype2:$Rm), asm, - "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 # - "|" # outkind # "\t$Rd, $Rn, $Rm}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = size{0}; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size{2-1}; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = opcode; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDDifferentThreeVectorTied<bit U, bits<3> size, bits<4> opcode, - RegisterOperand outtype, RegisterOperand intype1, - RegisterOperand intype2, string asm, - string outkind, string inkind1, string inkind2, - list<dag> pattern> - : I<(outs outtype:$dst), (ins outtype:$Rd, intype1:$Rn, intype2:$Rm), asm, - "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 # - "|" # outkind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = size{0}; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size{2-1}; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = opcode; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -// FIXME: TableGen doesn't know how to deal with expanded types that also -// change the element count (in this case, placing the results in -// the high elements of the result register rather than the low -// elements). Until that's fixed, we can't code-gen those. -multiclass SIMDNarrowThreeVectorBHS<bit U, bits<4> opc, string asm, - Intrinsic IntOp> { - def v8i16_v8i8 : BaseSIMDDifferentThreeVector<U, 0b000, opc, - V64, V128, V128, - asm, ".8b", ".8h", ".8h", - [(set (v8i8 V64:$Rd), (IntOp (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v8i16_v16i8 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc, - V128, V128, V128, - asm#"2", ".16b", ".8h", ".8h", - []>; - def v4i32_v4i16 : BaseSIMDDifferentThreeVector<U, 0b010, opc, - V64, V128, V128, - asm, ".4h", ".4s", ".4s", - [(set (v4i16 V64:$Rd), (IntOp (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; - def v4i32_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc, - V128, V128, V128, - asm#"2", ".8h", ".4s", ".4s", - []>; - def v2i64_v2i32 : BaseSIMDDifferentThreeVector<U, 0b100, opc, - V64, V128, V128, - asm, ".2s", ".2d", ".2d", - [(set (v2i32 V64:$Rd), (IntOp (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>; - def v2i64_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc, - V128, V128, V128, - asm#"2", ".4s", ".2d", ".2d", - []>; - - - // Patterns for the '2' variants involve INSERT_SUBREG, which you can't put in - // a version attached to an instruction. 
- def : Pat<(concat_vectors (v8i8 V64:$Rd), (IntOp (v8i16 V128:$Rn), - (v8i16 V128:$Rm))), - (!cast<Instruction>(NAME # "v8i16_v16i8") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; - def : Pat<(concat_vectors (v4i16 V64:$Rd), (IntOp (v4i32 V128:$Rn), - (v4i32 V128:$Rm))), - (!cast<Instruction>(NAME # "v4i32_v8i16") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; - def : Pat<(concat_vectors (v2i32 V64:$Rd), (IntOp (v2i64 V128:$Rn), - (v2i64 V128:$Rm))), - (!cast<Instruction>(NAME # "v2i64_v4i32") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -} - -multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm, - Intrinsic IntOp> { - def v8i8 : BaseSIMDDifferentThreeVector<U, 0b000, opc, - V128, V64, V64, - asm, ".8h", ".8b", ".8b", - [(set (v8i16 V128:$Rd), (IntOp (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDDifferentThreeVector<U, 0b001, opc, - V128, V128, V128, - asm#"2", ".8h", ".16b", ".16b", []>; - def v1i64 : BaseSIMDDifferentThreeVector<U, 0b110, opc, - V128, V64, V64, - asm, ".1q", ".1d", ".1d", []>; - def v2i64 : BaseSIMDDifferentThreeVector<U, 0b111, opc, - V128, V128, V128, - asm#"2", ".1q", ".2d", ".2d", []>; - - def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 V128:$Rn)), - (v8i8 (extract_high_v16i8 V128:$Rm)))), - (!cast<Instruction>(NAME#"v16i8") V128:$Rn, V128:$Rm)>; -} - -multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc, - V128, V64, V64, - asm, ".4s", ".4h", ".4h", - [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc, - V128, V128, V128, - asm#"2", ".4s", ".8h", ".8h", - [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm)))]>; - def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc, - V128, V64, V64, - asm, ".2d", ".2s", ".2s", - [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc, - V128, V128, V128, - asm#"2", ".2d", ".4s", ".4s", - [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm)))]>; -} - -multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc, - V128, V64, V64, - asm, ".8h", ".8b", ".8b", - [(set (v8i16 V128:$Rd), - (zext (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))))]>; - def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc, - V128, V128, V128, - asm#"2", ".8h", ".16b", ".16b", - [(set (v8i16 V128:$Rd), - (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn), - (extract_high_v16i8 V128:$Rm)))))]>; - def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc, - V128, V64, V64, - asm, ".4s", ".4h", ".4h", - [(set (v4i32 V128:$Rd), - (zext (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))))]>; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc, - V128, V128, V128, - asm#"2", ".4s", ".8h", ".8h", - [(set (v4i32 V128:$Rd), - (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm)))))]>; - def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc, - V128, V64, V64, - asm, ".2d", ".2s", ".2s", - [(set (v2i64 V128:$Rd), - (zext (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))))]>; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc, - 
V128, V128, V128, - asm#"2", ".2d", ".4s", ".4s", - [(set (v2i64 V128:$Rd), - (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm)))))]>; -} - -multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc, - string asm, - SDPatternOperator OpNode> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b000, opc, - V128, V64, V64, - asm, ".8h", ".8b", ".8b", - [(set (v8i16 V128:$dst), - (add (v8i16 V128:$Rd), - (zext (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))))]>; - def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc, - V128, V128, V128, - asm#"2", ".8h", ".16b", ".16b", - [(set (v8i16 V128:$dst), - (add (v8i16 V128:$Rd), - (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn), - (extract_high_v16i8 V128:$Rm))))))]>; - def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc, - V128, V64, V64, - asm, ".4s", ".4h", ".4h", - [(set (v4i32 V128:$dst), - (add (v4i32 V128:$Rd), - (zext (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))))]>; - def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc, - V128, V128, V128, - asm#"2", ".4s", ".8h", ".8h", - [(set (v4i32 V128:$dst), - (add (v4i32 V128:$Rd), - (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm))))))]>; - def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc, - V128, V64, V64, - asm, ".2d", ".2s", ".2s", - [(set (v2i64 V128:$dst), - (add (v2i64 V128:$Rd), - (zext (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))))]>; - def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc, - V128, V128, V128, - asm#"2", ".2d", ".4s", ".4s", - [(set (v2i64 V128:$dst), - (add (v2i64 V128:$Rd), - (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm))))))]>; -} - -multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc, - V128, V64, V64, - asm, ".8h", ".8b", ".8b", - [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc, - V128, V128, V128, - asm#"2", ".8h", ".16b", ".16b", - [(set (v8i16 V128:$Rd), (OpNode (extract_high_v16i8 V128:$Rn), - (extract_high_v16i8 V128:$Rm)))]>; - def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc, - V128, V64, V64, - asm, ".4s", ".4h", ".4h", - [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc, - V128, V128, V128, - asm#"2", ".4s", ".8h", ".8h", - [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm)))]>; - def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc, - V128, V64, V64, - asm, ".2d", ".2s", ".2s", - [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc, - V128, V128, V128, - asm#"2", ".2d", ".4s", ".4s", - [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm)))]>; -} - -multiclass SIMDLongThreeVectorTiedBHS<bit U, bits<4> opc, - string asm, - SDPatternOperator OpNode> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b000, opc, - V128, V64, V64, - asm, ".8h", ".8b", ".8b", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc, - V128, V128, V128, - asm#"2", ".8h", ".16b", ".16b", - [(set (v8i16 V128:$dst), - (OpNode 
(v8i16 V128:$Rd), - (extract_high_v16i8 V128:$Rn), - (extract_high_v16i8 V128:$Rm)))]>; - def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc, - V128, V64, V64, - asm, ".4s", ".4h", ".4h", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc, - V128, V128, V128, - asm#"2", ".4s", ".8h", ".8h", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), - (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm)))]>; - def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc, - V128, V64, V64, - asm, ".2d", ".2s", ".2s", - [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc, - V128, V128, V128, - asm#"2", ".2d", ".4s", ".4s", - [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), - (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm)))]>; -} - -multiclass SIMDLongThreeVectorSQDMLXTiedHS<bit U, bits<4> opc, string asm, - SDPatternOperator Accum> { - def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc, - V128, V64, V64, - asm, ".4s", ".4h", ".4h", - [(set (v4i32 V128:$dst), - (Accum (v4i32 V128:$Rd), - (v4i32 (int_arm64_neon_sqdmull (v4i16 V64:$Rn), - (v4i16 V64:$Rm)))))]>; - def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc, - V128, V128, V128, - asm#"2", ".4s", ".8h", ".8h", - [(set (v4i32 V128:$dst), - (Accum (v4i32 V128:$Rd), - (v4i32 (int_arm64_neon_sqdmull (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm)))))]>; - def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc, - V128, V64, V64, - asm, ".2d", ".2s", ".2s", - [(set (v2i64 V128:$dst), - (Accum (v2i64 V128:$Rd), - (v2i64 (int_arm64_neon_sqdmull (v2i32 V64:$Rn), - (v2i32 V64:$Rm)))))]>; - def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc, - V128, V128, V128, - asm#"2", ".2d", ".4s", ".4s", - [(set (v2i64 V128:$dst), - (Accum (v2i64 V128:$Rd), - (v2i64 (int_arm64_neon_sqdmull (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm)))))]>; -} - -multiclass SIMDWideThreeVectorBHS<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc, - V128, V128, V64, - asm, ".8h", ".8h", ".8b", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc, - V128, V128, V128, - asm#"2", ".8h", ".8h", ".16b", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), - (extract_high_v16i8 V128:$Rm)))]>; - def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc, - V128, V128, V64, - asm, ".4s", ".4s", ".4h", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc, - V128, V128, V128, - asm#"2", ".4s", ".4s", ".8h", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), - (extract_high_v8i16 V128:$Rm)))]>; - def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc, - V128, V128, V64, - asm, ".2d", ".2d", ".2s", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc, - V128, V128, V128, - asm#"2", ".2d", ".2d", ".4s", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), - (extract_high_v4i32 V128:$Rm)))]>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD bitwise extract from vector 
-//---------------------------------------------------------------------------- - -class BaseSIMDBitwiseExtract<bit size, RegisterOperand regtype, ValueType vty, - string asm, string kind> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, i32imm:$imm), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $imm" # - "|" # kind # "\t$Rd, $Rn, $Rm, $imm}", "", - [(set (vty regtype:$Rd), - (ARM64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<4> imm; - let Inst{31} = 0; - let Inst{30} = size; - let Inst{29-21} = 0b101110000; - let Inst{20-16} = Rm; - let Inst{15} = 0; - let Inst{14-11} = imm; - let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - - -multiclass SIMDBitwiseExtract<string asm> { - def v8i8 : BaseSIMDBitwiseExtract<0, V64, v8i8, asm, ".8b">; - def v16i8 : BaseSIMDBitwiseExtract<1, V128, v16i8, asm, ".16b">; -} - -//---------------------------------------------------------------------------- -// AdvSIMD zip vector -//---------------------------------------------------------------------------- - -class BaseSIMDZipVector<bits<3> size, bits<3> opc, RegisterOperand regtype, - string asm, string kind, SDNode OpNode, ValueType valty> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # - "|" # kind # "\t$Rd, $Rn, $Rm}", "", - [(set (valty regtype:$Rd), (OpNode regtype:$Rn, regtype:$Rm))]>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = size{0}; - let Inst{29-24} = 0b001110; - let Inst{23-22} = size{2-1}; - let Inst{21} = 0; - let Inst{20-16} = Rm; - let Inst{15} = 0; - let Inst{14-12} = opc; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDZipVector<bits<3>opc, string asm, - SDNode OpNode> { - def v8i8 : BaseSIMDZipVector<0b000, opc, V64, - asm, ".8b", OpNode, v8i8>; - def v16i8 : BaseSIMDZipVector<0b001, opc, V128, - asm, ".16b", OpNode, v16i8>; - def v4i16 : BaseSIMDZipVector<0b010, opc, V64, - asm, ".4h", OpNode, v4i16>; - def v8i16 : BaseSIMDZipVector<0b011, opc, V128, - asm, ".8h", OpNode, v8i16>; - def v2i32 : BaseSIMDZipVector<0b100, opc, V64, - asm, ".2s", OpNode, v2i32>; - def v4i32 : BaseSIMDZipVector<0b101, opc, V128, - asm, ".4s", OpNode, v4i32>; - def v2i64 : BaseSIMDZipVector<0b111, opc, V128, - asm, ".2d", OpNode, v2i64>; - - def : Pat<(v2f32 (OpNode V64:$Rn, V64:$Rm)), - (!cast<Instruction>(NAME#"v2i32") V64:$Rn, V64:$Rm)>; - def : Pat<(v4f32 (OpNode V128:$Rn, V128:$Rm)), - (!cast<Instruction>(NAME#"v4i32") V128:$Rn, V128:$Rm)>; - def : Pat<(v2f64 (OpNode V128:$Rn, V128:$Rm)), - (!cast<Instruction>(NAME#"v2i64") V128:$Rn, V128:$Rm)>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD three register scalar instructions -//---------------------------------------------------------------------------- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode, - RegisterClass regtype, string asm, - list<dag> pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, - "\t$Rd, $Rn, $Rm", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - 
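The scalar three-register base class above is only ever used through the wrapper multiclasses that follow. As a rough sketch of how the instruction definitions elsewhere in the backend would instantiate one of them (the U bit, opcode value and DAG node here are illustrative assumptions, not taken from this commit):

    // Illustrative only: a defm like this would sit in ARM64InstrInfo.td and
    // expand SIMDThreeScalarD (defined just below) into the concrete scalar
    // "add d0, d1, d2" form, with the ISel pattern mapping a v1i64 add onto it.
    defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>;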
-multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm, - [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>; -} - -multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm, - [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>; - def v1i32 : BaseSIMDThreeScalar<U, 0b10, opc, FPR32, asm, []>; - def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>; - def v1i8 : BaseSIMDThreeScalar<U, 0b00, opc, FPR8 , asm, []>; - - def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>; - def : Pat<(i32 (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))), - (!cast<Instruction>(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>; -} - -multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v1i32 : BaseSIMDThreeScalar<U, 0b10, opc, FPR32, asm, - [(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>; - def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>; -} - -multiclass SIMDThreeScalarSD<bit U, bit S, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def #NAME#64 : BaseSIMDThreeScalar<U, {S,1}, opc, FPR64, asm, - [(set (f64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>; - def #NAME#32 : BaseSIMDThreeScalar<U, {S,0}, opc, FPR32, asm, - [(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>; - } - - def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; -} - -multiclass SIMDThreeScalarFPCmp<bit U, bit S, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def #NAME#64 : BaseSIMDThreeScalar<U, {S,1}, opc, FPR64, asm, - [(set (i64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>; - def #NAME#32 : BaseSIMDThreeScalar<U, {S,0}, opc, FPR32, asm, - [(set (i32 FPR32:$Rd), (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]>; - } - - def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; -} - -class BaseSIMDThreeScalarMixed<bit U, bits<2> size, bits<5> opcode, - dag oops, dag iops, string asm, string cstr, list<dag> pat> - : I<oops, iops, asm, - "\t$Rd, $Rn, $Rm", cstr, pat>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDThreeScalarMixedHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def i16 : BaseSIMDThreeScalarMixed<U, 0b01, opc, - (outs FPR32:$Rd), - (ins FPR16:$Rn, FPR16:$Rm), asm, "", []>; - def i32 : BaseSIMDThreeScalarMixed<U, 0b10, opc, - (outs FPR64:$Rd), - (ins FPR32:$Rn, FPR32:$Rm), asm, "", - [(set (i64 FPR64:$Rd), (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm)))]>; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDThreeScalarMixedTiedHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def i16 : BaseSIMDThreeScalarMixed<U, 0b01, opc, - (outs FPR32:$dst), - (ins FPR32:$Rd, FPR16:$Rn, FPR16:$Rm), - 
asm, "$Rd = $dst", []>; - def i32 : BaseSIMDThreeScalarMixed<U, 0b10, opc, - (outs FPR64:$dst), - (ins FPR64:$Rd, FPR32:$Rn, FPR32:$Rm), - asm, "$Rd = $dst", - [(set (i64 FPR64:$dst), - (OpNode (i64 FPR64:$Rd), (i32 FPR32:$Rn), (i32 FPR32:$Rm)))]>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD two register scalar instructions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoScalar<bit U, bits<2> size, bits<5> opcode, - RegisterClass regtype, RegisterClass regtype2, - string asm, list<dag> pat> - : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm, - "\t$Rd, $Rn", "", pat>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoScalarTied<bit U, bits<2> size, bits<5> opcode, - RegisterClass regtype, RegisterClass regtype2, - string asm, list<dag> pat> - : I<(outs regtype:$dst), (ins regtype:$Rd, regtype2:$Rn), asm, - "\t$Rd, $Rn", "$Rd = $dst", pat>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<5> opcode, - RegisterClass regtype, string asm> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, - "\t$Rd, $Rn, #0", "", []>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SIMDInexactCvtTwoScalar<bits<5> opcode, string asm> - : I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "", - [(set (f32 FPR32:$Rd), (int_arm64_sisd_fcvtxn (f64 FPR64:$Rn)))]>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-17} = 0b011111100110000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDCmpTwoScalarD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v1i64rz : BaseSIMDCmpTwoScalar<U, 0b11, opc, FPR64, asm>; - - def : Pat<(v1i64 (OpNode FPR64:$Rn)), - (!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>; -} - -multiclass SIMDCmpTwoScalarSD<bit U, bit S, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v1i64rz : BaseSIMDCmpTwoScalar<U, {S,1}, opc, FPR64, asm>; - def v1i32rz : BaseSIMDCmpTwoScalar<U, {S,0}, opc, FPR32, asm>; - - def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))), - (!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>; -} - -multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v1i64 : BaseSIMDTwoScalar<U, 0b11, opc, FPR64, FPR64, asm, - [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn)))]>; - - def : Pat<(i64 (OpNode (i64 FPR64:$Rn))), - (!cast<Instruction>(NAME # "v1i64") FPR64:$Rn)>; -} - -multiclass SIMDTwoScalarSD<bit U, bit S, bits<5> opc, string asm> { - def v1i64 : BaseSIMDTwoScalar<U, {S,1}, opc, FPR64, FPR64, asm,[]>; - def 
v1i32 : BaseSIMDTwoScalar<U, {S,0}, opc, FPR32, FPR32, asm,[]>; -} - -multiclass SIMDTwoScalarCVTSD<bit U, bit S, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v1i64 : BaseSIMDTwoScalar<U, {S,1}, opc, FPR64, FPR64, asm, - [(set FPR64:$Rd, (OpNode (f64 FPR64:$Rn)))]>; - def v1i32 : BaseSIMDTwoScalar<U, {S,0}, opc, FPR32, FPR32, asm, - [(set FPR32:$Rd, (OpNode (f32 FPR32:$Rn)))]>; -} - -multiclass SIMDTwoScalarBHSD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v1i64 : BaseSIMDTwoScalar<U, 0b11, opc, FPR64, FPR64, asm, - [(set (i64 FPR64:$Rd), (OpNode (i64 FPR64:$Rn)))]>; - def v1i32 : BaseSIMDTwoScalar<U, 0b10, opc, FPR32, FPR32, asm, - [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>; - def v1i16 : BaseSIMDTwoScalar<U, 0b01, opc, FPR16, FPR16, asm, []>; - def v1i8 : BaseSIMDTwoScalar<U, 0b00, opc, FPR8 , FPR8 , asm, []>; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))), - (!cast<Instruction>(NAME # v1i64) FPR64:$Rn)>; -} - -multiclass SIMDTwoScalarBHSDTied<bit U, bits<5> opc, string asm, - Intrinsic OpNode> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v1i64 : BaseSIMDTwoScalarTied<U, 0b11, opc, FPR64, FPR64, asm, - [(set (i64 FPR64:$dst), (OpNode (i64 FPR64:$Rd), (i64 FPR64:$Rn)))]>; - def v1i32 : BaseSIMDTwoScalarTied<U, 0b10, opc, FPR32, FPR32, asm, - [(set (i32 FPR32:$dst), (OpNode (i32 FPR32:$Rd), (i32 FPR32:$Rn)))]>; - def v1i16 : BaseSIMDTwoScalarTied<U, 0b01, opc, FPR16, FPR16, asm, []>; - def v1i8 : BaseSIMDTwoScalarTied<U, 0b00, opc, FPR8 , FPR8 , asm, []>; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn))), - (!cast<Instruction>(NAME # v1i64) FPR64:$Rd, FPR64:$Rn)>; -} - - - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDTwoScalarMixedBHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v1i32 : BaseSIMDTwoScalar<U, 0b10, opc, FPR32, FPR64, asm, - [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn)))]>; - def v1i16 : BaseSIMDTwoScalar<U, 0b01, opc, FPR16, FPR32, asm, []>; - def v1i8 : BaseSIMDTwoScalar<U, 0b00, opc, FPR8 , FPR16, asm, []>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD scalar pairwise instructions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDPairwiseScalar<bit U, bits<2> size, bits<5> opcode, - RegisterOperand regtype, RegisterOperand vectype, - string asm, string kind> - : I<(outs regtype:$Rd), (ins vectype:$Rn), asm, - "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", []>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b11000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDPairwiseScalarD<bit U, bits<5> opc, string asm> { - def v2i64p : BaseSIMDPairwiseScalar<U, 0b11, opc, FPR64Op, V128, - asm, ".2d">; -} - -multiclass SIMDPairwiseScalarSD<bit U, bit S, bits<5> opc, string asm> { - def v2i32p : BaseSIMDPairwiseScalar<U, {S,0}, opc, FPR32Op, V64, - asm, ".2s">; - def v2i64p : BaseSIMDPairwiseScalar<U, {S,1}, opc, FPR64Op, V128, - asm, ".2d">; -} - -//---------------------------------------------------------------------------- -// AdvSIMD across lanes instructions 
-//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDAcrossLanes<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterClass regtype, RegisterOperand vectype, - string asm, string kind, list<dag> pattern> - : I<(outs regtype:$Rd), (ins vectype:$Rn), asm, - "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b11000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDAcrossLanesBHS<bit U, bits<5> opcode, - string asm> { - def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR8, V64, - asm, ".8b", []>; - def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR8, V128, - asm, ".16b", []>; - def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR16, V64, - asm, ".4h", []>; - def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR16, V128, - asm, ".8h", []>; - def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR32, V128, - asm, ".4s", []>; -} - -multiclass SIMDAcrossLanesHSD<bit U, bits<5> opcode, string asm> { - def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR16, V64, - asm, ".8b", []>; - def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR16, V128, - asm, ".16b", []>; - def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR32, V64, - asm, ".4h", []>; - def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR32, V128, - asm, ".8h", []>; - def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR64, V128, - asm, ".4s", []>; -} - -multiclass SIMDAcrossLanesS<bits<5> opcode, bit sz1, string asm, - Intrinsic intOp> { - def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128, - asm, ".4s", - [(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD INS/DUP instructions -//---------------------------------------------------------------------------- - -// FIXME: There has got to be a better way to factor these. ugh. 
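For the DUP/INS family below, the lane index and the element size share the imm5 field (Inst{20-16}): the position of the lowest set bit encodes the element size (xxxx1 = .b, xxx10 = .h, xx100 = .s, x1000 = .d) and the bits above it carry the index, which is why each per-size class assigns a different slice of Inst{20-16}. A minimal sketch of how the per-size DUP-by-element classes would be instantiated (instruction names and arrangements are assumptions for illustration):

    // Illustrative only: one DUP-by-element definition per destination
    // arrangement, as they might appear in ARM64InstrInfo.td.
    def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
    def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
    def DUPv2i64lane : SIMDDup64FromElement;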
- -class BaseSIMDInsDup<bit Q, bit op, dag outs, dag ins, string asm, - string operands, string constraints, list<dag> pattern> - : I<outs, ins, asm, operands, constraints, pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = op; - let Inst{28-21} = 0b01110000; - let Inst{15} = 0; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SIMDDupFromMain<bit Q, bits<5> imm5, string size, ValueType vectype, - RegisterOperand vecreg, RegisterClass regtype> - : BaseSIMDInsDup<Q, 0, (outs vecreg:$Rd), (ins regtype:$Rn), "dup", - "{\t$Rd" # size # ", $Rn" # - "|" # size # "\t$Rd, $Rn}", "", - [(set (vectype vecreg:$Rd), (ARM64dup regtype:$Rn))]> { - let Inst{20-16} = imm5; - let Inst{14-11} = 0b0001; -} - -class SIMDDupFromElement<bit Q, string dstkind, string srckind, - ValueType vectype, ValueType insreg, - RegisterOperand vecreg, Operand idxtype, - ValueType elttype, SDNode OpNode> - : BaseSIMDInsDup<Q, 0, (outs vecreg:$Rd), (ins V128:$Rn, idxtype:$idx), "dup", - "{\t$Rd" # dstkind # ", $Rn" # srckind # "$idx" # - "|" # dstkind # "\t$Rd, $Rn$idx}", "", - [(set (vectype vecreg:$Rd), - (OpNode (insreg V128:$Rn), idxtype:$idx))]> { - let Inst{14-11} = 0b0000; -} - -class SIMDDup64FromElement - : SIMDDupFromElement<1, ".2d", ".d", v2i64, v2i64, V128, - VectorIndexD, i64, ARM64duplane64> { - bits<1> idx; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; -} - -class SIMDDup32FromElement<bit Q, string size, ValueType vectype, - RegisterOperand vecreg> - : SIMDDupFromElement<Q, size, ".s", vectype, v4i32, vecreg, - VectorIndexS, i64, ARM64duplane32> { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; -} - -class SIMDDup16FromElement<bit Q, string size, ValueType vectype, - RegisterOperand vecreg> - : SIMDDupFromElement<Q, size, ".h", vectype, v8i16, vecreg, - VectorIndexH, i64, ARM64duplane16> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; -} - -class SIMDDup8FromElement<bit Q, string size, ValueType vectype, - RegisterOperand vecreg> - : SIMDDupFromElement<Q, size, ".b", vectype, v16i8, vecreg, - VectorIndexB, i64, ARM64duplane8> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; -} - -class BaseSIMDMov<bit Q, string size, bits<4> imm4, RegisterClass regtype, - Operand idxtype, string asm, list<dag> pattern> - : BaseSIMDInsDup<Q, 0, (outs regtype:$Rd), (ins V128:$Rn, idxtype:$idx), asm, - "{\t$Rd, $Rn" # size # "$idx" # - "|" # size # "\t$Rd, $Rn$idx}", "", pattern> { - let Inst{14-11} = imm4; -} - -class SIMDSMov<bit Q, string size, RegisterClass regtype, - Operand idxtype> - : BaseSIMDMov<Q, size, 0b0101, regtype, idxtype, "smov", []>; -class SIMDUMov<bit Q, string size, ValueType vectype, RegisterClass regtype, - Operand idxtype> - : BaseSIMDMov<Q, size, 0b0111, regtype, idxtype, "umov", - [(set regtype:$Rd, (vector_extract (vectype V128:$Rn), idxtype:$idx))]>; - -class SIMDMovAlias<string asm, string size, Instruction inst, - RegisterClass regtype, Operand idxtype> - : InstAlias<asm#"{\t$dst, $src"#size#"$idx" # - "|" # size # "\t$dst, $src$idx}", - (inst regtype:$dst, V128:$src, idxtype:$idx)>; - -multiclass SMov { - def vi8to32 : SIMDSMov<0, ".b", GPR32, VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def vi8to64 : SIMDSMov<1, ".b", GPR64, VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def vi16to32 : SIMDSMov<0, ".h", GPR32, VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 
0b10; - } - def vi16to64 : SIMDSMov<1, ".h", GPR64, VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def vi32to64 : SIMDSMov<1, ".s", GPR64, VectorIndexS> { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - } -} - -multiclass UMov { - def vi8 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def vi16 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def vi32 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndexS> { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - } - def vi64 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndexD> { - bits<1> idx; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; - } - def : SIMDMovAlias<"mov", ".s", - !cast<Instruction>(NAME#"vi32"), - GPR32, VectorIndexS>; - def : SIMDMovAlias<"mov", ".d", - !cast<Instruction>(NAME#"vi64"), - GPR64, VectorIndexD>; -} - -class SIMDInsFromMain<string size, ValueType vectype, - RegisterClass regtype, Operand idxtype> - : BaseSIMDInsDup<1, 0, (outs V128:$dst), - (ins V128:$Rd, idxtype:$idx, regtype:$Rn), "ins", - "{\t$Rd" # size # "$idx, $Rn" # - "|" # size # "\t$Rd$idx, $Rn}", - "$Rd = $dst", - [(set V128:$dst, - (vector_insert (vectype V128:$Rd), regtype:$Rn, idxtype:$idx))]> { - let Inst{14-11} = 0b0011; -} - -class SIMDInsFromElement<string size, ValueType vectype, - ValueType elttype, Operand idxtype> - : BaseSIMDInsDup<1, 1, (outs V128:$dst), - (ins V128:$Rd, idxtype:$idx, V128:$Rn, idxtype:$idx2), "ins", - "{\t$Rd" # size # "$idx, $Rn" # size # "$idx2" # - "|" # size # "\t$Rd$idx, $Rn$idx2}", - "$Rd = $dst", - [(set V128:$dst, - (vector_insert - (vectype V128:$Rd), - (elttype (vector_extract (vectype V128:$Rn), idxtype:$idx2)), - idxtype:$idx))]>; - -class SIMDInsMainMovAlias<string size, Instruction inst, - RegisterClass regtype, Operand idxtype> - : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # - "|" # size #"\t$dst$idx, $src}", - (inst V128:$dst, idxtype:$idx, regtype:$src)>; -class SIMDInsElementMovAlias<string size, Instruction inst, - Operand idxtype> - : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2" # - # "|" # size #" $dst$idx, $src$idx2}", - (inst V128:$dst, idxtype:$idx, V128:$src, idxtype:$idx2)>; - - -multiclass SIMDIns { - def vi8gpr : SIMDInsFromMain<".b", v16i8, GPR32, VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def vi16gpr : SIMDInsFromMain<".h", v8i16, GPR32, VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def vi32gpr : SIMDInsFromMain<".s", v4i32, GPR32, VectorIndexS> { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - } - def vi64gpr : SIMDInsFromMain<".d", v2i64, GPR64, VectorIndexD> { - bits<1> idx; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; - } - - def vi8lane : SIMDInsFromElement<".b", v16i8, i32, VectorIndexB> { - bits<4> idx; - bits<4> idx2; - let Inst{20-17} = idx; - let Inst{16} = 1; - let Inst{14-11} = idx2; - } - def vi16lane : SIMDInsFromElement<".h", v8i16, i32, VectorIndexH> { - bits<3> idx; - bits<3> idx2; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - let Inst{14-12} = idx2; - let Inst{11} = 0; - } - def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> { - bits<2> idx; - bits<2> idx2; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - let Inst{14-13} = idx2; - let Inst{12-11} = 0; - } - def vi64lane : SIMDInsFromElement<".d", v2i64, i64, 
VectorIndexD> { - bits<1> idx; - bits<1> idx2; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; - let Inst{14} = idx2; - let Inst{13-11} = 0; - } - - // For all forms of the INS instruction, the "mov" mnemonic is the - // preferred alias. Why they didn't just call the instruction "mov" in - // the first place is a very good question indeed... - def : SIMDInsMainMovAlias<".b", !cast<Instruction>(NAME#"vi8gpr"), - GPR32, VectorIndexB>; - def : SIMDInsMainMovAlias<".h", !cast<Instruction>(NAME#"vi16gpr"), - GPR32, VectorIndexH>; - def : SIMDInsMainMovAlias<".s", !cast<Instruction>(NAME#"vi32gpr"), - GPR32, VectorIndexS>; - def : SIMDInsMainMovAlias<".d", !cast<Instruction>(NAME#"vi64gpr"), - GPR64, VectorIndexD>; - - def : SIMDInsElementMovAlias<".b", !cast<Instruction>(NAME#"vi8lane"), - VectorIndexB>; - def : SIMDInsElementMovAlias<".h", !cast<Instruction>(NAME#"vi16lane"), - VectorIndexH>; - def : SIMDInsElementMovAlias<".s", !cast<Instruction>(NAME#"vi32lane"), - VectorIndexS>; - def : SIMDInsElementMovAlias<".d", !cast<Instruction>(NAME#"vi64lane"), - VectorIndexD>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD TBL/TBX -//---------------------------------------------------------------------------- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDTableLookup<bit Q, bits<2> len, bit op, RegisterOperand vectype, - RegisterOperand listtype, string asm, string kind> - : I<(outs vectype:$Vd), (ins listtype:$Vn, vectype:$Vm), asm, - "\t$Vd" # kind # ", $Vn, $Vm" # kind, "", []>, - Sched<[WriteV]> { - bits<5> Vd; - bits<5> Vn; - bits<5> Vm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-21} = 0b001110000; - let Inst{20-16} = Vm; - let Inst{15} = 0; - let Inst{14-13} = len; - let Inst{12} = op; - let Inst{11-10} = 0b00; - let Inst{9-5} = Vn; - let Inst{4-0} = Vd; -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDTableLookupTied<bit Q, bits<2> len, bit op, RegisterOperand vectype, - RegisterOperand listtype, string asm, string kind> - : I<(outs vectype:$dst), (ins vectype:$Vd, listtype:$Vn, vectype:$Vm), asm, - "\t$Vd" # kind # ", $Vn, $Vm" # kind, "$Vd = $dst", []>, - Sched<[WriteV]> { - bits<5> Vd; - bits<5> Vn; - bits<5> Vm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-21} = 0b001110000; - let Inst{20-16} = Vm; - let Inst{15} = 0; - let Inst{14-13} = len; - let Inst{12} = op; - let Inst{11-10} = 0b00; - let Inst{9-5} = Vn; - let Inst{4-0} = Vd; -} - -class SIMDTableLookupAlias<string asm, Instruction inst, - RegisterOperand vectype, RegisterOperand listtype> - : InstAlias<!strconcat(asm, "\t$dst, $lst, $index"), - (inst vectype:$dst, listtype:$lst, vectype:$index), 0>; - -multiclass SIMDTableLookup<bit op, string asm> { - def v8i8One : BaseSIMDTableLookup<0, 0b00, op, V64, VecListOne16b, - asm, ".8b">; - def v8i8Two : BaseSIMDTableLookup<0, 0b01, op, V64, VecListTwo16b, - asm, ".8b">; - def v8i8Three : BaseSIMDTableLookup<0, 0b10, op, V64, VecListThree16b, - asm, ".8b">; - def v8i8Four : BaseSIMDTableLookup<0, 0b11, op, V64, VecListFour16b, - asm, ".8b">; - def v16i8One : BaseSIMDTableLookup<1, 0b00, op, V128, VecListOne16b, - asm, ".16b">; - def v16i8Two : BaseSIMDTableLookup<1, 0b01, op, V128, VecListTwo16b, - asm, ".16b">; - def v16i8Three: BaseSIMDTableLookup<1, 0b10, op, V128, VecListThree16b, - asm, ".16b">; - def v16i8Four : BaseSIMDTableLookup<1, 0b11, op, V128, VecListFour16b, - asm, ".16b">; - - def : SIMDTableLookupAlias<asm # ".8b", - !cast<Instruction>(NAME#"v8i8One"), 
- V64, VecListOne128>; - def : SIMDTableLookupAlias<asm # ".8b", - !cast<Instruction>(NAME#"v8i8Two"), - V64, VecListTwo128>; - def : SIMDTableLookupAlias<asm # ".8b", - !cast<Instruction>(NAME#"v8i8Three"), - V64, VecListThree128>; - def : SIMDTableLookupAlias<asm # ".8b", - !cast<Instruction>(NAME#"v8i8Four"), - V64, VecListFour128>; - def : SIMDTableLookupAlias<asm # ".16b", - !cast<Instruction>(NAME#"v16i8One"), - V128, VecListOne128>; - def : SIMDTableLookupAlias<asm # ".16b", - !cast<Instruction>(NAME#"v16i8Two"), - V128, VecListTwo128>; - def : SIMDTableLookupAlias<asm # ".16b", - !cast<Instruction>(NAME#"v16i8Three"), - V128, VecListThree128>; - def : SIMDTableLookupAlias<asm # ".16b", - !cast<Instruction>(NAME#"v16i8Four"), - V128, VecListFour128>; -} - -multiclass SIMDTableLookupTied<bit op, string asm> { - def v8i8One : BaseSIMDTableLookupTied<0, 0b00, op, V64, VecListOne16b, - asm, ".8b">; - def v8i8Two : BaseSIMDTableLookupTied<0, 0b01, op, V64, VecListTwo16b, - asm, ".8b">; - def v8i8Three : BaseSIMDTableLookupTied<0, 0b10, op, V64, VecListThree16b, - asm, ".8b">; - def v8i8Four : BaseSIMDTableLookupTied<0, 0b11, op, V64, VecListFour16b, - asm, ".8b">; - def v16i8One : BaseSIMDTableLookupTied<1, 0b00, op, V128, VecListOne16b, - asm, ".16b">; - def v16i8Two : BaseSIMDTableLookupTied<1, 0b01, op, V128, VecListTwo16b, - asm, ".16b">; - def v16i8Three: BaseSIMDTableLookupTied<1, 0b10, op, V128, VecListThree16b, - asm, ".16b">; - def v16i8Four : BaseSIMDTableLookupTied<1, 0b11, op, V128, VecListFour16b, - asm, ".16b">; - - def : SIMDTableLookupAlias<asm # ".8b", - !cast<Instruction>(NAME#"v8i8One"), - V64, VecListOne128>; - def : SIMDTableLookupAlias<asm # ".8b", - !cast<Instruction>(NAME#"v8i8Two"), - V64, VecListTwo128>; - def : SIMDTableLookupAlias<asm # ".8b", - !cast<Instruction>(NAME#"v8i8Three"), - V64, VecListThree128>; - def : SIMDTableLookupAlias<asm # ".8b", - !cast<Instruction>(NAME#"v8i8Four"), - V64, VecListFour128>; - def : SIMDTableLookupAlias<asm # ".16b", - !cast<Instruction>(NAME#"v16i8One"), - V128, VecListOne128>; - def : SIMDTableLookupAlias<asm # ".16b", - !cast<Instruction>(NAME#"v16i8Two"), - V128, VecListTwo128>; - def : SIMDTableLookupAlias<asm # ".16b", - !cast<Instruction>(NAME#"v16i8Three"), - V128, VecListThree128>; - def : SIMDTableLookupAlias<asm # ".16b", - !cast<Instruction>(NAME#"v16i8Four"), - V128, VecListFour128>; -} - - -//---------------------------------------------------------------------------- -// AdvSIMD scalar CPY -//---------------------------------------------------------------------------- -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDScalarCPY<RegisterClass regtype, RegisterOperand vectype, - string kind, Operand idxtype> - : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), "mov", - "{\t$dst, $src" # kind # "$idx" # - "|\t$dst, $src$idx}", "", []>, - Sched<[WriteV]> { - bits<5> dst; - bits<5> src; - let Inst{31-21} = 0b01011110000; - let Inst{15-10} = 0b000001; - let Inst{9-5} = src; - let Inst{4-0} = dst; -} - -class SIMDScalarCPYAlias<string asm, string size, Instruction inst, - RegisterClass regtype, RegisterOperand vectype, Operand idxtype> - : InstAlias<asm # "{\t$dst, $src" # size # "$index" # - # "|\t$dst, $src$index}", - (inst regtype:$dst, vectype:$src, idxtype:$index)>; - - -multiclass SIMDScalarCPY<string asm> { - def i8 : BaseSIMDScalarCPY<FPR8, V128, ".b", VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def i16 : BaseSIMDScalarCPY<FPR16, V128, ".h", 
VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def i32 : BaseSIMDScalarCPY<FPR32, V128, ".s", VectorIndexS> { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - } - def i64 : BaseSIMDScalarCPY<FPR64, V128, ".d", VectorIndexD> { - bits<1> idx; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; - } - - // 'DUP' mnemonic aliases. - def : SIMDScalarCPYAlias<"dup", ".b", - !cast<Instruction>(NAME#"i8"), - FPR8, V128, VectorIndexB>; - def : SIMDScalarCPYAlias<"dup", ".h", - !cast<Instruction>(NAME#"i16"), - FPR16, V128, VectorIndexH>; - def : SIMDScalarCPYAlias<"dup", ".s", - !cast<Instruction>(NAME#"i32"), - FPR32, V128, VectorIndexS>; - def : SIMDScalarCPYAlias<"dup", ".d", - !cast<Instruction>(NAME#"i64"), - FPR64, V128, VectorIndexD>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD modified immediate instructions -//---------------------------------------------------------------------------- - -class BaseSIMDModifiedImm<bit Q, bit op, dag oops, dag iops, - string asm, string op_string, - string cstr, list<dag> pattern> - : I<oops, iops, asm, op_string, cstr, pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<8> imm8; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = op; - let Inst{28-19} = 0b0111100000; - let Inst{18-16} = imm8{7-5}; - let Inst{11-10} = 0b01; - let Inst{9-5} = imm8{4-0}; - let Inst{4-0} = Rd; -} - -class BaseSIMDModifiedImmVector<bit Q, bit op, RegisterOperand vectype, - Operand immtype, dag opt_shift_iop, - string opt_shift, string asm, string kind, - list<dag> pattern> - : BaseSIMDModifiedImm<Q, op, (outs vectype:$Rd), - !con((ins immtype:$imm8), opt_shift_iop), asm, - "{\t$Rd" # kind # ", $imm8" # opt_shift # - "|" # kind # "\t$Rd, $imm8" # opt_shift # "}", - "", pattern> { - let DecoderMethod = "DecodeModImmInstruction"; -} - -class BaseSIMDModifiedImmVectorTied<bit Q, bit op, RegisterOperand vectype, - Operand immtype, dag opt_shift_iop, - string opt_shift, string asm, string kind, - list<dag> pattern> - : BaseSIMDModifiedImm<Q, op, (outs vectype:$dst), - !con((ins vectype:$Rd, immtype:$imm8), opt_shift_iop), - asm, "{\t$Rd" # kind # ", $imm8" # opt_shift # - "|" # kind # "\t$Rd, $imm8" # opt_shift # "}", - "$Rd = $dst", pattern> { - let DecoderMethod = "DecodeModImmTiedInstruction"; -} - -class BaseSIMDModifiedImmVectorShift<bit Q, bit op, bits<2> b15_b12, - RegisterOperand vectype, string asm, - string kind, list<dag> pattern> - : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255, - (ins logical_vec_shift:$shift), - "$shift", asm, kind, pattern> { - bits<2> shift; - let Inst{15} = b15_b12{1}; - let Inst{14-13} = shift; - let Inst{12} = b15_b12{0}; -} - -class BaseSIMDModifiedImmVectorShiftTied<bit Q, bit op, bits<2> b15_b12, - RegisterOperand vectype, string asm, - string kind, list<dag> pattern> - : BaseSIMDModifiedImmVectorTied<Q, op, vectype, imm0_255, - (ins logical_vec_shift:$shift), - "$shift", asm, kind, pattern> { - bits<2> shift; - let Inst{15} = b15_b12{1}; - let Inst{14-13} = shift; - let Inst{12} = b15_b12{0}; -} - - -class BaseSIMDModifiedImmVectorShiftHalf<bit Q, bit op, bits<2> b15_b12, - RegisterOperand vectype, string asm, - string kind, list<dag> pattern> - : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255, - (ins logical_vec_hw_shift:$shift), - "$shift", asm, kind, pattern> { - bits<2> shift; - let Inst{15} = b15_b12{1}; - let Inst{14} = 0; - let Inst{13} = shift{0}; - let Inst{12} = b15_b12{0}; -} - -class 
BaseSIMDModifiedImmVectorShiftHalfTied<bit Q, bit op, bits<2> b15_b12, - RegisterOperand vectype, string asm, - string kind, list<dag> pattern> - : BaseSIMDModifiedImmVectorTied<Q, op, vectype, imm0_255, - (ins logical_vec_hw_shift:$shift), - "$shift", asm, kind, pattern> { - bits<2> shift; - let Inst{15} = b15_b12{1}; - let Inst{14} = 0; - let Inst{13} = shift{0}; - let Inst{12} = b15_b12{0}; -} - -multiclass SIMDModifiedImmVectorShift<bit op, bits<2> hw_cmode, bits<2> w_cmode, - string asm> { - def v4i16 : BaseSIMDModifiedImmVectorShiftHalf<0, op, hw_cmode, V64, - asm, ".4h", []>; - def v8i16 : BaseSIMDModifiedImmVectorShiftHalf<1, op, hw_cmode, V128, - asm, ".8h", []>; - - def v2i32 : BaseSIMDModifiedImmVectorShift<0, op, w_cmode, V64, - asm, ".2s", []>; - def v4i32 : BaseSIMDModifiedImmVectorShift<1, op, w_cmode, V128, - asm, ".4s", []>; -} - -multiclass SIMDModifiedImmVectorShiftTied<bit op, bits<2> hw_cmode, - bits<2> w_cmode, string asm, - SDNode OpNode> { - def v4i16 : BaseSIMDModifiedImmVectorShiftHalfTied<0, op, hw_cmode, V64, - asm, ".4h", - [(set (v4i16 V64:$dst), (OpNode V64:$Rd, - imm0_255:$imm8, - (i32 imm:$shift)))]>; - def v8i16 : BaseSIMDModifiedImmVectorShiftHalfTied<1, op, hw_cmode, V128, - asm, ".8h", - [(set (v8i16 V128:$dst), (OpNode V128:$Rd, - imm0_255:$imm8, - (i32 imm:$shift)))]>; - - def v2i32 : BaseSIMDModifiedImmVectorShiftTied<0, op, w_cmode, V64, - asm, ".2s", - [(set (v2i32 V64:$dst), (OpNode V64:$Rd, - imm0_255:$imm8, - (i32 imm:$shift)))]>; - def v4i32 : BaseSIMDModifiedImmVectorShiftTied<1, op, w_cmode, V128, - asm, ".4s", - [(set (v4i32 V128:$dst), (OpNode V128:$Rd, - imm0_255:$imm8, - (i32 imm:$shift)))]>; -} - -class SIMDModifiedImmMoveMSL<bit Q, bit op, bits<4> cmode, - RegisterOperand vectype, string asm, - string kind, list<dag> pattern> - : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255, - (ins move_vec_shift:$shift), - "$shift", asm, kind, pattern> { - bits<1> shift; - let Inst{15-13} = cmode{3-1}; - let Inst{12} = shift; -} - -class SIMDModifiedImmVectorNoShift<bit Q, bit op, bits<4> cmode, - RegisterOperand vectype, - Operand imm_type, string asm, - string kind, list<dag> pattern> - : BaseSIMDModifiedImmVector<Q, op, vectype, imm_type, (ins), "", - asm, kind, pattern> { - let Inst{15-12} = cmode; -} - -class SIMDModifiedImmScalarNoShift<bit Q, bit op, bits<4> cmode, string asm, - list<dag> pattern> - : BaseSIMDModifiedImm<Q, op, (outs FPR64:$Rd), (ins simdimmtype10:$imm8), asm, - "\t$Rd, $imm8", "", pattern> { - let Inst{15-12} = cmode; - let DecoderMethod = "DecodeModImmInstruction"; -} - -//---------------------------------------------------------------------------- -// AdvSIMD indexed element -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDIndexed<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc, - RegisterOperand dst_reg, RegisterOperand lhs_reg, - RegisterOperand rhs_reg, Operand vec_idx, string asm, - string apple_kind, string dst_kind, string lhs_kind, - string rhs_kind, list<dag> pattern> - : I<(outs dst_reg:$Rd), (ins lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), - asm, - "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" # - "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28} = Scalar; - let Inst{27-24} = 0b1111; - let Inst{23-22} = size; - // Bit 21 must be set by the 
derived class. - let Inst{20-16} = Rm; - let Inst{15-12} = opc; - // Bit 11 must be set by the derived class. - let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc, - RegisterOperand dst_reg, RegisterOperand lhs_reg, - RegisterOperand rhs_reg, Operand vec_idx, string asm, - string apple_kind, string dst_kind, string lhs_kind, - string rhs_kind, list<dag> pattern> - : I<(outs dst_reg:$dst), - (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), asm, - "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" # - "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28} = Scalar; - let Inst{27-24} = 0b1111; - let Inst{23-22} = size; - // Bit 21 must be set by the derived class. - let Inst{20-16} = Rm; - let Inst{15-12} = opc; - // Bit 11 must be set by the derived class. - let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDFPIndexedSD<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2f32 V64:$Rd), - (OpNode (v2f32 V64:$Rn), - (v2f32 (ARM64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4f32 V128:$Rd), - (OpNode (v4f32 V128:$Rn), - (v4f32 (ARM64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v2i64_indexed : BaseSIMDIndexed<1, U, 0, 0b11, opc, - V128, V128, - V128, VectorIndexD, - asm, ".2d", ".2d", ".2d", ".d", - [(set (v2f64 V128:$Rd), - (OpNode (v2f64 V128:$Rn), - (v2f64 (ARM64duplane64 (v2f64 V128:$Rm), VectorIndexD:$idx))))]> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } - - def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, - FPR32Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", - [(set (f32 FPR32Op:$Rd), - (OpNode (f32 FPR32Op:$Rn), - (f32 (vector_extract (v4f32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b11, opc, - FPR64Op, FPR64Op, V128, VectorIndexD, - asm, ".d", "", "", ".d", - [(set (f64 FPR64Op:$Rd), - (OpNode (f64 FPR64Op:$Rn), - (f64 (vector_extract (v2f64 V128:$Rm), - VectorIndexD:$idx))))]> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } -} - -multiclass SIMDFPIndexedSDTiedPatterns<string INST, SDPatternOperator OpNode> { - // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar. - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64duplane32 (v4f32 V128:$Rm), - VectorIndexS:$idx))), - (!cast<Instruction>(INST # v2i32_indexed) - V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64dup (f32 FPR32Op:$Rm)))), - (!cast<Instruction>(INST # "v2i32_indexed") V64:$Rd, V64:$Rn, - (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; - - - // 2 variants for the .4s version: DUPLANE from 128-bit and DUP scalar. 
- def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64duplane32 (v4f32 V128:$Rm), - VectorIndexS:$idx))), - (!cast<Instruction>(INST # "v4i32_indexed") - V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64dup (f32 FPR32Op:$Rm)))), - (!cast<Instruction>(INST # "v4i32_indexed") V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; - - // 2 variants for the .2d version: DUPLANE from 128-bit and DUP scalar. - def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64duplane64 (v2f64 V128:$Rm), - VectorIndexD:$idx))), - (!cast<Instruction>(INST # "v2i64_indexed") - V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64dup (f64 FPR64Op:$Rm)))), - (!cast<Instruction>(INST # "v2i64_indexed") V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; - - // 2 variants for 32-bit scalar version: extract from .2s or from .4s - def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), - (vector_extract (v4f32 V128:$Rm), VectorIndexS:$idx))), - (!cast<Instruction>(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn, - V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), - (vector_extract (v2f32 V64:$Rm), VectorIndexS:$idx))), - (!cast<Instruction>(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn, - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; - - // 1 variant for 64-bit scalar version: extract from .1d or from .2d - def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), - (vector_extract (v2f64 V128:$Rm), VectorIndexD:$idx))), - (!cast<Instruction>(INST # "v1i64_indexed") FPR64:$Rd, FPR64:$Rn, - V128:$Rm, VectorIndexD:$idx)>; -} - -multiclass SIMDFPIndexedSDTied<bit U, bits<4> opc, string asm> { - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v2i64_indexed : BaseSIMDIndexedTied<1, U, 0, 0b11, opc, - V128, V128, - V128, VectorIndexD, - asm, ".2d", ".2d", ".2d", ".d", []> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } - - - def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, - FPR32Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b11, opc, - FPR64Op, FPR64Op, V128, VectorIndexD, - asm, ".d", "", "", ".d", []> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } -} - -multiclass SIMDIndexedHS<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, V64, V64, - V128_lo, VectorIndexH, - asm, ".4h", ".4h", ".4h", ".h", - [(set (v4i16 V64:$Rd), - (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm, ".8h", ".8h", ".8h", ".h", - [(set (v8i16 V128:$Rd), - (OpNode (v8i16 V128:$Rn), - (v8i16 (ARM64duplane16 (v8i16 
V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2i32 V64:$Rd), - (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4i32 V128:$Rd), - (OpNode (v4i32 V128:$Rn), - (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc, - FPR16Op, FPR16Op, V128_lo, VectorIndexH, - asm, ".h", "", "", ".h", []> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, - FPR32Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", - [(set (i32 FPR32Op:$Rd), - (OpNode FPR32Op:$Rn, - (i32 (vector_extract (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDVectorIndexedHS<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, - V64, V64, - V128_lo, VectorIndexH, - asm, ".4h", ".4h", ".4h", ".h", - [(set (v4i16 V64:$Rd), - (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm, ".8h", ".8h", ".8h", ".h", - [(set (v8i16 V128:$Rd), - (OpNode (v8i16 V128:$Rn), - (v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2i32 V64:$Rd), - (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4i32 V128:$Rd), - (OpNode (v4i32 V128:$Rn), - (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDVectorIndexedHSTied<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, V64, V64, - V128_lo, VectorIndexH, - asm, ".4h", ".4h", ".4h", ".h", - [(set (v4i16 V64:$dst), - (OpNode (v4i16 V64:$Rd),(v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm, ".8h", ".8h", ".8h", ".h", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), - (v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let 
Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, - V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2i32 V64:$dst), - (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), - (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, - V128, V64, - V128_lo, VectorIndexH, - asm, ".4s", ".4s", ".4h", ".h", - [(set (v4i32 V128:$Rd), - (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm#"2", ".4s", ".4s", ".8h", ".h", - [(set (v4i32 V128:$Rd), - (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))]> { - - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V128, V64, - V128, VectorIndexS, - asm, ".2d", ".2d", ".2s", ".s", - [(set (v2i64 V128:$Rd), - (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm#"2", ".2d", ".2d", ".4s", ".s", - [(set (v2i64 V128:$Rd), - (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc, - FPR32Op, FPR16Op, V128_lo, VectorIndexH, - asm, ".h", "", "", ".h", []> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, - FPR64Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm, - SDPatternOperator Accum> { - def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, - V128, V64, - V128_lo, VectorIndexH, - asm, ".4s", ".4s", ".4h", ".h", - [(set (v4i32 V128:$dst), - (Accum (v4i32 V128:$Rd), - (v4i32 (int_arm64_neon_sqdmull - (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - // FIXME: it would be nice to use the scalar (v1i32) instruction here, but an - // intermediate EXTRACT_SUBREG would be untyped. 
- def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), - (i32 (vector_extract (v4i32 - (int_arm64_neon_sqdmull (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx)))), - (i64 0))))), - (EXTRACT_SUBREG - (!cast<Instruction>(NAME # v4i16_indexed) - (SUBREG_TO_REG (i32 0), FPR32Op:$Rd, ssub), V64:$Rn, - V128_lo:$Rm, VectorIndexH:$idx), - ssub)>; - - def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm#"2", ".4s", ".4s", ".8h", ".h", - [(set (v4i32 V128:$dst), - (Accum (v4i32 V128:$Rd), - (v4i32 (int_arm64_neon_sqdmull - (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 - (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, - V128, V64, - V128, VectorIndexS, - asm, ".2d", ".2d", ".2s", ".s", - [(set (v2i64 V128:$dst), - (Accum (v2i64 V128:$Rd), - (v2i64 (int_arm64_neon_sqdmull - (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm#"2", ".2d", ".2d", ".4s", ".s", - [(set (v2i64 V128:$dst), - (Accum (v2i64 V128:$Rd), - (v2i64 (int_arm64_neon_sqdmull - (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 - (ARM64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc, - FPR32Op, FPR16Op, V128_lo, VectorIndexH, - asm, ".h", "", "", ".h", []> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - - def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, - FPR64Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", - [(set (i64 FPR64Op:$dst), - (Accum (i64 FPR64Op:$Rd), - (i64 (int_arm64_neon_sqdmulls_scalar - (i32 FPR32Op:$Rn), - (i32 (vector_extract (v4i32 V128:$Rm), - VectorIndexS:$idx))))))]> { - - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDVectorIndexedLongSD<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, - V128, V64, - V128_lo, VectorIndexH, - asm, ".4s", ".4s", ".4h", ".h", - [(set (v4i32 V128:$Rd), - (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm#"2", ".4s", ".4s", ".8h", ".h", - [(set (v4i32 V128:$Rd), - (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))]> { - - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V128, V64, - V128, VectorIndexS, - asm, ".2d", ".2d", ".2s", ".s", - [(set (v2i64 V128:$Rd), - (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm#"2", 
".2d", ".2d", ".4s", ".s", - [(set (v2i64 V128:$Rd), - (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - } -} - -multiclass SIMDVectorIndexedLongSDTied<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, - V128, V64, - V128_lo, VectorIndexH, - asm, ".4s", ".4s", ".4h", ".h", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm#"2", ".4s", ".4s", ".8h", ".h", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), - (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, - V128, V64, - V128, VectorIndexS, - asm, ".2d", ".2d", ".2s", ".s", - [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm#"2", ".2d", ".2d", ".4s", ".s", - [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), - (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - } -} - -//---------------------------------------------------------------------------- -// AdvSIMD scalar shift by immediate -//---------------------------------------------------------------------------- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDScalarShift<bit U, bits<5> opc, bits<7> fixed_imm, - RegisterClass regtype1, RegisterClass regtype2, - Operand immtype, string asm, list<dag> pattern> - : I<(outs regtype1:$Rd), (ins regtype2:$Rn, immtype:$imm), - asm, "\t$Rd, $Rn, $imm", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<7> imm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-23} = 0b111110; - let Inst{22-16} = fixed_imm; - let Inst{15-11} = opc; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm, - RegisterClass regtype1, RegisterClass regtype2, - Operand immtype, string asm, list<dag> pattern> - : I<(outs regtype1:$dst), (ins regtype1:$Rd, regtype2:$Rn, immtype:$imm), - asm, "\t$Rd, $Rn, $imm", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<7> imm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-23} = 0b111110; - let Inst{22-16} = fixed_imm; - let Inst{15-11} = opc; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - - -multiclass SIMDScalarRShiftSD<bit U, bits<5> opc, string asm> { - def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?}, - FPR32, FPR32, vecshiftR32, asm, []> { - let Inst{20-16} = imm{4-0}; - } - - def d : 
BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?}, - FPR64, FPR64, vecshiftR64, asm, []> { - let Inst{21-16} = imm{5-0}; - } -} - -multiclass SIMDScalarRShiftD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?}, - FPR64, FPR64, vecshiftR64, asm, - [(set (i64 FPR64:$Rd), - (OpNode (i64 FPR64:$Rn), (i32 vecshiftR64:$imm)))]> { - let Inst{21-16} = imm{5-0}; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftR64:$imm))), - (!cast<Instruction>(NAME # "d") FPR64:$Rn, vecshiftR64:$imm)>; -} - -multiclass SIMDScalarRShiftDTied<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def d : BaseSIMDScalarShiftTied<U, opc, {1,?,?,?,?,?,?}, - FPR64, FPR64, vecshiftR64, asm, - [(set (i64 FPR64:$dst), (OpNode (i64 FPR64:$Rd), (i64 FPR64:$Rn), - (i32 vecshiftR64:$imm)))]> { - let Inst{21-16} = imm{5-0}; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), - (i32 vecshiftR64:$imm))), - (!cast<Instruction>(NAME # "d") FPR64:$Rd, FPR64:$Rn, - vecshiftR64:$imm)>; -} - -multiclass SIMDScalarLShiftD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?}, - FPR64, FPR64, vecshiftL64, asm, - [(set (v1i64 FPR64:$Rd), - (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm)))]> { - let Inst{21-16} = imm{5-0}; - } -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -multiclass SIMDScalarLShiftDTied<bit U, bits<5> opc, string asm> { - def d : BaseSIMDScalarShiftTied<U, opc, {1,?,?,?,?,?,?}, - FPR64, FPR64, vecshiftL64, asm, []> { - let Inst{21-16} = imm{5-0}; - } -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?}, - FPR8, FPR16, vecshiftR8, asm, []> { - let Inst{18-16} = imm{2-0}; - } - - def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?}, - FPR16, FPR32, vecshiftR16, asm, []> { - let Inst{19-16} = imm{3-0}; - } - - def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?}, - FPR32, FPR64, vecshiftR32, asm, - [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn), vecshiftR32:$imm))]> { - let Inst{20-16} = imm{4-0}; - } -} - -multiclass SIMDScalarLShiftBHSD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?}, - FPR8, FPR8, vecshiftL8, asm, []> { - let Inst{18-16} = imm{2-0}; - } - - def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?}, - FPR16, FPR16, vecshiftL16, asm, []> { - let Inst{19-16} = imm{3-0}; - } - - def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?}, - FPR32, FPR32, vecshiftL32, asm, - [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn), (i32 vecshiftL32:$imm)))]> { - let Inst{20-16} = imm{4-0}; - } - - def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?}, - FPR64, FPR64, vecshiftL64, asm, - [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), - (i32 vecshiftL64:$imm)))]> { - let Inst{21-16} = imm{5-0}; - } -} - -multiclass SIMDScalarRShiftBHSD<bit U, bits<5> opc, string asm> { - def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?}, - FPR8, FPR8, vecshiftR8, asm, []> { - let Inst{18-16} = imm{2-0}; - } - - def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?}, - FPR16, FPR16, vecshiftR16, asm, []> { - let Inst{19-16} = imm{3-0}; - } - - def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?}, - FPR32, FPR32, vecshiftR32, asm, []> { - let Inst{20-16} = imm{4-0}; - } - - def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?}, - FPR64, 
FPR64, vecshiftR64, asm, []> { - let Inst{21-16} = imm{5-0}; - } -} - -//---------------------------------------------------------------------------- -// AdvSIMD vector x indexed element -//---------------------------------------------------------------------------- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDVectorShift<bit Q, bit U, bits<5> opc, bits<7> fixed_imm, - RegisterOperand dst_reg, RegisterOperand src_reg, - Operand immtype, - string asm, string dst_kind, string src_kind, - list<dag> pattern> - : I<(outs dst_reg:$Rd), (ins src_reg:$Rn, immtype:$imm), - asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" # - "|" # dst_kind # "\t$Rd, $Rn, $imm}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-23} = 0b011110; - let Inst{22-16} = fixed_imm; - let Inst{15-11} = opc; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDVectorShiftTied<bit Q, bit U, bits<5> opc, bits<7> fixed_imm, - RegisterOperand vectype1, RegisterOperand vectype2, - Operand immtype, - string asm, string dst_kind, string src_kind, - list<dag> pattern> - : I<(outs vectype1:$dst), (ins vectype1:$Rd, vectype2:$Rn, immtype:$imm), - asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" # - "|" # dst_kind # "\t$Rd, $Rn, $imm}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-23} = 0b011110; - let Inst{22-16} = fixed_imm; - let Inst{15-11} = opc; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm, - Intrinsic OpNode> { - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftR32, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR32, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftR64, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorRShiftSDToFP<bit U, bits<5> opc, string asm, - Intrinsic OpNode> { - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftR32, - asm, ".2s", ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR32, - asm, ".4s", ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 imm:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftR64, - asm, ".2d", ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorRShiftNarrowBHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, - V64, V128, vecshiftR16Narrow, - asm, ".8b", ".8h", - [(set (v8i8 V64:$Rd), (OpNode (v8i16 
V128:$Rn), vecshiftR16Narrow:$imm))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftR16Narrow, - asm#"2", ".16b", ".8h", []> { - bits<3> imm; - let Inst{18-16} = imm; - let hasSideEffects = 0; - } - - def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, - V64, V128, vecshiftR32Narrow, - asm, ".4h", ".4s", - [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftR32Narrow, - asm#"2", ".8h", ".4s", []> { - bits<4> imm; - let Inst{19-16} = imm; - let hasSideEffects = 0; - } - - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V64, V128, vecshiftR64Narrow, - asm, ".2s", ".2d", - [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR64Narrow, - asm#"2", ".4s", ".2d", []> { - bits<5> imm; - let Inst{20-16} = imm; - let hasSideEffects = 0; - } - - // TableGen doesn't like patters w/ INSERT_SUBREG on the instructions - // themselves, so put them here instead. - - // Patterns involving what's effectively an insert high and a normal - // intrinsic, represented by CONCAT_VECTORS. - def : Pat<(concat_vectors (v8i8 V64:$Rd),(OpNode (v8i16 V128:$Rn), - vecshiftR16Narrow:$imm)), - (!cast<Instruction>(NAME # "v16i8_shift") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR16Narrow:$imm)>; - def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), - vecshiftR32Narrow:$imm)), - (!cast<Instruction>(NAME # "v8i16_shift") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR32Narrow:$imm)>; - def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), - vecshiftR64Narrow:$imm)), - (!cast<Instruction>(NAME # "v4i32_shift") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR64Narrow:$imm)>; -} - -multiclass SIMDVectorLShiftBHSD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, - V64, V64, vecshiftL8, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), - (i32 vecshiftL8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftL8, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), - (i32 vecshiftL8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftL16, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), - (i32 vecshiftL16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftL16, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), - (i32 vecshiftL16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftL32, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), - (i32 vecshiftL32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftL32, - asm, ".4s", ".4s", - 
[(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), - (i32 vecshiftL32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftL64, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), - (i32 vecshiftL64:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorRShiftBHSD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, - V64, V64, vecshiftR8, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), - (i32 vecshiftR8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftR8, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), - (i32 vecshiftR8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftR16, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), - (i32 vecshiftR16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftR16, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), - (i32 vecshiftR16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftR32, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), - (i32 vecshiftR32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR32, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), - (i32 vecshiftR32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftR64, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), - (i32 vecshiftR64:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDVectorRShiftBHSDTied<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?}, - V64, V64, vecshiftR8, asm, ".8b", ".8b", - [(set (v8i8 V64:$dst), - (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), - (i32 vecshiftR8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftR8, asm, ".16b", ".16b", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), - (i32 vecshiftR8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftR16, asm, ".4h", ".4h", - [(set (v4i16 V64:$dst), - (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), - (i32 vecshiftR16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftR16, asm, ".8h", ".8h", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), - (i32 vecshiftR16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftR32, asm, ".2s", ".2s", - [(set (v2i32 V64:$dst), - (OpNode (v2i32 
V64:$Rd), (v2i32 V64:$Rn), - (i32 vecshiftR32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR32, asm, ".4s", ".4s", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), - (i32 vecshiftR32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftR64, - asm, ".2d", ".2d", [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn), - (i32 vecshiftR64:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorLShiftBHSDTied<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?}, - V64, V64, vecshiftL8, - asm, ".8b", ".8b", - [(set (v8i8 V64:$dst), - (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), - (i32 vecshiftL8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftL8, - asm, ".16b", ".16b", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), - (i32 vecshiftL8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftL16, - asm, ".4h", ".4h", - [(set (v4i16 V64:$dst), - (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), - (i32 vecshiftL16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftL16, - asm, ".8h", ".8h", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), - (i32 vecshiftL16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftL32, - asm, ".2s", ".2s", - [(set (v2i32 V64:$dst), - (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), - (i32 vecshiftL32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftL32, - asm, ".4s", ".4s", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), - (i32 vecshiftL32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftL64, - asm, ".2d", ".2d", - [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn), - (i32 vecshiftL64:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorLShiftLongBHSD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, - V128, V64, vecshiftL8, asm, ".8h", ".8b", - [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), vecshiftL8:$imm))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftL8, - asm#"2", ".8h", ".16b", - [(set (v8i16 V128:$Rd), - (OpNode (extract_high_v16i8 V128:$Rn), vecshiftL8:$imm))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, - V128, V64, vecshiftL16, asm, ".4s", ".4h", - [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), vecshiftL16:$imm))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftL16, - asm#"2", ".4s", 
".8h", - [(set (v4i32 V128:$Rd), - (OpNode (extract_high_v8i16 V128:$Rn), vecshiftL16:$imm))]> { - - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V128, V64, vecshiftL32, asm, ".2d", ".2s", - [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), vecshiftL32:$imm))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftL32, - asm#"2", ".2d", ".4s", - [(set (v2i64 V128:$Rd), - (OpNode (extract_high_v4i32 V128:$Rn), vecshiftL32:$imm))]> { - bits<5> imm; - let Inst{20-16} = imm; - } -} - - -//--- -// Vector load/store -//--- -// SIMD ldX/stX no-index memory references don't allow the optional -// ", #0" constant and handle post-indexing explicitly, so we use -// a more specialized parse method for them. Otherwise, it's the same as -// the general am_noindex handling. -def MemorySIMDNoIndexOperand : AsmOperandClass { - let Name = "MemorySIMDNoIndex"; - let ParserMethod = "tryParseNoIndexMemory"; -} -def am_simdnoindex : Operand<i64>, - ComplexPattern<i64, 1, "SelectAddrModeNoIndex", []> { - let PrintMethod = "printAMNoIndex"; - let ParserMatchClass = MemorySIMDNoIndexOperand; - let MIOperandInfo = (ops GPR64sp:$base); - let DecoderMethod = "DecodeGPR64spRegisterClass"; -} - -class BaseSIMDLdSt<bit Q, bit L, bits<4> opcode, bits<2> size, - string asm, dag oops, dag iops, list<dag> pattern> - : I<oops, iops, asm, "\t$Vt, $vaddr", "", pattern> { - bits<5> Vt; - bits<5> vaddr; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-23} = 0b0011000; - let Inst{22} = L; - let Inst{21-16} = 0b000000; - let Inst{15-12} = opcode; - let Inst{11-10} = size; - let Inst{9-5} = vaddr; - let Inst{4-0} = Vt; -} - -class BaseSIMDLdStPost<bit Q, bit L, bits<4> opcode, bits<2> size, - string asm, dag oops, dag iops> - : I<oops, iops, asm, "\t$Vt, $vaddr, $Xm", "", []> { - bits<5> Vt; - bits<5> vaddr; - bits<5> Xm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-23} = 0b0011001; - let Inst{22} = L; - let Inst{21} = 0; - let Inst{20-16} = Xm; - let Inst{15-12} = opcode; - let Inst{11-10} = size; - let Inst{9-5} = vaddr; - let Inst{4-0} = Vt; - let DecoderMethod = "DecodeSIMDLdStPost"; -} - -// The immediate form of AdvSIMD post-indexed addressing is encoded with -// register post-index addressing from the zero register. -multiclass SIMDLdStAliases<string asm, string layout, string Count, - int Offset, int Size> { - // E.g. "ld1 { v0.8b, v1.8b }, [x1], #16" - // "ld1\t$Vt, $vaddr, #16" - // may get mapped to - // (LD1Twov8b_POST VecListTwo8b:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias<asm # "\t$Vt, $vaddr, #" # Offset, - (!cast<Instruction>(NAME # Count # "v" # layout # "_POST") - !cast<RegisterOperand>("VecList" # Count # layout):$Vt, - am_simdnoindex:$vaddr, XZR), 1>; - - // E.g. "ld1.8b { v0, v1 }, [x1], #16" - // "ld1.8b\t$Vt, $vaddr, #16" - // may get mapped to - // (LD1Twov8b_POST VecListTwo64:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias<asm # "." # layout # "\t$Vt, $vaddr, #" # Offset, - (!cast<Instruction>(NAME # Count # "v" # layout # "_POST") - !cast<RegisterOperand>("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr, XZR), 0>; - - // E.g. "ld1.8b { v0, v1 }, [x1]" - // "ld1\t$Vt, $vaddr" - // may get mapped to - // (LD1Twov8b VecListTwo64:$Vt, am_simdnoindex:$vaddr) - def : InstAlias<asm # "." 
# layout # "\t$Vt, $vaddr", - (!cast<Instruction>(NAME # Count # "v" # layout) - !cast<RegisterOperand>("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr), 0>; - - // E.g. "ld1.8b { v0, v1 }, [x1], x2" - // "ld1\t$Vt, $vaddr, $Xm" - // may get mapped to - // (LD1Twov8b_POST VecListTwo64:$Vt, am_simdnoindex:$vaddr, GPR64pi8:$Xm) - def : InstAlias<asm # "." # layout # "\t$Vt, $vaddr, $Xm", - (!cast<Instruction>(NAME # Count # "v" # layout # "_POST") - !cast<RegisterOperand>("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>; -} - -multiclass BaseSIMDLdN<string Count, string asm, string veclist, int Offset128, - int Offset64, bits<4> opcode> { - let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { - def v16b: BaseSIMDLdSt<1, 1, opcode, 0b00, asm, - (outs !cast<RegisterOperand>(veclist # "16b"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v8h : BaseSIMDLdSt<1, 1, opcode, 0b01, asm, - (outs !cast<RegisterOperand>(veclist # "8h"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v4s : BaseSIMDLdSt<1, 1, opcode, 0b10, asm, - (outs !cast<RegisterOperand>(veclist # "4s"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v2d : BaseSIMDLdSt<1, 1, opcode, 0b11, asm, - (outs !cast<RegisterOperand>(veclist # "2d"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v8b : BaseSIMDLdSt<0, 1, opcode, 0b00, asm, - (outs !cast<RegisterOperand>(veclist # "8b"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v4h : BaseSIMDLdSt<0, 1, opcode, 0b01, asm, - (outs !cast<RegisterOperand>(veclist # "4h"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v2s : BaseSIMDLdSt<0, 1, opcode, 0b10, asm, - (outs !cast<RegisterOperand>(veclist # "2s"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - - - def v16b_POST: BaseSIMDLdStPost<1, 1, opcode, 0b00, asm, - (outs !cast<RegisterOperand>(veclist # "16b"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; - def v8h_POST : BaseSIMDLdStPost<1, 1, opcode, 0b01, asm, - (outs !cast<RegisterOperand>(veclist # "8h"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; - def v4s_POST : BaseSIMDLdStPost<1, 1, opcode, 0b10, asm, - (outs !cast<RegisterOperand>(veclist # "4s"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; - def v2d_POST : BaseSIMDLdStPost<1, 1, opcode, 0b11, asm, - (outs !cast<RegisterOperand>(veclist # "2d"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; - def v8b_POST : BaseSIMDLdStPost<0, 1, opcode, 0b00, asm, - (outs !cast<RegisterOperand>(veclist # "8b"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; - def v4h_POST : BaseSIMDLdStPost<0, 1, opcode, 0b01, asm, - (outs !cast<RegisterOperand>(veclist # "4h"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; - def v2s_POST : BaseSIMDLdStPost<0, 1, opcode, 0b10, asm, - (outs !cast<RegisterOperand>(veclist # "2s"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; - } - - defm : SIMDLdStAliases<asm, "16b", Count, Offset128, 128>; - defm : SIMDLdStAliases<asm, "8h", Count, Offset128, 128>; - defm : SIMDLdStAliases<asm, "4s", Count, Offset128, 128>; - defm : SIMDLdStAliases<asm, "2d", Count, Offset128, 128>; - defm : SIMDLdStAliases<asm, "8b", Count, Offset64, 64>; - defm : SIMDLdStAliases<asm, "4h", Count, Offset64, 64>; - defm : SIMDLdStAliases<asm, "2s", Count, Offset64, 64>; -} - -// 
Only ld1/st1 has a v1d version. -multiclass BaseSIMDStN<string Count, string asm, string veclist, int Offset128, - int Offset64, bits<4> opcode> { - let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in { - def v16b : BaseSIMDLdSt<1, 0, opcode, 0b00, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "16b"):$Vt, - am_simdnoindex:$vaddr), []>; - def v8h : BaseSIMDLdSt<1, 0, opcode, 0b01, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "8h"):$Vt, - am_simdnoindex:$vaddr), []>; - def v4s : BaseSIMDLdSt<1, 0, opcode, 0b10, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "4s"):$Vt, - am_simdnoindex:$vaddr), []>; - def v2d : BaseSIMDLdSt<1, 0, opcode, 0b11, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "2d"):$Vt, - am_simdnoindex:$vaddr), []>; - def v8b : BaseSIMDLdSt<0, 0, opcode, 0b00, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "8b"):$Vt, - am_simdnoindex:$vaddr), []>; - def v4h : BaseSIMDLdSt<0, 0, opcode, 0b01, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "4h"):$Vt, - am_simdnoindex:$vaddr), []>; - def v2s : BaseSIMDLdSt<0, 0, opcode, 0b10, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "2s"):$Vt, - am_simdnoindex:$vaddr), []>; - - def v16b_POST : BaseSIMDLdStPost<1, 0, opcode, 0b00, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "16b"):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; - def v8h_POST : BaseSIMDLdStPost<1, 0, opcode, 0b01, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "8h"):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; - def v4s_POST : BaseSIMDLdStPost<1, 0, opcode, 0b10, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "4s"):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; - def v2d_POST : BaseSIMDLdStPost<1, 0, opcode, 0b11, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "2d"):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; - def v8b_POST : BaseSIMDLdStPost<0, 0, opcode, 0b00, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "8b"):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; - def v4h_POST : BaseSIMDLdStPost<0, 0, opcode, 0b01, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "4h"):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; - def v2s_POST : BaseSIMDLdStPost<0, 0, opcode, 0b10, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "2s"):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; - } - - defm : SIMDLdStAliases<asm, "16b", Count, Offset128, 128>; - defm : SIMDLdStAliases<asm, "8h", Count, Offset128, 128>; - defm : SIMDLdStAliases<asm, "4s", Count, Offset128, 128>; - defm : SIMDLdStAliases<asm, "2d", Count, Offset128, 128>; - defm : SIMDLdStAliases<asm, "8b", Count, Offset64, 64>; - defm : SIMDLdStAliases<asm, "4h", Count, Offset64, 64>; - defm : SIMDLdStAliases<asm, "2s", Count, Offset64, 64>; -} - -multiclass BaseSIMDLd1<string Count, string asm, string veclist, - int Offset128, int Offset64, bits<4> opcode> - : BaseSIMDLdN<Count, asm, veclist, Offset128, Offset64, opcode> { - - // LD1 instructions have extra "1d" variants. 
- let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { - def v1d : BaseSIMDLdSt<0, 1, opcode, 0b11, asm, - (outs !cast<RegisterOperand>(veclist # "1d"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - - def v1d_POST : BaseSIMDLdStPost<0, 1, opcode, 0b11, asm, - (outs !cast<RegisterOperand>(veclist # "1d"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; - } - - defm : SIMDLdStAliases<asm, "1d", Count, Offset64, 64>; -} - -multiclass BaseSIMDSt1<string Count, string asm, string veclist, - int Offset128, int Offset64, bits<4> opcode> - : BaseSIMDStN<Count, asm, veclist, Offset128, Offset64, opcode> { - - // ST1 instructions have extra "1d" variants. - let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in { - def v1d : BaseSIMDLdSt<0, 0, opcode, 0b11, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "1d"):$Vt, - am_simdnoindex:$vaddr), []>; - - def v1d_POST : BaseSIMDLdStPost<0, 0, opcode, 0b11, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "1d"):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; - } - - defm : SIMDLdStAliases<asm, "1d", Count, Offset64, 64>; -} - -multiclass SIMDLd1Multiple<string asm> { - defm One : BaseSIMDLd1<"One", asm, "VecListOne", 16, 8, 0b0111>; - defm Two : BaseSIMDLd1<"Two", asm, "VecListTwo", 32, 16, 0b1010>; - defm Three : BaseSIMDLd1<"Three", asm, "VecListThree", 48, 24, 0b0110>; - defm Four : BaseSIMDLd1<"Four", asm, "VecListFour", 64, 32, 0b0010>; -} - -multiclass SIMDSt1Multiple<string asm> { - defm One : BaseSIMDSt1<"One", asm, "VecListOne", 16, 8, 0b0111>; - defm Two : BaseSIMDSt1<"Two", asm, "VecListTwo", 32, 16, 0b1010>; - defm Three : BaseSIMDSt1<"Three", asm, "VecListThree", 48, 24, 0b0110>; - defm Four : BaseSIMDSt1<"Four", asm, "VecListFour", 64, 32, 0b0010>; -} - -multiclass SIMDLd2Multiple<string asm> { - defm Two : BaseSIMDLdN<"Two", asm, "VecListTwo", 32, 16, 0b1000>; -} - -multiclass SIMDSt2Multiple<string asm> { - defm Two : BaseSIMDStN<"Two", asm, "VecListTwo", 32, 16, 0b1000>; -} - -multiclass SIMDLd3Multiple<string asm> { - defm Three : BaseSIMDLdN<"Three", asm, "VecListThree", 48, 24, 0b0100>; -} - -multiclass SIMDSt3Multiple<string asm> { - defm Three : BaseSIMDStN<"Three", asm, "VecListThree", 48, 24, 0b0100>; -} - -multiclass SIMDLd4Multiple<string asm> { - defm Four : BaseSIMDLdN<"Four", asm, "VecListFour", 64, 32, 0b0000>; -} - -multiclass SIMDSt4Multiple<string asm> { - defm Four : BaseSIMDStN<"Four", asm, "VecListFour", 64, 32, 0b0000>; -} - -//--- -// AdvSIMD Load/store single-element -//--- - -class BaseSIMDLdStSingle<bit L, bit R, bits<3> opcode, - string asm, string operands, dag oops, dag iops, - list<dag> pattern> - : I<oops, iops, asm, operands, "", pattern> { - bits<5> Vt; - bits<5> vaddr; - let Inst{31} = 0; - let Inst{29-24} = 0b001101; - let Inst{22} = L; - let Inst{21} = R; - let Inst{15-13} = opcode; - let Inst{9-5} = vaddr; - let Inst{4-0} = Vt; - let DecoderMethod = "DecodeSIMDLdStSingle"; -} - -class BaseSIMDLdStSingleTied<bit L, bit R, bits<3> opcode, - string asm, string operands, dag oops, dag iops, - list<dag> pattern> - : I<oops, iops, asm, operands, "$Vt = $dst", pattern> { - bits<5> Vt; - bits<5> vaddr; - let Inst{31} = 0; - let Inst{29-24} = 0b001101; - let Inst{22} = L; - let Inst{21} = R; - let Inst{15-13} = opcode; - let Inst{9-5} = vaddr; - let Inst{4-0} = Vt; - let DecoderMethod = "DecodeSIMDLdStSingleTied"; -} - - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDLdR<bit Q, bit R, bits<3> opcode, bit S, 
bits<2> size, string asm, - Operand listtype> - : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, $vaddr", - (outs listtype:$Vt), (ins am_simdnoindex:$vaddr), []> { - let Inst{30} = Q; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = S; - let Inst{11-10} = size; -} -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDLdRPost<bit Q, bit R, bits<3> opcode, bit S, bits<2> size, - string asm, Operand listtype, Operand GPR64pi> - : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, $vaddr, $Xm", - (outs listtype:$Vt), - (ins am_simdnoindex:$vaddr, GPR64pi:$Xm), []> { - bits<5> Xm; - let Inst{30} = Q; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = S; - let Inst{11-10} = size; -} - -multiclass SIMDLdrAliases<string asm, string layout, string Count, - int Offset, int Size> { - // E.g. "ld1r { v0.8b }, [x1], #1" - // "ld1r.8b\t$Vt, $vaddr, #1" - // may get mapped to - // (LD1Rv8b_POST VecListOne8b:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias<asm # "\t$Vt, $vaddr, #" # Offset, - (!cast<Instruction>(NAME # "v" # layout # "_POST") - !cast<RegisterOperand>("VecList" # Count # layout):$Vt, - am_simdnoindex:$vaddr, XZR), 1>; - - // E.g. "ld1r.8b { v0 }, [x1], #1" - // "ld1r.8b\t$Vt, $vaddr, #1" - // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias<asm # "." # layout # "\t$Vt, $vaddr, #" # Offset, - (!cast<Instruction>(NAME # "v" # layout # "_POST") - !cast<RegisterOperand>("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr, XZR), 0>; - - // E.g. "ld1r.8b { v0 }, [x1]" - // "ld1r.8b\t$Vt, $vaddr" - // may get mapped to - // (LD1Rv8b VecListOne64:$Vt, am_simdnoindex:$vaddr) - def : InstAlias<asm # "." # layout # "\t$Vt, $vaddr", - (!cast<Instruction>(NAME # "v" # layout) - !cast<RegisterOperand>("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr), 0>; - - // E.g. "ld1r.8b { v0 }, [x1], x2" - // "ld1r.8b\t$Vt, $vaddr, $Xm" - // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, GPR64pi1:$Xm) - def : InstAlias<asm # "." 
# layout # "\t$Vt, $vaddr, $Xm", - (!cast<Instruction>(NAME # "v" # layout # "_POST") - !cast<RegisterOperand>("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>; -} - -multiclass SIMDLdR<bit R, bits<3> opcode, bit S, string asm, string Count, - int Offset1, int Offset2, int Offset4, int Offset8> { - def v8b : BaseSIMDLdR<0, R, opcode, S, 0b00, asm, - !cast<Operand>("VecList" # Count # "8b")>; - def v16b: BaseSIMDLdR<1, R, opcode, S, 0b00, asm, - !cast<Operand>("VecList" # Count #"16b")>; - def v4h : BaseSIMDLdR<0, R, opcode, S, 0b01, asm, - !cast<Operand>("VecList" # Count #"4h")>; - def v8h : BaseSIMDLdR<1, R, opcode, S, 0b01, asm, - !cast<Operand>("VecList" # Count #"8h")>; - def v2s : BaseSIMDLdR<0, R, opcode, S, 0b10, asm, - !cast<Operand>("VecList" # Count #"2s")>; - def v4s : BaseSIMDLdR<1, R, opcode, S, 0b10, asm, - !cast<Operand>("VecList" # Count #"4s")>; - def v1d : BaseSIMDLdR<0, R, opcode, S, 0b11, asm, - !cast<Operand>("VecList" # Count #"1d")>; - def v2d : BaseSIMDLdR<1, R, opcode, S, 0b11, asm, - !cast<Operand>("VecList" # Count #"2d")>; - - def v8b_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b00, asm, - !cast<Operand>("VecList" # Count # "8b"), - !cast<Operand>("GPR64pi" # Offset1)>; - def v16b_POST: BaseSIMDLdRPost<1, R, opcode, S, 0b00, asm, - !cast<Operand>("VecList" # Count # "16b"), - !cast<Operand>("GPR64pi" # Offset1)>; - def v4h_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b01, asm, - !cast<Operand>("VecList" # Count # "4h"), - !cast<Operand>("GPR64pi" # Offset2)>; - def v8h_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b01, asm, - !cast<Operand>("VecList" # Count # "8h"), - !cast<Operand>("GPR64pi" # Offset2)>; - def v2s_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b10, asm, - !cast<Operand>("VecList" # Count # "2s"), - !cast<Operand>("GPR64pi" # Offset4)>; - def v4s_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b10, asm, - !cast<Operand>("VecList" # Count # "4s"), - !cast<Operand>("GPR64pi" # Offset4)>; - def v1d_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b11, asm, - !cast<Operand>("VecList" # Count # "1d"), - !cast<Operand>("GPR64pi" # Offset8)>; - def v2d_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b11, asm, - !cast<Operand>("VecList" # Count # "2d"), - !cast<Operand>("GPR64pi" # Offset8)>; - - defm : SIMDLdrAliases<asm, "8b", Count, Offset1, 64>; - defm : SIMDLdrAliases<asm, "16b", Count, Offset1, 128>; - defm : SIMDLdrAliases<asm, "4h", Count, Offset2, 64>; - defm : SIMDLdrAliases<asm, "8h", Count, Offset2, 128>; - defm : SIMDLdrAliases<asm, "2s", Count, Offset4, 64>; - defm : SIMDLdrAliases<asm, "4s", Count, Offset4, 128>; - defm : SIMDLdrAliases<asm, "1d", Count, Offset8, 64>; - defm : SIMDLdrAliases<asm, "2d", Count, Offset8, 128>; -} - -class SIMDLdStSingleB<bit L, bit R, bits<3> opcode, string asm, - dag oops, dag iops, list<dag> pattern> - : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops, - pattern> { - // idx encoded in Q:S:size fields. - bits<4> idx; - let Inst{30} = idx{3}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{2}; - let Inst{11-10} = idx{1-0}; -} -class SIMDLdStSingleBTied<bit L, bit R, bits<3> opcode, string asm, - dag oops, dag iops, list<dag> pattern> - : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops, - pattern> { - // idx encoded in Q:S:size fields. 
- bits<4> idx; - let Inst{30} = idx{3}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{2}; - let Inst{11-10} = idx{1-0}; -} -class SIMDLdStSingleBPost<bit L, bit R, bits<3> opcode, string asm, - dag oops, dag iops> - : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm", - oops, iops, []> { - // idx encoded in Q:S:size fields. - bits<4> idx; - bits<5> Xm; - let Inst{30} = idx{3}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{2}; - let Inst{11-10} = idx{1-0}; -} -class SIMDLdStSingleBTiedPost<bit L, bit R, bits<3> opcode, string asm, - dag oops, dag iops> - : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm", - oops, iops, []> { - // idx encoded in Q:S:size fields. - bits<4> idx; - bits<5> Xm; - let Inst{30} = idx{3}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{2}; - let Inst{11-10} = idx{1-0}; -} - -class SIMDLdStSingleH<bit L, bit R, bits<3> opcode, bit size, string asm, - dag oops, dag iops, list<dag> pattern> - : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops, - pattern> { - // idx encoded in Q:S:size<1> fields. - bits<3> idx; - let Inst{30} = idx{2}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{1}; - let Inst{11} = idx{0}; - let Inst{10} = size; -} -class SIMDLdStSingleHTied<bit L, bit R, bits<3> opcode, bit size, string asm, - dag oops, dag iops, list<dag> pattern> - : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops, - pattern> { - // idx encoded in Q:S:size<1> fields. - bits<3> idx; - let Inst{30} = idx{2}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{1}; - let Inst{11} = idx{0}; - let Inst{10} = size; -} - -class SIMDLdStSingleHPost<bit L, bit R, bits<3> opcode, bit size, string asm, - dag oops, dag iops> - : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm", - oops, iops, []> { - // idx encoded in Q:S:size<1> fields. - bits<3> idx; - bits<5> Xm; - let Inst{30} = idx{2}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{1}; - let Inst{11} = idx{0}; - let Inst{10} = size; -} -class SIMDLdStSingleHTiedPost<bit L, bit R, bits<3> opcode, bit size, string asm, - dag oops, dag iops> - : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm", - oops, iops, []> { - // idx encoded in Q:S:size<1> fields. - bits<3> idx; - bits<5> Xm; - let Inst{30} = idx{2}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{1}; - let Inst{11} = idx{0}; - let Inst{10} = size; -} -class SIMDLdStSingleS<bit L, bit R, bits<3> opcode, bits<2> size, string asm, - dag oops, dag iops, list<dag> pattern> - : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops, - pattern> { - // idx encoded in Q:S fields. - bits<2> idx; - let Inst{30} = idx{1}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{0}; - let Inst{11-10} = size; -} -class SIMDLdStSingleSTied<bit L, bit R, bits<3> opcode, bits<2> size, string asm, - dag oops, dag iops, list<dag> pattern> - : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops, - pattern> { - // idx encoded in Q:S fields. - bits<2> idx; - let Inst{30} = idx{1}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{0}; - let Inst{11-10} = size; -} -class SIMDLdStSingleSPost<bit L, bit R, bits<3> opcode, bits<2> size, - string asm, dag oops, dag iops> - : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm", - oops, iops, []> { - // idx encoded in Q:S fields. 
- bits<2> idx; - bits<5> Xm; - let Inst{30} = idx{1}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{0}; - let Inst{11-10} = size; -} -class SIMDLdStSingleSTiedPost<bit L, bit R, bits<3> opcode, bits<2> size, - string asm, dag oops, dag iops> - : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm", - oops, iops, []> { - // idx encoded in Q:S fields. - bits<2> idx; - bits<5> Xm; - let Inst{30} = idx{1}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{0}; - let Inst{11-10} = size; -} -class SIMDLdStSingleD<bit L, bit R, bits<3> opcode, bits<2> size, string asm, - dag oops, dag iops, list<dag> pattern> - : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops, - pattern> { - // idx encoded in Q field. - bits<1> idx; - let Inst{30} = idx; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = 0; - let Inst{11-10} = size; -} -class SIMDLdStSingleDTied<bit L, bit R, bits<3> opcode, bits<2> size, string asm, - dag oops, dag iops, list<dag> pattern> - : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops, - pattern> { - // idx encoded in Q field. - bits<1> idx; - let Inst{30} = idx; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = 0; - let Inst{11-10} = size; -} -class SIMDLdStSingleDPost<bit L, bit R, bits<3> opcode, bits<2> size, - string asm, dag oops, dag iops> - : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm", - oops, iops, []> { - // idx encoded in Q field. - bits<1> idx; - bits<5> Xm; - let Inst{30} = idx; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = 0; - let Inst{11-10} = size; -} -class SIMDLdStSingleDTiedPost<bit L, bit R, bits<3> opcode, bits<2> size, - string asm, dag oops, dag iops> - : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm", - oops, iops, []> { - // idx encoded in Q field. 
- bits<1> idx; - bits<5> Xm; - let Inst{30} = idx; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = 0; - let Inst{11-10} = size; -} - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDLdSingleBTied<bit R, bits<3> opcode, string asm, - RegisterOperand listtype, - RegisterOperand GPR64pi> { - def i8 : SIMDLdStSingleBTied<1, R, opcode, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr), []>; - - def i8_POST : SIMDLdStSingleBTiedPost<1, R, opcode, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDLdSingleHTied<bit R, bits<3> opcode, bit size, string asm, - RegisterOperand listtype, - RegisterOperand GPR64pi> { - def i16 : SIMDLdStSingleHTied<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr), []>; - - def i16_POST : SIMDLdStSingleHTiedPost<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDLdSingleSTied<bit R, bits<3> opcode, bits<2> size,string asm, - RegisterOperand listtype, - RegisterOperand GPR64pi> { - def i32 : SIMDLdStSingleSTied<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr), []>; - - def i32_POST : SIMDLdStSingleSTiedPost<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDLdSingleDTied<bit R, bits<3> opcode, bits<2> size, string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i64 : SIMDLdStSingleDTied<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr), []>; - - def i64_POST : SIMDLdStSingleDTiedPost<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -multiclass SIMDStSingleB<bit R, bits<3> opcode, string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i8 : SIMDLdStSingleB<0, R, opcode, asm, - (outs), (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr), []>; - - def i8_POST : SIMDLdStSingleBPost<0, R, opcode, asm, - (outs), (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -multiclass SIMDStSingleH<bit R, bits<3> opcode, bit size, string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i16 : SIMDLdStSingleH<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr), []>; - - def i16_POST : SIMDLdStSingleHPost<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -multiclass SIMDStSingleS<bit R, bits<3> opcode, bits<2> size,string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i32 : SIMDLdStSingleS<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr), []>; - - def i32_POST : SIMDLdStSingleSPost<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr, 
GPR64pi:$Xm)>; -} -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -multiclass SIMDStSingleD<bit R, bits<3> opcode, bits<2> size, string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i64 : SIMDLdStSingleD<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr), []>; - - def i64_POST : SIMDLdStSingleDPost<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} - -multiclass SIMDLdStSingleAliases<string asm, string layout, string Type, - string Count, int Offset, Operand idxtype> { - // E.g. "ld1 { v0.8b }[0], [x1], #1" - // "ld1\t$Vt, $vaddr, #1" - // may get mapped to - // (LD1Rv8b_POST VecListOne8b:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias<asm # "\t$Vt$idx, $vaddr, #" # Offset, - (!cast<Instruction>(NAME # Type # "_POST") - !cast<RegisterOperand>("VecList" # Count # layout):$Vt, - idxtype:$idx, am_simdnoindex:$vaddr, XZR), 1>; - - // E.g. "ld1.8b { v0 }[0], [x1], #1" - // "ld1.8b\t$Vt, $vaddr, #1" - // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias<asm # "." # layout # "\t$Vt$idx, $vaddr, #" # Offset, - (!cast<Instruction>(NAME # Type # "_POST") - !cast<RegisterOperand>("VecList" # Count # "128"):$Vt, - idxtype:$idx, am_simdnoindex:$vaddr, XZR), 0>; - - // E.g. "ld1.8b { v0 }[0], [x1]" - // "ld1.8b\t$Vt, $vaddr" - // may get mapped to - // (LD1Rv8b VecListOne64:$Vt, am_simdnoindex:$vaddr) - def : InstAlias<asm # "." # layout # "\t$Vt$idx, $vaddr", - (!cast<Instruction>(NAME # Type) - !cast<RegisterOperand>("VecList" # Count # "128"):$Vt, - idxtype:$idx, am_simdnoindex:$vaddr), 0>; - - // E.g. "ld1.8b { v0 }[0], [x1], x2" - // "ld1.8b\t$Vt, $vaddr, $Xm" - // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, GPR64pi1:$Xm) - def : InstAlias<asm # "." 
# layout # "\t$Vt$idx, $vaddr, $Xm", - (!cast<Instruction>(NAME # Type # "_POST") - !cast<RegisterOperand>("VecList" # Count # "128"):$Vt, - idxtype:$idx, am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>; -} - -multiclass SIMDLdSt1SingleAliases<string asm> { - defm : SIMDLdStSingleAliases<asm, "b", "i8", "One", 1, VectorIndexB>; - defm : SIMDLdStSingleAliases<asm, "h", "i16", "One", 2, VectorIndexH>; - defm : SIMDLdStSingleAliases<asm, "s", "i32", "One", 4, VectorIndexS>; - defm : SIMDLdStSingleAliases<asm, "d", "i64", "One", 8, VectorIndexD>; -} - -multiclass SIMDLdSt2SingleAliases<string asm> { - defm : SIMDLdStSingleAliases<asm, "b", "i8", "Two", 2, VectorIndexB>; - defm : SIMDLdStSingleAliases<asm, "h", "i16", "Two", 4, VectorIndexH>; - defm : SIMDLdStSingleAliases<asm, "s", "i32", "Two", 8, VectorIndexS>; - defm : SIMDLdStSingleAliases<asm, "d", "i64", "Two", 16, VectorIndexD>; -} - -multiclass SIMDLdSt3SingleAliases<string asm> { - defm : SIMDLdStSingleAliases<asm, "b", "i8", "Three", 3, VectorIndexB>; - defm : SIMDLdStSingleAliases<asm, "h", "i16", "Three", 6, VectorIndexH>; - defm : SIMDLdStSingleAliases<asm, "s", "i32", "Three", 12, VectorIndexS>; - defm : SIMDLdStSingleAliases<asm, "d", "i64", "Three", 24, VectorIndexD>; -} - -multiclass SIMDLdSt4SingleAliases<string asm> { - defm : SIMDLdStSingleAliases<asm, "b", "i8", "Four", 4, VectorIndexB>; - defm : SIMDLdStSingleAliases<asm, "h", "i16", "Four", 8, VectorIndexH>; - defm : SIMDLdStSingleAliases<asm, "s", "i32", "Four", 16, VectorIndexS>; - defm : SIMDLdStSingleAliases<asm, "d", "i64", "Four", 32, VectorIndexD>; -} - -//---------------------------------------------------------------------------- -// Crypto extensions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class AESBase<bits<4> opc, string asm, dag outs, dag ins, string cstr, - list<dag> pat> - : I<outs, ins, asm, "{\t$Rd.16b, $Rn.16b|.16b\t$Rd, $Rn}", cstr, pat>, - Sched<[WriteV]>{ - bits<5> Rd; - bits<5> Rn; - let Inst{31-16} = 0b0100111000101000; - let Inst{15-12} = opc; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class AESInst<bits<4> opc, string asm, Intrinsic OpNode> - : AESBase<opc, asm, (outs V128:$Rd), (ins V128:$Rn), "", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - -class AESTiedInst<bits<4> opc, string asm, Intrinsic OpNode> - : AESBase<opc, asm, (outs V128:$dst), (ins V128:$Rd, V128:$Rn), - "$Rd = $dst", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>; - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class SHA3OpTiedInst<bits<3> opc, string asm, string dst_lhs_kind, - dag oops, dag iops, list<dag> pat> - : I<oops, iops, asm, - "{\t$Rd" # dst_lhs_kind # ", $Rn" # dst_lhs_kind # ", $Rm.4s" # - "|.4s\t$Rd, $Rn, $Rm}", "$Rd = $dst", pat>, - Sched<[WriteV]>{ - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-21} = 0b01011110000; - let Inst{20-16} = Rm; - let Inst{15} = 0; - let Inst{14-12} = opc; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SHATiedInstQSV<bits<3> opc, string asm, Intrinsic OpNode> - : SHA3OpTiedInst<opc, asm, "", (outs FPR128:$dst), - (ins FPR128:$Rd, FPR32:$Rn, V128:$Rm), - [(set (v4i32 FPR128:$dst), - (OpNode (v4i32 FPR128:$Rd), (i32 FPR32:$Rn), - (v4i32 V128:$Rm)))]>; - -class SHATiedInstVVV<bits<3> opc, string asm, Intrinsic OpNode> - : SHA3OpTiedInst<opc, asm, ".4s", (outs V128:$dst), - (ins V128:$Rd, 
V128:$Rn, V128:$Rm), - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), - (v4i32 V128:$Rm)))]>; - -class SHATiedInstQQV<bits<3> opc, string asm, Intrinsic OpNode> - : SHA3OpTiedInst<opc, asm, "", (outs FPR128:$dst), - (ins FPR128:$Rd, FPR128:$Rn, V128:$Rm), - [(set (v4i32 FPR128:$dst), - (OpNode (v4i32 FPR128:$Rd), (v4i32 FPR128:$Rn), - (v4i32 V128:$Rm)))]>; - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class SHA2OpInst<bits<4> opc, string asm, string kind, - string cstr, dag oops, dag iops, - list<dag> pat> - : I<oops, iops, asm, "{\t$Rd" # kind # ", $Rn" # kind # - "|" # kind # "\t$Rd, $Rn}", cstr, pat>, - Sched<[WriteV]>{ - bits<5> Rd; - bits<5> Rn; - let Inst{31-16} = 0b0101111000101000; - let Inst{15-12} = opc; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SHATiedInstVV<bits<4> opc, string asm, Intrinsic OpNode> - : SHA2OpInst<opc, asm, ".4s", "$Rd = $dst", (outs V128:$dst), - (ins V128:$Rd, V128:$Rn), - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>; - -class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode> - : SHA2OpInst<opc, asm, "", "", (outs FPR32:$Rd), (ins FPR32:$Rn), - [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>; - -// Allow the size specifier tokens to be upper case, not just lower. -def : TokenAlias<".8B", ".8b">; -def : TokenAlias<".4H", ".4h">; -def : TokenAlias<".2S", ".2s">; -def : TokenAlias<".1D", ".1d">; -def : TokenAlias<".16B", ".16b">; -def : TokenAlias<".8H", ".8h">; -def : TokenAlias<".4S", ".4s">; -def : TokenAlias<".2D", ".2d">; -def : TokenAlias<".B", ".b">; -def : TokenAlias<".H", ".h">; -def : TokenAlias<".S", ".s">; -def : TokenAlias<".D", ".d">; diff --git a/lib/Target/ARM64/ARM64InstrInfo.cpp b/lib/Target/ARM64/ARM64InstrInfo.cpp deleted file mode 100644 index 8f11757..0000000 --- a/lib/Target/ARM64/ARM64InstrInfo.cpp +++ /dev/null @@ -1,1864 +0,0 @@ -//===- ARM64InstrInfo.cpp - ARM64 Instruction Information -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM64 implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#include "ARM64InstrInfo.h" -#include "ARM64Subtarget.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/MC/MCInst.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_INSTRINFO_CTOR_DTOR -#include "ARM64GenInstrInfo.inc" - -using namespace llvm; - -ARM64InstrInfo::ARM64InstrInfo(const ARM64Subtarget &STI) - : ARM64GenInstrInfo(ARM64::ADJCALLSTACKDOWN, ARM64::ADJCALLSTACKUP), - RI(this, &STI), Subtarget(STI) {} - -/// GetInstSize - Return the number of bytes of code the specified -/// instruction may be. This returns the maximum number of bytes. -unsigned ARM64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - const MCInstrDesc &Desc = MI->getDesc(); - - switch (Desc.getOpcode()) { - default: - // Anything not explicitly designated otherwise is a nomal 4-byte insn. 
- return 4; - case TargetOpcode::DBG_VALUE: - case TargetOpcode::EH_LABEL: - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - return 0; - } - - llvm_unreachable("GetInstSizeInBytes()- Unable to determin insn size"); -} - -static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, - SmallVectorImpl<MachineOperand> &Cond) { - // Block ends with fall-through condbranch. - switch (LastInst->getOpcode()) { - default: - llvm_unreachable("Unknown branch instruction?"); - case ARM64::Bcc: - Target = LastInst->getOperand(1).getMBB(); - Cond.push_back(LastInst->getOperand(0)); - break; - case ARM64::CBZW: - case ARM64::CBZX: - case ARM64::CBNZW: - case ARM64::CBNZX: - Target = LastInst->getOperand(1).getMBB(); - Cond.push_back(MachineOperand::CreateImm(-1)); - Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); - Cond.push_back(LastInst->getOperand(0)); - break; - case ARM64::TBZ: - case ARM64::TBNZ: - Target = LastInst->getOperand(2).getMBB(); - Cond.push_back(MachineOperand::CreateImm(-1)); - Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); - Cond.push_back(LastInst->getOperand(0)); - Cond.push_back(LastInst->getOperand(1)); - } -} - -// Branch analysis. -bool ARM64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, - MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) - return false; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } - if (!isUnpredicatedTerminator(I)) - return false; - - // Get the last instruction in the block. - MachineInstr *LastInst = I; - - // If there is only one terminator instruction, process it. - unsigned LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (isUncondBranchOpcode(LastOpc)) { - TBB = LastInst->getOperand(0).getMBB(); - return false; - } - if (isCondBranchOpcode(LastOpc)) { - // Block ends with fall-through condbranch. - parseCondBranch(LastInst, TBB, Cond); - return false; - } - return true; // Can't handle indirect branch. - } - - // Get the instruction before it if it is a terminator. - MachineInstr *SecondLastInst = I; - unsigned SecondLastOpc = SecondLastInst->getOpcode(); - - // If AllowModify is true and the block ends with two or more unconditional - // branches, delete all but the first unconditional branch. - if (AllowModify && isUncondBranchOpcode(LastOpc)) { - while (isUncondBranchOpcode(SecondLastOpc)) { - LastInst->eraseFromParent(); - LastInst = SecondLastInst; - LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - // Return now the only terminator is an unconditional branch. - TBB = LastInst->getOperand(0).getMBB(); - return false; - } else { - SecondLastInst = I; - SecondLastOpc = SecondLastInst->getOpcode(); - } - } - } - - // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) - return true; - - // If the block ends with a B and a Bcc, handle it. - if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - parseCondBranch(SecondLastInst, TBB, Cond); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } - - // If the block ends with two unconditional branches, handle it. The second - // one is not executed, so remove it. 
- if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return false; - } - - // ...likewise if it ends with an indirect branch followed by an unconditional - // branch. - if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return true; - } - - // Otherwise, can't handle this. - return true; -} - -bool ARM64InstrInfo::ReverseBranchCondition( - SmallVectorImpl<MachineOperand> &Cond) const { - if (Cond[0].getImm() != -1) { - // Regular Bcc - ARM64CC::CondCode CC = (ARM64CC::CondCode)(int)Cond[0].getImm(); - Cond[0].setImm(ARM64CC::getInvertedCondCode(CC)); - } else { - // Folded compare-and-branch - switch (Cond[1].getImm()) { - default: - llvm_unreachable("Unknown conditional branch!"); - case ARM64::CBZW: - Cond[1].setImm(ARM64::CBNZW); - break; - case ARM64::CBNZW: - Cond[1].setImm(ARM64::CBZW); - break; - case ARM64::CBZX: - Cond[1].setImm(ARM64::CBNZX); - break; - case ARM64::CBNZX: - Cond[1].setImm(ARM64::CBZX); - break; - case ARM64::TBZ: - Cond[1].setImm(ARM64::TBNZ); - break; - case ARM64::TBNZ: - Cond[1].setImm(ARM64::TBZ); - break; - } - } - - return false; -} - -unsigned ARM64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) - return 0; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return 0; - --I; - } - if (!isUncondBranchOpcode(I->getOpcode()) && - !isCondBranchOpcode(I->getOpcode())) - return 0; - - // Remove the branch. - I->eraseFromParent(); - - I = MBB.end(); - - if (I == MBB.begin()) - return 1; - --I; - if (!isCondBranchOpcode(I->getOpcode())) - return 1; - - // Remove the branch. - I->eraseFromParent(); - return 2; -} - -void ARM64InstrInfo::instantiateCondBranch( - MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - if (Cond[0].getImm() != -1) { - // Regular Bcc - BuildMI(&MBB, DL, get(ARM64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB); - } else { - // Folded compare-and-branch - const MachineInstrBuilder MIB = - BuildMI(&MBB, DL, get(Cond[1].getImm())).addReg(Cond[2].getReg()); - if (Cond.size() > 3) - MIB.addImm(Cond[3].getImm()); - MIB.addMBB(TBB); - } -} - -unsigned ARM64InstrInfo::InsertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const { - // Shouldn't be a fall through. - assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - - if (FBB == 0) { - if (Cond.empty()) // Unconditional branch? - BuildMI(&MBB, DL, get(ARM64::B)).addMBB(TBB); - else - instantiateCondBranch(MBB, DL, TBB, Cond); - return 1; - } - - // Two-way conditional branch. - instantiateCondBranch(MBB, DL, TBB, Cond); - BuildMI(&MBB, DL, get(ARM64::B)).addMBB(FBB); - return 2; -} - -// Find the original register that VReg is copied from. -static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { - while (TargetRegisterInfo::isVirtualRegister(VReg)) { - const MachineInstr *DefMI = MRI.getVRegDef(VReg); - if (!DefMI->isFullCopy()) - return VReg; - VReg = DefMI->getOperand(1).getReg(); - } - return VReg; -} - -// Determine if VReg is defined by an instruction that can be folded into a -// csel instruction. If so, return the folded opcode, and the replacement -// register. 
-static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, - unsigned *NewVReg = 0) { - VReg = removeCopies(MRI, VReg); - if (!TargetRegisterInfo::isVirtualRegister(VReg)) - return 0; - - bool Is64Bit = ARM64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); - const MachineInstr *DefMI = MRI.getVRegDef(VReg); - unsigned Opc = 0; - unsigned SrcOpNum = 0; - switch (DefMI->getOpcode()) { - case ARM64::ADDSXri: - case ARM64::ADDSWri: - // if CPSR is used, do not fold. - if (DefMI->findRegisterDefOperandIdx(ARM64::CPSR, true) == -1) - return 0; - // fall-through to ADDXri and ADDWri. - case ARM64::ADDXri: - case ARM64::ADDWri: - // add x, 1 -> csinc. - if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 || - DefMI->getOperand(3).getImm() != 0) - return 0; - SrcOpNum = 1; - Opc = Is64Bit ? ARM64::CSINCXr : ARM64::CSINCWr; - break; - - case ARM64::ORNXrr: - case ARM64::ORNWrr: { - // not x -> csinv, represented as orn dst, xzr, src. - unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); - if (ZReg != ARM64::XZR && ZReg != ARM64::WZR) - return 0; - SrcOpNum = 2; - Opc = Is64Bit ? ARM64::CSINVXr : ARM64::CSINVWr; - break; - } - - case ARM64::SUBSXrr: - case ARM64::SUBSWrr: - // if CPSR is used, do not fold. - if (DefMI->findRegisterDefOperandIdx(ARM64::CPSR, true) == -1) - return 0; - // fall-through to SUBXrr and SUBWrr. - case ARM64::SUBXrr: - case ARM64::SUBWrr: { - // neg x -> csneg, represented as sub dst, xzr, src. - unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); - if (ZReg != ARM64::XZR && ZReg != ARM64::WZR) - return 0; - SrcOpNum = 2; - Opc = Is64Bit ? ARM64::CSNEGXr : ARM64::CSNEGWr; - break; - } - default: - return 0; - } - assert(Opc && SrcOpNum && "Missing parameters"); - - if (NewVReg) - *NewVReg = DefMI->getOperand(SrcOpNum).getReg(); - return Opc; -} - -bool ARM64InstrInfo::canInsertSelect( - const MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand> &Cond, - unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, - int &FalseCycles) const { - // Check register classes. - const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - const TargetRegisterClass *RC = - RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); - if (!RC) - return false; - - // Expanding cbz/tbz requires an extra cycle of latency on the condition. - unsigned ExtraCondLat = Cond.size() != 1; - - // GPRs are handled by csel. - // FIXME: Fold in x+1, -x, and ~x when applicable. - if (ARM64::GPR64allRegClass.hasSubClassEq(RC) || - ARM64::GPR32allRegClass.hasSubClassEq(RC)) { - // Single-cycle csel, csinc, csinv, and csneg. - CondCycles = 1 + ExtraCondLat; - TrueCycles = FalseCycles = 1; - if (canFoldIntoCSel(MRI, TrueReg)) - TrueCycles = 0; - else if (canFoldIntoCSel(MRI, FalseReg)) - FalseCycles = 0; - return true; - } - - // Scalar floating point is handled by fcsel. - // FIXME: Form fabs, fmin, and fmax when applicable. - if (ARM64::FPR64RegClass.hasSubClassEq(RC) || - ARM64::FPR32RegClass.hasSubClassEq(RC)) { - CondCycles = 5 + ExtraCondLat; - TrueCycles = FalseCycles = 2; - return true; - } - - // Can't do vectors. - return false; -} - -void ARM64InstrInfo::insertSelect(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DstReg, - const SmallVectorImpl<MachineOperand> &Cond, - unsigned TrueReg, unsigned FalseReg) const { - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - - // Parse the condition code, see parseCondBranch() above. 
- ARM64CC::CondCode CC;
- switch (Cond.size()) {
- default:
- llvm_unreachable("Unknown condition opcode in Cond");
- case 1: // b.cc
- CC = ARM64CC::CondCode(Cond[0].getImm());
- break;
- case 3: { // cbz/cbnz
- // We must insert a compare against 0.
- bool Is64Bit;
- switch (Cond[1].getImm()) {
- default:
- llvm_unreachable("Unknown branch opcode in Cond");
- case ARM64::CBZW:
- Is64Bit = 0;
- CC = ARM64CC::EQ;
- break;
- case ARM64::CBZX:
- Is64Bit = 1;
- CC = ARM64CC::EQ;
- break;
- case ARM64::CBNZW:
- Is64Bit = 0;
- CC = ARM64CC::NE;
- break;
- case ARM64::CBNZX:
- Is64Bit = 1;
- CC = ARM64CC::NE;
- break;
- }
- unsigned SrcReg = Cond[2].getReg();
- if (Is64Bit) {
- // cmp reg, #0 is actually subs xzr, reg, #0.
- MRI.constrainRegClass(SrcReg, &ARM64::GPR64spRegClass);
- BuildMI(MBB, I, DL, get(ARM64::SUBSXri), ARM64::XZR)
- .addReg(SrcReg)
- .addImm(0)
- .addImm(0);
- } else {
- MRI.constrainRegClass(SrcReg, &ARM64::GPR32spRegClass);
- BuildMI(MBB, I, DL, get(ARM64::SUBSWri), ARM64::WZR)
- .addReg(SrcReg)
- .addImm(0)
- .addImm(0);
- }
- break;
- }
- case 4: { // tbz/tbnz
- // We must insert a tst instruction.
- switch (Cond[1].getImm()) {
- default:
- llvm_unreachable("Unknown branch opcode in Cond");
- case ARM64::TBZ:
- CC = ARM64CC::EQ;
- break;
- case ARM64::TBNZ:
- CC = ARM64CC::NE;
- break;
- }
- // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
- BuildMI(MBB, I, DL, get(ARM64::ANDSXri), ARM64::XZR)
- .addReg(Cond[2].getReg())
- .addImm(ARM64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
- break;
- }
- }
-
- unsigned Opc = 0;
- const TargetRegisterClass *RC = 0;
- bool TryFold = false;
- if (MRI.constrainRegClass(DstReg, &ARM64::GPR64RegClass)) {
- RC = &ARM64::GPR64RegClass;
- Opc = ARM64::CSELXr;
- TryFold = true;
- } else if (MRI.constrainRegClass(DstReg, &ARM64::GPR32RegClass)) {
- RC = &ARM64::GPR32RegClass;
- Opc = ARM64::CSELWr;
- TryFold = true;
- } else if (MRI.constrainRegClass(DstReg, &ARM64::FPR64RegClass)) {
- RC = &ARM64::FPR64RegClass;
- Opc = ARM64::FCSELDrrr;
- } else if (MRI.constrainRegClass(DstReg, &ARM64::FPR32RegClass)) {
- RC = &ARM64::FPR32RegClass;
- Opc = ARM64::FCSELSrrr;
- }
- assert(RC && "Unsupported regclass");
-
- // Try folding simple instructions into the csel.
- if (TryFold) {
- unsigned NewVReg = 0;
- unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
- if (FoldedOpc) {
- // The folded opcodes csinc, csinv and csneg apply the operation to
- // FalseReg, so we need to invert the condition.
- CC = ARM64CC::getInvertedCondCode(CC);
- TrueReg = FalseReg;
- } else
- FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
-
- // Fold the operation. Leave any dead instructions for DCE to clean up.
- if (FoldedOpc) {
- FalseReg = NewVReg;
- Opc = FoldedOpc;
- // This extends the live range of NewVReg.
- MRI.clearKillFlags(NewVReg);
- }
- }
-
- // Pull all virtual registers into the appropriate class.
- MRI.constrainRegClass(TrueReg, RC);
- MRI.constrainRegClass(FalseReg, RC);
-
- // Insert the csel.
- BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
- CC);
-}
-
-bool ARM64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SubIdx) const {
- switch (MI.getOpcode()) {
- default:
- return false;
- case ARM64::SBFMXri: // aka sxtw
- case ARM64::UBFMXri: // aka uxtw
- // Check for the 32 -> 64 bit extension case, these instructions can do
- // much more.
- if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31) - return false; - // This is a signed or unsigned 32 -> 64 bit extension. - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - SubIdx = ARM64::sub_32; - return true; - } -} - -/// analyzeCompare - For a comparison instruction, return the source registers -/// in SrcReg and SrcReg2, and the value it compares against in CmpValue. -/// Return true if the comparison instruction can be analyzed. -bool ARM64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, - unsigned &SrcReg2, int &CmpMask, - int &CmpValue) const { - switch (MI->getOpcode()) { - default: - break; - case ARM64::SUBSWrr: - case ARM64::SUBSWrs: - case ARM64::SUBSWrx: - case ARM64::SUBSXrr: - case ARM64::SUBSXrs: - case ARM64::SUBSXrx: - case ARM64::ADDSWrr: - case ARM64::ADDSWrs: - case ARM64::ADDSWrx: - case ARM64::ADDSXrr: - case ARM64::ADDSXrs: - case ARM64::ADDSXrx: - // Replace SUBSWrr with SUBWrr if CPSR is not used. - SrcReg = MI->getOperand(1).getReg(); - SrcReg2 = MI->getOperand(2).getReg(); - CmpMask = ~0; - CmpValue = 0; - return true; - case ARM64::SUBSWri: - case ARM64::ADDSWri: - case ARM64::ANDSWri: - case ARM64::SUBSXri: - case ARM64::ADDSXri: - case ARM64::ANDSXri: - SrcReg = MI->getOperand(1).getReg(); - SrcReg2 = 0; - CmpMask = ~0; - CmpValue = MI->getOperand(2).getImm(); - return true; - } - - return false; -} - -static bool UpdateOperandRegClass(MachineInstr *Instr) { - MachineBasicBlock *MBB = Instr->getParent(); - assert(MBB && "Can't get MachineBasicBlock here"); - MachineFunction *MF = MBB->getParent(); - assert(MF && "Can't get MachineFunction here"); - const TargetMachine *TM = &MF->getTarget(); - const TargetInstrInfo *TII = TM->getInstrInfo(); - const TargetRegisterInfo *TRI = TM->getRegisterInfo(); - MachineRegisterInfo *MRI = &MF->getRegInfo(); - - for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx; - ++OpIdx) { - MachineOperand &MO = Instr->getOperand(OpIdx); - const TargetRegisterClass *OpRegCstraints = - Instr->getRegClassConstraint(OpIdx, TII, TRI); - - // If there's no constraint, there's nothing to do. - if (!OpRegCstraints) - continue; - // If the operand is a frame index, there's nothing to do here. - // A frame index operand will resolve correctly during PEI. - if (MO.isFI()) - continue; - - assert(MO.isReg() && - "Operand has register constraints without being a register!"); - - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (!OpRegCstraints->contains(Reg)) - return false; - } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) && - !MRI->constrainRegClass(Reg, OpRegCstraints)) - return false; - } - - return true; -} - -/// optimizeCompareInstr - Convert the instruction supplying the argument to the -/// comparison into one that sets the zero bit in the flags register. -bool ARM64InstrInfo::optimizeCompareInstr( - MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, - int CmpValue, const MachineRegisterInfo *MRI) const { - - // Replace SUBSWrr with SUBWrr if CPSR is not used. 
- int Cmp_CPSR = CmpInstr->findRegisterDefOperandIdx(ARM64::CPSR, true); - if (Cmp_CPSR != -1) { - unsigned NewOpc; - switch (CmpInstr->getOpcode()) { - default: - return false; - case ARM64::ADDSWrr: NewOpc = ARM64::ADDWrr; break; - case ARM64::ADDSWri: NewOpc = ARM64::ADDWri; break; - case ARM64::ADDSWrs: NewOpc = ARM64::ADDWrs; break; - case ARM64::ADDSWrx: NewOpc = ARM64::ADDWrx; break; - case ARM64::ADDSXrr: NewOpc = ARM64::ADDXrr; break; - case ARM64::ADDSXri: NewOpc = ARM64::ADDXri; break; - case ARM64::ADDSXrs: NewOpc = ARM64::ADDXrs; break; - case ARM64::ADDSXrx: NewOpc = ARM64::ADDXrx; break; - case ARM64::SUBSWrr: NewOpc = ARM64::SUBWrr; break; - case ARM64::SUBSWri: NewOpc = ARM64::SUBWri; break; - case ARM64::SUBSWrs: NewOpc = ARM64::SUBWrs; break; - case ARM64::SUBSWrx: NewOpc = ARM64::SUBWrx; break; - case ARM64::SUBSXrr: NewOpc = ARM64::SUBXrr; break; - case ARM64::SUBSXri: NewOpc = ARM64::SUBXri; break; - case ARM64::SUBSXrs: NewOpc = ARM64::SUBXrs; break; - case ARM64::SUBSXrx: NewOpc = ARM64::SUBXrx; break; - } - - const MCInstrDesc &MCID = get(NewOpc); - CmpInstr->setDesc(MCID); - CmpInstr->RemoveOperand(Cmp_CPSR); - bool succeeded = UpdateOperandRegClass(CmpInstr); - (void)succeeded; - assert(succeeded && "Some operands reg class are incompatible!"); - return true; - } - - // Continue only if we have a "ri" where immediate is zero. - if (CmpValue != 0 || SrcReg2 != 0) - return false; - - // CmpInstr is a Compare instruction if destination register is not used. - if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg())) - return false; - - // Get the unique definition of SrcReg. - MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); - if (!MI) - return false; - - // We iterate backward, starting from the instruction before CmpInstr and - // stop when reaching the definition of the source register or done with the - // basic block, to check whether CPSR is used or modified in between. - MachineBasicBlock::iterator I = CmpInstr, E = MI, - B = CmpInstr->getParent()->begin(); - - // Early exit if CmpInstr is at the beginning of the BB. - if (I == B) - return false; - - // Check whether the definition of SrcReg is in the same basic block as - // Compare. If not, we can't optimize away the Compare. - if (MI->getParent() != CmpInstr->getParent()) - return false; - - // Check that CPSR isn't set between the comparison instruction and the one we - // want to change. - const TargetRegisterInfo *TRI = &getRegisterInfo(); - for (--I; I != E; --I) { - const MachineInstr &Instr = *I; - - if (Instr.modifiesRegister(ARM64::CPSR, TRI) || - Instr.readsRegister(ARM64::CPSR, TRI)) - // This instruction modifies or uses CPSR after the one we want to - // change. We can't do this transformation. - return false; - if (I == B) - // The 'and' is below the comparison instruction. 
- return false; - } - - unsigned NewOpc = MI->getOpcode(); - switch (MI->getOpcode()) { - default: - return false; - case ARM64::ADDSWrr: - case ARM64::ADDSWri: - case ARM64::ADDSXrr: - case ARM64::ADDSXri: - case ARM64::SUBSWrr: - case ARM64::SUBSWri: - case ARM64::SUBSXrr: - case ARM64::SUBSXri: - break; - case ARM64::ADDWrr: NewOpc = ARM64::ADDSWrr; break; - case ARM64::ADDWri: NewOpc = ARM64::ADDSWri; break; - case ARM64::ADDXrr: NewOpc = ARM64::ADDSXrr; break; - case ARM64::ADDXri: NewOpc = ARM64::ADDSXri; break; - case ARM64::ADCWr: NewOpc = ARM64::ADCSWr; break; - case ARM64::ADCXr: NewOpc = ARM64::ADCSXr; break; - case ARM64::SUBWrr: NewOpc = ARM64::SUBSWrr; break; - case ARM64::SUBWri: NewOpc = ARM64::SUBSWri; break; - case ARM64::SUBXrr: NewOpc = ARM64::SUBSXrr; break; - case ARM64::SUBXri: NewOpc = ARM64::SUBSXri; break; - case ARM64::SBCWr: NewOpc = ARM64::SBCSWr; break; - case ARM64::SBCXr: NewOpc = ARM64::SBCSXr; break; - case ARM64::ANDWri: NewOpc = ARM64::ANDSWri; break; - case ARM64::ANDXri: NewOpc = ARM64::ANDSXri; break; - } - - // Scan forward for the use of CPSR. - // When checking against MI: if it's a conditional code requires - // checking of V bit, then this is not safe to do. - // It is safe to remove CmpInstr if CPSR is redefined or killed. - // If we are done with the basic block, we need to check whether CPSR is - // live-out. - bool IsSafe = false; - for (MachineBasicBlock::iterator I = CmpInstr, - E = CmpInstr->getParent()->end(); - !IsSafe && ++I != E;) { - const MachineInstr &Instr = *I; - for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO; - ++IO) { - const MachineOperand &MO = Instr.getOperand(IO); - if (MO.isRegMask() && MO.clobbersPhysReg(ARM64::CPSR)) { - IsSafe = true; - break; - } - if (!MO.isReg() || MO.getReg() != ARM64::CPSR) - continue; - if (MO.isDef()) { - IsSafe = true; - break; - } - - // Decode the condition code. - unsigned Opc = Instr.getOpcode(); - ARM64CC::CondCode CC; - switch (Opc) { - default: - return false; - case ARM64::Bcc: - CC = (ARM64CC::CondCode)Instr.getOperand(IO - 2).getImm(); - break; - case ARM64::CSINVWr: - case ARM64::CSINVXr: - case ARM64::CSINCWr: - case ARM64::CSINCXr: - case ARM64::CSELWr: - case ARM64::CSELXr: - case ARM64::CSNEGWr: - case ARM64::CSNEGXr: - CC = (ARM64CC::CondCode)Instr.getOperand(IO - 1).getImm(); - break; - } - - // It is not safe to remove Compare instruction if Overflow(V) is used. - switch (CC) { - default: - // CPSR can be used multiple times, we should continue. - break; - case ARM64CC::VS: - case ARM64CC::VC: - case ARM64CC::GE: - case ARM64CC::LT: - case ARM64CC::GT: - case ARM64CC::LE: - return false; - } - } - } - - // If CPSR is not killed nor re-defined, we should check whether it is - // live-out. If it is live-out, do not optimize. - if (!IsSafe) { - MachineBasicBlock *MBB = CmpInstr->getParent(); - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); - SI != SE; ++SI) - if ((*SI)->isLiveIn(ARM64::CPSR)) - return false; - } - - // Update the instruction to set CPSR. - MI->setDesc(get(NewOpc)); - CmpInstr->eraseFromParent(); - bool succeeded = UpdateOperandRegClass(MI); - (void)succeeded; - assert(succeeded && "Some operands reg class are incompatible!"); - MI->addRegisterDefined(ARM64::CPSR, TRI); - return true; -} - -// Return true if this instruction simply sets its single destination register -// to zero. This is equivalent to a register rename of the zero-register. 
-bool ARM64InstrInfo::isGPRZero(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: - break; - case ARM64::MOVZWi: - case ARM64::MOVZXi: // movz Rd, #0 (LSL #0) - if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) { - assert(MI->getDesc().getNumOperands() == 3 && - MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands"); - return true; - } - break; - case ARM64::ANDWri: // and Rd, Rzr, #imm - return MI->getOperand(1).getReg() == ARM64::WZR; - case ARM64::ANDXri: - return MI->getOperand(1).getReg() == ARM64::XZR; - case TargetOpcode::COPY: - return MI->getOperand(1).getReg() == ARM64::WZR; - } - return false; -} - -// Return true if this instruction simply renames a general register without -// modifying bits. -bool ARM64InstrInfo::isGPRCopy(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: - break; - case TargetOpcode::COPY: { - // GPR32 copies will by lowered to ORRXrs - unsigned DstReg = MI->getOperand(0).getReg(); - return (ARM64::GPR32RegClass.contains(DstReg) || - ARM64::GPR64RegClass.contains(DstReg)); - } - case ARM64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0) - if (MI->getOperand(1).getReg() == ARM64::XZR) { - assert(MI->getDesc().getNumOperands() == 4 && - MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands"); - return true; - } - case ARM64::ADDXri: // add Xd, Xn, #0 (LSL #0) - if (MI->getOperand(2).getImm() == 0) { - assert(MI->getDesc().getNumOperands() == 4 && - MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands"); - return true; - } - } - return false; -} - -// Return true if this instruction simply renames a general register without -// modifying bits. -bool ARM64InstrInfo::isFPRCopy(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: - break; - case TargetOpcode::COPY: { - // FPR64 copies will by lowered to ORR.16b - unsigned DstReg = MI->getOperand(0).getReg(); - return (ARM64::FPR64RegClass.contains(DstReg) || - ARM64::FPR128RegClass.contains(DstReg)); - } - case ARM64::ORRv16i8: - if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { - assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() && - "invalid ORRv16i8 operands"); - return true; - } - } - return false; -} - -unsigned ARM64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: - break; - case ARM64::LDRWui: - case ARM64::LDRXui: - case ARM64::LDRBui: - case ARM64::LDRHui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - - return 0; -} - -unsigned ARM64InstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: - break; - case ARM64::STRWui: - case ARM64::STRXui: - case ARM64::STRBui: - case ARM64::STRHui: - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - return 0; -} - -/// Return true if this is load/store scales or extends its register offset. -/// This refers to scaling a dynamic index as opposed to scaled immediates. 
-/// MI should be a memory op that allows scaled addressing. -bool ARM64InstrInfo::isScaledAddr(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: - break; - case ARM64::LDRBBro: - case ARM64::LDRBro: - case ARM64::LDRDro: - case ARM64::LDRHHro: - case ARM64::LDRHro: - case ARM64::LDRQro: - case ARM64::LDRSBWro: - case ARM64::LDRSBXro: - case ARM64::LDRSHWro: - case ARM64::LDRSHXro: - case ARM64::LDRSWro: - case ARM64::LDRSro: - case ARM64::LDRWro: - case ARM64::LDRXro: - case ARM64::STRBBro: - case ARM64::STRBro: - case ARM64::STRDro: - case ARM64::STRHHro: - case ARM64::STRHro: - case ARM64::STRQro: - case ARM64::STRSro: - case ARM64::STRWro: - case ARM64::STRXro: - unsigned Val = MI->getOperand(3).getImm(); - ARM64_AM::ExtendType ExtType = ARM64_AM::getMemExtendType(Val); - return (ExtType != ARM64_AM::UXTX) || ARM64_AM::getMemDoShift(Val); - } - return false; -} - -/// Check all MachineMemOperands for a hint to suppress pairing. -bool ARM64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const { - assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) && - "Too many target MO flags"); - for (MachineInstr::mmo_iterator MM = MI->memoperands_begin(), - E = MI->memoperands_end(); - MM != E; ++MM) { - - if ((*MM)->getFlags() & - (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) { - return true; - } - } - return false; -} - -/// Set a flag on the first MachineMemOperand to suppress pairing. -void ARM64InstrInfo::suppressLdStPair(MachineInstr *MI) const { - if (MI->memoperands_empty()) - return; - - assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) && - "Too many target MO flags"); - (*MI->memoperands_begin()) - ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit); -} - -bool ARM64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, - unsigned &Offset, - const TargetRegisterInfo *TRI) const { - switch (LdSt->getOpcode()) { - default: - return false; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - case ARM64::STRXui: - case ARM64::STRWui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - case ARM64::LDRXui: - case ARM64::LDRWui: - if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm()) - return false; - BaseReg = LdSt->getOperand(1).getReg(); - MachineFunction &MF = *LdSt->getParent()->getParent(); - unsigned Width = getRegClass(LdSt->getDesc(), 0, TRI, MF)->getSize(); - Offset = LdSt->getOperand(2).getImm() * Width; - return true; - }; -} - -/// Detect opportunities for ldp/stp formation. -/// -/// Only called for LdSt for which getLdStBaseRegImmOfs returns true. -bool ARM64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, - MachineInstr *SecondLdSt, - unsigned NumLoads) const { - // Only cluster up to a single pair. - if (NumLoads > 1) - return false; - if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode()) - return false; - // getLdStBaseRegImmOfs guarantees that oper 2 isImm. - unsigned Ofs1 = FirstLdSt->getOperand(2).getImm(); - // Allow 6 bits of positive range. - if (Ofs1 > 64) - return false; - // The caller should already have ordered First/SecondLdSt by offset. - unsigned Ofs2 = SecondLdSt->getOperand(2).getImm(); - return Ofs1 + 1 == Ofs2; -} - -bool ARM64InstrInfo::shouldScheduleAdjacent(MachineInstr *First, - MachineInstr *Second) const { - // Cyclone can fuse CMN, CMP followed by Bcc. - - // FIXME: B0 can also fuse: - // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ. 
- if (Second->getOpcode() != ARM64::Bcc) - return false; - switch (First->getOpcode()) { - default: - return false; - case ARM64::SUBSWri: - case ARM64::ADDSWri: - case ARM64::ANDSWri: - case ARM64::SUBSXri: - case ARM64::ADDSXri: - case ARM64::ANDSXri: - return true; - } -} - -MachineInstr *ARM64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const { - MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM64::DBG_VALUE)) - .addFrameIndex(FrameIx) - .addImm(0) - .addImm(Offset) - .addMetadata(MDPtr); - return &*MIB; -} - -static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, - unsigned Reg, unsigned SubIdx, - unsigned State, - const TargetRegisterInfo *TRI) { - if (!SubIdx) - return MIB.addReg(Reg, State); - - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); - return MIB.addReg(Reg, State, SubIdx); -} - -static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, - unsigned NumRegs) { - // We really want the positive remainder mod 32 here, that happens to be - // easily obtainable with a mask. - return ((DestReg - SrcReg) & 0x1f) < NumRegs; -} - -void ARM64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - DebugLoc DL, unsigned DestReg, - unsigned SrcReg, bool KillSrc, - unsigned Opcode, - llvm::ArrayRef<unsigned> Indices) const { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - uint16_t DestEncoding = TRI->getEncodingValue(DestReg); - uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); - unsigned NumRegs = Indices.size(); - - int SubReg = 0, End = NumRegs, Incr = 1; - if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) { - SubReg = NumRegs - 1; - End = -1; - Incr = -1; - } - - for (; SubReg != End; SubReg += Incr) { - const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode)); - AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); - AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); - AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); - } -} - -void ARM64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { - if (ARM64::GPR32spRegClass.contains(DestReg) && - (ARM64::GPR32spRegClass.contains(SrcReg) || SrcReg == ARM64::WZR)) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - - if (DestReg == ARM64::WSP || SrcReg == ARM64::WSP) { - // If either operand is WSP, expand to ADD #0. - if (Subtarget.hasZeroCycleRegMove()) { - // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. - unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); - unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); - // This instruction is reading and writing X registers. This may upset - // the register scavenger and machine verifier, so we need to indicate - // that we are reading an undefined value from SrcRegX, but a proper - // value from SrcReg. 
- BuildMI(MBB, I, DL, get(ARM64::ADDXri), DestRegX) - .addReg(SrcRegX, RegState::Undef) - .addImm(0) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)) - .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); - } else { - BuildMI(MBB, I, DL, get(ARM64::ADDWri), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addImm(0) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - } - } else if (SrcReg == ARM64::WZR && Subtarget.hasZeroCycleZeroing()) { - BuildMI(MBB, I, DL, get(ARM64::MOVZWi), DestReg).addImm(0).addImm( - ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - } else { - if (Subtarget.hasZeroCycleRegMove()) { - // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. - unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); - unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); - // This instruction is reading and writing X registers. This may upset - // the register scavenger and machine verifier, so we need to indicate - // that we are reading an undefined value from SrcRegX, but a proper - // value from SrcReg. - BuildMI(MBB, I, DL, get(ARM64::ORRXrr), DestRegX) - .addReg(ARM64::XZR) - .addReg(SrcRegX, RegState::Undef) - .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); - } else { - // Otherwise, expand to ORR WZR. - BuildMI(MBB, I, DL, get(ARM64::ORRWrr), DestReg) - .addReg(ARM64::WZR) - .addReg(SrcReg, getKillRegState(KillSrc)); - } - } - return; - } - - if (ARM64::GPR64spRegClass.contains(DestReg) && - (ARM64::GPR64spRegClass.contains(SrcReg) || SrcReg == ARM64::XZR)) { - if (DestReg == ARM64::SP || SrcReg == ARM64::SP) { - // If either operand is SP, expand to ADD #0. - BuildMI(MBB, I, DL, get(ARM64::ADDXri), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addImm(0) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - } else if (SrcReg == ARM64::XZR && Subtarget.hasZeroCycleZeroing()) { - BuildMI(MBB, I, DL, get(ARM64::MOVZXi), DestReg).addImm(0).addImm( - ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - } else { - // Otherwise, expand to ORR XZR. - BuildMI(MBB, I, DL, get(ARM64::ORRXrr), DestReg) - .addReg(ARM64::XZR) - .addReg(SrcReg, getKillRegState(KillSrc)); - } - return; - } - - // Copy a DDDD register quad by copying the individual sub-registers. - if (ARM64::DDDDRegClass.contains(DestReg) && - ARM64::DDDDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1, - ARM64::dsub2, ARM64::dsub3 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8, - Indices); - return; - } - - // Copy a DDD register triple by copying the individual sub-registers. - if (ARM64::DDDRegClass.contains(DestReg) && - ARM64::DDDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1, - ARM64::dsub2 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8, - Indices); - return; - } - - // Copy a DD register pair by copying the individual sub-registers. - if (ARM64::DDRegClass.contains(DestReg) && - ARM64::DDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8, - Indices); - return; - } - - // Copy a QQQQ register quad by copying the individual sub-registers. 
- if (ARM64::QQQQRegClass.contains(DestReg) && - ARM64::QQQQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1, - ARM64::qsub2, ARM64::qsub3 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8, - Indices); - return; - } - - // Copy a QQQ register triple by copying the individual sub-registers. - if (ARM64::QQQRegClass.contains(DestReg) && - ARM64::QQQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1, - ARM64::qsub2 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8, - Indices); - return; - } - - // Copy a QQ register pair by copying the individual sub-registers. - if (ARM64::QQRegClass.contains(DestReg) && - ARM64::QQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8, - Indices); - return; - } - - if (ARM64::FPR128RegClass.contains(DestReg) && - ARM64::FPR128RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); - return; - } - - if (ARM64::FPR64RegClass.contains(DestReg) && - ARM64::FPR64RegClass.contains(SrcReg)) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::dsub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::dsub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); - return; - } - - if (ARM64::FPR32RegClass.contains(DestReg) && - ARM64::FPR32RegClass.contains(SrcReg)) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::ssub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::ssub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); - return; - } - - if (ARM64::FPR16RegClass.contains(DestReg) && - ARM64::FPR16RegClass.contains(SrcReg)) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::hsub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::hsub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); - return; - } - - if (ARM64::FPR8RegClass.contains(DestReg) && - ARM64::FPR8RegClass.contains(SrcReg)) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::bsub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::bsub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); - return; - } - - // Copies between GPR64 and FPR64. - if (ARM64::FPR64RegClass.contains(DestReg) && - ARM64::GPR64RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVXDr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - if (ARM64::GPR64RegClass.contains(DestReg) && - ARM64::FPR64RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVDXr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - // Copies between GPR32 and FPR32. 
- if (ARM64::FPR32RegClass.contains(DestReg) && - ARM64::GPR32RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVWSr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - if (ARM64::GPR32RegClass.contains(DestReg) && - ARM64::FPR32RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVSWr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - - assert(0 && "unimplemented reg-to-reg copy"); -} - -void ARM64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL; - if (MBBI != MBB.end()) - DL = MBBI->getDebugLoc(); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - unsigned Align = MFI.getObjectAlignment(FI); - - MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); - MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); - unsigned Opc = 0; - bool Offset = true; - switch (RC->getSize()) { - case 1: - if (ARM64::FPR8RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRBui; - break; - case 2: - if (ARM64::FPR16RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRHui; - break; - case 4: - if (ARM64::GPR32allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::STRWui; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) - MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR32RegClass); - else - assert(SrcReg != ARM64::WSP); - } else if (ARM64::FPR32RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRSui; - break; - case 8: - if (ARM64::GPR64allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::STRXui; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) - MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR64RegClass); - else - assert(SrcReg != ARM64::SP); - } else if (ARM64::FPR64RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRDui; - break; - case 16: - if (ARM64::FPR128RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRQui; - else if (ARM64::DDRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Twov1d, Offset = false; - break; - case 24: - if (ARM64::DDDRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Threev1d, Offset = false; - break; - case 32: - if (ARM64::DDDDRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Fourv1d, Offset = false; - else if (ARM64::QQRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Twov2d, Offset = false; - break; - case 48: - if (ARM64::QQQRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Threev2d, Offset = false; - break; - case 64: - if (ARM64::QQQQRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Fourv2d, Offset = false; - break; - } - assert(Opc && "Unknown register class"); - - const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI); - - if (Offset) - MI.addImm(0); - MI.addMemOperand(MMO); -} - -void ARM64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL; - if (MBBI != MBB.end()) - DL = MBBI->getDebugLoc(); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - unsigned Align = MFI.getObjectAlignment(FI); - MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); - MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); - - unsigned Opc = 0; - bool Offset = 
true; - switch (RC->getSize()) { - case 1: - if (ARM64::FPR8RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRBui; - break; - case 2: - if (ARM64::FPR16RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRHui; - break; - case 4: - if (ARM64::GPR32allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::LDRWui; - if (TargetRegisterInfo::isVirtualRegister(DestReg)) - MF.getRegInfo().constrainRegClass(DestReg, &ARM64::GPR32RegClass); - else - assert(DestReg != ARM64::WSP); - } else if (ARM64::FPR32RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRSui; - break; - case 8: - if (ARM64::GPR64allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::LDRXui; - if (TargetRegisterInfo::isVirtualRegister(DestReg)) - MF.getRegInfo().constrainRegClass(DestReg, &ARM64::GPR64RegClass); - else - assert(DestReg != ARM64::SP); - } else if (ARM64::FPR64RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRDui; - break; - case 16: - if (ARM64::FPR128RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRQui; - else if (ARM64::DDRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Twov1d, Offset = false; - break; - case 24: - if (ARM64::DDDRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Threev1d, Offset = false; - break; - case 32: - if (ARM64::DDDDRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Fourv1d, Offset = false; - else if (ARM64::QQRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Twov2d, Offset = false; - break; - case 48: - if (ARM64::QQQRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Threev2d, Offset = false; - break; - case 64: - if (ARM64::QQQQRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Fourv2d, Offset = false; - break; - } - assert(Opc && "Unknown register class"); - - const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) - .addReg(DestReg, getDefRegState(true)) - .addFrameIndex(FI); - if (Offset) - MI.addImm(0); - MI.addMemOperand(MMO); -} - -void llvm::emitFrameOffset(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, int Offset, - const ARM64InstrInfo *TII, MachineInstr::MIFlag Flag, - bool SetCPSR) { - if (DestReg == SrcReg && Offset == 0) - return; - - bool isSub = Offset < 0; - if (isSub) - Offset = -Offset; - - // FIXME: If the offset won't fit in 24-bits, compute the offset into a - // scratch register. If DestReg is a virtual register, use it as the - // scratch register; otherwise, create a new virtual register (to be - // replaced by the scavenger at the end of PEI). That case can be optimized - // slightly if DestReg is SP which is always 16-byte aligned, so the scratch - // register can be loaded with offset%8 and the add/sub can use an extending - // instruction with LSL#3. - // Currently the function handles any offsets but generates a poor sequence - // of code. - // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); - - unsigned Opc; - if (SetCPSR) - Opc = isSub ? ARM64::SUBSXri : ARM64::ADDSXri; - else - Opc = isSub ? 
ARM64::SUBXri : ARM64::ADDXri; - const unsigned MaxEncoding = 0xfff; - const unsigned ShiftSize = 12; - const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; - while (((unsigned)Offset) >= (1 << ShiftSize)) { - unsigned ThisVal; - if (((unsigned)Offset) > MaxEncodableValue) { - ThisVal = MaxEncodableValue; - } else { - ThisVal = Offset & MaxEncodableValue; - } - assert((ThisVal >> ShiftSize) <= MaxEncoding && - "Encoding cannot handle value that big"); - BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) - .addReg(SrcReg) - .addImm(ThisVal >> ShiftSize) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftSize)) - .setMIFlag(Flag); - - SrcReg = DestReg; - Offset -= ThisVal; - if (Offset == 0) - return; - } - BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) - .addReg(SrcReg) - .addImm(Offset) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)) - .setMIFlag(Flag); -} - -MachineInstr * -ARM64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const { - // This is a bit of a hack. Consider this instruction: - // - // %vreg0<def> = COPY %SP; GPR64all:%vreg0 - // - // We explicitly chose GPR64all for the virtual register so such a copy might - // be eliminated by RegisterCoalescer. However, that may not be possible, and - // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all - // register class, TargetInstrInfo::foldMemoryOperand() is going to try. - // - // To prevent that, we are going to constrain the %vreg0 register class here. - // - // <rdar://problem/11522048> - // - if (MI->isCopy()) { - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); - if (SrcReg == ARM64::SP && TargetRegisterInfo::isVirtualRegister(DstReg)) { - MF.getRegInfo().constrainRegClass(DstReg, &ARM64::GPR64RegClass); - return 0; - } - if (DstReg == ARM64::SP && TargetRegisterInfo::isVirtualRegister(SrcReg)) { - MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR64RegClass); - return 0; - } - } - - // Cannot fold. - return 0; -} - -int llvm::isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset, - bool *OutUseUnscaledOp, - unsigned *OutUnscaledOp, - int *EmittableOffset) { - int Scale = 1; - bool IsSigned = false; - // The ImmIdx should be changed case by case if it is not 2. - unsigned ImmIdx = 2; - unsigned UnscaledOp = 0; - // Set output values in case of early exit. - if (EmittableOffset) - *EmittableOffset = 0; - if (OutUseUnscaledOp) - *OutUseUnscaledOp = false; - if (OutUnscaledOp) - *OutUnscaledOp = 0; - switch (MI.getOpcode()) { - default: - assert(0 && "unhandled opcode in rewriteARM64FrameIndex"); - // Vector spills/fills can't take an immediate offset. 
- case ARM64::LD1Twov2d: - case ARM64::LD1Threev2d: - case ARM64::LD1Fourv2d: - case ARM64::LD1Twov1d: - case ARM64::LD1Threev1d: - case ARM64::LD1Fourv1d: - case ARM64::ST1Twov2d: - case ARM64::ST1Threev2d: - case ARM64::ST1Fourv2d: - case ARM64::ST1Twov1d: - case ARM64::ST1Threev1d: - case ARM64::ST1Fourv1d: - return ARM64FrameOffsetCannotUpdate; - case ARM64::PRFMui: - Scale = 8; - UnscaledOp = ARM64::PRFUMi; - break; - case ARM64::LDRXui: - Scale = 8; - UnscaledOp = ARM64::LDURXi; - break; - case ARM64::LDRWui: - Scale = 4; - UnscaledOp = ARM64::LDURWi; - break; - case ARM64::LDRBui: - Scale = 1; - UnscaledOp = ARM64::LDURBi; - break; - case ARM64::LDRHui: - Scale = 2; - UnscaledOp = ARM64::LDURHi; - break; - case ARM64::LDRSui: - Scale = 4; - UnscaledOp = ARM64::LDURSi; - break; - case ARM64::LDRDui: - Scale = 8; - UnscaledOp = ARM64::LDURDi; - break; - case ARM64::LDRQui: - Scale = 16; - UnscaledOp = ARM64::LDURQi; - break; - case ARM64::LDRBBui: - Scale = 1; - UnscaledOp = ARM64::LDURBBi; - break; - case ARM64::LDRHHui: - Scale = 2; - UnscaledOp = ARM64::LDURHHi; - break; - case ARM64::LDRSBXui: - Scale = 1; - UnscaledOp = ARM64::LDURSBXi; - break; - case ARM64::LDRSBWui: - Scale = 1; - UnscaledOp = ARM64::LDURSBWi; - break; - case ARM64::LDRSHXui: - Scale = 2; - UnscaledOp = ARM64::LDURSHXi; - break; - case ARM64::LDRSHWui: - Scale = 2; - UnscaledOp = ARM64::LDURSHWi; - break; - case ARM64::LDRSWui: - Scale = 4; - UnscaledOp = ARM64::LDURSWi; - break; - - case ARM64::STRXui: - Scale = 8; - UnscaledOp = ARM64::STURXi; - break; - case ARM64::STRWui: - Scale = 4; - UnscaledOp = ARM64::STURWi; - break; - case ARM64::STRBui: - Scale = 1; - UnscaledOp = ARM64::STURBi; - break; - case ARM64::STRHui: - Scale = 2; - UnscaledOp = ARM64::STURHi; - break; - case ARM64::STRSui: - Scale = 4; - UnscaledOp = ARM64::STURSi; - break; - case ARM64::STRDui: - Scale = 8; - UnscaledOp = ARM64::STURDi; - break; - case ARM64::STRQui: - Scale = 16; - UnscaledOp = ARM64::STURQi; - break; - case ARM64::STRBBui: - Scale = 1; - UnscaledOp = ARM64::STURBBi; - break; - case ARM64::STRHHui: - Scale = 2; - UnscaledOp = ARM64::STURHHi; - break; - - case ARM64::LDPXi: - case ARM64::LDPDi: - case ARM64::STPXi: - case ARM64::STPDi: - IsSigned = true; - Scale = 8; - break; - case ARM64::LDPQi: - case ARM64::STPQi: - IsSigned = true; - Scale = 16; - break; - case ARM64::LDPWi: - case ARM64::LDPSi: - case ARM64::STPWi: - case ARM64::STPSi: - IsSigned = true; - Scale = 4; - break; - - case ARM64::LDURXi: - case ARM64::LDURWi: - case ARM64::LDURBi: - case ARM64::LDURHi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURHHi: - case ARM64::LDURBBi: - case ARM64::LDURSBXi: - case ARM64::LDURSBWi: - case ARM64::LDURSHXi: - case ARM64::LDURSHWi: - case ARM64::LDURSWi: - case ARM64::STURXi: - case ARM64::STURWi: - case ARM64::STURBi: - case ARM64::STURHi: - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURBBi: - case ARM64::STURHHi: - Scale = 1; - break; - } - - Offset += MI.getOperand(ImmIdx).getImm() * Scale; - - bool useUnscaledOp = false; - // If the offset doesn't match the scale, we rewrite the instruction to - // use the unscaled instruction instead. Likewise, if we have a negative - // offset (and have an unscaled op to use). 
- if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0)) - useUnscaledOp = true; - - // Use an unscaled addressing mode if the instruction has a negative offset - // (or if the instruction is already using an unscaled addressing mode). - unsigned MaskBits; - if (IsSigned) { - // ldp/stp instructions. - MaskBits = 7; - Offset /= Scale; - } else if (UnscaledOp == 0 || useUnscaledOp) { - MaskBits = 9; - IsSigned = true; - Scale = 1; - } else { - MaskBits = 12; - IsSigned = false; - Offset /= Scale; - } - - // Attempt to fold address computation. - int MaxOff = (1 << (MaskBits - IsSigned)) - 1; - int MinOff = (IsSigned ? (-MaxOff - 1) : 0); - if (Offset >= MinOff && Offset <= MaxOff) { - if (EmittableOffset) - *EmittableOffset = Offset; - Offset = 0; - } else { - int NewOff = Offset < 0 ? MinOff : MaxOff; - if (EmittableOffset) - *EmittableOffset = NewOff; - Offset = (Offset - NewOff) * Scale; - } - if (OutUseUnscaledOp) - *OutUseUnscaledOp = useUnscaledOp; - if (OutUnscaledOp) - *OutUnscaledOp = UnscaledOp; - return ARM64FrameOffsetCanUpdate | - (Offset == 0 ? ARM64FrameOffsetIsLegal : 0); -} - -bool llvm::rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int &Offset, - const ARM64InstrInfo *TII) { - unsigned Opcode = MI.getOpcode(); - unsigned ImmIdx = FrameRegIdx + 1; - - if (Opcode == ARM64::ADDSXri || Opcode == ARM64::ADDXri) { - Offset += MI.getOperand(ImmIdx).getImm(); - emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), - MI.getOperand(0).getReg(), FrameReg, Offset, TII, - MachineInstr::NoFlags, (Opcode == ARM64::ADDSXri)); - MI.eraseFromParent(); - Offset = 0; - return true; - } - - int NewOffset; - unsigned UnscaledOp; - bool UseUnscaledOp; - int Status = isARM64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, &UnscaledOp, - &NewOffset); - if (Status & ARM64FrameOffsetCanUpdate) { - if (Status & ARM64FrameOffsetIsLegal) - // Replace the FrameIndex with FrameReg. - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - if (UseUnscaledOp) - MI.setDesc(TII->get(UnscaledOp)); - - MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); - return Offset == 0; - } - - return false; -} - -void ARM64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { - NopInst.setOpcode(ARM64::HINT); - NopInst.addOperand(MCOperand::CreateImm(0)); -} diff --git a/lib/Target/ARM64/ARM64InstrInfo.h b/lib/Target/ARM64/ARM64InstrInfo.h deleted file mode 100644 index 2591ca0..0000000 --- a/lib/Target/ARM64/ARM64InstrInfo.h +++ /dev/null @@ -1,219 +0,0 @@ -//===- ARM64InstrInfo.h - ARM64 Instruction Information ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM64 implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_ARM64INSTRINFO_H -#define LLVM_TARGET_ARM64INSTRINFO_H - -#include "ARM64.h" -#include "ARM64RegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" - -#define GET_INSTRINFO_HEADER -#include "ARM64GenInstrInfo.inc" - -namespace llvm { - -class ARM64Subtarget; -class ARM64TargetMachine; - -class ARM64InstrInfo : public ARM64GenInstrInfo { - // Reserve bits in the MachineMemOperand target hint flags, starting at 1. - // They will be shifted into MOTargetHintStart when accessed. 
- enum TargetMemOperandFlags { - MOSuppressPair = 1 - }; - - const ARM64RegisterInfo RI; - const ARM64Subtarget &Subtarget; - -public: - explicit ARM64InstrInfo(const ARM64Subtarget &STI); - - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). - const ARM64RegisterInfo &getRegisterInfo() const { return RI; } - - unsigned GetInstSizeInBytes(const MachineInstr *MI) const; - - bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, - unsigned &DstReg, unsigned &SubIdx) const override; - - unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const override; - unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const override; - - /// \brief Does this instruction set its full destination register to zero? - bool isGPRZero(const MachineInstr *MI) const; - - /// \brief Does this instruction rename a GPR without modifying bits? - bool isGPRCopy(const MachineInstr *MI) const; - - /// \brief Does this instruction rename an FPR without modifying bits? - bool isFPRCopy(const MachineInstr *MI) const; - - /// Return true if this is load/store scales or extends its register offset. - /// This refers to scaling a dynamic index as opposed to scaled immediates. - /// MI should be a memory op that allows scaled addressing. - bool isScaledAddr(const MachineInstr *MI) const; - - /// Return true if pairing the given load or store is hinted to be - /// unprofitable. - bool isLdStPairSuppressed(const MachineInstr *MI) const; - - /// Hint that pairing the given load or store is unprofitable. - void suppressLdStPair(MachineInstr *MI) const; - - bool getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, - unsigned &Offset, - const TargetRegisterInfo *TRI) const override; - - bool enableClusterLoads() const override { return true; } - - bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt, - unsigned NumLoads) const override; - - bool shouldScheduleAdjacent(MachineInstr *First, - MachineInstr *Second) const override; - - MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, - uint64_t Offset, const MDNode *MDPtr, - DebugLoc DL) const; - void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - DebugLoc DL, unsigned DestReg, unsigned SrcReg, - bool KillSrc, unsigned Opcode, - llvm::ArrayRef<unsigned> Indices) const; - void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - DebugLoc DL, unsigned DestReg, unsigned SrcReg, - bool KillSrc) const override; - - void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, unsigned SrcReg, - bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; - - void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, unsigned DestReg, - int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; - - MachineInstr * - foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const override; - - bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify = false) const override; - unsigned RemoveBranch(MachineBasicBlock &MBB) const override; - unsigned InsertBranch(MachineBasicBlock &MBB, 
MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const override;
- bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
- bool canInsertSelect(const MachineBasicBlock &,
- const SmallVectorImpl<MachineOperand> &Cond, unsigned,
- unsigned, int &, int &, int &) const override;
- void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- DebugLoc DL, unsigned DstReg,
- const SmallVectorImpl<MachineOperand> &Cond,
- unsigned TrueReg, unsigned FalseReg) const override;
- void getNoopForMachoTarget(MCInst &NopInst) const override;
-
- /// analyzeCompare - For a comparison instruction, return the source registers
- /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
- /// Return true if the comparison instruction can be analyzed.
- bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &CmpMask,
- int &CmpValue) const override;
- /// optimizeCompareInstr - Convert the instruction supplying the argument to
- /// the comparison into one that sets the zero bit in the flags register.
- bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
- unsigned SrcReg2, int CmpMask, int CmpValue,
- const MachineRegisterInfo *MRI) const override;
-
-private:
- void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
- MachineBasicBlock *TBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
-};
-
-/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
-/// plus Offset. This is intended to be used from within the prolog/epilog
-/// insertion (PEI) pass, where a virtual scratch register may be allocated
-/// if necessary, to be replaced by the scavenger at the end of PEI.
-void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset,
- const ARM64InstrInfo *TII,
- MachineInstr::MIFlag = MachineInstr::NoFlags,
- bool SetCPSR = false);
-
-/// rewriteARM64FrameIndex - Rewrite MI to access 'Offset' bytes from the
-/// FP. Return false if the offset could not be handled directly in MI, and
-/// return the left-over portion by reference.
-bool rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
- const ARM64InstrInfo *TII);
-
-/// \brief Used to report the frame offset status in isARM64FrameOffsetLegal.
-enum ARM64FrameOffsetStatus {
- ARM64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply.
- ARM64FrameOffsetIsLegal = 0x1, ///< Offset is legal.
- ARM64FrameOffsetCanUpdate = 0x2 ///< Offset can apply, at least partly.
-};
-
-/// \brief Check if the @p Offset is a valid frame offset for @p MI.
-/// The returned value reports the validity of the frame offset for @p MI.
-/// It uses the values defined by ARM64FrameOffsetStatus for that.
-/// If result == ARM64FrameOffsetCannotUpdate, @p MI cannot be updated to
-/// use an offset.
-/// If result & ARM64FrameOffsetIsLegal, @p Offset can completely be
-/// rewritten in @p MI.
-/// If result & ARM64FrameOffsetCanUpdate, @p Offset contains the
-/// amount that is off the limit of the legal offset.
-/// If set, @p OutUseUnscaledOp will contain whether @p MI should be
-/// turned into an unscaled operator, which opcode is in @p OutUnscaledOp.
-/// If set, @p EmittableOffset contains the amount that can be set in @p MI
-/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
-/// is a legal offset.
-int isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset, - bool *OutUseUnscaledOp = NULL, - unsigned *OutUnscaledOp = NULL, - int *EmittableOffset = NULL); - -static inline bool isUncondBranchOpcode(int Opc) { return Opc == ARM64::B; } - -static inline bool isCondBranchOpcode(int Opc) { - switch (Opc) { - case ARM64::Bcc: - case ARM64::CBZW: - case ARM64::CBZX: - case ARM64::CBNZW: - case ARM64::CBNZX: - case ARM64::TBZ: - case ARM64::TBNZ: - return true; - default: - return false; - } -} - -static inline bool isIndirectBranchOpcode(int Opc) { return Opc == ARM64::BR; } - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td deleted file mode 100644 index 2fe1720..0000000 --- a/lib/Target/ARM64/ARM64InstrInfo.td +++ /dev/null @@ -1,4458 +0,0 @@ -//===- ARM64InstrInfo.td - Describe the ARM64 Instructions -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// ARM64 Instruction definitions. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// ARM64-specific DAG Nodes. -// - -// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS -def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2, - [SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>, - SDTCisInt<0>, SDTCisVT<1, i32>]>; - -// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS -def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3, - [SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisInt<0>, - SDTCisVT<3, i32>]>; - -// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS -def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, - [SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>, - SDTCisInt<0>, - SDTCisVT<1, i32>, - SDTCisVT<4, i32>]>; - -def SDT_ARM64Brcond : SDTypeProfile<0, 3, - [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, - SDTCisVT<2, i32>]>; -def SDT_ARM64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; -def SDT_ARM64tbz : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisVT<1, i64>, - SDTCisVT<2, OtherVT>]>; - - -def SDT_ARM64CSel : SDTypeProfile<1, 4, - [SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisInt<3>, - SDTCisVT<4, i32>]>; -def SDT_ARM64FCmp : SDTypeProfile<0, 2, - [SDTCisFP<0>, - SDTCisSameAs<0, 1>]>; -def SDT_ARM64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; -def SDT_ARM64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; -def SDT_ARM64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, - SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>]>; -def SDT_ARM64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>; -def SDT_ARM64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; -def SDT_ARM64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisInt<2>, SDTCisInt<3>]>; -def SDT_ARM64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; -def SDT_ARM64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisInt<3>]>; -def SDT_ARM64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; - -def SDT_ARM64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; -def SDT_ARM64fcmpz : SDTypeProfile<1, 1, []>; -def SDT_ARM64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>; -def SDT_ARM64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, - 
SDTCisSameAs<0,2>]>; -def SDT_ARM64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, - SDTCisSameAs<0,3>]>; -def SDT_ARM64TCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; -def SDT_ARM64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; - -def SDT_ARM64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; - -def SDT_ARM64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, - SDTCisPtrTy<1>]>; -def SDT_ARM64WrapperLarge : SDTypeProfile<1, 4, - [SDTCisVT<0, i64>, SDTCisVT<1, i32>, - SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, - SDTCisSameAs<1, 4>]>; - - -// Node definitions. -def ARM64adrp : SDNode<"ARM64ISD::ADRP", SDTIntUnaryOp, []>; -def ARM64addlow : SDNode<"ARM64ISD::ADDlow", SDTIntBinOp, []>; -def ARM64LOADgot : SDNode<"ARM64ISD::LOADgot", SDTIntUnaryOp>; -def ARM64callseq_start : SDNode<"ISD::CALLSEQ_START", - SDCallSeqStart<[ SDTCisVT<0, i32> ]>, - [SDNPHasChain, SDNPOutGlue]>; -def ARM64callseq_end : SDNode<"ISD::CALLSEQ_END", - SDCallSeqEnd<[ SDTCisVT<0, i32>, - SDTCisVT<1, i32> ]>, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def ARM64call : SDNode<"ARM64ISD::CALL", - SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -def ARM64brcond : SDNode<"ARM64ISD::BRCOND", SDT_ARM64Brcond, - [SDNPHasChain]>; -def ARM64cbz : SDNode<"ARM64ISD::CBZ", SDT_ARM64cbz, - [SDNPHasChain]>; -def ARM64cbnz : SDNode<"ARM64ISD::CBNZ", SDT_ARM64cbz, - [SDNPHasChain]>; -def ARM64tbz : SDNode<"ARM64ISD::TBZ", SDT_ARM64tbz, - [SDNPHasChain]>; -def ARM64tbnz : SDNode<"ARM64ISD::TBNZ", SDT_ARM64tbz, - [SDNPHasChain]>; - - -def ARM64csel : SDNode<"ARM64ISD::CSEL", SDT_ARM64CSel>; -def ARM64csinv : SDNode<"ARM64ISD::CSINV", SDT_ARM64CSel>; -def ARM64csneg : SDNode<"ARM64ISD::CSNEG", SDT_ARM64CSel>; -def ARM64csinc : SDNode<"ARM64ISD::CSINC", SDT_ARM64CSel>; -def ARM64retflag : SDNode<"ARM64ISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def ARM64adc : SDNode<"ARM64ISD::ADC", SDTBinaryArithWithFlagsIn >; -def ARM64sbc : SDNode<"ARM64ISD::SBC", SDTBinaryArithWithFlagsIn>; -def ARM64add_flag : SDNode<"ARM64ISD::ADDS", SDTBinaryArithWithFlagsOut, - [SDNPCommutative]>; -def ARM64sub_flag : SDNode<"ARM64ISD::SUBS", SDTBinaryArithWithFlagsOut>; -def ARM64and_flag : SDNode<"ARM64ISD::ANDS", SDTBinaryArithWithFlagsOut>; -def ARM64adc_flag : SDNode<"ARM64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; -def ARM64sbc_flag : SDNode<"ARM64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; - -def ARM64threadpointer : SDNode<"ARM64ISD::THREAD_POINTER", SDTPtrLeaf>; - -def ARM64fcmp : SDNode<"ARM64ISD::FCMP", SDT_ARM64FCmp>; - -def ARM64fmax : SDNode<"ARM64ISD::FMAX", SDTFPBinOp>; -def ARM64fmin : SDNode<"ARM64ISD::FMIN", SDTFPBinOp>; - -def ARM64dup : SDNode<"ARM64ISD::DUP", SDT_ARM64Dup>; -def ARM64duplane8 : SDNode<"ARM64ISD::DUPLANE8", SDT_ARM64DupLane>; -def ARM64duplane16 : SDNode<"ARM64ISD::DUPLANE16", SDT_ARM64DupLane>; -def ARM64duplane32 : SDNode<"ARM64ISD::DUPLANE32", SDT_ARM64DupLane>; -def ARM64duplane64 : SDNode<"ARM64ISD::DUPLANE64", SDT_ARM64DupLane>; - -def ARM64zip1 : SDNode<"ARM64ISD::ZIP1", SDT_ARM64Zip>; -def ARM64zip2 : SDNode<"ARM64ISD::ZIP2", SDT_ARM64Zip>; -def ARM64uzp1 : SDNode<"ARM64ISD::UZP1", SDT_ARM64Zip>; -def ARM64uzp2 : SDNode<"ARM64ISD::UZP2", SDT_ARM64Zip>; -def ARM64trn1 : SDNode<"ARM64ISD::TRN1", SDT_ARM64Zip>; -def ARM64trn2 : SDNode<"ARM64ISD::TRN2", SDT_ARM64Zip>; - -def ARM64movi_edit : SDNode<"ARM64ISD::MOVIedit", SDT_ARM64MOVIedit>; -def ARM64movi_shift : 
SDNode<"ARM64ISD::MOVIshift", SDT_ARM64MOVIshift>; -def ARM64movi_msl : SDNode<"ARM64ISD::MOVImsl", SDT_ARM64MOVIshift>; -def ARM64mvni_shift : SDNode<"ARM64ISD::MVNIshift", SDT_ARM64MOVIshift>; -def ARM64mvni_msl : SDNode<"ARM64ISD::MVNImsl", SDT_ARM64MOVIshift>; -def ARM64movi : SDNode<"ARM64ISD::MOVI", SDT_ARM64MOVIedit>; -def ARM64fmov : SDNode<"ARM64ISD::FMOV", SDT_ARM64MOVIedit>; - -def ARM64rev16 : SDNode<"ARM64ISD::REV16", SDT_ARM64UnaryVec>; -def ARM64rev32 : SDNode<"ARM64ISD::REV32", SDT_ARM64UnaryVec>; -def ARM64rev64 : SDNode<"ARM64ISD::REV64", SDT_ARM64UnaryVec>; -def ARM64ext : SDNode<"ARM64ISD::EXT", SDT_ARM64ExtVec>; - -def ARM64vashr : SDNode<"ARM64ISD::VASHR", SDT_ARM64vshift>; -def ARM64vlshr : SDNode<"ARM64ISD::VLSHR", SDT_ARM64vshift>; -def ARM64vshl : SDNode<"ARM64ISD::VSHL", SDT_ARM64vshift>; -def ARM64sqshli : SDNode<"ARM64ISD::SQSHL_I", SDT_ARM64vshift>; -def ARM64uqshli : SDNode<"ARM64ISD::UQSHL_I", SDT_ARM64vshift>; -def ARM64sqshlui : SDNode<"ARM64ISD::SQSHLU_I", SDT_ARM64vshift>; -def ARM64srshri : SDNode<"ARM64ISD::SRSHR_I", SDT_ARM64vshift>; -def ARM64urshri : SDNode<"ARM64ISD::URSHR_I", SDT_ARM64vshift>; - -def ARM64not: SDNode<"ARM64ISD::NOT", SDT_ARM64unvec>; -def ARM64bit: SDNode<"ARM64ISD::BIT", SDT_ARM64trivec>; - -def ARM64cmeq: SDNode<"ARM64ISD::CMEQ", SDT_ARM64binvec>; -def ARM64cmge: SDNode<"ARM64ISD::CMGE", SDT_ARM64binvec>; -def ARM64cmgt: SDNode<"ARM64ISD::CMGT", SDT_ARM64binvec>; -def ARM64cmhi: SDNode<"ARM64ISD::CMHI", SDT_ARM64binvec>; -def ARM64cmhs: SDNode<"ARM64ISD::CMHS", SDT_ARM64binvec>; - -def ARM64fcmeq: SDNode<"ARM64ISD::FCMEQ", SDT_ARM64fcmp>; -def ARM64fcmge: SDNode<"ARM64ISD::FCMGE", SDT_ARM64fcmp>; -def ARM64fcmgt: SDNode<"ARM64ISD::FCMGT", SDT_ARM64fcmp>; - -def ARM64cmeqz: SDNode<"ARM64ISD::CMEQz", SDT_ARM64unvec>; -def ARM64cmgez: SDNode<"ARM64ISD::CMGEz", SDT_ARM64unvec>; -def ARM64cmgtz: SDNode<"ARM64ISD::CMGTz", SDT_ARM64unvec>; -def ARM64cmlez: SDNode<"ARM64ISD::CMLEz", SDT_ARM64unvec>; -def ARM64cmltz: SDNode<"ARM64ISD::CMLTz", SDT_ARM64unvec>; -def ARM64cmtst : PatFrag<(ops node:$LHS, node:$RHS), - (ARM64not (ARM64cmeqz (and node:$LHS, node:$RHS)))>; - -def ARM64fcmeqz: SDNode<"ARM64ISD::FCMEQz", SDT_ARM64fcmpz>; -def ARM64fcmgez: SDNode<"ARM64ISD::FCMGEz", SDT_ARM64fcmpz>; -def ARM64fcmgtz: SDNode<"ARM64ISD::FCMGTz", SDT_ARM64fcmpz>; -def ARM64fcmlez: SDNode<"ARM64ISD::FCMLEz", SDT_ARM64fcmpz>; -def ARM64fcmltz: SDNode<"ARM64ISD::FCMLTz", SDT_ARM64fcmpz>; - -def ARM64bici: SDNode<"ARM64ISD::BICi", SDT_ARM64vecimm>; -def ARM64orri: SDNode<"ARM64ISD::ORRi", SDT_ARM64vecimm>; - -def ARM64neg : SDNode<"ARM64ISD::NEG", SDT_ARM64unvec>; - -def ARM64tcret: SDNode<"ARM64ISD::TC_RETURN", SDT_ARM64TCRET, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; - -def ARM64Prefetch : SDNode<"ARM64ISD::PREFETCH", SDT_ARM64PREFETCH, - [SDNPHasChain, SDNPSideEffect]>; - -def ARM64sitof: SDNode<"ARM64ISD::SITOF", SDT_ARM64ITOF>; -def ARM64uitof: SDNode<"ARM64ISD::UITOF", SDT_ARM64ITOF>; - -def ARM64tlsdesc_call : SDNode<"ARM64ISD::TLSDESC_CALL", SDT_ARM64TLSDescCall, - [SDNPInGlue, SDNPOutGlue, SDNPHasChain, - SDNPVariadic]>; - -def ARM64WrapperLarge : SDNode<"ARM64ISD::WrapperLarge", SDT_ARM64WrapperLarge>; - - -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// - -// ARM64 Instruction Predicate Definitions. 
-// -def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">; -def NoZCZ : Predicate<"!Subtarget->hasZeroCycleZeroing()">; -def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; -def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">; -def ForCodeSize : Predicate<"ForCodeSize">; -def NotForCodeSize : Predicate<"!ForCodeSize">; - -include "ARM64InstrFormats.td" - -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Miscellaneous instructions. -//===----------------------------------------------------------------------===// - -let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in { -def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), - [(ARM64callseq_start timm:$amt)]>; -def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), - [(ARM64callseq_end timm:$amt1, timm:$amt2)]>; -} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 - -let isReMaterializable = 1, isCodeGenOnly = 1 in { -// FIXME: The following pseudo instructions are only needed because remat -// cannot handle multiple instructions. When that changes, they can be -// removed, along with the ARM64Wrapper node. - -let AddedComplexity = 10 in -def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr), - [(set GPR64:$dst, (ARM64LOADgot tglobaladdr:$addr))]>, - Sched<[WriteLDAdr]>; - -// The MOVaddr instruction should match only when the add is not folded -// into a load or store address. -def MOVaddr - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tglobaladdr:$hi), - tglobaladdr:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrJT - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tjumptable:$hi), - tjumptable:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrCP - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tconstpool:$hi), - tconstpool:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrBA - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tblockaddress:$hi), - tblockaddress:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrTLS - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tglobaltlsaddr:$hi), - tglobaltlsaddr:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrEXT - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp texternalsym:$hi), - texternalsym:$low))]>, - Sched<[WriteAdrAdr]>; - -} // isReMaterializable, isCodeGenOnly - -def : Pat<(ARM64LOADgot tglobaltlsaddr:$addr), - (LOADgot tglobaltlsaddr:$addr)>; - -def : Pat<(ARM64LOADgot texternalsym:$addr), - (LOADgot texternalsym:$addr)>; - -def : Pat<(ARM64LOADgot tconstpool:$addr), - (LOADgot tconstpool:$addr)>; - -//===----------------------------------------------------------------------===// -// System instructions. -//===----------------------------------------------------------------------===// - -def HINT : HintI<"hint">; -def : InstAlias<"nop", (HINT 0b000)>; -def : InstAlias<"yield",(HINT 0b001)>; -def : InstAlias<"wfe", (HINT 0b010)>; -def : InstAlias<"wfi", (HINT 0b011)>; -def : InstAlias<"sev", (HINT 0b100)>; -def : InstAlias<"sevl", (HINT 0b101)>; - - // As far as LLVM is concerned this writes to the system's exclusive monitors. 
-let mayLoad = 1, mayStore = 1 in -def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">; - -def DMB : CRmSystemI<barrier_op, 0b101, "dmb">; -def DSB : CRmSystemI<barrier_op, 0b100, "dsb">; -def ISB : CRmSystemI<barrier_op, 0b110, "isb">; -def : InstAlias<"clrex", (CLREX 0xf)>; -def : InstAlias<"isb", (ISB 0xf)>; - -def MRS : MRSI; -def MSR : MSRI; -def MSRcpsr: MSRcpsrI; - -// The thread pointer (on Linux, at least, where this has been implemented) is -// TPIDR_EL0. -def : Pat<(ARM64threadpointer), (MRS 0xde82)>; - -// Generic system instructions -def SYS : SystemI<0, "sys">; -def SYSxt : SystemXtI<0, "sys">; -def SYSLxt : SystemLXtI<1, "sysl">; - -//===----------------------------------------------------------------------===// -// Move immediate instructions. -//===----------------------------------------------------------------------===// - -defm MOVK : InsertImmediate<0b11, "movk">; -defm MOVN : MoveImmediate<0b00, "movn">; - -let PostEncoderMethod = "fixMOVZ" in -defm MOVZ : MoveImmediate<0b10, "movz">; - -def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>; - -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; - -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; - -def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g3:$sym, 48)>; -def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g2:$sym, 32)>; -def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>; -def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>; - -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32)>; -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16)>; -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0)>; - -def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g2:$sym, 32)>; -def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16)>; -def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0)>; - -let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1, - isAsCheapAsAMove = 1 in { -// FIXME: The following pseudo instructions are only needed because remat -// cannot handle multiple instructions. When that changes, we can select -// directly to the real instructions and get rid of these pseudos. 
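The MOVZ/MOVK symbol aliases above, the MOVi64imm pseudo defined next, and the ARM64WrapperLarge patterns a little further down all rely on the same scheme: a 64-bit value is built out of four 16-bit halfwords, with one MOVZ seeding the register and MOVKs patching in the remaining pieces at shifts 48, 32, 16 and 0. A rough standalone illustration of that chunking; the printed mnemonics are only illustrative and this is not the backend's actual expansion code (which, among other things, skips halfwords that are already zero):

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint64_t Imm = 0x1122334455667788ULL;
    bool First = true;
    // Walk the halfwords top-down, matching the order used by the WrapperLarge patterns.
    for (int Shift = 48; Shift >= 0; Shift -= 16) {
      unsigned Half = (Imm >> Shift) & 0xffff;
      std::printf("%s x0, #0x%x, lsl #%d\n", First ? "movz" : "movk", Half, Shift);
      First = false;   // MOVZ zeroes the rest of the register; MOVK keeps it.
    }
    return 0;
  }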
- -def MOVi32imm - : Pseudo<(outs GPR32:$dst), (ins i32imm:$src), - [(set GPR32:$dst, imm:$src)]>, - Sched<[WriteImm]>; -def MOVi64imm - : Pseudo<(outs GPR64:$dst), (ins i64imm:$src), - [(set GPR64:$dst, imm:$src)]>, - Sched<[WriteImm]>; -} // isReMaterializable, isCodeGenOnly - -def : Pat<(ARM64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, - tglobaladdr:$g1, tglobaladdr:$g0), - (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48), - tglobaladdr:$g2, 32), - tglobaladdr:$g1, 16), - tglobaladdr:$g0, 0)>; - -def : Pat<(ARM64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, - tblockaddress:$g1, tblockaddress:$g0), - (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g3, 48), - tblockaddress:$g2, 32), - tblockaddress:$g1, 16), - tblockaddress:$g0, 0)>; - -def : Pat<(ARM64WrapperLarge tconstpool:$g3, tconstpool:$g2, - tconstpool:$g1, tconstpool:$g0), - (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g3, 48), - tconstpool:$g2, 32), - tconstpool:$g1, 16), - tconstpool:$g0, 0)>; - - -//===----------------------------------------------------------------------===// -// Arithmetic instructions. -//===----------------------------------------------------------------------===// - -// Add/subtract with carry. -defm ADC : AddSubCarry<0, "adc", "adcs", ARM64adc, ARM64adc_flag>; -defm SBC : AddSubCarry<1, "sbc", "sbcs", ARM64sbc, ARM64sbc_flag>; - -def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; -def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; -def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; -def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; - -// Add/subtract -defm ADD : AddSub<0, "add", add>; -defm SUB : AddSub<1, "sub">; - -defm ADDS : AddSubS<0, "adds", ARM64add_flag>; -defm SUBS : AddSubS<1, "subs", ARM64sub_flag>; - -// Use SUBS instead of SUB to enable CSE between SUBS and SUB. -def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), - (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; -def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), - (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; -def : Pat<(sub GPR32:$Rn, GPR32:$Rm), - (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; -def : Pat<(sub GPR64:$Rn, GPR64:$Rm), - (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; -def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), - (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; -def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), - (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; -def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3), - (SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>; -def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3), - (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>; - -// Because of the immediate format for add/sub-imm instructions, the -// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). -// These patterns capture that transformation. 
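The comment just above (and its twin further down for the flag-setting forms) describes the reason for the patterns that follow: the add/sub immediate field is an unsigned 12-bit value, optionally shifted left by 12, so a negative addend can only be encoded by negating it and flipping ADD to SUB (or vice versa). A small standalone sketch of that check and rewrite, under exactly that encoding assumption; the helper name is made up:

  #include <cstdint>
  #include <cstdio>

  // An add/sub immediate is a 12-bit unsigned value, optionally shifted left by 12.
  static bool isLegalAddSubImm(uint64_t V) {
    return V < (1u << 12) || ((V & 0xfffu) == 0 && (V >> 12) < (1u << 12));
  }

  int main() {
    int64_t Imm = -1;                                   // e.g. "add x0, x1, #-1"
    if (Imm < 0 && isLegalAddSubImm((uint64_t)-Imm))    // 1 fits the SUB immediate,
      std::printf("select SUBXri with #%lld\n", (long long)-Imm);  // so emit sub x0, x1, #1
    return 0;
  }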
-let AddedComplexity = 1 in { -def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), - (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), - (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), - (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), - (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -} - -def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0)>; -def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0)>; -def : InstAlias<"neg $dst, $src, $shift", - (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift:$shift)>; -def : InstAlias<"neg $dst, $src, $shift", - (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift:$shift)>; - -// Because of the immediate format for add/sub-imm instructions, the -// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). -// These patterns capture that transformation. -let AddedComplexity = 1 in { -def : Pat<(ARM64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), - (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(ARM64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), - (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -def : Pat<(ARM64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), - (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(ARM64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), - (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -} - -def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0)>; -def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0)>; -def : InstAlias<"negs $dst, $src, $shift", - (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift:$shift)>; -def : InstAlias<"negs $dst, $src, $shift", - (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift:$shift)>; - -// Unsigned/Signed divide -defm UDIV : Div<0, "udiv", udiv>; -defm SDIV : Div<1, "sdiv", sdiv>; -let isCodeGenOnly = 1 in { -defm UDIV_Int : Div<0, "udiv", int_arm64_udiv>; -defm SDIV_Int : Div<1, "sdiv", int_arm64_sdiv>; -} - -// Variable shift -defm ASRV : Shift<0b10, "asrv", sra>; -defm LSLV : Shift<0b00, "lslv", shl>; -defm LSRV : Shift<0b01, "lsrv", srl>; -defm RORV : Shift<0b11, "rorv", rotr>; - -def : ShiftAlias<"asr", ASRVWr, GPR32>; -def : ShiftAlias<"asr", ASRVXr, GPR64>; -def : ShiftAlias<"lsl", LSLVWr, GPR32>; -def : ShiftAlias<"lsl", LSLVXr, GPR64>; -def : ShiftAlias<"lsr", LSRVWr, GPR32>; -def : ShiftAlias<"lsr", LSRVXr, GPR64>; -def : ShiftAlias<"ror", RORVWr, GPR32>; -def : ShiftAlias<"ror", RORVXr, GPR64>; - -// Multiply-add -let AddedComplexity = 7 in { -defm MADD : MulAccum<0, "madd", add>; -defm MSUB : MulAccum<1, "msub", sub>; - -def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)), - (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; -def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)), - (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; - -def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))), - (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; -def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))), - (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; -} // AddedComplexity = 7 - -let AddedComplexity = 5 in { -def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; -def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; -def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; -def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; - -def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))), - (SMADDLrrr 
GPR32:$Rn, GPR32:$Rm, XZR)>; -def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), - (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; - -def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))), - (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; -def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))), - (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; -} // AddedComplexity = 5 - -def : MulAccumWAlias<"mul", MADDWrrr>; -def : MulAccumXAlias<"mul", MADDXrrr>; -def : MulAccumWAlias<"mneg", MSUBWrrr>; -def : MulAccumXAlias<"mneg", MSUBXrrr>; -def : WideMulAccumAlias<"smull", SMADDLrrr>; -def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; -def : WideMulAccumAlias<"umull", UMADDLrrr>; -def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; - -// Multiply-high -def SMULHrr : MulHi<0b010, "smulh", mulhs>; -def UMULHrr : MulHi<0b110, "umulh", mulhu>; - -// CRC32 -def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_arm64_crc32b, "crc32b">; -def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_arm64_crc32h, "crc32h">; -def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_arm64_crc32w, "crc32w">; -def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_arm64_crc32x, "crc32x">; - -def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_arm64_crc32cb, "crc32cb">; -def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_arm64_crc32ch, "crc32ch">; -def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_arm64_crc32cw, "crc32cw">; -def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_arm64_crc32cx, "crc32cx">; - - -//===----------------------------------------------------------------------===// -// Logical instructions. -//===----------------------------------------------------------------------===// - -// (immediate) -defm ANDS : LogicalImmS<0b11, "ands", ARM64and_flag>; -defm AND : LogicalImm<0b00, "and", and>; -defm EOR : LogicalImm<0b10, "eor", xor>; -defm ORR : LogicalImm<0b01, "orr", or>; - -def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR, - logical_imm32:$imm)>; -def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, - logical_imm64:$imm)>; - - -// (register) -defm ANDS : LogicalRegS<0b11, 0, "ands">; -defm BICS : LogicalRegS<0b11, 1, "bics">; -defm AND : LogicalReg<0b00, 0, "and", and>; -defm BIC : LogicalReg<0b00, 1, "bic", - BinOpFrag<(and node:$LHS, (not node:$RHS))>>; -defm EON : LogicalReg<0b10, 1, "eon", - BinOpFrag<(xor node:$LHS, (not node:$RHS))>>; -defm EOR : LogicalReg<0b10, 0, "eor", xor>; -defm ORN : LogicalReg<0b01, 1, "orn", - BinOpFrag<(or node:$LHS, (not node:$RHS))>>; -defm ORR : LogicalReg<0b01, 0, "orr", or>; - -def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0)>; -def : InstAlias<"mov $dst, $src", - (ADDWri GPR32sp:$dst, GPR32sp:$src, 0, 0)>; -def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0)>; -def : InstAlias<"mov $dst, $src", - (ADDXri GPR64sp:$dst, GPR64sp:$src, 0, 0)>; - -def : InstAlias<"tst $src1, $src2", - (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2)>; -def : InstAlias<"tst $src1, $src2", - (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2)>; - -def : InstAlias<"tst $src1, $src2", - (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0)>; -def : InstAlias<"tst $src1, $src2", - (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0)>; - -def : InstAlias<"tst $src1, $src2, $sh", - (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift:$sh)>; -def : InstAlias<"tst $src1, $src2, $sh", - (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift:$sh)>; - -def : InstAlias<"mvn $Wd, $Wm", - (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0)>; -def : InstAlias<"mvn $Xd, $Xm", - (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0)>; - 
-def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>; -def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; - - -//===----------------------------------------------------------------------===// -// One operand data processing instructions. -//===----------------------------------------------------------------------===// - -defm CLS : OneOperandData<0b101, "cls">; -defm CLZ : OneOperandData<0b100, "clz", ctlz>; -defm RBIT : OneOperandData<0b000, "rbit">; -def REV16Wr : OneWRegData<0b001, "rev16", - UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; -def REV16Xr : OneXRegData<0b001, "rev16", - UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; - -def : Pat<(cttz GPR32:$Rn), - (CLZWr (RBITWr GPR32:$Rn))>; -def : Pat<(cttz GPR64:$Rn), - (CLZXr (RBITXr GPR64:$Rn))>; - -// Unlike the other one operand instructions, the instructions with the "rev" -// mnemonic do *not* just different in the size bit, but actually use different -// opcode bits for the different sizes. -def REVWr : OneWRegData<0b010, "rev", bswap>; -def REVXr : OneXRegData<0b011, "rev", bswap>; -def REV32Xr : OneXRegData<0b010, "rev32", - UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>; - -//===----------------------------------------------------------------------===// -// Bitfield immediate extraction instruction. -//===----------------------------------------------------------------------===// -let neverHasSideEffects = 1 in -defm EXTR : ExtractImm<"extr">; -def : InstAlias<"ror $dst, $src, $shift", - (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>; -def : InstAlias<"ror $dst, $src, $shift", - (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>; - -def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)), - (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>; -def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)), - (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>; - -//===----------------------------------------------------------------------===// -// Other bitfield immediate instructions. -//===----------------------------------------------------------------------===// -let neverHasSideEffects = 1 in { -defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">; -defm SBFM : BitfieldImm<0b00, "sbfm">; -defm UBFM : BitfieldImm<0b10, "ubfm">; -} - -def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = (32 - N->getZExtValue()) & 0x1f; - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = 31 - N->getZExtValue(); - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -// min(7, 31 - shift_amt) -def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = 31 - N->getZExtValue(); - enc = enc > 7 ? 7 : enc; - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -// min(15, 31 - shift_amt) -def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = 31 - N->getZExtValue(); - enc = enc > 15 ? 15 : enc; - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = (64 - N->getZExtValue()) & 0x3f; - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = 63 - N->getZExtValue(); - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -// min(7, 63 - shift_amt) -def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = 63 - N->getZExtValue(); - enc = enc > 7 ? 
7 : enc; - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -// min(15, 63 - shift_amt) -def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = 63 - N->getZExtValue(); - enc = enc > 15 ? 15 : enc; - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -// min(31, 63 - shift_amt) -def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = 63 - N->getZExtValue(); - enc = enc > 31 ? 31 : enc; - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)), - (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), - (i64 (i32shift_b imm0_31:$imm)))>; -def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)), - (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_b imm0_63:$imm)))>; - -let AddedComplexity = 10 in { -def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)), - (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; -def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)), - (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; -} - -def : InstAlias<"asr $dst, $src, $shift", - (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; -def : InstAlias<"asr $dst, $src, $shift", - (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; -def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; -def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; -def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; -def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; -def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; - -def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)), - (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; -def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)), - (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; - -def : InstAlias<"lsr $dst, $src, $shift", - (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; -def : InstAlias<"lsr $dst, $src, $shift", - (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; -def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; -def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; -def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; -def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; -def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; - -//===----------------------------------------------------------------------===// -// Conditionally set flags instructions. -//===----------------------------------------------------------------------===// -defm CCMN : CondSetFlagsImm<0, "ccmn">; -defm CCMP : CondSetFlagsImm<1, "ccmp">; - -defm CCMN : CondSetFlagsReg<0, "ccmn">; -defm CCMP : CondSetFlagsReg<1, "ccmp">; - -//===----------------------------------------------------------------------===// -// Conditional select instructions. 
-//===----------------------------------------------------------------------===// -defm CSEL : CondSelect<0, 0b00, "csel">; - -def inc : PatFrag<(ops node:$in), (add node:$in, 1)>; -defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>; -defm CSINV : CondSelectOp<1, 0b00, "csinv", not>; -defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>; - -def : Pat<(ARM64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), CPSR), - (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), CPSR), - (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), CPSR), - (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), CPSR), - (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), CPSR), - (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), CPSR), - (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; - -def : Pat<(ARM64csel (i32 0), (i32 1), (i32 imm:$cc), CPSR), - (CSINCWr WZR, WZR, (i32 imm:$cc))>; -def : Pat<(ARM64csel (i64 0), (i64 1), (i32 imm:$cc), CPSR), - (CSINCXr XZR, XZR, (i32 imm:$cc))>; -def : Pat<(ARM64csel (i32 0), (i32 -1), (i32 imm:$cc), CPSR), - (CSINVWr WZR, WZR, (i32 imm:$cc))>; -def : Pat<(ARM64csel (i64 0), (i64 -1), (i32 imm:$cc), CPSR), - (CSINVXr XZR, XZR, (i32 imm:$cc))>; - -// The inverse of the condition code from the alias instruction is what is used -// in the aliased instruction. The parser all ready inverts the condition code -// for these aliases. -// FIXME: Is this the correct way to handle these aliases? -def : InstAlias<"cset $dst, $cc", (CSINCWr GPR32:$dst, WZR, WZR, ccode:$cc)>; -def : InstAlias<"cset $dst, $cc", (CSINCXr GPR64:$dst, XZR, XZR, ccode:$cc)>; - -def : InstAlias<"csetm $dst, $cc", (CSINVWr GPR32:$dst, WZR, WZR, ccode:$cc)>; -def : InstAlias<"csetm $dst, $cc", (CSINVXr GPR64:$dst, XZR, XZR, ccode:$cc)>; - -def : InstAlias<"cinc $dst, $src, $cc", - (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, ccode:$cc)>; -def : InstAlias<"cinc $dst, $src, $cc", - (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, ccode:$cc)>; - -def : InstAlias<"cinv $dst, $src, $cc", - (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, ccode:$cc)>; -def : InstAlias<"cinv $dst, $src, $cc", - (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, ccode:$cc)>; - -def : InstAlias<"cneg $dst, $src, $cc", - (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, ccode:$cc)>; -def : InstAlias<"cneg $dst, $src, $cc", - (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, ccode:$cc)>; - -//===----------------------------------------------------------------------===// -// PC-relative instructions. -//===----------------------------------------------------------------------===// -let isReMaterializable = 1 in { -let neverHasSideEffects = 1, mayStore = 0, mayLoad = 0 in { -def ADR : ADRI<0, "adr", adrlabel, []>; -} // neverHasSideEffects = 1 - -def ADRP : ADRI<1, "adrp", adrplabel, - [(set GPR64:$Xd, (ARM64adrp tglobaladdr:$label))]>; -} // isReMaterializable = 1 - -// page address of a constant pool entry, block address -def : Pat<(ARM64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>; -def : Pat<(ARM64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>; - -//===----------------------------------------------------------------------===// -// Unconditional branch (register) instructions. 
-//===----------------------------------------------------------------------===// - -let isReturn = 1, isTerminator = 1, isBarrier = 1 in { -def RET : BranchReg<0b0010, "ret", []>; -def DRPS : SpecialReturn<0b0101, "drps">; -def ERET : SpecialReturn<0b0100, "eret">; -} // isReturn = 1, isTerminator = 1, isBarrier = 1 - -// Default to the LR register. -def : InstAlias<"ret", (RET LR)>; - -let isCall = 1, Defs = [LR], Uses = [SP] in { -def BLR : BranchReg<0b0001, "blr", [(ARM64call GPR64:$Rn)]>; -} // isCall - -let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { -def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; -} // isBranch, isTerminator, isBarrier, isIndirectBranch - -// Create a separate pseudo-instruction for codegen to use so that we don't -// flag lr as used in every function. It'll be restored before the RET by the -// epilogue if it's legitimately used. -def RET_ReallyLR : Pseudo<(outs), (ins), [(ARM64retflag)]> { - let isTerminator = 1; - let isBarrier = 1; - let isReturn = 1; -} - -// This is a directive-like pseudo-instruction. The purpose is to insert an -// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction -// (which in the usual case is a BLR). -let hasSideEffects = 1 in -def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> { - let AsmString = ".tlsdesccall $sym"; -} - -// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It -// gets expanded to two MCInsts during lowering. -let isCall = 1, Defs = [LR] in -def TLSDESC_BLR - : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym), - [(ARM64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>; - -def : Pat<(ARM64tlsdesc_call GPR64:$dest, texternalsym:$sym), - (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>; -//===----------------------------------------------------------------------===// -// Conditional branch (immediate) instruction. -//===----------------------------------------------------------------------===// -def Bcc : BranchCond; - -//===----------------------------------------------------------------------===// -// Compare-and-branch instructions. -//===----------------------------------------------------------------------===// -defm CBZ : CmpBranch<0, "cbz", ARM64cbz>; -defm CBNZ : CmpBranch<1, "cbnz", ARM64cbnz>; - -//===----------------------------------------------------------------------===// -// Test-bit-and-branch instructions. -//===----------------------------------------------------------------------===// -def TBZ : TestBranch<0, "tbz", ARM64tbz>; -def TBNZ : TestBranch<1, "tbnz", ARM64tbnz>; - -//===----------------------------------------------------------------------===// -// Unconditional branch (immediate) instructions. -//===----------------------------------------------------------------------===// -let isBranch = 1, isTerminator = 1, isBarrier = 1 in { -def B : BranchImm<0, "b", [(br bb:$addr)]>; -} // isBranch, isTerminator, isBarrier - -let isCall = 1, Defs = [LR], Uses = [SP] in { -def BL : CallImm<1, "bl", [(ARM64call tglobaladdr:$addr)]>; -} // isCall -def : Pat<(ARM64call texternalsym:$func), (BL texternalsym:$func)>; - -//===----------------------------------------------------------------------===// -// Exception generation instructions. 
-//===----------------------------------------------------------------------===// -def BRK : ExceptionGeneration<0b001, 0b00, "brk">; -def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">; -def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">; -def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">; -def HLT : ExceptionGeneration<0b010, 0b00, "hlt">; -def HVC : ExceptionGeneration<0b000, 0b10, "hvc">; -def SMC : ExceptionGeneration<0b000, 0b11, "smc">; -def SVC : ExceptionGeneration<0b000, 0b01, "svc">; - -// DCPSn defaults to an immediate operand of zero if unspecified. -def : InstAlias<"dcps1", (DCPS1 0)>; -def : InstAlias<"dcps2", (DCPS2 0)>; -def : InstAlias<"dcps3", (DCPS3 0)>; - -//===----------------------------------------------------------------------===// -// Load instructions. -//===----------------------------------------------------------------------===// - -// Pair (indexed, offset) -def LDPWi : LoadPairOffset<0b00, 0, GPR32, am_indexed32simm7, "ldp">; -def LDPXi : LoadPairOffset<0b10, 0, GPR64, am_indexed64simm7, "ldp">; -def LDPSi : LoadPairOffset<0b00, 1, FPR32, am_indexed32simm7, "ldp">; -def LDPDi : LoadPairOffset<0b01, 1, FPR64, am_indexed64simm7, "ldp">; -def LDPQi : LoadPairOffset<0b10, 1, FPR128, am_indexed128simm7, "ldp">; - -def LDPSWi : LoadPairOffset<0b01, 0, GPR64, am_indexed32simm7, "ldpsw">; - -// Pair (pre-indexed) -def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32, am_indexed32simm7, "ldp">; -def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64, am_indexed64simm7, "ldp">; -def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32, am_indexed32simm7, "ldp">; -def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64, am_indexed64simm7, "ldp">; -def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128, am_indexed128simm7, "ldp">; - -def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64, am_indexed32simm7, "ldpsw">; - -// Pair (post-indexed) -def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32, simm7s4, "ldp">; -def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64, simm7s8, "ldp">; -def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32, simm7s4, "ldp">; -def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64, simm7s8, "ldp">; -def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128, simm7s16, "ldp">; - -def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64, simm7s4, "ldpsw">; - - -// Pair (no allocate) -def LDNPWi : LoadPairNoAlloc<0b00, 0, GPR32, am_indexed32simm7, "ldnp">; -def LDNPXi : LoadPairNoAlloc<0b10, 0, GPR64, am_indexed64simm7, "ldnp">; -def LDNPSi : LoadPairNoAlloc<0b00, 1, FPR32, am_indexed32simm7, "ldnp">; -def LDNPDi : LoadPairNoAlloc<0b01, 1, FPR64, am_indexed64simm7, "ldnp">; -def LDNPQi : LoadPairNoAlloc<0b10, 1, FPR128, am_indexed128simm7, "ldnp">; - -//--- -// (register offset) -//--- - -let AddedComplexity = 10 in { -// Integer -def LDRBBro : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", - [(set GPR32:$Rt, (zextloadi8 ro_indexed8:$addr))]>; -def LDRHHro : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", - [(set GPR32:$Rt, (zextloadi16 ro_indexed16:$addr))]>; -def LDRWro : Load32RO<0b10, 0, 0b01, GPR32, "ldr", - [(set GPR32:$Rt, (load ro_indexed32:$addr))]>; -def LDRXro : Load64RO<0b11, 0, 0b01, GPR64, "ldr", - [(set GPR64:$Rt, (load ro_indexed64:$addr))]>; - -// Floating-point -def LDRBro : Load8RO<0b00, 1, 0b01, FPR8, "ldr", - [(set FPR8:$Rt, (load ro_indexed8:$addr))]>; -def LDRHro : Load16RO<0b01, 1, 0b01, FPR16, "ldr", - [(set FPR16:$Rt, (load ro_indexed16:$addr))]>; -def LDRSro : Load32RO<0b10, 1, 0b01, FPR32, "ldr", - [(set (f32 FPR32:$Rt), (load ro_indexed32:$addr))]>; -def LDRDro : Load64RO<0b11, 1, 0b01, FPR64, "ldr", - [(set (f64 FPR64:$Rt), (load 
ro_indexed64:$addr))]>; -def LDRQro : Load128RO<0b00, 1, 0b11, FPR128, "ldr", []> { - let mayLoad = 1; -} - -// For regular load, we do not have any alignment requirement. -// Thus, it is safe to directly map the vector loads with interesting -// addressing modes. -// FIXME: We could do the same for bitconvert to floating point vectors. -def : Pat <(v8i8 (scalar_to_vector (i32 (extloadi8 ro_indexed8:$addr)))), - (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), bsub)>; -def : Pat <(v16i8 (scalar_to_vector (i32 (extloadi8 ro_indexed8:$addr)))), - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), bsub)>; -def : Pat <(v4i16 (scalar_to_vector (i32 (extloadi16 ro_indexed16:$addr)))), - (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), hsub)>; -def : Pat <(v8i16 (scalar_to_vector (i32 (extloadi16 ro_indexed16:$addr)))), - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), hsub)>; -def : Pat <(v2i32 (scalar_to_vector (i32 (load ro_indexed32:$addr)))), - (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), - (LDRSro ro_indexed32:$addr), ssub)>; -def : Pat <(v4i32 (scalar_to_vector (i32 (load ro_indexed32:$addr)))), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), - (LDRSro ro_indexed32:$addr), ssub)>; -def : Pat <(v1i64 (scalar_to_vector (i64 (load ro_indexed64:$addr)))), - (LDRDro ro_indexed64:$addr)>; -def : Pat <(v2i64 (scalar_to_vector (i64 (load ro_indexed64:$addr)))), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), - (LDRDro ro_indexed64:$addr), dsub)>; - -// Match all load 64 bits width whose type is compatible with FPR64 -def : Pat<(v2f32 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; -def : Pat<(v1f64 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; -def : Pat<(v8i8 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; -def : Pat<(v4i16 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; -def : Pat<(v2i32 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; -def : Pat<(v1i64 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; - -// Match all load 128 bits width whose type is compatible with FPR128 -def : Pat<(v4f32 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(v2f64 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(v16i8 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(v8i16 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(v4i32 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(v2i64 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(f128 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; - -// Load sign-extended half-word -def LDRSHWro : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", - [(set GPR32:$Rt, (sextloadi16 ro_indexed16:$addr))]>; -def LDRSHXro : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", - [(set GPR64:$Rt, (sextloadi16 ro_indexed16:$addr))]>; - -// Load sign-extended byte -def LDRSBWro : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", - [(set GPR32:$Rt, (sextloadi8 ro_indexed8:$addr))]>; -def LDRSBXro : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", - [(set GPR64:$Rt, (sextloadi8 ro_indexed8:$addr))]>; - -// Load sign-extended word -def LDRSWro : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", - [(set GPR64:$Rt, (sextloadi32 ro_indexed32:$addr))]>; - -// Pre-fetch. 
-def PRFMro : PrefetchRO<0b11, 0, 0b10, "prfm", - [(ARM64Prefetch imm:$Rt, ro_indexed64:$addr)]>; - -// zextload -> i64 -def : Pat<(i64 (zextloadi8 ro_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>; -def : Pat<(i64 (zextloadi16 ro_indexed16:$addr)), - (SUBREG_TO_REG (i64 0), (LDRHHro ro_indexed16:$addr), sub_32)>; - -// zextloadi1 -> zextloadi8 -def : Pat<(i32 (zextloadi1 ro_indexed8:$addr)), (LDRBBro ro_indexed8:$addr)>; -def : Pat<(i64 (zextloadi1 ro_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>; - -// extload -> zextload -def : Pat<(i32 (extloadi16 ro_indexed16:$addr)), (LDRHHro ro_indexed16:$addr)>; -def : Pat<(i32 (extloadi8 ro_indexed8:$addr)), (LDRBBro ro_indexed8:$addr)>; -def : Pat<(i32 (extloadi1 ro_indexed8:$addr)), (LDRBBro ro_indexed8:$addr)>; -def : Pat<(i64 (extloadi32 ro_indexed32:$addr)), - (SUBREG_TO_REG (i64 0), (LDRWro ro_indexed32:$addr), sub_32)>; -def : Pat<(i64 (extloadi16 ro_indexed16:$addr)), - (SUBREG_TO_REG (i64 0), (LDRHHro ro_indexed16:$addr), sub_32)>; -def : Pat<(i64 (extloadi8 ro_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>; -def : Pat<(i64 (extloadi1 ro_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>; - -} // AddedComplexity = 10 - -//--- -// (unsigned immediate) -//--- -def LDRXui : LoadUI<0b11, 0, 0b01, GPR64, am_indexed64, "ldr", - [(set GPR64:$Rt, (load am_indexed64:$addr))]>; -def LDRWui : LoadUI<0b10, 0, 0b01, GPR32, am_indexed32, "ldr", - [(set GPR32:$Rt, (load am_indexed32:$addr))]>; -def LDRBui : LoadUI<0b00, 1, 0b01, FPR8, am_indexed8, "ldr", - [(set FPR8:$Rt, (load am_indexed8:$addr))]>; -def LDRHui : LoadUI<0b01, 1, 0b01, FPR16, am_indexed16, "ldr", - [(set FPR16:$Rt, (load am_indexed16:$addr))]>; -def LDRSui : LoadUI<0b10, 1, 0b01, FPR32, am_indexed32, "ldr", - [(set (f32 FPR32:$Rt), (load am_indexed32:$addr))]>; -def LDRDui : LoadUI<0b11, 1, 0b01, FPR64, am_indexed64, "ldr", - [(set (f64 FPR64:$Rt), (load am_indexed64:$addr))]>; -def LDRQui : LoadUI<0b00, 1, 0b11, FPR128, am_indexed128, "ldr", - [(set (f128 FPR128:$Rt), (load am_indexed128:$addr))]>; - -// For regular load, we do not have any alignment requirement. -// Thus, it is safe to directly map the vector loads with interesting -// addressing modes. -// FIXME: We could do the same for bitconvert to floating point vectors. 
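The "extload -> zextload" and "zextloadi1 -> zextloadi8" groups above (and their unsigned-immediate counterparts just below) rest on two simple facts: an any-extending load leaves the high bits unspecified, so a zero-extending load is always a valid way to implement it, and an in-memory i1 occupies a whole byte holding 0 or 1, so a plain byte load already yields the right value. A tiny standalone illustration of the first fact:

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint8_t Mem = 0xAB;            // the byte in memory
    uint32_t AnyExt = Mem;         // implemented as a zero-extending load; high bits are 0
    // A consumer of an "anyext" load may only inspect the low 8 bits, so the
    // zero-extended result is indistinguishable from any other legal extension.
    std::printf("low byte = 0x%02x\n", AnyExt & 0xff);
    return 0;
  }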
-def : Pat <(v8i8 (scalar_to_vector (i32 (extloadi8 am_indexed8:$addr)))), - (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), bsub)>; -def : Pat <(v16i8 (scalar_to_vector (i32 (extloadi8 am_indexed8:$addr)))), - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), bsub)>; -def : Pat <(v4i16 (scalar_to_vector (i32 (extloadi16 am_indexed16:$addr)))), - (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), hsub)>; -def : Pat <(v8i16 (scalar_to_vector (i32 (extloadi16 am_indexed16:$addr)))), - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), hsub)>; -def : Pat <(v2i32 (scalar_to_vector (i32 (load am_indexed32:$addr)))), - (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), - (LDRSui am_indexed32:$addr), ssub)>; -def : Pat <(v4i32 (scalar_to_vector (i32 (load am_indexed32:$addr)))), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), - (LDRSui am_indexed32:$addr), ssub)>; -def : Pat <(v1i64 (scalar_to_vector (i64 (load am_indexed64:$addr)))), - (LDRDui am_indexed64:$addr)>; -def : Pat <(v2i64 (scalar_to_vector (i64 (load am_indexed64:$addr)))), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), - (LDRDui am_indexed64:$addr), dsub)>; - -// Match all load 64 bits width whose type is compatible with FPR64 -def : Pat<(v2f32 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; -def : Pat<(v1f64 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; -def : Pat<(v8i8 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; -def : Pat<(v4i16 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; -def : Pat<(v2i32 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; -def : Pat<(v1i64 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; - -// Match all load 128 bits width whose type is compatible with FPR128 -def : Pat<(v4f32 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; -def : Pat<(v2f64 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; -def : Pat<(v16i8 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; -def : Pat<(v8i16 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; -def : Pat<(v4i32 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; -def : Pat<(v2i64 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; -def : Pat<(f128 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; - -def LDRHHui : LoadUI<0b01, 0, 0b01, GPR32, am_indexed16, "ldrh", - [(set GPR32:$Rt, (zextloadi16 am_indexed16:$addr))]>; -def LDRBBui : LoadUI<0b00, 0, 0b01, GPR32, am_indexed8, "ldrb", - [(set GPR32:$Rt, (zextloadi8 am_indexed8:$addr))]>; -// zextload -> i64 -def : Pat<(i64 (zextloadi8 am_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>; -def : Pat<(i64 (zextloadi16 am_indexed16:$addr)), - (SUBREG_TO_REG (i64 0), (LDRHHui am_indexed16:$addr), sub_32)>; - -// zextloadi1 -> zextloadi8 -def : Pat<(i32 (zextloadi1 am_indexed8:$addr)), (LDRBBui am_indexed8:$addr)>; -def : Pat<(i64 (zextloadi1 am_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>; - -// extload -> zextload -def : Pat<(i32 (extloadi16 am_indexed16:$addr)), (LDRHHui am_indexed16:$addr)>; -def : Pat<(i32 (extloadi8 am_indexed8:$addr)), (LDRBBui am_indexed8:$addr)>; -def : Pat<(i32 (extloadi1 am_indexed8:$addr)), (LDRBBui am_indexed8:$addr)>; -def : Pat<(i64 (extloadi32 am_indexed32:$addr)), - (SUBREG_TO_REG (i64 0), (LDRWui am_indexed32:$addr), sub_32)>; -def : Pat<(i64 (extloadi16 am_indexed16:$addr)), - (SUBREG_TO_REG (i64 0), (LDRHHui am_indexed16:$addr), sub_32)>; -def : 
Pat<(i64 (extloadi8 am_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>; -def : Pat<(i64 (extloadi1 am_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>; - -// load sign-extended half-word -def LDRSHWui : LoadUI<0b01, 0, 0b11, GPR32, am_indexed16, "ldrsh", - [(set GPR32:$Rt, (sextloadi16 am_indexed16:$addr))]>; -def LDRSHXui : LoadUI<0b01, 0, 0b10, GPR64, am_indexed16, "ldrsh", - [(set GPR64:$Rt, (sextloadi16 am_indexed16:$addr))]>; - -// load sign-extended byte -def LDRSBWui : LoadUI<0b00, 0, 0b11, GPR32, am_indexed8, "ldrsb", - [(set GPR32:$Rt, (sextloadi8 am_indexed8:$addr))]>; -def LDRSBXui : LoadUI<0b00, 0, 0b10, GPR64, am_indexed8, "ldrsb", - [(set GPR64:$Rt, (sextloadi8 am_indexed8:$addr))]>; - -// load sign-extended word -def LDRSWui : LoadUI<0b10, 0, 0b10, GPR64, am_indexed32, "ldrsw", - [(set GPR64:$Rt, (sextloadi32 am_indexed32:$addr))]>; - -// load zero-extended word -def : Pat<(i64 (zextloadi32 am_indexed32:$addr)), - (SUBREG_TO_REG (i64 0), (LDRWui am_indexed32:$addr), sub_32)>; - -// Pre-fetch. -def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", - [(ARM64Prefetch imm:$Rt, am_indexed64:$addr)]>; - -//--- -// (literal) -def LDRWl : LoadLiteral<0b00, 0, GPR32, "ldr">; -def LDRXl : LoadLiteral<0b01, 0, GPR64, "ldr">; -def LDRSl : LoadLiteral<0b00, 1, FPR32, "ldr">; -def LDRDl : LoadLiteral<0b01, 1, FPR64, "ldr">; -def LDRQl : LoadLiteral<0b10, 1, FPR128, "ldr">; - -// load sign-extended word -def LDRSWl : LoadLiteral<0b10, 0, GPR64, "ldrsw">; - -// prefetch -def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>; -// [(ARM64Prefetch imm:$Rt, tglobaladdr:$label)]>; - -//--- -// (unscaled immediate) -def LDURXi : LoadUnscaled<0b11, 0, 0b01, GPR64, am_unscaled64, "ldur", - [(set GPR64:$Rt, (load am_unscaled64:$addr))]>; -def LDURWi : LoadUnscaled<0b10, 0, 0b01, GPR32, am_unscaled32, "ldur", - [(set GPR32:$Rt, (load am_unscaled32:$addr))]>; -def LDURBi : LoadUnscaled<0b00, 1, 0b01, FPR8, am_unscaled8, "ldur", - [(set FPR8:$Rt, (load am_unscaled8:$addr))]>; -def LDURHi : LoadUnscaled<0b01, 1, 0b01, FPR16, am_unscaled16, "ldur", - [(set FPR16:$Rt, (load am_unscaled16:$addr))]>; -def LDURSi : LoadUnscaled<0b10, 1, 0b01, FPR32, am_unscaled32, "ldur", - [(set (f32 FPR32:$Rt), (load am_unscaled32:$addr))]>; -def LDURDi : LoadUnscaled<0b11, 1, 0b01, FPR64, am_unscaled64, "ldur", - [(set (f64 FPR64:$Rt), (load am_unscaled64:$addr))]>; -def LDURQi : LoadUnscaled<0b00, 1, 0b11, FPR128, am_unscaled128, "ldur", - [(set (v2f64 FPR128:$Rt), (load am_unscaled128:$addr))]>; - -def LDURHHi - : LoadUnscaled<0b01, 0, 0b01, GPR32, am_unscaled16, "ldurh", - [(set GPR32:$Rt, (zextloadi16 am_unscaled16:$addr))]>; -def LDURBBi - : LoadUnscaled<0b00, 0, 0b01, GPR32, am_unscaled8, "ldurb", - [(set GPR32:$Rt, (zextloadi8 am_unscaled8:$addr))]>; - -// Match all load 64 bits width whose type is compatible with FPR64 -def : Pat<(v2f32 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; -def : Pat<(v1f64 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; -def : Pat<(v8i8 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; -def : Pat<(v4i16 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; -def : Pat<(v2i32 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; -def : Pat<(v1i64 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; - -// Match all load 128 bits width whose type is compatible with FPR128 -def : Pat<(v4f32 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; -def : Pat<(v2f64 (load 
am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; -def : Pat<(v16i8 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; -def : Pat<(v8i16 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; -def : Pat<(v4i32 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; -def : Pat<(v2i64 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; -def : Pat<(f128 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; - -// anyext -> zext -def : Pat<(i32 (extloadi16 am_unscaled16:$addr)), (LDURHHi am_unscaled16:$addr)>; -def : Pat<(i32 (extloadi8 am_unscaled8:$addr)), (LDURBBi am_unscaled8:$addr)>; -def : Pat<(i32 (extloadi1 am_unscaled8:$addr)), (LDURBBi am_unscaled8:$addr)>; -def : Pat<(i64 (extloadi32 am_unscaled32:$addr)), - (SUBREG_TO_REG (i64 0), (LDURWi am_unscaled32:$addr), sub_32)>; -def : Pat<(i64 (extloadi16 am_unscaled16:$addr)), - (SUBREG_TO_REG (i64 0), (LDURHHi am_unscaled16:$addr), sub_32)>; -def : Pat<(i64 (extloadi8 am_unscaled8:$addr)), - (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>; -def : Pat<(i64 (extloadi1 am_unscaled8:$addr)), - (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>; -// unscaled zext -def : Pat<(i32 (zextloadi16 am_unscaled16:$addr)), - (LDURHHi am_unscaled16:$addr)>; -def : Pat<(i32 (zextloadi8 am_unscaled8:$addr)), - (LDURBBi am_unscaled8:$addr)>; -def : Pat<(i32 (zextloadi1 am_unscaled8:$addr)), - (LDURBBi am_unscaled8:$addr)>; -def : Pat<(i64 (zextloadi32 am_unscaled32:$addr)), - (SUBREG_TO_REG (i64 0), (LDURWi am_unscaled32:$addr), sub_32)>; -def : Pat<(i64 (zextloadi16 am_unscaled16:$addr)), - (SUBREG_TO_REG (i64 0), (LDURHHi am_unscaled16:$addr), sub_32)>; -def : Pat<(i64 (zextloadi8 am_unscaled8:$addr)), - (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>; -def : Pat<(i64 (zextloadi1 am_unscaled8:$addr)), - (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>; - - -//--- -// LDR mnemonics fall back to LDUR for negative or unaligned offsets. - -// Define new assembler match classes as we want to only match these when -// the don't otherwise match the scaled addressing mode for LDR/STR. Don't -// associate a DiagnosticType either, as we want the diagnostic for the -// canonical form (the scaled operand) to take precedence. 
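The fallback described above exists because the two encodings accept different offsets: the scaled, unsigned-offset LDR/STR forms take a non-negative immediate that is a multiple of the access size (up to 4095 times that size), while LDUR/STUR take any signed 9-bit byte offset. A standalone sketch of the check an assembler could apply before falling back, using those ranges; the helper names are made up:

  #include <cstdio>

  // Scaled unsigned-offset form: offset >= 0, a multiple of the access size,
  // and at most 4095 * size.  Unscaled (LDUR) form: any offset in [-256, 255].
  static bool fitsScaledLDR(long Off, long Size) {
    return Off >= 0 && Off % Size == 0 && Off / Size <= 4095;
  }
  static bool fitsLDUR(long Off) { return Off >= -256 && Off <= 255; }

  int main() {
    long Off = -8;                                   // e.g. "ldr x0, [x1, #-8]"
    if (!fitsScaledLDR(Off, 8) && fitsLDUR(Off))
      std::puts("no scaled encoding; match the unscaled-fallback operand -> ldur");
    return 0;
  }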
-def MemoryUnscaledFB8Operand : AsmOperandClass { - let Name = "MemoryUnscaledFB8"; - let RenderMethod = "addMemoryUnscaledOperands"; -} -def MemoryUnscaledFB16Operand : AsmOperandClass { - let Name = "MemoryUnscaledFB16"; - let RenderMethod = "addMemoryUnscaledOperands"; -} -def MemoryUnscaledFB32Operand : AsmOperandClass { - let Name = "MemoryUnscaledFB32"; - let RenderMethod = "addMemoryUnscaledOperands"; -} -def MemoryUnscaledFB64Operand : AsmOperandClass { - let Name = "MemoryUnscaledFB64"; - let RenderMethod = "addMemoryUnscaledOperands"; -} -def MemoryUnscaledFB128Operand : AsmOperandClass { - let Name = "MemoryUnscaledFB128"; - let RenderMethod = "addMemoryUnscaledOperands"; -} -def am_unscaled_fb8 : Operand<i64> { - let ParserMatchClass = MemoryUnscaledFB8Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} -def am_unscaled_fb16 : Operand<i64> { - let ParserMatchClass = MemoryUnscaledFB16Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} -def am_unscaled_fb32 : Operand<i64> { - let ParserMatchClass = MemoryUnscaledFB32Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} -def am_unscaled_fb64 : Operand<i64> { - let ParserMatchClass = MemoryUnscaledFB64Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} -def am_unscaled_fb128 : Operand<i64> { - let ParserMatchClass = MemoryUnscaledFB128Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} -def : InstAlias<"ldr $Rt, $addr", (LDURXi GPR64:$Rt, am_unscaled_fb64:$addr)>; -def : InstAlias<"ldr $Rt, $addr", (LDURWi GPR32:$Rt, am_unscaled_fb32:$addr)>; -def : InstAlias<"ldr $Rt, $addr", (LDURBi FPR8:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"ldr $Rt, $addr", (LDURHi FPR16:$Rt, am_unscaled_fb16:$addr)>; -def : InstAlias<"ldr $Rt, $addr", (LDURSi FPR32:$Rt, am_unscaled_fb32:$addr)>; -def : InstAlias<"ldr $Rt, $addr", (LDURDi FPR64:$Rt, am_unscaled_fb64:$addr)>; -def : InstAlias<"ldr $Rt, $addr", (LDURQi FPR128:$Rt, am_unscaled_fb128:$addr)>; - -// zextload -> i64 -def : Pat<(i64 (zextloadi8 am_unscaled8:$addr)), - (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>; -def : Pat<(i64 (zextloadi16 am_unscaled16:$addr)), - (SUBREG_TO_REG (i64 0), (LDURHHi am_unscaled16:$addr), sub_32)>; - -// load sign-extended half-word -def LDURSHWi - : LoadUnscaled<0b01, 0, 0b11, GPR32, am_unscaled16, "ldursh", - [(set GPR32:$Rt, (sextloadi16 am_unscaled16:$addr))]>; -def LDURSHXi - : LoadUnscaled<0b01, 0, 0b10, GPR64, am_unscaled16, "ldursh", - [(set GPR64:$Rt, (sextloadi16 am_unscaled16:$addr))]>; - -// load sign-extended byte -def LDURSBWi - : LoadUnscaled<0b00, 0, 0b11, GPR32, am_unscaled8, "ldursb", - [(set GPR32:$Rt, (sextloadi8 am_unscaled8:$addr))]>; -def LDURSBXi - : LoadUnscaled<0b00, 0, 0b10, GPR64, am_unscaled8, "ldursb", - [(set GPR64:$Rt, (sextloadi8 am_unscaled8:$addr))]>; - -// load sign-extended word -def LDURSWi - : LoadUnscaled<0b10, 0, 0b10, GPR64, am_unscaled32, "ldursw", - [(set GPR64:$Rt, (sextloadi32 am_unscaled32:$addr))]>; - -// zero and sign extending aliases from generic LDR* mnemonics to LDUR*. 
-def : InstAlias<"ldrb $Rt, $addr", (LDURBBi GPR32:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"ldrh $Rt, $addr", (LDURHHi GPR32:$Rt, am_unscaled_fb16:$addr)>; -def : InstAlias<"ldrsb $Rt, $addr", (LDURSBWi GPR32:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"ldrsb $Rt, $addr", (LDURSBXi GPR64:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"ldrsh $Rt, $addr", (LDURSHWi GPR32:$Rt, am_unscaled_fb16:$addr)>; -def : InstAlias<"ldrsh $Rt, $addr", (LDURSHXi GPR64:$Rt, am_unscaled_fb16:$addr)>; -def : InstAlias<"ldrsw $Rt, $addr", (LDURSWi GPR64:$Rt, am_unscaled_fb32:$addr)>; - -// Pre-fetch. -def PRFUMi : PrefetchUnscaled<0b11, 0, 0b10, "prfum", - [(ARM64Prefetch imm:$Rt, am_unscaled64:$addr)]>; - -//--- -// (unscaled immediate, unprivileged) -def LDTRXi : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">; -def LDTRWi : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">; - -def LDTRHi : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">; -def LDTRBi : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">; - -// load sign-extended half-word -def LDTRSHWi : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">; -def LDTRSHXi : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">; - -// load sign-extended byte -def LDTRSBWi : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">; -def LDTRSBXi : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">; - -// load sign-extended word -def LDTRSWi : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">; - -//--- -// (immediate pre-indexed) -def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32, "ldr">; -def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64, "ldr">; -def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8, "ldr">; -def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16, "ldr">; -def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32, "ldr">; -def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64, "ldr">; -def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128, "ldr">; - -// load sign-extended half-word -def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32, "ldrsh">; -def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64, "ldrsh">; - -// load sign-extended byte -def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32, "ldrsb">; -def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64, "ldrsb">; - -// load zero-extended byte -def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32, "ldrb">; -def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32, "ldrh">; - -// load sign-extended word -def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64, "ldrsw">; - -// ISel pseudos and patterns. See expanded comment on LoadPreIdxPseudo. 
-def LDRDpre_isel : LoadPreIdxPseudo<FPR64>; -def LDRSpre_isel : LoadPreIdxPseudo<FPR32>; -def LDRXpre_isel : LoadPreIdxPseudo<GPR64>; -def LDRWpre_isel : LoadPreIdxPseudo<GPR32>; -def LDRHHpre_isel : LoadPreIdxPseudo<GPR32>; -def LDRBBpre_isel : LoadPreIdxPseudo<GPR32>; - -def LDRSWpre_isel : LoadPreIdxPseudo<GPR64>; -def LDRSHWpre_isel : LoadPreIdxPseudo<GPR32>; -def LDRSHXpre_isel : LoadPreIdxPseudo<GPR64>; -def LDRSBWpre_isel : LoadPreIdxPseudo<GPR32>; -def LDRSBXpre_isel : LoadPreIdxPseudo<GPR64>; - -//--- -// (immediate post-indexed) -def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32, "ldr">; -def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64, "ldr">; -def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8, "ldr">; -def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16, "ldr">; -def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32, "ldr">; -def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64, "ldr">; -def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128, "ldr">; - -// load sign-extended half-word -def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32, "ldrsh">; -def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64, "ldrsh">; - -// load sign-extended byte -def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32, "ldrsb">; -def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64, "ldrsb">; - -// load zero-extended byte -def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32, "ldrb">; -def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32, "ldrh">; - -// load sign-extended word -def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64, "ldrsw">; - -// ISel pseudos and patterns. See expanded comment on LoadPostIdxPseudo. -def LDRDpost_isel : LoadPostIdxPseudo<FPR64>; -def LDRSpost_isel : LoadPostIdxPseudo<FPR32>; -def LDRXpost_isel : LoadPostIdxPseudo<GPR64>; -def LDRWpost_isel : LoadPostIdxPseudo<GPR32>; -def LDRHHpost_isel : LoadPostIdxPseudo<GPR32>; -def LDRBBpost_isel : LoadPostIdxPseudo<GPR32>; - -def LDRSWpost_isel : LoadPostIdxPseudo<GPR64>; -def LDRSHWpost_isel : LoadPostIdxPseudo<GPR32>; -def LDRSHXpost_isel : LoadPostIdxPseudo<GPR64>; -def LDRSBWpost_isel : LoadPostIdxPseudo<GPR32>; -def LDRSBXpost_isel : LoadPostIdxPseudo<GPR64>; - -//===----------------------------------------------------------------------===// -// Store instructions. -//===----------------------------------------------------------------------===// - -// Pair (indexed, offset) -// FIXME: Use dedicated range-checked addressing mode operand here. 
-def STPWi : StorePairOffset<0b00, 0, GPR32, am_indexed32simm7, "stp">; -def STPXi : StorePairOffset<0b10, 0, GPR64, am_indexed64simm7, "stp">; -def STPSi : StorePairOffset<0b00, 1, FPR32, am_indexed32simm7, "stp">; -def STPDi : StorePairOffset<0b01, 1, FPR64, am_indexed64simm7, "stp">; -def STPQi : StorePairOffset<0b10, 1, FPR128, am_indexed128simm7, "stp">; - -// Pair (pre-indexed) -def STPWpre : StorePairPreIdx<0b00, 0, GPR32, am_indexed32simm7, "stp">; -def STPXpre : StorePairPreIdx<0b10, 0, GPR64, am_indexed64simm7, "stp">; -def STPSpre : StorePairPreIdx<0b00, 1, FPR32, am_indexed32simm7, "stp">; -def STPDpre : StorePairPreIdx<0b01, 1, FPR64, am_indexed64simm7, "stp">; -def STPQpre : StorePairPreIdx<0b10, 1, FPR128, am_indexed128simm7, "stp">; - -// Pair (post-indexed) -def STPWpost : StorePairPostIdx<0b00, 0, GPR32, simm7s4, "stp">; -def STPXpost : StorePairPostIdx<0b10, 0, GPR64, simm7s8, "stp">; -def STPSpost : StorePairPostIdx<0b00, 1, FPR32, simm7s4, "stp">; -def STPDpost : StorePairPostIdx<0b01, 1, FPR64, simm7s8, "stp">; -def STPQpost : StorePairPostIdx<0b10, 1, FPR128, simm7s16, "stp">; - -// Pair (no allocate) -def STNPWi : StorePairNoAlloc<0b00, 0, GPR32, am_indexed32simm7, "stnp">; -def STNPXi : StorePairNoAlloc<0b10, 0, GPR64, am_indexed64simm7, "stnp">; -def STNPSi : StorePairNoAlloc<0b00, 1, FPR32, am_indexed32simm7, "stnp">; -def STNPDi : StorePairNoAlloc<0b01, 1, FPR64, am_indexed64simm7, "stnp">; -def STNPQi : StorePairNoAlloc<0b10, 1, FPR128, am_indexed128simm7, "stnp">; - -//--- -// (Register offset) - -let AddedComplexity = 10 in { - -// Integer -def STRHHro : Store16RO<0b01, 0, 0b00, GPR32, "strh", - [(truncstorei16 GPR32:$Rt, ro_indexed16:$addr)]>; -def STRBBro : Store8RO<0b00, 0, 0b00, GPR32, "strb", - [(truncstorei8 GPR32:$Rt, ro_indexed8:$addr)]>; -def STRWro : Store32RO<0b10, 0, 0b00, GPR32, "str", - [(store GPR32:$Rt, ro_indexed32:$addr)]>; -def STRXro : Store64RO<0b11, 0, 0b00, GPR64, "str", - [(store GPR64:$Rt, ro_indexed64:$addr)]>; - -// truncstore i64 -def : Pat<(truncstorei8 GPR64:$Rt, ro_indexed8:$addr), - (STRBBro (EXTRACT_SUBREG GPR64:$Rt, sub_32), ro_indexed8:$addr)>; -def : Pat<(truncstorei16 GPR64:$Rt, ro_indexed16:$addr), - (STRHHro (EXTRACT_SUBREG GPR64:$Rt, sub_32), ro_indexed16:$addr)>; -def : Pat<(truncstorei32 GPR64:$Rt, ro_indexed32:$addr), - (STRWro (EXTRACT_SUBREG GPR64:$Rt, sub_32), ro_indexed32:$addr)>; - - -// Floating-point -def STRBro : Store8RO<0b00, 1, 0b00, FPR8, "str", - [(store FPR8:$Rt, ro_indexed8:$addr)]>; -def STRHro : Store16RO<0b01, 1, 0b00, FPR16, "str", - [(store FPR16:$Rt, ro_indexed16:$addr)]>; -def STRSro : Store32RO<0b10, 1, 0b00, FPR32, "str", - [(store (f32 FPR32:$Rt), ro_indexed32:$addr)]>; -def STRDro : Store64RO<0b11, 1, 0b00, FPR64, "str", - [(store (f64 FPR64:$Rt), ro_indexed64:$addr)]>; -def STRQro : Store128RO<0b00, 1, 0b10, FPR128, "str", []> { - let mayStore = 1; -} - -// Match all store 64 bits width whose type is compatible with FPR64 -def : Pat<(store (v2f32 FPR64:$Rn), ro_indexed64:$addr), - (STRDro FPR64:$Rn, ro_indexed64:$addr)>; -def : Pat<(store (v1f64 FPR64:$Rn), ro_indexed64:$addr), - (STRDro FPR64:$Rn, ro_indexed64:$addr)>; -def : Pat<(store (v8i8 FPR64:$Rn), ro_indexed64:$addr), - (STRDro FPR64:$Rn, ro_indexed64:$addr)>; -def : Pat<(store (v4i16 FPR64:$Rn), ro_indexed64:$addr), - (STRDro FPR64:$Rn, ro_indexed64:$addr)>; -def : Pat<(store (v2i32 FPR64:$Rn), ro_indexed64:$addr), - (STRDro FPR64:$Rn, ro_indexed64:$addr)>; -def : Pat<(store (v1i64 FPR64:$Rn), ro_indexed64:$addr), - (STRDro 
FPR64:$Rn, ro_indexed64:$addr)>; - -// Match all store 128 bits width whose type is compatible with FPR128 -def : Pat<(store (v4f32 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; -def : Pat<(store (v2f64 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; -def : Pat<(store (v16i8 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; -def : Pat<(store (v8i16 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; -def : Pat<(store (v4i32 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; -def : Pat<(store (v2i64 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; -def : Pat<(store (f128 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; - -//--- -// (unsigned immediate) -def STRXui : StoreUI<0b11, 0, 0b00, GPR64, am_indexed64, "str", - [(store GPR64:$Rt, am_indexed64:$addr)]>; -def STRWui : StoreUI<0b10, 0, 0b00, GPR32, am_indexed32, "str", - [(store GPR32:$Rt, am_indexed32:$addr)]>; -def STRBui : StoreUI<0b00, 1, 0b00, FPR8, am_indexed8, "str", - [(store FPR8:$Rt, am_indexed8:$addr)]>; -def STRHui : StoreUI<0b01, 1, 0b00, FPR16, am_indexed16, "str", - [(store FPR16:$Rt, am_indexed16:$addr)]>; -def STRSui : StoreUI<0b10, 1, 0b00, FPR32, am_indexed32, "str", - [(store (f32 FPR32:$Rt), am_indexed32:$addr)]>; -def STRDui : StoreUI<0b11, 1, 0b00, FPR64, am_indexed64, "str", - [(store (f64 FPR64:$Rt), am_indexed64:$addr)]>; -def STRQui : StoreUI<0b00, 1, 0b10, FPR128, am_indexed128, "str", []> { - let mayStore = 1; -} - -// Match all store 64 bits width whose type is compatible with FPR64 -def : Pat<(store (v2f32 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; -def : Pat<(store (v1f64 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; -def : Pat<(store (v8i8 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; -def : Pat<(store (v4i16 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; -def : Pat<(store (v2i32 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; -def : Pat<(store (v1i64 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; - -// Match all store 128 bits width whose type is compatible with FPR128 -def : Pat<(store (v4f32 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; -def : Pat<(store (v2f64 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; -def : Pat<(store (v16i8 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; -def : Pat<(store (v8i16 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; -def : Pat<(store (v4i32 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; -def : Pat<(store (v2i64 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; -def : Pat<(store (f128 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; - -def STRHHui : StoreUI<0b01, 0, 0b00, GPR32, am_indexed16, "strh", - [(truncstorei16 GPR32:$Rt, am_indexed16:$addr)]>; -def STRBBui : StoreUI<0b00, 0, 0b00, GPR32, am_indexed8, "strb", - [(truncstorei8 GPR32:$Rt, am_indexed8:$addr)]>; - -// truncstore i64 -def : Pat<(truncstorei32 GPR64:$Rt, am_indexed32:$addr), - (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_indexed32:$addr)>; -def : Pat<(truncstorei16 GPR64:$Rt, 
am_indexed16:$addr), - (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_indexed16:$addr)>; -def : Pat<(truncstorei8 GPR64:$Rt, am_indexed8:$addr), - (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_indexed8:$addr)>; - -} // AddedComplexity = 10 - -//--- -// (unscaled immediate) -def STURXi : StoreUnscaled<0b11, 0, 0b00, GPR64, am_unscaled64, "stur", - [(store GPR64:$Rt, am_unscaled64:$addr)]>; -def STURWi : StoreUnscaled<0b10, 0, 0b00, GPR32, am_unscaled32, "stur", - [(store GPR32:$Rt, am_unscaled32:$addr)]>; -def STURBi : StoreUnscaled<0b00, 1, 0b00, FPR8, am_unscaled8, "stur", - [(store FPR8:$Rt, am_unscaled8:$addr)]>; -def STURHi : StoreUnscaled<0b01, 1, 0b00, FPR16, am_unscaled16, "stur", - [(store FPR16:$Rt, am_unscaled16:$addr)]>; -def STURSi : StoreUnscaled<0b10, 1, 0b00, FPR32, am_unscaled32, "stur", - [(store (f32 FPR32:$Rt), am_unscaled32:$addr)]>; -def STURDi : StoreUnscaled<0b11, 1, 0b00, FPR64, am_unscaled64, "stur", - [(store (f64 FPR64:$Rt), am_unscaled64:$addr)]>; -def STURQi : StoreUnscaled<0b00, 1, 0b10, FPR128, am_unscaled128, "stur", - [(store (v2f64 FPR128:$Rt), am_unscaled128:$addr)]>; -def STURHHi : StoreUnscaled<0b01, 0, 0b00, GPR32, am_unscaled16, "sturh", - [(truncstorei16 GPR32:$Rt, am_unscaled16:$addr)]>; -def STURBBi : StoreUnscaled<0b00, 0, 0b00, GPR32, am_unscaled8, "sturb", - [(truncstorei8 GPR32:$Rt, am_unscaled8:$addr)]>; - -// Match all store 64 bits width whose type is compatible with FPR64 -def : Pat<(store (v2f32 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; -def : Pat<(store (v1f64 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; -def : Pat<(store (v8i8 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; -def : Pat<(store (v4i16 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; -def : Pat<(store (v2i32 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; -def : Pat<(store (v1i64 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; - -// Match all store 128 bits width whose type is compatible with FPR128 -def : Pat<(store (v4f32 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; -def : Pat<(store (v2f64 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; -def : Pat<(store (v16i8 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; -def : Pat<(store (v8i16 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; -def : Pat<(store (v4i32 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; -def : Pat<(store (v2i64 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; -def : Pat<(store (f128 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; - -// unscaled i64 truncating stores -def : Pat<(truncstorei32 GPR64:$Rt, am_unscaled32:$addr), - (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_unscaled32:$addr)>; -def : Pat<(truncstorei16 GPR64:$Rt, am_unscaled16:$addr), - (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_unscaled16:$addr)>; -def : Pat<(truncstorei8 GPR64:$Rt, am_unscaled8:$addr), - (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_unscaled8:$addr)>; - -//--- -// STR mnemonics fall back to STUR for negative or unaligned offsets. 
-def : InstAlias<"str $Rt, $addr", (STURXi GPR64:$Rt, am_unscaled_fb64:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURWi GPR32:$Rt, am_unscaled_fb32:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURBi FPR8:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURHi FPR16:$Rt, am_unscaled_fb16:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURSi FPR32:$Rt, am_unscaled_fb32:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURDi FPR64:$Rt, am_unscaled_fb64:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURQi FPR128:$Rt, am_unscaled_fb128:$addr)>; - -def : InstAlias<"strb $Rt, $addr", (STURBBi GPR32:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"strh $Rt, $addr", (STURHHi GPR32:$Rt, am_unscaled_fb16:$addr)>; - -//--- -// (unscaled immediate, unprivileged) -def STTRWi : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">; -def STTRXi : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">; - -def STTRHi : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">; -def STTRBi : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; - -//--- -// (immediate pre-indexed) -def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32, "str">; -def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64, "str">; -def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8, "str">; -def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16, "str">; -def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32, "str">; -def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64, "str">; -def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128, "str">; - -def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32, "strb">; -def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32, "strh">; - -// ISel pseudos and patterns. See expanded comment on StorePreIdxPseudo. -defm STRDpre : StorePreIdxPseudo<FPR64, f64, pre_store>; -defm STRSpre : StorePreIdxPseudo<FPR32, f32, pre_store>; -defm STRXpre : StorePreIdxPseudo<GPR64, i64, pre_store>; -defm STRWpre : StorePreIdxPseudo<GPR32, i32, pre_store>; -defm STRHHpre : StorePreIdxPseudo<GPR32, i32, pre_truncsti16>; -defm STRBBpre : StorePreIdxPseudo<GPR32, i32, pre_truncsti8>; -// truncstore i64 -def : Pat<(pre_truncsti32 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRWpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, - simm9:$off)>; -def : Pat<(pre_truncsti16 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRHHpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, - simm9:$off)>; -def : Pat<(pre_truncsti8 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRBBpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, - simm9:$off)>; - -//--- -// (immediate post-indexed) -def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32, "str">; -def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64, "str">; -def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8, "str">; -def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16, "str">; -def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32, "str">; -def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64, "str">; -def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128, "str">; - -def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32, "strb">; -def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32, "strh">; - -// ISel pseudos and patterns. See expanded comment on StorePostIdxPseudo. 
-defm STRDpost : StorePostIdxPseudo<FPR64, f64, post_store, STRDpost>; -defm STRSpost : StorePostIdxPseudo<FPR32, f32, post_store, STRSpost>; -defm STRXpost : StorePostIdxPseudo<GPR64, i64, post_store, STRXpost>; -defm STRWpost : StorePostIdxPseudo<GPR32, i32, post_store, STRWpost>; -defm STRHHpost : StorePostIdxPseudo<GPR32, i32, post_truncsti16, STRHHpost>; -defm STRBBpost : StorePostIdxPseudo<GPR32, i32, post_truncsti8, STRBBpost>; -// truncstore i64 -def : Pat<(post_truncsti32 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRWpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, - simm9:$off)>; -def : Pat<(post_truncsti16 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRHHpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, - simm9:$off)>; -def : Pat<(post_truncsti8 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRBBpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, - simm9:$off)>; - - -//===----------------------------------------------------------------------===// -// Load/store exclusive instructions. -//===----------------------------------------------------------------------===// - -def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">; -def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">; -def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">; -def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">; - -def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">; -def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">; -def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">; -def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">; - -def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">; -def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">; -def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">; -def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">; - -def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">; -def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">; -def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">; -def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">; - -def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">; -def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">; -def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">; -def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">; - -def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">; -def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">; -def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">; -def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">; - -def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">; -def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">; - -def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">; -def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">; - -def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">; -def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">; - -def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; -def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; - -//===----------------------------------------------------------------------===// -// Scaled floating point to integer conversion instructions. 
-//===----------------------------------------------------------------------===// - -defm FCVTAS : FPToInteger<0b00, 0b100, "fcvtas", int_arm64_neon_fcvtas>; -defm FCVTAU : FPToInteger<0b00, 0b101, "fcvtau", int_arm64_neon_fcvtau>; -defm FCVTMS : FPToInteger<0b10, 0b000, "fcvtms", int_arm64_neon_fcvtms>; -defm FCVTMU : FPToInteger<0b10, 0b001, "fcvtmu", int_arm64_neon_fcvtmu>; -defm FCVTNS : FPToInteger<0b00, 0b000, "fcvtns", int_arm64_neon_fcvtns>; -defm FCVTNU : FPToInteger<0b00, 0b001, "fcvtnu", int_arm64_neon_fcvtnu>; -defm FCVTPS : FPToInteger<0b01, 0b000, "fcvtps", int_arm64_neon_fcvtps>; -defm FCVTPU : FPToInteger<0b01, 0b001, "fcvtpu", int_arm64_neon_fcvtpu>; -defm FCVTZS : FPToInteger<0b11, 0b000, "fcvtzs", fp_to_sint>; -defm FCVTZU : FPToInteger<0b11, 0b001, "fcvtzu", fp_to_uint>; -let isCodeGenOnly = 1 in { -defm FCVTZS_Int : FPToInteger<0b11, 0b000, "fcvtzs", int_arm64_neon_fcvtzs>; -defm FCVTZU_Int : FPToInteger<0b11, 0b001, "fcvtzu", int_arm64_neon_fcvtzu>; -} - -//===----------------------------------------------------------------------===// -// Scaled integer to floating point conversion instructions. -//===----------------------------------------------------------------------===// - -defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>; -defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>; - -//===----------------------------------------------------------------------===// -// Unscaled integer to floating point conversion instruction. -//===----------------------------------------------------------------------===// - -defm FMOV : UnscaledConversion<"fmov">; - -def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>; -def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>; - -def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>; -def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>; -def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>; -def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>; -def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>; -def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>; -def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>; -def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>; -def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>; - -def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), (FMOVDXr V64:$Vn)>; -def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), (FMOVDXr V64:$Vn)>; -def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), (FMOVDXr V64:$Vn)>; -def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), (FMOVDXr V64:$Vn)>; -def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), (FMOVDXr V64:$Vn)>; -def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), (FMOVDXr V64:$Vn)>; - -def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))), (COPY_TO_REGCLASS GPR32:$Xn, - FPR32)>; -def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))), (COPY_TO_REGCLASS FPR32:$Xn, - GPR32)>; -def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))), (COPY_TO_REGCLASS GPR64:$Xn, - FPR64)>; -def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), (COPY_TO_REGCLASS FPR64:$Xn, - GPR64)>; - -//===----------------------------------------------------------------------===// -// Floating point conversion instruction. 
-//===----------------------------------------------------------------------===// - -defm FCVT : FPConversion<"fcvt">; - -def : Pat<(f32_to_f16 FPR32:$Rn), - (i32 (COPY_TO_REGCLASS - (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)), - GPR32))>; - -def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn), - [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>; - -//===----------------------------------------------------------------------===// -// Floating point single operand instructions. -//===----------------------------------------------------------------------===// - -defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>; -defm FMOV : SingleOperandFPData<0b0000, "fmov">; -defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>; -defm FRINTA : SingleOperandFPData<0b1100, "frinta", frnd>; -defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>; -defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>; -defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_arm64_neon_frintn>; -defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>; - -def : Pat<(v1f64 (int_arm64_neon_frintn (v1f64 FPR64:$Rn))), - (FRINTNDr FPR64:$Rn)>; - -// FRINTX is inserted to set the flags as required by FENV_ACCESS ON behavior -// in the C spec. Setting hasSideEffects ensures it is not DCE'd. -// <rdar://problem/13715968> -// TODO: We should really model the FPSR flags correctly. This is really ugly. -let hasSideEffects = 1 in { -defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>; -} - -defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>; - -let SchedRW = [WriteFDiv] in { -defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>; -} - -//===----------------------------------------------------------------------===// -// Floating point two operand instructions. -//===----------------------------------------------------------------------===// - -defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>; -let SchedRW = [WriteFDiv] in { -defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>; -} -defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_arm64_neon_fmaxnm>; -defm FMAX : TwoOperandFPData<0b0100, "fmax", ARM64fmax>; -defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_arm64_neon_fminnm>; -defm FMIN : TwoOperandFPData<0b0101, "fmin", ARM64fmin>; -let SchedRW = [WriteFMul] in { -defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>; -defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>; -} -defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>; - -def : Pat<(v1f64 (ARM64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (ARM64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FMINDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (int_arm64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (int_arm64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>; - -//===----------------------------------------------------------------------===// -// Floating point three operand instructions. 
-//===----------------------------------------------------------------------===// - -defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>; -defm FMSUB : ThreeOperandFPData<0, 1, "fmsub", - TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; -defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd", - TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >; -defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub", - TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; - -// The following def pats catch the case where the LHS of an FMA is negated. -// The TriOpFrag above catches the case where the middle operand is negated. -def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Rd)), - (FMSUBSrrr FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; - -def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Rd)), - (FMSUBDrrr FPR64:$Rd, FPR64:$Rn, FPR64:$Rm)>; - -//===----------------------------------------------------------------------===// -// Floating point comparison instructions. -//===----------------------------------------------------------------------===// - -defm FCMPE : FPComparison<1, "fcmpe">; -defm FCMP : FPComparison<0, "fcmp", ARM64fcmp>; - -//===----------------------------------------------------------------------===// -// Floating point conditional comparison instructions. -//===----------------------------------------------------------------------===// - -defm FCCMPE : FPCondComparison<1, "fccmpe">; -defm FCCMP : FPCondComparison<0, "fccmp">; - -//===----------------------------------------------------------------------===// -// Floating point conditional select instruction. -//===----------------------------------------------------------------------===// - -defm FCSEL : FPCondSelect<"fcsel">; - -// CSEL instructions providing f128 types need to be handled by a -// pseudo-instruction since the eventual code will need to introduce basic -// blocks and control flow. -def F128CSEL : Pseudo<(outs FPR128:$Rd), - (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond), - [(set (f128 FPR128:$Rd), - (ARM64csel FPR128:$Rn, FPR128:$Rm, - (i32 imm:$cond), CPSR))]> { - let Uses = [CPSR]; - let usesCustomInserter = 1; -} - - -//===----------------------------------------------------------------------===// -// Floating point immediate move. -//===----------------------------------------------------------------------===// - -let isReMaterializable = 1 in { -defm FMOV : FPMoveImmediate<"fmov">; -} - -//===----------------------------------------------------------------------===// -// Advanced SIMD two vector instructions. 
-//===----------------------------------------------------------------------===// - -defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_arm64_neon_abs>; -defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_arm64_neon_cls>; -defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>; -defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", ARM64cmeqz>; -defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", ARM64cmgez>; -defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", ARM64cmgtz>; -defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", ARM64cmlez>; -defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", ARM64cmltz>; -defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>; -defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>; - -defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", ARM64fcmeqz>; -defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", ARM64fcmgez>; -defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", ARM64fcmgtz>; -defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", ARM64fcmlez>; -defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", ARM64fcmltz>; -defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_arm64_neon_fcvtas>; -defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_arm64_neon_fcvtau>; -defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">; -def : Pat<(v4f32 (int_arm64_neon_vcvthf2fp (v4i16 V64:$Rn))), - (FCVTLv4i16 V64:$Rn)>; -def : Pat<(v4f32 (int_arm64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), - (i64 4)))), - (FCVTLv8i16 V128:$Rn)>; -def : Pat<(v2f64 (fextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; -def : Pat<(v2f64 (fextend (v2f32 (extract_subvector (v4f32 V128:$Rn), - (i64 2))))), - (FCVTLv4i32 V128:$Rn)>; - -defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_arm64_neon_fcvtms>; -defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_arm64_neon_fcvtmu>; -defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_arm64_neon_fcvtns>; -defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_arm64_neon_fcvtnu>; -defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">; -def : Pat<(v4i16 (int_arm64_neon_vcvtfp2hf (v4f32 V128:$Rn))), - (FCVTNv4i16 V128:$Rn)>; -def : Pat<(concat_vectors V64:$Rd, - (v4i16 (int_arm64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), - (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -def : Pat<(v2f32 (fround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; -def : Pat<(concat_vectors V64:$Rd, (v2f32 (fround (v2f64 V128:$Rn)))), - (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_arm64_neon_fcvtps>; -defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_arm64_neon_fcvtpu>; -defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", - int_arm64_neon_fcvtxn>; -defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>; -defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>; -let isCodeGenOnly = 1 in { -defm FCVTZS_Int : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", - int_arm64_neon_fcvtzs>; -defm FCVTZU_Int : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", - int_arm64_neon_fcvtzu>; -} -defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>; -defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_arm64_neon_frecpe>; -defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", frnd>; -defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>; -defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>; -defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", 
int_arm64_neon_frintn>; -defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>; -defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>; -defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>; -defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_arm64_neon_frsqrte>; -defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>; -defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", - UnOpFrag<(sub immAllZerosV, node:$LHS)> >; -defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>; -// Aliases for MVN -> NOT. -def : InstAlias<"mvn.8b $Vd, $Vn", (NOTv8i8 V64:$Vd, V64:$Vn)>; -def : InstAlias<"mvn.16b $Vd, $Vn", (NOTv16i8 V128:$Vd, V128:$Vn)>; -def : InstAlias<"mvn $Vd.8b, $Vn.8b", (NOTv8i8 V64:$Vd, V64:$Vn)>; -def : InstAlias<"mvn $Vd.16b, $Vn.16b", (NOTv16i8 V128:$Vd, V128:$Vn)>; - -def : Pat<(ARM64neg (v8i8 V64:$Rn)), (NEGv8i8 V64:$Rn)>; -def : Pat<(ARM64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>; -def : Pat<(ARM64neg (v4i16 V64:$Rn)), (NEGv4i16 V64:$Rn)>; -def : Pat<(ARM64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>; -def : Pat<(ARM64neg (v2i32 V64:$Rn)), (NEGv2i32 V64:$Rn)>; -def : Pat<(ARM64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>; -def : Pat<(ARM64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>; - -def : Pat<(ARM64not (v8i8 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(ARM64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(ARM64not (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(ARM64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(ARM64not (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(ARM64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(ARM64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; - -def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; - -defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_arm64_neon_rbit>; -defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", ARM64rev16>; -defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", ARM64rev32>; -defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", ARM64rev64>; -defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp", - BinOpFrag<(add node:$LHS, (int_arm64_neon_saddlp node:$RHS))> >; -defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_arm64_neon_saddlp>; -defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>; -defm SHLL : SIMDVectorLShiftLongBySizeBHS; -defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_arm64_neon_sqabs>; -defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_arm64_neon_sqneg>; -defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_arm64_neon_sqxtn>; -defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_arm64_neon_sqxtun>; -defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_arm64_neon_suqadd>; -defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp", - BinOpFrag<(add node:$LHS, (int_arm64_neon_uaddlp node:$RHS))> >; -defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", - int_arm64_neon_uaddlp>; -defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>; -defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_arm64_neon_uqxtn>; -defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_arm64_neon_urecpe>; -defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_arm64_neon_ursqrte>; -defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_arm64_neon_usqadd>; -defm XTN : 
SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>; - -def : Pat<(v2f32 (ARM64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>; -def : Pat<(v4f32 (ARM64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>; - -// Patterns for vector long shift (by element width). These need to match all -// three of zext, sext and anyext so it's easier to pull the patterns out of the -// definition. -multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> { - def : Pat<(ARM64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)), - (SHLLv8i8 V64:$Rn)>; - def : Pat<(ARM64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)), - (SHLLv16i8 V128:$Rn)>; - def : Pat<(ARM64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)), - (SHLLv4i16 V64:$Rn)>; - def : Pat<(ARM64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)), - (SHLLv8i16 V128:$Rn)>; - def : Pat<(ARM64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)), - (SHLLv2i32 V64:$Rn)>; - def : Pat<(ARM64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)), - (SHLLv4i32 V128:$Rn)>; -} - -defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>; -defm : SIMDVectorLShiftLongBySizeBHSPats<zext>; -defm : SIMDVectorLShiftLongBySizeBHSPats<sext>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD three vector instructions. -//===----------------------------------------------------------------------===// - -defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>; -defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_arm64_neon_addp>; -defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", ARM64cmeq>; -defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", ARM64cmge>; -defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", ARM64cmgt>; -defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", ARM64cmhi>; -defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", ARM64cmhs>; -defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", ARM64cmtst>; -defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_arm64_neon_fabd>; -defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_arm64_neon_facge>; -defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_arm64_neon_facgt>; -defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_arm64_neon_addp>; -defm FADD : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>; -defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", ARM64fcmeq>; -defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", ARM64fcmge>; -defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", ARM64fcmgt>; -defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>; -defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_arm64_neon_fmaxnmp>; -defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_arm64_neon_fmaxnm>; -defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_arm64_neon_fmaxp>; -defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", ARM64fmax>; -defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_arm64_neon_fminnmp>; -defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_arm64_neon_fminnm>; -defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_arm64_neon_fminp>; -defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", ARM64fmin>; - -// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the -// instruction expects the addend first, while the fma intrinsic puts it last. 
-defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b11001, "fmla", - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; -defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b11001, "fmls", - TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; - -// The following def pats catch the case where the LHS of an FMA is negated. -// The TriOpFrag above catches the case where the middle operand is negated. -def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)), - (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>; - -def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), - (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>; - -def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), - (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>; - -defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_arm64_neon_fmulx>; -defm FMUL : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>; -defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_arm64_neon_frecps>; -defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_arm64_neon_frsqrts>; -defm FSUB : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>; -defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", - TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >; -defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", - TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >; -defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>; -defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_arm64_neon_pmul>; -defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", - TriOpFrag<(add node:$LHS, (int_arm64_neon_sabd node:$MHS, node:$RHS))> >; -defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_arm64_neon_sabd>; -defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_arm64_neon_shadd>; -defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_arm64_neon_shsub>; -defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_arm64_neon_smaxp>; -defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_arm64_neon_smax>; -defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_arm64_neon_sminp>; -defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_arm64_neon_smin>; -defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_arm64_neon_sqadd>; -defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_arm64_neon_sqdmulh>; -defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_arm64_neon_sqrdmulh>; -defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_arm64_neon_sqrshl>; -defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_arm64_neon_sqshl>; -defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_arm64_neon_sqsub>; -defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_arm64_neon_srhadd>; -defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_arm64_neon_srshl>; -defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_arm64_neon_sshl>; -defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>; -defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba", - TriOpFrag<(add node:$LHS, (int_arm64_neon_uabd node:$MHS, node:$RHS))> >; -defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_arm64_neon_uabd>; -defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_arm64_neon_uhadd>; -defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_arm64_neon_uhsub>; -defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_arm64_neon_umaxp>; -defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_arm64_neon_umax>; -defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_arm64_neon_uminp>; -defm UMIN : 
SIMDThreeSameVectorBHS<1,0b01101,"umin", int_arm64_neon_umin>; -defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_arm64_neon_uqadd>; -defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_arm64_neon_uqrshl>; -defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_arm64_neon_uqshl>; -defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_arm64_neon_uqsub>; -defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_arm64_neon_urhadd>; -defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_arm64_neon_urshl>; -defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_arm64_neon_ushl>; - -defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; -defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", - BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >; -defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">; -defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", ARM64bit>; -defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl", - TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>; -defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>; -defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn", - BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >; -defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>; - -// FIXME: the .16b and .8b variants should be emitted by the -// AsmWriter. TableGen's AsmWriter-generator doesn't deal with variant syntaxes -// in aliases yet though. -def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}", - (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; -def : InstAlias<"{mov\t$dst.8h, $src.8h|mov.8h\t$dst, $src}", - (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; -def : InstAlias<"{mov\t$dst.4s, $src.4s|mov.4s\t$dst, $src}", - (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; -def : InstAlias<"{mov\t$dst.2d, $src.2d|mov.2d\t$dst, $src}", - (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; - -def : InstAlias<"{mov\t$dst.8b, $src.8b|mov.8b\t$dst, $src}", - (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; -def : InstAlias<"{mov\t$dst.4h, $src.4h|mov.4h\t$dst, $src}", - (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; -def : InstAlias<"{mov\t$dst.2s, $src.2s|mov.2s\t$dst, $src}", - (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; -def : InstAlias<"{mov\t$dst.1d, $src.1d|mov.1d\t$dst, $src}", - (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; - -def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" # - "|cmls.8b\t$dst, $src1, $src2}", - (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" # - "|cmls.16b\t$dst, $src1, $src2}", - (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" # - "|cmls.4h\t$dst, $src1, $src2}", - (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" # - "|cmls.8h\t$dst, $src1, $src2}", - (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" # - "|cmls.2s\t$dst, $src1, $src2}", - (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" # - "|cmls.4s\t$dst, $src1, $src2}", - (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" # - "|cmls.2d\t$dst, $src1, $src2}", - (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" # - "|cmlo.8b\t$dst, $src1, $src2}", - (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" # - "|cmlo.16b\t$dst, $src1, $src2}", - (CMHIv16i8 
V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" # - "|cmlo.4h\t$dst, $src1, $src2}", - (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" # - "|cmlo.8h\t$dst, $src1, $src2}", - (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" # - "|cmlo.2s\t$dst, $src1, $src2}", - (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" # - "|cmlo.4s\t$dst, $src1, $src2}", - (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" # - "|cmlo.2d\t$dst, $src1, $src2}", - (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" # - "|cmle.8b\t$dst, $src1, $src2}", - (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" # - "|cmle.16b\t$dst, $src1, $src2}", - (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" # - "|cmle.4h\t$dst, $src1, $src2}", - (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" # - "|cmle.8h\t$dst, $src1, $src2}", - (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" # - "|cmle.2s\t$dst, $src1, $src2}", - (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" # - "|cmle.4s\t$dst, $src1, $src2}", - (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" # - "|cmle.2d\t$dst, $src1, $src2}", - (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" # - "|cmlt.8b\t$dst, $src1, $src2}", - (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" # - "|cmlt.16b\t$dst, $src1, $src2}", - (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" # - "|cmlt.4h\t$dst, $src1, $src2}", - (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" # - "|cmlt.8h\t$dst, $src1, $src2}", - (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" # - "|cmlt.2s\t$dst, $src1, $src2}", - (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" # - "|cmlt.4s\t$dst, $src1, $src2}", - (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" # - "|cmlt.2d\t$dst, $src1, $src2}", - (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" # - "|fcmle.2s\t$dst, $src1, $src2}", - (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" # - "|fcmle.4s\t$dst, $src1, $src2}", - (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" # - "|fcmle.2d\t$dst, $src1, $src2}", - (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" # - "|fcmlt.2s\t$dst, $src1, $src2}", - (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" # - "|fcmlt.4s\t$dst, $src1, $src2}", - (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" # - 
"|fcmlt.2d\t$dst, $src1, $src2}", - (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" # - "|facle.2s\t$dst, $src1, $src2}", - (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" # - "|facle.4s\t$dst, $src1, $src2}", - (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" # - "|facle.2d\t$dst, $src1, $src2}", - (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" # - "|faclt.2s\t$dst, $src1, $src2}", - (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" # - "|faclt.4s\t$dst, $src1, $src2}", - (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" # - "|faclt.2d\t$dst, $src1, $src2}", - (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD three scalar instructions. -//===----------------------------------------------------------------------===// - -defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>; -defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", ARM64cmeq>; -defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", ARM64cmge>; -defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", ARM64cmgt>; -defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", ARM64cmhi>; -defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", ARM64cmhs>; -defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", ARM64cmtst>; -defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_arm64_sisd_fabd>; -def : Pat<(v1f64 (int_arm64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FABD64 FPR64:$Rn, FPR64:$Rm)>; -defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge", - int_arm64_neon_facge>; -defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt", - int_arm64_neon_facgt>; -defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", ARM64fcmeq>; -defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", ARM64fcmge>; -defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", ARM64fcmgt>; -defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_arm64_neon_fmulx>; -defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_arm64_neon_frecps>; -defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_arm64_neon_frsqrts>; -defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_arm64_neon_sqadd>; -defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_arm64_neon_sqdmulh>; -defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_arm64_neon_sqrdmulh>; -defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_arm64_neon_sqrshl>; -defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_arm64_neon_sqshl>; -defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_arm64_neon_sqsub>; -defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_arm64_neon_srshl>; -defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_arm64_neon_sshl>; -defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; -defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_arm64_neon_uqadd>; -defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_arm64_neon_uqrshl>; -defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_arm64_neon_uqshl>; -defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_arm64_neon_uqsub>; -defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_arm64_neon_urshl>; -defm USHL : SIMDThreeScalarD< 1, 0b01000, 
"ushl", int_arm64_neon_ushl>; - -def : InstAlias<"cmls $dst, $src1, $src2", - (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"cmle $dst, $src1, $src2", - (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"cmlo $dst, $src1, $src2", - (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"cmlt $dst, $src1, $src2", - (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"fcmle $dst, $src1, $src2", - (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1)>; -def : InstAlias<"fcmle $dst, $src1, $src2", - (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"fcmlt $dst, $src1, $src2", - (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1)>; -def : InstAlias<"fcmlt $dst, $src1, $src2", - (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"facle $dst, $src1, $src2", - (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1)>; -def : InstAlias<"facle $dst, $src1, $src2", - (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"faclt $dst, $src1, $src2", - (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1)>; -def : InstAlias<"faclt $dst, $src1, $src2", - (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD three scalar instructions (mixed operands). -//===----------------------------------------------------------------------===// -defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull", - int_arm64_neon_sqdmulls_scalar>; -defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">; -defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">; - -def : Pat<(i64 (int_arm64_neon_sqadd (i64 FPR64:$Rd), - (i64 (int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn), - (i32 FPR32:$Rm))))), - (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; -def : Pat<(i64 (int_arm64_neon_sqsub (i64 FPR64:$Rd), - (i64 (int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn), - (i32 FPR32:$Rm))))), - (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD two scalar instructions. 
-//===----------------------------------------------------------------------===// - -defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", int_arm64_neon_abs>; -defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", ARM64cmeqz>; -defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", ARM64cmgez>; -defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", ARM64cmgtz>; -defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", ARM64cmlez>; -defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", ARM64cmltz>; -defm FCMEQ : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", ARM64fcmeqz>; -defm FCMGE : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", ARM64fcmgez>; -defm FCMGT : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", ARM64fcmgtz>; -defm FCMLE : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", ARM64fcmlez>; -defm FCMLT : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", ARM64fcmltz>; -defm FCVTAS : SIMDTwoScalarSD< 0, 0, 0b11100, "fcvtas">; -defm FCVTAU : SIMDTwoScalarSD< 1, 0, 0b11100, "fcvtau">; -defm FCVTMS : SIMDTwoScalarSD< 0, 0, 0b11011, "fcvtms">; -defm FCVTMU : SIMDTwoScalarSD< 1, 0, 0b11011, "fcvtmu">; -defm FCVTNS : SIMDTwoScalarSD< 0, 0, 0b11010, "fcvtns">; -defm FCVTNU : SIMDTwoScalarSD< 1, 0, 0b11010, "fcvtnu">; -defm FCVTPS : SIMDTwoScalarSD< 0, 1, 0b11010, "fcvtps">; -defm FCVTPU : SIMDTwoScalarSD< 1, 1, 0b11010, "fcvtpu">; -def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; -defm FCVTZS : SIMDTwoScalarSD< 0, 1, 0b11011, "fcvtzs">; -defm FCVTZU : SIMDTwoScalarSD< 1, 1, 0b11011, "fcvtzu">; -defm FRECPE : SIMDTwoScalarSD< 0, 1, 0b11101, "frecpe">; -defm FRECPX : SIMDTwoScalarSD< 0, 1, 0b11111, "frecpx">; -defm FRSQRTE : SIMDTwoScalarSD< 1, 1, 0b11101, "frsqrte">; -defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", - UnOpFrag<(sub immAllZerosV, node:$LHS)> >; -defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", ARM64sitof>; -defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_arm64_neon_sqabs>; -defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_arm64_neon_sqneg>; -defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_arm64_neon_scalar_sqxtn>; -defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_arm64_neon_scalar_sqxtun>; -defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", - int_arm64_neon_suqadd>; -defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", ARM64uitof>; -defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_arm64_neon_scalar_uqxtn>; -defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", - int_arm64_neon_usqadd>; - -def : Pat<(v1i64 (int_arm64_neon_fcvtas (v1f64 FPR64:$Rn))), - (FCVTASv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtau (v1f64 FPR64:$Rn))), - (FCVTAUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtms (v1f64 FPR64:$Rn))), - (FCVTMSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtmu (v1f64 FPR64:$Rn))), - (FCVTMUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtns (v1f64 FPR64:$Rn))), - (FCVTNSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtnu (v1f64 FPR64:$Rn))), - (FCVTNUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtps (v1f64 FPR64:$Rn))), - (FCVTPSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtpu (v1f64 FPR64:$Rn))), - (FCVTPUv1i64 FPR64:$Rn)>; - -def : Pat<(f32 (int_arm64_neon_frecpe (f32 FPR32:$Rn))), - (FRECPEv1i32 FPR32:$Rn)>; -def : Pat<(f64 (int_arm64_neon_frecpe (f64 FPR64:$Rn))), - (FRECPEv1i64 FPR64:$Rn)>; -def : Pat<(v1f64 (int_arm64_neon_frecpe (v1f64 FPR64:$Rn))), - (FRECPEv1i64 FPR64:$Rn)>; - -def : Pat<(f32 (int_arm64_neon_frecpx (f32 FPR32:$Rn))), - (FRECPXv1i32 FPR32:$Rn)>; 
-def : Pat<(f64 (int_arm64_neon_frecpx (f64 FPR64:$Rn))), - (FRECPXv1i64 FPR64:$Rn)>; - -def : Pat<(f32 (int_arm64_neon_frsqrte (f32 FPR32:$Rn))), - (FRSQRTEv1i32 FPR32:$Rn)>; -def : Pat<(f64 (int_arm64_neon_frsqrte (f64 FPR64:$Rn))), - (FRSQRTEv1i64 FPR64:$Rn)>; -def : Pat<(v1f64 (int_arm64_neon_frsqrte (v1f64 FPR64:$Rn))), - (FRSQRTEv1i64 FPR64:$Rn)>; - -// If an integer is about to be converted to a floating point value, -// just load it on the floating point unit. -// Here are the patterns for 8 and 16-bits to float. -// 8-bits -> float. -def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 ro_indexed8:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), bsub))>; -def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 am_indexed8:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), bsub))>; -def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 am_unscaled8:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDURBi am_unscaled8:$addr), bsub))>; -// 16-bits -> float. -def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 ro_indexed16:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), hsub))>; -def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 am_indexed16:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), hsub))>; -def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 am_unscaled16:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDURHi am_unscaled16:$addr), hsub))>; -// 32-bits are handled in target specific dag combine: -// performIntToFpCombine. -// 64-bits integer to 32-bits floating point, not possible with -// UCVTF on floating point registers (both source and destination -// must have the same size). - -// Here are the patterns for 8, 16, 32, and 64-bits to double. -// 8-bits -> double. -def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 ro_indexed8:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), bsub))>; -def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 am_indexed8:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), bsub))>; -def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 am_unscaled8:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURBi am_unscaled8:$addr), bsub))>; -// 16-bits -> double. -def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 ro_indexed16:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), hsub))>; -def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 am_indexed16:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), hsub))>; -def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 am_unscaled16:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURHi am_unscaled16:$addr), hsub))>; -// 32-bits -> double. -def : Pat <(f64 (uint_to_fp (i32 (load ro_indexed32:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRSro ro_indexed32:$addr), ssub))>; -def : Pat <(f64 (uint_to_fp (i32 (load am_indexed32:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRSui am_indexed32:$addr), ssub))>; -def : Pat <(f64 (uint_to_fp (i32 (load am_unscaled32:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURSi am_unscaled32:$addr), ssub))>; -// 64-bits -> double are handled in target specific dag combine: -// performIntToFpCombine. 
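The uint_to_fp patterns above target ordinary C-level conversions whose integer source sits in memory. A minimal sketch in C (function names are illustrative, not from this patch) of the shapes involved; per these patterns, the zero-extending load lands directly in a b/h/s sub-register of a SIMD register and UCVTF runs on the FP unit, with no GPR-to-FPR transfer:

#include <stdint.h>

/* Illustrative sketch only: conversions whose integer source comes from
 * memory. Per the patterns above, the first case lowers to roughly
 * "ldr b0, [x0]; ucvtf s0, s0", and similarly for the others. */
float  u8_to_f32 (const uint8_t  *p) { return (float)*p;  }
float  u16_to_f32(const uint16_t *p) { return (float)*p;  }
double u8_to_f64 (const uint8_t  *p) { return (double)*p; }
double u32_to_f64(const uint32_t *p) { return (double)*p; }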
- -//===----------------------------------------------------------------------===// -// Advanced SIMD three different-sized vector instructions. -//===----------------------------------------------------------------------===// - -defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_arm64_neon_addhn>; -defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_arm64_neon_subhn>; -defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_arm64_neon_raddhn>; -defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_arm64_neon_rsubhn>; -defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_arm64_neon_pmull>; -defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", - int_arm64_neon_sabd>; -defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", - int_arm64_neon_sabd>; -defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", - BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>; -defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", - BinOpFrag<(add node:$LHS, (sext node:$RHS))>>; -defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; -defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; -defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_arm64_neon_smull>; -defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", - int_arm64_neon_sqadd>; -defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", - int_arm64_neon_sqsub>; -defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull", - int_arm64_neon_sqdmull>; -defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", - BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>; -defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", - BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; -defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", - int_arm64_neon_uabd>; -defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", - int_arm64_neon_uabd>; -defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", - BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>; -defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", - BinOpFrag<(add node:$LHS, (zext node:$RHS))>>; -defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; -defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; -defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_arm64_neon_umull>; -defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", - BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>; -defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", - BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>; - -// Patterns for 64-bit pmull -def : Pat<(int_arm64_neon_pmull64 V64:$Rn, V64:$Rm), - (PMULLv1i64 V64:$Rn, V64:$Rm)>; -def : Pat<(int_arm64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)), - (vector_extract (v2i64 V128:$Rm), (i64 1))), - (PMULLv2i64 V128:$Rn, V128:$Rm)>; - -// CodeGen patterns for addhn and subhn instructions, which can actually be -// written in LLVM IR without too much difficulty. 
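Before the ADDHN/SUBHN patterns, a hedged sketch of the IR shape they match: a widening add (or sub), a logical shift right by half the element width, and a truncate. Expressed with C NEON intrinsics (the helper name is illustrative):

#include <arm_neon.h>

/* High-half narrowing add built from generic operations: (a + b) >> 8,
 * truncated to 8 bits per lane. The ADDHN patterns below match this
 * trunc(lshr(add)) shape and select a single addhn instruction. */
static inline uint8x8_t addhn_u16_generic(uint16x8_t a, uint16x8_t b) {
  return vmovn_u16(vshrq_n_u16(vaddq_u16(a, b), 8));
}

The SUBHN patterns are the same shape with sub in place of add.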
- -// ADDHN -def : Pat<(v8i8 (trunc (v8i16 (ARM64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), - (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i16 (trunc (v4i32 (ARM64vlshr (add V128:$Rn, V128:$Rm), - (i32 16))))), - (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v2i32 (trunc (v2i64 (ARM64vlshr (add V128:$Rn, V128:$Rm), - (i32 32))))), - (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v8i8 V64:$Rd), - (trunc (v8i16 (ARM64vlshr (add V128:$Rn, V128:$Rm), - (i32 8))))), - (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v4i16 V64:$Rd), - (trunc (v4i32 (ARM64vlshr (add V128:$Rn, V128:$Rm), - (i32 16))))), - (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v2i32 V64:$Rd), - (trunc (v2i64 (ARM64vlshr (add V128:$Rn, V128:$Rm), - (i32 32))))), - (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; - -// SUBHN -def : Pat<(v8i8 (trunc (v8i16 (ARM64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), - (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i16 (trunc (v4i32 (ARM64vlshr (sub V128:$Rn, V128:$Rm), - (i32 16))))), - (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v2i32 (trunc (v2i64 (ARM64vlshr (sub V128:$Rn, V128:$Rm), - (i32 32))))), - (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v8i8 V64:$Rd), - (trunc (v8i16 (ARM64vlshr (sub V128:$Rn, V128:$Rm), - (i32 8))))), - (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v4i16 V64:$Rd), - (trunc (v4i32 (ARM64vlshr (sub V128:$Rn, V128:$Rm), - (i32 16))))), - (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v2i32 V64:$Rd), - (trunc (v2i64 (ARM64vlshr (sub V128:$Rn, V128:$Rm), - (i32 32))))), - (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; - -//---------------------------------------------------------------------------- -// AdvSIMD bitwise extract from vector instruction. -//---------------------------------------------------------------------------- - -defm EXT : SIMDBitwiseExtract<"ext">; - -def : Pat<(v4i16 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), - (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; -def : Pat<(v8i16 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), - (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v2i32 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), - (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; -def : Pat<(v2f32 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), - (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; -def : Pat<(v4i32 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), - (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v4f32 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), - (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v2i64 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), - (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v2f64 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), - (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; - -// We use EXT to handle extract_subvector to copy the upper 64-bits of a -// 128-bit vector. 
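As a small illustration of the comment above (the helper name is mine, not from the patch): taking the upper 64-bit half of a 128-bit vector is an extract_subvector at the halfway index, which the patterns below lower to an EXT of the register with itself by 8 bytes plus a d-sub-register read.

#include <arm_neon.h>

/* Upper 64-bit half of a 128-bit vector; per the patterns below this is
 * lowered via EXT, roughly "ext v0.16b, v0.16b, v0.16b, #8". */
static inline int16x4_t upper_half_s16(int16x8_t v) {
  return vget_high_s16(v);
}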
-def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 8))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; -def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 4))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; -def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; -def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; -def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; -def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; - - -//---------------------------------------------------------------------------- -// AdvSIMD zip vector -//---------------------------------------------------------------------------- - -defm TRN1 : SIMDZipVector<0b010, "trn1", ARM64trn1>; -defm TRN2 : SIMDZipVector<0b110, "trn2", ARM64trn2>; -defm UZP1 : SIMDZipVector<0b001, "uzp1", ARM64uzp1>; -defm UZP2 : SIMDZipVector<0b101, "uzp2", ARM64uzp2>; -defm ZIP1 : SIMDZipVector<0b011, "zip1", ARM64zip1>; -defm ZIP2 : SIMDZipVector<0b111, "zip2", ARM64zip2>; - -//---------------------------------------------------------------------------- -// AdvSIMD TBL/TBX instructions -//---------------------------------------------------------------------------- - -defm TBL : SIMDTableLookup< 0, "tbl">; -defm TBX : SIMDTableLookupTied<1, "tbx">; - -def : Pat<(v8i8 (int_arm64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), - (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>; -def : Pat<(v16i8 (int_arm64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), - (TBLv16i8One V128:$Ri, V128:$Rn)>; - -def : Pat<(v8i8 (int_arm64_neon_tbx1 (v8i8 V64:$Rd), - (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), - (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>; -def : Pat<(v16i8 (int_arm64_neon_tbx1 (v16i8 V128:$Rd), - (v16i8 V128:$Ri), (v16i8 V128:$Rn))), - (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>; - - -//---------------------------------------------------------------------------- -// AdvSIMD scalar CPY instruction -//---------------------------------------------------------------------------- - -defm CPY : SIMDScalarCPY<"cpy">; - -//---------------------------------------------------------------------------- -// AdvSIMD scalar pairwise instructions -//---------------------------------------------------------------------------- - -defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; -defm FADDP : SIMDPairwiseScalarSD<1, 0, 0b01101, "faddp">; -defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">; -defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">; -defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">; -defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">; -def : Pat<(i64 (int_arm64_neon_saddv (v2i64 V128:$Rn))), - (ADDPv2i64p V128:$Rn)>; -def : Pat<(i64 (int_arm64_neon_uaddv (v2i64 V128:$Rn))), - (ADDPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_faddv (v2f32 V64:$Rn))), - (FADDPv2i32p V64:$Rn)>; -def : Pat<(f32 (int_arm64_neon_faddv (v4f32 V128:$Rn))), - (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>; -def : Pat<(f64 (int_arm64_neon_faddv (v2f64 V128:$Rn))), - (FADDPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fmaxnmv (v2f32 V64:$Rn))), - (FMAXNMPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fmaxnmv (v2f64 V128:$Rn))), - (FMAXNMPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fmaxv (v2f32 V64:$Rn))), - 
(FMAXPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fmaxv (v2f64 V128:$Rn))), - (FMAXPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fminnmv (v2f32 V64:$Rn))), - (FMINNMPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fminnmv (v2f64 V128:$Rn))), - (FMINNMPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fminv (v2f32 V64:$Rn))), - (FMINPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fminv (v2f64 V128:$Rn))), - (FMINPv2i64p V128:$Rn)>; - -//---------------------------------------------------------------------------- -// AdvSIMD INS/DUP instructions -//---------------------------------------------------------------------------- - -def DUPv8i8gpr : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>; -def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>; -def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>; -def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>; -def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>; -def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>; -def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>; - -def DUPv2i64lane : SIMDDup64FromElement; -def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>; -def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>; -def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>; -def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>; -def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>; -def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>; - -def : Pat<(v2f32 (ARM64dup (f32 FPR32:$Rn))), - (v2f32 (DUPv2i32lane - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), - (i64 0)))>; -def : Pat<(v4f32 (ARM64dup (f32 FPR32:$Rn))), - (v4f32 (DUPv4i32lane - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), - (i64 0)))>; -def : Pat<(v2f64 (ARM64dup (f64 FPR64:$Rn))), - (v2f64 (DUPv2i64lane - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub), - (i64 0)))>; - -def : Pat<(v2f32 (ARM64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), - (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>; -def : Pat<(v4f32 (ARM64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), - (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>; -def : Pat<(v2f64 (ARM64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)), - (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>; - -defm SMOV : SMov; -defm UMOV : UMov; - -def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), - (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>; -def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), - (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>; -def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), - (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; -def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), - (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; -def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), - (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; -def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))), - (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>; - -// Extracting i8 or i16 elements will have the zero-extend transformed to -// an 'and' mask by type legalization since neither i8 nor i16 are legal types -// for ARM64. Match these patterns here since UMOV already zeroes out the high -// bits of the destination register. 
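A minimal C sketch of the case just described (function name is illustrative): extracting an i8 or i16 lane yields an i32 whose zero-extension is legalized into an 'and' with 0xff or 0xffff, and the patterns below fold that mask into the zero-extending umov itself.

#include <arm_neon.h>
#include <stdint.h>

/* Extract byte lane 3 as a 32-bit value. The implicit zero-extension becomes
 * an 'and' with 0xff during type legalization; the pattern below folds it
 * away, leaving roughly a single "umov w0, v0.b[3]". */
static inline uint32_t byte_lane3(uint8x16_t v) {
  return vgetq_lane_u8(v, 3);
}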
-def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), - (i32 0xff)), - (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>; -def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), - (i32 0xffff)), - (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>; - -defm INS : SIMDIns; - -def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)), - (INSvi8gpr (v16i8 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn)>; -def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)), - (EXTRACT_SUBREG - (INSvi8gpr (v16i8 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn), dsub)>; - -def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)), - (INSvi16gpr (v8i16 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn)>; -def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)), - (EXTRACT_SUBREG - (INSvi16gpr (v8i16 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn), dsub)>; - -def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))), - (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), - (i32 FPR32:$Rn), ssub))>; -def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))), - (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), - (i32 FPR32:$Rn), ssub))>; -def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))), - (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), - (i64 FPR64:$Rn), dsub))>; - -def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; -def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; -def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>; - -def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn), - (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), - (EXTRACT_SUBREG - (INSvi32lane - (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), - VectorIndexS:$imm, - (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), - (i64 0)), - dsub)>; -def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn), - (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), - (INSvi32lane - V128:$Rn, VectorIndexS:$imm, - (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), - (i64 0))>; -def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn), - (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))), - (INSvi64lane - V128:$Rn, VectorIndexD:$imm, - (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)), - (i64 0))>; - -// Copy an element at a constant index in one vector into a constant indexed -// element of another. -// FIXME refactor to a shared class/dev parameterized on vector type, vector -// index type and INS extension -def : Pat<(v16i8 (int_arm64_neon_vcopy_lane - (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs), - VectorIndexB:$idx2)), - (v16i8 (INSvi8lane - V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2) - )>; -def : Pat<(v8i16 (int_arm64_neon_vcopy_lane - (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs), - VectorIndexH:$idx2)), - (v8i16 (INSvi16lane - V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2) - )>; -def : Pat<(v4i32 (int_arm64_neon_vcopy_lane - (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs), - VectorIndexS:$idx2)), - (v4i32 (INSvi32lane - V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2) - )>; -def : Pat<(v2i64 (int_arm64_neon_vcopy_lane - (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs), - VectorIndexD:$idx2)), - (v2i64 (INSvi64lane - V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2) - )>; - -// Floating point vector extractions are codegen'd as either a sequence of -// subregister extractions, possibly fed by an INS if the lane number is -// anything other than zero. 
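A short sketch of the two cases described above (names are illustrative): lane 0 of a floating-point vector is a plain sub-register read, while any other lane is first moved into lane 0 of a scratch register via INS and then read as a sub-register.

#include <arm_neon.h>

/* Lane 0: pure sub-register extraction (s0 is already the low word of v0). */
static inline float f32_lane0(float32x4_t v) { return vgetq_lane_f32(v, 0); }

/* Non-zero lane: per the INSvi32lane-based pattern below, the element is
 * first inserted into lane 0 of an undefined register, then read via ssub. */
static inline float f32_lane2(float32x4_t v) { return vgetq_lane_f32(v, 2); }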
-def : Pat<(vector_extract (v2f64 V128:$Rn), 0), - (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>; -def : Pat<(vector_extract (v4f32 V128:$Rn), 0), - (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>; -def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx), - (f64 (EXTRACT_SUBREG - (INSvi64lane (v2f64 (IMPLICIT_DEF)), 0, - V128:$Rn, VectorIndexD:$idx), - dsub))>; -def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx), - (f32 (EXTRACT_SUBREG - (INSvi32lane (v4f32 (IMPLICIT_DEF)), 0, - V128:$Rn, VectorIndexS:$idx), - ssub))>; - -// All concat_vectors operations are canonicalised to act on i64 vectors for -// ARM64. In the general case we need an instruction, which had just as well be -// INS. -class ConcatPat<ValueType DstTy, ValueType SrcTy> - : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)), - (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1, - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>; - -def : ConcatPat<v2i64, v1i64>; -def : ConcatPat<v2f64, v1f64>; -def : ConcatPat<v4i32, v2i32>; -def : ConcatPat<v4f32, v2f32>; -def : ConcatPat<v8i16, v4i16>; -def : ConcatPat<v16i8, v8i8>; - -// If the high lanes are undef, though, we can just ignore them: -class ConcatUndefPat<ValueType DstTy, ValueType SrcTy> - : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)), - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>; - -def : ConcatUndefPat<v2i64, v1i64>; -def : ConcatUndefPat<v2f64, v1f64>; -def : ConcatUndefPat<v4i32, v2i32>; -def : ConcatUndefPat<v4f32, v2f32>; -def : ConcatUndefPat<v8i16, v4i16>; -def : ConcatUndefPat<v16i8, v8i8>; - -//---------------------------------------------------------------------------- -// AdvSIMD across lanes instructions -//---------------------------------------------------------------------------- - -defm ADDV : SIMDAcrossLanesBHS<0, 0b11011, "addv">; -defm SMAXV : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">; -defm SMINV : SIMDAcrossLanesBHS<0, 0b11010, "sminv">; -defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; -defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; -defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; -defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; -defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_arm64_neon_fmaxnmv>; -defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_arm64_neon_fmaxv>; -defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_arm64_neon_fminnmv>; -defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_arm64_neon_fminv>; - -multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> { -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it - def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - (i64 0)))>; - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - (i64 0)))>; -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it -def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - (i64 0)))>; -// 
If there is a sign extension after this intrinsic, consume it as smov already -// performed it -def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - (i64 0)))>; -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it -def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - (i64 0)))>; - -def : Pat<(i32 (intOp (v4i32 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), - ssub))>; -} - -multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> { -// If there is a masking operation keeping only what has been actually -// generated, consume it. - def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - ssub))>; - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - ssub))>; -// If there is a masking operation keeping only what has been actually -// generated, consume it. -def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - ssub))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - ssub))>; - -// If there is a masking operation keeping only what has been actually -// generated, consume it. -def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - ssub))>; -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - ssub))>; -// If there is a masking operation keeping only what has been actually -// generated, consume it. 
-def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - ssub))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - ssub))>; - -def : Pat<(i32 (intOp (v4i32 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), - ssub))>; - -} - -multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> { - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), - (i64 0)))>; - -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), - ssub))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), - ssub))>; - -def : Pat<(i64 (intOp (v4i32 V128:$Rn))), - (i64 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), - dsub))>; -} - -multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc, - Intrinsic intOp> { - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), - ssub))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), - ssub))>; - -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), - ssub))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), - ssub))>; - -def : Pat<(i64 (intOp (v4i32 V128:$Rn))), - (i64 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), - dsub))>; -} - -defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_arm64_neon_saddv>; -// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(i32 (int_arm64_neon_saddv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_arm64_neon_uaddv>; -// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(i32 (int_arm64_neon_uaddv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_arm64_neon_smaxv>; -def : Pat<(i32 (int_arm64_neon_smaxv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_arm64_neon_sminv>; -def : Pat<(i32 (int_arm64_neon_sminv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, 
V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_arm64_neon_umaxv>; -def : Pat<(i32 (int_arm64_neon_umaxv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_arm64_neon_uminv>; -def : Pat<(i32 (int_arm64_neon_uminv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_arm64_neon_saddlv>; -defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_arm64_neon_uaddlv>; - -// The vaddlv_s32 intrinsic gets mapped to SADDLP. -def : Pat<(i64 (int_arm64_neon_saddlv (v2i32 V64:$Rn))), - (i64 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (SADDLPv2i32_v1i64 V64:$Rn), dsub), - dsub))>; -// The vaddlv_u32 intrinsic gets mapped to UADDLP. -def : Pat<(i64 (int_arm64_neon_uaddlv (v2i32 V64:$Rn))), - (i64 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (UADDLPv2i32_v1i64 V64:$Rn), dsub), - dsub))>; - -//------------------------------------------------------------------------------ -// AdvSIMD modified immediate instructions -//------------------------------------------------------------------------------ - -// AdvSIMD BIC -defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", ARM64bici>; -// AdvSIMD ORR -defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", ARM64orri>; - - -// AdvSIMD FMOV -def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8, - "fmov", ".2d", - [(set (v2f64 V128:$Rd), (ARM64fmov imm0_255:$imm8))]>; -def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64, fpimm8, - "fmov", ".2s", - [(set (v2f32 V64:$Rd), (ARM64fmov imm0_255:$imm8))]>; -def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8, - "fmov", ".4s", - [(set (v4f32 V128:$Rd), (ARM64fmov imm0_255:$imm8))]>; - -// AdvSIMD MOVI - -// EDIT byte mask: scalar -let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi", - [(set FPR64:$Rd, simdimmtype10:$imm8)]>; -// The movi_edit node has the immediate value already encoded, so we use -// a plain imm0_255 here. -def : Pat<(f64 (ARM64movi_edit imm0_255:$shift)), - (MOVID imm0_255:$shift)>; - -def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>; -def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>; -def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>; -def : Pat<(v8i8 immAllZerosV), (MOVID (i32 0))>; - -def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>; -def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>; -def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>; -def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>; - -// EDIT byte mask: 2d - -// The movi_edit node has the immediate value already encoded, so we use -// a plain imm0_255 in the pattern -let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128, - simdimmtype10, - "movi", ".2d", - [(set (v2i64 V128:$Rd), (ARM64movi_edit imm0_255:$imm8))]>; - - -// Use movi.2d to materialize 0.0 if the HW does zero-cycle zeroing. -// Complexity is added to break a tie with a plain MOVI. 
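For illustration (a sketch; the selection depends on the HasZCZ subtarget feature): on cores with zero-cycle zeroing, the patterns below materialize the floating-point zeros in the following C functions with "movi v0.2d, #0" and a sub-register read rather than an fmov from wzr/xzr.

/* Illustrative only: FP zero constants; with HasZCZ these select MOVIv2d_ns
 * per the AddedComplexity patterns below. */
float  f32_zero(void) { return 0.0f; }
double f64_zero(void) { return 0.0;  }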
-let AddedComplexity = 1 in { -def : Pat<(f32 fpimm0), - (f32 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), ssub))>, - Requires<[HasZCZ]>; -def : Pat<(f64 fpimm0), - (f64 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), dsub))>, - Requires<[HasZCZ]>; -} - -def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>; -def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>; -def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>; -def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>; - -def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>; -def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; -def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; -def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; - -// EDIT per word & halfword: 2s, 4h, 4s, & 8h -defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; -def : Pat<(v2i32 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), - (MOVIv2i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i32 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), - (MOVIv4i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i16 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), - (MOVIv4i16 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v8i16 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), - (MOVIv8i16 imm0_255:$imm8, imm:$shift)>; - -// EDIT per word: 2s & 4s with MSL shifter -def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", - [(set (v2i32 V64:$Rd), - (ARM64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; -def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", - [(set (v4i32 V128:$Rd), - (ARM64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; - -// Per byte: 8b & 16b -def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64, imm0_255, - "movi", ".8b", - [(set (v8i8 V64:$Rd), (ARM64movi imm0_255:$imm8))]>; -def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255, - "movi", ".16b", - [(set (v16i8 V128:$Rd), (ARM64movi imm0_255:$imm8))]>; - -// AdvSIMD MVNI - -// EDIT per word & halfword: 2s, 4h, 4s, & 8h -defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">; -def : Pat<(v2i32 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), - (MVNIv2i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i32 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), - (MVNIv4i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i16 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), - (MVNIv4i16 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v8i16 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), - (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; - -// EDIT per word: 2s & 4s with MSL shifter -def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", - [(set (v2i32 V64:$Rd), - (ARM64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; -def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", - [(set (v4i32 V128:$Rd), - (ARM64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; - -//---------------------------------------------------------------------------- -// AdvSIMD indexed element -//---------------------------------------------------------------------------- - -let neverHasSideEffects = 1 in { - defm FMLA : SIMDFPIndexedSDTied<0, 0b0001, "fmla">; - defm FMLS : SIMDFPIndexedSDTied<0, 0b0101, "fmls">; -} - -// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the -// instruction expects the addend first, while the intrinsic expects it last. 
- -// On the other hand, there are quite a few valid combinatorial options due to -// the commutativity of multiplication and the fact that (-x) * y = x * (-y). -defm : SIMDFPIndexedSDTiedPatterns<"FMLA", - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>; -defm : SIMDFPIndexedSDTiedPatterns<"FMLA", - TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>; - -defm : SIMDFPIndexedSDTiedPatterns<"FMLS", - TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; -defm : SIMDFPIndexedSDTiedPatterns<"FMLS", - TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; -defm : SIMDFPIndexedSDTiedPatterns<"FMLS", - TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; -defm : SIMDFPIndexedSDTiedPatterns<"FMLS", - TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; - -multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> { - // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit - // and DUP scalar. - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64duplane32 (v4f32 (fneg V128:$Rm)), - VectorIndexS:$idx))), - (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (v2f32 (ARM64duplane32 - (v4f32 (insert_subvector undef, - (v2f32 (fneg V64:$Rm)), - (i32 0))), - VectorIndexS:$idx)))), - (FMLSv2i32_indexed V64:$Rd, V64:$Rn, - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), - VectorIndexS:$idx)>; - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64dup (f32 (fneg FPR32Op:$Rm))))), - (FMLSv2i32_indexed V64:$Rd, V64:$Rn, - (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; - - // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit - // and DUP scalar. - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64duplane32 (v4f32 (fneg V128:$Rm)), - VectorIndexS:$idx))), - (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm, - VectorIndexS:$idx)>; - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (v4f32 (ARM64duplane32 - (v4f32 (insert_subvector undef, - (v2f32 (fneg V64:$Rm)), - (i32 0))), - VectorIndexS:$idx)))), - (FMLSv4i32_indexed V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), - VectorIndexS:$idx)>; - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64dup (f32 (fneg FPR32Op:$Rm))))), - (FMLSv4i32_indexed V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; - - // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar - // (DUPLANE from 64-bit would be trivial). 
- def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64duplane64 (v2f64 (fneg V128:$Rm)), - VectorIndexD:$idx))), - (FMLSv2i64_indexed - V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64dup (f64 (fneg FPR64Op:$Rm))))), - (FMLSv2i64_indexed V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; - - // 2 variants for 32-bit scalar version: extract from .2s or from .4s - def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), - (vector_extract (v4f32 (fneg V128:$Rm)), - VectorIndexS:$idx))), - (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, - V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), - (vector_extract (v2f32 (fneg V64:$Rm)), - VectorIndexS:$idx))), - (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; - - // 1 variant for 64-bit scalar version: extract from .1d or from .2d - def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), - (vector_extract (v2f64 (fneg V128:$Rm)), - VectorIndexS:$idx))), - (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn, - V128:$Rm, VectorIndexS:$idx)>; -} - -defm : FMLSIndexedAfterNegPatterns< - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; -defm : FMLSIndexedAfterNegPatterns< - TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >; - -defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_arm64_neon_fmulx>; -defm FMUL : SIMDFPIndexedSD<0, 0b1001, "fmul", fmul>; - -def : Pat<(v2f32 (fmul V64:$Rn, (ARM64dup (f32 FPR32:$Rm)))), - (FMULv2i32_indexed V64:$Rn, - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), - (i64 0))>; -def : Pat<(v4f32 (fmul V128:$Rn, (ARM64dup (f32 FPR32:$Rm)))), - (FMULv4i32_indexed V128:$Rn, - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), - (i64 0))>; -def : Pat<(v2f64 (fmul V128:$Rn, (ARM64dup (f64 FPR64:$Rm)))), - (FMULv2i64_indexed V128:$Rn, - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), - (i64 0))>; - -defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_arm64_neon_sqdmulh>; -defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_arm64_neon_sqrdmulh>; -defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", - TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>; -defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", - TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>; -defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; -defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; -defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; -defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", - int_arm64_neon_smull>; -defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", - int_arm64_neon_sqadd>; -defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", - int_arm64_neon_sqsub>; -defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_arm64_neon_sqdmull>; -defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; -defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; -defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", - int_arm64_neon_umull>; - -// A scalar sqdmull with the second operand being a vector lane can be -// handled directly with the indexed instruction 
encoding. -def : Pat<(int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn), - (vector_extract (v4i32 V128:$Vm), - VectorIndexS:$idx)), - (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; - -//---------------------------------------------------------------------------- -// AdvSIMD scalar shift instructions -//---------------------------------------------------------------------------- -defm FCVTZS : SIMDScalarRShiftSD<0, 0b11111, "fcvtzs">; -defm FCVTZU : SIMDScalarRShiftSD<1, 0b11111, "fcvtzu">; -defm SCVTF : SIMDScalarRShiftSD<0, 0b11100, "scvtf">; -defm UCVTF : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">; -// Codegen patterns for the above. We don't put these directly on the -// instructions because TableGen's type inference can't handle the truth. -// Having the same base pattern for fp <--> int totally freaks it out. -def : Pat<(int_arm64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm), - (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(int_arm64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm), - (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(i64 (int_arm64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)), - (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(i64 (int_arm64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)), - (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1i64 (int_arm64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn), - vecshiftR64:$imm)), - (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1i64 (int_arm64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn), - vecshiftR64:$imm)), - (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(int_arm64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm), - (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(int_arm64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm), - (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(f64 (int_arm64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), - (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(f64 (int_arm64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), - (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1f64 (int_arm64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn), - vecshiftR64:$imm)), - (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1f64 (int_arm64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), - vecshiftR64:$imm)), - (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; - -defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", ARM64vshl>; -defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">; -defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", - int_arm64_neon_sqrshrn>; -defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun", - int_arm64_neon_sqrshrun>; -defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", ARM64sqshlui>; -defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", ARM64sqshli>; -defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn", - int_arm64_neon_sqshrn>; -defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", - int_arm64_neon_sqshrun>; -defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">; -defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", ARM64srshri>; -defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", - TriOpFrag<(add node:$LHS, - (ARM64srshri node:$MHS, node:$RHS))>>; -defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", ARM64vashr>; -defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", - TriOpFrag<(add node:$LHS, - (ARM64vashr node:$MHS, node:$RHS))>>; -defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", - int_arm64_neon_uqrshrn>; -defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", ARM64uqshli>; -defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", - 
int_arm64_neon_uqshrn>; -defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", ARM64urshri>; -defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", - TriOpFrag<(add node:$LHS, - (ARM64urshri node:$MHS, node:$RHS))>>; -defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", ARM64vlshr>; -defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", - TriOpFrag<(add node:$LHS, - (ARM64vlshr node:$MHS, node:$RHS))>>; - -//---------------------------------------------------------------------------- -// AdvSIMD vector shift instructions -//---------------------------------------------------------------------------- -defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_arm64_neon_vcvtfp2fxs>; -defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_arm64_neon_vcvtfp2fxu>; -defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf", - int_arm64_neon_vcvtfxs2fp>; -defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", - int_arm64_neon_rshrn>; -defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", ARM64vshl>; -defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn", - BinOpFrag<(trunc (ARM64vashr node:$LHS, node:$RHS))>>; -defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_arm64_neon_vsli>; -def : Pat<(v1i64 (int_arm64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), - (i32 vecshiftL64:$imm))), - (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>; -defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn", - int_arm64_neon_sqrshrn>; -defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun", - int_arm64_neon_sqrshrun>; -defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", ARM64sqshlui>; -defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", ARM64sqshli>; -defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn", - int_arm64_neon_sqshrn>; -defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun", - int_arm64_neon_sqshrun>; -defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_arm64_neon_vsri>; -def : Pat<(v1i64 (int_arm64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), - (i32 vecshiftR64:$imm))), - (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>; -defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", ARM64srshri>; -defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra", - TriOpFrag<(add node:$LHS, - (ARM64srshri node:$MHS, node:$RHS))> >; -defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", - BinOpFrag<(ARM64vshl (sext node:$LHS), node:$RHS)>>; - -defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", ARM64vashr>; -defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", - TriOpFrag<(add node:$LHS, (ARM64vashr node:$MHS, node:$RHS))>>; -defm UCVTF : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf", - int_arm64_neon_vcvtfxu2fp>; -defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", - int_arm64_neon_uqrshrn>; -defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", ARM64uqshli>; -defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn", - int_arm64_neon_uqshrn>; -defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", ARM64urshri>; -defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra", - TriOpFrag<(add node:$LHS, - (ARM64urshri node:$MHS, node:$RHS))> >; -defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll", - BinOpFrag<(ARM64vshl (zext node:$LHS), node:$RHS)>>; -defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", ARM64vlshr>; -defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", - TriOpFrag<(add node:$LHS, (ARM64vlshr node:$MHS, node:$RHS))> >; - -// SHRN patterns for when a logical right shift was used instead of arithmetic -// (the 
immediate guarantees no sign bits actually end up in the result so it -// doesn't matter). -def : Pat<(v8i8 (trunc (ARM64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))), - (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>; -def : Pat<(v4i16 (trunc (ARM64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))), - (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>; -def : Pat<(v2i32 (trunc (ARM64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))), - (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>; - -def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd), - (trunc (ARM64vlshr (v8i16 V128:$Rn), - vecshiftR16Narrow:$imm)))), - (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR16Narrow:$imm)>; -def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd), - (trunc (ARM64vlshr (v4i32 V128:$Rn), - vecshiftR32Narrow:$imm)))), - (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR32Narrow:$imm)>; -def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd), - (trunc (ARM64vlshr (v2i64 V128:$Rn), - vecshiftR64Narrow:$imm)))), - (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR32Narrow:$imm)>; - -// Vector sign and zero extensions are implemented with SSHLL and USSHLL. -// Anyexts are implemented as zexts. -def : Pat<(v8i16 (sext (v8i8 V64:$Rn))), (SSHLLv8i8_shift V64:$Rn, (i32 0))>; -def : Pat<(v8i16 (zext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; -def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; -def : Pat<(v4i32 (sext (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>; -def : Pat<(v4i32 (zext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; -def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; -def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>; -def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; -def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; -// Also match an extend from the upper half of a 128 bit source register. 
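A brief sketch of the extends being matched here (helper names are illustrative): widening the low half of a vector maps to ushll/sshll #0 (aliased as uxtl/sxtl below), and widening the upper half of a 128-bit register maps to the ushll2/sshll2 forms with no separate extract.

#include <arm_neon.h>

/* Widen the low 8 lanes: per the patterns above, roughly
 * "ushll v0.8h, v0.8b, #0" (alias: uxtl). */
static inline uint16x8_t widen_lo_u8(uint8x8_t v)  { return vmovl_u8(v); }

/* Widen the upper half of a 128-bit register: per the patterns below, roughly
 * "ushll2 v0.8h, v0.16b, #0" (alias: uxtl2), no separate extract needed. */
static inline uint16x8_t widen_hi_u8(uint8x16_t v) { return vmovl_u8(vget_high_u8(v)); }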
-def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), - (USHLLv16i8_shift V128:$Rn, (i32 0))>; -def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), - (USHLLv16i8_shift V128:$Rn, (i32 0))>; -def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), - (SSHLLv16i8_shift V128:$Rn, (i32 0))>; -def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), - (USHLLv8i16_shift V128:$Rn, (i32 0))>; -def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), - (USHLLv8i16_shift V128:$Rn, (i32 0))>; -def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), - (SSHLLv8i16_shift V128:$Rn, (i32 0))>; -def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), - (USHLLv4i32_shift V128:$Rn, (i32 0))>; -def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), - (USHLLv4i32_shift V128:$Rn, (i32 0))>; -def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), - (SSHLLv4i32_shift V128:$Rn, (i32 0))>; - -// Vector shift sxtl aliases -def : InstAlias<"sxtl.8h $dst, $src1", - (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl $dst.8h, $src1.8b", - (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl.4s $dst, $src1", - (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl $dst.4s, $src1.4h", - (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl.2d $dst, $src1", - (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl $dst.2d, $src1.2s", - (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; - -// Vector shift sxtl2 aliases -def : InstAlias<"sxtl2.8h $dst, $src1", - (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2 $dst.8h, $src1.16b", - (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2.4s $dst, $src1", - (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2 $dst.4s, $src1.8h", - (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2.2d $dst, $src1", - (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2 $dst.2d, $src1.4s", - (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; - -// Vector shift uxtl aliases -def : InstAlias<"uxtl.8h $dst, $src1", - (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl $dst.8h, $src1.8b", - (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl.4s $dst, $src1", - (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl $dst.4s, $src1.4h", - (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl.2d $dst, $src1", - (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl $dst.2d, $src1.2s", - (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; - -// Vector shift uxtl2 aliases -def : InstAlias<"uxtl2.8h $dst, $src1", - (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2 $dst.8h, $src1.16b", - (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2.4s $dst, $src1", - (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2 $dst.4s, $src1.8h", - (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2.2d $dst, $src1", - (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2 $dst.2d, $src1.4s", - (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; - -// If an integer is about to be converted to a floating point value, -// just load it on the floating point unit. -// These patterns are more complex because floating point loads do not -// support sign extension. 
-// The sign extension has to be explicitly added and is only supported for -// one step: byte-to-half, half-to-word, word-to-doubleword. -// SCVTF GPR -> FPR is 9 cycles. -// SCVTF FPR -> FPR is 4 cyclces. -// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles. -// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR -// and still being faster. -// However, this is not good for code size. -// 8-bits -> float. 2 sizes step-up. -def : Pat <(f32 (sint_to_fp (i32 (sextloadi8 ro_indexed8:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv8i8_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), - bsub), - 0), - dsub)), - 0), - ssub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f32 (sint_to_fp (i32 (sextloadi8 am_indexed8:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv8i8_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), - bsub), - 0), - dsub)), - 0), - ssub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f32 (sint_to_fp (i32 (sextloadi8 am_unscaled8:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv8i8_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURBi am_unscaled8:$addr), - bsub), - 0), - dsub)), - 0), - ssub)))>, Requires<[NotForCodeSize]>; -// 16-bits -> float. 1 size step-up. -def : Pat <(f32 (sint_to_fp (i32 (sextloadi16 ro_indexed16:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), - hsub), - 0), - ssub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f32 (sint_to_fp (i32 (sextloadi16 am_indexed16:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), - hsub), - 0), - ssub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f32 (sint_to_fp (i32 (sextloadi16 am_unscaled16:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURHi am_unscaled16:$addr), - hsub), - 0), - ssub)))>, Requires<[NotForCodeSize]>; -// 32-bits to 32-bits are handled in target specific dag combine: -// performIntToFpCombine. -// 64-bits integer to 32-bits floating point, not possible with -// SCVTF on floating point registers (both source and destination -// must have the same size). - -// Here are the patterns for 8, 16, 32, and 64-bits to double. -// 8-bits -> double. 3 size step-up: give up. -// 16-bits -> double. 2 size step. -def : Pat <(f64 (sint_to_fp (i32 (sextloadi16 ro_indexed16:$addr)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), - hsub), - 0), - dsub)), - 0), - dsub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f64 (sint_to_fp (i32 (sextloadi16 am_indexed16:$addr)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), - hsub), - 0), - dsub)), - 0), - dsub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f64 (sint_to_fp (i32 (sextloadi16 am_unscaled16:$addr)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURHi am_unscaled16:$addr), - hsub), - 0), - dsub)), - 0), - dsub)))>, Requires<[NotForCodeSize]>; -// 32-bits -> double. 1 size step-up. 
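// As a rough illustration of the trade-off described above, a function like
//   float f(const signed char *p) { return *p; }
// can be lowered (when not optimizing for size) to
//   ldr   b0, [x0]          ; 8-bit load straight into an FP register
//   sshll.8h v0, v0, #0     ; lengthen 8 -> 16 bits
//   sshll.4s v0, v0, #0     ; lengthen 16 -> 32 bits
//   scvtf s0, s0            ; FPR -> FPR convert
// instead of an ldrsb into a GPR followed by a GPR -> FPR scvtf.
// (The 32-bit -> double patterns below follow the same structure with a
// single lengthening step.)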
-def : Pat <(f64 (sint_to_fp (i32 (load ro_indexed32:$addr)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRSro ro_indexed32:$addr), - ssub), - 0), - dsub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f64 (sint_to_fp (i32 (load am_indexed32:$addr)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRSui am_indexed32:$addr), - ssub), - 0), - dsub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f64 (sint_to_fp (i32 (load am_unscaled32:$addr)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURSi am_unscaled32:$addr), - ssub), - 0), - dsub)))>, Requires<[NotForCodeSize]>; -// 64-bits -> double are handled in target specific dag combine: -// performIntToFpCombine. - - -//---------------------------------------------------------------------------- -// AdvSIMD Load-Store Structure -//---------------------------------------------------------------------------- -defm LD1 : SIMDLd1Multiple<"ld1">; -defm LD2 : SIMDLd2Multiple<"ld2">; -defm LD3 : SIMDLd3Multiple<"ld3">; -defm LD4 : SIMDLd4Multiple<"ld4">; - -defm ST1 : SIMDSt1Multiple<"st1">; -defm ST2 : SIMDSt2Multiple<"st2">; -defm ST3 : SIMDSt3Multiple<"st3">; -defm ST4 : SIMDSt4Multiple<"st4">; - -class Ld1Pat<ValueType ty, Instruction INST> - : Pat<(ty (load am_simdnoindex:$vaddr)), (INST am_simdnoindex:$vaddr)>; - -def : Ld1Pat<v16i8, LD1Onev16b>; -def : Ld1Pat<v8i16, LD1Onev8h>; -def : Ld1Pat<v4i32, LD1Onev4s>; -def : Ld1Pat<v2i64, LD1Onev2d>; -def : Ld1Pat<v8i8, LD1Onev8b>; -def : Ld1Pat<v4i16, LD1Onev4h>; -def : Ld1Pat<v2i32, LD1Onev2s>; -def : Ld1Pat<v1i64, LD1Onev1d>; - -class St1Pat<ValueType ty, Instruction INST> - : Pat<(store ty:$Vt, am_simdnoindex:$vaddr), - (INST ty:$Vt, am_simdnoindex:$vaddr)>; - -def : St1Pat<v16i8, ST1Onev16b>; -def : St1Pat<v8i16, ST1Onev8h>; -def : St1Pat<v4i32, ST1Onev4s>; -def : St1Pat<v2i64, ST1Onev2d>; -def : St1Pat<v8i8, ST1Onev8b>; -def : St1Pat<v4i16, ST1Onev4h>; -def : St1Pat<v2i32, ST1Onev2s>; -def : St1Pat<v1i64, ST1Onev1d>; - -//--- -// Single-element -//--- - -defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>; -defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>; -defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>; -defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>; -let mayLoad = 1, neverHasSideEffects = 1 in { -defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>; -defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>; -defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>; -defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>; -defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>; -defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>; -defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>; -defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>; -defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>; -defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>; -defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>; -defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>; -defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>; -defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>; -defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", 
VecListFours, GPR64pi16>; -defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; -} - -def : Pat<(v8i8 (ARM64dup (i32 (extloadi8 am_simdnoindex:$vaddr)))), - (LD1Rv8b am_simdnoindex:$vaddr)>; -def : Pat<(v16i8 (ARM64dup (i32 (extloadi8 am_simdnoindex:$vaddr)))), - (LD1Rv16b am_simdnoindex:$vaddr)>; -def : Pat<(v4i16 (ARM64dup (i32 (extloadi16 am_simdnoindex:$vaddr)))), - (LD1Rv4h am_simdnoindex:$vaddr)>; -def : Pat<(v8i16 (ARM64dup (i32 (extloadi16 am_simdnoindex:$vaddr)))), - (LD1Rv8h am_simdnoindex:$vaddr)>; -def : Pat<(v2i32 (ARM64dup (i32 (load am_simdnoindex:$vaddr)))), - (LD1Rv2s am_simdnoindex:$vaddr)>; -def : Pat<(v4i32 (ARM64dup (i32 (load am_simdnoindex:$vaddr)))), - (LD1Rv4s am_simdnoindex:$vaddr)>; -def : Pat<(v2i64 (ARM64dup (i64 (load am_simdnoindex:$vaddr)))), - (LD1Rv2d am_simdnoindex:$vaddr)>; -def : Pat<(v1i64 (ARM64dup (i64 (load am_simdnoindex:$vaddr)))), - (LD1Rv1d am_simdnoindex:$vaddr)>; -// Grab the floating point version too -def : Pat<(v2f32 (ARM64dup (f32 (load am_simdnoindex:$vaddr)))), - (LD1Rv2s am_simdnoindex:$vaddr)>; -def : Pat<(v4f32 (ARM64dup (f32 (load am_simdnoindex:$vaddr)))), - (LD1Rv4s am_simdnoindex:$vaddr)>; -def : Pat<(v2f64 (ARM64dup (f64 (load am_simdnoindex:$vaddr)))), - (LD1Rv2d am_simdnoindex:$vaddr)>; -def : Pat<(v1f64 (ARM64dup (f64 (load am_simdnoindex:$vaddr)))), - (LD1Rv1d am_simdnoindex:$vaddr)>; - -class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex, - ValueType VTy, ValueType STy, Instruction LD1> - : Pat<(vector_insert (VTy VecListOne128:$Rd), - (STy (scalar_load am_simdnoindex:$vaddr)), VecIndex:$idx), - (LD1 VecListOne128:$Rd, VecIndex:$idx, am_simdnoindex:$vaddr)>; - -def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>; -def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>; -def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>; -def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>; -def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>; -def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>; - -class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex, - ValueType VTy, ValueType STy, Instruction LD1> - : Pat<(vector_insert (VTy VecListOne64:$Rd), - (STy (scalar_load am_simdnoindex:$vaddr)), VecIndex:$idx), - (EXTRACT_SUBREG - (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), - VecIndex:$idx, am_simdnoindex:$vaddr), - dsub)>; - -def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>; -def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>; -def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>; -def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>; - - -defm LD1 : SIMDLdSt1SingleAliases<"ld1">; -defm LD2 : SIMDLdSt2SingleAliases<"ld2">; -defm LD3 : SIMDLdSt3SingleAliases<"ld3">; -defm LD4 : SIMDLdSt4SingleAliases<"ld4">; - -// Stores -defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>; -defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>; -defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>; -defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>; - -let AddedComplexity = 8 in -class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex, - ValueType VTy, ValueType STy, Instruction ST1> - : Pat<(scalar_store - (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), - am_simdnoindex:$vaddr), - (ST1 VecListOne128:$Vt, VecIndex:$idx, am_simdnoindex:$vaddr)>; - -def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, 
ST1i8>; -def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>; -def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>; -def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>; -def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>; -def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>; - -let AddedComplexity = 8 in -class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex, - ValueType VTy, ValueType STy, Instruction ST1> - : Pat<(scalar_store - (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), - am_simdnoindex:$vaddr), - (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), - VecIndex:$idx, am_simdnoindex:$vaddr)>; - -def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>; -def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>; -def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>; -def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>; - -let mayStore = 1, neverHasSideEffects = 1 in { -defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>; -defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>; -defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>; -defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>; -defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>; -defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>; -defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>; -defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>; -defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>; -defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>; -defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>; -defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>; -} - -defm ST1 : SIMDLdSt1SingleAliases<"st1">; -defm ST2 : SIMDLdSt2SingleAliases<"st2">; -defm ST3 : SIMDLdSt3SingleAliases<"st3">; -defm ST4 : SIMDLdSt4SingleAliases<"st4">; - -//---------------------------------------------------------------------------- -// Crypto extensions -//---------------------------------------------------------------------------- - -def AESErr : AESTiedInst<0b0100, "aese", int_arm64_crypto_aese>; -def AESDrr : AESTiedInst<0b0101, "aesd", int_arm64_crypto_aesd>; -def AESMCrr : AESInst< 0b0110, "aesmc", int_arm64_crypto_aesmc>; -def AESIMCrr : AESInst< 0b0111, "aesimc", int_arm64_crypto_aesimc>; - -def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_arm64_crypto_sha1c>; -def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_arm64_crypto_sha1p>; -def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_arm64_crypto_sha1m>; -def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_arm64_crypto_sha1su0>; -def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_arm64_crypto_sha256h>; -def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_arm64_crypto_sha256h2>; -def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_arm64_crypto_sha256su1>; - -def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_arm64_crypto_sha1h>; -def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_arm64_crypto_sha1su1>; -def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_arm64_crypto_sha256su0>; - -//---------------------------------------------------------------------------- -// Compiler-pseudos -//---------------------------------------------------------------------------- -// FIXME: Like for X86, these should go in their own separate .td file. 
- -// Any instruction that defines a 32-bit result leaves the high half of the -// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may -// be copying from a truncate. But any other 32-bit operation will zero-extend -// up to 64 bits. -// FIXME: X86 also checks for CMOV here. Do we need something similar? -def def32 : PatLeaf<(i32 GPR32:$src), [{ - return N->getOpcode() != ISD::TRUNCATE && - N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && - N->getOpcode() != ISD::CopyFromReg; -}]>; - -// In the case of a 32-bit def that is known to implicitly zero-extend, -// we can use a SUBREG_TO_REG. -def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>; - -// For an anyext, we don't care what the high bits are, so we can perform an -// INSERT_SUBREF into an IMPLICIT_DEF. -def : Pat<(i64 (anyext GPR32:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>; - -// When we need to explicitly zero-extend, we use an unsigned bitfield move -// instruction (UBFM) on the enclosing super-reg. -def : Pat<(i64 (zext GPR32:$src)), - (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; - -// To sign extend, we use a signed bitfield move instruction (SBFM) on the -// containing super-reg. -def : Pat<(i64 (sext GPR32:$src)), - (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; -def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>; -def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>; -def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>; -def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>; -def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>; -def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>; -def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>; - -def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)), - (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), - (i64 (i32shift_sext_i8 imm0_31:$imm)))>; -def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)), - (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_sext_i8 imm0_63:$imm)))>; - -def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)), - (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), - (i64 (i32shift_sext_i16 imm0_31:$imm)))>; -def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)), - (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_sext_i16 imm0_63:$imm)))>; - -def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)), - (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), - (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_sext_i32 imm0_63:$imm)))>; - -// sra patterns have an AddedComplexity of 10, so make sure we have a higher -// AddedComplexity for the following patterns since we want to match sext + sra -// patterns before we attempt to match a single sra node. -let AddedComplexity = 20 in { -// We support all sext + sra combinations which preserve at least one bit of the -// original value which is to be sign extended. E.g. we support shifts up to -// bitwidth-1 bits. 
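// For instance, (sra (sext_inreg GPR32:$Rn, i8), 3) becomes a single
//   sbfm w0, w0, #3, #7     (equivalently: sbfx w0, w0, #3, #5)
// i.e. the in-register sign extension and the arithmetic shift fold into one
// signed bitfield extract, matching the SBFM patterns below. Register choice
// is illustrative.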
-def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)), - (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>; -def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)), - (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>; - -def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)), - (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>; -def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)), - (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>; - -def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), - (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), - (i64 imm0_31:$imm), 31)>; -} // AddedComplexity = 20 - -// To truncate, we can simply extract from a subregister. -def : Pat<(i32 (trunc GPR64sp:$src)), - (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>; - -// __builtin_trap() uses the BRK instruction on ARM64. -def : Pat<(trap), (BRK 1)>; - -// Conversions within AdvSIMD types in the same register size are free. - -def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>; - -def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>; - -def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>; - -def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>; - -def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; - -def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>; - -def : Pat<(v2f32 
(bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>; - - -def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>; - -def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; - -def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; - -def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; - -def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; - -def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; - -def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; - -def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 
1))), - (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; -def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))), - (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; -def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))), - (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; -def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))), - (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; - -// A 64-bit subvector insert to the first 128-bit vector position -// is a subregister copy that needs no instruction. -def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>; - -// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64 -// or v2f32. -def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)), - (vector_extract (v2i64 FPR128:$Rn), (i64 1)))), - (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>; -def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), - (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), - (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>; - // vector_extract on 64-bit vectors gets promoted to a 128 bit vector, - // so we match on v4f32 here, not v2f32. This will also catch adding - // the low two lanes of a true v4f32 vector. -def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), - (vector_extract (v4f32 FPR128:$Rn), (i64 1))), - (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; - -// Scalar 64-bit shifts in FPR64 registers. -def : Pat<(i64 (int_arm64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_arm64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_arm64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_arm64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; - -// Tail call return handling. These are all compiler pseudo-instructions, -// so no encoding information or anything like that. -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { - def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst), []>; - def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst), []>; -} - -def : Pat<(ARM64tcret tcGPR64:$dst), (TCRETURNri tcGPR64:$dst)>; -def : Pat<(ARM64tcret (i64 tglobaladdr:$dst)), (TCRETURNdi texternalsym:$dst)>; -def : Pat<(ARM64tcret (i64 texternalsym:$dst)), (TCRETURNdi texternalsym:$dst)>; - -include "ARM64InstrAtomics.td" diff --git a/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp b/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp deleted file mode 100644 index c0031a4..0000000 --- a/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp +++ /dev/null @@ -1,947 +0,0 @@ -//===-- ARM64LoadStoreOptimizer.cpp - ARM64 load/store opt. 
pass --*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass that performs load / store related peephole -// optimizations. This pass should be run after register allocation. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-ldst-opt" -#include "ARM64InstrInfo.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -/// ARM64AllocLoadStoreOpt - Post-register allocation pass to combine -/// load / store instructions to form ldp / stp instructions. - -STATISTIC(NumPairCreated, "Number of load/store pair instructions generated"); -STATISTIC(NumPostFolded, "Number of post-index updates folded"); -STATISTIC(NumPreFolded, "Number of pre-index updates folded"); -STATISTIC(NumUnscaledPairCreated, - "Number of load/store from unscaled generated"); - -static cl::opt<bool> DoLoadStoreOpt("arm64-load-store-opt", cl::init(true), - cl::Hidden); -static cl::opt<unsigned> ScanLimit("arm64-load-store-scan-limit", cl::init(20), - cl::Hidden); - -// Place holder while testing unscaled load/store combining -static cl::opt<bool> -EnableARM64UnscaledMemOp("arm64-unscaled-mem-op", cl::Hidden, - cl::desc("Allow ARM64 unscaled load/store combining"), - cl::init(true)); - -namespace { -struct ARM64LoadStoreOpt : public MachineFunctionPass { - static char ID; - ARM64LoadStoreOpt() : MachineFunctionPass(ID) {} - - const ARM64InstrInfo *TII; - const TargetRegisterInfo *TRI; - - // Scan the instructions looking for a load/store that can be combined - // with the current instruction into a load/store pair. - // Return the matching instruction if one is found, else MBB->end(). - // If a matching instruction is found, mergeForward is set to true if the - // merge is to remove the first instruction and replace the second with - // a pair-wise insn, and false if the reverse is true. - MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, - bool &mergeForward, - unsigned Limit); - // Merge the two instructions indicated into a single pair-wise instruction. - // If mergeForward is true, erase the first instruction and fold its - // operation into the second. If false, the reverse. Return the instruction - // following the first instruction (which may change during proecessing). - MachineBasicBlock::iterator - mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, bool mergeForward); - - // Scan the instruction list to find a base register update that can - // be combined with the current instruction (a load or store) using - // pre or post indexed addressing with writeback. Scan forwards. 
- MachineBasicBlock::iterator - findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit, - int Value); - - // Scan the instruction list to find a base register update that can - // be combined with the current instruction (a load or store) using - // pre or post indexed addressing with writeback. Scan backwards. - MachineBasicBlock::iterator - findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit); - - // Merge a pre-index base register update into a ld/st instruction. - MachineBasicBlock::iterator - mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update); - - // Merge a post-index base register update into a ld/st instruction. - MachineBasicBlock::iterator - mergePostIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update); - - bool optimizeBlock(MachineBasicBlock &MBB); - - virtual bool runOnMachineFunction(MachineFunction &Fn); - - virtual const char *getPassName() const { - return "ARM64 load / store optimization pass"; - } - -private: - int getMemSize(MachineInstr *MemMI); -}; -char ARM64LoadStoreOpt::ID = 0; -} - -static bool isUnscaledLdst(unsigned Opc) { - switch (Opc) { - default: - return false; - case ARM64::STURSi: - return true; - case ARM64::STURDi: - return true; - case ARM64::STURQi: - return true; - case ARM64::STURWi: - return true; - case ARM64::STURXi: - return true; - case ARM64::LDURSi: - return true; - case ARM64::LDURDi: - return true; - case ARM64::LDURQi: - return true; - case ARM64::LDURWi: - return true; - case ARM64::LDURXi: - return true; - } -} - -// Size in bytes of the data moved by an unscaled load or store -int ARM64LoadStoreOpt::getMemSize(MachineInstr *MemMI) { - switch (MemMI->getOpcode()) { - default: - llvm_unreachable("Opcode has has unknown size!"); - case ARM64::STRSui: - case ARM64::STURSi: - return 4; - case ARM64::STRDui: - case ARM64::STURDi: - return 8; - case ARM64::STRQui: - case ARM64::STURQi: - return 16; - case ARM64::STRWui: - case ARM64::STURWi: - return 4; - case ARM64::STRXui: - case ARM64::STURXi: - return 8; - case ARM64::LDRSui: - case ARM64::LDURSi: - return 4; - case ARM64::LDRDui: - case ARM64::LDURDi: - return 8; - case ARM64::LDRQui: - case ARM64::LDURQi: - return 16; - case ARM64::LDRWui: - case ARM64::LDURWi: - return 4; - case ARM64::LDRXui: - case ARM64::LDURXi: - return 8; - } -} - -static unsigned getMatchingPairOpcode(unsigned Opc) { - switch (Opc) { - default: - llvm_unreachable("Opcode has no pairwise equivalent!"); - case ARM64::STRSui: - case ARM64::STURSi: - return ARM64::STPSi; - case ARM64::STRDui: - case ARM64::STURDi: - return ARM64::STPDi; - case ARM64::STRQui: - case ARM64::STURQi: - return ARM64::STPQi; - case ARM64::STRWui: - case ARM64::STURWi: - return ARM64::STPWi; - case ARM64::STRXui: - case ARM64::STURXi: - return ARM64::STPXi; - case ARM64::LDRSui: - case ARM64::LDURSi: - return ARM64::LDPSi; - case ARM64::LDRDui: - case ARM64::LDURDi: - return ARM64::LDPDi; - case ARM64::LDRQui: - case ARM64::LDURQi: - return ARM64::LDPQi; - case ARM64::LDRWui: - case ARM64::LDURWi: - return ARM64::LDPWi; - case ARM64::LDRXui: - case ARM64::LDURXi: - return ARM64::LDPXi; - } -} - -static unsigned getPreIndexedOpcode(unsigned Opc) { - switch (Opc) { - default: - llvm_unreachable("Opcode has no pre-indexed equivalent!"); - case ARM64::STRSui: return ARM64::STRSpre; - case ARM64::STRDui: return ARM64::STRDpre; - case ARM64::STRQui: return ARM64::STRQpre; - case ARM64::STRWui: return ARM64::STRWpre; - case 
ARM64::STRXui: return ARM64::STRXpre; - case ARM64::LDRSui: return ARM64::LDRSpre; - case ARM64::LDRDui: return ARM64::LDRDpre; - case ARM64::LDRQui: return ARM64::LDRQpre; - case ARM64::LDRWui: return ARM64::LDRWpre; - case ARM64::LDRXui: return ARM64::LDRXpre; - } -} - -static unsigned getPostIndexedOpcode(unsigned Opc) { - switch (Opc) { - default: - llvm_unreachable("Opcode has no post-indexed wise equivalent!"); - case ARM64::STRSui: - return ARM64::STRSpost; - case ARM64::STRDui: - return ARM64::STRDpost; - case ARM64::STRQui: - return ARM64::STRQpost; - case ARM64::STRWui: - return ARM64::STRWpost; - case ARM64::STRXui: - return ARM64::STRXpost; - case ARM64::LDRSui: - return ARM64::LDRSpost; - case ARM64::LDRDui: - return ARM64::LDRDpost; - case ARM64::LDRQui: - return ARM64::LDRQpost; - case ARM64::LDRWui: - return ARM64::LDRWpost; - case ARM64::LDRXui: - return ARM64::LDRXpost; - } -} - -MachineBasicBlock::iterator -ARM64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, - bool mergeForward) { - MachineBasicBlock::iterator NextI = I; - ++NextI; - // If NextI is the second of the two instructions to be merged, we need - // to skip one further. Either way we merge will invalidate the iterator, - // and we don't need to scan the new instruction, as it's a pairwise - // instruction, which we're not considering for further action anyway. - if (NextI == Paired) - ++NextI; - - bool IsUnscaled = isUnscaledLdst(I->getOpcode()); - int OffsetStride = IsUnscaled && EnableARM64UnscaledMemOp ? getMemSize(I) : 1; - - unsigned NewOpc = getMatchingPairOpcode(I->getOpcode()); - // Insert our new paired instruction after whichever of the paired - // instructions mergeForward indicates. - MachineBasicBlock::iterator InsertionPoint = mergeForward ? Paired : I; - // Also based on mergeForward is from where we copy the base register operand - // so we get the flags compatible with the input code. - MachineOperand &BaseRegOp = - mergeForward ? Paired->getOperand(1) : I->getOperand(1); - - // Which register is Rt and which is Rt2 depends on the offset order. - MachineInstr *RtMI, *Rt2MI; - if (I->getOperand(2).getImm() == - Paired->getOperand(2).getImm() + OffsetStride) { - RtMI = Paired; - Rt2MI = I; - } else { - RtMI = I; - Rt2MI = Paired; - } - // Handle Unscaled - int OffsetImm = RtMI->getOperand(2).getImm(); - if (IsUnscaled && EnableARM64UnscaledMemOp) - OffsetImm /= OffsetStride; - - // Construct the new instruction. - MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint, - I->getDebugLoc(), TII->get(NewOpc)) - .addOperand(RtMI->getOperand(0)) - .addOperand(Rt2MI->getOperand(0)) - .addOperand(BaseRegOp) - .addImm(OffsetImm); - (void)MIB; - - // FIXME: Do we need/want to copy the mem operands from the source - // instructions? Probably. What uses them after this? - - DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n "); - DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(Paired->print(dbgs())); - DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); - - // Erase the old instructions. - I->eraseFromParent(); - Paired->eraseFromParent(); - - return NextI; -} - -/// trackRegDefsUses - Remember what registers the specified instruction uses -/// and modifies. 
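/// A register-mask operand (e.g. on a call) marks every register not
/// preserved by the callee as modified, so a base register clobbered across
/// such an instruction will stop the update-folding scans below.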
-static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs, - BitVector &UsedRegs, - const TargetRegisterInfo *TRI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isRegMask()) - ModifiedRegs.setBitsNotInMask(MO.getRegMask()); - - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (MO.isDef()) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - ModifiedRegs.set(*AI); - } else { - assert(MO.isUse() && "Reg operand not a def and not a use?!?"); - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - UsedRegs.set(*AI); - } - } -} - -static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { - if (!IsUnscaled && (Offset > 63 || Offset < -64)) - return false; - if (IsUnscaled) { - // Convert the byte-offset used by unscaled into an "element" offset used - // by the scaled pair load/store instructions. - int elemOffset = Offset / OffsetStride; - if (elemOffset > 63 || elemOffset < -64) - return false; - } - return true; -} - -// Do alignment, specialized to power of 2 and for signed ints, -// avoiding having to do a C-style cast from uint_64t to int when -// using RoundUpToAlignment from include/llvm/Support/MathExtras.h. -// FIXME: Move this function to include/MathExtras.h? -static int alignTo(int Num, int PowOf2) { - return (Num + PowOf2 - 1) & ~(PowOf2 - 1); -} - -/// findMatchingInsn - Scan the instructions looking for a load/store that can -/// be combined with the current instruction into a load/store pair. -MachineBasicBlock::iterator -ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, - bool &mergeForward, unsigned Limit) { - MachineBasicBlock::iterator E = I->getParent()->end(); - MachineBasicBlock::iterator MBBI = I; - MachineInstr *FirstMI = I; - ++MBBI; - - int Opc = FirstMI->getOpcode(); - bool mayLoad = FirstMI->mayLoad(); - bool IsUnscaled = isUnscaledLdst(Opc); - unsigned Reg = FirstMI->getOperand(0).getReg(); - unsigned BaseReg = FirstMI->getOperand(1).getReg(); - int Offset = FirstMI->getOperand(2).getImm(); - - // Early exit if the first instruction modifies the base register. - // e.g., ldr x0, [x0] - // Early exit if the offset if not possible to match. (6 bits of positive - // range, plus allow an extra one in case we find a later insn that matches - // with Offset-1 - if (FirstMI->modifiesRegister(BaseReg, TRI)) - return E; - int OffsetStride = - IsUnscaled && EnableARM64UnscaledMemOp ? getMemSize(FirstMI) : 1; - if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) - return E; - - // Track which registers have been modified and used between the first insn - // (inclusive) and the second insn. - BitVector ModifiedRegs, UsedRegs; - ModifiedRegs.resize(TRI->getNumRegs()); - UsedRegs.resize(TRI->getNumRegs()); - for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { - MachineInstr *MI = MBBI; - // Skip DBG_VALUE instructions. Otherwise debug info can affect the - // optimization by changing how far we scan. - if (MI->isDebugValue()) - continue; - - // Now that we know this is a real instruction, count it. - ++Count; - - if (Opc == MI->getOpcode() && MI->getOperand(2).isImm()) { - // If we've found another instruction with the same opcode, check to see - // if the base and offset are compatible with our starting instruction. - // These instructions all have scaled immediate operands, so we just - // check for +1/-1. 
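// For example, "ldr x0, [x2, #16]" and "ldr x1, [x2, #24]" carry scaled
// immediates 2 and 3 (units of 8 bytes), so they differ by exactly 1 and
// can be rewritten as "ldp x0, x1, [x2, #16]".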
Make sure to check the new instruction offset is - // actually an immediate and not a symbolic reference destined for - // a relocation. - // - // Pairwise instructions have a 7-bit signed offset field. Single insns - // have a 12-bit unsigned offset field. To be a valid combine, the - // final offset must be in range. - unsigned MIBaseReg = MI->getOperand(1).getReg(); - int MIOffset = MI->getOperand(2).getImm(); - if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || - (Offset + OffsetStride == MIOffset))) { - int MinOffset = Offset < MIOffset ? Offset : MIOffset; - // If this is a volatile load/store that otherwise matched, stop looking - // as something is going on that we don't have enough information to - // safely transform. Similarly, stop if we see a hint to avoid pairs. - if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI)) - return E; - // If the resultant immediate offset of merging these instructions - // is out of range for a pairwise instruction, bail and keep looking. - bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode()); - if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - continue; - } - // If the alignment requirements of the paired (scaled) instruction - // can't express the offset of the unscaled input, bail and keep - // looking. - if (IsUnscaled && EnableARM64UnscaledMemOp && - (alignTo(MinOffset, OffsetStride) != MinOffset)) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - continue; - } - // If the destination register of the loads is the same register, bail - // and keep looking. A load-pair instruction with both destination - // registers the same is UNPREDICTABLE and will result in an exception. - if (mayLoad && Reg == MI->getOperand(0).getReg()) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - continue; - } - - // If the Rt of the second instruction was not modified or used between - // the two instructions, we can combine the second into the first. - if (!ModifiedRegs[MI->getOperand(0).getReg()] && - !UsedRegs[MI->getOperand(0).getReg()]) { - mergeForward = false; - return MBBI; - } - - // Likewise, if the Rt of the first instruction is not modified or used - // between the two instructions, we can combine the first into the - // second. - if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] && - !UsedRegs[FirstMI->getOperand(0).getReg()]) { - mergeForward = true; - return MBBI; - } - // Unable to combine these instructions due to interference in between. - // Keep looking. - } - } - - // If the instruction wasn't a matching load or store, but does (or can) - // modify memory, stop searching, as we don't have alias analysis or - // anything like that to tell us whether the access is tromping on the - // locations we care about. The big one we want to catch is calls. - // - // FIXME: Theoretically, we can do better than that for SP and FP based - // references since we can effectively know where those are touching. It's - // unclear if it's worth the extra code, though. Most paired instructions - // will be sequential, perhaps with a few intervening non-memory related - // instructions. - if (MI->mayStore() || MI->isCall()) - return E; - // Likewise, if we're matching a store instruction, we don't want to - // move across a load, as it may be reading the same location. - if (FirstMI->mayStore() && MI->mayLoad()) - return E; - - // Update modified / uses register lists. 
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - - // Otherwise, if the base register is modified, we have no match, so - // return early. - if (ModifiedRegs[BaseReg]) - return E; - } - return E; -} - -MachineBasicBlock::iterator -ARM64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update) { - assert((Update->getOpcode() == ARM64::ADDXri || - Update->getOpcode() == ARM64::SUBXri) && - "Unexpected base register update instruction to merge!"); - MachineBasicBlock::iterator NextI = I; - // Return the instruction following the merged instruction, which is - // the instruction following our unmerged load. Unless that's the add/sub - // instruction we're merging, in which case it's the one after that. - if (++NextI == Update) - ++NextI; - - int Value = Update->getOperand(2).getImm(); - assert(ARM64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && - "Can't merge 1 << 12 offset into pre-indexed load / store"); - if (Update->getOpcode() == ARM64::SUBXri) - Value = -Value; - - unsigned NewOpc = getPreIndexedOpcode(I->getOpcode()); - MachineInstrBuilder MIB = - BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) - .addOperand(I->getOperand(0)) - .addOperand(I->getOperand(1)) - .addImm(Value); - (void)MIB; - - DEBUG(dbgs() << "Creating pre-indexed load/store."); - DEBUG(dbgs() << " Replacing instructions:\n "); - DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(Update->print(dbgs())); - DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); - - // Erase the old instructions for the block. - I->eraseFromParent(); - Update->eraseFromParent(); - - return NextI; -} - -MachineBasicBlock::iterator -ARM64LoadStoreOpt::mergePostIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update) { - assert((Update->getOpcode() == ARM64::ADDXri || - Update->getOpcode() == ARM64::SUBXri) && - "Unexpected base register update instruction to merge!"); - MachineBasicBlock::iterator NextI = I; - // Return the instruction following the merged instruction, which is - // the instruction following our unmerged load. Unless that's the add/sub - // instruction we're merging, in which case it's the one after that. - if (++NextI == Update) - ++NextI; - - int Value = Update->getOperand(2).getImm(); - assert(ARM64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && - "Can't merge 1 << 12 offset into post-indexed load / store"); - if (Update->getOpcode() == ARM64::SUBXri) - Value = -Value; - - unsigned NewOpc = getPostIndexedOpcode(I->getOpcode()); - MachineInstrBuilder MIB = - BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) - .addOperand(I->getOperand(0)) - .addOperand(I->getOperand(1)) - .addImm(Value); - (void)MIB; - - DEBUG(dbgs() << "Creating post-indexed load/store."); - DEBUG(dbgs() << " Replacing instructions:\n "); - DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(Update->print(dbgs())); - DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); - - // Erase the old instructions for the block. - I->eraseFromParent(); - Update->eraseFromParent(); - - return NextI; -} - -static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg, - int Offset) { - switch (MI->getOpcode()) { - default: - break; - case ARM64::SUBXri: - // Negate the offset for a SUB instruction. 
- Offset *= -1; - // FALLTHROUGH - case ARM64::ADDXri: - // Make sure it's a vanilla immediate operand, not a relocation or - // anything else we can't handle. - if (!MI->getOperand(2).isImm()) - break; - // Watch out for 1 << 12 shifted value. - if (ARM64_AM::getShiftValue(MI->getOperand(3).getImm())) - break; - // If the instruction has the base register as source and dest and the - // immediate will fit in a signed 9-bit integer, then we have a match. - if (MI->getOperand(0).getReg() == BaseReg && - MI->getOperand(1).getReg() == BaseReg && - MI->getOperand(2).getImm() <= 255 && - MI->getOperand(2).getImm() >= -256) { - // If we have a non-zero Offset, we check that it matches the amount - // we're adding to the register. - if (!Offset || Offset == MI->getOperand(2).getImm()) - return true; - } - break; - } - return false; -} - -MachineBasicBlock::iterator -ARM64LoadStoreOpt::findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, - unsigned Limit, int Value) { - MachineBasicBlock::iterator E = I->getParent()->end(); - MachineInstr *MemMI = I; - MachineBasicBlock::iterator MBBI = I; - const MachineFunction &MF = *MemMI->getParent()->getParent(); - - unsigned DestReg = MemMI->getOperand(0).getReg(); - unsigned BaseReg = MemMI->getOperand(1).getReg(); - int Offset = MemMI->getOperand(2).getImm() * - TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize(); - - // If the base register overlaps the destination register, we can't - // merge the update. - if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) - return E; - - // Scan forward looking for post-index opportunities. - // Updating instructions can't be formed if the memory insn already - // has an offset other than the value we're looking for. - if (Offset != Value) - return E; - - // Track which registers have been modified and used between the first insn - // (inclusive) and the second insn. - BitVector ModifiedRegs, UsedRegs; - ModifiedRegs.resize(TRI->getNumRegs()); - UsedRegs.resize(TRI->getNumRegs()); - ++MBBI; - for (unsigned Count = 0; MBBI != E; ++MBBI) { - MachineInstr *MI = MBBI; - // Skip DBG_VALUE instructions. Otherwise debug info can affect the - // optimization by changing how far we scan. - if (MI->isDebugValue()) - continue; - - // Now that we know this is a real instruction, count it. - ++Count; - - // If we found a match, return it. - if (isMatchingUpdateInsn(MI, BaseReg, Value)) - return MBBI; - - // Update the status of what the instruction clobbered and used. - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - - // Otherwise, if the base register is used or modified, we have no match, so - // return early. - if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) - return E; - } - return E; -} - -MachineBasicBlock::iterator -ARM64LoadStoreOpt::findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, - unsigned Limit) { - MachineBasicBlock::iterator B = I->getParent()->begin(); - MachineBasicBlock::iterator E = I->getParent()->end(); - MachineInstr *MemMI = I; - MachineBasicBlock::iterator MBBI = I; - const MachineFunction &MF = *MemMI->getParent()->getParent(); - - unsigned DestReg = MemMI->getOperand(0).getReg(); - unsigned BaseReg = MemMI->getOperand(1).getReg(); - int Offset = MemMI->getOperand(2).getImm(); - unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize(); - - // If the load/store is the first instruction in the block, there's obviously - // not any matching update. Ditto if the memory offset isn't zero. 
- if (MBBI == B || Offset != 0) - return E; - // If the base register overlaps the destination register, we can't - // merge the update. - if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) - return E; - - // Track which registers have been modified and used between the first insn - // (inclusive) and the second insn. - BitVector ModifiedRegs, UsedRegs; - ModifiedRegs.resize(TRI->getNumRegs()); - UsedRegs.resize(TRI->getNumRegs()); - --MBBI; - for (unsigned Count = 0; MBBI != B; --MBBI) { - MachineInstr *MI = MBBI; - // Skip DBG_VALUE instructions. Otherwise debug info can affect the - // optimization by changing how far we scan. - if (MI->isDebugValue()) - continue; - - // Now that we know this is a real instruction, count it. - ++Count; - - // If we found a match, return it. - if (isMatchingUpdateInsn(MI, BaseReg, RegSize)) - return MBBI; - - // Update the status of what the instruction clobbered and used. - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - - // Otherwise, if the base register is used or modified, we have no match, so - // return early. - if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) - return E; - } - return E; -} - -bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { - bool Modified = false; - // Two tranformations to do here: - // 1) Find loads and stores that can be merged into a single load or store - // pair instruction. - // e.g., - // ldr x0, [x2] - // ldr x1, [x2, #8] - // ; becomes - // ldp x0, x1, [x2] - // 2) Find base register updates that can be merged into the load or store - // as a base-reg writeback. - // e.g., - // ldr x0, [x2] - // add x2, x2, #4 - // ; becomes - // ldr x0, [x2], #4 - - for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - MBBI != E;) { - MachineInstr *MI = MBBI; - switch (MI->getOpcode()) { - default: - // Just move on to the next instruction. - ++MBBI; - break; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - case ARM64::STRXui: - case ARM64::STRWui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - case ARM64::LDRXui: - case ARM64::LDRWui: - // do the unscaled versions as well - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURWi: - case ARM64::STURXi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURWi: - case ARM64::LDURXi: { - // If this is a volatile load/store, don't mess with it. - if (MI->hasOrderedMemoryRef()) { - ++MBBI; - break; - } - // Make sure this is a reg+imm (as opposed to an address reloc). - if (!MI->getOperand(2).isImm()) { - ++MBBI; - break; - } - // Check if this load/store has a hint to avoid pair formation. - // MachineMemOperands hints are set by the ARM64StorePairSuppress pass. - if (TII->isLdStPairSuppressed(MI)) { - ++MBBI; - break; - } - // Look ahead up to ScanLimit instructions for a pairable instruction. - bool mergeForward = false; - MachineBasicBlock::iterator Paired = - findMatchingInsn(MBBI, mergeForward, ScanLimit); - if (Paired != E) { - // Merge the loads into a pair. Keeping the iterator straight is a - // pain, so we let the merge routine tell us what the next instruction - // is after it's done mucking about. - MBBI = mergePairedInsns(MBBI, Paired, mergeForward); - - Modified = true; - ++NumPairCreated; - if (isUnscaledLdst(MI->getOpcode())) - ++NumUnscaledPairCreated; - break; - } - ++MBBI; - break; - } - // FIXME: Do the other instructions. 
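// For the unscaled forms handled above, byte offsets are first converted to
// element offsets, so e.g. "ldur x0, [x2, #-8]" and "ldur x1, [x2]" can
// still be paired into "ldp x0, x1, [x2, #-8]".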
- } - } - - for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - MBBI != E;) { - MachineInstr *MI = MBBI; - // Do update merging. It's simpler to keep this separate from the above - // switch, though not strictly necessary. - int Opc = MI->getOpcode(); - switch (Opc) { - default: - // Just move on to the next instruction. - ++MBBI; - break; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - case ARM64::STRXui: - case ARM64::STRWui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - case ARM64::LDRXui: - case ARM64::LDRWui: - // do the unscaled versions as well - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURWi: - case ARM64::STURXi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURWi: - case ARM64::LDURXi: { - // Make sure this is a reg+imm (as opposed to an address reloc). - if (!MI->getOperand(2).isImm()) { - ++MBBI; - break; - } - // Look ahead up to ScanLimit instructions for a mergable instruction. - MachineBasicBlock::iterator Update = - findMatchingUpdateInsnForward(MBBI, ScanLimit, 0); - if (Update != E) { - // Merge the update into the ld/st. - MBBI = mergePostIdxUpdateInsn(MBBI, Update); - Modified = true; - ++NumPostFolded; - break; - } - // Don't know how to handle pre/post-index versions, so move to the next - // instruction. - if (isUnscaledLdst(Opc)) { - ++MBBI; - break; - } - - // Look back to try to find a pre-index instruction. For example, - // add x0, x0, #8 - // ldr x1, [x0] - // merged into: - // ldr x1, [x0, #8]! - Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit); - if (Update != E) { - // Merge the update into the ld/st. - MBBI = mergePreIdxUpdateInsn(MBBI, Update); - Modified = true; - ++NumPreFolded; - break; - } - - // Look forward to try to find a post-index instruction. For example, - // ldr x1, [x0, #64] - // add x0, x0, #64 - // merged into: - // ldr x1, [x0], #64 - - // The immediate in the load/store is scaled by the size of the register - // being loaded. The immediate in the add we're looking for, - // however, is not, so adjust here. - int Value = MI->getOperand(2).getImm() * - TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent())) - ->getSize(); - Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value); - if (Update != E) { - // Merge the update into the ld/st. - MBBI = mergePreIdxUpdateInsn(MBBI, Update); - Modified = true; - ++NumPreFolded; - break; - } - - // Nothing found. Just move to the next instruction. - ++MBBI; - break; - } - // FIXME: Do the other instructions. - } - } - - return Modified; -} - -bool ARM64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - // Early exit if pass disabled. - if (!DoLoadStoreOpt) - return false; - - const TargetMachine &TM = Fn.getTarget(); - TII = static_cast<const ARM64InstrInfo *>(TM.getInstrInfo()); - TRI = TM.getRegisterInfo(); - - bool Modified = false; - for (auto &MBB : Fn) - Modified |= optimizeBlock(MBB); - - return Modified; -} - -// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep -// loads and stores near one another? - -/// createARMLoadStoreOptimizationPass - returns an instance of the load / store -/// optimization pass. 
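
To make transformation (1) above concrete, here is a minimal stand-alone sketch (the helper name is invented; the real findMatchingInsn()/mergePairedInsns() logic also checks that both accesses use the same base register, that no intervening instruction clobbers or uses the registers involved, and that the access is neither volatile nor pair-suppressed): two register-width accesses can become an LDP/STP only if their scaled immediates are adjacent and the lower one fits the signed 7-bit scaled offset of the paired instructions.

#include <algorithm>
#include <cstdint>

// Hypothetical helper: offsets are in units of the access size, as in the
// scaled LDR/STR forms handled by the pass.
bool canFormLoadStorePair(int64_t OffsetA, int64_t OffsetB) {
  int64_t Lo = std::min(OffsetA, OffsetB);
  int64_t Hi = std::max(OffsetA, OffsetB);
  if (Hi - Lo != 1)
    return false;                    // the two accesses must be adjacent
  return Lo >= -64 && Lo <= 63;      // LDP/STP encode a signed 7-bit scaled immediate
}
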
-FunctionPass *llvm::createARM64LoadStoreOptimizationPass() { - return new ARM64LoadStoreOpt(); -} diff --git a/lib/Target/ARM64/ARM64MCInstLower.cpp b/lib/Target/ARM64/ARM64MCInstLower.cpp deleted file mode 100644 index 01dc229..0000000 --- a/lib/Target/ARM64/ARM64MCInstLower.cpp +++ /dev/null @@ -1,201 +0,0 @@ -//===-- ARM64MCInstLower.cpp - Convert ARM64 MachineInstr to an MCInst---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains code to lower ARM64 MachineInstrs to their corresponding -// MCInst records. -// -//===----------------------------------------------------------------------===// - -#include "ARM64MCInstLower.h" -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/IR/Mangler.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/Support/CodeGen.h" -#include "llvm/Target/TargetMachine.h" -using namespace llvm; - -ARM64MCInstLower::ARM64MCInstLower(MCContext &ctx, Mangler &mang, - AsmPrinter &printer) - : Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {} - -MCSymbol * -ARM64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { - return Printer.getSymbol(MO.getGlobal()); -} - -MCSymbol * -ARM64MCInstLower::GetExternalSymbolSymbol(const MachineOperand &MO) const { - return Printer.GetExternalSymbolSymbol(MO.getSymbolName()); -} - -MCOperand ARM64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, - MCSymbol *Sym) const { - // FIXME: We would like an efficient form for this, so we don't have to do a - // lot of extra uniquing. 
- MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; - if ((MO.getTargetFlags() & ARM64II::MO_GOT) != 0) { - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) - RefKind = MCSymbolRefExpr::VK_GOTPAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == - ARM64II::MO_PAGEOFF) - RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF; - else - assert(0 && "Unexpected target flags with MO_GOT on GV operand"); - } else if ((MO.getTargetFlags() & ARM64II::MO_TLS) != 0) { - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) - RefKind = MCSymbolRefExpr::VK_TLVPPAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == - ARM64II::MO_PAGEOFF) - RefKind = MCSymbolRefExpr::VK_TLVPPAGEOFF; - else - llvm_unreachable("Unexpected target flags with MO_TLS on GV operand"); - } else { - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) - RefKind = MCSymbolRefExpr::VK_PAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == - ARM64II::MO_PAGEOFF) - RefKind = MCSymbolRefExpr::VK_PAGEOFF; - } - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); - if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd( - Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); - return MCOperand::CreateExpr(Expr); -} - -MCOperand ARM64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, - MCSymbol *Sym) const { - uint32_t RefFlags = 0; - - if (MO.getTargetFlags() & ARM64II::MO_GOT) - RefFlags |= ARM64MCExpr::VK_GOT; - else if (MO.getTargetFlags() & ARM64II::MO_TLS) { - TLSModel::Model Model; - if (MO.isGlobal()) { - const GlobalValue *GV = MO.getGlobal(); - Model = Printer.TM.getTLSModel(GV); - } else { - assert(MO.isSymbol() && - StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" && - "unexpected external TLS symbol"); - Model = TLSModel::GeneralDynamic; - } - switch (Model) { - case TLSModel::InitialExec: - RefFlags |= ARM64MCExpr::VK_GOTTPREL; - break; - case TLSModel::LocalExec: - RefFlags |= ARM64MCExpr::VK_TPREL; - break; - case TLSModel::LocalDynamic: - RefFlags |= ARM64MCExpr::VK_DTPREL; - break; - case TLSModel::GeneralDynamic: - RefFlags |= ARM64MCExpr::VK_TLSDESC; - break; - } - } else { - // No modifier means this is a generic reference, classified as absolute for - // the cases where it matters (:abs_g0: etc). 
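
The page and pageoff modifiers used above describe the usual two-instruction address materialization on AArch64: an ADRP that produces the 4 KiB-aligned page of the symbol, followed by an ADD or load/store immediate that supplies the low 12 bits. A back-of-the-envelope illustration of the split (plain arithmetic only, not code from this backend):

#include <cstdint>

uint64_t pageOf(uint64_t Addr)    { return Addr & ~UINT64_C(0xFFF); } // what the page half refers to
uint64_t pageOffOf(uint64_t Addr) { return Addr & UINT64_C(0xFFF); }  // the pageoff low 12 bits

// For any address A, pageOf(A) + pageOffOf(A) == A, which is why the two
// relocation halves together can rebuild a full symbol address.
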
- RefFlags |= ARM64MCExpr::VK_ABS; - } - - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) - RefFlags |= ARM64MCExpr::VK_PAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGEOFF) - RefFlags |= ARM64MCExpr::VK_PAGEOFF; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G3) - RefFlags |= ARM64MCExpr::VK_G3; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G2) - RefFlags |= ARM64MCExpr::VK_G2; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G1) - RefFlags |= ARM64MCExpr::VK_G1; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G0) - RefFlags |= ARM64MCExpr::VK_G0; - - if (MO.getTargetFlags() & ARM64II::MO_NC) - RefFlags |= ARM64MCExpr::VK_NC; - - const MCExpr *Expr = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx); - if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd( - Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); - - ARM64MCExpr::VariantKind RefKind; - RefKind = static_cast<ARM64MCExpr::VariantKind>(RefFlags); - Expr = ARM64MCExpr::Create(Expr, RefKind, Ctx); - - return MCOperand::CreateExpr(Expr); -} - -MCOperand ARM64MCInstLower::LowerSymbolOperand(const MachineOperand &MO, - MCSymbol *Sym) const { - if (TargetTriple.isOSDarwin()) - return lowerSymbolOperandDarwin(MO, Sym); - - assert(TargetTriple.isOSBinFormatELF() && "Expect Darwin or ELF target"); - return lowerSymbolOperandELF(MO, Sym); -} - -bool ARM64MCInstLower::lowerOperand(const MachineOperand &MO, - MCOperand &MCOp) const { - switch (MO.getType()) { - default: - assert(0 && "unknown operand type"); - case MachineOperand::MO_Register: - // Ignore all implicit register operands. - if (MO.isImplicit()) - return false; - MCOp = MCOperand::CreateReg(MO.getReg()); - break; - case MachineOperand::MO_RegisterMask: - // Regmasks are like implicit defs. - return false; - case MachineOperand::MO_Immediate: - MCOp = MCOperand::CreateImm(MO.getImm()); - break; - case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::CreateExpr( - MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx)); - break; - case MachineOperand::MO_GlobalAddress: - MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); - break; - case MachineOperand::MO_ExternalSymbol: - MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); - break; - case MachineOperand::MO_JumpTableIndex: - MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex())); - break; - case MachineOperand::MO_ConstantPoolIndex: - MCOp = LowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex())); - break; - case MachineOperand::MO_BlockAddress: - MCOp = LowerSymbolOperand( - MO, Printer.GetBlockAddressSymbol(MO.getBlockAddress())); - break; - } - return true; -} - -void ARM64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { - OutMI.setOpcode(MI->getOpcode()); - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MCOperand MCOp; - if (lowerOperand(MI->getOperand(i), MCOp)) - OutMI.addOperand(MCOp); - } -} diff --git a/lib/Target/ARM64/ARM64MCInstLower.h b/lib/Target/ARM64/ARM64MCInstLower.h deleted file mode 100644 index 7e3a2c8..0000000 --- a/lib/Target/ARM64/ARM64MCInstLower.h +++ /dev/null @@ -1,52 +0,0 @@ -//===-- ARM64MCInstLower.h - Lower MachineInstr to MCInst ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
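
As a small self-contained model of the filtering that lowerOperand() and Lower() above implement (the types below are invented stand-ins for MachineOperand and MCOperand, and only registers and immediates are modeled): implicit register operands and register masks produce no MC operand at all, while everything else is translated and appended.

#include <vector>

struct Operand { enum Kind { Reg, RegMask, Imm } K; bool IsImplicit; long long Val; };
struct LoweredOp { bool IsReg; long long Val; };

// Mirrors the shape of lowerOperand(): return false when the operand should
// simply be dropped from the lowered instruction.
bool lowerOne(const Operand &Op, LoweredOp &Out) {
  switch (Op.K) {
  case Operand::Reg:
    if (Op.IsImplicit)
      return false;                  // implicit register operands are ignored
    Out = {true, Op.Val};
    return true;
  case Operand::RegMask:
    return false;                    // regmasks behave like implicit defs
  case Operand::Imm:
    Out = {false, Op.Val};
    return true;
  }
  return false;
}

// Mirrors Lower(): keep only the operands that lowered successfully.
std::vector<LoweredOp> lowerAll(const std::vector<Operand> &Ops) {
  std::vector<LoweredOp> Result;
  for (const Operand &Op : Ops) {
    LoweredOp L;
    if (lowerOne(Op, L))
      Result.push_back(L);
  }
  return Result;
}
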
-// -//===----------------------------------------------------------------------===// - -#ifndef ARM64_MCINSTLOWER_H -#define ARM64_MCINSTLOWER_H - -#include "llvm/ADT/Triple.h" -#include "llvm/Support/Compiler.h" - -namespace llvm { -class AsmPrinter; -class MCAsmInfo; -class MCContext; -class MCInst; -class MCOperand; -class MCSymbol; -class MachineInstr; -class MachineModuleInfoMachO; -class MachineOperand; -class Mangler; - -/// ARM64MCInstLower - This class is used to lower an MachineInstr -/// into an MCInst. -class LLVM_LIBRARY_VISIBILITY ARM64MCInstLower { - MCContext &Ctx; - AsmPrinter &Printer; - Triple TargetTriple; - -public: - ARM64MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer); - - bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const; - void Lower(const MachineInstr *MI, MCInst &OutMI) const; - - MCOperand lowerSymbolOperandDarwin(const MachineOperand &MO, - MCSymbol *Sym) const; - MCOperand lowerSymbolOperandELF(const MachineOperand &MO, - MCSymbol *Sym) const; - MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; - - MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; - MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; -}; -} - -#endif diff --git a/lib/Target/ARM64/ARM64MachineFunctionInfo.h b/lib/Target/ARM64/ARM64MachineFunctionInfo.h deleted file mode 100644 index 02bf7cf..0000000 --- a/lib/Target/ARM64/ARM64MachineFunctionInfo.h +++ /dev/null @@ -1,139 +0,0 @@ -//===- ARM64MachineFuctionInfo.h - ARM64 machine function info --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares ARM64-specific per-machine-function information. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64MACHINEFUNCTIONINFO_H -#define ARM64MACHINEFUNCTIONINFO_H - -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/MC/MCLinkerOptimizationHint.h" - -namespace llvm { - -/// ARM64FunctionInfo - This class is derived from MachineFunctionInfo and -/// contains private ARM64-specific information for each MachineFunction. -class ARM64FunctionInfo : public MachineFunctionInfo { - - /// HasStackFrame - True if this function has a stack frame. Set by - /// processFunctionBeforeCalleeSavedScan(). - bool HasStackFrame; - - /// \brief Amount of stack frame size, not including callee-saved registers. - unsigned LocalStackSize; - - /// \brief Number of TLS accesses using the special (combinable) - /// _TLS_MODULE_BASE_ symbol. - unsigned NumLocalDynamicTLSAccesses; - - /// \brief FrameIndex for start of varargs area for arguments passed on the - /// stack. - int VarArgsStackIndex; - - /// \brief FrameIndex for start of varargs area for arguments passed in - /// general purpose registers. - int VarArgsGPRIndex; - - /// \brief Size of the varargs area for arguments passed in general purpose - /// registers. - unsigned VarArgsGPRSize; - - /// \brief FrameIndex for start of varargs area for arguments passed in - /// floating-point registers. - int VarArgsFPRIndex; - - /// \brief Size of the varargs area for arguments passed in floating-point - /// registers. 
- unsigned VarArgsFPRSize; - -public: - ARM64FunctionInfo() - : HasStackFrame(false), NumLocalDynamicTLSAccesses(0), - VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0), - VarArgsFPRIndex(0), VarArgsFPRSize(0) {} - - explicit ARM64FunctionInfo(MachineFunction &MF) - : HasStackFrame(false), NumLocalDynamicTLSAccesses(0), - VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0), - VarArgsFPRIndex(0), VarArgsFPRSize(0) { - (void)MF; - } - - bool hasStackFrame() const { return HasStackFrame; } - void setHasStackFrame(bool s) { HasStackFrame = s; } - - void setLocalStackSize(unsigned Size) { LocalStackSize = Size; } - unsigned getLocalStackSize() const { return LocalStackSize; } - - void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; } - unsigned getNumLocalDynamicTLSAccesses() const { - return NumLocalDynamicTLSAccesses; - } - - int getVarArgsStackIndex() const { return VarArgsStackIndex; } - void setVarArgsStackIndex(int Index) { VarArgsStackIndex = Index; } - - int getVarArgsGPRIndex() const { return VarArgsGPRIndex; } - void setVarArgsGPRIndex(int Index) { VarArgsGPRIndex = Index; } - - unsigned getVarArgsGPRSize() const { return VarArgsGPRSize; } - void setVarArgsGPRSize(unsigned Size) { VarArgsGPRSize = Size; } - - int getVarArgsFPRIndex() const { return VarArgsFPRIndex; } - void setVarArgsFPRIndex(int Index) { VarArgsFPRIndex = Index; } - - unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; } - void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; } - - typedef SmallPtrSet<const MachineInstr *, 16> SetOfInstructions; - - const SetOfInstructions &getLOHRelated() const { return LOHRelated; } - - // Shortcuts for LOH related types. - class MILOHDirective { - MCLOHType Kind; - - /// Arguments of this directive. Order matters. - SmallVector<const MachineInstr *, 3> Args; - - public: - typedef SmallVectorImpl<const MachineInstr *> LOHArgs; - - MILOHDirective(MCLOHType Kind, const LOHArgs &Args) - : Kind(Kind), Args(Args.begin(), Args.end()) { - assert(isValidMCLOHType(Kind) && "Invalid LOH directive type!"); - } - - MCLOHType getKind() const { return Kind; } - const LOHArgs &getArgs() const { return Args; } - }; - - typedef MILOHDirective::LOHArgs MILOHArgs; - typedef SmallVector<MILOHDirective, 32> MILOHContainer; - - const MILOHContainer &getLOHContainer() const { return LOHContainerSet; } - - /// Add a LOH directive of this @p Kind and this @p Args. - void addLOHDirective(MCLOHType Kind, const MILOHArgs &Args) { - LOHContainerSet.push_back(MILOHDirective(Kind, Args)); - LOHRelated.insert(Args.begin(), Args.end()); - } - -private: - // Hold the lists of LOHs. - MILOHContainer LOHContainerSet; - SetOfInstructions LOHRelated; -}; -} // End llvm namespace - -#endif // ARM64MACHINEFUNCTIONINFO_H diff --git a/lib/Target/ARM64/ARM64PerfectShuffle.h b/lib/Target/ARM64/ARM64PerfectShuffle.h deleted file mode 100644 index 6759236..0000000 --- a/lib/Target/ARM64/ARM64PerfectShuffle.h +++ /dev/null @@ -1,6586 +0,0 @@ -//===-- ARM64PerfectShuffle.h - AdvSIMD Perfect Shuffle Table -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file, which was autogenerated by llvm-PerfectShuffle, contains data -// for the optimal way to build a perfect shuffle using AdvSIMD instructions. 
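
A brief note on how a table of this shape is indexed, inferred from the entry annotations below rather than taken from this file: each of the four result lanes picks one of the eight input lanes (the two 4-element sources concatenated) or 'u' for undef, so a shuffle mask maps to a four-digit base-9 number and the table needs 9^4 = 6561 entries. A sketch of that mapping (the function name is invented, and the packing of cost and operands inside each 32-bit entry is left to the llvm-PerfectShuffle generator and is not reproduced here):

// Hypothetical index computation: Mask[i] in [0,7] selects an input lane,
// a negative value stands for undef and becomes digit 8 ('u').
unsigned perfectShuffleIndex(const int Mask[4]) {
  unsigned Index = 0;
  for (int i = 0; i < 4; ++i) {
    unsigned Digit = Mask[i] < 0 ? 8u : static_cast<unsigned>(Mask[i]);
    Index = Index * 9 + Digit;       // the first mask element is the most significant digit
  }
  return Index;
}
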
-// -//===----------------------------------------------------------------------===// - -// 31 entries have cost 0 -// 242 entries have cost 1 -// 1447 entries have cost 2 -// 3602 entries have cost 3 -// 1237 entries have cost 4 -// 2 entries have cost 5 - -// This table is 6561*4 = 26244 bytes in size. -static const unsigned PerfectShuffleTable[6561+1] = { - 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS - 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS - 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0> - 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0> - 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS - 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3> - 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3> - 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0> - 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS - 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0> - 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS - 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS - 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0> - 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5> - 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7> - 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1> - 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1> - 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS - 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0> - 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1> - 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS - 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0> - 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6> - 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6> - 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7> - 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2> - 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS - 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0> - 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0> - 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0> - 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3> - 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6> - 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6> - 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7> - 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0> - 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0> - 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1> - 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS - 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS - 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4> - 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6> - 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS - 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS - 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5> - 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS - 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7> - 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3> - 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7> - 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5> - 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6> - 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5> - 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7> - 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7> - 2720039396U, 
// <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7> - 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7> - 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS - 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3> - 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7> - 3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS - 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0> - 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6> - 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0> - 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0> - 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0> - 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0> - 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7> - 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0> - 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6> - 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0> - 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7> - 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7> - 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0> - 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS - 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS - 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS - 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u> - 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS - 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS - 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS - 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u> - 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS - 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1> - 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS - 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1> - 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0> - 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5> - 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7> - 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1> - 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0> - 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS - 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1> - 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1> - 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0> - 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3> - 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS - 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7> - 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3> - 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1> - 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS - 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS - 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1> - 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2> - 835584U, // <0,1,2,3>: Cost 0 copy LHS - 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS - 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7> - 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7> - 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2> - 835584U, // <0,1,2,u>: Cost 0 copy LHS - 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0> - 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3> - 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0> - 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0> - 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS - 
2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7> - 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0> - 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1> - 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3> - 2568159334U, // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS - 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1> - 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1> - 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4> - 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS - 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS - 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS - 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4> - 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS - 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1> - 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1> - 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0> - 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7> - 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6> - 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1> - 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1> - 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1> - 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7> - 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS - 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7> - 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1> - 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7> - 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS - 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7> - 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1> - 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1> - 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1> - 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0> - 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1> - 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0> - 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1> - 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6> - 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0> - 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0> - 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7> - 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2> - 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS - 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS - 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS - 835584U, // <0,1,u,3>: Cost 0 copy LHS - 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS - 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS - 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS - 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u> - 835584U, // <0,1,u,u>: Cost 0 copy LHS - 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0> - 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS - 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS - 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0> - 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6> - 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7> - 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7> - 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0> - 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS - 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2> - 
2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1> - 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2> - 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2> - 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS - 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7> - 2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7> - 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7> - 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2> - 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2> - 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2> - 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2> - 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3> - 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS - 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3> - 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7> - 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2> - 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS - 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2> - 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2> - 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3> - 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3> - 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6> - 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6> - 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3> - 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0> - 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2> - 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS - 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3> - 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4> - 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4> - 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS - 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS - 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS - 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4> - 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS - 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7> - 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3> - 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7> - 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6> - 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6> - 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5> - 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0> - 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS - 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS - 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1> - 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2> - 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3> - 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7> - 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5> - 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6> - 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6> - 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2> - 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7> - 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2> - 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2> - 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2> - 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0> - 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6> - 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2> - 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, 
<7,6,0,2> - 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7> - 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2> - 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u> - 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS - 1678563118U, // <0,2,u,2>: Cost 2 vuzpl LHS, LHS - 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3> - 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS - 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS - 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS - 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS - 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS - 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0> - 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2> - 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0> - 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3> - 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS - 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6> - 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7> - 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0> - 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS - 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2> - 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1> - 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3> - 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3> - 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6> - 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6> - 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1> - 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3> - 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2> - 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS - 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2> - 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2> - 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3> - 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS - 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6> - 2598154746U, // <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3> - 2598155258U, // <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2> - 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS - 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2> - 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3> - 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3> - 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3> - 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6> - 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6> - 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7> - 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7> - 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3> - 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2> - 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4> - 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4> - 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3> - 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6> - 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6> - 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS - 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4> - 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6> - 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS - 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2> - 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2> - 
3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7> - 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5> - 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7> - 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7> - 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0> - 2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5> - 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7> - 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3> - 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7> - 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7> - 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7> - 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6> - 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6> - 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0> - 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0> - 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1> - 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3> - 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7> - 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7> - 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5> - 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7> - 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7> - 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0> - 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3> - 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS - 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u> - 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2> - 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3> - 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS - 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6> - 2598203898U, // <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3> - 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0> - 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS - 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4> - 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS - 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4> - 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0> - 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6> - 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1> - 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS - 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0> - 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS - 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS - 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1> - 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0> - 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1> - 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS - 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS - 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS - 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1> - 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS - 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS - 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2> - 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2> - 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4> - 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS - 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS - 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS - 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2> - 
1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS - 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2> - 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2> - 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4> - 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3> - 3692349954U, // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6> - 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6> - 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS - 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4> - 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2> - 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4> - 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0> - 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3> - 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4> - 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4> - 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS - 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS - 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4> - 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS - 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS - 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0> - 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5> - 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5> - 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS - 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6> - 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS - 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5> - 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS - 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6> - 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2> - 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6> - 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0> - 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6> - 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6> - 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0> - 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4> - 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2> - 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS - 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1> - 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4> - 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4> - 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS - 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0> - 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0> - 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2> - 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0> - 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS - 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS - 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS - 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u> - 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS - 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS - 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS - 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u> - 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS - 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0> - 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS - 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2> - 3693019384U, // <0,5,0,3>: Cost 4 
vext2 <0,3,0,5>, <0,3,0,5> - 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS - 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1> - 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1> - 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS - 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS - 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS - 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3> - 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2> - 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2> - 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1> - 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5> - 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0> - 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3> - 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS - 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS - 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2> - 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7> - 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2> - 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS - 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5> - 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6> - 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS - 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS - 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2> - 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3> - 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1> - 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3> - 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5> - 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0> - 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7> - 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0> - 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0> - 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1> - 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4> - 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4> - 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5> - 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6> - 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS - 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5> - 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6> - 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS - 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0> - 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0> - 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5> - 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0> - 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5> - 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5> - 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0> - 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7> - 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7> - 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS - 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0> - 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3> - 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4> - 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS - 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0> - 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7> - 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0> - 
1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0> - 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS - 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0> - 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7> - 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2> - 2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS - 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7> - 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0> - 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0> - 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS - 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS - 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS - 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0> - 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u> - 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u> - 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS - 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u> - 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0> - 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0> - 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS - 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS - 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2> - 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4> - 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6> - 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0> - 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0> - 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS - 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS - 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS - 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1> - 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3> - 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3> - 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS - 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1> - 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6> - 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS - 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS - 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS - 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2> - 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2> - 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1> - 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2> - 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3> - 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6> - 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS - 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS - 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2> - 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7> - 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0> - 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3> - 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6> - 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7> - 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0> - 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0> - 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0> - 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS - 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2> - 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4> - 3636299926U, // <0,6,4,3>: Cost 4 
vext1 <2,0,6,4>, <3,0,1,2> - 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS - 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS - 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0> - 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS - 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS - 2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7> - 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0> - 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7> - 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0> - 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5> - 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0> - 3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7> - 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS - 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7> - 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0> - 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3> - 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6> - 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0> - 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4> - 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6> - 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6> - 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7> - 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7> - 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1> - 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0> - 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7> - 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0> - 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5> - 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6> - 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2> - 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0> - 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1> - 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS - 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS - 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u> - 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0> - 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u> - 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS - 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0> - 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS - 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS - 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0> - 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS - 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0> - 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0> - 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5> - 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6> - 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7> - 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7> - 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS - 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1> - 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1> - 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0> - 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5> - 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1> - 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3> - 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7> - 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7> 
- 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1> - 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS - 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2> - 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2> - 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0> - 2586504502U, // <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS - 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7> - 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2> - 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7> - 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2> - 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2> - 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3> - 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3> - 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3> - 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6> - 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7> - 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7> - 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0> - 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7> - 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS - 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4> - 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4> - 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7> - 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6> - 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS - 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7> - 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5> - 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS - 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0> - 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7> - 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5> - 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7> - 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5> - 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7> - 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7> - 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0> - 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7> - 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0> - 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6> - 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7> - 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7> - 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS - 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7> - 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6> - 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0> - 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7> - 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1> - 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0> - 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7> - 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0> - 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS - 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7> - 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7> - 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7> - 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7> - 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u> - 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u> - 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u> - 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0> - 2256761883U, // <0,7,u,4>: 
Cost 3 vrev <7,0,4,u> - 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS - 2669066421U, // <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7> - 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0> - 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u> - 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS - 1544896614U, // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS - 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS - 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2> - 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS - 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6> - 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS - 2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0> - 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS - 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1> - 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS - 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS - 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2> - 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS - 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS - 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7> - 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS - 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS - 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS - 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2> - 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS - 835584U, // <0,u,2,3>: Cost 0 copy LHS - 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS - 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6> - 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS - 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2> - 835584U, // <0,u,2,u>: Cost 0 copy LHS - 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2> - 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2> - 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u> - 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3> - 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6> - 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6> - 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7> - 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u> - 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2> - 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS - 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS - 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS - 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4> - 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS - 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS - 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS - 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6> - 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS - 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS - 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0> - 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7> - 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7> - 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS - 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u> - 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS - 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS - 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS - 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS - 2262496983U, // <0,u,6,1>: Cost 3 vrev <u,0,1,6> - 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u> - 2691881168U, // <0,u,6,3>: Cost 3 vext3 
<1,2,3,0>, <u,6,3,7> - 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS - 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u> - 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u> - 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u> - 1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u> - 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS - 2562728854U, // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0> - 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7> - 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u> - 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS - 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6> - 2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7> - 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7> - 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS - 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS - 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS - 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS - 835584U, // <0,u,u,3>: Cost 0 copy LHS - 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS - 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS - 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS - 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u> - 835584U, // <0,u,u,u>: Cost 0 copy LHS - 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0> - 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1> - 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2> - 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0> - 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1> - 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0> - 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7> - 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0> - 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1> - 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS - 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1> - 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS - 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3> - 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS - 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1> - 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7> - 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2> - 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS - 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1> - 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1> - 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0> - 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1> - 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6> - 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7> - 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0> - 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2> - 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1> - 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0> - 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1> - 67944550U, // <1,0,3,2>: Cost 1 vrev LHS - 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3> - 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS - 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7> - 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7> - 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3> - 68386972U, // <1,0,3,u>: Cost 1 vrev LHS - 2620640146U, // 
<1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1> - 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5> - 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6> - 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1> - 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1> - 2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS - 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1> - 3666383940U, // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4> - 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS - 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0> - 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS - 3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS - 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5> - 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5> - 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0> - 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0> - 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS - 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS - 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1> - 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7> - 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7> - 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6> - 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1> - 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0> - 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0> - 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0> - 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0> - 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0> - 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1> - 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7> - 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0> - 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6> - 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0> - 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0> - 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7> - 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0> - 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0> - 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1> - 67985515U, // <1,0,u,2>: Cost 1 vrev LHS - 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1> - 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6> - 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS - 2669082807U, // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0> - 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u> - 68427937U, // <1,0,u,u>: Cost 1 vrev LHS - 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1> - 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS - 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1> - 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2> - 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5> - 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1> - 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7> - 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0> - 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1> - 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS - 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS - 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0> - 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3> - 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS - 
2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7> - 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7> - 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1> - 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS - 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2> - 2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1> - 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2> - 2622637734U, // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1> - 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS - 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7> - 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7> - 3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0> - 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1> - 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2> - 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1> - 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2> - 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS - 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6> - 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7> - 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7> - 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3> - 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS - 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS - 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4> - 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0> - 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5> - 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS - 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS - 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS - 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4> - 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS - 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1> - 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3> - 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2> - 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7> - 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5> - 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5> - 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0> - 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS - 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7> - 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2> - 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7> - 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3> - 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7> - 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6> - 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5> - 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6> - 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0> - 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0> - 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1> - 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1> - 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3> - 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS - 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6> - 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7> - 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0> - 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7> - 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1> - 1483112550U, // <1,1,u,0>: Cost 2 
vext1 <1,1,1,1>, LHS - 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS - 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3> - 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS - 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS - 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS - 2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7> - 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS - 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS - 2635251712U, // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0> - 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS - 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2> - 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1> - 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5> - 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7> - 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2> - 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1> - 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS - 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2> - 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1> - 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0> - 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS - 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS - 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7> - 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7> - 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0> - 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS - 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2> - 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2> - 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2> - 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3> - 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5> - 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7> - 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7> - 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1> - 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3> - 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS - 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2> - 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2> - 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3> - 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS - 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3> - 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3> - 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2> - 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS - 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2> - 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6> - 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4> - 2598759198U, // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2> - 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4> - 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS - 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6> - 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0> - 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS - 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS - 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7> - 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2> - 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS - 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS - 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5> - 2659143778U, // <1,2,5,6>: 
Cost 3 vext2 <7,0,1,2>, <5,6,7,0> - 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7> - 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS - 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1> - 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2> - 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3> - 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7> - 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5> - 3763488716U, // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7> - 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6> - 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2> - 2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7> - 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2> - 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2> - 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3> - 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1> - 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6> - 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0> - 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1> - 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1> - 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2> - 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS - 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2> - 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2> - 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2> - 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS - 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS - 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3> - 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2> - 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS - 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0> - 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS - 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2> - 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3> - 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5> - 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6> - 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7> - 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1> - 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS - 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2> - 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1> - 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3> - 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS - 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS - 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7> - 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7> - 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1> - 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS - 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0> - 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3> - 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2> - 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3> - 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4> - 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5> - 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6> - 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3> - 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u> - 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS - 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3> - 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, 
<2,2,2,2> - 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3> - 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS - 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5> - 2598826490U, // <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3> - 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7> - 1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS - 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS - 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3> - 3832761290U, // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3> - 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4> - 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS - 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS - 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6> - 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4> - 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS - 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS - 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7> - 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5> - 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5> - 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS - 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5> - 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4> - 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS - 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS - 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0> - 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1> - 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3> - 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7> - 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3> - 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5> - 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3> - 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7> - 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u> - 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1> - 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3> - 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7> - 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3> - 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6> - 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5> - 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1> - 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7> - 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1> - 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS - 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u> - 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2> - 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS - 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS - 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS - 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6> - 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS - 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS - 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4> - 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS - 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4> - 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4> - 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5> - 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1> - 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2> - 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1> - 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, 
<4,0,u,1> - 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2> - 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4> - 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0> - 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6> - 3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1> - 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS - 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS - 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0> - 2892402217U, // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS - 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2> - 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3> - 3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2> - 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1> - 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4> - 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS - 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7> - 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2> - 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS - 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS - 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4> - 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3> - 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3> - 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6> - 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5> - 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6> - 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3> - 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u> - 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1> - 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4> - 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4> - 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4> - 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4> - 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS - 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6> - 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1> - 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS - 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS - 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5> - 2598913774U, // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1> - 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2> - 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS - 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS - 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS - 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS - 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS - 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS - 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1> - 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2> - 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2> - 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS - 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7> - 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7> - 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1> - 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS - 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4> - 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1> - 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4> - 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4> - 3660729654U, // 
<1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS - 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0> - 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1> - 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2> - 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4> - 2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS - 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS - 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u> - 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1> - 2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6> - 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1> - 1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS - 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1> - 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS - 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0> - 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS - 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5> - 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4> - 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5> - 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5> - 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1> - 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1> - 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS - 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2> - 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1> - 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0> - 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7> - 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5> - 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1> - 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7> - 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS - 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3> - 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1> - 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5> - 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2> - 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1> - 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5> - 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7> - 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7> - 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS - 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1> - 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2> - 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7> - 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2> - 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3> - 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6> - 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5> - 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6> - 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS - 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS - 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1> - 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0> - 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3> - 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4> - 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4> - 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS - 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS - 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6> - 1546276393U, // 
<1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS - 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS - 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5> - 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5> - 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3> - 2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS - 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5> - 2667794530U, // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0> - 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7> - 2557187886U, // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS - 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1> - 2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5> - 2667794938U, // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3> - 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4> - 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6> - 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6> - 2667795256U, // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6> - 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0> - 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0> - 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS - 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1> - 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1> - 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7> - 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS - 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7> - 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0> - 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1> - 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS - 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2> - 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS - 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3> - 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1> - 1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5> - 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS - 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7> - 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS - 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS - 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0> - 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS - 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6> - 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1> - 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5> - 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6> - 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6> - 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS - 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS - 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2> - 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1> - 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0> - 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3> - 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6> - 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7> - 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1> - 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS - 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS - 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1> - 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0> - 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2> - 3692439206U, // <1,6,2,3>: Cost 4 
vext2 <0,2,1,6>, <2,3,0,1> - 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS - 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7> - 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7> - 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS - 2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS - 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS - 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1> - 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3> - 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3> - 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS - 4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5> - 2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3> - 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7> - 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u> - 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1> - 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0> - 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4> - 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4> - 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5> - 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS - 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS - 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS - 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS - 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2> - 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5> - 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5> - 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5> - 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6> - 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5> - 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0> - 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS - 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS - 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1> - 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6> - 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7> - 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0> - 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS - 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7> - 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6> - 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7> - 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7> - 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1> - 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1> - 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3> - 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7> - 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5> - 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1> - 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7> - 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS - 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1> - 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1> - 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS - 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2> - 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1> - 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5> - 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS - 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7> - 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7> 
- 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1> - 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1> - 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS - 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1> - 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1> - 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1> - 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0> - 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7> - 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7> - 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS - 2593128550U, // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS - 2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1> - 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0> - 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1> - 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS - 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7> - 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7> - 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1> - 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7> - 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS - 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7> - 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2> - 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1> - 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS - 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7> - 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2> - 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2> - 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS - 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS - 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7> - 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2> - 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3> - 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS - 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3> - 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3> - 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2> - 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS - 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1> - 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1> - 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4> - 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4> - 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4> - 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS - 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0> - 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6> - 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS - 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS - 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7> - 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2> - 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5> - 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS - 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6> - 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7> - 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS - 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS - 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1> - 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0> - 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7> - 3643025697U, // <1,7,6,3>: 
Cost 4 vext1 <3,1,7,6>, <3,1,7,6> - 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS - 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6> - 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6> - 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0> - 2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0> - 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1> - 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1> - 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1> - 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0> - 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS - 2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7> - 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7> - 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7> - 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1> - 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS - 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS - 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2> - 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u> - 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS - 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u> - 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3> - 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2> - 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS - 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u> - 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS - 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2> - 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u> - 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u> - 1658771190U, // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1> - 2736789248U, // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2> - 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1> - 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS - 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS - 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS - 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS - 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS - 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS - 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7> - 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS - 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS - 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS - 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0> - 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS - 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2> - 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3> - 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4> - 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS - 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6> - 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS - 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u> - 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS - 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2> - 115726126U, // <1,u,3,2>: Cost 1 vrev LHS - 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS - 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS - 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3> - 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3> - 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS - 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS - 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1> - 2620042237U, // 
<1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0> - 2263217967U, // <1,u,4,2>: Cost 3 vrev <u,1,2,4> - 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4> - 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS - 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS - 2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6> - 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6> - 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS - 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS - 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7> - 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS - 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS - 2551434550U, // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS - 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS - 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS - 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS - 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS - 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u> - 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0> - 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3> - 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7> - 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4> - 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7> - 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u> - 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7> - 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u> - 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u> - 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1> - 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1> - 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7> - 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS - 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u> - 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7> - 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7> - 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u> - 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS - 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS - 115767091U, // <1,u,u,2>: Cost 1 vrev LHS - 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS - 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS - 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS - 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS - 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS - 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS - 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0> - 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1> - 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2> - 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0> - 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS - 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5> - 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0> - 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7> - 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2> - 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1> - 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0> - 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS - 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS - 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS - 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7> - 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1> - 3661050874U, // <2,0,1,7>: Cost 4 vext1 
<6,2,0,1>, <7,0,1,2> - 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS - 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS - 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2> - 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0> - 2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2> - 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS - 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3> - 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2> - 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2> - 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS - 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0> - 2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1> - 2953625764U, // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2> - 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3> - 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS - 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5> - 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6> - 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7> - 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u> - 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS - 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5> - 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6> - 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2> - 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS - 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS - 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS - 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5> - 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6> - 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS - 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5> - 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7> - 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5> - 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5> - 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5> - 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0> - 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS - 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5> - 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0> - 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS - 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7> - 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6> - 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6> - 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0> - 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6> - 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7> - 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6> - 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0> - 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3> - 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1> - 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0> - 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6> - 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2> - 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0> - 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7> - 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0> - 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS - 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1> - 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS - 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2> - 1477791030U, 
// <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS - 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS - 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS - 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS - 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS - 2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS - 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS - 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0> - 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2> - 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS - 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2> - 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0> - 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1> - 1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2> - 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS - 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1> - 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0> - 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3> - 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS - 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7> - 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1> - 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2> - 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3> - 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS - 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1> - 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2> - 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0> - 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS - 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3> - 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7> - 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0> - 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0> - 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS - 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1> - 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2> - 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3> - 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS - 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5> - 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3> - 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7> - 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u> - 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5> - 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6> - 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5> - 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4> - 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS - 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS - 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS - 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6> - 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4> - 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS - 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7> - 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1> - 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7> - 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS - 3740913668U, // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5> - 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5> - 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS - 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7> - 2551586918U, // <2,1,6,0>: 
Cost 3 vext1 <0,2,1,6>, LHS - 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2> - 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7> - 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS - 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS - 4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5> - 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6> - 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1> - 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS - 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2> - 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1> - 3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0> - 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7> - 3740915046U, // <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6> - 3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7> - 3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1> - 3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u> - 2669827714U, // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2> - 2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS - 2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1> - 2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2> - 1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u> - 2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS - 2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5> - 2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3> - 2669828370U, // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1> - 1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u> - 1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2> - 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS - 2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0> - 2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2> - 2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS - 3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7> - 3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6> - 3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2> - 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2> - 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2> - 2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1> - 2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0> - 2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS - 3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS - 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7> - 3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3> - 3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1> - 2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS - 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS - 2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2> - 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS - 2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3> - 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS - 2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7> - 2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7> - 2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2> - 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS - 2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1> - 2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0> - 2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2> - 1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS - 2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5> - 3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5> - 
2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6> - 2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3> - 1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS - 2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS - 3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2> - 2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5> - 3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5> - 2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS - 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS - 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS - 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4> - 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS - 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2> - 2617413328U, // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3> - 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7> - 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS - 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5> - 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5> - 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0> - 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS - 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS - 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6> - 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3> - 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3> - 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7> - 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS - 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7> - 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6> - 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2> - 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7> - 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1> - 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5> - 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2> - 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS - 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6> - 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7> - 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7> - 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7> - 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1> - 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS - 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS - 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS - 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS - 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS - 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS - 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6> - 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS - 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS - 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0> - 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS - 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2> - 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3> - 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5> - 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7> - 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7> - 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0> - 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS - 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2> - 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1> - 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0> - 1548985304U, // <2,3,1,3>: Cost 2 
vext2 LHS, <1,3,1,3> - 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS - 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7> - 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7> - 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0> - 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3> - 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2> - 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1> - 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2> - 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1> - 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5> - 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7> - 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7> - 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1> - 1548986427U, // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1> - 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2> - 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3> - 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3> - 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3> - 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6> - 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7> - 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3> - 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7> - 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2> - 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS - 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4> - 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0> - 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2> - 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS - 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS - 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6> - 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4> - 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS - 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS - 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3> - 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4> - 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6> - 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6> - 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5> - 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0> - 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7> - 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7> - 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2> - 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6> - 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3> - 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5> - 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6> - 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7> - 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6> - 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1> - 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1> - 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2> - 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3> - 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3> - 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1> - 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6> - 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2> - 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1> - 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7> - 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2> - 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2> - 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS - 1548990341U, // <2,3,u,2>: Cost 2 
vext2 LHS, <u,2,3,0> - 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1> - 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6> - 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS - 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7> - 1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, <u,7,0,1> - 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS - 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4> - 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS - 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4> - 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4> - 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2> - 2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1> - 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2> - 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1> - 1634962332U, // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2> - 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2> - 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1> - 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4> - 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS - 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4> - 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7> - 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3> - 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1> - 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS - 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4> - 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3> - 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4> - 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5> - 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4> - 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS - 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS - 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0> - 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS - 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4> - 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1> - 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4> - 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3> - 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4> - 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5> - 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6> - 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7> - 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u> - 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2> - 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1> - 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2> - 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3> - 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4> - 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS - 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS - 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7> - 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS - 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5> - 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2> - 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5> - 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5> - 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS - 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5> - 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS - 2826128694U, // 
<2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS - 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS - 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS - 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2> - 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4> - 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2> - 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS - 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS - 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6> - 2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1> - 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS - 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4> - 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2> - 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4> - 3734967544U, // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4> - 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6> - 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0> - 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1> - 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7> - 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4> - 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS - 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS - 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2> - 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2> - 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS - 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5> - 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS - 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS - 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS - 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0> - 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS - 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2> - 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5> - 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5> - 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5> - 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7> - 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS - 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS - 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2> - 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1> - 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0> - 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7> - 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5> - 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7> - 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2> - 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3> - 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5> - 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2> - 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1> - 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2> - 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1> - 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5> - 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1> - 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7> - 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS - 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS - 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS - 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3> - 2575746766U, // <2,5,3,2>: 
Cost 3 vext1 <4,2,5,3>, <2,3,4,5> - 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3> - 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6> - 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3> - 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6> - 2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7> - 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u> - 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS - 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4> - 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5> - 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4> - 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS - 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS - 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6> - 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS - 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS - 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS - 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7> - 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5> - 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5> - 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS - 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5> - 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6> - 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7> - 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7> - 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS - 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6> - 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3> - 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6> - 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5> - 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7> - 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6> - 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS - 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS - 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS - 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2> - 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7> - 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2> - 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS - 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7> - 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2> - 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS - 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS - 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS - 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS - 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u> - 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u> - 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6> - 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS - 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6> - 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7> - 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS - 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0> - 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS - 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2> - 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0> - 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6> - 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6> - 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1> - 
2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2> - 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS - 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2> - 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1> - 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0> - 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS - 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6> - 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7> - 3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7> - 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2> - 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS - 2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1> - 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3> - 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2> - 2620098214U, // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1> - 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6> - 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7> - 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6> - 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS - 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1> - 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2> - 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1> - 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7> - 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3> - 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6> - 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5> - 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6> - 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS - 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS - 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2> - 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3> - 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0> - 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4> - 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4> - 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS - 2667875700U, // <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6> - 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS - 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS - 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2> - 2667876048U, // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3> - 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7> - 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5> - 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5> - 2667876356U, // <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5> - 2667876450U, // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0> - 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS - 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS - 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS - 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1> - 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6> - 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3> - 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS - 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5> - 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6> - 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS - 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS - 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1> - 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2> - 2725000033U, // 
<2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2> - 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0> - 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5> - 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7> - 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2> - 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0> - 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1> - 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2> - 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS - 2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3> - 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS - 1594136612U, // <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6> - 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS - 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7> - 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS - 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS - 2726179825U, // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2> - 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2> - 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2> - 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0> - 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2> - 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7> - 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7> - 2599760953U, // <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2> - 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2> - 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2> - 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1> - 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0> - 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7> - 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5> - 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7> - 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7> - 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0> - 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2> - 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2> - 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7> - 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2> - 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1> - 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6> - 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7> - 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7> - 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1> - 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7> - 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS - 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7> - 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2> - 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3> - 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS - 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3> - 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3> - 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2> - 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS - 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6> - 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4> - 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7> - 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4> - 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4> - 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS - 2712835441U, // 
<2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u> - 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0> - 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS - 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2> - 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7> - 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7> - 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5> - 2599800118U, // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS - 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5> - 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7> - 2599802214U, // <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6> - 2599802670U, // <2,7,5,u>: Cost 3 vext1 <u,2,7,5>, LHS - 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS - 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7> - 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3> - 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6> - 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS - 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6> - 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6> - 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0> - 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS - 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1> - 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2> - 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7> - 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7> - 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS - 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5> - 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7> - 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7> - 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1> - 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS - 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2> - 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2> - 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u> - 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS - 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS - 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u> - 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2> - 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS - 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0> - 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS - 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2> - 1658631909U, // <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2> - 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5> - 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7> - 1658853120U, // <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2> - 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS - 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS - 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2> - 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1> - 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0> - 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3> - 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5> - 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7> - 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7> - 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3> - 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3> - 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS - 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1> - 269271142U, // <2,u,2,2>: Cost 1 
vdup2 LHS - 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1> - 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS - 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6> - 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7> - 3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS - 269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS - 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2> - 2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1> - 1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3> - 1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS - 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6> - 2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5> - 1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3> - 1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS - 1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS - 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS - 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4> - 2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5> - 1190213513U, // <2,u,4,3>: Cost 2 vrev <u,2,3,4> - 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS - 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS - 1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6> - 3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS - 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS - 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5> - 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3> - 2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5> - 2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, <u,5,3,7> - 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6> - 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5> - 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS - 1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7> - 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS - 1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS - 2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2> - 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3> - 2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, <u,6,3,7> - 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS - 2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS - 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6> - 1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1> - 1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS - 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2> - 2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u> - 2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7> - 2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1> - 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6> - 2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2> - 2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7> - 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7> - 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2> - 1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS - 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS - 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS - 1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS - 1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS - 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS - 1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS - 1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS - 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS - 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0> - 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1> - 1611890708U, // <3,0,0,2>: Cost 
2 vext3 LHS, <0,0,2,2> - 3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1> - 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1> - 3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7> - 3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1> - 3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0> - 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2> - 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS - 2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0> - 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS - 2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3> - 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS - 2624767120U, // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7> - 2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7> - 2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1> - 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS - 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2> - 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1> - 2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0> - 2685632702U, // <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1> - 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6> - 2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7> - 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7> - 2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7> - 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2> - 2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2> - 2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0> - 2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1> - 2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3> - 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6> - 3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7> - 2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7> - 3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1> - 2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3> - 2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4> - 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5> - 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6> - 3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1> - 2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6> - 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS - 2666573172U, // <3,0,4,6>: Cost 3 vext2 <u,2,3,0>, <4,6,4,6> - 3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4> - 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6> - 2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7> - 2666573520U, // <3,0,5,1>: Cost 3 vext2 <u,2,3,0>, <5,1,7,3> - 3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS - 3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6> - 2666573766U, // <3,0,5,4>: Cost 3 vext2 <u,2,3,0>, <5,4,7,6> - 2666573828U, // <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5> - 2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7> - 2666573992U, // <3,0,5,7>: Cost 3 vext2 <u,2,3,0>, <5,7,5,7> - 3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS - 2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7> - 2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7> - 2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7> - 3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2> - 3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1> - 3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0> - 2666574648U, // <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6> - 2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0> - 2689835573U, // <3,0,6,u>: Cost 3 vext3 
LHS, <0,6,u,7> - 2666574842U, // <3,0,7,0>: Cost 3 vext2 <u,2,3,0>, <7,0,1,2> - 2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7> - 2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0> - 3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7> - 2666575206U, // <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6> - 3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7> - 3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3> - 2666575468U, // <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7> - 2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0> - 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2> - 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1> - 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS - 2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1> - 1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6> - 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS - 2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, <u,6,3,7> - 2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u> - 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS - 2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS - 2618802278U, // <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS - 2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1> - 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2> - 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS - 2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1> - 3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6> - 3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0> - 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2> - 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1> - 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1> - 2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1> - 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3> - 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5> - 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5> - 3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5> - 3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3> - 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3> - 2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1> - 2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3> - 2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2> - 1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0> - 2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS - 2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3> - 3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7> - 2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0> - 1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0> - 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS - 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3> - 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0> - 2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1> - 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS - 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7> - 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7> - 2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3> - 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3> - 2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS - 2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5> - 2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5> - 2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5> - 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS - 2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS - 2769472822U, // 
<3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS - 3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4> - 2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5> - 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1> - 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7> - 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5> - 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7> - 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5> - 3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7> - 3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0> - 2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS - 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7> - 3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1> - 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7> - 2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7> - 2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7> - 3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5> - 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7> - 3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7> - 2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0> - 2685633806U, // <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7> - 3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS - 2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1> - 4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2> - 3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS - 3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS - 2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7> - 3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7> - 3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7> - 3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS - 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS - 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3> - 2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0> - 1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0> - 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS - 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7> - 2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7> - 2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS - 1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3> - 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0> - 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS - 2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0> - 2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0> - 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5> - 2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7> - 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4> - 2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0> - 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS - 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2> - 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1> - 2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0> - 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1> - 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS - 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7> - 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3> - 3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1> - 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1> - 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1> - 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3> - 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2> - 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3> - 2689836669U, // <3,2,2,4>: Cost 3 
vext3 LHS, <2,2,4,5> - 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7> - 2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6> - 3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3> - 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3> - 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1> - 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0> - 2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2> - 2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3> - 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5> - 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1> - 2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2> - 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0> - 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1> - 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS - 2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4> - 2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4> - 2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5> - 2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS - 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS - 2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0> - 2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4> - 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS - 2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS - 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5> - 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7> - 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6> - 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5> - 2665263108U, // <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5> - 2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7> - 2665263272U, // <3,2,5,7>: Cost 3 vext2 <u,0,3,2>, <5,7,5,7> - 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5> - 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1> - 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3> - 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6> - 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7> - 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5> - 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7> - 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7> - 2665263950U, // <3,2,6,7>: Cost 3 vext2 <u,0,3,2>, <6,7,0,1> - 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7> - 2665264122U, // <3,2,7,0>: Cost 3 vext2 <u,0,3,2>, <7,0,1,2> - 2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3> - 4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2> - 2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS - 2665264486U, // <3,2,7,4>: Cost 3 vext2 <u,0,3,2>, <7,4,5,6> - 2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7> - 2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7> - 2665264748U, // <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7> - 2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS - 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1> - 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS - 1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2> - 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3> - 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5> - 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS - 2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0> - 2665265408U, // <3,2,u,7>: Cost 3 vext2 <u,0,3,2>, <u,7,0,1> - 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1> - 2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0> - 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2> - 2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0> - 2564294806U, // <3,3,0,3>: 
Cost 3 vext1 <2,3,3,0>, <3,0,1,2> - 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1> - 2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2> - 3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2> - 4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7> - 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2> - 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3> - 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3> - 2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3> - 2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1> - 2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS - 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3> - 3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3> - 2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3> - 1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3> - 2564309094U, // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS - 2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3> - 1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3> - 2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0> - 2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS - 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4> - 2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3> - 2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3> - 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3> - 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS - 2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3> - 2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3> - 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS - 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS - 2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5> - 2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7> - 2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3> - 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS - 2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS - 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4> - 2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4> - 2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6> - 2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4> - 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6> - 2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS - 4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7> - 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6> - 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS - 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5> - 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5> - 2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5> - 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS - 3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5> - 2665934946U, // <3,3,5,6>: Cost 3 vext2 <u,1,3,3>, <5,6,7,0> - 2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS - 2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS - 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7> - 3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7> - 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7> - 2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7> - 2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7> - 3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7> - 2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6> - 2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3> - 2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3> - 2558378086U, // <3,3,7,0>: Cost 3 vext1 
<1,3,3,7>, LHS - 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7> - 2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7> - 2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3> - 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS - 2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7> - 3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3> - 3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7> - 2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS - 1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS - 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2> - 1592858504U, // <3,3,u,2>: Cost 2 vext2 <u,2,3,3>, <u,2,3,3> - 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS - 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS - 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6> - 2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3> - 2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS - 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS - 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0> - 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS - 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2> - 3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4> - 2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5> - 1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1> - 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2> - 3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0> - 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS - 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2> - 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1> - 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4> - 2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3> - 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS - 2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0> - 2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3> - 2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4> - 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4> - 3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS - 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3> - 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2> - 2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1> - 2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3> - 2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3> - 2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0> - 2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4> - 2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0> - 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2> - 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4> - 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4> - 2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3> - 2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1> - 2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS - 3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS - 3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1> - 2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2> - 2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS - 2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4> - 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4> - 2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4> - 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4> - 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS - 
1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6> - 3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4> - 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS - 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS - 2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3> - 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5> - 2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2> - 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS - 2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7> - 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS - 2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5> - 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS - 2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1> - 2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6> - 2666607098U, // <3,4,6,2>: Cost 3 vext2 <u,2,3,4>, <6,2,7,3> - 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6> - 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6> - 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7> - 2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7> - 2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4> - 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2> - 2666607610U, // <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2> - 3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5> - 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4> - 3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7> - 2666607974U, // <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6> - 2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0> - 2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0> - 2666608236U, // <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7> - 2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4> - 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS - 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS - 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u> - 2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, <u,3,0,1> - 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS - 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS - 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS - 2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u> - 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS - 3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0> - 2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS - 2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5> - 3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4> - 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1> - 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1> - 2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1> - 2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0> - 2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS - 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS - 3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1> - 2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5> - 2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5> - 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5> - 2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3> - 3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7> - 1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3> - 1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3> - 3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3> - 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5> - 3692578408U, // <3,5,2,2>: Cost 4 vext2 
<0,2,3,5>, <2,2,2,2> - 2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5> - 2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5> - 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3> - 3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7> - 2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3> - 2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5> - 3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2> - 2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5> - 3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4> - 2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3> - 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6> - 2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5> - 4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6> - 3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS - 3095072055U, // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS - 2600304742U, // <3,5,4,0>: Cost 3 vext1 <u,3,5,4>, LHS - 3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5> - 2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4> - 3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0> - 2600308022U, // <3,5,4,4>: Cost 3 vext1 <u,3,5,4>, RHS - 2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS - 2772454710U, // <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS - 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6> - 1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6> - 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS - 2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3> - 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5> - 2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5> - 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS - 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5> - 2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6> - 1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7> - 1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7> - 2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1> - 2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7> - 2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6> - 2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4> - 2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5> - 2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7> - 2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7> - 1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0> - 1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0> - 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS - 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7> - 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2> - 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2> - 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS - 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7> - 2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0> - 2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7> - 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS - 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS - 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u> - 2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2> - 2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2> - 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS - 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7> - 2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS - 1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3> - 
1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS - 2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS - 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2> - 2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4> - 3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4> - 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2> - 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7> - 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0> - 2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS - 2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2> - 3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3> - 3763581297U, // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1> - 2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6> - 3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1> - 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6> - 3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3> - 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3> - 2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3> - 2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6> - 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS - 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3> - 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6> - 3763581395U, // <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0> - 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6> - 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6> - 2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3> - 1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3> - 1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3> - 3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2> - 3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3> - 3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3> - 2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3> - 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6> - 3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6> - 3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2> - 2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS - 2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS - 3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6> - 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3> - 2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5> - 3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6> - 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6> - 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6> - 2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0> - 2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS - 2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6> - 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS - 3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2> - 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7> - 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6> - 2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5> - 3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6> - 4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6> - 2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5> - 2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS - 2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1> - 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3> - 2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3> - 3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6> - 
2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4> - 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7> - 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6> - 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7> - 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7> - 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1> - 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7> - 2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7> - 2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3> - 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5> - 2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1> - 2722059141U, // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2> - 2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS - 1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1> - 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1> - 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2> - 2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u> - 2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6> - 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5> - 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6> - 1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6> - 1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3> - 1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1> - 2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0> - 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS - 2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2> - 3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0> - 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5> - 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0> - 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0> - 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1> - 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS - 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2> - 2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1> - 2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0> - 2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7> - 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS - 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7> - 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7> - 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3> - 1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7> - 2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS - 3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3> - 2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2> - 2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1> - 2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS - 2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7> - 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7> - 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3> - 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7> - 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2> - 2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3> - 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3> - 2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3> - 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6> - 2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7> - 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7> - 2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7> - 2626816798U, // <3,7,3,u>: 
Cost 3 vext2 <1,5,3,7>, <3,u,1,2> - 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS - 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7> - 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7> - 3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7> - 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS - 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS - 2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4> - 2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6> - 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS - 2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2> - 2626817744U, // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3> - 3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3> - 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0> - 2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5> - 2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5> - 2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7> - 2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS - 2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS - 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1> - 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0> - 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3> - 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0> - 2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5> - 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4> - 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6> - 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0> - 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7> - 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS - 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7> - 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7> - 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7> - 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS - 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7> - 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7> - 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7> - 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7> - 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, <u,0,1,2> - 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS - 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, <u,2,3,0> - 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS - 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, <u,4,5,6> - 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS - 1595545808U, // <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7> - 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7> - 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS - 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0> - 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2> - 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, <u,0,2,2> - 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2> - 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1> - 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, <u,0,5,1> - 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2> - 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS - 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2> - 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u> - 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u> - 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS - 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3> - 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS - 1553081489U, // <3,u,1,5>: Cost 2 
vext2 <1,5,3,u>, <1,5,3,u> - 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u> - 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, <u,1,7,3> - 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS - 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2> - 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3> - 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u> - 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, <u,2,3,0> - 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6> - 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, <u,2,5,7> - 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u> - 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, <u,2,7,3> - 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, <u,2,u,0> - 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1> - 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3> - 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, <u,3,2,2> - 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS - 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5> - 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7> - 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, <u,3,6,7> - 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS - 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS - 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS - 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, <u,4,1,5> - 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6> - 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, <u,4,3,5> - 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4> - 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6> - 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, <u,4,6,6> - 1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, <u,4,7,6> - 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6> - 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS - 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, <u,5,1,7> - 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5> - 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7> - 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS - 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5> - 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS - 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, <u,5,7,7> - 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS - 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, <u,6,0,1> - 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6> - 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, <u,6,2,7> - 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7> - 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6> - 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, <u,6,5,7> - 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6> - 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, <u,6,7,0> - 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, <u,6,u,7> - 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS - 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7> - 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2> - 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS - 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS - 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7> - 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, <u,7,6,2> - 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7> - 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS - 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1> - 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2> - 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS - 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS - 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, 
<u,u,4,5> - 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6> - 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS - 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, <u,u,7,0> - 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS - 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0> - 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1> - 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2> - 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4> - 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4> - 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0> - 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0> - 3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0> - 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1> - 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS - 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4> - 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS - 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1> - 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS - 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4> - 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1> - 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1> - 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS - 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4> - 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4> - 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4> - 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0> - 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6> - 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7> - 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4> - 3668358453U, // <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2> - 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4> - 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS - 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4> - 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4> - 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4> - 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS - 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5> - 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7> - 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0> - 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4> - 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS - 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5> - 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6> - 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2> - 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS - 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS - 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2> - 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4> - 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS - 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS - 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS - 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2> - 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5> - 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS - 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7> - 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7> - 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5> - 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS - 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, 
LHS - 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1> - 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS - 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6> - 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS - 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6> - 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6> - 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0> - 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS - 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2> - 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS - 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS - 3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0> - 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5> - 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5> - 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0> - 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7> - 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS - 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS - 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS - 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS - 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u> - 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS - 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS - 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4> - 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u> - 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS - 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1> - 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS - 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6> - 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2> - 2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5> - 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4> - 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1> - 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4> - 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS - 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2> - 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4> - 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4> - 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3> - 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5> - 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1> - 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7> - 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1> - 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3> - 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS - 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4> - 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2> - 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4> - 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS - 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3> - 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7> - 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2> - 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4> - 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS - 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3> - 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4> - 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4> - 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS - 2691908608U, // <4,1,3,5>: Cost 3 
vext3 <1,2,3,4>, <1,3,5,7> - 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> - 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2> - 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3> - 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1> - 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0> - 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5> - 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS - 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5> - 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS - 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4> - 3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0> - 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS - 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS - 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2> - 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5> - 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2> - 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS - 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7> - 2600686074U, // <4,1,5,6>: Cost 3 vext1 <u,4,1,5>, <6,2,7,3> - 2600686586U, // <4,1,5,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2> - 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS - 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS - 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1> - 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2> - 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS - 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS - 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6> - 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7> - 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1> - 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS - 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1> - 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1> - 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1> - 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4> - 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6> - 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4> - 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7> - 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7> - 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1> - 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS - 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2> - 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2> - 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4> - 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS - 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS - 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> - 2600686586U, // <4,1,u,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2> - 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS - 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2> - 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS - 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6> - 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2> - 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6> - 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7> - 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4> - 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2> - 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS - 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2> - 
3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1> - 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0> - 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS - 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3> - 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7> - 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3> - 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3> - 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3> - 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4> - 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3> - 2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2> - 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3> - 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0> - 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7> - 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6> - 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2> - 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3> - 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1> - 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1> - 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2> - 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4> - 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5> - 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4> - 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4> - 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4> - 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1> - 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS - 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4> - 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4> - 2699208469U, // <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4> - 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS - 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS - 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4> - 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0> - 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS - 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS - 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0> - 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2> - 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS - 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS - 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7> - 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7> - 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS - 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS - 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS - 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2> - 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2> - 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS - 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS - 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6> - 2600767994U, // <4,2,6,6>: Cost 3 vext1 <u,4,2,6>, <6,2,7,3> - 2600768506U, // <4,2,6,7>: Cost 3 vext1 <u,4,2,6>, <7,0,1,2> - 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS - 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2> - 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2> - 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2> - 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4> - 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4> - 3303360298U, // 
<4,2,7,5>: Cost 4 vrev <2,4,5,7> - 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4> - 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7> - 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2> - 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS - 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2> - 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2> - 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS - 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS - 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS - 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u> - 2600784890U, // <4,2,u,7>: Cost 3 vext1 <u,4,2,u>, <7,0,1,2> - 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS - 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0> - 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2> - 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4> - 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3> - 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1> - 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0> - 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0> - 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0> - 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2> - 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1> - 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1> - 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4> - 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4> - 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0> - 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3> - 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1> - 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3> - 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4> - 3626770534U, // <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS - 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3> - 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2> - 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4> - 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS - 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4> - 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3> - 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3> - 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3> - 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1> - 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1> - 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3> - 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3> - 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4> - 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7> - 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7> - 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7> - 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3> - 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1> - 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2> - 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4> - 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0> - 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5> - 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6> - 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4> - 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2> - 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1> - 2559025254U, // <4,3,5,0>: Cost 3 vext1 
<1,4,3,5>, LHS - 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5> - 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5> - 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3> - 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS - 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5> - 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5> - 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4> - 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS - 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS - 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6> - 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6> - 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3> - 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS - 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6> - 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6> - 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4> - 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6> - 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1> - 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5> - 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7> - 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7> - 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5> - 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7> - 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7> - 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4> - 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1> - 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS - 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2> - 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u> - 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4> - 2559053110U, // <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS - 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u> - 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u> - 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4> - 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u> - 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4> - 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS - 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS - 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1> - 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0> - 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1> - 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2> - 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0> - 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS - 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2> - 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1> - 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3> - 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3> - 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3> - 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4> - 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3> - 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3> - 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3> - 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4> - 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4> - 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2> - 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4> - 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4> - 
3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7> - 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4> - 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4> - 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4> - 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2> - 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4> - 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3> - 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4> - 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4> - 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5> - 3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4> - 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4> - 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4> - 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS - 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4> - 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2> - 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4> - 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS - 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS - 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS - 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4> - 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS - 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS - 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4> - 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5> - 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5> - 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS - 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS - 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS - 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5> - 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS - 2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS - 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2> - 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5> - 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6> - 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS - 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6> - 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS - 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4> - 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS - 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2> - 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4> - 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7> - 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4> - 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4> - 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4> - 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4> - 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7> - 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4> - 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS - 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS - 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS - 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u> - 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS - 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS - 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS - 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u> - 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS - 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0> - 1557774438U, // <4,5,0,1>: Cost 2 vext2 
<2,3,4,5>, LHS - 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5> - 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0> - 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5> - 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7> - 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5> - 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0> - 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS - 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2> - 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5> - 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0> - 2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS - 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4> - 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5> - 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6> - 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3> - 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS - 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS - 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3> - 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5> - 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5> - 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5> - 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7> - 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7> - 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS - 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5> - 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2> - 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1> - 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4> - 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3> - 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0> - 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5> - 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4> - 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5> - 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2> - 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS - 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4> - 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5> - 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4> - 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS - 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS - 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5> - 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6> - 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS - 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS - 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3> - 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4> - 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2> - 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5> - 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5> - 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0> - 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS - 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS - 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS - 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6> - 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6> - 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6> - 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS - 3047608324U, // <4,5,6,5>: Cost 3 vtrnl 
RHS, <5,5,5,5> - 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6> - 27705344U, // <4,5,6,7>: Cost 0 copy RHS - 27705344U, // <4,5,6,u>: Cost 0 copy RHS - 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS - 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4> - 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7> - 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5> - 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS - 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7> - 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4> - 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4> - 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS - 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS - 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS - 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0> - 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u> - 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS - 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS - 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7> - 27705344U, // <4,5,u,7>: Cost 0 copy RHS - 27705344U, // <4,5,u,u>: Cost 0 copy RHS - 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0> - 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS - 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6> - 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0> - 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5> - 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7> - 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0> - 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS - 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS - 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2> - 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1> - 2618917782U, // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0> - 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3> - 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5> - 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7> - 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7> - 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS - 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3> - 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4> - 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3> - 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2> - 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1> - 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6> - 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7> - 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7> - 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3> - 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1> - 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2> - 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6> - 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2> - 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3> - 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6> - 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6> - 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6> - 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4> - 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2> - 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS - 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, 
<6,4,1,3> - 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4> - 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4> - 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS - 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS - 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS - 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4> - 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS - 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS - 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3> - 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3> - 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3> - 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6> - 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5> - 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6> - 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS - 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS - 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS - 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2> - 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3> - 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2> - 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6> - 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6> - 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6> - 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS - 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS - 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2> - 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2> - 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7> - 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4> - 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6> - 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6> - 3852915914U, // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3> - 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7> - 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2> - 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS - 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS - 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS - 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1> - 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u> - 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS - 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS - 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS - 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS - 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0> - 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS - 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4> - 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4> - 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5> - 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0> - 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7> - 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4> - 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS - 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1> - 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1> - 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4> - 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5> - 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS - 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7> - 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7> - 3793818754U, // <4,7,1,7>: Cost 4 vext3 
<5,u,7,4>, <7,1,7,3> - 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1> - 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS - 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3> - 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2> - 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4> - 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7> - 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7> - 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7> - 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3> - 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7> - 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2> - 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4> - 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4> - 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3> - 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6> - 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7> - 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7> - 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4> - 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7> - 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1> - 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3> - 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7> - 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5> - 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4> - 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS - 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4> - 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7> - 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS - 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2> - 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7> - 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5> - 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5> - 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6> - 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5> - 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5> - 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7> - 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2> - 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS - 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2> - 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2> - 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2> - 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS - 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6> - 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3> - 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7> - 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS - 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS - 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4> - 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7> - 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4> - 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7> - 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7> - 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7> - 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7> - 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7> - 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS - 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS - 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2> - 
2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2> - 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS - 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u> - 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3> - 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7> - 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS - 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0> - 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS - 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u> - 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2> - 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5> - 2265397305U, // <4,u,0,5>: Cost 3 vrev <u,4,5,0> - 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u> - 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0> - 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS - 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2> - 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1> - 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS - 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3> - 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3> - 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7> - 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7> - 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3> - 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS - 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS - 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u> - 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2> - 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u> - 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u> - 2733864859U, // <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4> - 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7> - 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3> - 1561118822U, // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u> - 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2> - 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2> - 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u> - 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3> - 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6> - 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7> - 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> - 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u> - 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2> - 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS - 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2> - 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4> - 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4> - 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS - 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS - 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS - 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6> - 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS - 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS - 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS - 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5> - 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS - 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS - 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS - 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS - 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS - 1618172076U, // 
<4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS - 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS - 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2> - 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS - 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6> - 1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS - 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6> - 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS - 27705344U, // <4,u,6,7>: Cost 0 copy RHS - 27705344U, // <4,u,6,u>: Cost 0 copy RHS - 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS - 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4> - 2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7> - 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u> - 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS - 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6> - 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7> - 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7> - 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS - 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS - 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS - 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS - 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u> - 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS - 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS - 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS - 27705344U, // <4,u,u,7>: Cost 0 copy RHS - 27705344U, // <4,u,u,u>: Cost 0 copy RHS - 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0> - 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1> - 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2> - 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5> - 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5> - 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0> - 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0> - 3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0> - 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2> - 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS - 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1> - 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS - 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7> - 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS - 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1> - 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7> - 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2> - 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS - 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2> - 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5> - 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4> - 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5> - 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5> - 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5> - 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4> - 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6> - 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5> - 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5> - 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4> - 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5> - 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5> - 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0> - 3776348452U, // <5,0,3,5>: 
Cost 4 vext3 <3,0,4,5>, <0,3,5,4> - 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0> - 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7> - 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5> - 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS - 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5> - 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6> - 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5> - 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS - 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS - 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5> - 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0> - 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5> - 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1> - 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS - 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS - 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0> - 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5> - 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0> - 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0> - 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS - 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS - 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0> - 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS - 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7> - 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6> - 3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5> - 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7> - 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6> - 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5> - 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS - 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS - 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0> - 2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7> - 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2> - 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS - 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0> - 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7> - 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7> - 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS - 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2> - 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5> - 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS - 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5> - 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6> - 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS - 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u> - 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6> - 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS - 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0> - 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS - 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2> - 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2> - 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5> - 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0> - 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7> - 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0> - 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS - 2223891567U, // <5,1,1,0>: Cost 3 vrev 
<1,5,0,1> - 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1> - 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0> - 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3> - 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5> - 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5> - 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5> - 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5> - 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3> - 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2> - 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3> - 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2> - 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0> - 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5> - 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3> - 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7> - 3808199610U, // <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0> - 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0> - 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS - 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3> - 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5> - 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5> - 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5> - 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7> - 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7> - 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5> - 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3> - 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1> - 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5> - 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5> - 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5> - 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4> - 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS - 2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6> - 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4> - 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1> - 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1> - 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1> - 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1> - 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7> - 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5> - 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5> - 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0> - 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7> - 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1> - 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS - 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7> - 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3> - 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7> - 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS - 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7> - 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6> - 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1> - 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1> - 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS - 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1> - 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1> - 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS - 2559561014U, // 
<5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS - 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3> - 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6> - 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7> - 2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS - 1591662326U, // <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1> - 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS - 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5> - 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS - 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5> - 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS - 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7> - 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1> - 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS - 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0> - 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS - 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2> - 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2> - 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1> - 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1> - 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4> - 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0> - 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS - 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2> - 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2> - 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5> - 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5> - 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS - 3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0> - 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3> - 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1> - 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5> - 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS - 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3> - 2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2> - 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3> - 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5> - 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7> - 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6> - 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5> - 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3> - 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1> - 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5> - 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5> - 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5> - 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5> - 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5> - 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5> - 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7> - 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5> - 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2> - 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3> - 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5> - 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5> - 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6> - 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS - 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2> - 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4> - 2628914729U, // <5,2,4,u>: Cost 3 vext2 
<1,u,5,2>, RHS - 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS - 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3> - 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7> - 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS - 2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS - 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5> - 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7> - 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1> - 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS - 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS - 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3> - 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3> - 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7> - 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5> - 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7> - 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7> - 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1> - 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7> - 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS - 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2> - 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7> - 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS - 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS - 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5> - 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6> - 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7> - 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS - 2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1> - 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS - 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u> - 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3> - 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5> - 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS - 2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5> - 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u> - 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5> - 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0> - 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2> - 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0> - 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2> - 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1> - 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2> - 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0> - 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0> - 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2> - 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3> - 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1> - 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3> - 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5> - 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3> - 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7> - 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7> - 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5> - 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3> - 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1> - 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5> - 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2> - 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, 
<3,2,3,4> - 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5> - 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4> - 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3> - 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3> - 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4> - 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1> - 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3> - 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2> - 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3> - 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5> - 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5> - 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7> - 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5> - 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5> - 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5> - 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0> - 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3> - 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5> - 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5> - 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6> - 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5> - 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4> - 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6> - 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS - 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5> - 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5> - 2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5> - 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS - 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5> - 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0> - 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5> - 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS - 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS - 3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6> - 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6> - 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6> - 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS - 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0> - 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6> - 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4> - 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS - 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS - 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7> - 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2> - 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2> - 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS - 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3> - 2601513466U, // <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3> - 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7> - 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS - 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS - 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u> - 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2> - 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2> - 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS - 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6> - 2601521658U, // <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3> - 2236410471U, // <5,3,u,7>: Cost 3 vrev 
<3,5,7,u> - 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS - 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS - 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS - 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2> - 3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5> - 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5> - 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1> - 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0> - 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0> - 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS - 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1> - 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4> - 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4> - 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7> - 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4> - 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0> - 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5> - 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1> - 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1> - 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4> - 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4> - 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4> - 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5> - 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4> - 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3> - 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3> - 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5> - 2666752099U, // <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5> - 3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS - 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4> - 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4> - 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3> - 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4> - 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0> - 2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5> - 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7> - 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4> - 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS - 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4> - 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3> - 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4> - 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4> - 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5> - 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4> - 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4> - 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5> - 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS - 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5> - 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3> - 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2> - 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS - 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5> - 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS - 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS - 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS - 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS - 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7> - 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2> - 2571717122U, // <5,4,6,3>: Cost 
3 vext1 <3,5,4,6>, <3,4,5,6> - 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS - 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5> - 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7> - 2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5> - 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS - 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS - 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4> - 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7> - 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7> - 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS - 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5> - 94817590U, // <5,4,7,6>: Cost 1 vrev RHS - 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7> - 94965064U, // <5,4,7,u>: Cost 1 vrev RHS - 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS - 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u> - 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u> - 2667419628U, // <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4> - 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS - 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5> - 94825783U, // <5,4,u,6>: Cost 1 vrev RHS - 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5> - 94973257U, // <5,4,u,u>: Cost 1 vrev RHS - 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0> - 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS - 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2> - 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2> - 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1> - 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0> - 3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7> - 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS - 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS - 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2> - 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5> - 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0> - 2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3> - 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5> - 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7> - 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7> - 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3> - 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5> - 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS - 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3> - 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2> - 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4> - 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3> - 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3> - 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7> - 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7> - 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4> - 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2> - 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5> - 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3> - 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5> - 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6> - 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5> - 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7> - 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1> - 2646854430U, // <5,5,3,u>: Cost 3 vext2 
<4,u,5,5>, <3,u,1,2> - 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1> - 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5> - 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3> - 3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4> - 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5> - 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS - 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5> - 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6> - 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5> - 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS - 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3> - 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2> - 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2> - 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS - 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS - 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0> - 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7> - 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS - 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS - 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6> - 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3> - 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6> - 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5> - 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5> - 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6> - 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1> - 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1> - 2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS - 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7> - 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5> - 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7> - 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS - 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5> - 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6> - 2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS - 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS - 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS - 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS - 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5> - 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u> - 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS - 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS - 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7> - 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS - 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS - 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0> - 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS - 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2> - 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4> - 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5> - 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6> - 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7> - 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS - 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS - 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2> - 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1> - 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0> - 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3> - 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6> - 
2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7> - 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7> - 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS - 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6> - 3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2> - 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3> - 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2> - 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1> - 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6> - 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6> - 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7> - 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3> - 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6> - 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2> - 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3> - 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6> - 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3> - 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6> - 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6> - 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7> - 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS - 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6> - 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS - 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5> - 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5> - 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5> - 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6> - 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS - 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6> - 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5> - 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS - 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS - 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3> - 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6> - 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4> - 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6> - 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5> - 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1> - 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS - 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS - 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS - 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4> - 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3> - 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6> - 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS - 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6> - 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6> - 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS - 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS - 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS - 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2> - 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2> - 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2> - 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS - 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6> - 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6> - 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7> - 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS - 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS 
- 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS - 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2> - 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2> - 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS - 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS - 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3> - 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2> - 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS - 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0> - 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS - 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2> - 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0> - 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5> - 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7> - 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7> - 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0> - 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS - 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2> - 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1> - 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0> - 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7> - 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS - 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7> - 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7> - 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7> - 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7> - 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7> - 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3> - 2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2> - 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1> - 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7> - 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7> - 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7> - 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7> - 2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1> - 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2> - 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5> - 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1> - 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3> - 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6> - 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0> - 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7> - 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7> - 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2> - 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS - 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7> - 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0> - 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4> - 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS - 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS - 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6> - 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7> - 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS - 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS - 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3> - 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3> - 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7> - 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS - 2846281732U, // <5,7,5,5>: Cost 
3 vuzpr RHS, <5,5,5,5> - 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7> - 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS - 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS - 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0> - 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5> - 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2> - 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6> - 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4> - 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u> - 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6> - 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7> - 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u> - 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS - 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1> - 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2> - 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3> - 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS - 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7> - 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3> - 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7> - 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS - 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS - 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS - 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0> - 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS - 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS - 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS - 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7> - 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS - 1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS - 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0> - 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS - 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2> - 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2> - 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1> - 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1> - 2266134675U, // <5,u,0,6>: Cost 3 vrev <u,5,6,0> - 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0> - 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS - 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2> - 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1> - 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS - 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u> - 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u> - 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0> - 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u> - 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS - 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS - 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0> - 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5> - 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2> - 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3> - 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u> - 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u> - 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7> - 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3> - 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1> - 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1> - 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u> - 
2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u> - 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3> - 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u> - 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0> - 2734610422U, // <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5> - 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u> - 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u> - 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u> - 1661163546U, // <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5> - 2734463012U, // <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6> - 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5> - 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5> - 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS - 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6> - 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u> - 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS - 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS - 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5> - 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3> - 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7> - 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS - 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS - 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS - 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS - 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS - 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS - 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS - 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6> - 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7> - 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS - 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS - 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6> - 2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS - 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7> - 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS - 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7> - 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2> - 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS - 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS - 1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3> - 118708378U, // <5,u,7,6>: Cost 1 vrev RHS - 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS - 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS - 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS - 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS - 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS - 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS - 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS - 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS - 118716571U, // <5,u,u,6>: Cost 1 vrev RHS - 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS - 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS - 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0> - 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1> - 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2> - 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5> - 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6> - 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0> - 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6> - 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7> - 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2> - 2566070374U, // <6,0,1,0>: Cost 3 
vext1 <2,6,0,1>, LHS - 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0> - 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS - 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6> - 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS - 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1> - 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1> - 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1> - 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS - 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2> - 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6> - 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6> - 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5> - 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6> - 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7> - 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6> - 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2> - 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6> - 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2> - 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4> - 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5> - 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3> - 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6> - 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6> - 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6> - 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7> - 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5> - 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6> - 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5> - 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6> - 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6> - 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6> - 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS - 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0> - 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0> - 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6> - 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS - 2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6> - 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6> - 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0> - 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6> - 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6> - 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0> - 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS - 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS - 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0> - 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS - 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS - 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5> - 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0> - 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7> - 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0> - 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1> - 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS - 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS - 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0> - 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2> - 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7> - 2572094774U, 
// <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS - 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5> - 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0> - 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7> - 2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS - 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2> - 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1> - 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS - 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5> - 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6> - 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS - 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u> - 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS - 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS - 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS - 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS - 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6> - 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2> - 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS - 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2> - 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1> - 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0> - 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2> - 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1> - 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1> - 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6> - 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3> - 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6> - 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5> - 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6> - 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1> - 2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3> - 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS - 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3> - 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2> - 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0> - 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS - 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3> - 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3> - 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0> - 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0> - 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS - 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3> - 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6> - 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1> - 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6> - 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7> - 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3> - 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2> - 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3> - 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1> - 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6> - 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4> - 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6> - 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS - 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6> - 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0> - 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1> - 
2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6> - 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1> - 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7> - 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6> - 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7> - 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6> - 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6> - 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0> - 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS - 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7> - 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS - 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7> - 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6> - 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS - 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS - 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7> - 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6> - 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1> - 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS - 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS - 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7> - 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2> - 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS - 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS - 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5> - 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0> - 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7> - 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS - 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS - 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3> - 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6> - 2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0> - 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6> - 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7> - 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u> - 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u> - 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0> - 2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0> - 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS - 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2> - 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0> - 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6> - 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3> - 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4> - 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0> - 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS - 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1> - 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1> - 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0> - 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS - 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6> - 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7> - 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3> - 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1> - 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1> - 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1> - 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3> - 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2> - 2686027378U, // 
<6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3> - 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6> - 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7> - 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6> - 3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7> - 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3> - 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1> - 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0> - 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6> - 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4> - 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5> - 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6> - 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6> - 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4> - 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1> - 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2> - 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u> - 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6> - 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6> - 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6> - 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS - 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0> - 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2> - 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2> - 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3> - 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3> - 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7> - 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6> - 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5> - 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5> - 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0> - 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS - 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6> - 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1> - 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3> - 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6> - 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7> - 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5> - 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7> - 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6> - 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1> - 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7> - 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS - 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2> - 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7> - 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS - 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS - 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7> - 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6> - 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7> - 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS - 1591744256U, // <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2> - 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS - 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6> - 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS - 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5> - 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS - 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0> - 2847051305U, // 
<6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS - 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS - 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0> - 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2> - 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4> - 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2> - 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2> - 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2> - 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0> - 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0> - 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2> - 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3> - 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1> - 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3> - 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1> - 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6> - 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3> - 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0> - 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3> - 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6> - 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4> - 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3> - 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2> - 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0> - 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6> - 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7> - 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7> - 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6> - 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3> - 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1> - 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3> - 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3> - 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3> - 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6> - 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5> - 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7> - 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7> - 2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5> - 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS - 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3> - 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3> - 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6> - 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS - 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6> - 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6> - 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4> - 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6> - 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS - 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7> - 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5> - 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5> - 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6> - 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7> - 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6> - 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0> - 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6> - 2602164326U, // <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS - 2654212512U, // <6,3,6,1>: Cost 3 vext2 
<6,1,6,3>, <6,1,6,3> - 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6> - 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1> - 2602167524U, // <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6> - 3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7> - 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6> - 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7> - 2602170158U, // <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS - 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS - 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7> - 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7> - 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2> - 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS - 2602176208U, // <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3> - 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3> - 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7> - 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS - 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS - 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2> - 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u> - 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2> - 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS - 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6> - 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0> - 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7> - 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS - 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0> - 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS - 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6> - 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1> - 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6> - 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1> - 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2> - 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0> - 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS - 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1> - 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1> - 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0> - 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS - 3628166454U, // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS - 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0> - 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3> - 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1> - 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1> - 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4> - 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3> - 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2> - 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1> - 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4> - 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS - 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0> - 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0> - 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4> - 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2> - 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3> - 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6> - 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3> - 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6> - 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6> - 2705345682U, // 
<6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5> - 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7> - 2668817222U, // <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6> - 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS - 3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4> - 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4> - 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4> - 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4> - 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS - 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6> - 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4> - 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6> - 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS - 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3> - 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5> - 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6> - 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS - 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5> - 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS - 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS - 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS - 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS - 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2> - 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2> - 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2> - 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS - 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3> - 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3> - 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2> - 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS - 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS - 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4> - 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5> - 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7> - 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS - 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5> - 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6> - 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7> - 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS - 1504608358U, // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS - 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS - 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u> - 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6> - 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6> - 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS - 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS - 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS - 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS - 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0> - 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS - 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6> - 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2> - 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1> - 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5> - 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7> - 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0> - 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0> - 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1> - 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5> - 3708724118U, // <6,5,1,2>: 
Cost 4 vext2 <2,u,6,5>, <1,2,3,0> - 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS - 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6> - 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7> - 3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4> - 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3> - 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3> - 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS - 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5> - 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2> - 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5> - 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS - 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6> - 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7> - 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS - 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5> - 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2> - 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5> - 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3> - 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3> - 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6> - 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6> - 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0> - 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3> - 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3> - 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS - 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5> - 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5> - 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5> - 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS - 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS - 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0> - 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6> - 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6> - 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS - 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2> - 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2> - 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2> - 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS - 2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5> - 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6> - 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7> - 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7> - 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1> - 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4> - 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4> - 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4> - 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5> - 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5> - 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6> - 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0> - 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1> - 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS - 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7> - 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7> - 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2> - 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS - 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5> - 2980497922U, // 
<6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6> - 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS - 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS - 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS - 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u> - 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u> - 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7> - 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS - 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS - 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6> - 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u> - 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u> - 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS - 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS - 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4> - 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1> - 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2> - 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3> - 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0> - 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS - 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS - 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2> - 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1> - 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0> - 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS - 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3> - 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7> - 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3> - 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3> - 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS - 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6> - 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3> - 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6> - 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1> - 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3> - 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7> - 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7> - 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3> - 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3> - 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2> - 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4> - 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6> - 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3> - 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5> - 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4> - 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6> - 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS - 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5> - 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS - 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2> - 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6> - 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6> - 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6> - 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS - 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0> - 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6> - 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6> - 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS - 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, 
<5,1,7,3> - 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5> - 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0> - 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6> - 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5> - 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0> - 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS - 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS - 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS - 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2> - 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3> - 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2> - 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS - 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3> - 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS - 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7> - 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS - 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS - 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7> - 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7> - 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6> - 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS - 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4> - 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6> - 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS - 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS - 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS - 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS - 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u> - 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS - 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS - 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS - 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS - 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS - 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS - 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0> - 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS - 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2> - 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0> - 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5> - 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0> - 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7> - 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2> - 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS - 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2> - 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1> - 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0> - 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3> - 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5> - 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7> - 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7> - 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2> - 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3> - 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2> - 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3> - 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2> - 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1> - 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6> - 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7> - 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7> - 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1> - 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1> - 1571211414U, // <6,7,3,0>: Cost 
2 vext2 RHS, <3,0,1,2> - 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3> - 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1> - 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3> - 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6> - 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7> - 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7> - 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1> - 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2> - 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1> - 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3> - 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3> - 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5> - 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4> - 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS - 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6> - 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7> - 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS - 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2> - 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3> - 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4> - 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0> - 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6> - 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5> - 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0> - 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7> - 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7> - 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2> - 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3> - 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3> - 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5> - 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6> - 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7> - 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6> - 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1> - 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1> - 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2> - 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7> - 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3> - 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7> - 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6> - 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7> - 1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7> - 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7> - 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2> - 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2> - 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS - 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0> - 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1> - 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6> - 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS - 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7> - 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1> - 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS - 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0> - 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS - 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2> - 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2> - 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5> - 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0> - 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7> - 1193130221U, // <6,u,0,7>: Cost 2 vrev <u,6,7,0> - 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS - 1571218166U, 
// <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2> - 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1> - 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS - 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3> - 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS - 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7> - 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7> - 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3> - 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS - 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS - 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3> - 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2> - 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1> - 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS - 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u> - 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7> - 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3> - 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1> - 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2> - 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3> - 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3> - 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3> - 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6> - 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7> - 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5> - 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1> - 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2> - 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1> - 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3> - 1661245476U, // <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6> - 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6> - 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4> - 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS - 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6> - 1661614161U, // <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6> - 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS - 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS - 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3> - 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5> - 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7> - 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6> - 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5> - 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS - 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7> - 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS - 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS - 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2> - 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3> - 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7> - 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS - 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5> - 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS - 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1> - 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS - 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS - 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7> - 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7> - 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS - 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS - 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5> - 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7> - 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, 
RHS - 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS - 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS - 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS - 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS - 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1> - 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS - 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS - 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS - 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS - 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS - 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0> - 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1> - 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2> - 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0> - 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1> - 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6> - 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0> - 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7> - 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1> - 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS - 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5> - 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS - 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7> - 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS - 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7> - 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1> - 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0> - 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS - 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2> - 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5> - 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0> - 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1> - 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6> - 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7> - 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7> - 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7> - 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2> - 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2> - 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0> - 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0> - 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3> - 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6> - 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0> - 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0> - 3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7> - 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0> - 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4> - 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5> - 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6> - 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4> - 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6> - 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS - 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6> - 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5> - 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5> - 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS - 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3> - 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7> - 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0> - 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6> - 2651607044U, // <7,0,5,5>: Cost 3 vext2 
<5,6,7,0>, <5,5,5,5> - 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0> - 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7> - 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0> - 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7> - 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7> - 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7> - 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0> - 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7> - 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7> - 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6> - 2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1> - 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7> - 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2> - 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS - 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS - 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7> - 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6> - 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7> - 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0> - 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7> - 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2> - 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2> - 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1> - 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS - 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u> - 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6> - 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS - 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u> - 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1> - 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS - 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS - 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS - 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS - 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2> - 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS - 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1> - 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0> - 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1> - 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2> - 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1> - 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1> - 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6> - 1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3> - 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5> - 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7> - 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1> - 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5> - 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3> - 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1> - 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3> - 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2> - 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0> - 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5> - 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3> - 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2> - 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0> - 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0> - 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0> - 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3> - 2712060894U, 
// <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0> - 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7> - 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5> - 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7> - 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7> - 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3> - 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7> - 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5> - 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5> - 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5> - 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5> - 3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5> - 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS - 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1> - 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0> - 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS - 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS - 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7> - 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6> - 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7> - 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS - 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7> - 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1> - 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1> - 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7> - 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7> - 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7> - 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7> - 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7> - 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7> - 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7> - 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7> - 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1> - 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7> - 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2> - 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1> - 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3> - 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS - 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS - 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7> - 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0> - 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7> - 3121938539U, // <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS - 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS - 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3> - 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0> - 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7> - 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS - 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7> - 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7> - 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS - 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7> - 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2> - 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2> - 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0> - 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0> - 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6> - 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7> - 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1> - 3785803251U, 
// <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2> - 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7> - 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3> - 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0> - 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0> - 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1> - 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3> - 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0> - 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3> - 2602718850U, // <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2> - 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1> - 2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1> - 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3> - 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2> - 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3> - 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5> - 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7> - 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6> - 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5> - 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3> - 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1> - 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5> - 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6> - 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7> - 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5> - 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7> - 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6> - 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0> - 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1> - 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6> - 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3> - 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4> - 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5> - 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6> - 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7> - 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0> - 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0> - 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7> - 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7> - 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3> - 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7> - 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7> - 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7> - 3766486907U, // <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7> - 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7> - 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0> - 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7> - 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS - 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3> - 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7> - 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7> - 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS - 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7> - 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7> - 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7> - 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7> - 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1> - 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0> - 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5> - 2986541158U, // <7,2,7,3>: Cost 3 
vzipr <5,5,7,7>, LHS - 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS - 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7> - 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6> - 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7> - 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS - 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1> - 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5> - 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2> - 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7> - 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5> - 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7> - 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7> - 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0> - 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7> - 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0> - 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2> - 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0> - 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2> - 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1> - 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2> - 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0> - 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0> - 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2> - 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1> - 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1> - 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3> - 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5> - 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6> - 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3> - 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1> - 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5> - 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5> - 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1> - 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0> - 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2> - 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0> - 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5> - 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4> - 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3> - 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3> - 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0> - 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1> - 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3> - 2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3> - 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3> - 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4> - 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7> - 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3> - 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7> - 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3> - 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1> - 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2> - 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4> - 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5> - 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5> - 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6> - 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6> - 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4> - 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6> - 2602819686U, // <7,3,5,0>: Cost 3 vext1 
<u,7,3,5>, LHS - 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3> - 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3> - 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7> - 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5> - 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7> - 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0> - 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0> - 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3> - 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7> - 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3> - 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3> - 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7> - 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7> - 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7> - 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6> - 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7> - 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3> - 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1> - 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5> - 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6> - 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7> - 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5> - 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7> - 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7> - 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7> - 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1> - 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u> - 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2> - 1593153452U, // <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3> - 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3> - 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u> - 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6> - 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3> - 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0> - 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2> - 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0> - 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS - 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2> - 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1> - 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5> - 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1> - 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2> - 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1> - 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1> - 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2> - 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1> - 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3> - 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7> - 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3> - 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0> - 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3> - 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3> - 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0> - 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1> - 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3> - 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2> - 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1> - 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7> - 2712063030U, // <7,4,2,5>: Cost 3 vext3 
RHS, <4,2,5,3> - 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0> - 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5> - 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0> - 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2> - 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5> - 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1> - 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3> - 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6> - 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4> - 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5> - 3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7> - 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4> - 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2> - 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1> - 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4> - 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3> - 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4> - 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5> - 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6> - 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7> - 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5> - 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS - 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7> - 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3> - 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5> - 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS - 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7> - 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS - 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7> - 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS - 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1> - 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5> - 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3> - 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4> - 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6> - 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7> - 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7> - 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1> - 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2> - 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2> - 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2> - 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5> - 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6> - 2651641140U, // <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1> - 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0> - 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1> - 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7> - 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7> - 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS - 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS - 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3> - 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u> - 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6> - 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1> - 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS - 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5> - 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS - 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS - 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS - 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, 
LHS - 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0> - 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1> - 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1> - 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1> - 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2> - 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS - 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1> - 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1> - 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0> - 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7> - 2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5> - 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3> - 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1> - 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3> - 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3> - 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1> - 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0> - 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7> - 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4> - 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7> - 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3> - 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3> - 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3> - 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4> - 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS - 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5> - 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5> - 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3> - 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS - 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5> - 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5> - 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0> - 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0> - 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS - 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7> - 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5> - 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4> - 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS - 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS - 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5> - 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6> - 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6> - 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1> - 2652974800U, // <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3> - 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3> - 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3> - 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4> - 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5> - 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6> - 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7> - 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7> - 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1> - 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7> - 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2> - 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4> - 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5> - 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7> - 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7> - 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0> - 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, 
<5,6,u,0> - 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS - 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3> - 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2> - 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7> - 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS - 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7> - 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0> - 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1> - 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3> - 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS - 2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS - 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0> - 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u> - 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS - 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7> - 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7> - 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0> - 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3> - 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0> - 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS - 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4> - 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0> - 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2> - 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0> - 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0> - 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2> - 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS - 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1> - 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1> - 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0> - 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3> - 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS - 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7> - 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3> - 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3> - 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1> - 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1> - 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3> - 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2> - 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1> - 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5> - 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7> - 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7> - 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3> - 1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3> - 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2> - 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3> - 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3> - 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3> - 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5> - 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7> - 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6> - 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0> - 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0> - 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1> - 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3> - 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5> - 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4> - 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6> - 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS - 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, 
<6,4,6,0> - 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5> - 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS - 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS - 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3> - 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7> - 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4> - 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6> - 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5> - 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0> - 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7> - 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5> - 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1> - 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3> - 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3> - 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4> - 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4> - 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7> - 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6> - 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7> - 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7> - 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1> - 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0> - 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2> - 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0> - 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5> - 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4> - 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6> - 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7> - 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1> - 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1> - 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS - 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2> - 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0> - 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5> - 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS - 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6> - 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3> - 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1> - 2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0> - 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2> - 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0> - 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0> - 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1> - 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0> - 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0> - 2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2> - 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2> - 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3> - 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1> - 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3> - 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5> - 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS - 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3> - 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1> - 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1> - 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3> - 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5> - 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0> - 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2> - 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0> - 3773125818U, // <7,7,2,4>: Cost 4 vext3 
<2,4,5,7>, <7,2,4,5> - 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3> - 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3> - 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3> - 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3> - 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1> - 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3> - 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6> - 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7> - 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5> - 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3> - 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7> - 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7> - 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7> - 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5> - 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7> - 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3> - 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4> - 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4> - 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6> - 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4> - 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6> - 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6> - 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS - 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7> - 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3> - 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7> - 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7> - 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7> - 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7> - 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5> - 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7> - 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS - 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0> - 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7> - 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7> - 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS - 2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7> - 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7> - 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0> - 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7> - 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS - 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2> - 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2> - 2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2> - 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS - 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7> - 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7> - 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS - 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS - 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS - 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2> - 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3> - 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0> - 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS - 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6> - 1595840756U, // <7,7,u,6>: Cost 2 vext2 <u,6,7,7>, <u,6,7,7> - 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS - 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS - 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0> - 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2> 
- 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, <u,0,2,2> - 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2> - 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1> - 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, <u,0,5,1> - 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2> - 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS - 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2> - 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS - 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1> - 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS - 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3> - 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS - 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, <u,1,5,3> - 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1> - 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3> - 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS - 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2> - 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, <u,2,1,0> - 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2> - 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, <u,2,3,0> - 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6> - 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, <u,2,5,4> - 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, <u,2,6,3> - 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3> - 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, <u,2,u,0> - 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1> - 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3> - 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, <u,3,2,6> - 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3> - 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5> - 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7> - 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, <u,3,6,7> - 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, <u,3,7,0> - 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1> - 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1> - 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, <u,4,1,5> - 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6> - 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, <u,4,3,5> - 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4> - 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6> - 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, <u,4,6,6> - 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6> - 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6> - 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS - 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u> - 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, <u,5,2,3> - 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7> - 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u> - 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u> - 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS - 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7> - 564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS - 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, <u,6,0,1> - 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, <u,6,1,7> - 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u> - 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7> - 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6> - 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, <u,6,5,7> - 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u> - 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, <u,6,7,0> - 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, <u,6,u,7> - 1638471936U, // <7,u,7,0>: Cost 2 
vext3 RHS, <u,7,0,1> - 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, <u,7,1,3> - 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, <u,7,2,6> - 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7> - 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5> - 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7> - 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, <u,7,6,2> - 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS - 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS - 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1> - 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2> - 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS - 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, <u,u,3,0> - 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5> - 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6> - 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS - 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS - 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS - 135053414U, // <u,0,0,0>: Cost 1 vdup0 LHS - 1611489290U, // <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1> - 1611489300U, // <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2> - 2568054923U, // <u,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0> - 1481706806U, // <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS - 2555449040U, // <u,0,0,5>: Cost 3 vext1 <0,u,0,0>, <5,1,7,3> - 2591282078U, // <u,0,0,6>: Cost 3 vext1 <6,u,0,0>, <6,u,0,0> - 2591945711U, // <u,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0> - 135053414U, // <u,0,0,u>: Cost 1 vdup0 LHS - 1493655654U, // <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS - 1860550758U, // <u,0,1,1>: Cost 2 vzipl LHS, LHS - 537747563U, // <u,0,1,2>: Cost 1 vext3 LHS, LHS - 2625135576U, // <u,0,1,3>: Cost 3 vext2 <1,2,u,0>, <1,3,1,3> - 1493658934U, // <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS - 2625135760U, // <u,0,1,5>: Cost 3 vext2 <1,2,u,0>, <1,5,3,7> - 1517548447U, // <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1> - 2591290362U, // <u,0,1,7>: Cost 3 vext1 <6,u,0,1>, <7,0,1,2> - 537747612U, // <u,0,1,u>: Cost 1 vext3 LHS, LHS - 1611489444U, // <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2> - 2685231276U, // <u,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1> - 1994768486U, // <u,0,2,2>: Cost 2 vtrnl LHS, LHS - 2685231294U, // <u,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1> - 1611489484U, // <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6> - 2712068310U, // <u,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7> - 2625136570U, // <u,0,2,6>: Cost 3 vext2 <1,2,u,0>, <2,6,3,7> - 2591962097U, // <u,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2> - 1611489516U, // <u,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2> - 2954067968U, // <u,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0> - 2685231356U, // <u,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0> - 72589981U, // <u,0,3,2>: Cost 1 vrev LHS - 2625137052U, // <u,0,3,3>: Cost 3 vext2 <1,2,u,0>, <3,3,3,3> - 2625137154U, // <u,0,3,4>: Cost 3 vext2 <1,2,u,0>, <3,4,5,6> - 2639071848U, // <u,0,3,5>: Cost 3 vext2 <3,5,u,0>, <3,5,u,0> - 2639735481U, // <u,0,3,6>: Cost 3 vext2 <3,6,u,0>, <3,6,u,0> - 2597279354U, // <u,0,3,7>: Cost 3 vext1 <7,u,0,3>, <7,u,0,3> - 73032403U, // <u,0,3,u>: Cost 1 vrev LHS - 2687074636U, // <u,0,4,0>: Cost 3 vext3 <0,4,0,u>, <0,4,0,u> - 1611489618U, // <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5> - 1611489628U, // <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6> - 3629222038U, // <u,0,4,3>: Cost 4 vext1 <0,u,0,4>, <3,0,1,2> - 2555481398U, // <u,0,4,4>: Cost 3 vext1 <0,u,0,4>, RHS - 1551396150U, // <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS - 2651680116U, // <u,0,4,6>: Cost 3 vext2 <5,6,u,0>, <4,6,4,6> - 2646150600U, // <u,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0> - 1611932050U, // <u,0,4,u>: Cost 2 vext3 LHS, 
<0,4,u,6> - 2561458278U, // <u,0,5,0>: Cost 3 vext1 <1,u,0,5>, LHS - 1863532646U, // <u,0,5,1>: Cost 2 vzipl RHS, LHS - 2712068526U, // <u,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7> - 2649689976U, // <u,0,5,3>: Cost 3 vext2 <5,3,u,0>, <5,3,u,0> - 2220237489U, // <u,0,5,4>: Cost 3 vrev <0,u,4,5> - 2651680772U, // <u,0,5,5>: Cost 3 vext2 <5,6,u,0>, <5,5,5,5> - 1577939051U, // <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0> - 2830077238U, // <u,0,5,7>: Cost 3 vuzpr <1,u,3,0>, RHS - 1579266317U, // <u,0,5,u>: Cost 2 vext2 <5,u,u,0>, <5,u,u,0> - 2555494502U, // <u,0,6,0>: Cost 3 vext1 <0,u,0,6>, LHS - 2712068598U, // <u,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7> - 1997750374U, // <u,0,6,2>: Cost 2 vtrnl RHS, LHS - 2655662673U, // <u,0,6,3>: Cost 3 vext2 <6,3,u,0>, <6,3,u,0> - 2555497782U, // <u,0,6,4>: Cost 3 vext1 <0,u,0,6>, RHS - 2651681459U, // <u,0,6,5>: Cost 3 vext2 <5,6,u,0>, <6,5,0,u> - 2651681592U, // <u,0,6,6>: Cost 3 vext2 <5,6,u,0>, <6,6,6,6> - 2651681614U, // <u,0,6,7>: Cost 3 vext2 <5,6,u,0>, <6,7,0,1> - 1997750428U, // <u,0,6,u>: Cost 2 vtrnl RHS, LHS - 2567446630U, // <u,0,7,0>: Cost 3 vext1 <2,u,0,7>, LHS - 2567447446U, // <u,0,7,1>: Cost 3 vext1 <2,u,0,7>, <1,2,3,0> - 2567448641U, // <u,0,7,2>: Cost 3 vext1 <2,u,0,7>, <2,u,0,7> - 2573421338U, // <u,0,7,3>: Cost 3 vext1 <3,u,0,7>, <3,u,0,7> - 2567449910U, // <u,0,7,4>: Cost 3 vext1 <2,u,0,7>, RHS - 2651682242U, // <u,0,7,5>: Cost 3 vext2 <5,6,u,0>, <7,5,6,u> - 2591339429U, // <u,0,7,6>: Cost 3 vext1 <6,u,0,7>, <6,u,0,7> - 2651682412U, // <u,0,7,7>: Cost 3 vext2 <5,6,u,0>, <7,7,7,7> - 2567452462U, // <u,0,7,u>: Cost 3 vext1 <2,u,0,7>, LHS - 135053414U, // <u,0,u,0>: Cost 1 vdup0 LHS - 1611489938U, // <u,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1> - 537748125U, // <u,0,u,2>: Cost 1 vext3 LHS, LHS - 2685674148U, // <u,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1> - 1611932338U, // <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6> - 1551399066U, // <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS - 1517605798U, // <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u> - 2830077481U, // <u,0,u,7>: Cost 3 vuzpr <1,u,3,0>, RHS - 537748179U, // <u,0,u,u>: Cost 1 vext3 LHS, LHS - 1544101961U, // <u,1,0,0>: Cost 2 vext2 <0,0,u,1>, <0,0,u,1> - 1558036582U, // <u,1,0,1>: Cost 2 vext2 <2,3,u,1>, LHS - 2619171051U, // <u,1,0,2>: Cost 3 vext2 <0,2,u,1>, <0,2,u,1> - 1611490038U, // <u,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2> - 2555522358U, // <u,1,0,4>: Cost 3 vext1 <0,u,1,0>, RHS - 2712068871U, // <u,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1> - 2591355815U, // <u,1,0,6>: Cost 3 vext1 <6,u,1,0>, <6,u,1,0> - 2597328512U, // <u,1,0,7>: Cost 3 vext1 <7,u,1,0>, <7,u,1,0> - 1611490083U, // <u,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2> - 1481785446U, // <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, LHS - 202162278U, // <u,1,1,1>: Cost 1 vdup1 LHS - 2555528808U, // <u,1,1,2>: Cost 3 vext1 <0,u,1,1>, <2,2,2,2> - 1611490120U, // <u,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3> - 1481788726U, // <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS - 2689876828U, // <u,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5> - 2591364008U, // <u,1,1,6>: Cost 3 vext1 <6,u,1,1>, <6,u,1,1> - 2592691274U, // <u,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1> - 202162278U, // <u,1,1,u>: Cost 1 vdup1 LHS - 1499709542U, // <u,1,2,0>: Cost 2 vext1 <3,u,1,2>, LHS - 2689876871U, // <u,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3> - 2631116445U, // <u,1,2,2>: Cost 3 vext2 <2,2,u,1>, <2,2,u,1> - 835584U, // <u,1,2,3>: Cost 0 copy LHS - 1499712822U, // <u,1,2,4>: Cost 2 vext1 <3,u,1,2>, RHS - 2689876907U, // <u,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3> - 2631780282U, // <u,1,2,6>: Cost 3 vext2 <2,3,u,1>, <2,6,3,7> - 
1523603074U, // <u,1,2,7>: Cost 2 vext1 <7,u,1,2>, <7,u,1,2> - 835584U, // <u,1,2,u>: Cost 0 copy LHS - 1487773798U, // <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS - 1611490264U, // <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3> - 2685232094U, // <u,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0> - 2018746470U, // <u,1,3,3>: Cost 2 vtrnr LHS, LHS - 1487777078U, // <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS - 1611490304U, // <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7> - 2685674505U, // <u,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7> - 2640407307U, // <u,1,3,7>: Cost 3 vext2 <3,7,u,1>, <3,7,u,1> - 1611490327U, // <u,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3> - 1567992749U, // <u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1> - 2693121070U, // <u,1,4,1>: Cost 3 vext3 <1,4,1,u>, <1,4,1,u> - 2693194807U, // <u,1,4,2>: Cost 3 vext3 <1,4,2,u>, <1,4,2,u> - 1152386432U, // <u,1,4,3>: Cost 2 vrev <1,u,3,4> - 2555555126U, // <u,1,4,4>: Cost 3 vext1 <0,u,1,4>, RHS - 1558039862U, // <u,1,4,5>: Cost 2 vext2 <2,3,u,1>, RHS - 2645716371U, // <u,1,4,6>: Cost 3 vext2 <4,6,u,1>, <4,6,u,1> - 2597361284U, // <u,1,4,7>: Cost 3 vext1 <7,u,1,4>, <7,u,1,4> - 1152755117U, // <u,1,4,u>: Cost 2 vrev <1,u,u,4> - 1481818214U, // <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, LHS - 2555560694U, // <u,1,5,1>: Cost 3 vext1 <0,u,1,5>, <1,0,3,2> - 2555561576U, // <u,1,5,2>: Cost 3 vext1 <0,u,1,5>, <2,2,2,2> - 1611490448U, // <u,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7> - 1481821494U, // <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS - 2651025435U, // <u,1,5,5>: Cost 3 vext2 <5,5,u,1>, <5,5,u,1> - 2651689068U, // <u,1,5,6>: Cost 3 vext2 <5,6,u,1>, <5,6,u,1> - 2823966006U, // <u,1,5,7>: Cost 3 vuzpr <0,u,1,1>, RHS - 1611932861U, // <u,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7> - 2555568230U, // <u,1,6,0>: Cost 3 vext1 <0,u,1,6>, LHS - 2689877199U, // <u,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7> - 2712069336U, // <u,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7> - 2685232353U, // <u,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7> - 2555571510U, // <u,1,6,4>: Cost 3 vext1 <0,u,1,6>, RHS - 2689877235U, // <u,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7> - 2657661765U, // <u,1,6,6>: Cost 3 vext2 <6,6,u,1>, <6,6,u,1> - 1584583574U, // <u,1,6,7>: Cost 2 vext2 <6,7,u,1>, <6,7,u,1> - 1585247207U, // <u,1,6,u>: Cost 2 vext2 <6,u,u,1>, <6,u,u,1> - 2561548390U, // <u,1,7,0>: Cost 3 vext1 <1,u,1,7>, LHS - 2561549681U, // <u,1,7,1>: Cost 3 vext1 <1,u,1,7>, <1,u,1,7> - 2573493926U, // <u,1,7,2>: Cost 3 vext1 <3,u,1,7>, <2,3,0,1> - 2042962022U, // <u,1,7,3>: Cost 2 vtrnr RHS, LHS - 2561551670U, // <u,1,7,4>: Cost 3 vext1 <1,u,1,7>, RHS - 2226300309U, // <u,1,7,5>: Cost 3 vrev <1,u,5,7> - 2658325990U, // <u,1,7,6>: Cost 3 vext2 <6,7,u,1>, <7,6,1,u> - 2658326124U, // <u,1,7,7>: Cost 3 vext2 <6,7,u,1>, <7,7,7,7> - 2042962027U, // <u,1,7,u>: Cost 2 vtrnr RHS, LHS - 1481842790U, // <u,1,u,0>: Cost 2 vext1 <0,u,1,u>, LHS - 202162278U, // <u,1,u,1>: Cost 1 vdup1 LHS - 2685674867U, // <u,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0> - 835584U, // <u,1,u,3>: Cost 0 copy LHS - 1481846070U, // <u,1,u,4>: Cost 2 vext1 <0,u,1,u>, RHS - 1611933077U, // <u,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7> - 2685674910U, // <u,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7> - 1523652232U, // <u,1,u,7>: Cost 2 vext1 <7,u,1,u>, <7,u,1,u> - 835584U, // <u,1,u,u>: Cost 0 copy LHS - 1544110154U, // <u,2,0,0>: Cost 2 vext2 <0,0,u,2>, <0,0,u,2> - 1545437286U, // <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS - 1545437420U, // <u,2,0,2>: Cost 2 vext2 <0,2,u,2>, <0,2,u,2> - 2685232589U, // <u,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0> - 2619179346U, // <u,2,0,4>: Cost 3 vext2 <0,2,u,2>, <0,4,1,5> - 2712069606U, // <u,2,0,5>: Cost 3 vext3 
RHS, <2,0,5,7> - 2689877484U, // <u,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4> - 2659656273U, // <u,2,0,7>: Cost 3 vext2 <7,0,u,2>, <0,7,2,u> - 1545437853U, // <u,2,0,u>: Cost 2 vext2 <0,2,u,2>, LHS - 1550082851U, // <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2> - 2619179828U, // <u,2,1,1>: Cost 3 vext2 <0,2,u,2>, <1,1,1,1> - 2619179926U, // <u,2,1,2>: Cost 3 vext2 <0,2,u,2>, <1,2,3,0> - 2685232671U, // <u,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1> - 2555604278U, // <u,2,1,4>: Cost 3 vext1 <0,u,2,1>, RHS - 2619180176U, // <u,2,1,5>: Cost 3 vext2 <0,2,u,2>, <1,5,3,7> - 2689877564U, // <u,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3> - 2602718850U, // <u,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2> - 1158703235U, // <u,2,1,u>: Cost 2 vrev <2,u,u,1> - 1481867366U, // <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, LHS - 2555609846U, // <u,2,2,1>: Cost 3 vext1 <0,u,2,2>, <1,0,3,2> - 269271142U, // <u,2,2,2>: Cost 1 vdup2 LHS - 1611490930U, // <u,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3> - 1481870646U, // <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS - 2689877640U, // <u,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7> - 2619180986U, // <u,2,2,6>: Cost 3 vext2 <0,2,u,2>, <2,6,3,7> - 2593436837U, // <u,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2> - 269271142U, // <u,2,2,u>: Cost 1 vdup2 LHS - 408134301U, // <u,2,3,0>: Cost 1 vext1 LHS, LHS - 1481876214U, // <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2> - 1481877096U, // <u,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2> - 1880326246U, // <u,2,3,3>: Cost 2 vzipr LHS, LHS - 408137014U, // <u,2,3,4>: Cost 1 vext1 LHS, RHS - 1529654992U, // <u,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3> - 1529655802U, // <u,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3> - 1529656314U, // <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2> - 408139566U, // <u,2,3,u>: Cost 1 vext1 LHS, LHS - 1567853468U, // <u,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2> - 2561598362U, // <u,2,4,1>: Cost 3 vext1 <1,u,2,4>, <1,2,3,4> - 2555627214U, // <u,2,4,2>: Cost 3 vext1 <0,u,2,4>, <2,3,4,5> - 2685232918U, // <u,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5> - 2555628854U, // <u,2,4,4>: Cost 3 vext1 <0,u,2,4>, RHS - 1545440566U, // <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS - 1571982740U, // <u,2,4,6>: Cost 2 vext2 <4,6,u,2>, <4,6,u,2> - 2592125957U, // <u,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4> - 1545440809U, // <u,2,4,u>: Cost 2 vext2 <0,2,u,2>, RHS - 2555633766U, // <u,2,5,0>: Cost 3 vext1 <0,u,2,5>, LHS - 2561606550U, // <u,2,5,1>: Cost 3 vext1 <1,u,2,5>, <1,2,3,0> - 2689877856U, // <u,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7> - 2685233000U, // <u,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6> - 1158441059U, // <u,2,5,4>: Cost 2 vrev <2,u,4,5> - 2645725188U, // <u,2,5,5>: Cost 3 vext2 <4,6,u,2>, <5,5,5,5> - 2689877892U, // <u,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7> - 2823900470U, // <u,2,5,7>: Cost 3 vuzpr <0,u,0,2>, RHS - 1158736007U, // <u,2,5,u>: Cost 2 vrev <2,u,u,5> - 1481900134U, // <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, LHS - 2555642614U, // <u,2,6,1>: Cost 3 vext1 <0,u,2,6>, <1,0,3,2> - 2555643496U, // <u,2,6,2>: Cost 3 vext1 <0,u,2,6>, <2,2,2,2> - 1611491258U, // <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7> - 1481903414U, // <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS - 2689877964U, // <u,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7> - 2689877973U, // <u,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7> - 2645726030U, // <u,2,6,7>: Cost 3 vext2 <4,6,u,2>, <6,7,0,1> - 1611933671U, // <u,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7> - 1585919033U, // <u,2,7,0>: Cost 2 vext2 <7,0,u,2>, <7,0,u,2> - 2573566710U, // <u,2,7,1>: Cost 3 vext1 <3,u,2,7>, <1,0,3,2> - 2567596115U, // <u,2,7,2>: Cost 3 vext1 <2,u,2,7>, <2,u,2,7> - 1906901094U, // <u,2,7,3>: Cost 2 vzipr RHS, LHS - 
2555653430U, // <u,2,7,4>: Cost 3 vext1 <0,u,2,7>, RHS - 2800080230U, // <u,2,7,5>: Cost 3 vuzpl LHS, <7,4,5,6> - 2980643164U, // <u,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6> - 2645726828U, // <u,2,7,7>: Cost 3 vext2 <4,6,u,2>, <7,7,7,7> - 1906901099U, // <u,2,7,u>: Cost 2 vzipr RHS, LHS - 408175266U, // <u,2,u,0>: Cost 1 vext1 LHS, LHS - 1545443118U, // <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS - 269271142U, // <u,2,u,2>: Cost 1 vdup2 LHS - 1611491416U, // <u,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3> - 408177974U, // <u,2,u,4>: Cost 1 vext1 LHS, RHS - 1545443482U, // <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS - 1726339226U, // <u,2,u,6>: Cost 2 vuzpl LHS, RHS - 1529697274U, // <u,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2> - 408180526U, // <u,2,u,u>: Cost 1 vext1 LHS, LHS - 1544781824U, // <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0> - 471040156U, // <u,3,0,1>: Cost 1 vext2 LHS, LHS - 1544781988U, // <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2> - 2618523900U, // <u,3,0,3>: Cost 3 vext2 LHS, <0,3,1,0> - 1544782162U, // <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5> - 2238188352U, // <u,3,0,5>: Cost 3 vrev <3,u,5,0> - 2623169023U, // <u,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7> - 2238335826U, // <u,3,0,7>: Cost 3 vrev <3,u,7,0> - 471040669U, // <u,3,0,u>: Cost 1 vext2 LHS, LHS - 1544782582U, // <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2> - 1544782644U, // <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1> - 1544782742U, // <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0> - 1544782808U, // <u,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3> - 2618524733U, // <u,3,1,4>: Cost 3 vext2 LHS, <1,4,3,5> - 1544782992U, // <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7> - 2618524897U, // <u,3,1,6>: Cost 3 vext2 LHS, <1,6,3,7> - 2703517987U, // <u,3,1,7>: Cost 3 vext3 <3,1,7,u>, <3,1,7,u> - 1544783213U, // <u,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3> - 1529716838U, // <u,3,2,0>: Cost 2 vext1 <u,u,3,2>, LHS - 1164167966U, // <u,3,2,1>: Cost 2 vrev <3,u,1,2> - 1544783464U, // <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2> - 1544783526U, // <u,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1> - 1529720118U, // <u,3,2,4>: Cost 2 vext1 <u,u,3,2>, RHS - 2618525544U, // <u,3,2,5>: Cost 3 vext2 LHS, <2,5,3,6> - 1544783802U, // <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7> - 2704181620U, // <u,3,2,7>: Cost 3 vext3 <3,2,7,u>, <3,2,7,u> - 1544783931U, // <u,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1> - 1544784022U, // <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2> - 1487922559U, // <u,3,3,1>: Cost 2 vext1 <1,u,3,3>, <1,u,3,3> - 1493895256U, // <u,3,3,2>: Cost 2 vext1 <2,u,3,3>, <2,u,3,3> - 336380006U, // <u,3,3,3>: Cost 1 vdup3 LHS - 1544784386U, // <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6> - 2824054478U, // <u,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5> - 2238286668U, // <u,3,3,6>: Cost 3 vrev <3,u,6,3> - 2954069136U, // <u,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7> - 336380006U, // <u,3,3,u>: Cost 1 vdup3 LHS - 1487929446U, // <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS - 1487930752U, // <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4> - 2623171644U, // <u,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0> - 2561673366U, // <u,3,4,3>: Cost 3 vext1 <1,u,3,4>, <3,0,1,2> - 1487932726U, // <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS - 471043382U, // <u,3,4,5>: Cost 1 vext2 LHS, RHS - 1592561012U, // <u,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6> - 2238368598U, // <u,3,4,7>: Cost 3 vrev <3,u,7,4> - 471043625U, // <u,3,4,u>: Cost 1 vext2 LHS, RHS - 2555707494U, // <u,3,5,0>: Cost 3 vext1 <0,u,3,5>, LHS - 1574645465U, // <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3> - 2567653106U, // <u,3,5,2>: Cost 3 vext1 <2,u,3,5>, <2,3,u,5> - 2555709954U, // <u,3,5,3>: Cost 3 vext1 <0,u,3,5>, <3,4,5,6> - 1592561606U, // <u,3,5,4>: 
Cost 2 vext2 LHS, <5,4,7,6> - 1592561668U, // <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5> - 1592561762U, // <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0> - 1750314294U, // <u,3,5,7>: Cost 2 vuzpr LHS, RHS - 1750314295U, // <u,3,5,u>: Cost 2 vuzpr LHS, RHS - 2623172897U, // <u,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2> - 2561688962U, // <u,3,6,1>: Cost 3 vext1 <1,u,3,6>, <1,u,3,6> - 1581281795U, // <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3> - 2706541204U, // <u,3,6,3>: Cost 3 vext3 <3,6,3,u>, <3,6,3,u> - 2623173261U, // <u,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6> - 1164495686U, // <u,3,6,5>: Cost 2 vrev <3,u,5,6> - 1592562488U, // <u,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6> - 1592562510U, // <u,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1> - 1164716897U, // <u,3,6,u>: Cost 2 vrev <3,u,u,6> - 1487954022U, // <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS - 1487955331U, // <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7> - 1493928028U, // <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7> - 2561697942U, // <u,3,7,3>: Cost 3 vext1 <1,u,3,7>, <3,0,1,2> - 1487957302U, // <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS - 2707352311U, // <u,3,7,5>: Cost 3 vext3 <3,7,5,u>, <3,7,5,u> - 2655024623U, // <u,3,7,6>: Cost 3 vext2 <6,2,u,3>, <7,6,2,u> - 1592563308U, // <u,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7> - 1487959854U, // <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS - 1544787667U, // <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2> - 471045934U, // <u,3,u,1>: Cost 1 vext2 LHS, LHS - 1549432709U, // <u,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0> - 336380006U, // <u,3,u,3>: Cost 1 vdup3 LHS - 1544788031U, // <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6> - 471046298U, // <u,3,u,5>: Cost 1 vext2 LHS, RHS - 1549433040U, // <u,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7> - 1750314537U, // <u,3,u,7>: Cost 2 vuzpr LHS, RHS - 471046501U, // <u,3,u,u>: Cost 1 vext2 LHS, LHS - 2625167360U, // <u,4,0,0>: Cost 3 vext2 <1,2,u,4>, <0,0,0,0> - 1551425638U, // <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS - 2619195630U, // <u,4,0,2>: Cost 3 vext2 <0,2,u,4>, <0,2,u,4> - 2619343104U, // <u,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4> - 2625167698U, // <u,4,0,4>: Cost 3 vext2 <1,2,u,4>, <0,4,1,5> - 1638329234U, // <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1> - 1638329244U, // <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2> - 3787803556U, // <u,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1> - 1551426205U, // <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS - 2555748454U, // <u,4,1,0>: Cost 3 vext1 <0,u,4,1>, LHS - 2625168180U, // <u,4,1,1>: Cost 3 vext2 <1,2,u,4>, <1,1,1,1> - 1551426503U, // <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4> - 2625168344U, // <u,4,1,3>: Cost 3 vext2 <1,2,u,4>, <1,3,1,3> - 2555751734U, // <u,4,1,4>: Cost 3 vext1 <0,u,4,1>, RHS - 1860554038U, // <u,4,1,5>: Cost 2 vzipl LHS, RHS - 2689879022U, // <u,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3> - 2592248852U, // <u,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1> - 1555408301U, // <u,4,1,u>: Cost 2 vext2 <1,u,u,4>, <1,u,u,4> - 2555756646U, // <u,4,2,0>: Cost 3 vext1 <0,u,4,2>, LHS - 2625168943U, // <u,4,2,1>: Cost 3 vext2 <1,2,u,4>, <2,1,4,u> - 2625169000U, // <u,4,2,2>: Cost 3 vext2 <1,2,u,4>, <2,2,2,2> - 2619197134U, // <u,4,2,3>: Cost 3 vext2 <0,2,u,4>, <2,3,4,5> - 2555759926U, // <u,4,2,4>: Cost 3 vext1 <0,u,4,2>, RHS - 2712071222U, // <u,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3> - 1994771766U, // <u,4,2,6>: Cost 2 vtrnl LHS, RHS - 2592257045U, // <u,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2> - 1994771784U, // <u,4,2,u>: Cost 2 vtrnl LHS, RHS - 2625169558U, // <u,4,3,0>: Cost 3 vext2 <1,2,u,4>, <3,0,1,2> - 2567709594U, // <u,4,3,1>: Cost 3 vext1 <2,u,4,3>, <1,2,3,4> - 2567710817U, // <u,4,3,2>: Cost 3 vext1 
<2,u,4,3>, <2,u,4,3> - 2625169820U, // <u,4,3,3>: Cost 3 vext2 <1,2,u,4>, <3,3,3,3> - 2625169922U, // <u,4,3,4>: Cost 3 vext2 <1,2,u,4>, <3,4,5,6> - 2954069710U, // <u,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5> - 2954068172U, // <u,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6> - 3903849472U, // <u,4,3,7>: Cost 4 vuzpr <1,u,3,4>, <1,3,5,7> - 2954068174U, // <u,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u> - 1505919078U, // <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS - 2567717831U, // <u,4,4,1>: Cost 3 vext1 <2,u,4,4>, <1,2,u,4> - 2567719010U, // <u,4,4,2>: Cost 3 vext1 <2,u,4,4>, <2,u,4,4> - 2570373542U, // <u,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4> - 161926454U, // <u,4,4,4>: Cost 1 vdup0 RHS - 1551428918U, // <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS - 1638329572U, // <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6> - 2594927963U, // <u,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4> - 161926454U, // <u,4,4,u>: Cost 1 vdup0 RHS - 1493983334U, // <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS - 2689879301U, // <u,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3> - 1493985379U, // <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5> - 2567727254U, // <u,4,5,3>: Cost 3 vext1 <2,u,4,5>, <3,0,1,2> - 1493986614U, // <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS - 1863535926U, // <u,4,5,5>: Cost 2 vzipl RHS, RHS - 537750838U, // <u,4,5,6>: Cost 1 vext3 LHS, RHS - 2830110006U, // <u,4,5,7>: Cost 3 vuzpr <1,u,3,4>, RHS - 537750856U, // <u,4,5,u>: Cost 1 vext3 LHS, RHS - 1482047590U, // <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, LHS - 2555790070U, // <u,4,6,1>: Cost 3 vext1 <0,u,4,6>, <1,0,3,2> - 2555790952U, // <u,4,6,2>: Cost 3 vext1 <0,u,4,6>, <2,2,2,2> - 2555791510U, // <u,4,6,3>: Cost 3 vext1 <0,u,4,6>, <3,0,1,2> - 1482050870U, // <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS - 2689879422U, // <u,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7> - 1997753654U, // <u,4,6,6>: Cost 2 vtrnl RHS, RHS - 2712071562U, // <u,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1> - 1482053422U, // <u,4,6,u>: Cost 2 vext1 <0,u,4,6>, LHS - 2567741542U, // <u,4,7,0>: Cost 3 vext1 <2,u,4,7>, LHS - 2567742362U, // <u,4,7,1>: Cost 3 vext1 <2,u,4,7>, <1,2,3,4> - 2567743589U, // <u,4,7,2>: Cost 3 vext1 <2,u,4,7>, <2,u,4,7> - 2573716286U, // <u,4,7,3>: Cost 3 vext1 <3,u,4,7>, <3,u,4,7> - 2567744822U, // <u,4,7,4>: Cost 3 vext1 <2,u,4,7>, RHS - 2712071624U, // <u,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0> - 96808489U, // <u,4,7,6>: Cost 1 vrev RHS - 2651715180U, // <u,4,7,7>: Cost 3 vext2 <5,6,u,4>, <7,7,7,7> - 96955963U, // <u,4,7,u>: Cost 1 vrev RHS - 1482063974U, // <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, LHS - 1551431470U, // <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS - 1494009958U, // <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u> - 2555807894U, // <u,4,u,3>: Cost 3 vext1 <0,u,4,u>, <3,0,1,2> - 161926454U, // <u,4,u,4>: Cost 1 vdup0 RHS - 1551431834U, // <u,4,u,5>: Cost 2 vext2 <1,2,u,4>, RHS - 537751081U, // <u,4,u,6>: Cost 1 vext3 LHS, RHS - 2830110249U, // <u,4,u,7>: Cost 3 vuzpr <1,u,3,4>, RHS - 537751099U, // <u,4,u,u>: Cost 1 vext3 LHS, RHS - 2631811072U, // <u,5,0,0>: Cost 3 vext2 <2,3,u,5>, <0,0,0,0> - 1558069350U, // <u,5,0,1>: Cost 2 vext2 <2,3,u,5>, LHS - 2619203823U, // <u,5,0,2>: Cost 3 vext2 <0,2,u,5>, <0,2,u,5> - 2619867456U, // <u,5,0,3>: Cost 3 vext2 <0,3,u,5>, <0,3,u,5> - 1546273106U, // <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5> - 2733010539U, // <u,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1> - 2597622682U, // <u,5,0,6>: Cost 3 vext1 <7,u,5,0>, <6,7,u,5> - 1176539396U, // <u,5,0,7>: Cost 2 vrev <5,u,7,0> - 1558069917U, // <u,5,0,u>: Cost 2 vext2 <2,3,u,5>, LHS - 1505968230U, // <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS - 2624512887U, // <u,5,1,1>: 
Cost 3 vext2 <1,1,u,5>, <1,1,u,5> - 2631811990U, // <u,5,1,2>: Cost 3 vext2 <2,3,u,5>, <1,2,3,0> - 2618541056U, // <u,5,1,3>: Cost 3 vext2 <0,1,u,5>, <1,3,5,7> - 1505971510U, // <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, RHS - 2627167419U, // <u,5,1,5>: Cost 3 vext2 <1,5,u,5>, <1,5,u,5> - 2579714554U, // <u,5,1,6>: Cost 3 vext1 <4,u,5,1>, <6,2,7,3> - 1638330064U, // <u,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3> - 1638477529U, // <u,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3> - 2561802342U, // <u,5,2,0>: Cost 3 vext1 <1,u,5,2>, LHS - 2561803264U, // <u,5,2,1>: Cost 3 vext1 <1,u,5,2>, <1,3,5,7> - 2631149217U, // <u,5,2,2>: Cost 3 vext2 <2,2,u,5>, <2,2,u,5> - 1558071026U, // <u,5,2,3>: Cost 2 vext2 <2,3,u,5>, <2,3,u,5> - 2561805622U, // <u,5,2,4>: Cost 3 vext1 <1,u,5,2>, RHS - 2714062607U, // <u,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3> - 2631813050U, // <u,5,2,6>: Cost 3 vext2 <2,3,u,5>, <2,6,3,7> - 3092335926U, // <u,5,2,7>: Cost 3 vtrnr <0,u,0,2>, RHS - 1561389191U, // <u,5,2,u>: Cost 2 vext2 <2,u,u,5>, <2,u,u,5> - 2561810534U, // <u,5,3,0>: Cost 3 vext1 <1,u,5,3>, LHS - 2561811857U, // <u,5,3,1>: Cost 3 vext1 <1,u,5,3>, <1,u,5,3> - 2631813474U, // <u,5,3,2>: Cost 3 vext2 <2,3,u,5>, <3,2,5,u> - 2631813532U, // <u,5,3,3>: Cost 3 vext2 <2,3,u,5>, <3,3,3,3> - 2619869698U, // <u,5,3,4>: Cost 3 vext2 <0,3,u,5>, <3,4,5,6> - 3001847002U, // <u,5,3,5>: Cost 3 vzipr LHS, <4,4,5,5> - 2954070530U, // <u,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6> - 2018749750U, // <u,5,3,7>: Cost 2 vtrnr LHS, RHS - 2018749751U, // <u,5,3,u>: Cost 2 vtrnr LHS, RHS - 2573762662U, // <u,5,4,0>: Cost 3 vext1 <3,u,5,4>, LHS - 2620017634U, // <u,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0> - 2573764338U, // <u,5,4,2>: Cost 3 vext1 <3,u,5,4>, <2,3,u,5> - 2573765444U, // <u,5,4,3>: Cost 3 vext1 <3,u,5,4>, <3,u,5,4> - 1570680053U, // <u,5,4,4>: Cost 2 vext2 <4,4,u,5>, <4,4,u,5> - 1558072630U, // <u,5,4,5>: Cost 2 vext2 <2,3,u,5>, RHS - 2645749143U, // <u,5,4,6>: Cost 3 vext2 <4,6,u,5>, <4,6,u,5> - 1638330310U, // <u,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6> - 1558072873U, // <u,5,4,u>: Cost 2 vext2 <2,3,u,5>, RHS - 1506000998U, // <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS - 2561827984U, // <u,5,5,1>: Cost 3 vext1 <1,u,5,5>, <1,5,3,7> - 2579744360U, // <u,5,5,2>: Cost 3 vext1 <4,u,5,5>, <2,2,2,2> - 2579744918U, // <u,5,5,3>: Cost 3 vext1 <4,u,5,5>, <3,0,1,2> - 1506004278U, // <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, RHS - 229035318U, // <u,5,5,5>: Cost 1 vdup1 RHS - 2712072206U, // <u,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6> - 1638330392U, // <u,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7> - 229035318U, // <u,5,5,u>: Cost 1 vdup1 RHS - 1500037222U, // <u,5,6,0>: Cost 2 vext1 <3,u,5,6>, LHS - 2561836436U, // <u,5,6,1>: Cost 3 vext1 <1,u,5,6>, <1,u,5,6> - 2567809133U, // <u,5,6,2>: Cost 3 vext1 <2,u,5,6>, <2,u,5,6> - 1500040006U, // <u,5,6,3>: Cost 2 vext1 <3,u,5,6>, <3,u,5,6> - 1500040502U, // <u,5,6,4>: Cost 2 vext1 <3,u,5,6>, RHS - 2714062935U, // <u,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7> - 2712072288U, // <u,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7> - 27705344U, // <u,5,6,7>: Cost 0 copy RHS - 27705344U, // <u,5,6,u>: Cost 0 copy RHS - 1488101478U, // <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS - 1488102805U, // <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7> - 2561844840U, // <u,5,7,2>: Cost 3 vext1 <1,u,5,7>, <2,2,2,2> - 2561845398U, // <u,5,7,3>: Cost 3 vext1 <1,u,5,7>, <3,0,1,2> - 1488104758U, // <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS - 1638330536U, // <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7> - 2712072362U, // <u,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0> - 2042965302U, // <u,5,7,7>: Cost 2 vtrnr RHS, RHS - 
1488107310U, // <u,5,7,u>: Cost 2 vext1 <1,u,5,7>, LHS - 1488109670U, // <u,5,u,0>: Cost 2 vext1 <1,u,5,u>, LHS - 1488110998U, // <u,5,u,1>: Cost 2 vext1 <1,u,5,u>, <1,u,5,u> - 2561853032U, // <u,5,u,2>: Cost 3 vext1 <1,u,5,u>, <2,2,2,2> - 1500056392U, // <u,5,u,3>: Cost 2 vext1 <3,u,5,u>, <3,u,5,u> - 1488112950U, // <u,5,u,4>: Cost 2 vext1 <1,u,5,u>, RHS - 229035318U, // <u,5,u,5>: Cost 1 vdup1 RHS - 2954111490U, // <u,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6> - 27705344U, // <u,5,u,7>: Cost 0 copy RHS - 27705344U, // <u,5,u,u>: Cost 0 copy RHS - 2619211776U, // <u,6,0,0>: Cost 3 vext2 <0,2,u,6>, <0,0,0,0> - 1545470054U, // <u,6,0,1>: Cost 2 vext2 <0,2,u,6>, LHS - 1545470192U, // <u,6,0,2>: Cost 2 vext2 <0,2,u,6>, <0,2,u,6> - 2255958969U, // <u,6,0,3>: Cost 3 vrev <6,u,3,0> - 1546797458U, // <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6> - 2720624971U, // <u,6,0,5>: Cost 3 vext3 <6,0,5,u>, <6,0,5,u> - 2256180180U, // <u,6,0,6>: Cost 3 vrev <6,u,6,0> - 2960682294U, // <u,6,0,7>: Cost 3 vzipr <1,2,u,0>, RHS - 1545470621U, // <u,6,0,u>: Cost 2 vext2 <0,2,u,6>, LHS - 1182004127U, // <u,6,1,0>: Cost 2 vrev <6,u,0,1> - 2619212596U, // <u,6,1,1>: Cost 3 vext2 <0,2,u,6>, <1,1,1,1> - 2619212694U, // <u,6,1,2>: Cost 3 vext2 <0,2,u,6>, <1,2,3,0> - 2619212760U, // <u,6,1,3>: Cost 3 vext2 <0,2,u,6>, <1,3,1,3> - 2626511979U, // <u,6,1,4>: Cost 3 vext2 <1,4,u,6>, <1,4,u,6> - 2619212944U, // <u,6,1,5>: Cost 3 vext2 <0,2,u,6>, <1,5,3,7> - 2714063264U, // <u,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3> - 2967326006U, // <u,6,1,7>: Cost 3 vzipr <2,3,u,1>, RHS - 1182594023U, // <u,6,1,u>: Cost 2 vrev <6,u,u,1> - 1506050150U, // <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS - 2579792630U, // <u,6,2,1>: Cost 3 vext1 <4,u,6,2>, <1,0,3,2> - 2619213416U, // <u,6,2,2>: Cost 3 vext2 <0,2,u,6>, <2,2,2,2> - 2619213478U, // <u,6,2,3>: Cost 3 vext2 <0,2,u,6>, <2,3,0,1> - 1506053430U, // <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, RHS - 2633148309U, // <u,6,2,5>: Cost 3 vext2 <2,5,u,6>, <2,5,u,6> - 2619213754U, // <u,6,2,6>: Cost 3 vext2 <0,2,u,6>, <2,6,3,7> - 1638330874U, // <u,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3> - 1638478339U, // <u,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3> - 2619213974U, // <u,6,3,0>: Cost 3 vext2 <0,2,u,6>, <3,0,1,2> - 2255836074U, // <u,6,3,1>: Cost 3 vrev <6,u,1,3> - 2255909811U, // <u,6,3,2>: Cost 3 vrev <6,u,2,3> - 2619214236U, // <u,6,3,3>: Cost 3 vext2 <0,2,u,6>, <3,3,3,3> - 1564715549U, // <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6> - 2639121006U, // <u,6,3,5>: Cost 3 vext2 <3,5,u,6>, <3,5,u,6> - 3001847012U, // <u,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6> - 1880329526U, // <u,6,3,7>: Cost 2 vzipr LHS, RHS - 1880329527U, // <u,6,3,u>: Cost 2 vzipr LHS, RHS - 2567864422U, // <u,6,4,0>: Cost 3 vext1 <2,u,6,4>, LHS - 2733011558U, // <u,6,4,1>: Cost 3 vext3 LHS, <6,4,1,3> - 2567866484U, // <u,6,4,2>: Cost 3 vext1 <2,u,6,4>, <2,u,6,4> - 2638458005U, // <u,6,4,3>: Cost 3 vext2 <3,4,u,6>, <4,3,6,u> - 1570540772U, // <u,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6> - 1545473334U, // <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS - 1572015512U, // <u,6,4,6>: Cost 2 vext2 <4,6,u,6>, <4,6,u,6> - 2960715062U, // <u,6,4,7>: Cost 3 vzipr <1,2,u,4>, RHS - 1545473577U, // <u,6,4,u>: Cost 2 vext2 <0,2,u,6>, RHS - 2567872614U, // <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS - 2645757648U, // <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3> - 2567874490U, // <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7> - 2576501250U, // <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6> - 1576660943U, // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6> - 2645757956U, // <u,6,5,5>: Cost 3 vext2 
<4,6,u,6>, <5,5,5,5> - 2645758050U, // <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0> - 2824080694U, // <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS - 1182626795U, // <u,6,5,u>: Cost 2 vrev <6,u,u,5> - 1506082918U, // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS - 2579825398U, // <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2> - 2645758458U, // <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3> - 2579826838U, // <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2> - 1506086198U, // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS - 2579828432U, // <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3> - 296144182U, // <u,6,6,6>: Cost 1 vdup2 RHS - 1638331202U, // <u,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7> - 296144182U, // <u,6,6,u>: Cost 1 vdup2 RHS - 432349286U, // <u,6,7,0>: Cost 1 vext1 RHS, LHS - 1506091766U, // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2> - 1506092648U, // <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2> - 1506093206U, // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2> - 432352809U, // <u,6,7,4>: Cost 1 vext1 RHS, RHS - 1506094800U, // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3> - 1506095610U, // <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3> - 1906904374U, // <u,6,7,7>: Cost 2 vzipr RHS, RHS - 432355118U, // <u,6,7,u>: Cost 1 vext1 RHS, LHS - 432357478U, // <u,6,u,0>: Cost 1 vext1 RHS, LHS - 1545475886U, // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS - 1506100840U, // <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2> - 1506101398U, // <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2> - 432361002U, // <u,6,u,4>: Cost 1 vext1 RHS, RHS - 1545476250U, // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS - 296144182U, // <u,6,u,6>: Cost 1 vdup2 RHS - 1880370486U, // <u,6,u,7>: Cost 2 vzipr LHS, RHS - 432363310U, // <u,6,u,u>: Cost 1 vext1 RHS, LHS - 1571356672U, // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0> - 497614950U, // <u,7,0,1>: Cost 1 vext2 RHS, LHS - 1571356836U, // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2> - 2573880146U, // <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0> - 1571357010U, // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5> - 1512083716U, // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0> - 2621874741U, // <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7> - 2585826298U, // <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2> - 497615517U, // <u,7,0,u>: Cost 1 vext2 RHS, LHS - 1571357430U, // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2> - 1571357492U, // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1> - 1571357590U, // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0> - 1552114715U, // <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7> - 2573888822U, // <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS - 1553441981U, // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7> - 2627847438U, // <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7> - 2727408775U, // <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u> - 1555432880U, // <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7> - 2629838337U, // <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7> - 1188058754U, // <u,7,2,1>: Cost 2 vrev <7,u,1,2> - 1571358312U, // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2> - 1571358374U, // <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1> - 2632492869U, // <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7> - 2633156502U, // <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7> - 1560078311U, // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7> - 2728072408U, // <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u> - 1561405577U, // <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7> - 1571358870U, // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2> - 2627184913U, // <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u> - 2633820523U, // <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u> - 1571359132U, // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3> - 1571359234U, // <u,7,3,4>: Cost 2 
vext2 RHS, <3,4,5,6> - 1512108295U, // <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3> - 1518080992U, // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3> - 2640456465U, // <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7> - 1571359518U, // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2> - 1571359634U, // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1> - 2573911067U, // <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7> - 2645101622U, // <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3> - 2573912918U, // <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4> - 1571359952U, // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4> - 497618248U, // <u,7,4,5>: Cost 1 vext2 RHS, RHS - 1571360116U, // <u,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6> - 2645102024U, // <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0> - 497618473U, // <u,7,4,u>: Cost 1 vext2 RHS, RHS - 2645102152U, // <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2> - 1571360464U, // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3> - 2645102334U, // <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4> - 2645102447U, // <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0> - 1571360710U, // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6> - 1571360772U, // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5> - 1571360866U, // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0> - 1571360936U, // <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7> - 1571361017U, // <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7> - 1530044518U, // <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS - 2645103016U, // <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2> - 1571361274U, // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3> - 2645103154U, // <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5> - 1530047798U, // <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS - 1188386474U, // <u,7,6,5>: Cost 2 vrev <7,u,5,6> - 1571361592U, // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6> - 1571361614U, // <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1> - 1571361695U, // <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1> - 1571361786U, // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2> - 2573935616U, // <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7> - 2645103781U, // <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2> - 2573937497U, // <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7> - 1571362150U, // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6> - 1512141067U, // <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7> - 1518113764U, // <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7> - 363253046U, // <u,7,7,7>: Cost 1 vdup3 RHS - 363253046U, // <u,7,7,u>: Cost 1 vdup3 RHS - 1571362515U, // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2> - 497620782U, // <u,7,u,1>: Cost 1 vext2 RHS, LHS - 1571362693U, // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0> - 1571362748U, // <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1> - 1571362879U, // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6> - 497621146U, // <u,7,u,5>: Cost 1 vext2 RHS, RHS - 1571363024U, // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7> - 363253046U, // <u,7,u,7>: Cost 1 vdup3 RHS - 497621349U, // <u,7,u,u>: Cost 1 vext2 RHS, LHS - 135053414U, // <u,u,0,0>: Cost 1 vdup0 LHS - 471081121U, // <u,u,0,1>: Cost 1 vext2 LHS, LHS - 1544822948U, // <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2> - 1616140005U, // <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2> - 1544823122U, // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5> - 1512157453U, // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0> - 1662220032U, // <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2> - 1194457487U, // <u,u,0,7>: Cost 2 vrev <u,u,7,0> - 471081629U, // <u,u,0,u>: Cost 1 vext2 LHS, LHS - 1544823542U, // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2> - 202162278U, // <u,u,1,1>: Cost 1 vdup1 LHS - 537753390U, // <u,u,1,2>: Cost 1 vext3 LHS, LHS - 1544823768U, // <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3> - 1494248758U, // <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS - 
1544823952U, // <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7> - 1518138343U, // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1> - 1640322907U, // <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3> - 537753444U, // <u,u,1,u>: Cost 1 vext3 LHS, LHS - 1482309734U, // <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, LHS - 1194031451U, // <u,u,2,1>: Cost 2 vrev <u,u,1,2> - 269271142U, // <u,u,2,2>: Cost 1 vdup2 LHS - 835584U, // <u,u,2,3>: Cost 0 copy LHS - 1482313014U, // <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS - 2618566504U, // <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6> - 1544824762U, // <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7> - 1638479788U, // <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3> - 835584U, // <u,u,2,u>: Cost 0 copy LHS - 408576723U, // <u,u,3,0>: Cost 1 vext1 LHS, LHS - 1482318582U, // <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2> - 120371557U, // <u,u,3,2>: Cost 1 vrev LHS - 336380006U, // <u,u,3,3>: Cost 1 vdup3 LHS - 408579382U, // <u,u,3,4>: Cost 1 vext1 LHS, RHS - 1616140271U, // <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7> - 1530098170U, // <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3> - 1880329544U, // <u,u,3,7>: Cost 2 vzipr LHS, RHS - 408581934U, // <u,u,3,u>: Cost 1 vext1 LHS, LHS - 1488298086U, // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS - 1488299437U, // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4> - 1659271204U, // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6> - 1194195311U, // <u,u,4,3>: Cost 2 vrev <u,u,3,4> - 161926454U, // <u,u,4,4>: Cost 1 vdup0 RHS - 471084342U, // <u,u,4,5>: Cost 1 vext2 LHS, RHS - 1571368308U, // <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6> - 1640323153U, // <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6> - 471084585U, // <u,u,4,u>: Cost 1 vext2 LHS, RHS - 1494278246U, // <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS - 1571368656U, // <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3> - 1494280327U, // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5> - 1616140415U, // <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7> - 1494281526U, // <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS - 229035318U, // <u,u,5,5>: Cost 1 vdup1 RHS - 537753754U, // <u,u,5,6>: Cost 1 vext3 LHS, RHS - 1750355254U, // <u,u,5,7>: Cost 2 vuzpr LHS, RHS - 537753772U, // <u,u,5,u>: Cost 1 vext3 LHS, RHS - 1482342502U, // <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS - 2556084982U, // <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2> - 1571369466U, // <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3> - 1611938000U, // <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7> - 1482345782U, // <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS - 1194359171U, // <u,u,6,5>: Cost 2 vrev <u,u,5,6> - 296144182U, // <u,u,6,6>: Cost 1 vdup2 RHS - 27705344U, // <u,u,6,7>: Cost 0 copy RHS - 27705344U, // <u,u,6,u>: Cost 0 copy RHS - 432496742U, // <u,u,7,0>: Cost 1 vext1 RHS, LHS - 1488324016U, // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7> - 1494296713U, // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7> - 1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS - 432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS - 1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3> - 120699277U, // <u,u,7,6>: Cost 1 vrev RHS - 363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS - 432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS - 408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS - 471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS - 537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS - 835584U, // <u,u,u,3>: Cost 0 copy LHS - 408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS - 471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS - 537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS - 27705344U, // <u,u,u,7>: Cost 0 copy RHS - 835584U, // <u,u,u,u>: Cost 0 copy LHS - 0 -}; diff --git 
a/lib/Target/ARM64/ARM64PromoteConstant.cpp b/lib/Target/ARM64/ARM64PromoteConstant.cpp deleted file mode 100644 index 9fbaedb..0000000 --- a/lib/Target/ARM64/ARM64PromoteConstant.cpp +++ /dev/null @@ -1,585 +0,0 @@ - -//===-- ARM64PromoteConstant.cpp --- Promote constant to global for ARM64 -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ARM64PromoteConstant pass which promotes constant -// to global variables when this is likely to be more efficient. -// Currently only types related to constant vector (i.e., constant vector, array -// of constant vectors, constant structure with a constant vector field, etc.) -// are promoted to global variables. -// Indeed, constant vector are likely to be lowered in target constant pool -// during instruction selection. -// Therefore, the access will remain the same (memory load), but the structures -// types are not split into different constant pool accesses for each field. -// The bonus side effect is that created globals may be merged by the global -// merge pass. -// -// FIXME: This pass may be useful for other targets too. -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-promote-const" -#include "ARM64.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/InlineAsm.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" - -using namespace llvm; - -// Stress testing mode - disable heuristics. -static cl::opt<bool> Stress("arm64-stress-promote-const", cl::Hidden, - cl::desc("Promote all vector constants")); - -STATISTIC(NumPromoted, "Number of promoted constants"); -STATISTIC(NumPromotedUses, "Number of promoted constants uses"); - -//===----------------------------------------------------------------------===// -// ARM64PromoteConstant -//===----------------------------------------------------------------------===// - -namespace { -/// Promotes interesting constant into global variables. -/// The motivating example is: -/// static const uint16_t TableA[32] = { -/// 41944, 40330, 38837, 37450, 36158, 34953, 33826, 32768, -/// 31776, 30841, 29960, 29128, 28340, 27595, 26887, 26215, -/// 25576, 24967, 24386, 23832, 23302, 22796, 22311, 21846, -/// 21400, 20972, 20561, 20165, 19785, 19419, 19066, 18725, -/// }; -/// -/// uint8x16x4_t LoadStatic(void) { -/// uint8x16x4_t ret; -/// ret.val[0] = vld1q_u16(TableA + 0); -/// ret.val[1] = vld1q_u16(TableA + 8); -/// ret.val[2] = vld1q_u16(TableA + 16); -/// ret.val[3] = vld1q_u16(TableA + 24); -/// return ret; -/// } -/// -/// The constants in that example are folded into the uses. Thus, 4 different -/// constants are created. -/// As their type is vector the cheapest way to create them is to load them -/// for the memory. -/// Therefore the final assembly final has 4 different load. -/// With this pass enabled, only one load is issued for the constants. 
-class ARM64PromoteConstant : public ModulePass { - -public: - static char ID; - ARM64PromoteConstant() : ModulePass(ID) {} - - virtual const char *getPassName() const { return "ARM64 Promote Constant"; } - - /// Iterate over the functions and promote the interesting constants into - /// global variables with module scope. - bool runOnModule(Module &M) { - DEBUG(dbgs() << getPassName() << '\n'); - bool Changed = false; - for (auto &MF: M) { - Changed |= runOnFunction(MF); - } - return Changed; - } - -private: - /// Look for interesting constants used within the given function. - /// Promote them into global variables, load these global variables within - /// the related function, so that the number of inserted load is minimal. - bool runOnFunction(Function &F); - - // This transformation requires dominator info - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - } - - /// Type to store a list of User - typedef SmallVector<Value::user_iterator, 4> Users; - /// Map an insertion point to all the uses it dominates. - typedef DenseMap<Instruction *, Users> InsertionPoints; - /// Map a function to the required insertion point of load for a - /// global variable - typedef DenseMap<Function *, InsertionPoints> InsertionPointsPerFunc; - - /// Find the closest point that dominates the given Use. - Instruction *findInsertionPoint(Value::user_iterator &Use); - - /// Check if the given insertion point is dominated by an existing - /// insertion point. - /// If true, the given use is added to the list of dominated uses for - /// the related existing point. - /// \param NewPt the insertion point to be checked - /// \param UseIt the use to be added into the list of dominated uses - /// \param InsertPts existing insertion points - /// \pre NewPt and all instruction in InsertPts belong to the same function - /// \return true if one of the insertion point in InsertPts dominates NewPt, - /// false otherwise - bool isDominated(Instruction *NewPt, Value::user_iterator &UseIt, - InsertionPoints &InsertPts); - - /// Check if the given insertion point can be merged with an existing - /// insertion point in a common dominator. - /// If true, the given use is added to the list of the created insertion - /// point. - /// \param NewPt the insertion point to be checked - /// \param UseIt the use to be added into the list of dominated uses - /// \param InsertPts existing insertion points - /// \pre NewPt and all instruction in InsertPts belong to the same function - /// \pre isDominated returns false for the exact same parameters. - /// \return true if it exists an insertion point in InsertPts that could - /// have been merged with NewPt in a common dominator, - /// false otherwise - bool tryAndMerge(Instruction *NewPt, Value::user_iterator &UseIt, - InsertionPoints &InsertPts); - - /// Compute the minimal insertion points to dominates all the interesting - /// uses of value. - /// Insertion points are group per function and each insertion point - /// contains a list of all the uses it dominates within the related function - /// \param Val constant to be examined - /// \param[out] InsPtsPerFunc output storage of the analysis - void computeInsertionPoints(Constant *Val, - InsertionPointsPerFunc &InsPtsPerFunc); - - /// Insert a definition of a new global variable at each point contained in - /// InsPtsPerFunc and update the related uses (also contained in - /// InsPtsPerFunc). 
- bool insertDefinitions(Constant *Cst, InsertionPointsPerFunc &InsPtsPerFunc); - - /// Compute the minimal insertion points to dominate all the interesting - /// uses of Val and insert a definition of a new global variable - /// at these points. - /// Also update the uses of Val accordingly. - /// Currently a use of Val is considered interesting if: - /// - Val is not UndefValue - /// - Val is not zeroinitialized - /// - Replacing Val per a load of a global variable is valid. - /// \see shouldConvert for more details - bool computeAndInsertDefinitions(Constant *Val); - - /// Promote the given constant into a global variable if it is expected to - /// be profitable. - /// \return true if Cst has been promoted - bool promoteConstant(Constant *Cst); - - /// Transfer the list of dominated uses of IPI to NewPt in InsertPts. - /// Append UseIt to this list and delete the entry of IPI in InsertPts. - static void appendAndTransferDominatedUses(Instruction *NewPt, - Value::user_iterator &UseIt, - InsertionPoints::iterator &IPI, - InsertionPoints &InsertPts) { - // Record the dominated use - IPI->second.push_back(UseIt); - // Transfer the dominated uses of IPI to NewPt - // Inserting into the DenseMap may invalidate existing iterator. - // Keep a copy of the key to find the iterator to erase. - Instruction *OldInstr = IPI->first; - InsertPts.insert(InsertionPoints::value_type(NewPt, IPI->second)); - // Erase IPI - IPI = InsertPts.find(OldInstr); - InsertPts.erase(IPI); - } -}; -} // end anonymous namespace - -char ARM64PromoteConstant::ID = 0; - -namespace llvm { -void initializeARM64PromoteConstantPass(PassRegistry &); -} - -INITIALIZE_PASS_BEGIN(ARM64PromoteConstant, "arm64-promote-const", - "ARM64 Promote Constant Pass", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(ARM64PromoteConstant, "arm64-promote-const", - "ARM64 Promote Constant Pass", false, false) - -ModulePass *llvm::createARM64PromoteConstantPass() { - return new ARM64PromoteConstant(); -} - -/// Check if the given type uses a vector type. -static bool isConstantUsingVectorTy(const Type *CstTy) { - if (CstTy->isVectorTy()) - return true; - if (CstTy->isStructTy()) { - for (unsigned EltIdx = 0, EndEltIdx = CstTy->getStructNumElements(); - EltIdx < EndEltIdx; ++EltIdx) - if (isConstantUsingVectorTy(CstTy->getStructElementType(EltIdx))) - return true; - } else if (CstTy->isArrayTy()) - return isConstantUsingVectorTy(CstTy->getArrayElementType()); - return false; -} - -/// Check if the given use (Instruction + OpIdx) of Cst should be converted into -/// a load of a global variable initialized with Cst. -/// A use should be converted if it is legal to do so. -/// For instance, it is not legal to turn the mask operand of a shuffle vector -/// into a load of a global variable. -static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr, - unsigned OpIdx) { - // shufflevector instruction expects a const for the mask argument, i.e., the - // third argument. Do not promote this use in that case. 
-  if (isa<const ShuffleVectorInst>(Instr) && OpIdx == 2)
-    return false;
-
-  // extractvalue instruction expects a const idx
-  if (isa<const ExtractValueInst>(Instr) && OpIdx > 0)
-    return false;
-
-  // insertvalue instruction expects a const idx
-  if (isa<const InsertValueInst>(Instr) && OpIdx > 1)
-    return false;
-
-  if (isa<const AllocaInst>(Instr) && OpIdx > 0)
-    return false;
-
-  // Alignment argument must be constant
-  if (isa<const LoadInst>(Instr) && OpIdx > 0)
-    return false;
-
-  // Alignment argument must be constant
-  if (isa<const StoreInst>(Instr) && OpIdx > 1)
-    return false;
-
-  // Index must be constant
-  if (isa<const GetElementPtrInst>(Instr) && OpIdx > 0)
-    return false;
-
-  // Personality function and filters must be constant.
-  // Give up on that instruction.
-  if (isa<const LandingPadInst>(Instr))
-    return false;
-
-  // switch instruction expects constants to compare to
-  if (isa<const SwitchInst>(Instr))
-    return false;
-
-  // Expected address must be a constant
-  if (isa<const IndirectBrInst>(Instr))
-    return false;
-
-  // Do not mess with intrinsics
-  if (isa<const IntrinsicInst>(Instr))
-    return false;
-
-  // Do not mess with inline asm
-  const CallInst *CI = dyn_cast<const CallInst>(Instr);
-  if (CI && isa<const InlineAsm>(CI->getCalledValue()))
-    return false;
-
-  return true;
-}
-
-/// Check if the given Cst should be converted into
-/// a load of a global variable initialized with Cst.
-/// A constant should be converted if it is likely that the materialization of
-/// the constant will be tricky. Thus, we give up on zero or undef values.
-///
-/// \todo Currently, accept only vector-related types.
-/// Also, we give up on all simple vector types to keep the existing
-/// behavior. Otherwise, we should push here all the checks of the lowering of
-/// BUILD_VECTOR. By giving up, we lose the potential benefit of merging
-/// constants via global merge and the fact that the same constant is stored
-/// only once with this method (versus as many times as there are functions
-/// using the constant with the regular approach, even for floats).
-/// Again, the simplest solution would be to promote every
-/// constant and rematerialize them when they are actually cheap to create.
-static bool shouldConvert(const Constant *Cst) {
-  if (isa<const UndefValue>(Cst))
-    return false;
-
-  // FIXME: In some cases, it may be interesting to promote in memory
-  // a zero-initialized constant.
-  // E.g., when the type of Cst requires more instructions than the
-  // adrp/add/load sequence or when this sequence can be shared by several
-  // instances of Cst.
-  // Ideally, we could promote this into a global and rematerialize the
-  // constant when promotion turns out to be a bad idea.
- if (Cst->isZeroValue()) - return false; - - if (Stress) - return true; - - // FIXME: see function \todo - if (Cst->getType()->isVectorTy()) - return false; - return isConstantUsingVectorTy(Cst->getType()); -} - -Instruction * -ARM64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) { - // If this user is a phi, the insertion point is in the related - // incoming basic block - PHINode *PhiInst = dyn_cast<PHINode>(*Use); - Instruction *InsertionPoint; - if (PhiInst) - InsertionPoint = - PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator(); - else - InsertionPoint = dyn_cast<Instruction>(*Use); - assert(InsertionPoint && "User is not an instruction!"); - return InsertionPoint; -} - -bool ARM64PromoteConstant::isDominated(Instruction *NewPt, - Value::user_iterator &UseIt, - InsertionPoints &InsertPts) { - - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( - *NewPt->getParent()->getParent()).getDomTree(); - - // Traverse all the existing insertion point and check if one is dominating - // NewPt - for (InsertionPoints::iterator IPI = InsertPts.begin(), - EndIPI = InsertPts.end(); - IPI != EndIPI; ++IPI) { - if (NewPt == IPI->first || DT.dominates(IPI->first, NewPt) || - // When IPI->first is a terminator instruction, DT may think that - // the result is defined on the edge. - // Here we are testing the insertion point, not the definition. - (IPI->first->getParent() != NewPt->getParent() && - DT.dominates(IPI->first->getParent(), NewPt->getParent()))) { - // No need to insert this point - // Record the dominated use - DEBUG(dbgs() << "Insertion point dominated by:\n"); - DEBUG(IPI->first->print(dbgs())); - DEBUG(dbgs() << '\n'); - IPI->second.push_back(UseIt); - return true; - } - } - return false; -} - -bool ARM64PromoteConstant::tryAndMerge(Instruction *NewPt, - Value::user_iterator &UseIt, - InsertionPoints &InsertPts) { - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( - *NewPt->getParent()->getParent()).getDomTree(); - BasicBlock *NewBB = NewPt->getParent(); - - // Traverse all the existing insertion point and check if one is dominated by - // NewPt and thus useless or can be combined with NewPt into a common - // dominator - for (InsertionPoints::iterator IPI = InsertPts.begin(), - EndIPI = InsertPts.end(); - IPI != EndIPI; ++IPI) { - BasicBlock *CurBB = IPI->first->getParent(); - if (NewBB == CurBB) { - // Instructions are in the same block. - // By construction, NewPt is dominating the other. - // Indeed, isDominated returned false with the exact same arguments. 
- DEBUG(dbgs() << "Merge insertion point with:\n"); - DEBUG(IPI->first->print(dbgs())); - DEBUG(dbgs() << "\nat considered insertion point.\n"); - appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts); - return true; - } - - // Look for a common dominator - BasicBlock *CommonDominator = DT.findNearestCommonDominator(NewBB, CurBB); - // If none exists, we cannot merge these two points - if (!CommonDominator) - continue; - - if (CommonDominator != NewBB) { - // By construction, the CommonDominator cannot be CurBB - assert(CommonDominator != CurBB && - "Instruction has not been rejected during isDominated check!"); - // Take the last instruction of the CommonDominator as insertion point - NewPt = CommonDominator->getTerminator(); - } - // else, CommonDominator is the block of NewBB, hence NewBB is the last - // possible insertion point in that block - DEBUG(dbgs() << "Merge insertion point with:\n"); - DEBUG(IPI->first->print(dbgs())); - DEBUG(dbgs() << '\n'); - DEBUG(NewPt->print(dbgs())); - DEBUG(dbgs() << '\n'); - appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts); - return true; - } - return false; -} - -void ARM64PromoteConstant::computeInsertionPoints( - Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) { - DEBUG(dbgs() << "** Compute insertion points **\n"); - for (Value::user_iterator UseIt = Val->user_begin(), - EndUseIt = Val->user_end(); - UseIt != EndUseIt; ++UseIt) { - // If the user is not an Instruction, we cannot modify it - if (!isa<Instruction>(*UseIt)) - continue; - - // Filter out uses that should not be converted - if (!shouldConvertUse(Val, cast<Instruction>(*UseIt), UseIt.getOperandNo())) - continue; - - DEBUG(dbgs() << "Considered use, opidx " << UseIt.getOperandNo() << ":\n"); - DEBUG((*UseIt)->print(dbgs())); - DEBUG(dbgs() << '\n'); - - Instruction *InsertionPoint = findInsertionPoint(UseIt); - - DEBUG(dbgs() << "Considered insertion point:\n"); - DEBUG(InsertionPoint->print(dbgs())); - DEBUG(dbgs() << '\n'); - - // Check if the current insertion point is useless, i.e., it is dominated - // by another one. - InsertionPoints &InsertPts = - InsPtsPerFunc[InsertionPoint->getParent()->getParent()]; - if (isDominated(InsertionPoint, UseIt, InsertPts)) - continue; - // This insertion point is useful, check if we can merge some insertion - // point in a common dominator or if NewPt dominates an existing one. 
-    if (tryAndMerge(InsertionPoint, UseIt, InsertPts))
-      continue;
-
-    DEBUG(dbgs() << "Keep considered insertion point\n");
-
-    // It is definitely useful on its own
-    InsertPts[InsertionPoint].push_back(UseIt);
-  }
-}
-
-bool
-ARM64PromoteConstant::insertDefinitions(Constant *Cst,
-                                        InsertionPointsPerFunc &InsPtsPerFunc) {
-  // We will create one global variable per Module
-  DenseMap<Module *, GlobalVariable *> ModuleToMergedGV;
-  bool HasChanged = false;
-
-  // Traverse all insertion points in all the functions
-  for (InsertionPointsPerFunc::iterator FctToInstPtsIt = InsPtsPerFunc.begin(),
-                                        EndIt = InsPtsPerFunc.end();
-       FctToInstPtsIt != EndIt; ++FctToInstPtsIt) {
-    InsertionPoints &InsertPts = FctToInstPtsIt->second;
-// Do more checks for debug purposes
-#ifndef NDEBUG
-    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
-        *FctToInstPtsIt->first).getDomTree();
-#endif
-    GlobalVariable *PromotedGV;
-    assert(!InsertPts.empty() && "Empty uses do not need a definition");
-
-    Module *M = FctToInstPtsIt->first->getParent();
-    DenseMap<Module *, GlobalVariable *>::iterator MapIt =
-        ModuleToMergedGV.find(M);
-    if (MapIt == ModuleToMergedGV.end()) {
-      PromotedGV = new GlobalVariable(
-          *M, Cst->getType(), true, GlobalValue::InternalLinkage, 0,
-          "_PromotedConst", 0, GlobalVariable::NotThreadLocal);
-      PromotedGV->setInitializer(Cst);
-      ModuleToMergedGV[M] = PromotedGV;
-      DEBUG(dbgs() << "Global replacement: ");
-      DEBUG(PromotedGV->print(dbgs()));
-      DEBUG(dbgs() << '\n');
-      ++NumPromoted;
-      HasChanged = true;
-    } else {
-      PromotedGV = MapIt->second;
-    }
-
-    for (InsertionPoints::iterator IPI = InsertPts.begin(),
-                                   EndIPI = InsertPts.end();
-         IPI != EndIPI; ++IPI) {
-      // Create the load of the global variable
-      IRBuilder<> Builder(IPI->first->getParent(), IPI->first);
-      LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV);
-      DEBUG(dbgs() << "**********\n");
-      DEBUG(dbgs() << "New def: ");
-      DEBUG(LoadedCst->print(dbgs()));
-      DEBUG(dbgs() << '\n');
-
-      // Update the dominated uses
-      Users &DominatedUsers = IPI->second;
-      for (Users::iterator UseIt = DominatedUsers.begin(),
-                           EndIt = DominatedUsers.end();
-           UseIt != EndIt; ++UseIt) {
-#ifndef NDEBUG
-        assert((DT.dominates(LoadedCst, cast<Instruction>(**UseIt)) ||
-                (isa<PHINode>(**UseIt) &&
-                 DT.dominates(LoadedCst, findInsertionPoint(*UseIt)))) &&
-               "Inserted definition does not dominate all its uses!");
-#endif
-        DEBUG(dbgs() << "Use to update " << UseIt->getOperandNo() << ":");
-        DEBUG((*UseIt)->print(dbgs()));
-        DEBUG(dbgs() << '\n');
-        (*UseIt)->setOperand(UseIt->getOperandNo(), LoadedCst);
-        ++NumPromotedUses;
-      }
-    }
-  }
-  return HasChanged;
-}
-
-bool ARM64PromoteConstant::computeAndInsertDefinitions(Constant *Val) {
-  InsertionPointsPerFunc InsertPtsPerFunc;
-  computeInsertionPoints(Val, InsertPtsPerFunc);
-  return insertDefinitions(Val, InsertPtsPerFunc);
-}
-
-bool ARM64PromoteConstant::promoteConstant(Constant *Cst) {
-  assert(Cst && "Given variable is not a valid constant.");
-
-  if (!shouldConvert(Cst))
-    return false;
-
-  DEBUG(dbgs() << "******************************\n");
-  DEBUG(dbgs() << "Candidate constant: ");
-  DEBUG(Cst->print(dbgs()));
-  DEBUG(dbgs() << '\n');
-
-  return computeAndInsertDefinitions(Cst);
-}
-
-bool ARM64PromoteConstant::runOnFunction(Function &F) {
-  // Look for instructions using constant vectors.
-  // Promote those constants to global variables.
-  // Create as few loads of these variables as possible and update the uses
-  // accordingly.
-  bool LocalChange = false;
-  SmallSet<Constant *, 8> AlreadyChecked;
-
-  for (auto &MBB : F) {
-    for (auto &MI : MBB) {
-      // Traverse the operands, looking for constant vectors.
-      // Replace them with a load of a global variable of constant vector type.
-      for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands();
-           OpIdx != EndOpIdx; ++OpIdx) {
-        Constant *Cst = dyn_cast<Constant>(MI.getOperand(OpIdx));
-        // There is no point in promoting global values; they are already
-        // global. Do not promote constant expressions, as they may require
-        // some code expansion.
-        if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) &&
-            AlreadyChecked.insert(Cst))
-          LocalChange |= promoteConstant(Cst);
-      }
-    }
-  }
-  return LocalChange;
-}
diff --git a/lib/Target/ARM64/ARM64RegisterInfo.cpp b/lib/Target/ARM64/ARM64RegisterInfo.cpp
deleted file mode 100644
index 4c7fc8a..0000000
--- a/lib/Target/ARM64/ARM64RegisterInfo.cpp
+++ /dev/null
@@ -1,400 +0,0 @@
-//===- ARM64RegisterInfo.cpp - ARM64 Register Information -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the ARM64 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM64RegisterInfo.h"
-#include "ARM64FrameLowering.h"
-#include "ARM64InstrInfo.h"
-#include "ARM64Subtarget.h"
-#include "MCTargetDesc/ARM64AddressingModes.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetOptions.h"
-
-#define GET_REGINFO_TARGET_DESC
-#include "ARM64GenRegisterInfo.inc"
-
-using namespace llvm;
-
-ARM64RegisterInfo::ARM64RegisterInfo(const ARM64InstrInfo *tii,
-                                     const ARM64Subtarget *sti)
-    : ARM64GenRegisterInfo(ARM64::LR), TII(tii), STI(sti) {}
-
-const uint16_t *
-ARM64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-  assert(MF && "Invalid MachineFunction pointer.");
-  if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
-    return CSR_ARM64_AllRegs_SaveList;
-  else
-    return CSR_ARM64_AAPCS_SaveList;
-}
-
-const uint32_t *
-ARM64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
-  if (CC == CallingConv::AnyReg)
-    return CSR_ARM64_AllRegs_RegMask;
-  else
-    return CSR_ARM64_AAPCS_RegMask;
-}
-
-const uint32_t *ARM64RegisterInfo::getTLSCallPreservedMask() const {
-  if (STI->isTargetDarwin())
-    return CSR_ARM64_TLS_Darwin_RegMask;
-
-  assert(STI->isTargetELF() && "only expect Darwin or ELF TLS");
-  return CSR_ARM64_TLS_ELF_RegMask;
-}
-
-const uint32_t *
-ARM64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
-  // This should return a register mask that is the same as that returned by
-  // getCallPreservedMask but that additionally preserves the register used for
-  // the first i64 argument (which must also be the register used to return a
-  // single i64 return value)
-  //
-  // In case that the calling convention does not use the same register
for - // both, the function should return NULL (does not currently apply) - return CSR_ARM64_AAPCS_ThisReturn_RegMask; -} - -BitVector ARM64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - // FIXME: avoid re-calculating this everytime. - BitVector Reserved(getNumRegs()); - Reserved.set(ARM64::SP); - Reserved.set(ARM64::XZR); - Reserved.set(ARM64::WSP); - Reserved.set(ARM64::WZR); - - if (TFI->hasFP(MF) || STI->isTargetDarwin()) { - Reserved.set(ARM64::FP); - Reserved.set(ARM64::W29); - } - - if (STI->isTargetDarwin()) { - Reserved.set(ARM64::X18); // Platform register - Reserved.set(ARM64::W18); - } - - if (hasBasePointer(MF)) { - Reserved.set(ARM64::X19); - Reserved.set(ARM64::W19); - } - - return Reserved; -} - -bool ARM64RegisterInfo::isReservedReg(const MachineFunction &MF, - unsigned Reg) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - switch (Reg) { - default: - break; - case ARM64::SP: - case ARM64::XZR: - case ARM64::WSP: - case ARM64::WZR: - return true; - case ARM64::X18: - case ARM64::W18: - return STI->isTargetDarwin(); - case ARM64::FP: - case ARM64::W29: - return TFI->hasFP(MF) || STI->isTargetDarwin(); - case ARM64::W19: - case ARM64::X19: - return hasBasePointer(MF); - } - - return false; -} - -const TargetRegisterClass * -ARM64RegisterInfo::getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const { - return &ARM64::GPR64RegClass; -} - -const TargetRegisterClass * -ARM64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { - if (RC == &ARM64::CCRRegClass) - return NULL; // Can't copy CPSR. - return RC; -} - -unsigned ARM64RegisterInfo::getBaseRegister() const { return ARM64::X19; } - -bool ARM64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - - // In the presence of variable sized objects, if the fixed stack size is - // large enough that referencing from the FP won't result in things being - // in range relatively often, we can use a base pointer to allow access - // from the other direction like the SP normally works. - if (MFI->hasVarSizedObjects()) { - // Conservatively estimate whether the negative offset from the frame - // pointer will be sufficient to reach. If a function has a smallish - // frame, it's less likely to have lots of spills and callee saved - // space, so it's all more likely to be within range of the frame pointer. - // If it's wrong, we'll materialize the constant and still get to the - // object; it's just suboptimal. Negative offsets use the unscaled - // load/store instructions, which have a 9-bit signed immediate. - if (MFI->getLocalFrameSize() < 256) - return false; - return true; - } - - return false; -} - -unsigned ARM64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - return TFI->hasFP(MF) ? 
ARM64::FP : ARM64::SP; -} - -bool -ARM64RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - return true; -} - -bool ARM64RegisterInfo::requiresVirtualBaseRegisters(const MachineFunction &MF) - const { - return true; -} - -bool -ARM64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - // ARM64FrameLowering::resolveFrameIndexReference() can always fall back - // to the stack pointer, so only put the emergency spill slot next to the - // FP when there's no better way to access it (SP or base pointer). - return MFI->hasVarSizedObjects() && !hasBasePointer(MF); -} - -bool ARM64RegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) - const { - return true; -} - -bool ARM64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - // Only consider eliminating leaf frames. - if (MFI->hasCalls() || (MF.getTarget().Options.DisableFramePointerElim(MF) && - MFI->adjustsStack())) - return true; - return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); -} - -/// needsFrameBaseReg - Returns true if the instruction's frame index -/// reference would be better served by a base register other than FP -/// or SP. Used by LocalStackFrameAllocation to determine which frame index -/// references it should create new base registers for. -bool ARM64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, - int64_t Offset) const { - for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) - assert(i < MI->getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - - // It's the load/store FI references that cause issues, as it can be difficult - // to materialize the offset if it won't fit in the literal field. Estimate - // based on the size of the local frame and some conservative assumptions - // about the rest of the stack frame (note, this is pre-regalloc, so - // we don't know everything for certain yet) whether this offset is likely - // to be out of range of the immediate. Return true if so. - - // We only generate virtual base registers for loads and stores, so - // return false for everything else. - if (!MI->mayLoad() && !MI->mayStore()) - return false; - - // Without a virtual base register, if the function has variable sized - // objects, all fixed-size local references will be via the frame pointer, - // Approximate the offset and see if it's legal for the instruction. - // Note that the incoming offset is based on the SP value at function entry, - // so it'll be negative. - MachineFunction &MF = *MI->getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - - // Estimate an offset from the frame pointer. - // Conservatively assume all GPR callee-saved registers get pushed. - // FP, LR, X19-X28, D8-D15. 64-bits each. - int64_t FPOffset = Offset - 16 * 20; - // Estimate an offset from the stack pointer. - // The incoming offset is relating to the SP at the start of the function, - // but when we access the local it'll be relative to the SP after local - // allocation, so adjust our SP-relative offset by that allocation size. - Offset += MFI->getLocalFrameSize(); - // Assume that we'll have at least some spill slots allocated. - // FIXME: This is a total SWAG number. We should run some statistics - // and pick a real one. - Offset += 128; // 128 bytes of spill slots - - // If there is a frame pointer, try using it. 
- // The FP is only available if there is no dynamic realignment. We - // don't know for sure yet whether we'll need that, so we guess based - // on whether there are any local variables that would trigger it. - if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset)) - return false; - - // If we can reference via the stack pointer or base pointer, try that. - // FIXME: This (and the code that resolves the references) can be improved - // to only disallow SP relative references in the live range of - // the VLA(s). In practice, it's unclear how much difference that - // would make, but it may be worth doing. - if (isFrameOffsetLegal(MI, Offset)) - return false; - - // The offset likely isn't legal; we want to allocate a virtual base register. - return true; -} - -bool ARM64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, - int64_t Offset) const { - assert(Offset <= INT_MAX && "Offset too big to fit in int."); - assert(MI && "Unable to get the legal offset for nil instruction."); - int SaveOffset = Offset; - return isARM64FrameOffsetLegal(*MI, SaveOffset) & ARM64FrameOffsetIsLegal; -} - -/// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx -/// at the beginning of the basic block. -void ARM64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, - unsigned BaseReg, - int FrameIdx, - int64_t Offset) const { - MachineBasicBlock::iterator Ins = MBB->begin(); - DebugLoc DL; // Defaults to "unknown" - if (Ins != MBB->end()) - DL = Ins->getDebugLoc(); - - const MCInstrDesc &MCID = TII->get(ARM64::ADDXri); - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - const MachineFunction &MF = *MBB->getParent(); - MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF)); - unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0); - - BuildMI(*MBB, Ins, DL, MCID, BaseReg) - .addFrameIndex(FrameIdx) - .addImm(Offset) - .addImm(Shifter); -} - -void ARM64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, - int64_t Offset) const { - int Off = Offset; // ARM doesn't need the general 64-bit offsets - unsigned i = 0; - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - bool Done = rewriteARM64FrameIndex(MI, i, BaseReg, Off, TII); - assert(Done && "Unable to resolve frame index!"); - (void)Done; -} - -void ARM64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, unsigned FIOperandNum, - RegScavenger *RS) const { - assert(SPAdj == 0 && "Unexpected"); - - MachineInstr &MI = *II; - MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - const ARM64FrameLowering *TFI = static_cast<const ARM64FrameLowering *>( - MF.getTarget().getFrameLowering()); - - int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); - unsigned FrameReg; - int Offset; - - // Special handling of dbg_value, stackmap and patchpoint instructions. 
- if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP || - MI.getOpcode() == TargetOpcode::PATCHPOINT) { - Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, - /*PreferFP=*/true); - Offset += MI.getOperand(FIOperandNum + 1).getImm(); - MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); - return; - } - - // Modify MI as necessary to handle as much of 'Offset' as possible - Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg); - if (rewriteARM64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII)) - return; - - assert((!RS || !RS->isScavengingFrameIndex(FrameIndex)) && - "Emergency spill slot is out of reach"); - - // If we get here, the immediate doesn't fit into the instruction. We folded - // as much as possible above. Handle the rest, providing a register that is - // SP+LargeImm. - unsigned ScratchReg = - MF.getRegInfo().createVirtualRegister(&ARM64::GPR64RegClass); - emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII); - MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true); -} - -namespace llvm { - -unsigned ARM64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - switch (RC->getID()) { - default: - return 0; - case ARM64::GPR32RegClassID: - case ARM64::GPR32spRegClassID: - case ARM64::GPR32allRegClassID: - case ARM64::GPR64spRegClassID: - case ARM64::GPR64allRegClassID: - case ARM64::GPR64RegClassID: - case ARM64::GPR32commonRegClassID: - case ARM64::GPR64commonRegClassID: - return 32 - 1 // XZR/SP - - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP - - STI->isTargetDarwin() // X18 reserved as platform register - - hasBasePointer(MF); // X19 - case ARM64::FPR8RegClassID: - case ARM64::FPR16RegClassID: - case ARM64::FPR32RegClassID: - case ARM64::FPR64RegClassID: - case ARM64::FPR128RegClassID: - return 32; - - case ARM64::DDRegClassID: - case ARM64::DDDRegClassID: - case ARM64::DDDDRegClassID: - case ARM64::QQRegClassID: - case ARM64::QQQRegClassID: - case ARM64::QQQQRegClassID: - return 32; - - case ARM64::FPR128_loRegClassID: - return 16; - } -} - -} // namespace llvm diff --git a/lib/Target/ARM64/ARM64RegisterInfo.h b/lib/Target/ARM64/ARM64RegisterInfo.h deleted file mode 100644 index 31d9242..0000000 --- a/lib/Target/ARM64/ARM64RegisterInfo.h +++ /dev/null @@ -1,101 +0,0 @@ -//===- ARM64RegisterInfo.h - ARM64 Register Information Impl ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM64 implementation of the MRegisterInfo class. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_ARM64REGISTERINFO_H -#define LLVM_TARGET_ARM64REGISTERINFO_H - -#define GET_REGINFO_HEADER -#include "ARM64GenRegisterInfo.inc" - -namespace llvm { - -class ARM64InstrInfo; -class ARM64Subtarget; -class MachineFunction; -class RegScavenger; -class TargetRegisterClass; - -struct ARM64RegisterInfo : public ARM64GenRegisterInfo { -private: - const ARM64InstrInfo *TII; - const ARM64Subtarget *STI; - -public: - ARM64RegisterInfo(const ARM64InstrInfo *tii, const ARM64Subtarget *sti); - - bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; - - /// Code Generation virtual methods... - const uint16_t * - getCalleeSavedRegs(const MachineFunction *MF = 0) const override; - const uint32_t *getCallPreservedMask(CallingConv::ID) const override; - - unsigned getCSRFirstUseCost() const { - // The cost will be compared against BlockFrequency where entry has the - // value of 1 << 14. A value of 5 will choose to spill or split really - // cold path instead of using a callee-saved register. - return 5; - } - - // Calls involved in thread-local variable lookup save more registers than - // normal calls, so they need a different mask to represent this. - const uint32_t *getTLSCallPreservedMask() const; - - /// getThisReturnPreservedMask - Returns a call preserved mask specific to the - /// case that 'returned' is on an i64 first argument if the calling convention - /// is one that can (partially) model this attribute with a preserved mask - /// (i.e. it is a calling convention that uses the same register for the first - /// i64 argument and an i64 return value) - /// - /// Should return NULL in the case that the calling convention does not have - /// this property - const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; - - BitVector getReservedRegs(const MachineFunction &MF) const override; - const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, - unsigned Kind = 0) const override; - const TargetRegisterClass * - getCrossCopyRegClass(const TargetRegisterClass *RC) const override; - - bool requiresRegisterScavenging(const MachineFunction &MF) const override; - bool useFPForScavengingIndex(const MachineFunction &MF) const override; - bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; - - bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; - bool isFrameOffsetLegal(const MachineInstr *MI, - int64_t Offset) const override; - void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, - int FrameIdx, - int64_t Offset) const override; - void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, - int64_t Offset) const override; - void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - unsigned FIOperandNum, - RegScavenger *RS = NULL) const override; - bool cannotEliminateFrame(const MachineFunction &MF) const; - - bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override; - bool hasBasePointer(const MachineFunction &MF) const; - unsigned getBaseRegister() const; - - // Debug information queries. 
- unsigned getFrameRegister(const MachineFunction &MF) const override; - - unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const override; -}; - -} // end namespace llvm - -#endif // LLVM_TARGET_ARM64REGISTERINFO_H diff --git a/lib/Target/ARM64/ARM64RegisterInfo.td b/lib/Target/ARM64/ARM64RegisterInfo.td deleted file mode 100644 index 96001c5..0000000 --- a/lib/Target/ARM64/ARM64RegisterInfo.td +++ /dev/null @@ -1,561 +0,0 @@ -//===- ARM64RegisterInfo.td - Describe the ARM64 Regisers --*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - - -class ARM64Reg<bits<16> enc, string n, list<Register> subregs = [], - list<string> altNames = []> - : Register<n, altNames> { - let HWEncoding = enc; - let Namespace = "ARM64"; - let SubRegs = subregs; -} - -let Namespace = "ARM64" in { - def sub_32 : SubRegIndex<32>; - - def bsub : SubRegIndex<8>; - def hsub : SubRegIndex<16>; - def ssub : SubRegIndex<32>; - def dsub : SubRegIndex<32>; - def qhisub : SubRegIndex<64>; - def qsub : SubRegIndex<64>; - // Note: Code depends on these having consecutive numbers - def dsub0 : SubRegIndex<64>; - def dsub1 : SubRegIndex<64>; - def dsub2 : SubRegIndex<64>; - def dsub3 : SubRegIndex<64>; - // Note: Code depends on these having consecutive numbers - def qsub0 : SubRegIndex<128>; - def qsub1 : SubRegIndex<128>; - def qsub2 : SubRegIndex<128>; - def qsub3 : SubRegIndex<128>; -} - -let Namespace = "ARM64" in { - def vreg : RegAltNameIndex; - def vlist1 : RegAltNameIndex; -} - -//===----------------------------------------------------------------------===// -// Registers -//===----------------------------------------------------------------------===// -def W0 : ARM64Reg<0, "w0" >, DwarfRegNum<[0]>; -def W1 : ARM64Reg<1, "w1" >, DwarfRegNum<[1]>; -def W2 : ARM64Reg<2, "w2" >, DwarfRegNum<[2]>; -def W3 : ARM64Reg<3, "w3" >, DwarfRegNum<[3]>; -def W4 : ARM64Reg<4, "w4" >, DwarfRegNum<[4]>; -def W5 : ARM64Reg<5, "w5" >, DwarfRegNum<[5]>; -def W6 : ARM64Reg<6, "w6" >, DwarfRegNum<[6]>; -def W7 : ARM64Reg<7, "w7" >, DwarfRegNum<[7]>; -def W8 : ARM64Reg<8, "w8" >, DwarfRegNum<[8]>; -def W9 : ARM64Reg<9, "w9" >, DwarfRegNum<[9]>; -def W10 : ARM64Reg<10, "w10">, DwarfRegNum<[10]>; -def W11 : ARM64Reg<11, "w11">, DwarfRegNum<[11]>; -def W12 : ARM64Reg<12, "w12">, DwarfRegNum<[12]>; -def W13 : ARM64Reg<13, "w13">, DwarfRegNum<[13]>; -def W14 : ARM64Reg<14, "w14">, DwarfRegNum<[14]>; -def W15 : ARM64Reg<15, "w15">, DwarfRegNum<[15]>; -def W16 : ARM64Reg<16, "w16">, DwarfRegNum<[16]>; -def W17 : ARM64Reg<17, "w17">, DwarfRegNum<[17]>; -def W18 : ARM64Reg<18, "w18">, DwarfRegNum<[18]>; -def W19 : ARM64Reg<19, "w19">, DwarfRegNum<[19]>; -def W20 : ARM64Reg<20, "w20">, DwarfRegNum<[20]>; -def W21 : ARM64Reg<21, "w21">, DwarfRegNum<[21]>; -def W22 : ARM64Reg<22, "w22">, DwarfRegNum<[22]>; -def W23 : ARM64Reg<23, "w23">, DwarfRegNum<[23]>; -def W24 : ARM64Reg<24, "w24">, DwarfRegNum<[24]>; -def W25 : ARM64Reg<25, "w25">, DwarfRegNum<[25]>; -def W26 : ARM64Reg<26, "w26">, DwarfRegNum<[26]>; -def W27 : ARM64Reg<27, "w27">, DwarfRegNum<[27]>; -def W28 : ARM64Reg<28, "w28">, DwarfRegNum<[28]>; -def W29 : ARM64Reg<29, "w29">, DwarfRegNum<[29]>; -def W30 : ARM64Reg<30, "w30">, DwarfRegNum<[30]>; -def WSP : 
ARM64Reg<31, "wsp">, DwarfRegNum<[31]>; -def WZR : ARM64Reg<31, "wzr">, DwarfRegAlias<WSP>; - -let SubRegIndices = [sub_32] in { -def X0 : ARM64Reg<0, "x0", [W0]>, DwarfRegAlias<W0>; -def X1 : ARM64Reg<1, "x1", [W1]>, DwarfRegAlias<W1>; -def X2 : ARM64Reg<2, "x2", [W2]>, DwarfRegAlias<W2>; -def X3 : ARM64Reg<3, "x3", [W3]>, DwarfRegAlias<W3>; -def X4 : ARM64Reg<4, "x4", [W4]>, DwarfRegAlias<W4>; -def X5 : ARM64Reg<5, "x5", [W5]>, DwarfRegAlias<W5>; -def X6 : ARM64Reg<6, "x6", [W6]>, DwarfRegAlias<W6>; -def X7 : ARM64Reg<7, "x7", [W7]>, DwarfRegAlias<W7>; -def X8 : ARM64Reg<8, "x8", [W8]>, DwarfRegAlias<W8>; -def X9 : ARM64Reg<9, "x9", [W9]>, DwarfRegAlias<W9>; -def X10 : ARM64Reg<10, "x10", [W10]>, DwarfRegAlias<W10>; -def X11 : ARM64Reg<11, "x11", [W11]>, DwarfRegAlias<W11>; -def X12 : ARM64Reg<12, "x12", [W12]>, DwarfRegAlias<W12>; -def X13 : ARM64Reg<13, "x13", [W13]>, DwarfRegAlias<W13>; -def X14 : ARM64Reg<14, "x14", [W14]>, DwarfRegAlias<W14>; -def X15 : ARM64Reg<15, "x15", [W15]>, DwarfRegAlias<W15>; -def X16 : ARM64Reg<16, "x16", [W16]>, DwarfRegAlias<W16>; -def X17 : ARM64Reg<17, "x17", [W17]>, DwarfRegAlias<W17>; -def X18 : ARM64Reg<18, "x18", [W18]>, DwarfRegAlias<W18>; -def X19 : ARM64Reg<19, "x19", [W19]>, DwarfRegAlias<W19>; -def X20 : ARM64Reg<20, "x20", [W20]>, DwarfRegAlias<W20>; -def X21 : ARM64Reg<21, "x21", [W21]>, DwarfRegAlias<W21>; -def X22 : ARM64Reg<22, "x22", [W22]>, DwarfRegAlias<W22>; -def X23 : ARM64Reg<23, "x23", [W23]>, DwarfRegAlias<W23>; -def X24 : ARM64Reg<24, "x24", [W24]>, DwarfRegAlias<W24>; -def X25 : ARM64Reg<25, "x25", [W25]>, DwarfRegAlias<W25>; -def X26 : ARM64Reg<26, "x26", [W26]>, DwarfRegAlias<W26>; -def X27 : ARM64Reg<27, "x27", [W27]>, DwarfRegAlias<W27>; -def X28 : ARM64Reg<28, "x28", [W28]>, DwarfRegAlias<W28>; -def FP : ARM64Reg<29, "fp", [W29]>, DwarfRegAlias<W29>; -def LR : ARM64Reg<30, "lr", [W30]>, DwarfRegAlias<W30>; -def SP : ARM64Reg<31, "sp", [WSP]>, DwarfRegAlias<WSP>; -def XZR : ARM64Reg<31, "xzr", [WZR]>, DwarfRegAlias<WSP>; -} - -// Condition code register. -def CPSR : ARM64Reg<0, "cpsr">; - -// GPR register classes with the intersections of GPR32/GPR32sp and -// GPR64/GPR64sp for use by the coalescer. -def GPR32common : RegisterClass<"ARM64", [i32], 32, (sequence "W%u", 0, 30)> { - let AltOrders = [(rotl GPR32common, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64common : RegisterClass<"ARM64", [i64], 64, - (add (sequence "X%u", 0, 28), FP, LR)> { - let AltOrders = [(rotl GPR64common, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -// GPR register classes which exclude SP/WSP. -def GPR32 : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR)> { - let AltOrders = [(rotl GPR32, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64 : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR)> { - let AltOrders = [(rotl GPR64, 8)]; - let AltOrderSelect = [{ return 1; }]; -} - -// GPR register classes which include SP/WSP. -def GPR32sp : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WSP)> { - let AltOrders = [(rotl GPR32sp, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64sp : RegisterClass<"ARM64", [i64], 64, (add GPR64common, SP)> { - let AltOrders = [(rotl GPR64sp, 8)]; - let AltOrderSelect = [{ return 1; }]; -} - -// GPR register classes which include WZR/XZR AND SP/WSP. This is not a -// constraint used by any instructions, it is used as a common super-class. 
-def GPR32all : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR, WSP)>; -def GPR64all : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR, SP)>; - -// For tail calls, we can't use callee-saved registers, as they are restored -// to the saved value before the tail call, which would clobber a call address. -// This is for indirect tail calls to store the address of the destination. -def tcGPR64 : RegisterClass<"ARM64", [i64], 64, (sub GPR64common, X19, X20, X21, - X22, X23, X24, X25, X26, - X27, X28)>; - -// GPR register classes for post increment ammount of vector load/store that -// has alternate printing when Rm=31 and prints a constant immediate value -// equal to the total number of bytes transferred. -def GPR64pi1 : RegisterOperand<GPR64, "printPostIncOperand1">; -def GPR64pi2 : RegisterOperand<GPR64, "printPostIncOperand2">; -def GPR64pi3 : RegisterOperand<GPR64, "printPostIncOperand3">; -def GPR64pi4 : RegisterOperand<GPR64, "printPostIncOperand4">; -def GPR64pi6 : RegisterOperand<GPR64, "printPostIncOperand6">; -def GPR64pi8 : RegisterOperand<GPR64, "printPostIncOperand8">; -def GPR64pi12 : RegisterOperand<GPR64, "printPostIncOperand12">; -def GPR64pi16 : RegisterOperand<GPR64, "printPostIncOperand16">; -def GPR64pi24 : RegisterOperand<GPR64, "printPostIncOperand24">; -def GPR64pi32 : RegisterOperand<GPR64, "printPostIncOperand32">; -def GPR64pi48 : RegisterOperand<GPR64, "printPostIncOperand48">; -def GPR64pi64 : RegisterOperand<GPR64, "printPostIncOperand64">; - -// Condition code regclass. -def CCR : RegisterClass<"ARM64", [i32], 32, (add CPSR)> { - let CopyCost = -1; // Don't allow copying of status registers. - - // CCR is not allocatable. - let isAllocatable = 0; -} - -//===----------------------------------------------------------------------===// -// Floating Point Scalar Registers -//===----------------------------------------------------------------------===// - -def B0 : ARM64Reg<0, "b0">, DwarfRegNum<[64]>; -def B1 : ARM64Reg<1, "b1">, DwarfRegNum<[65]>; -def B2 : ARM64Reg<2, "b2">, DwarfRegNum<[66]>; -def B3 : ARM64Reg<3, "b3">, DwarfRegNum<[67]>; -def B4 : ARM64Reg<4, "b4">, DwarfRegNum<[68]>; -def B5 : ARM64Reg<5, "b5">, DwarfRegNum<[69]>; -def B6 : ARM64Reg<6, "b6">, DwarfRegNum<[70]>; -def B7 : ARM64Reg<7, "b7">, DwarfRegNum<[71]>; -def B8 : ARM64Reg<8, "b8">, DwarfRegNum<[72]>; -def B9 : ARM64Reg<9, "b9">, DwarfRegNum<[73]>; -def B10 : ARM64Reg<10, "b10">, DwarfRegNum<[74]>; -def B11 : ARM64Reg<11, "b11">, DwarfRegNum<[75]>; -def B12 : ARM64Reg<12, "b12">, DwarfRegNum<[76]>; -def B13 : ARM64Reg<13, "b13">, DwarfRegNum<[77]>; -def B14 : ARM64Reg<14, "b14">, DwarfRegNum<[78]>; -def B15 : ARM64Reg<15, "b15">, DwarfRegNum<[79]>; -def B16 : ARM64Reg<16, "b16">, DwarfRegNum<[80]>; -def B17 : ARM64Reg<17, "b17">, DwarfRegNum<[81]>; -def B18 : ARM64Reg<18, "b18">, DwarfRegNum<[82]>; -def B19 : ARM64Reg<19, "b19">, DwarfRegNum<[83]>; -def B20 : ARM64Reg<20, "b20">, DwarfRegNum<[84]>; -def B21 : ARM64Reg<21, "b21">, DwarfRegNum<[85]>; -def B22 : ARM64Reg<22, "b22">, DwarfRegNum<[86]>; -def B23 : ARM64Reg<23, "b23">, DwarfRegNum<[87]>; -def B24 : ARM64Reg<24, "b24">, DwarfRegNum<[88]>; -def B25 : ARM64Reg<25, "b25">, DwarfRegNum<[89]>; -def B26 : ARM64Reg<26, "b26">, DwarfRegNum<[90]>; -def B27 : ARM64Reg<27, "b27">, DwarfRegNum<[91]>; -def B28 : ARM64Reg<28, "b28">, DwarfRegNum<[92]>; -def B29 : ARM64Reg<29, "b29">, DwarfRegNum<[93]>; -def B30 : ARM64Reg<30, "b30">, DwarfRegNum<[94]>; -def B31 : ARM64Reg<31, "b31">, DwarfRegNum<[95]>; - -let SubRegIndices = 
[bsub] in { -def H0 : ARM64Reg<0, "h0", [B0]>, DwarfRegAlias<B0>; -def H1 : ARM64Reg<1, "h1", [B1]>, DwarfRegAlias<B1>; -def H2 : ARM64Reg<2, "h2", [B2]>, DwarfRegAlias<B2>; -def H3 : ARM64Reg<3, "h3", [B3]>, DwarfRegAlias<B3>; -def H4 : ARM64Reg<4, "h4", [B4]>, DwarfRegAlias<B4>; -def H5 : ARM64Reg<5, "h5", [B5]>, DwarfRegAlias<B5>; -def H6 : ARM64Reg<6, "h6", [B6]>, DwarfRegAlias<B6>; -def H7 : ARM64Reg<7, "h7", [B7]>, DwarfRegAlias<B7>; -def H8 : ARM64Reg<8, "h8", [B8]>, DwarfRegAlias<B8>; -def H9 : ARM64Reg<9, "h9", [B9]>, DwarfRegAlias<B9>; -def H10 : ARM64Reg<10, "h10", [B10]>, DwarfRegAlias<B10>; -def H11 : ARM64Reg<11, "h11", [B11]>, DwarfRegAlias<B11>; -def H12 : ARM64Reg<12, "h12", [B12]>, DwarfRegAlias<B12>; -def H13 : ARM64Reg<13, "h13", [B13]>, DwarfRegAlias<B13>; -def H14 : ARM64Reg<14, "h14", [B14]>, DwarfRegAlias<B14>; -def H15 : ARM64Reg<15, "h15", [B15]>, DwarfRegAlias<B15>; -def H16 : ARM64Reg<16, "h16", [B16]>, DwarfRegAlias<B16>; -def H17 : ARM64Reg<17, "h17", [B17]>, DwarfRegAlias<B17>; -def H18 : ARM64Reg<18, "h18", [B18]>, DwarfRegAlias<B18>; -def H19 : ARM64Reg<19, "h19", [B19]>, DwarfRegAlias<B19>; -def H20 : ARM64Reg<20, "h20", [B20]>, DwarfRegAlias<B20>; -def H21 : ARM64Reg<21, "h21", [B21]>, DwarfRegAlias<B21>; -def H22 : ARM64Reg<22, "h22", [B22]>, DwarfRegAlias<B22>; -def H23 : ARM64Reg<23, "h23", [B23]>, DwarfRegAlias<B23>; -def H24 : ARM64Reg<24, "h24", [B24]>, DwarfRegAlias<B24>; -def H25 : ARM64Reg<25, "h25", [B25]>, DwarfRegAlias<B25>; -def H26 : ARM64Reg<26, "h26", [B26]>, DwarfRegAlias<B26>; -def H27 : ARM64Reg<27, "h27", [B27]>, DwarfRegAlias<B27>; -def H28 : ARM64Reg<28, "h28", [B28]>, DwarfRegAlias<B28>; -def H29 : ARM64Reg<29, "h29", [B29]>, DwarfRegAlias<B29>; -def H30 : ARM64Reg<30, "h30", [B30]>, DwarfRegAlias<B30>; -def H31 : ARM64Reg<31, "h31", [B31]>, DwarfRegAlias<B31>; -} - -let SubRegIndices = [hsub] in { -def S0 : ARM64Reg<0, "s0", [H0]>, DwarfRegAlias<B0>; -def S1 : ARM64Reg<1, "s1", [H1]>, DwarfRegAlias<B1>; -def S2 : ARM64Reg<2, "s2", [H2]>, DwarfRegAlias<B2>; -def S3 : ARM64Reg<3, "s3", [H3]>, DwarfRegAlias<B3>; -def S4 : ARM64Reg<4, "s4", [H4]>, DwarfRegAlias<B4>; -def S5 : ARM64Reg<5, "s5", [H5]>, DwarfRegAlias<B5>; -def S6 : ARM64Reg<6, "s6", [H6]>, DwarfRegAlias<B6>; -def S7 : ARM64Reg<7, "s7", [H7]>, DwarfRegAlias<B7>; -def S8 : ARM64Reg<8, "s8", [H8]>, DwarfRegAlias<B8>; -def S9 : ARM64Reg<9, "s9", [H9]>, DwarfRegAlias<B9>; -def S10 : ARM64Reg<10, "s10", [H10]>, DwarfRegAlias<B10>; -def S11 : ARM64Reg<11, "s11", [H11]>, DwarfRegAlias<B11>; -def S12 : ARM64Reg<12, "s12", [H12]>, DwarfRegAlias<B12>; -def S13 : ARM64Reg<13, "s13", [H13]>, DwarfRegAlias<B13>; -def S14 : ARM64Reg<14, "s14", [H14]>, DwarfRegAlias<B14>; -def S15 : ARM64Reg<15, "s15", [H15]>, DwarfRegAlias<B15>; -def S16 : ARM64Reg<16, "s16", [H16]>, DwarfRegAlias<B16>; -def S17 : ARM64Reg<17, "s17", [H17]>, DwarfRegAlias<B17>; -def S18 : ARM64Reg<18, "s18", [H18]>, DwarfRegAlias<B18>; -def S19 : ARM64Reg<19, "s19", [H19]>, DwarfRegAlias<B19>; -def S20 : ARM64Reg<20, "s20", [H20]>, DwarfRegAlias<B20>; -def S21 : ARM64Reg<21, "s21", [H21]>, DwarfRegAlias<B21>; -def S22 : ARM64Reg<22, "s22", [H22]>, DwarfRegAlias<B22>; -def S23 : ARM64Reg<23, "s23", [H23]>, DwarfRegAlias<B23>; -def S24 : ARM64Reg<24, "s24", [H24]>, DwarfRegAlias<B24>; -def S25 : ARM64Reg<25, "s25", [H25]>, DwarfRegAlias<B25>; -def S26 : ARM64Reg<26, "s26", [H26]>, DwarfRegAlias<B26>; -def S27 : ARM64Reg<27, "s27", [H27]>, DwarfRegAlias<B27>; -def S28 : ARM64Reg<28, "s28", [H28]>, DwarfRegAlias<B28>; -def 
S29 : ARM64Reg<29, "s29", [H29]>, DwarfRegAlias<B29>; -def S30 : ARM64Reg<30, "s30", [H30]>, DwarfRegAlias<B30>; -def S31 : ARM64Reg<31, "s31", [H31]>, DwarfRegAlias<B31>; -} - -let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in { -def D0 : ARM64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias<B0>; -def D1 : ARM64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias<B1>; -def D2 : ARM64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias<B2>; -def D3 : ARM64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias<B3>; -def D4 : ARM64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias<B4>; -def D5 : ARM64Reg<5, "d5", [S5], ["v5", ""]>, DwarfRegAlias<B5>; -def D6 : ARM64Reg<6, "d6", [S6], ["v6", ""]>, DwarfRegAlias<B6>; -def D7 : ARM64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias<B7>; -def D8 : ARM64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias<B8>; -def D9 : ARM64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias<B9>; -def D10 : ARM64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias<B10>; -def D11 : ARM64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias<B11>; -def D12 : ARM64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias<B12>; -def D13 : ARM64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias<B13>; -def D14 : ARM64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias<B14>; -def D15 : ARM64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias<B15>; -def D16 : ARM64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias<B16>; -def D17 : ARM64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias<B17>; -def D18 : ARM64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias<B18>; -def D19 : ARM64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias<B19>; -def D20 : ARM64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias<B20>; -def D21 : ARM64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias<B21>; -def D22 : ARM64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias<B22>; -def D23 : ARM64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias<B23>; -def D24 : ARM64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias<B24>; -def D25 : ARM64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias<B25>; -def D26 : ARM64Reg<26, "d26", [S26], ["v26", ""]>, DwarfRegAlias<B26>; -def D27 : ARM64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias<B27>; -def D28 : ARM64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias<B28>; -def D29 : ARM64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias<B29>; -def D30 : ARM64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias<B30>; -def D31 : ARM64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias<B31>; -} - -let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in { -def Q0 : ARM64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias<B0>; -def Q1 : ARM64Reg<1, "q1", [D1], ["v1", ""]>, DwarfRegAlias<B1>; -def Q2 : ARM64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias<B2>; -def Q3 : ARM64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias<B3>; -def Q4 : ARM64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias<B4>; -def Q5 : ARM64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias<B5>; -def Q6 : ARM64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias<B6>; -def Q7 : ARM64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias<B7>; -def Q8 : ARM64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias<B8>; -def Q9 : ARM64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias<B9>; -def Q10 : ARM64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias<B10>; -def Q11 : ARM64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias<B11>; -def Q12 : ARM64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias<B12>; -def Q13 : ARM64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias<B13>; -def Q14 : ARM64Reg<14, "q14", [D14], 
["v14", ""]>, DwarfRegAlias<B14>; -def Q15 : ARM64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias<B15>; -def Q16 : ARM64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias<B16>; -def Q17 : ARM64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias<B17>; -def Q18 : ARM64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias<B18>; -def Q19 : ARM64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias<B19>; -def Q20 : ARM64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias<B20>; -def Q21 : ARM64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias<B21>; -def Q22 : ARM64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias<B22>; -def Q23 : ARM64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias<B23>; -def Q24 : ARM64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias<B24>; -def Q25 : ARM64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias<B25>; -def Q26 : ARM64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias<B26>; -def Q27 : ARM64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias<B27>; -def Q28 : ARM64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias<B28>; -def Q29 : ARM64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias<B29>; -def Q30 : ARM64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias<B30>; -def Q31 : ARM64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>; -} - -def FPR8 : RegisterClass<"ARM64", [untyped], 8, (sequence "B%u", 0, 31)> { - let Size = 8; -} -def FPR16 : RegisterClass<"ARM64", [untyped], 16, (sequence "H%u", 0, 31)> { - let Size = 16; -} -def FPR32 : RegisterClass<"ARM64", [f32, i32], 32,(sequence "S%u", 0, 31)>; -def FPR64 : RegisterClass<"ARM64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32, - v1i64], - 64, (sequence "D%u", 0, 31)>; -// We don't (yet) have an f128 legal type, so don't use that here. We -// normalize 128-bit vectors to v2f64 for arg passing and such, so use -// that here. -def FPR128 : RegisterClass<"ARM64", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128], - 128, (sequence "Q%u", 0, 31)>; - -// The lower 16 vector registers. Some instructions can only take registers -// in this range. -def FPR128_lo : RegisterClass<"ARM64", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - 128, (trunc FPR128, 16)>; - -// Pairs, triples, and quads of 64-bit vector registers. -def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>; -def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2)]>; -def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2), (rotl FPR64, 3)]>; -def DD : RegisterClass<"ARM64", [untyped], 64, (add DSeqPairs)> { - let Size = 128; -} -def DDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqTriples)> { - let Size = 196; -} -def DDDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqQuads)> { - let Size = 256; -} - -// Pairs, triples, and quads of 128-bit vector registers. -def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>; -def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2)]>; -def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2), (rotl FPR128, 3)]>; -def QQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqPairs)> { - let Size = 256; -} -def QQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqTriples)> { - let Size = 384; -} -def QQQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqQuads)> { - let Size = 512; -} - - -// Vector operand versions of the FP registers. 
Alternate name printing and -// assmebler matching. -def VectorRegAsmOperand : AsmOperandClass { let Name = "VectorReg"; } -let ParserMatchClass = VectorRegAsmOperand in { -def V64 : RegisterOperand<FPR64, "printVRegOperand">; -def V128 : RegisterOperand<FPR128, "printVRegOperand">; -def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand">; -} - -class TypedVecListAsmOperand<int count, int regsize, int lanes, string kind> - : AsmOperandClass { - let Name = "TypedVectorList" # count # "_" # lanes # kind; - - let PredicateMethod - = "isTypedVectorList<" # count # ", " # lanes # ", '" # kind # "'>"; - let RenderMethod = "addVectorList" # regsize # "Operands<" # count # ">"; -} - -class TypedVecListRegOperand<RegisterClass Reg, int lanes, string kind> - : RegisterOperand<Reg, "printTypedVectorList<" # lanes # ", '" - # kind # "'>">; - -multiclass VectorList<int count, RegisterClass Reg64, RegisterClass Reg128> { - // With implicit types (probably on instruction instead). E.g. { v0, v1 } - def _64AsmOperand : AsmOperandClass { - let Name = NAME # "64"; - let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">"; - let RenderMethod = "addVectorList64Operands<" # count # ">"; - } - - def "64" : RegisterOperand<Reg64, "printImplicitlyTypedVectorList"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_64AsmOperand"); - } - - def _128AsmOperand : AsmOperandClass { - let Name = NAME # "128"; - let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">"; - let RenderMethod = "addVectorList128Operands<" # count # ">"; - } - - def "128" : RegisterOperand<Reg128, "printImplicitlyTypedVectorList"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_128AsmOperand"); - } - - // 64-bit register lists with explicit type. - - // { v0.8b, v1.8b } - def _8bAsmOperand : TypedVecListAsmOperand<count, 64, 8, "b">; - def "8b" : TypedVecListRegOperand<Reg64, 8, "b"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8bAsmOperand"); - } - - // { v0.4h, v1.4h } - def _4hAsmOperand : TypedVecListAsmOperand<count, 64, 4, "h">; - def "4h" : TypedVecListRegOperand<Reg64, 4, "h"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4hAsmOperand"); - } - - // { v0.2s, v1.2s } - def _2sAsmOperand : TypedVecListAsmOperand<count, 64, 2, "s">; - def "2s" : TypedVecListRegOperand<Reg64, 2, "s"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_2sAsmOperand"); - } - - // { v0.1d, v1.1d } - def _1dAsmOperand : TypedVecListAsmOperand<count, 64, 1, "d">; - def "1d" : TypedVecListRegOperand<Reg64, 1, "d"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_1dAsmOperand"); - } - - // 128-bit register lists with explicit type - - // { v0.16b, v1.16b } - def _16bAsmOperand : TypedVecListAsmOperand<count, 128, 16, "b">; - def "16b" : TypedVecListRegOperand<Reg128, 16, "b"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_16bAsmOperand"); - } - - // { v0.8h, v1.8h } - def _8hAsmOperand : TypedVecListAsmOperand<count, 128, 8, "h">; - def "8h" : TypedVecListRegOperand<Reg128, 8, "h"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8hAsmOperand"); - } - - // { v0.4s, v1.4s } - def _4sAsmOperand : TypedVecListAsmOperand<count, 128, 4, "s">; - def "4s" : TypedVecListRegOperand<Reg128, 4, "s"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4sAsmOperand"); - } - - // { v0.2d, v1.2d } - def _2dAsmOperand : TypedVecListAsmOperand<count, 128, 2, "d">; - def "2d" : TypedVecListRegOperand<Reg128, 2, "d"> { - let ParserMatchClass = 
!cast<AsmOperandClass>(NAME # "_2dAsmOperand"); - } - - // { v0.b, v1.b } - def _bAsmOperand : TypedVecListAsmOperand<count, 128, 0, "b">; - def "b" : TypedVecListRegOperand<Reg128, 0, "b"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_bAsmOperand"); - } - - // { v0.h, v1.h } - def _hAsmOperand : TypedVecListAsmOperand<count, 128, 0, "h">; - def "h" : TypedVecListRegOperand<Reg128, 0, "h"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_hAsmOperand"); - } - - // { v0.s, v1.s } - def _sAsmOperand : TypedVecListAsmOperand<count, 128, 0, "s">; - def "s" : TypedVecListRegOperand<Reg128, 0, "s"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_sAsmOperand"); - } - - // { v0.d, v1.d } - def _dAsmOperand : TypedVecListAsmOperand<count, 128, 0, "d">; - def "d" : TypedVecListRegOperand<Reg128, 0, "d"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_dAsmOperand"); - } - - -} - -defm VecListOne : VectorList<1, FPR64, FPR128>; -defm VecListTwo : VectorList<2, DD, QQ>; -defm VecListThree : VectorList<3, DDD, QQQ>; -defm VecListFour : VectorList<4, DDDD, QQQQ>; - - -// Register operand versions of the scalar FP registers. -def FPR16Op : RegisterOperand<FPR16, "printOperand">; -def FPR32Op : RegisterOperand<FPR32, "printOperand">; -def FPR64Op : RegisterOperand<FPR64, "printOperand">; -def FPR128Op : RegisterOperand<FPR128, "printOperand">; diff --git a/lib/Target/ARM64/ARM64SchedCyclone.td b/lib/Target/ARM64/ARM64SchedCyclone.td deleted file mode 100644 index 65c68b3..0000000 --- a/lib/Target/ARM64/ARM64SchedCyclone.td +++ /dev/null @@ -1,852 +0,0 @@ -//=- ARMSchedCyclone.td - ARM64 Cyclone Scheduling Defs ------*- tablegen -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for ARM64 Cyclone to support -// instruction scheduling and other instruction cost heuristics. -// -//===----------------------------------------------------------------------===// - -def CycloneModel : SchedMachineModel { - let IssueWidth = 6; // 6 micro-ops are dispatched per cycle. - let MicroOpBufferSize = 192; // Based on the reorder buffer. - let LoadLatency = 4; // Optimistic load latency. - let MispredictPenalty = 16; // 14-19 cycles are typical. -} - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available on Cyclone. - -// 4 integer pipes -def CyUnitI : ProcResource<4> { - let BufferSize = 48; -} - -// 2 branch units: I[0..1] -def CyUnitB : ProcResource<2> { - let Super = CyUnitI; - let BufferSize = 24; -} - -// 1 indirect-branch unit: I[0] -def CyUnitBR : ProcResource<1> { - let Super = CyUnitB; -} - -// 2 shifter pipes: I[2..3] -// When an instruction consumes a CyUnitIS, it also consumes a CyUnitI -def CyUnitIS : ProcResource<2> { - let Super = CyUnitI; - let BufferSize = 24; -} - -// 1 mul pipe: I[0] -def CyUnitIM : ProcResource<1> { - let Super = CyUnitBR; - let BufferSize = 32; -} - -// 1 div pipe: I[1] -def CyUnitID : ProcResource<1> { - let Super = CyUnitB; - let BufferSize = 16; -} - -// 1 integer division unit. This is driven by the ID pipe, but only -// consumes the pipe for one cycle at issue and another cycle at writeback. -def CyUnitIntDiv : ProcResource<1>; - -// 2 ld/st pipes. 
-def CyUnitLS : ProcResource<2> { - let BufferSize = 28; -} - -// 3 fp/vector pipes. -def CyUnitV : ProcResource<3> { - let BufferSize = 48; -} -// 2 fp/vector arithmetic and multiply pipes: V[0-1] -def CyUnitVM : ProcResource<2> { - let Super = CyUnitV; - let BufferSize = 32; -} -// 1 fp/vector division/sqrt pipe: V[2] -def CyUnitVD : ProcResource<1> { - let Super = CyUnitV; - let BufferSize = 16; -} -// 1 fp compare pipe: V[0] -def CyUnitVC : ProcResource<1> { - let Super = CyUnitVM; - let BufferSize = 16; -} - -// 2 fp division/square-root units. These are driven by the VD pipe, -// but only consume the pipe for one cycle at issue and a cycle at writeback. -def CyUnitFloatDiv : ProcResource<2>; - -//===----------------------------------------------------------------------===// -// Define scheduler read/write resources and latency on Cyclone. -// This mirrors sections 7.7-7.9 of the Tuning Guide v1.0.1. - -let SchedModel = CycloneModel in { - -//--- -// 7.8.1. Moves -//--- - -// A single nop micro-op (uX). -def WriteX : SchedWriteRes<[]> { let Latency = 0; } - -// Move zero is a register rename (to machine register zero). -// The move is replaced by a single nop micro-op. -// MOVZ Rd, #0 -// AND Rd, Rzr, #imm -def WriteZPred : SchedPredicate<[{TII->isGPRZero(MI)}]>; -def WriteImmZ : SchedWriteVariant<[ - SchedVar<WriteZPred, [WriteX]>, - SchedVar<NoSchedPred, [WriteImm]>]>; -def : InstRW<[WriteImmZ], (instrs MOVZWi,MOVZXi,ANDWri,ANDXri)>; - -// Move GPR is a register rename and single nop micro-op. -// ORR Xd, XZR, Xm -// ADD Xd, Xn, #0 -def WriteIMovPred : SchedPredicate<[{TII->isGPRCopy(MI)}]>; -def WriteVMovPred : SchedPredicate<[{TII->isFPRCopy(MI)}]>; -def WriteMov : SchedWriteVariant<[ - SchedVar<WriteIMovPred, [WriteX]>, - SchedVar<WriteVMovPred, [WriteX]>, - SchedVar<NoSchedPred, [WriteI]>]>; -def : InstRW<[WriteMov], (instrs COPY,ORRXrr,ADDXrr)>; - -// Move non-zero immediate is an integer ALU op. -// MOVN,MOVZ,MOVK -def : WriteRes<WriteImm, [CyUnitI]>; - -//--- -// 7.8.2-7.8.5. Arithmetic and Logical, Comparison, Conditional, -// Shifts and Bitfield Operations -//--- - -// ADR,ADRP -// ADD(S)ri,SUB(S)ri,AND(S)ri,EORri,ORRri -// ADD(S)rr,SUB(S)rr,AND(S)rr,BIC(S)rr,EONrr,EORrr,ORNrr,ORRrr -// ADC(S),SBC(S) -// Aliases: CMN, CMP, TST -// -// Conditional operations. -// CCMNi,CCMPi,CCMNr,CCMPr, -// CSEL,CSINC,CSINV,CSNEG -// -// Bit counting and reversal operations. -// CLS,CLZ,RBIT,REV,REV16,REV32 -def : WriteRes<WriteI, [CyUnitI]>; - -// ADD with shifted register operand is a single micro-op that -// consumes a shift pipeline for two cycles. -// ADD(S)rs,SUB(S)rs,AND(S)rs,BIC(S)rs,EONrs,EORrs,ORNrs,ORRrs -// EXAMPLE: ADDrs Xn, Xm LSL #imm -def : WriteRes<WriteISReg, [CyUnitIS]> { - let Latency = 2; - let ResourceCycles = [2]; -} - -// ADD with extended register operand is the same as shifted reg operand. -// ADD(S)re,SUB(S)re -// EXAMPLE: ADDXre Xn, Xm, UXTB #1 -def : WriteRes<WriteIEReg, [CyUnitIS]> { - let Latency = 2; - let ResourceCycles = [2]; -} - -// Variable shift and bitfield operations. -// ASRV,LSLV,LSRV,RORV,BFM,SBFM,UBFM -def : WriteRes<WriteIS, [CyUnitIS]>; - -// EXTR Shifts a pair of registers and requires two micro-ops. -// The second micro-op is delayed, as modeled by ReadExtrHi. -// EXTR Xn, Xm, #imm -def : WriteRes<WriteExtr, [CyUnitIS, CyUnitIS]> { - let Latency = 2; - let NumMicroOps = 2; -} - -// EXTR's first register read is delayed by one cycle, effectively -// shortening its writer's latency. 
-// EXTR Xn, Xm, #imm -def : ReadAdvance<ReadExtrHi, 1>; - -//--- -// 7.8.6. Multiplies -//--- - -// MUL/MNEG are aliases for MADD/MSUB. -// MADDW,MSUBW,SMADDL,SMSUBL,UMADDL,UMSUBL -def : WriteRes<WriteIM32, [CyUnitIM]> { - let Latency = 4; -} -// MADDX,MSUBX,SMULH,UMULH -def : WriteRes<WriteIM64, [CyUnitIM]> { - let Latency = 5; -} - -//--- -// 7.8.7. Divide -//--- - -// 32-bit divide takes 7-13 cycles. 10 cycles covers a 20-bit quotient. -// The ID pipe is consumed for 2 cycles: issue and writeback. -// SDIVW,UDIVW -def : WriteRes<WriteID32, [CyUnitID, CyUnitIntDiv]> { - let Latency = 10; - let ResourceCycles = [2, 10]; -} -// 64-bit divide takes 7-21 cycles. 13 cycles covers a 32-bit quotient. -// The ID pipe is consumed for 2 cycles: issue and writeback. -// SDIVX,UDIVX -def : WriteRes<WriteID64, [CyUnitID, CyUnitIntDiv]> { - let Latency = 13; - let ResourceCycles = [2, 13]; -} - -//--- -// 7.8.8,7.8.10. Load/Store, single element -//--- - -// Integer loads take 4 cycles and use one LS unit for one cycle. -def : WriteRes<WriteLD, [CyUnitLS]> { - let Latency = 4; -} - -// Store-load forwarding is 4 cycles. -// -// Note: The store-exclusive sequence incorporates this -// latency. However, general heuristics should not model the -// dependence between a store and subsequent may-alias load because -// hardware speculation works. -def : WriteRes<WriteST, [CyUnitLS]> { - let Latency = 4; -} - -// Load from base address plus an optionally scaled register offset. -// Rt latency is latency WriteIS + WriteLD. -// EXAMPLE: LDR Xn, Xm [, lsl 3] -def CyWriteLDIdx : SchedWriteVariant<[ - SchedVar<ScaledIdxPred, [WriteIS, WriteLD]>, // Load from scaled register. - SchedVar<NoSchedPred, [WriteLD]>]>; // Load from register offset. -def : SchedAlias<WriteLDIdx, CyWriteLDIdx>; // Map ARM64->Cyclone type. - -// EXAMPLE: STR Xn, Xm [, lsl 3] -def CyWriteSTIdx : SchedWriteVariant<[ - SchedVar<ScaledIdxPred, [WriteIS, WriteST]>, // Store to scaled register. - SchedVar<NoSchedPred, [WriteST]>]>; // Store to register offset. -def : SchedAlias<WriteSTIdx, CyWriteSTIdx>; // Map ARM64->Cyclone type. - -// Read the (unshifted) base register Xn in the second micro-op one cycle later. -// EXAMPLE: LDR Xn, Xm [, lsl 3] -def ReadBaseRS : SchedReadAdvance<1>; -def CyReadAdrBase : SchedReadVariant<[ - SchedVar<ScaledIdxPred, [ReadBaseRS]>, // Read base reg after shifting offset. - SchedVar<NoSchedPred, [ReadDefault]>]>; // Read base reg with no shift. -def : SchedAlias<ReadAdrBase, CyReadAdrBase>; // Map ARM64->Cyclone type. - -//--- -// 7.8.9,7.8.11. Load/Store, paired -//--- - -// Address pre/post increment is a simple ALU op with one cycle latency. -def : WriteRes<WriteAdr, [CyUnitI]>; - -// LDP high register write is fused with the load, but a nop micro-op remains. -def : WriteRes<WriteLDHi, []> { - let Latency = 4; -} - -// STP is a vector op and store, except for QQ, which is just two stores. -def : SchedAlias<WriteSTP, WriteVSTShuffle>; -def : InstRW<[WriteST, WriteST], (instrs STPQi)>; - -//--- -// 7.8.13. Branches -//--- - -// Branches take a single micro-op. -// The misprediction penalty is defined as a SchedMachineModel property. -def : WriteRes<WriteBr, [CyUnitB]> {let Latency = 0;} -def : WriteRes<WriteBrReg, [CyUnitBR]> {let Latency = 0;} - -//--- -// 7.8.14. 
Never-issued Instructions, Barrier and Hint Operations -//--- - -// NOP,SEV,SEVL,WFE,WFI,YIELD -def : WriteRes<WriteHint, []> {let Latency = 0;} -// ISB -def : InstRW<[WriteI], (instrs ISB)>; -// SLREX,DMB,DSB -def : WriteRes<WriteBarrier, [CyUnitLS]>; - -// System instructions get an invalid latency because the latency of -// other operations across them is meaningless. -def : WriteRes<WriteSys, []> {let Latency = -1;} - -//===----------------------------------------------------------------------===// -// 7.9 Vector Unit Instructions - -// Simple vector operations take 2 cycles. -def : WriteRes<WriteV, [CyUnitV]> {let Latency = 2;} - -// Define some longer latency vector op types for Cyclone. -def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;} -def CyWriteV4 : SchedWriteRes<[CyUnitV]> {let Latency = 4;} -def CyWriteV5 : SchedWriteRes<[CyUnitV]> {let Latency = 5;} -def CyWriteV6 : SchedWriteRes<[CyUnitV]> {let Latency = 6;} - -// Simple floating-point operations take 2 cycles. -def : WriteRes<WriteF, [CyUnitV]> {let Latency = 2;} - -//--- -// 7.9.1 Vector Moves -//--- - -// TODO: Add Cyclone-specific zero-cycle zeros. LLVM currently -// generates expensive int-float conversion instead: -// FMOVDi Dd, #0.0 -// FMOVv2f64ns Vd.2d, #0.0 - -// FMOVSi,FMOVDi -def : WriteRes<WriteFImm, [CyUnitV]> {let Latency = 2;} - -// MOVI,MVNI are WriteV -// FMOVv2f32ns,FMOVv2f64ns,FMOVv4f32ns are WriteV - -// Move FPR is a register rename and single nop micro-op. -// ORR.16b Vd,Vn,Vn -// COPY is handled above in the WriteMov Variant. -def WriteVMov : SchedWriteVariant<[ - SchedVar<WriteVMovPred, [WriteX]>, - SchedVar<NoSchedPred, [WriteV]>]>; -def : InstRW<[WriteVMov], (instrs ORRv16i8)>; - -// FMOVSr,FMOVDr are WriteF. - -// MOV V,V is a WriteV. - -// CPY D,V[x] is a WriteV - -// INS V[x],V[y] is a WriteV. - -// FMOVWSr,FMOVXDr,FMOVXDHighr -def : SchedAlias<WriteFCopy, WriteVLD>; - -// FMOVSWr,FMOVDXr -def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>; - -// INS V[x],R -def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>; -def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>; - -// SMOV,UMOV R,V[x] -def CyWriteCopyToGPR : WriteSequence<[WriteLD, WriteI]>; -def : InstRW<[CyWriteCopyToGPR], (instregex "SMOVv","UMOVv")>; - -// DUP V,R -def : InstRW<[CyWriteCopyToFPR], (instregex "DUPv")>; - -// DUP V,V[x] is a WriteV. - -//--- -// 7.9.2 Integer Arithmetic, Logical, and Comparisons -//--- - -// BIC,ORR V,#imm are WriteV - -def : InstRW<[CyWriteV3], (instregex "ABSv")>; - -// MVN,NEG,NOT are WriteV - -def : InstRW<[CyWriteV3], (instregex "SQABSv","SQNEGv")>; - -// ADDP is a WriteV. -def CyWriteVADDLP : SchedWriteRes<[CyUnitV]> {let Latency = 2;} -def : InstRW<[CyWriteVADDLP], (instregex "SADDLPv","UADDLPv")>; - -def : InstRW<[CyWriteV3], - (instregex "ADDVv","SMAXVv","UMAXVv","SMINVv","UMINVv")>; - -def : InstRW<[CyWriteV3], (instregex "SADDLV","UADDLV")>; - -// ADD,SUB are WriteV - -// Forward declare. -def CyWriteVABD : SchedWriteRes<[CyUnitV]> {let Latency = 3;} - -// Add/Diff and accumulate uses the vector multiply unit. 
-def CyWriteVAccum : SchedWriteRes<[CyUnitVM]> {let Latency = 3;} -def CyReadVAccum : SchedReadAdvance<1, - [CyWriteVAccum, CyWriteVADDLP, CyWriteVABD]>; - -def : InstRW<[CyWriteVAccum, CyReadVAccum], - (instregex "SADALP","UADALP")>; - -def : InstRW<[CyWriteVAccum, CyReadVAccum], - (instregex "SABAv","UABAv","SABALv","UABALv")>; - -def : InstRW<[CyWriteV3], (instregex "SQADDv","SQSUBv","UQADDv","UQSUBv")>; - -def : InstRW<[CyWriteV3], (instregex "SUQADDv","USQADDv")>; - -def : InstRW<[CyWriteV4], (instregex "ADDHNv","RADDHNv", "RSUBHNv", "SUBHNv")>; - -// WriteV includes: -// AND,BIC,CMTST,EOR,ORN,ORR -// ADDP -// SHADD,SHSUB,SRHADD,UHADD,UHSUB,URHADD -// SADDL,SSUBL,UADDL,USUBL -// SADDW,SSUBW,UADDW,USUBW - -def : InstRW<[CyWriteV3], (instregex "CMEQv","CMGEv","CMGTv", - "CMLEv","CMLTv", - "CMHIv","CMHSv")>; - -def : InstRW<[CyWriteV3], (instregex "SMAXv","SMINv","UMAXv","UMINv", - "SMAXPv","SMINPv","UMAXPv","UMINPv")>; - -def : InstRW<[CyWriteVABD], (instregex "SABDv","UABDv", - "SABDLv","UABDLv")>; - -//--- -// 7.9.3 Floating Point Arithmetic and Comparisons -//--- - -// FABS,FNEG are WriteF - -def : InstRW<[CyWriteV4], (instrs FADDPv2i32p)>; -def : InstRW<[CyWriteV5], (instrs FADDPv2i64p)>; - -def : InstRW<[CyWriteV3], (instregex "FMAXPv2i","FMAXNMPv2i", - "FMINPv2i","FMINNMPv2i")>; - -def : InstRW<[CyWriteV4], (instregex "FMAXVv","FMAXNMVv","FMINVv","FMINNMVv")>; - -def : InstRW<[CyWriteV4], (instrs FADDSrr,FADDv2f32,FADDv4f32, - FSUBSrr,FSUBv2f32,FSUBv4f32, - FADDPv2f32,FADDPv4f32, - FABD32,FABDv2f32,FABDv4f32)>; -def : InstRW<[CyWriteV5], (instrs FADDDrr,FADDv2f64, - FSUBDrr,FSUBv2f64, - FADDPv2f64, - FABD64,FABDv2f64)>; - -def : InstRW<[CyWriteV3], (instregex "FCMEQ","FCMGT","FCMLE","FCMLT")>; - -def : InstRW<[CyWriteV3], (instregex "FACGE","FACGT", - "FMAXS","FMAXD","FMAXv", - "FMINS","FMIND","FMINv", - "FMAXNMS","FMAXNMD","FMAXNMv", - "FMINNMS","FMINNMD","FMINNMv", - "FMAXPv2f","FMAXPv4f", - "FMINPv2f","FMINPv4f", - "FMAXNMPv2f","FMAXNMPv4f", - "FMINNMPv2f","FMINNMPv4f")>; - -// FCMP,FCMPE,FCCMP,FCCMPE -def : WriteRes<WriteFCmp, [CyUnitVC]> {let Latency = 4;} - -// FCSEL is a WriteF. - -//--- -// 7.9.4 Shifts and Bitfield Operations -//--- - -// SHL is a WriteV - -def CyWriteVSHR : SchedWriteRes<[CyUnitV]> {let Latency = 2;} -def : InstRW<[CyWriteVSHR], (instregex "SSHRv","USHRv")>; - -def CyWriteVSRSHR : SchedWriteRes<[CyUnitV]> {let Latency = 3;} -def : InstRW<[CyWriteVSRSHR], (instregex "SRSHRv","URSHRv")>; - -// Shift and accumulate uses the vector multiply unit. -def CyWriteVShiftAcc : SchedWriteRes<[CyUnitVM]> {let Latency = 3;} -def CyReadVShiftAcc : SchedReadAdvance<1, - [CyWriteVShiftAcc, CyWriteVSHR, CyWriteVSRSHR]>; -def : InstRW<[CyWriteVShiftAcc, CyReadVShiftAcc], - (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>; - -// SSHL,USHL are WriteV. - -def : InstRW<[CyWriteV3], (instregex "SRSHLv","URSHLv")>; - -// SQSHL,SQSHLU,UQSHL are WriteV. - -def : InstRW<[CyWriteV3], (instregex "SQRSHLv","UQRSHLv")>; - -// WriteV includes: -// SHLL,SSHLL,USHLL -// SLI,SRI -// BIF,BIT,BSL -// EXT -// CLS,CLZ,CNT,RBIT,REV16,REV32,REV64,XTN -// XTN2 - -def : InstRW<[CyWriteV4], - (instregex "RSHRNv","SHRNv", - "SQRSHRNv","SQRSHRUNv","SQSHRNv","SQSHRUNv", - "UQRSHRNv","UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>; - -//--- -// 7.9.5 Multiplication -//--- - -def CyWriteVMul : SchedWriteRes<[CyUnitVM]> { let Latency = 4;} -def : InstRW<[CyWriteVMul], (instregex "MULv","SMULLv","UMULLv", - "SQDMULLv","SQDMULHv","SQRDMULHv")>; - -// FMUL,FMULX,FNMUL default to WriteFMul. 
-def : WriteRes<WriteFMul, [CyUnitVM]> { let Latency = 4;} - -def CyWriteV64Mul : SchedWriteRes<[CyUnitVM]> { let Latency = 5;} -def : InstRW<[CyWriteV64Mul], (instrs FMULDrr,FMULv2f64,FMULv2i64_indexed, - FNMULDrr,FMULX64,FMULXv2f64,FMULXv2i64_indexed)>; - -def CyReadVMulAcc : SchedReadAdvance<1, [CyWriteVMul, CyWriteV64Mul]>; -def : InstRW<[CyWriteVMul, CyReadVMulAcc], - (instregex "MLA","MLS","SMLAL","SMLSL","UMLAL","UMLSL", - "SQDMLAL","SQDMLSL")>; - -def CyWriteSMul : SchedWriteRes<[CyUnitVM]> { let Latency = 8;} -def CyWriteDMul : SchedWriteRes<[CyUnitVM]> { let Latency = 10;} -def CyReadSMul : SchedReadAdvance<4, [CyWriteSMul]>; -def CyReadDMul : SchedReadAdvance<5, [CyWriteDMul]>; - -def : InstRW<[CyWriteSMul, CyReadSMul], - (instrs FMADDSrrr,FMSUBSrrr,FNMADDSrrr,FNMSUBSrrr, - FMLAv2f32,FMLAv4f32, - FMLAv1i32_indexed,FMLAv1i64_indexed,FMLAv2i32_indexed)>; -def : InstRW<[CyWriteDMul, CyReadDMul], - (instrs FMADDDrrr,FMSUBDrrr,FNMADDDrrr,FNMSUBDrrr, - FMLAv2f64,FMLAv2i64_indexed, - FMLSv2f64,FMLSv2i64_indexed)>; - -def CyWritePMUL : SchedWriteRes<[CyUnitVD]> { let Latency = 3; } -def : InstRW<[CyWritePMUL], (instregex "PMULv", "PMULLv")>; - -//--- -// 7.9.6 Divide and Square Root -//--- - -// FDIV,FSQRT -// TODO: Add 64-bit variant with 19 cycle latency. -// TODO: Specialize FSQRT for longer latency. -def : WriteRes<WriteFDiv, [CyUnitVD, CyUnitFloatDiv]> { - let Latency = 17; - let ResourceCycles = [2, 17]; -} - -def : InstRW<[CyWriteV4], (instregex "FRECPEv","FRECPXv","URECPEv","URSQRTEv")>; - -def WriteFRSQRTE : SchedWriteRes<[CyUnitVM]> { let Latency = 4; } -def : InstRW<[WriteFRSQRTE], (instregex "FRSQRTEv")>; - -def WriteFRECPS : SchedWriteRes<[CyUnitVM]> { let Latency = 8; } -def WriteFRSQRTS : SchedWriteRes<[CyUnitVM]> { let Latency = 10; } -def : InstRW<[WriteFRECPS], (instregex "FRECPSv")>; -def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>; - -//--- -// 7.9.7 Integer-FP Conversions -//--- - -// FCVT lengthen f16/s32 -def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>; - -// FCVT,FCVTN,FCVTXN -// SCVTF,UCVTF V,V -// FRINT(AIMNPXZ) V,V -def : WriteRes<WriteFCvt, [CyUnitV]> {let Latency = 4;} - -// SCVT/UCVT S/D, Rd = VLD5+V4: 9 cycles. -def CyWriteCvtToFPR : WriteSequence<[WriteVLD, CyWriteV4]>; -def : InstRW<[CyWriteCopyToFPR], (instregex "FCVT[AMNPZ][SU][SU][WX][SD]r")>; - -// FCVT Rd, S/D = V6+LD4: 10 cycles -def CyWriteCvtToGPR : WriteSequence<[CyWriteV6, WriteLD]>; -def : InstRW<[CyWriteCvtToGPR], (instregex "[SU]CVTF[SU][WX][SD]r")>; - -// FCVTL is a WriteV - -//--- -// 7.9.8-7.9.10 Cryptography, Data Transposition, Table Lookup -//--- - -def CyWriteCrypto2 : SchedWriteRes<[CyUnitVD]> {let Latency = 2;} -def : InstRW<[CyWriteCrypto2], (instrs AESIMCrr, AESMCrr, SHA1Hrr, - AESDrr, AESErr, SHA1SU1rr, SHA256SU0rr, - SHA1SU0rrr)>; - -def CyWriteCrypto3 : SchedWriteRes<[CyUnitVD]> {let Latency = 3;} -def : InstRW<[CyWriteCrypto3], (instrs SHA256SU1rrr)>; - -def CyWriteCrypto6 : SchedWriteRes<[CyUnitVD]> {let Latency = 6;} -def : InstRW<[CyWriteCrypto6], (instrs SHA1Crrr, SHA1Mrrr, SHA1Prrr, - SHA256Hrrr,SHA256H2rrr)>; - -// TRN,UZP,ZUP are WriteV. - -// TBL,TBX are WriteV. - -//--- -// 7.9.11-7.9.14 Load/Store, single element and paired -//--- - -// Loading into the vector unit takes 5 cycles vs 4 for integer loads. -def : WriteRes<WriteVLD, [CyUnitLS]> { - let Latency = 5; -} - -// Store-load forwarding is 4 cycles. -def : WriteRes<WriteVST, [CyUnitLS]> { - let Latency = 4; -} - -// WriteVLDPair/VSTPair sequences are expanded by the target description. 
- -//--- -// 7.9.15 Load, element operations -//--- - -// Only the first WriteVLD and WriteAdr for writeback matches def operands. -// Subsequent WriteVLDs consume resources. Since all loaded values have the -// same latency, this is acceptable. - -// Vd is read 5 cycles after issuing the vector load. -def : ReadAdvance<ReadVLD, 5>; - -def : InstRW<[WriteVLD], - (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr], - (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; - -// Register writes from the load's high half are fused micro-ops. -def : InstRW<[WriteVLD], - (instregex "LD1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[WriteVLD, WriteAdr], - (instregex "LD1Twov(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVLD, WriteVLD], - (instregex "LD1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD], - (instregex "LD1Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLD, WriteVLD], - (instregex "LD1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD], - (instregex "LD1Threev(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVLD, WriteVLD, WriteVLD], - (instregex "LD1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD], - (instregex "LD1Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLD, WriteVLD], - (instregex "LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD], - (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVLD, WriteVLD, WriteVLD, WriteVLD], - (instregex "LD1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD, WriteVLD], - (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD], - (instregex "LD1i(8|16|32)$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr], - (instregex "LD1i(8|16|32)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD], (instrs LD1i64)>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],(instrs LD1i64_POST)>; - -def : InstRW<[WriteVLDShuffle], - (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr], - (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[WriteVLDShuffle, WriteV], - (instregex "LD2Twov(8b|4h|2s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV], - (instregex "LD2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle], - (instregex "LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle], - (instregex "LD2Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV], - (instregex "LD2i(8|16|32)$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV], - (instregex "LD2i(8|16|32)_POST")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV], - (instregex "LD2i64$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV], - (instregex "LD2i64_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteV], - (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV], - (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV], - (instregex "LD3Threev(8b|4h|2s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV], - (instregex "LD3Threev(8b|4h|2s)_POST")>; -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle], - (instregex "LD3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle], - (instregex "LD3Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV], - 
(instregex "LD3i(8|16|32)$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV], - (instregex "LD3i(8|16|32)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV], - (instregex "LD3i64$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV], - (instregex "LD3i64_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteV, WriteV], - (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV], - (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV], - (instrs LD3Rv1d,LD3Rv2d)>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV], - (instrs LD3Rv2d_POST,LD3Rv2d_POST)>; - -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV], - (instregex "LD4Fourv(8b|4h|2s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV], - (instregex "LD4Fourv(8b|4h|2s)_POST")>; -def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle, - WriteVLDPairShuffle, WriteVLDPairShuffle], - (instregex "LD4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle, - WriteVLDPairShuffle, WriteVLDPairShuffle], - (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV], - (instregex "LD4i(8|16|32)$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV], - (instregex "LD4i(8|16|32)_POST")>; - - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV], - (instrs LD4i64)>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV], - (instrs LD4i64_POST)>; - -def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV], - (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV], - (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV], - (instrs LD4Rv1d,LD4Rv2d)>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV], - (instrs LD4Rv1d_POST,LD4Rv2d_POST)>; - -//--- -// 7.9.16 Store, element operations -//--- - -// Only the WriteAdr for writeback matches a def operands. -// Subsequent WriteVLDs only consume resources. 
- -def : InstRW<[WriteVST], - (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVST], - (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle], - (instregex "ST1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], - (instregex "ST1Twov(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVST, WriteVST], - (instregex "ST1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVST, WriteVST], - (instregex "ST1Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle, WriteVST], - (instregex "ST1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVST], - (instregex "ST1Threev(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVST, WriteVST, WriteVST], - (instregex "ST1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST], - (instregex "ST1Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST1Fourv(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], - (instregex "ST1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST, WriteVST], - (instregex "ST1Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle], (instregex "ST1i(8|16|32)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST1i(8|16|32)_POST")>; - -def : InstRW<[WriteVSTShuffle], (instrs ST1i64)>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST1i64_POST)>; - -def : InstRW<[WriteVSTShuffle], - (instregex "ST2Twov(8b|4h|2s)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], - (instregex "ST2Twov(8b|4h|2s)_POST")>; -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST2Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle], (instregex "ST2i(8|16|32)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST2i(8|16|32)_POST")>; -def : InstRW<[WriteVSTShuffle], (instrs ST2i64)>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST2i64_POST)>; - -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST3Threev(8b|4h|2s)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST3Threev(8b|4h|2s)_POST")>; -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST3Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle], (instregex "ST3i(8|16|32)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST3i(8|16|32)_POST")>; - -def :InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64)>; -def :InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64_POST)>; - -def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle], - (instregex "ST4Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle], - (instregex "ST4Fourv(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle, - WriteVSTPairShuffle, WriteVSTPairShuffle], - (instregex "ST4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle, - WriteVSTPairShuffle, WriteVSTPairShuffle], - (instregex "ST4Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTPairShuffle], (instregex "ST4i(8|16|32)$")>; -def : InstRW<[WriteAdr, WriteVSTPairShuffle], (instregex 
"ST4i(8|16|32)_POST")>; - -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST4i64)>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>; - -} // SchedModel = CycloneModel diff --git a/lib/Target/ARM64/ARM64Schedule.td b/lib/Target/ARM64/ARM64Schedule.td deleted file mode 100644 index 52f9262..0000000 --- a/lib/Target/ARM64/ARM64Schedule.td +++ /dev/null @@ -1,92 +0,0 @@ -//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -// Define TII for use in SchedVariant Predicates. -// const MachineInstr *MI and const TargetSchedModel *SchedModel -// are defined by default. -def : PredicateProlog<[{ - const ARM64InstrInfo *TII = - static_cast<const ARM64InstrInfo*>(SchedModel->getInstrInfo()); - (void)TII; -}]>; - -// ARM64 Scheduler Definitions - -def WriteImm : SchedWrite; // MOVN, MOVZ -// TODO: Provide variants for MOV32/64imm Pseudos that dynamically -// select the correct sequence of WriteImms. - -def WriteI : SchedWrite; // ALU -def WriteISReg : SchedWrite; // ALU of Shifted-Reg -def WriteIEReg : SchedWrite; // ALU of Extended-Reg -def WriteExtr : SchedWrite; // EXTR shifts a reg pair -def ReadExtrHi : SchedRead; // Read the high reg of the EXTR pair -def WriteIS : SchedWrite; // Shift/Scale -def WriteID32 : SchedWrite; // 32-bit Divide -def WriteID64 : SchedWrite; // 64-bit Divide -def WriteIM32 : SchedWrite; // 32-bit Multiply -def WriteIM64 : SchedWrite; // 64-bit Multiply -def WriteBr : SchedWrite; // Branch -def WriteBrReg : SchedWrite; // Indirect Branch - -def WriteLD : SchedWrite; // Load from base addr plus immediate offset -def WriteST : SchedWrite; // Store to base addr plus immediate offset -def WriteSTP : SchedWrite; // Store a register pair. -def WriteAdr : SchedWrite; // Address pre/post increment. - -def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled). -def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled). -def ReadAdrBase : SchedRead; // Read the base resister of a reg-offset LD/ST. - -// ScaledIdxPred is true if a WriteLDIdx operand will be -// scaled. Subtargets can use this to dynamically select resources and -// latency for WriteLDIdx and ReadAdrBase. -def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(MI)}]>; - -// Serialized two-level address load. -// EXAMPLE: LOADGot -def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>; - -// Serialized two-level address lookup. -// EXAMPLE: MOVaddr... -def WriteAdrAdr : WriteSequence<[WriteAdr, WriteAdr]>; - -// The second register of a load-pair. -// LDP,LDPSW,LDNP,LDXP,LDAXP -def WriteLDHi : SchedWrite; - -// Store-exclusive is a store followed by a dependent load. -def WriteSTX : WriteSequence<[WriteST, WriteLD]>; - -def WriteSys : SchedWrite; // Long, variable latency system ops. -def WriteBarrier : SchedWrite; // Memory barrier. -def WriteHint : SchedWrite; // Hint instruction. - -def WriteF : SchedWrite; // General floating-point ops. -def WriteFCmp : SchedWrite; // Floating-point compare. -def WriteFCvt : SchedWrite; // Float conversion. -def WriteFCopy : SchedWrite; // Float-int register copy. -def WriteFImm : SchedWrite; // Floating-point immediate. -def WriteFMul : SchedWrite; // Floating-point multiply. -def WriteFDiv : SchedWrite; // Floating-point division. 
- -def WriteV : SchedWrite; // Vector ops. -def WriteVLD : SchedWrite; // Vector loads. -def WriteVST : SchedWrite; // Vector stores. - -// Read the unwritten lanes of the VLD's destination registers. -def ReadVLD : SchedRead; - -// Sequential vector load and shuffle. -def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>; -def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>; - -// Store a shuffled vector. -def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>; -def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>; diff --git a/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp b/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp deleted file mode 100644 index 79d507f..0000000 --- a/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp +++ /dev/null @@ -1,57 +0,0 @@ -//===-- ARM64SelectionDAGInfo.cpp - ARM64 SelectionDAG Info ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ARM64SelectionDAGInfo class. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-selectiondag-info" -#include "ARM64TargetMachine.h" -using namespace llvm; - -ARM64SelectionDAGInfo::ARM64SelectionDAGInfo(const TargetMachine &TM) - : TargetSelectionDAGInfo(TM), - Subtarget(&TM.getSubtarget<ARM64Subtarget>()) {} - -ARM64SelectionDAGInfo::~ARM64SelectionDAGInfo() {} - -SDValue ARM64SelectionDAGInfo::EmitTargetCodeForMemset( - SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVolatile, - MachinePointerInfo DstPtrInfo) const { - // Check to see if there is a specialized entry-point for memory zeroing. - ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); - ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size); - const char *bzeroEntry = - (V && V->isNullValue()) ? Subtarget->getBZeroEntry() : 0; - // For small size (< 256), it is not beneficial to use bzero - // instead of memset. - if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) { - const ARM64TargetLowering &TLI = *static_cast<const ARM64TargetLowering *>( - DAG.getTarget().getTargetLowering()); - - EVT IntPtr = TLI.getPointerTy(); - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Dst; - Entry.Ty = IntPtrTy; - Args.push_back(Entry); - Entry.Node = Size; - Args.push_back(Entry); - TargetLowering::CallLoweringInfo CLI( - Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, - 0, CallingConv::C, /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/false, - DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); - std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); - return CallResult.second; - } - return SDValue(); -} diff --git a/lib/Target/ARM64/ARM64SelectionDAGInfo.h b/lib/Target/ARM64/ARM64SelectionDAGInfo.h deleted file mode 100644 index 770775f..0000000 --- a/lib/Target/ARM64/ARM64SelectionDAGInfo.h +++ /dev/null @@ -1,37 +0,0 @@ -//===-- ARM64SelectionDAGInfo.h - ARM64 SelectionDAG Info -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file defines the ARM64 subclass for TargetSelectionDAGInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64SELECTIONDAGINFO_H -#define ARM64SELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class ARM64SelectionDAGInfo : public TargetSelectionDAGInfo { - /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can - /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; - -public: - explicit ARM64SelectionDAGInfo(const TargetMachine &TM); - ~ARM64SelectionDAGInfo(); - - SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, - SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool isVolatile, - MachinePointerInfo DstPtrInfo) const override; -}; -} - -#endif diff --git a/lib/Target/ARM64/ARM64StorePairSuppress.cpp b/lib/Target/ARM64/ARM64StorePairSuppress.cpp deleted file mode 100644 index 6521d13..0000000 --- a/lib/Target/ARM64/ARM64StorePairSuppress.cpp +++ /dev/null @@ -1,167 +0,0 @@ -//===---- ARM64StorePairSuppress.cpp --- Suppress store pair formation ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass identifies floating point stores that should not be combined into -// store pairs. Later we may do the same for floating point loads. -// ===---------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-stp-suppress" -#include "ARM64InstrInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineTraceMetrics.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { -class ARM64StorePairSuppress : public MachineFunctionPass { - const ARM64InstrInfo *TII; - const TargetRegisterInfo *TRI; - const MachineRegisterInfo *MRI; - MachineFunction *MF; - TargetSchedModel SchedModel; - MachineTraceMetrics *Traces; - MachineTraceMetrics::Ensemble *MinInstr; - -public: - static char ID; - ARM64StorePairSuppress() : MachineFunctionPass(ID) {} - - virtual const char *getPassName() const override { - return "ARM64 Store Pair Suppression"; - } - - bool runOnMachineFunction(MachineFunction &F) override; - -private: - bool shouldAddSTPToBlock(const MachineBasicBlock *BB); - - bool isNarrowFPStore(const MachineInstr &MI); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<MachineTraceMetrics>(); - AU.addPreserved<MachineTraceMetrics>(); - MachineFunctionPass::getAnalysisUsage(AU); - } -}; -char ARM64StorePairSuppress::ID = 0; -} // anonymous - -FunctionPass *llvm::createARM64StorePairSuppressPass() { - return new ARM64StorePairSuppress(); -} - -/// Return true if an STP can be added to this block without increasing the -/// critical resource height. STP is good to form in Ld/St limited blocks and -/// bad to form in float-point limited blocks. This is true independent of the -/// critical path. 
If the critical path is longer than the resource height, the -/// extra vector ops can limit physreg renaming. Otherwise, it could simply -/// oversaturate the vector units. -bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { - if (!MinInstr) - MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); - - MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB); - unsigned ResLength = BBTrace.getResourceLength(); - - // Get the machine model's scheduling class for STPQi. - // Bypass TargetSchedule's SchedClass resolution since we only have an opcode. - unsigned SCIdx = TII->get(ARM64::STPDi).getSchedClass(); - const MCSchedClassDesc *SCDesc = - SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx); - - // If a subtarget does not define resources for STPQi, bail here. - if (SCDesc->isValid() && !SCDesc->isVariant()) { - unsigned ResLenWithSTP = BBTrace.getResourceLength( - ArrayRef<const MachineBasicBlock *>(), SCDesc); - if (ResLenWithSTP > ResLength) { - DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber() - << " resources " << ResLength << " -> " << ResLenWithSTP - << "\n"); - return false; - } - } - return true; -} - -/// Return true if this is a floating-point store smaller than the V reg. On -/// cyclone, these require a vector shuffle before storing a pair. -/// Ideally we would call getMatchingPairOpcode() and have the machine model -/// tell us if it's profitable with no cpu knowledge here. -/// -/// FIXME: We plan to develop a decent Target abstraction for simple loads and -/// stores. Until then use a nasty switch similar to ARM64LoadStoreOptimizer. -bool ARM64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) { - switch (MI.getOpcode()) { - default: - return false; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STURSi: - case ARM64::STURDi: - return true; - } -} - -bool ARM64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) { - MF = &mf; - TII = static_cast<const ARM64InstrInfo *>(MF->getTarget().getInstrInfo()); - TRI = MF->getTarget().getRegisterInfo(); - MRI = &MF->getRegInfo(); - const TargetSubtargetInfo &ST = - MF->getTarget().getSubtarget<TargetSubtargetInfo>(); - SchedModel.init(*ST.getSchedModel(), &ST, TII); - - Traces = &getAnalysis<MachineTraceMetrics>(); - MinInstr = 0; - - DEBUG(dbgs() << "*** " << getPassName() << ": " << MF->getName() << '\n'); - - if (!SchedModel.hasInstrSchedModel()) { - DEBUG(dbgs() << " Skipping pass: no machine model present.\n"); - return false; - } - - // Check for a sequence of stores to the same base address. We don't need to - // precisely determine whether a store pair can be formed. But we do want to - // filter out most situations where we can't form store pairs to avoid - // computing trace metrics in those cases. - for (auto &MBB: *MF) { - bool SuppressSTP = false; - unsigned PrevBaseReg = 0; - for (auto &MI: MBB) { - if (!isNarrowFPStore(MI)) - continue; - unsigned BaseReg; - unsigned Offset; - if (TII->getLdStBaseRegImmOfs(&MI, BaseReg, Offset, TRI)) { - if (PrevBaseReg == BaseReg) { - // If this block can take STPs, skip ahead to the next block. - if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent())) - break; - // Otherwise, continue unpairing the stores in this block. - DEBUG(dbgs() << "Unpairing store " << MI << "\n"); - SuppressSTP = true; - TII->suppressLdStPair(&MI); - } - PrevBaseReg = BaseReg; - } else - PrevBaseReg = 0; - } - } - // This pass just sets some internal MachineMemOperand flags. It can't really - // invalidate anything. 
- return false; -} diff --git a/lib/Target/ARM64/ARM64Subtarget.cpp b/lib/Target/ARM64/ARM64Subtarget.cpp deleted file mode 100644 index 14b5444..0000000 --- a/lib/Target/ARM64/ARM64Subtarget.cpp +++ /dev/null @@ -1,100 +0,0 @@ -//===-- ARM64Subtarget.cpp - ARM64 Subtarget Information --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ARM64 specific subclass of TargetSubtarget. -// -//===----------------------------------------------------------------------===// - -#include "ARM64InstrInfo.h" -#include "ARM64Subtarget.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineScheduler.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_SUBTARGETINFO_CTOR -#define GET_SUBTARGETINFO_TARGET_DESC -#include "ARM64GenSubtargetInfo.inc" - -using namespace llvm; - -ARM64Subtarget::ARM64Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS) - : ARM64GenSubtargetInfo(TT, CPU, FS), HasZeroCycleRegMove(false), - HasZeroCycleZeroing(false), CPUString(CPU), TargetTriple(TT) { - // Determine default and user-specified characteristics - - if (CPUString.empty()) - // We default to Cyclone for now. - CPUString = "cyclone"; - - ParseSubtargetFeatures(CPUString, FS); -} - -/// ClassifyGlobalReference - Find the target operand flags that describe -/// how a global value should be referenced for the current subtarget. -unsigned char -ARM64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, - const TargetMachine &TM) const { - - // Determine whether this is a reference to a definition or a declaration. - // Materializable GVs (in JIT lazy compilation mode) do not require an extra - // load from stub. - bool isDecl = GV->hasAvailableExternallyLinkage(); - if (GV->isDeclaration() && !GV->isMaterializable()) - isDecl = true; - - // MachO large model always goes via a GOT, simply to get a single 8-byte - // absolute relocation on all global addresses. - if (TM.getCodeModel() == CodeModel::Large && isTargetMachO()) - return ARM64II::MO_GOT; - - // The small code mode's direct accesses use ADRP, which cannot necessarily - // produce the value 0 (if the code is above 4GB). Therefore they must use the - // GOT. - if (TM.getCodeModel() == CodeModel::Small && GV->isWeakForLinker() && isDecl) - return ARM64II::MO_GOT; - - // If symbol visibility is hidden, the extra load is not needed if - // the symbol is definitely defined in the current translation unit. - - // The handling of non-hidden symbols in PIC mode is rather target-dependent: - // + On MachO, if the symbol is defined in this module the GOT can be - // skipped. - // + On ELF, the R_AARCH64_COPY relocation means that even symbols actually - // defined could end up in unexpected places. Use a GOT. - if (TM.getRelocationModel() != Reloc::Static && GV->hasDefaultVisibility()) { - if (isTargetMachO()) - return (isDecl || GV->isWeakForLinker()) ? ARM64II::MO_GOT - : ARM64II::MO_NO_FLAG; - else - return ARM64II::MO_GOT; - } - - return ARM64II::MO_NO_FLAG; -} - -/// This function returns the name of a function which has an interface -/// like the non-standard bzero function, if such a function exists on -/// the current subtarget and it is considered prefereable over -/// memset with zero passed as the second argument. 
Otherwise it -/// returns null. -const char *ARM64Subtarget::getBZeroEntry() const { - // At the moment, always prefer bzero. - return "bzero"; -} - -void ARM64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, - MachineInstr *begin, MachineInstr *end, - unsigned NumRegionInstrs) const { - // LNT run (at least on Cyclone) showed reasonably significant gains for - // bi-directional scheduling. 253.perlbmk. - Policy.OnlyTopDown = false; - Policy.OnlyBottomUp = false; -} diff --git a/lib/Target/ARM64/ARM64Subtarget.h b/lib/Target/ARM64/ARM64Subtarget.h deleted file mode 100644 index 1cbd79e..0000000 --- a/lib/Target/ARM64/ARM64Subtarget.h +++ /dev/null @@ -1,87 +0,0 @@ -//=====---- ARM64Subtarget.h - Define Subtarget for the ARM64 -*- C++ -*--====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the ARM64 specific subclass of TargetSubtarget. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64SUBTARGET_H -#define ARM64SUBTARGET_H - -#include "llvm/Target/TargetSubtargetInfo.h" -#include "ARM64RegisterInfo.h" -#include <string> - -#define GET_SUBTARGETINFO_HEADER -#include "ARM64GenSubtargetInfo.inc" - -namespace llvm { -class GlobalValue; -class StringRef; - -class ARM64Subtarget : public ARM64GenSubtargetInfo { -protected: - // HasZeroCycleRegMove - Has zero-cycle register mov instructions. - bool HasZeroCycleRegMove; - - // HasZeroCycleZeroing - Has zero-cycle zeroing instructions. - bool HasZeroCycleZeroing; - - /// CPUString - String name of used CPU. - std::string CPUString; - - /// TargetTriple - What processor and OS we're targeting. - Triple TargetTriple; - -public: - /// This constructor initializes the data members to match that - /// of the specified triple. - ARM64Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS); - - bool enableMachineScheduler() const override { return true; } - - bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } - - bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } - - bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } - - bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } - - bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } - - bool isCyclone() const { return CPUString == "cyclone"; } - - /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size - /// that still makes it profitable to inline the call. - unsigned getMaxInlineSizeThreshold() const { return 64; } - - /// ParseSubtargetFeatures - Parses features string setting specified - /// subtarget options. Definition of function is auto generated by tblgen. - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - - /// ClassifyGlobalReference - Find the target operand flags that describe - /// how a global value should be referenced for the current subtarget. - unsigned char ClassifyGlobalReference(const GlobalValue *GV, - const TargetMachine &TM) const; - - /// This function returns the name of a function which has an interface - /// like the non-standard bzero function, if such a function exists on - /// the current subtarget and it is considered prefereable over - /// memset with zero passed as the second argument. Otherwise it - /// returns null. 
- const char *getBZeroEntry() const; - - void overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin, - MachineInstr *end, unsigned NumRegionInstrs) const; -}; -} // End llvm namespace - -#endif // ARM64SUBTARGET_H diff --git a/lib/Target/ARM64/ARM64TargetMachine.cpp b/lib/Target/ARM64/ARM64TargetMachine.cpp deleted file mode 100644 index 101dc25..0000000 --- a/lib/Target/ARM64/ARM64TargetMachine.cpp +++ /dev/null @@ -1,157 +0,0 @@ -//===-- ARM64TargetMachine.cpp - Define TargetMachine for ARM64 -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#include "ARM64.h" -#include "ARM64TargetMachine.h" -#include "llvm/PassManager.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Transforms/Scalar.h" -using namespace llvm; - -static cl::opt<bool> EnableCCMP("arm64-ccmp", - cl::desc("Enable the CCMP formation pass"), - cl::init(true)); - -static cl::opt<bool> EnableStPairSuppress("arm64-stp-suppress", cl::Hidden, - cl::desc("Suppress STP for ARM64"), - cl::init(true)); - -static cl::opt<bool> -EnablePromoteConstant("arm64-promote-const", cl::Hidden, - cl::desc("Enable the promote constant pass"), - cl::init(true)); - -static cl::opt<bool> -EnableCollectLOH("arm64-collect-loh", cl::Hidden, - cl::desc("Enable the pass that emits the linker" - " optimization hints (LOH)"), - cl::init(true)); - -extern "C" void LLVMInitializeARM64Target() { - // Register the target. - RegisterTargetMachine<ARM64TargetMachine> X(TheARM64Target); -} - -/// TargetMachine ctor - Create an ARM64 architecture model. -/// -ARM64TargetMachine::ARM64TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS), - DL(Subtarget.isTargetMachO() ? "e-m:o-i64:64-i128:128-n32:64-S128" - : "e-m:e-i64:64-i128:128-n32:64-S128"), - InstrInfo(Subtarget), TLInfo(*this), FrameLowering(*this, Subtarget), - TSInfo(*this) { - initAsmInfo(); -} - -namespace { -/// ARM64 Code Generator Pass Configuration Options. -class ARM64PassConfig : public TargetPassConfig { -public: - ARM64PassConfig(ARM64TargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} - - ARM64TargetMachine &getARM64TargetMachine() const { - return getTM<ARM64TargetMachine>(); - } - - virtual bool addPreISel(); - virtual bool addInstSelector(); - virtual bool addILPOpts(); - virtual bool addPreRegAlloc(); - virtual bool addPostRegAlloc(); - virtual bool addPreSched2(); - virtual bool addPreEmitPass(); -}; -} // namespace - -void ARM64TargetMachine::addAnalysisPasses(PassManagerBase &PM) { - // Add first the target-independent BasicTTI pass, then our ARM64 pass. This - // allows the ARM64 pass to delegate to the target independent layer when - // appropriate. 
- PM.add(createBasicTargetTransformInfoPass(this)); - PM.add(createARM64TargetTransformInfoPass(this)); -} - -TargetPassConfig *ARM64TargetMachine::createPassConfig(PassManagerBase &PM) { - return new ARM64PassConfig(this, PM); -} - -// Pass Pipeline Configuration -bool ARM64PassConfig::addPreISel() { - // Run promote constant before global merge, so that the promoted constants - // get a chance to be merged - if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant) - addPass(createARM64PromoteConstantPass()); - if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createGlobalMergePass(TM)); - if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createARM64AddressTypePromotionPass()); - return false; -} - -bool ARM64PassConfig::addInstSelector() { - addPass(createARM64ISelDag(getARM64TargetMachine(), getOptLevel())); - - // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many - // references to _TLS_MODULE_BASE_ as possible. - if (TM->getSubtarget<ARM64Subtarget>().isTargetELF() && - getOptLevel() != CodeGenOpt::None) - addPass(createARM64CleanupLocalDynamicTLSPass()); - - return false; -} - -bool ARM64PassConfig::addILPOpts() { - if (EnableCCMP) - addPass(createARM64ConditionalCompares()); - addPass(&EarlyIfConverterID); - if (EnableStPairSuppress) - addPass(createARM64StorePairSuppressPass()); - return true; -} - -bool ARM64PassConfig::addPreRegAlloc() { - // Use AdvSIMD scalar instructions whenever profitable. - addPass(createARM64AdvSIMDScalar()); - return true; -} - -bool ARM64PassConfig::addPostRegAlloc() { - // Change dead register definitions to refer to the zero register. - addPass(createARM64DeadRegisterDefinitions()); - return true; -} - -bool ARM64PassConfig::addPreSched2() { - // Expand some pseudo instructions to allow proper scheduling. - addPass(createARM64ExpandPseudoPass()); - // Use load/store pair instructions when possible. - addPass(createARM64LoadStoreOptimizationPass()); - return true; -} - -bool ARM64PassConfig::addPreEmitPass() { - // Relax conditional branch instructions if they're otherwise out of - // range of their destination. - addPass(createARM64BranchRelaxation()); - if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH) - addPass(createARM64CollectLOHPass()); - return true; -} diff --git a/lib/Target/ARM64/ARM64TargetMachine.h b/lib/Target/ARM64/ARM64TargetMachine.h deleted file mode 100644 index 8274550..0000000 --- a/lib/Target/ARM64/ARM64TargetMachine.h +++ /dev/null @@ -1,69 +0,0 @@ -//===-- ARM64TargetMachine.h - Define TargetMachine for ARM64 ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the ARM64 specific subclass of TargetMachine. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ARM64TARGETMACHINE_H -#define ARM64TARGETMACHINE_H - -#include "ARM64InstrInfo.h" -#include "ARM64ISelLowering.h" -#include "ARM64Subtarget.h" -#include "ARM64FrameLowering.h" -#include "ARM64SelectionDAGInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/MC/MCStreamer.h" - -namespace llvm { - -class ARM64TargetMachine : public LLVMTargetMachine { -protected: - ARM64Subtarget Subtarget; - -private: - const DataLayout DL; - ARM64InstrInfo InstrInfo; - ARM64TargetLowering TLInfo; - ARM64FrameLowering FrameLowering; - ARM64SelectionDAGInfo TSInfo; - -public: - ARM64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Reloc::Model RM, - CodeModel::Model CM, CodeGenOpt::Level OL); - - const ARM64Subtarget *getSubtargetImpl() const override { return &Subtarget; } - const ARM64TargetLowering *getTargetLowering() const override { - return &TLInfo; - } - const DataLayout *getDataLayout() const override { return &DL; } - const ARM64FrameLowering *getFrameLowering() const override { - return &FrameLowering; - } - const ARM64InstrInfo *getInstrInfo() const override { return &InstrInfo; } - const ARM64RegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); - } - const ARM64SelectionDAGInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } - - // Pass Pipeline Configuration - TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - - /// \brief Register ARM64 analysis passes with a pass manager. - void addAnalysisPasses(PassManagerBase &PM) override; -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/ARM64TargetObjectFile.cpp b/lib/Target/ARM64/ARM64TargetObjectFile.cpp deleted file mode 100644 index cde01e5..0000000 --- a/lib/Target/ARM64/ARM64TargetObjectFile.cpp +++ /dev/null @@ -1,52 +0,0 @@ -//===-- ARM64TargetObjectFile.cpp - ARM64 Object Info ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "ARM64TargetObjectFile.h" -#include "ARM64TargetMachine.h" -#include "llvm/IR/Mangler.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/Dwarf.h" -using namespace llvm; -using namespace dwarf; - -void ARM64_ELFTargetObjectFile::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - TargetLoweringObjectFileELF::Initialize(Ctx, TM); - InitializeELF(TM.Options.UseInitArray); -} - -const MCExpr *ARM64_MachoTargetObjectFile::getTTypeGlobalReference( - const GlobalValue *GV, unsigned Encoding, Mangler &Mang, - const TargetMachine &TM, MachineModuleInfo *MMI, - MCStreamer &Streamer) const { - // On Darwin, we can reference dwarf symbols with foo@GOT-., which - // is an indirect pc-relative reference. The default implementation - // won't reference using the GOT, so we need this target-specific - // version. 
- if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) { - const MCSymbol *Sym = TM.getSymbol(GV, Mang); - const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext()); - MCSymbol *PCSym = getContext().CreateTempSymbol(); - Streamer.EmitLabel(PCSym); - const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); - return MCBinaryExpr::CreateSub(Res, PC, getContext()); - } - - return TargetLoweringObjectFileMachO::getTTypeGlobalReference( - GV, Encoding, Mang, TM, MMI, Streamer); -} - -MCSymbol *ARM64_MachoTargetObjectFile::getCFIPersonalitySymbol( - const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, - MachineModuleInfo *MMI) const { - return TM.getSymbol(GV, Mang); -} diff --git a/lib/Target/ARM64/ARM64TargetObjectFile.h b/lib/Target/ARM64/ARM64TargetObjectFile.h deleted file mode 100644 index 62446f9..0000000 --- a/lib/Target/ARM64/ARM64TargetObjectFile.h +++ /dev/null @@ -1,40 +0,0 @@ -//===-- ARM64TargetObjectFile.h - ARM64 Object Info -*- C++ -------------*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_ARM64_TARGETOBJECTFILE_H -#define LLVM_TARGET_ARM64_TARGETOBJECTFILE_H - -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Target/TargetLoweringObjectFile.h" - -namespace llvm { -class ARM64TargetMachine; - -/// This implementation is used for AArch64 ELF targets (Linux in particular). -class ARM64_ELFTargetObjectFile : public TargetLoweringObjectFileELF { - void Initialize(MCContext &Ctx, const TargetMachine &TM) override; -}; - -/// ARM64_MachoTargetObjectFile - This TLOF implementation is used for Darwin. -class ARM64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { -public: - const MCExpr *getTTypeGlobalReference(const GlobalValue *GV, - unsigned Encoding, Mangler &Mang, - const TargetMachine &TM, - MachineModuleInfo *MMI, - MCStreamer &Streamer) const override; - - MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang, - const TargetMachine &TM, - MachineModuleInfo *MMI) const override; -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp b/lib/Target/ARM64/ARM64TargetTransformInfo.cpp deleted file mode 100644 index 9b598d7..0000000 --- a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp +++ /dev/null @@ -1,326 +0,0 @@ -//===-- ARM64TargetTransformInfo.cpp - ARM64 specific TTI pass ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements a TargetTransformInfo analysis pass specific to the -/// ARM64 target machine. It uses the target's detailed information to provide -/// more precise answers to certain TTI queries, while letting the target -/// independent and default TTI implementations handle the rest. 
-/// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64tti" -#include "ARM64.h" -#include "ARM64TargetMachine.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/CostTable.h" -#include "llvm/Target/TargetLowering.h" -using namespace llvm; - -// Declare the pass initialization routine locally as target-specific passes -// don't havve a target-wide initialization entry point, and so we rely on the -// pass constructor initialization. -namespace llvm { -void initializeARM64TTIPass(PassRegistry &); -} - -namespace { - -class ARM64TTI final : public ImmutablePass, public TargetTransformInfo { - const ARM64TargetMachine *TM; - const ARM64Subtarget *ST; - const ARM64TargetLowering *TLI; - - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; - -public: - ARM64TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { - llvm_unreachable("This pass cannot be directly constructed"); - } - - ARM64TTI(const ARM64TargetMachine *TM) - : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), - TLI(TM->getTargetLowering()) { - initializeARM64TTIPass(*PassRegistry::getPassRegistry()); - } - - void initializePass() override { pushTTIStack(this); } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - TargetTransformInfo::getAnalysisUsage(AU); - } - - /// Pass identification. - static char ID; - - /// Provide necessary pointer adjustments for the two base classes. - void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &TargetTransformInfo::ID) - return (TargetTransformInfo *)this; - return this; - } - - /// \name Scalar TTI Implementations - /// @{ - - unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override; - PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override; - - /// @} - - /// \name Vector TTI Implementations - /// @{ - - unsigned getNumberOfRegisters(bool Vector) const override { - if (Vector) - return 32; - - return 31; - } - - unsigned getRegisterBitWidth(bool Vector) const override { - if (Vector) - return 128; - - return 64; - } - - unsigned getMaximumUnrollFactor() const override { return 2; } - - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const - override; - - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const - override; - - unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind Opd1Info = OK_AnyValue, - OperandValueKind Opd2Info = OK_AnyValue) const - override; - - unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override; - - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const - override; - - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const override; - /// @} -}; - -} // end anonymous namespace - -INITIALIZE_AG_PASS(ARM64TTI, TargetTransformInfo, "arm64tti", - "ARM64 Target Transform Info", true, true, false) -char ARM64TTI::ID = 0; - -ImmutablePass * -llvm::createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM) { - return new ARM64TTI(TM); -} - -unsigned ARM64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { - assert(Ty->isIntegerTy()); - - unsigned BitSize = Ty->getPrimitiveSizeInBits(); - if (BitSize == 0) - return ~0U; - - 
int64_t Val = Imm.getSExtValue(); - if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize)) - return 1; - - if ((int64_t)Val < 0) - Val = ~Val; - if (BitSize == 32) - Val &= (1LL << 32) - 1; - - unsigned LZ = countLeadingZeros((uint64_t)Val); - unsigned Shift = (63 - LZ) / 16; - // MOVZ is free so return true for one or fewer MOVK. - return (Shift == 0) ? 1 : Shift; -} - -ARM64TTI::PopcntSupportKind ARM64TTI::getPopcntSupport(unsigned TyWidth) const { - assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); - if (TyWidth == 32 || TyWidth == 64) - return PSK_FastHardware; - // TODO: ARM64TargetLowering::LowerCTPOP() supports 128bit popcount. - return PSK_Software; -} - -unsigned ARM64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - EVT SrcTy = TLI->getValueType(Src); - EVT DstTy = TLI->getValueType(Dst); - - if (!SrcTy.isSimple() || !DstTy.isSimple()) - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); - - static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = { - // LowerVectorINT_TO_FP: - { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, - { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, - // LowerVectorFP_TO_INT - { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, - { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 }, - { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, - { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, - { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 }, - { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 }, - { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 }, - { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 }, - }; - - int Idx = ConvertCostTableLookup<MVT>( - ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(), - SrcTy.getSimpleVT()); - if (Idx != -1) - return ConversionTbl[Idx].Cost; - - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); -} - -unsigned ARM64TTI::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { - assert(Val->isVectorTy() && "This must be a vector type"); - - if (Index != -1U) { - // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val); - - // This type is legalized to a scalar type. - if (!LT.second.isVector()) - return 0; - - // The type may be split. Normalize the index to the new type. - unsigned Width = LT.second.getVectorNumElements(); - Index = Index % Width; - - // The element at index zero is already inside the vector. - if (Index == 0) - return 0; - } - - // All other insert/extracts cost this much. - return 2; -} - -unsigned ARM64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind Opd1Info, - OperandValueKind Opd2Info) const { - // Legalize the type. 
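Editorial aside: the integer-immediate cost logic above counts how many 16-bit chunks a value spans; MOVZ materializes the first chunk and each additional chunk costs one MOVK. A standalone sketch of that counting follows, with the logical-immediate special case omitted; approxImmCost is a hypothetical name, not the LLVM API, and __builtin_clzll is the GCC/Clang builtin.

#include <cstdint>
#include <cstdio>

// Hypothetical restatement of the chunk-counting heuristic. The original code
// also treats valid logical immediates as cost 1 before reaching this point.
static unsigned approxImmCost(int64_t Val, unsigned BitSize) {
  if (Val == 0)
    return 1;                        // a single MOVZ
  if (Val < 0)
    Val = ~Val;                      // negative values are covered by MOVN
  if (BitSize == 32)
    Val &= (1LL << 32) - 1;
  if (Val == 0)
    return 1;
  unsigned LZ = __builtin_clzll((uint64_t)Val);
  unsigned Shift = (63 - LZ) / 16;   // index of the highest non-zero chunk
  return Shift == 0 ? 1 : Shift;     // roughly the MOVK count, floor of 1
}

int main() {
  printf("%u %u %u\n", approxImmCost(0x1234, 64),   // 1
         approxImmCost(0x123456789LL, 64),          // 2
         approxImmCost(-2, 64));                    // 1
}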
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); - - int ISD = TLI->InstructionOpcodeToISD(Opcode); - - switch (ISD) { - default: - return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info, - Opd2Info); - case ISD::ADD: - case ISD::MUL: - case ISD::XOR: - case ISD::OR: - case ISD::AND: - // These nodes are marked as 'custom' for combining purposes only. - // We know that they are legal. See LowerAdd in ISelLowering. - return 1 * LT.first; - } -} - -unsigned ARM64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { - // Address computations in vectorized code with non-consecutive addresses will - // likely result in more instructions compared to scalar code where the - // computation can more often be merged into the index mode. The resulting - // extra micro-ops can significantly decrease throughput. - unsigned NumVectorInstToHideOverhead = 10; - - if (Ty->isVectorTy() && IsComplex) - return NumVectorInstToHideOverhead; - - // In many cases the address computation is not merged into the instruction - // addressing mode. - return 1; -} - -unsigned ARM64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const { - - int ISD = TLI->InstructionOpcodeToISD(Opcode); - // We don't lower vector selects well that are wider than the register width. - if (ValTy->isVectorTy() && ISD == ISD::SELECT) { - // We would need this many instructions to hide the scalarization happening. - unsigned AmortizationCost = 20; - static const TypeConversionCostTblEntry<MVT::SimpleValueType> - VectorSelectTbl[] = { - { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost }, - { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost }, - { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost }, - { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost }, - { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost }, - { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost } - }; - - EVT SelCondTy = TLI->getValueType(CondTy); - EVT SelValTy = TLI->getValueType(ValTy); - if (SelCondTy.isSimple() && SelValTy.isSimple()) { - int Idx = - ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(), - SelValTy.getSimpleVT()); - if (Idx != -1) - return VectorSelectTbl[Idx].Cost; - } - } - return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); -} - -unsigned ARM64TTI::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); - - if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 && - Src->getVectorElementType()->isIntegerTy(64)) { - // Unaligned stores are extremely inefficient. We don't split - // unaligned v2i64 stores because the negative impact that has shown in - // practice on inlined memcpy code. - // We make v2i64 stores expensive so that we will only vectorize if there - // are 6 other instructions getting vectorized. - unsigned AmortizationCost = 6; - - return LT.first * 2 * AmortizationCost; - } - - if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) && - Src->getVectorNumElements() < 8) { - // We scalarize the loads/stores because there is not v.4b register and we - // have to promote the elements to v.4h. - unsigned NumVecElts = Src->getVectorNumElements(); - unsigned NumVectorizableInstsToAmortize = NumVecElts * 2; - // We generate 2 instructions per vector element. 
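Editorial aside: the two store-cost special cases described in the comments above amount to simple arithmetic: unaligned v2i64 stores are multiplied by an amortization factor of 6, and i8 vectors with fewer than 8 elements are scalarized via v.4h at roughly two instructions per element. A hypothetical standalone restatement follows; the function name and parameters are illustrative, not LLVM API, and LegalizedParts stands in for LT.first.

#include <cstdio>

static unsigned approxStoreCost(unsigned LegalizedParts, bool UnalignedV2I64,
                                unsigned NumI8Elts) {
  if (UnalignedV2I64) {
    const unsigned AmortizationCost = 6;          // needs ~6 other vectorized
    return LegalizedParts * 2 * AmortizationCost; // instructions to pay off
  }
  if (NumI8Elts > 0 && NumI8Elts < 8) {
    // No v.4b register: elements are promoted to v.4h and each element costs
    // roughly two instructions, amortized over 2 * NumI8Elts instructions.
    unsigned InstsToAmortize = NumI8Elts * 2;
    return InstsToAmortize * NumI8Elts * 2;
  }
  return LegalizedParts;                          // default: cost per legal part
}

int main() {
  printf("%u\n", approxStoreCost(1, true, 0));    // 12: unaligned v2i64 store
  printf("%u\n", approxStoreCost(1, false, 4));   // 64: scalarized v4i8 access
}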
- return NumVectorizableInstsToAmortize * NumVecElts * 2; - } - - return LT.first; -} diff --git a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp b/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp deleted file mode 100644 index 38a61d8..0000000 --- a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp +++ /dev/null @@ -1,4832 +0,0 @@ -//===-- ARM64AsmParser.cpp - Parse ARM64 assembly to MCInst instructions --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCTargetAsmParser.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" -#include <cstdio> -using namespace llvm; - -namespace { - -class ARM64Operand; - -class ARM64AsmParser : public MCTargetAsmParser { -public: - typedef SmallVectorImpl<MCParsedAsmOperand *> OperandVector; - -private: - StringRef Mnemonic; ///< Instruction mnemonic. 
- MCSubtargetInfo &STI; - MCAsmParser &Parser; - - MCAsmParser &getParser() const { return Parser; } - MCAsmLexer &getLexer() const { return Parser.getLexer(); } - - SMLoc getLoc() const { return Parser.getTok().getLoc(); } - - bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands); - unsigned parseCondCodeString(StringRef Cond); - bool parseCondCode(OperandVector &Operands, bool invertCondCode); - int tryParseRegister(); - int tryMatchVectorRegister(StringRef &Kind); - bool parseOptionalShift(OperandVector &Operands); - bool parseOptionalExtend(OperandVector &Operands); - bool parseRegister(OperandVector &Operands); - bool parseMemory(OperandVector &Operands); - bool parseSymbolicImmVal(const MCExpr *&ImmVal); - bool parseVectorList(OperandVector &Operands); - bool parseOperand(OperandVector &Operands, bool isCondCode, - bool invertCondCode); - - void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } - bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } - bool showMatchError(SMLoc Loc, unsigned ErrCode); - - bool parseDirectiveWord(unsigned Size, SMLoc L); - bool parseDirectiveTLSDescCall(SMLoc L); - - bool parseDirectiveLOH(StringRef LOH, SMLoc L); - - bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc); - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - OperandVector &Operands, MCStreamer &Out, - unsigned &ErrorInfo, bool MatchingInlineAsm); -/// @name Auto-generated Match Functions -/// { - -#define GET_ASSEMBLER_HEADER -#include "ARM64GenAsmMatcher.inc" - - /// } - - OperandMatchResultTy tryParseNoIndexMemory(OperandVector &Operands); - OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands); - OperandMatchResultTy tryParseSystemRegister(OperandVector &Operands); - OperandMatchResultTy tryParseCPSRField(OperandVector &Operands); - OperandMatchResultTy tryParseSysCROperand(OperandVector &Operands); - OperandMatchResultTy tryParsePrefetch(OperandVector &Operands); - OperandMatchResultTy tryParseAdrpLabel(OperandVector &Operands); - OperandMatchResultTy tryParseAdrLabel(OperandVector &Operands); - OperandMatchResultTy tryParseFPImm(OperandVector &Operands); - bool tryParseVectorRegister(OperandVector &Operands); - -public: - enum ARM64MatchResultTy { - Match_InvalidSuffix = FIRST_TARGET_MATCH_RESULT_TY, -#define GET_OPERAND_DIAGNOSTIC_TYPES -#include "ARM64GenAsmMatcher.inc" - }; - ARM64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &MII) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { - MCAsmParserExtension::Initialize(_Parser); - } - - virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, OperandVector &Operands); - virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); - virtual bool ParseDirective(AsmToken DirectiveID); - unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind); - - static bool classifySymbolRef(const MCExpr *Expr, - ARM64MCExpr::VariantKind &ELFRefKind, - MCSymbolRefExpr::VariantKind &DarwinRefKind, - const MCConstantExpr *&Addend); -}; -} // end anonymous namespace - -namespace { - -/// ARM64Operand - Instances of this class represent a parsed ARM64 machine -/// instruction. 
-class ARM64Operand : public MCParsedAsmOperand { -public: - enum MemIdxKindTy { - ImmediateOffset, // pre-indexed, no writeback - RegisterOffset // register offset, with optional extend - }; - -private: - enum KindTy { - k_Immediate, - k_Memory, - k_Register, - k_VectorList, - k_VectorIndex, - k_Token, - k_SysCR, - k_Prefetch, - k_Shifter, - k_Extend, - k_FPImm, - k_Barrier, - k_SystemRegister, - k_CPSRField - } Kind; - - SMLoc StartLoc, EndLoc, OffsetLoc; - - struct TokOp { - const char *Data; - unsigned Length; - bool IsSuffix; // Is the operand actually a suffix on the mnemonic. - }; - - struct RegOp { - unsigned RegNum; - bool isVector; - }; - - struct VectorListOp { - unsigned RegNum; - unsigned Count; - unsigned NumElements; - unsigned ElementKind; - }; - - struct VectorIndexOp { - unsigned Val; - }; - - struct ImmOp { - const MCExpr *Val; - }; - - struct FPImmOp { - unsigned Val; // Encoded 8-bit representation. - }; - - struct BarrierOp { - unsigned Val; // Not the enum since not all values have names. - }; - - struct SystemRegisterOp { - // 16-bit immediate, usually from the ARM64SYS::SystermRegister enum, - // but not limited to those values. - uint16_t Val; - }; - - struct CPSRFieldOp { - ARM64SYS::CPSRField Field; - }; - - struct SysCRImmOp { - unsigned Val; - }; - - struct PrefetchOp { - unsigned Val; - }; - - struct ShifterOp { - unsigned Val; - }; - - struct ExtendOp { - unsigned Val; - }; - - // This is for all forms of ARM64 address expressions - struct MemOp { - unsigned BaseRegNum, OffsetRegNum; - ARM64_AM::ExtendType ExtType; - unsigned ShiftVal; - bool ExplicitShift; - const MCExpr *OffsetImm; - MemIdxKindTy Mode; - }; - - union { - struct TokOp Tok; - struct RegOp Reg; - struct VectorListOp VectorList; - struct VectorIndexOp VectorIndex; - struct ImmOp Imm; - struct FPImmOp FPImm; - struct BarrierOp Barrier; - struct SystemRegisterOp SystemRegister; - struct CPSRFieldOp CPSRField; - struct SysCRImmOp SysCRImm; - struct PrefetchOp Prefetch; - struct ShifterOp Shifter; - struct ExtendOp Extend; - struct MemOp Mem; - }; - - // Keep the MCContext around as the MCExprs may need manipulated during - // the add<>Operands() calls. - MCContext &Ctx; - - ARM64Operand(KindTy K, MCContext &_Ctx) - : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {} - -public: - ARM64Operand(const ARM64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) { - Kind = o.Kind; - StartLoc = o.StartLoc; - EndLoc = o.EndLoc; - switch (Kind) { - case k_Token: - Tok = o.Tok; - break; - case k_Immediate: - Imm = o.Imm; - break; - case k_FPImm: - FPImm = o.FPImm; - break; - case k_Barrier: - Barrier = o.Barrier; - break; - case k_SystemRegister: - SystemRegister = o.SystemRegister; - break; - case k_CPSRField: - CPSRField = o.CPSRField; - break; - case k_Register: - Reg = o.Reg; - break; - case k_VectorList: - VectorList = o.VectorList; - break; - case k_VectorIndex: - VectorIndex = o.VectorIndex; - break; - case k_SysCR: - SysCRImm = o.SysCRImm; - break; - case k_Prefetch: - Prefetch = o.Prefetch; - break; - case k_Memory: - Mem = o.Mem; - break; - case k_Shifter: - Shifter = o.Shifter; - break; - case k_Extend: - Extend = o.Extend; - break; - } - } - - /// getStartLoc - Get the location of the first token of this operand. - SMLoc getStartLoc() const { return StartLoc; } - /// getEndLoc - Get the location of the last token of this operand. - SMLoc getEndLoc() const { return EndLoc; } - /// getOffsetLoc - Get the location of the offset of this memory operand. 
- SMLoc getOffsetLoc() const { return OffsetLoc; } - - StringRef getToken() const { - assert(Kind == k_Token && "Invalid access!"); - return StringRef(Tok.Data, Tok.Length); - } - - bool isTokenSuffix() const { - assert(Kind == k_Token && "Invalid access!"); - return Tok.IsSuffix; - } - - const MCExpr *getImm() const { - assert(Kind == k_Immediate && "Invalid access!"); - return Imm.Val; - } - - unsigned getFPImm() const { - assert(Kind == k_FPImm && "Invalid access!"); - return FPImm.Val; - } - - unsigned getBarrier() const { - assert(Kind == k_Barrier && "Invalid access!"); - return Barrier.Val; - } - - uint16_t getSystemRegister() const { - assert(Kind == k_SystemRegister && "Invalid access!"); - return SystemRegister.Val; - } - - ARM64SYS::CPSRField getCPSRField() const { - assert(Kind == k_CPSRField && "Invalid access!"); - return CPSRField.Field; - } - - unsigned getReg() const { - assert(Kind == k_Register && "Invalid access!"); - return Reg.RegNum; - } - - unsigned getVectorListStart() const { - assert(Kind == k_VectorList && "Invalid access!"); - return VectorList.RegNum; - } - - unsigned getVectorListCount() const { - assert(Kind == k_VectorList && "Invalid access!"); - return VectorList.Count; - } - - unsigned getVectorIndex() const { - assert(Kind == k_VectorIndex && "Invalid access!"); - return VectorIndex.Val; - } - - unsigned getSysCR() const { - assert(Kind == k_SysCR && "Invalid access!"); - return SysCRImm.Val; - } - - unsigned getPrefetch() const { - assert(Kind == k_Prefetch && "Invalid access!"); - return Prefetch.Val; - } - - unsigned getShifter() const { - assert(Kind == k_Shifter && "Invalid access!"); - return Shifter.Val; - } - - unsigned getExtend() const { - assert(Kind == k_Extend && "Invalid access!"); - return Extend.Val; - } - - bool isImm() const { return Kind == k_Immediate; } - bool isSImm9() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= -256 && Val < 256); - } - bool isSImm7s4() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= -256 && Val <= 252 && (Val & 3) == 0); - } - bool isSImm7s8() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= -512 && Val <= 504 && (Val & 7) == 0); - } - bool isSImm7s16() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= -1024 && Val <= 1008 && (Val & 15) == 0); - } - bool isImm0_7() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 8); - } - bool isImm1_8() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val > 0 && Val < 9); - } - bool isImm0_15() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 16); - } - bool isImm1_16() const { - if (!isImm()) - return false; - const MCConstantExpr 
*MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val > 0 && Val < 17); - } - bool isImm0_31() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 32); - } - bool isImm1_31() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 1 && Val < 32); - } - bool isImm1_32() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 1 && Val < 33); - } - bool isImm0_63() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 64); - } - bool isImm1_63() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 1 && Val < 64); - } - bool isImm1_64() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 1 && Val < 65); - } - bool isImm0_127() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 128); - } - bool isImm0_255() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 256); - } - bool isImm0_65535() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 65536); - } - bool isLogicalImm32() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - return ARM64_AM::isLogicalImmediate(MCE->getValue(), 32); - } - bool isLogicalImm64() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - return ARM64_AM::isLogicalImmediate(MCE->getValue(), 64); - } - bool isSIMDImmType10() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - return ARM64_AM::isAdvSIMDModImmType10(MCE->getValue()); - } - bool isBranchTarget26() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return true; - int64_t Val = MCE->getValue(); - if (Val & 0x3) - return false; - return (Val >= -(0x2000000 << 2) && Val <= (0x1ffffff << 2)); - } - bool isBranchTarget19() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return true; - int64_t Val = MCE->getValue(); - if (Val & 0x3) - return false; - return (Val >= -(0x40000 << 2) && Val <= (0x3ffff << 2)); - } - bool isBranchTarget14() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - 
return true; - int64_t Val = MCE->getValue(); - if (Val & 0x3) - return false; - return (Val >= -(0x2000 << 2) && Val <= (0x1fff << 2)); - } - - bool isMovWSymbol(ArrayRef<ARM64MCExpr::VariantKind> AllowedModifiers) const { - if (!isImm()) - return false; - - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - const MCConstantExpr *Addend; - if (!ARM64AsmParser::classifySymbolRef(getImm(), ELFRefKind, DarwinRefKind, - Addend)) { - return false; - } - if (DarwinRefKind != MCSymbolRefExpr::VK_None) - return false; - - for (unsigned i = 0; i != AllowedModifiers.size(); ++i) { - if (ELFRefKind == AllowedModifiers[i]) - return Addend == 0; - } - - return false; - } - - bool isMovZSymbolG3() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G3 }; - return isMovWSymbol(Variants); - } - - bool isMovZSymbolG2() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G2, - ARM64MCExpr::VK_TPREL_G2, - ARM64MCExpr::VK_DTPREL_G2 }; - return isMovWSymbol(Variants); - } - - bool isMovZSymbolG1() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G1, - ARM64MCExpr::VK_GOTTPREL_G1, - ARM64MCExpr::VK_TPREL_G1, - ARM64MCExpr::VK_DTPREL_G1, }; - return isMovWSymbol(Variants); - } - - bool isMovZSymbolG0() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G0, - ARM64MCExpr::VK_TPREL_G0, - ARM64MCExpr::VK_DTPREL_G0 }; - return isMovWSymbol(Variants); - } - - bool isMovKSymbolG2() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G2_NC }; - return isMovWSymbol(Variants); - } - - bool isMovKSymbolG1() const { - static ARM64MCExpr::VariantKind Variants[] = { - ARM64MCExpr::VK_ABS_G1_NC, ARM64MCExpr::VK_TPREL_G1_NC, - ARM64MCExpr::VK_DTPREL_G1_NC - }; - return isMovWSymbol(Variants); - } - - bool isMovKSymbolG0() const { - static ARM64MCExpr::VariantKind Variants[] = { - ARM64MCExpr::VK_ABS_G0_NC, ARM64MCExpr::VK_GOTTPREL_G0_NC, - ARM64MCExpr::VK_TPREL_G0_NC, ARM64MCExpr::VK_DTPREL_G0_NC - }; - return isMovWSymbol(Variants); - } - - bool isFPImm() const { return Kind == k_FPImm; } - bool isBarrier() const { return Kind == k_Barrier; } - bool isSystemRegister() const { - if (Kind == k_SystemRegister) - return true; - // SPSel is legal for both the system register and the CPSR-field - // variants of MSR, so special case that. Fugly. - return (Kind == k_CPSRField && getCPSRField() == ARM64SYS::cpsr_SPSel); - } - bool isSystemCPSRField() const { return Kind == k_CPSRField; } - bool isReg() const { return Kind == k_Register && !Reg.isVector; } - bool isVectorReg() const { return Kind == k_Register && Reg.isVector; } - - /// Is this a vector list with the type implicit (presumably attached to the - /// instruction itself)? 
- template <unsigned NumRegs> bool isImplicitlyTypedVectorList() const { - return Kind == k_VectorList && VectorList.Count == NumRegs && - !VectorList.ElementKind; - } - - template <unsigned NumRegs, unsigned NumElements, char ElementKind> - bool isTypedVectorList() const { - if (Kind != k_VectorList) - return false; - if (VectorList.Count != NumRegs) - return false; - if (VectorList.ElementKind != ElementKind) - return false; - return VectorList.NumElements == NumElements; - } - - bool isVectorIndexB() const { - return Kind == k_VectorIndex && VectorIndex.Val < 16; - } - bool isVectorIndexH() const { - return Kind == k_VectorIndex && VectorIndex.Val < 8; - } - bool isVectorIndexS() const { - return Kind == k_VectorIndex && VectorIndex.Val < 4; - } - bool isVectorIndexD() const { - return Kind == k_VectorIndex && VectorIndex.Val < 2; - } - bool isToken() const { return Kind == k_Token; } - bool isTokenEqual(StringRef Str) const { - return Kind == k_Token && getToken() == Str; - } - bool isMem() const { return Kind == k_Memory; } - bool isSysCR() const { return Kind == k_SysCR; } - bool isPrefetch() const { return Kind == k_Prefetch; } - bool isShifter() const { return Kind == k_Shifter; } - bool isExtend() const { - // lsl is an alias for UXTX but will be a parsed as a k_Shifter operand. - if (isShifter()) { - ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val); - return ST == ARM64_AM::LSL; - } - return Kind == k_Extend; - } - bool isExtend64() const { - if (Kind != k_Extend) - return false; - // UXTX and SXTX require a 64-bit source register (the ExtendLSL64 class). - ARM64_AM::ExtendType ET = ARM64_AM::getArithExtendType(Extend.Val); - return ET != ARM64_AM::UXTX && ET != ARM64_AM::SXTX; - } - bool isExtendLSL64() const { - // lsl is an alias for UXTX but will be a parsed as a k_Shifter operand. - if (isShifter()) { - ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val); - return ST == ARM64_AM::LSL; - } - if (Kind != k_Extend) - return false; - ARM64_AM::ExtendType ET = ARM64_AM::getArithExtendType(Extend.Val); - return ET == ARM64_AM::UXTX || ET == ARM64_AM::SXTX; - } - - bool isArithmeticShifter() const { - if (!isShifter()) - return false; - - // An arithmetic shifter is LSL, LSR, or ASR. - ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val); - return ST == ARM64_AM::LSL || ST == ARM64_AM::LSR || ST == ARM64_AM::ASR; - } - - bool isMovImm32Shifter() const { - if (!isShifter()) - return false; - - // A MOVi shifter is LSL of 0, 16, 32, or 48. - ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val); - if (ST != ARM64_AM::LSL) - return false; - uint64_t Val = ARM64_AM::getShiftValue(Shifter.Val); - return (Val == 0 || Val == 16); - } - - bool isMovImm64Shifter() const { - if (!isShifter()) - return false; - - // A MOVi shifter is LSL of 0 or 16. - ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val); - if (ST != ARM64_AM::LSL) - return false; - uint64_t Val = ARM64_AM::getShiftValue(Shifter.Val); - return (Val == 0 || Val == 16 || Val == 32 || Val == 48); - } - - bool isAddSubShifter() const { - if (!isShifter()) - return false; - - // An ADD/SUB shifter is either 'lsl #0' or 'lsl #12'. - unsigned Val = Shifter.Val; - return ARM64_AM::getShiftType(Val) == ARM64_AM::LSL && - (ARM64_AM::getShiftValue(Val) == 0 || - ARM64_AM::getShiftValue(Val) == 12); - } - - bool isLogicalVecShifter() const { - if (!isShifter()) - return false; - - // A logical vector shifter is a left shift by 0, 8, 16, or 24. 
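Editorial aside: the value checks in the shifter predicates above encode the ISA rules directly: ADD/SUB immediates take lsl #0 or #12, the 32-bit MOV-immediate form takes lsl #0 or #16, and the 64-bit form additionally allows #32 and #48 (per the checked values, whatever the adjacent comments suggest). A hypothetical standalone restatement of those checks, not LLVM code:

#include <cstdio>

static bool isAddSubShiftAmount(unsigned Amount) {
  return Amount == 0 || Amount == 12;          // 12-bit immediate, optionally << 12
}
static bool isWideImmShiftAmount(unsigned Amount, unsigned RegBits) {
  return Amount % 16 == 0 && Amount < RegBits; // 0/16 for 32-bit, up to 48 for 64-bit
}

int main() {
  printf("%d %d %d\n", isAddSubShiftAmount(12),     // 1
         isWideImmShiftAmount(48, 64),              // 1
         isWideImmShiftAmount(32, 32));             // 0
}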
- unsigned Val = Shifter.Val; - unsigned Shift = ARM64_AM::getShiftValue(Val); - return ARM64_AM::getShiftType(Val) == ARM64_AM::LSL && - (Shift == 0 || Shift == 8 || Shift == 16 || Shift == 24); - } - - bool isLogicalVecHalfWordShifter() const { - if (!isLogicalVecShifter()) - return false; - - // A logical vector shifter is a left shift by 0 or 8. - unsigned Val = Shifter.Val; - unsigned Shift = ARM64_AM::getShiftValue(Val); - return ARM64_AM::getShiftType(Val) == ARM64_AM::LSL && - (Shift == 0 || Shift == 8); - } - - bool isMoveVecShifter() const { - if (!isShifter()) - return false; - - // A logical vector shifter is a left shift by 8 or 16. - unsigned Val = Shifter.Val; - unsigned Shift = ARM64_AM::getShiftValue(Val); - return ARM64_AM::getShiftType(Val) == ARM64_AM::MSL && - (Shift == 8 || Shift == 16); - } - - bool isMemoryRegisterOffset8() const { - return isMem() && Mem.Mode == RegisterOffset && Mem.ShiftVal == 0; - } - - bool isMemoryRegisterOffset16() const { - return isMem() && Mem.Mode == RegisterOffset && - (Mem.ShiftVal == 0 || Mem.ShiftVal == 1); - } - - bool isMemoryRegisterOffset32() const { - return isMem() && Mem.Mode == RegisterOffset && - (Mem.ShiftVal == 0 || Mem.ShiftVal == 2); - } - - bool isMemoryRegisterOffset64() const { - return isMem() && Mem.Mode == RegisterOffset && - (Mem.ShiftVal == 0 || Mem.ShiftVal == 3); - } - - bool isMemoryRegisterOffset128() const { - return isMem() && Mem.Mode == RegisterOffset && - (Mem.ShiftVal == 0 || Mem.ShiftVal == 4); - } - - bool isMemoryUnscaled() const { - if (!isMem()) - return false; - if (Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - // Make sure the immediate value is valid. - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm); - if (!CE) - return false; - // The offset must fit in a signed 9-bit unscaled immediate. - int64_t Value = CE->getValue(); - return (Value >= -256 && Value < 256); - } - // Fallback unscaled operands are for aliases of LDR/STR that fall back - // to LDUR/STUR when the offset is not legal for the former but is for - // the latter. As such, in addition to checking for being a legal unscaled - // address, also check that it is not a legal scaled address. This avoids - // ambiguity in the matcher. - bool isMemoryUnscaledFB8() const { - return isMemoryUnscaled() && !isMemoryIndexed8(); - } - bool isMemoryUnscaledFB16() const { - return isMemoryUnscaled() && !isMemoryIndexed16(); - } - bool isMemoryUnscaledFB32() const { - return isMemoryUnscaled() && !isMemoryIndexed32(); - } - bool isMemoryUnscaledFB64() const { - return isMemoryUnscaled() && !isMemoryIndexed64(); - } - bool isMemoryUnscaledFB128() const { - return isMemoryUnscaled() && !isMemoryIndexed128(); - } - bool isMemoryIndexed(unsigned Scale) const { - if (!isMem()) - return false; - if (Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - // Make sure the immediate value is valid. - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm); - - if (CE) { - // The offset must be a positive multiple of the scale and in range of - // encoding with a 12-bit immediate. - int64_t Value = CE->getValue(); - return (Value >= 0 && (Value % Scale) == 0 && Value <= (4095 * Scale)); - } - - // If it's not a constant, check for some expressions we know. 
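Editorial aside: the memory-operand predicates above distinguish two immediate forms: a signed 9-bit byte offset for the unscaled (LDUR/STUR-style) mode, and an unsigned 12-bit offset that must be a non-negative multiple of the access size for the scaled mode. A hypothetical standalone restatement of the two range checks:

#include <cstdint>
#include <cstdio>

static bool fitsUnscaled(int64_t Offset) {
  return Offset >= -256 && Offset < 256;              // signed 9-bit field
}
static bool fitsScaled(int64_t Offset, unsigned Scale) {
  return Offset >= 0 && Offset % Scale == 0 &&
         Offset <= 4095 * (int64_t)Scale;             // unsigned 12-bit field
}

int main() {
  printf("%d %d\n", fitsUnscaled(-17), fitsScaled(-17, 8));     // 1 0
  printf("%d %d\n", fitsUnscaled(32760), fitsScaled(32760, 8)); // 0 1
}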
- const MCExpr *Expr = Mem.OffsetImm; - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - const MCConstantExpr *Addend; - if (!ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, - Addend)) { - // If we don't understand the expression, assume the best and - // let the fixup and relocation code deal with it. - return true; - } - - if (DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF || - ELFRefKind == ARM64MCExpr::VK_LO12 || - ELFRefKind == ARM64MCExpr::VK_GOT_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_GOTTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12) { - // Note that we don't range-check the addend. It's adjusted modulo page - // size when converted, so there is no "out of range" condition when using - // @pageoff. - int64_t Value = Addend ? Addend->getValue() : 0; - return Value >= 0 && (Value % Scale) == 0; - } else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF || - DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) { - // @gotpageoff/@tlvppageoff can only be used directly, not with an addend. - return Addend == 0; - } - - return false; - } - bool isMemoryIndexed128() const { return isMemoryIndexed(16); } - bool isMemoryIndexed64() const { return isMemoryIndexed(8); } - bool isMemoryIndexed32() const { return isMemoryIndexed(4); } - bool isMemoryIndexed16() const { return isMemoryIndexed(2); } - bool isMemoryIndexed8() const { return isMemoryIndexed(1); } - bool isMemoryNoIndex() const { - if (!isMem()) - return false; - if (Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - - // Make sure the immediate value is valid. Only zero is allowed. 
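Editorial aside: the note above about @pageoff addends needing no range check follows from how the page offset is formed: only the low 12 bits of the final address end up in the instruction, so any addend simply wraps within the 4 KiB page. A small illustration with an invented symbol address:

#include <cstdint>
#include <cstdio>

static uint64_t pageOff(uint64_t Addr) { return Addr & 0xfff; } // low 12 bits

int main() {
  const uint64_t Sym = 0x100002f80ULL;   // hypothetical symbol address
  printf("0x%llx 0x%llx\n",
         (unsigned long long)pageOff(Sym + 8),
         (unsigned long long)pageOff(Sym + 0x2000)); // wraps within the page
}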
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm); - if (!CE || CE->getValue() != 0) - return false; - return true; - } - bool isMemorySIMDNoIndex() const { - if (!isMem()) - return false; - if (Mem.Mode != ImmediateOffset) - return false; - return Mem.OffsetImm == 0; - } - bool isMemoryIndexedSImm9() const { - if (!isMem() || Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm); - assert(CE && "Non-constant pre-indexed offset!"); - int64_t Value = CE->getValue(); - return Value >= -256 && Value <= 255; - } - bool isMemoryIndexed32SImm7() const { - if (!isMem() || Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm); - assert(CE && "Non-constant pre-indexed offset!"); - int64_t Value = CE->getValue(); - return ((Value % 4) == 0) && Value >= -256 && Value <= 252; - } - bool isMemoryIndexed64SImm7() const { - if (!isMem() || Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm); - assert(CE && "Non-constant pre-indexed offset!"); - int64_t Value = CE->getValue(); - return ((Value % 8) == 0) && Value >= -512 && Value <= 504; - } - bool isMemoryIndexed128SImm7() const { - if (!isMem() || Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm); - assert(CE && "Non-constant pre-indexed offset!"); - int64_t Value = CE->getValue(); - return ((Value % 16) == 0) && Value >= -1024 && Value <= 1008; - } - - bool isAdrpLabel() const { - // Validation was handled during parsing, so we just sanity check that - // something didn't go haywire. - return isImm(); - } - - bool isAdrLabel() const { - // Validation was handled during parsing, so we just sanity check that - // something didn't go haywire. - return isImm(); - } - - void addExpr(MCInst &Inst, const MCExpr *Expr) const { - // Add as immediates when possible. Null MCExpr = 0. 
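Editorial aside: the SImm7-scaled predicates above (multiples of 4 in [-256, 252], of 8 in [-512, 504], of 16 in [-1024, 1008]) are all the same signed 7-bit field scaled by the access size, as used by the load/store-pair forms. A hypothetical standalone restatement:

#include <cstdint>
#include <cstdio>

static bool fitsPairOffset(int64_t Offset, unsigned Scale) {
  if (Offset % (int64_t)Scale != 0)
    return false;                         // must be a multiple of the access size
  int64_t Scaled = Offset / (int64_t)Scale;
  return Scaled >= -64 && Scaled <= 63;   // signed 7-bit field
}

int main() {
  printf("%d %d %d\n", fitsPairOffset(252, 4), fitsPairOffset(504, 8),
         fitsPairOffset(1008, 16));       // 1 1 1
  printf("%d\n", fitsPairOffset(-520, 8)); // 0: -65 after scaling
}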
- if (Expr == 0) - Inst.addOperand(MCOperand::CreateImm(0)); - else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); - else - Inst.addOperand(MCOperand::CreateExpr(Expr)); - } - - void addRegOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getReg())); - } - - void addVectorRegOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getReg())); - } - - template <unsigned NumRegs> - void addVectorList64Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - static unsigned FirstRegs[] = { ARM64::D0, ARM64::D0_D1, - ARM64::D0_D1_D2, ARM64::D0_D1_D2_D3 }; - unsigned FirstReg = FirstRegs[NumRegs - 1]; - - Inst.addOperand( - MCOperand::CreateReg(FirstReg + getVectorListStart() - ARM64::Q0)); - } - - template <unsigned NumRegs> - void addVectorList128Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - static unsigned FirstRegs[] = { ARM64::Q0, ARM64::Q0_Q1, - ARM64::Q0_Q1_Q2, ARM64::Q0_Q1_Q2_Q3 }; - unsigned FirstReg = FirstRegs[NumRegs - 1]; - - Inst.addOperand( - MCOperand::CreateReg(FirstReg + getVectorListStart() - ARM64::Q0)); - } - - void addVectorIndexBOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); - } - - void addVectorIndexHOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); - } - - void addVectorIndexSOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); - } - - void addVectorIndexDOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); - } - - void addImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // If this is a pageoff symrefexpr with an addend, adjust the addend - // to be only the page-offset portion. Otherwise, just add the expr - // as-is. 
- addExpr(Inst, getImm()); - } - - void addAdrpLabelOperands(MCInst &Inst, unsigned N) const { - addImmOperands(Inst, N); - } - - void addAdrLabelOperands(MCInst &Inst, unsigned N) const { - addImmOperands(Inst, N); - } - - void addSImm9Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addSImm7s4Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 4)); - } - - void addSImm7s8Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 8)); - } - - void addSImm7s16Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 16)); - } - - void addImm0_7Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm1_8Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm0_15Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm1_16Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm0_31Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm1_31Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm1_32Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm0_63Operands(MCInst &Inst, unsigned N) 
const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm1_63Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm1_64Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm0_127Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm0_255Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm0_65535Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addLogicalImm32Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid logical immediate operand!"); - uint64_t encoding = ARM64_AM::encodeLogicalImmediate(MCE->getValue(), 32); - Inst.addOperand(MCOperand::CreateImm(encoding)); - } - - void addLogicalImm64Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid logical immediate operand!"); - uint64_t encoding = ARM64_AM::encodeLogicalImmediate(MCE->getValue(), 64); - Inst.addOperand(MCOperand::CreateImm(encoding)); - } - - void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid immediate operand!"); - uint64_t encoding = ARM64_AM::encodeAdvSIMDModImmType10(MCE->getValue()); - Inst.addOperand(MCOperand::CreateImm(encoding)); - } - - void addBranchTarget26Operands(MCInst &Inst, unsigned N) const { - // Branch operands don't encode the low bits, so shift them off - // here. If it's a label, however, just put it on directly as there's - // not enough information now to do anything. - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) { - addExpr(Inst, getImm()); - return; - } - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2)); - } - - void addBranchTarget19Operands(MCInst &Inst, unsigned N) const { - // Branch operands don't encode the low bits, so shift them off - // here. 
If it's a label, however, just put it on directly as there's - // not enough information now to do anything. - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) { - addExpr(Inst, getImm()); - return; - } - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2)); - } - - void addBranchTarget14Operands(MCInst &Inst, unsigned N) const { - // Branch operands don't encode the low bits, so shift them off - // here. If it's a label, however, just put it on directly as there's - // not enough information now to do anything. - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) { - addExpr(Inst, getImm()); - return; - } - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2)); - } - - void addFPImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getFPImm())); - } - - void addBarrierOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getBarrier())); - } - - void addSystemRegisterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - if (Kind == k_SystemRegister) - Inst.addOperand(MCOperand::CreateImm(getSystemRegister())); - else { - assert(Kind == k_CPSRField && getCPSRField() == ARM64SYS::cpsr_SPSel); - Inst.addOperand(MCOperand::CreateImm(ARM64SYS::SPSel)); - } - } - - void addSystemCPSRFieldOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getCPSRField())); - } - - void addSysCROperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getSysCR())); - } - - void addPrefetchOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getPrefetch())); - } - - void addShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addArithmeticShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addMovImm32ShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addMovImm64ShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addAddSubShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addLogicalVecShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addLogicalVecHalfWordShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addMoveVecShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of 
operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addExtendOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // lsl is an alias for UXTX but will be a parsed as a k_Shifter operand. - if (isShifter()) { - assert(ARM64_AM::getShiftType(getShifter()) == ARM64_AM::LSL); - unsigned imm = getArithExtendImm(ARM64_AM::UXTX, - ARM64_AM::getShiftValue(getShifter())); - Inst.addOperand(MCOperand::CreateImm(imm)); - } else - Inst.addOperand(MCOperand::CreateImm(getExtend())); - } - - void addExtend64Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getExtend())); - } - - void addExtendLSL64Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // lsl is an alias for UXTX but will be a parsed as a k_Shifter operand. - if (isShifter()) { - assert(ARM64_AM::getShiftType(getShifter()) == ARM64_AM::LSL); - unsigned imm = getArithExtendImm(ARM64_AM::UXTX, - ARM64_AM::getShiftValue(getShifter())); - Inst.addOperand(MCOperand::CreateImm(imm)); - } else - Inst.addOperand(MCOperand::CreateImm(getExtend())); - } - - void addMemoryRegisterOffsetOperands(MCInst &Inst, unsigned N, bool DoShift) { - assert(N == 3 && "Invalid number of operands!"); - - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - Inst.addOperand(MCOperand::CreateReg(Mem.OffsetRegNum)); - unsigned ExtendImm = ARM64_AM::getMemExtendImm(Mem.ExtType, DoShift); - Inst.addOperand(MCOperand::CreateImm(ExtendImm)); - } - - void addMemoryRegisterOffset8Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ExplicitShift); - } - - void addMemoryRegisterOffset16Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 1); - } - - void addMemoryRegisterOffset32Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 2); - } - - void addMemoryRegisterOffset64Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 3); - } - - void addMemoryRegisterOffset128Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 4); - } - - void addMemoryIndexedOperands(MCInst &Inst, unsigned N, - unsigned Scale) const { - // Add the base register operand. - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - - if (!Mem.OffsetImm) { - // There isn't an offset. - Inst.addOperand(MCOperand::CreateImm(0)); - return; - } - - // Add the offset operand. - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm)) { - assert(CE->getValue() % Scale == 0 && - "Offset operand must be multiple of the scale!"); - - // The MCInst offset operand doesn't include the low bits (like the - // instruction encoding). - Inst.addOperand(MCOperand::CreateImm(CE->getValue() / Scale)); - } - - // If this is a pageoff symrefexpr with an addend, the linker will - // do the scaling of the addend. - // - // Otherwise we don't know what this is, so just add the scaling divide to - // the expression and let the MC fixup evaluation code deal with it. 
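Editorial aside: the addBranchTargetNNOperands methods a little above pair with the isBranchTargetNN predicates earlier in the operand class: the byte offset must be word-aligned and within the signed range of the field, and the encoded operand drops the low two bits. A hypothetical standalone sketch for the 26-bit case, not LLVM code:

#include <cstdint>
#include <cstdio>

// Returns true and writes the encoded field if ByteOffset is representable.
static bool encodeBranch26(int64_t ByteOffset, int32_t &Field) {
  if (ByteOffset & 0x3)
    return false;                               // must be 4-byte aligned
  if (ByteOffset < -(0x2000000LL << 2) || ByteOffset > (0x1ffffffLL << 2))
    return false;                               // roughly +/-128 MiB reach
  Field = (int32_t)(ByteOffset >> 2);           // low two bits are dropped
  return true;
}

int main() {
  int32_t Field;
  if (encodeBranch26(1024, Field))
    printf("field=%d\n", Field);                // field=256
  printf("%d\n", encodeBranch26(3, Field));     // 0: unaligned target rejected
}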
- const MCExpr *Expr = Mem.OffsetImm; - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - const MCConstantExpr *Addend; - if (Scale > 1 && - (!ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, - Addend) || - (Addend != 0 && DarwinRefKind != MCSymbolRefExpr::VK_PAGEOFF))) { - Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(Scale, Ctx), - Ctx); - } - - Inst.addOperand(MCOperand::CreateExpr(Expr)); - } - - void addMemoryUnscaledOperands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryUnscaled() && "Invalid number of operands!"); - // Add the base register operand. - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - - // Add the offset operand. - if (!Mem.OffsetImm) - Inst.addOperand(MCOperand::CreateImm(0)); - else { - // Only constant offsets supported. - const MCConstantExpr *CE = cast<MCConstantExpr>(Mem.OffsetImm); - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); - } - } - - void addMemoryIndexed128Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed128() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 16); - } - - void addMemoryIndexed64Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed64() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 8); - } - - void addMemoryIndexed32Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed32() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 4); - } - - void addMemoryIndexed16Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed16() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 2); - } - - void addMemoryIndexed8Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed8() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 1); - } - - void addMemoryNoIndexOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && isMemoryNoIndex() && "Invalid number of operands!"); - // Add the base register operand (the offset is always zero, so ignore it). - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - } - - void addMemorySIMDNoIndexOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && isMemorySIMDNoIndex() && "Invalid number of operands!"); - // Add the base register operand (the offset is always zero, so ignore it). - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - } - - void addMemoryWritebackIndexedOperands(MCInst &Inst, unsigned N, - unsigned Scale) const { - assert(N == 2 && "Invalid number of operands!"); - - // Add the base register operand. - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - - // Add the offset operand. 
- int64_t Offset = 0; - if (Mem.OffsetImm) { - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm); - assert(CE && "Non-constant indexed offset operand!"); - Offset = CE->getValue(); - } - - if (Scale != 1) { - assert(Offset % Scale == 0 && - "Offset operand must be a multiple of the scale!"); - Offset /= Scale; - } - - Inst.addOperand(MCOperand::CreateImm(Offset)); - } - - void addMemoryIndexedSImm9Operands(MCInst &Inst, unsigned N) const { - addMemoryWritebackIndexedOperands(Inst, N, 1); - } - - void addMemoryIndexed32SImm7Operands(MCInst &Inst, unsigned N) const { - addMemoryWritebackIndexedOperands(Inst, N, 4); - } - - void addMemoryIndexed64SImm7Operands(MCInst &Inst, unsigned N) const { - addMemoryWritebackIndexedOperands(Inst, N, 8); - } - - void addMemoryIndexed128SImm7Operands(MCInst &Inst, unsigned N) const { - addMemoryWritebackIndexedOperands(Inst, N, 16); - } - - virtual void print(raw_ostream &OS) const; - - static ARM64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Token, Ctx); - Op->Tok.Data = Str.data(); - Op->Tok.Length = Str.size(); - Op->Tok.IsSuffix = IsSuffix; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateReg(unsigned RegNum, bool isVector, SMLoc S, - SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Register, Ctx); - Op->Reg.RegNum = RegNum; - Op->Reg.isVector = isVector; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateVectorList(unsigned RegNum, unsigned Count, - unsigned NumElements, char ElementKind, - SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_VectorList, Ctx); - Op->VectorList.RegNum = RegNum; - Op->VectorList.Count = Count; - Op->VectorList.NumElements = NumElements; - Op->VectorList.ElementKind = ElementKind; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_VectorIndex, Ctx); - Op->VectorIndex.Val = Idx; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Immediate, Ctx); - Op->Imm.Val = Val; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_FPImm, Ctx); - Op->FPImm.Val = Val; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateBarrier(unsigned Val, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Barrier, Ctx); - Op->Barrier.Val = Val; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateSystemRegister(uint16_t Val, SMLoc S, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_SystemRegister, Ctx); - Op->SystemRegister.Val = Val; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateCPSRField(ARM64SYS::CPSRField Field, SMLoc S, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_CPSRField, Ctx); - Op->CPSRField.Field = Field; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateMem(unsigned BaseRegNum, const MCExpr *Off, - SMLoc S, SMLoc E, SMLoc OffsetLoc, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Memory, Ctx); - Op->Mem.BaseRegNum = BaseRegNum; - Op->Mem.OffsetRegNum = 0; - 
Op->Mem.OffsetImm = Off; - Op->Mem.ExtType = ARM64_AM::UXTX; - Op->Mem.ShiftVal = 0; - Op->Mem.ExplicitShift = false; - Op->Mem.Mode = ImmediateOffset; - Op->OffsetLoc = OffsetLoc; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateRegOffsetMem(unsigned BaseReg, unsigned OffsetReg, - ARM64_AM::ExtendType ExtType, - unsigned ShiftVal, bool ExplicitShift, - SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Memory, Ctx); - Op->Mem.BaseRegNum = BaseReg; - Op->Mem.OffsetRegNum = OffsetReg; - Op->Mem.OffsetImm = 0; - Op->Mem.ExtType = ExtType; - Op->Mem.ShiftVal = ShiftVal; - Op->Mem.ExplicitShift = ExplicitShift; - Op->Mem.Mode = RegisterOffset; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_SysCR, Ctx); - Op->SysCRImm.Val = Val; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreatePrefetch(unsigned Val, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Prefetch, Ctx); - Op->Prefetch.Val = Val; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateShifter(ARM64_AM::ShiftType ShOp, unsigned Val, - SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Shifter, Ctx); - Op->Shifter.Val = ARM64_AM::getShifterImm(ShOp, Val); - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateExtend(ARM64_AM::ExtendType ExtOp, unsigned Val, - SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Extend, Ctx); - Op->Extend.Val = ARM64_AM::getArithExtendImm(ExtOp, Val); - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } -}; - -} // end anonymous namespace. 
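The Create* factories above are how the operand-parsing routines further down populate the operand list. A minimal, hypothetical sketch (the values and surrounding variables are illustrative only, not taken from the parser) of pushing the immediate and shifter operands for an instruction such as "add x0, x1, #4, lsl #12":

    // Assumes the factory signatures declared above; S and E are source
    // locations already obtained from the lexer, and Operands is the
    // parser's operand vector.
    const MCExpr *Val = MCConstantExpr::Create(4, getContext());
    Operands.push_back(ARM64Operand::CreateImm(Val, S, E, getContext()));
    Operands.push_back(
        ARM64Operand::CreateShifter(ARM64_AM::LSL, 12, S, E, getContext()));

CreateShifter packs the shift type and amount into a single immediate via ARM64_AM::getShifterImm, which is what addShifterOperands later hands to the MCInst.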
- -void ARM64Operand::print(raw_ostream &OS) const { - switch (Kind) { - case k_FPImm: - OS << "<fpimm " << getFPImm() << "(" << ARM64_AM::getFPImmFloat(getFPImm()) - << ") >"; - break; - case k_Barrier: { - const char *Name = - ARM64SYS::getBarrierOptName((ARM64SYS::BarrierOption)getBarrier()); - OS << "<barrier "; - if (Name) - OS << Name; - else - OS << getBarrier(); - OS << ">"; - break; - } - case k_SystemRegister: { - const char *Name = ARM64SYS::getSystemRegisterName( - (ARM64SYS::SystemRegister)getSystemRegister()); - OS << "<systemreg "; - if (Name) - OS << Name; - else - OS << "#" << getSystemRegister(); - OS << ">"; - break; - } - case k_CPSRField: { - const char *Name = ARM64SYS::getCPSRFieldName(getCPSRField()); - OS << "<cpsrfield " << Name << ">"; - break; - } - case k_Immediate: - getImm()->print(OS); - break; - case k_Memory: - OS << "<memory>"; - break; - case k_Register: - OS << "<register " << getReg() << ">"; - break; - case k_VectorList: { - OS << "<vectorlist "; - unsigned Reg = getVectorListStart(); - for (unsigned i = 0, e = getVectorListCount(); i != e; ++i) - OS << Reg + i << " "; - OS << ">"; - break; - } - case k_VectorIndex: - OS << "<vectorindex " << getVectorIndex() << ">"; - break; - case k_Token: - OS << "'" << getToken() << "'"; - break; - case k_SysCR: - OS << "c" << getSysCR(); - break; - case k_Prefetch: - OS << "<prfop "; - if (ARM64_AM::isNamedPrefetchOp(getPrefetch())) - OS << ARM64_AM::getPrefetchOpName((ARM64_AM::PrefetchOp)getPrefetch()); - else - OS << "#" << getPrefetch(); - OS << ">"; - break; - case k_Shifter: { - unsigned Val = getShifter(); - OS << "<" << ARM64_AM::getShiftName(ARM64_AM::getShiftType(Val)) << " #" - << ARM64_AM::getShiftValue(Val) << ">"; - break; - } - case k_Extend: { - unsigned Val = getExtend(); - OS << "<" << ARM64_AM::getExtendName(ARM64_AM::getArithExtendType(Val)) - << " #" << ARM64_AM::getArithShiftValue(Val) << ">"; - break; - } - } -} - -/// @name Auto-generated Match Functions -/// { - -static unsigned MatchRegisterName(StringRef Name); - -/// } - -static unsigned matchVectorRegName(StringRef Name) { - return StringSwitch<unsigned>(Name) - .Case("v0", ARM64::Q0) - .Case("v1", ARM64::Q1) - .Case("v2", ARM64::Q2) - .Case("v3", ARM64::Q3) - .Case("v4", ARM64::Q4) - .Case("v5", ARM64::Q5) - .Case("v6", ARM64::Q6) - .Case("v7", ARM64::Q7) - .Case("v8", ARM64::Q8) - .Case("v9", ARM64::Q9) - .Case("v10", ARM64::Q10) - .Case("v11", ARM64::Q11) - .Case("v12", ARM64::Q12) - .Case("v13", ARM64::Q13) - .Case("v14", ARM64::Q14) - .Case("v15", ARM64::Q15) - .Case("v16", ARM64::Q16) - .Case("v17", ARM64::Q17) - .Case("v18", ARM64::Q18) - .Case("v19", ARM64::Q19) - .Case("v20", ARM64::Q20) - .Case("v21", ARM64::Q21) - .Case("v22", ARM64::Q22) - .Case("v23", ARM64::Q23) - .Case("v24", ARM64::Q24) - .Case("v25", ARM64::Q25) - .Case("v26", ARM64::Q26) - .Case("v27", ARM64::Q27) - .Case("v28", ARM64::Q28) - .Case("v29", ARM64::Q29) - .Case("v30", ARM64::Q30) - .Case("v31", ARM64::Q31) - .Default(0); -} - -static bool isValidVectorKind(StringRef Name) { - return StringSwitch<bool>(Name.lower()) - .Case(".8b", true) - .Case(".16b", true) - .Case(".4h", true) - .Case(".8h", true) - .Case(".2s", true) - .Case(".4s", true) - .Case(".1d", true) - .Case(".2d", true) - .Case(".1q", true) - // Accept the width neutral ones, too, for verbose syntax. If those - // aren't used in the right places, the token operand won't match so - // all will work out. 
- .Case(".b", true) - .Case(".h", true) - .Case(".s", true) - .Case(".d", true) - .Default(false); -} - -static void parseValidVectorKind(StringRef Name, unsigned &NumElements, - char &ElementKind) { - assert(isValidVectorKind(Name)); - - ElementKind = Name.lower()[Name.size() - 1]; - NumElements = 0; - - if (Name.size() == 2) - return; - - // Parse the lane count - Name = Name.drop_front(); - while (isdigit(Name.front())) { - NumElements = 10 * NumElements + (Name.front() - '0'); - Name = Name.drop_front(); - } -} - -bool ARM64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, - SMLoc &EndLoc) { - StartLoc = getLoc(); - RegNo = tryParseRegister(); - EndLoc = SMLoc::getFromPointer(getLoc().getPointer() - 1); - return (RegNo == (unsigned)-1); -} - -/// tryParseRegister - Try to parse a register name. The token must be an -/// Identifier when called, and if it is a register name the token is eaten and -/// the register is added to the operand list. -int ARM64AsmParser::tryParseRegister() { - const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); - - std::string lowerCase = Tok.getString().lower(); - unsigned RegNum = MatchRegisterName(lowerCase); - // Also handle a few aliases of registers. - if (RegNum == 0) - RegNum = StringSwitch<unsigned>(lowerCase) - .Case("x29", ARM64::FP) - .Case("x30", ARM64::LR) - .Case("x31", ARM64::XZR) - .Case("w31", ARM64::WZR) - .Default(0); - - if (RegNum == 0) - return -1; - - Parser.Lex(); // Eat identifier token. - return RegNum; -} - -/// tryMatchVectorRegister - Try to parse a vector register name with optional -/// kind specifier. If it is a register specifier, eat the token and return it. -int ARM64AsmParser::tryMatchVectorRegister(StringRef &Kind) { - if (Parser.getTok().isNot(AsmToken::Identifier)) { - TokError("vector register expected"); - return -1; - } - - StringRef Name = Parser.getTok().getString(); - // If there is a kind specifier, it's separated from the register name by - // a '.'. - size_t Start = 0, Next = Name.find('.'); - StringRef Head = Name.slice(Start, Next); - unsigned RegNum = matchVectorRegName(Head); - if (RegNum) { - if (Next != StringRef::npos) { - Kind = Name.slice(Next, StringRef::npos); - if (!isValidVectorKind(Kind)) { - TokError("invalid vector kind qualifier"); - return -1; - } - } - Parser.Lex(); // Eat the register token. - return RegNum; - } - return -1; -} - -static int MatchSysCRName(StringRef Name) { - // Use the same layout as the tablegen'erated register name matcher. Ugly, - // but efficient. - switch (Name.size()) { - default: - break; - case 2: - if (Name[0] != 'c' && Name[0] != 'C') - return -1; - switch (Name[1]) { - default: - return -1; - case '0': - return 0; - case '1': - return 1; - case '2': - return 2; - case '3': - return 3; - case '4': - return 4; - case '5': - return 5; - case '6': - return 6; - case '7': - return 7; - case '8': - return 8; - case '9': - return 9; - } - break; - case 3: - if ((Name[0] != 'c' && Name[0] != 'C') || Name[1] != '1') - return -1; - switch (Name[2]) { - default: - return -1; - case '0': - return 10; - case '1': - return 11; - case '2': - return 12; - case '3': - return 13; - case '4': - return 14; - case '5': - return 15; - } - break; - } - - llvm_unreachable("Unhandled SysCR operand string!"); - return -1; -} - -/// tryParseSysCROperand - Try to parse a system instruction CR operand name. 
-ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseSysCROperand(OperandVector &Operands) { - SMLoc S = getLoc(); - const AsmToken &Tok = Parser.getTok(); - if (Tok.isNot(AsmToken::Identifier)) - return MatchOperand_NoMatch; - - int Num = MatchSysCRName(Tok.getString()); - if (Num == -1) - return MatchOperand_NoMatch; - - Parser.Lex(); // Eat identifier token. - Operands.push_back(ARM64Operand::CreateSysCR(Num, S, getLoc(), getContext())); - return MatchOperand_Success; -} - -/// tryParsePrefetch - Try to parse a prefetch operand. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParsePrefetch(OperandVector &Operands) { - SMLoc S = getLoc(); - const AsmToken &Tok = Parser.getTok(); - // Either an identifier for named values or a 5-bit immediate. - if (Tok.is(AsmToken::Hash)) { - Parser.Lex(); // Eat hash token. - const MCExpr *ImmVal; - if (getParser().parseExpression(ImmVal)) - return MatchOperand_ParseFail; - - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) { - TokError("immediate value expected for prefetch operand"); - return MatchOperand_ParseFail; - } - unsigned prfop = MCE->getValue(); - if (prfop > 31) { - TokError("prefetch operand out of range, [0,31] expected"); - return MatchOperand_ParseFail; - } - - Operands.push_back(ARM64Operand::CreatePrefetch(prfop, S, getContext())); - return MatchOperand_Success; - } - - if (Tok.isNot(AsmToken::Identifier)) { - TokError("pre-fetch hint expected"); - return MatchOperand_ParseFail; - } - - unsigned prfop = StringSwitch<unsigned>(Tok.getString()) - .Case("pldl1keep", ARM64_AM::PLDL1KEEP) - .Case("pldl1strm", ARM64_AM::PLDL1STRM) - .Case("pldl2keep", ARM64_AM::PLDL2KEEP) - .Case("pldl2strm", ARM64_AM::PLDL2STRM) - .Case("pldl3keep", ARM64_AM::PLDL3KEEP) - .Case("pldl3strm", ARM64_AM::PLDL3STRM) - .Case("pstl1keep", ARM64_AM::PSTL1KEEP) - .Case("pstl1strm", ARM64_AM::PSTL1STRM) - .Case("pstl2keep", ARM64_AM::PSTL2KEEP) - .Case("pstl2strm", ARM64_AM::PSTL2STRM) - .Case("pstl3keep", ARM64_AM::PSTL3KEEP) - .Case("pstl3strm", ARM64_AM::PSTL3STRM) - .Default(0xff); - if (prfop == 0xff) { - TokError("pre-fetch hint expected"); - return MatchOperand_ParseFail; - } - - Parser.Lex(); // Eat identifier token. - Operands.push_back(ARM64Operand::CreatePrefetch(prfop, S, getContext())); - return MatchOperand_Success; -} - -/// tryParseAdrpLabel - Parse and validate a source label for the ADRP -/// instruction. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { - SMLoc S = getLoc(); - const MCExpr *Expr; - if (parseSymbolicImmVal(Expr)) - return MatchOperand_ParseFail; - - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - const MCConstantExpr *Addend; - if (!classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) { - Error(S, "modified label reference + constant expected"); - return MatchOperand_ParseFail; - } - - if (DarwinRefKind == MCSymbolRefExpr::VK_None && - ELFRefKind == ARM64MCExpr::VK_INVALID) { - // No modifier was specified at all; this is the syntax for an ELF basic - // ADRP relocation (unfortunately). 
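// For example, a plain "adrp x0, symbol" takes this path and is wrapped as a
// VK_ABS_PAGE reference, while "adrp x0, :got:symbol" or "adrp x0, _var@PAGE"
// carry an explicit modifier and are validated below.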
- Expr = ARM64MCExpr::Create(Expr, ARM64MCExpr::VK_ABS_PAGE, getContext()); - } else if ((DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGE || - DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGE) && - Addend != 0) { - Error(S, "gotpage label reference not allowed an addend"); - return MatchOperand_ParseFail; - } else if (DarwinRefKind != MCSymbolRefExpr::VK_PAGE && - DarwinRefKind != MCSymbolRefExpr::VK_GOTPAGE && - DarwinRefKind != MCSymbolRefExpr::VK_TLVPPAGE && - ELFRefKind != ARM64MCExpr::VK_GOT_PAGE && - ELFRefKind != ARM64MCExpr::VK_GOTTPREL_PAGE && - ELFRefKind != ARM64MCExpr::VK_TLSDESC_PAGE) { - // The operand must be an @page or @gotpage qualified symbolref. - Error(S, "page or gotpage label reference expected"); - return MatchOperand_ParseFail; - } - - // We have a label reference possibly with addend. The addend is a raw value - // here. The linker will adjust it to only reference the page. - SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext())); - - return MatchOperand_Success; -} - -/// tryParseAdrLabel - Parse and validate a source label for the ADR -/// instruction. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseAdrLabel(OperandVector &Operands) { - SMLoc S = getLoc(); - const MCExpr *Expr; - if (getParser().parseExpression(Expr)) - return MatchOperand_ParseFail; - - // The operand must be an un-qualified assembler local symbolref. - // FIXME: wrong for ELF. - if (const MCSymbolRefExpr *SRE = dyn_cast<const MCSymbolRefExpr>(Expr)) { - // FIXME: Should reference the MachineAsmInfo to get the private prefix. - bool isTemporary = SRE->getSymbol().getName().startswith("L"); - if (!isTemporary || SRE->getKind() != MCSymbolRefExpr::VK_None) { - Error(S, "unqualified, assembler-local label name expected"); - return MatchOperand_ParseFail; - } - } - - SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext())); - - return MatchOperand_Success; -} - -/// tryParseFPImm - A floating point immediate expression operand. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseFPImm(OperandVector &Operands) { - SMLoc S = getLoc(); - - if (Parser.getTok().isNot(AsmToken::Hash)) - return MatchOperand_NoMatch; - Parser.Lex(); // Eat the '#'. - - // Handle negation, as that still comes through as a separate token. - bool isNegative = false; - if (Parser.getTok().is(AsmToken::Minus)) { - isNegative = true; - Parser.Lex(); - } - const AsmToken &Tok = Parser.getTok(); - if (Tok.is(AsmToken::Real)) { - APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); - uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); - // If we had a '-' in front, toggle the sign bit. - IntVal ^= (uint64_t)isNegative << 63; - int Val = ARM64_AM::getFP64Imm(APInt(64, IntVal)); - Parser.Lex(); // Eat the token. - // Check for out of range values. As an exception, we let Zero through, - // as we handle that special case in post-processing before matching in - // order to use the zero register for it. 
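// e.g. "fmov d0, #0.5" encodes directly as an 8-bit FP immediate, while
// "#0.0" is not representable; it is accepted here so post-processing can
// substitute the zero register.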
- if (Val == -1 && !RealVal.isZero()) { - TokError("floating point value out of range"); - return MatchOperand_ParseFail; - } - Operands.push_back(ARM64Operand::CreateFPImm(Val, S, getContext())); - return MatchOperand_Success; - } - if (Tok.is(AsmToken::Integer)) { - int64_t Val; - if (!isNegative && Tok.getString().startswith("0x")) { - Val = Tok.getIntVal(); - if (Val > 255 || Val < 0) { - TokError("encoded floating point value out of range"); - return MatchOperand_ParseFail; - } - } else { - APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); - uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); - // If we had a '-' in front, toggle the sign bit. - IntVal ^= (uint64_t)isNegative << 63; - Val = ARM64_AM::getFP64Imm(APInt(64, IntVal)); - } - Parser.Lex(); // Eat the token. - Operands.push_back(ARM64Operand::CreateFPImm(Val, S, getContext())); - return MatchOperand_Success; - } - - TokError("invalid floating point immediate"); - return MatchOperand_ParseFail; -} - -/// parseCondCodeString - Parse a Condition Code string. -unsigned ARM64AsmParser::parseCondCodeString(StringRef Cond) { - unsigned CC = StringSwitch<unsigned>(Cond) - .Case("eq", ARM64CC::EQ) - .Case("ne", ARM64CC::NE) - .Case("cs", ARM64CC::CS) - .Case("hs", ARM64CC::CS) - .Case("cc", ARM64CC::CC) - .Case("lo", ARM64CC::CC) - .Case("mi", ARM64CC::MI) - .Case("pl", ARM64CC::PL) - .Case("vs", ARM64CC::VS) - .Case("vc", ARM64CC::VC) - .Case("hi", ARM64CC::HI) - .Case("ls", ARM64CC::LS) - .Case("ge", ARM64CC::GE) - .Case("lt", ARM64CC::LT) - .Case("gt", ARM64CC::GT) - .Case("le", ARM64CC::LE) - .Case("al", ARM64CC::AL) - // Upper case works too. Not mixed case, though. - .Case("EQ", ARM64CC::EQ) - .Case("NE", ARM64CC::NE) - .Case("CS", ARM64CC::CS) - .Case("HS", ARM64CC::CS) - .Case("CC", ARM64CC::CC) - .Case("LO", ARM64CC::CC) - .Case("MI", ARM64CC::MI) - .Case("PL", ARM64CC::PL) - .Case("VS", ARM64CC::VS) - .Case("VC", ARM64CC::VC) - .Case("HI", ARM64CC::HI) - .Case("LS", ARM64CC::LS) - .Case("GE", ARM64CC::GE) - .Case("LT", ARM64CC::LT) - .Case("GT", ARM64CC::GT) - .Case("LE", ARM64CC::LE) - .Case("AL", ARM64CC::AL) - .Default(~0U); - return CC; -} - -/// parseCondCode - Parse a Condition Code operand. -bool ARM64AsmParser::parseCondCode(OperandVector &Operands, - bool invertCondCode) { - SMLoc S = getLoc(); - const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); - - StringRef Cond = Tok.getString(); - unsigned CC = parseCondCodeString(Cond); - if (CC == ~0U) - return TokError("invalid condition code"); - Parser.Lex(); // Eat identifier token. - - if (invertCondCode) - CC = ARM64CC::getInvertedCondCode(ARM64CC::CondCode(CC)); - - const MCExpr *CCExpr = MCConstantExpr::Create(CC, getContext()); - Operands.push_back( - ARM64Operand::CreateImm(CCExpr, S, getLoc(), getContext())); - return false; -} - -/// ParseOptionalShift - Some operands take an optional shift argument. Parse -/// them if present. 
-bool ARM64AsmParser::parseOptionalShift(OperandVector &Operands) { - const AsmToken &Tok = Parser.getTok(); - ARM64_AM::ShiftType ShOp = StringSwitch<ARM64_AM::ShiftType>(Tok.getString()) - .Case("lsl", ARM64_AM::LSL) - .Case("lsr", ARM64_AM::LSR) - .Case("asr", ARM64_AM::ASR) - .Case("ror", ARM64_AM::ROR) - .Case("msl", ARM64_AM::MSL) - .Case("LSL", ARM64_AM::LSL) - .Case("LSR", ARM64_AM::LSR) - .Case("ASR", ARM64_AM::ASR) - .Case("ROR", ARM64_AM::ROR) - .Case("MSL", ARM64_AM::MSL) - .Default(ARM64_AM::InvalidShift); - if (ShOp == ARM64_AM::InvalidShift) - return true; - - SMLoc S = Tok.getLoc(); - Parser.Lex(); - - // We expect a number here. - if (getLexer().isNot(AsmToken::Hash)) - return TokError("immediate value expected for shifter operand"); - Parser.Lex(); // Eat the '#'. - - SMLoc ExprLoc = getLoc(); - const MCExpr *ImmVal; - if (getParser().parseExpression(ImmVal)) - return true; - - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) - return TokError("immediate value expected for shifter operand"); - - if ((MCE->getValue() & 0x3f) != MCE->getValue()) - return Error(ExprLoc, "immediate value too large for shifter operand"); - - SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back( - ARM64Operand::CreateShifter(ShOp, MCE->getValue(), S, E, getContext())); - return false; -} - -/// parseOptionalExtend - Some operands take an optional extend argument. Parse -/// them if present. -bool ARM64AsmParser::parseOptionalExtend(OperandVector &Operands) { - const AsmToken &Tok = Parser.getTok(); - ARM64_AM::ExtendType ExtOp = - StringSwitch<ARM64_AM::ExtendType>(Tok.getString()) - .Case("uxtb", ARM64_AM::UXTB) - .Case("uxth", ARM64_AM::UXTH) - .Case("uxtw", ARM64_AM::UXTW) - .Case("uxtx", ARM64_AM::UXTX) - .Case("lsl", ARM64_AM::UXTX) // Alias for UXTX - .Case("sxtb", ARM64_AM::SXTB) - .Case("sxth", ARM64_AM::SXTH) - .Case("sxtw", ARM64_AM::SXTW) - .Case("sxtx", ARM64_AM::SXTX) - .Case("UXTB", ARM64_AM::UXTB) - .Case("UXTH", ARM64_AM::UXTH) - .Case("UXTW", ARM64_AM::UXTW) - .Case("UXTX", ARM64_AM::UXTX) - .Case("LSL", ARM64_AM::UXTX) // Alias for UXTX - .Case("SXTB", ARM64_AM::SXTB) - .Case("SXTH", ARM64_AM::SXTH) - .Case("SXTW", ARM64_AM::SXTW) - .Case("SXTX", ARM64_AM::SXTX) - .Default(ARM64_AM::InvalidExtend); - if (ExtOp == ARM64_AM::InvalidExtend) - return true; - - SMLoc S = Tok.getLoc(); - Parser.Lex(); - - if (getLexer().is(AsmToken::EndOfStatement) || - getLexer().is(AsmToken::Comma)) { - SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back( - ARM64Operand::CreateExtend(ExtOp, 0, S, E, getContext())); - return false; - } - - if (getLexer().isNot(AsmToken::Hash)) { - SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back( - ARM64Operand::CreateExtend(ExtOp, 0, S, E, getContext())); - return false; - } - - Parser.Lex(); // Eat the '#'. - - const MCExpr *ImmVal; - if (getParser().parseExpression(ImmVal)) - return true; - - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) - return TokError("immediate value expected for extend operand"); - - SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back( - ARM64Operand::CreateExtend(ExtOp, MCE->getValue(), S, E, getContext())); - return false; -} - -/// parseSysAlias - The IC, DC, AT, and TLBI instructions are simple aliases for -/// the SYS instruction. Parse them specially so that we create a SYS MCInst. 
-bool ARM64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, - OperandVector &Operands) { - if (Name.find('.') != StringRef::npos) - return TokError("invalid operand"); - - Mnemonic = Name; - Operands.push_back( - ARM64Operand::CreateToken("sys", false, NameLoc, getContext())); - - const AsmToken &Tok = Parser.getTok(); - StringRef Op = Tok.getString(); - SMLoc S = Tok.getLoc(); - - const MCExpr *Expr = 0; - -#define SYS_ALIAS(op1, Cn, Cm, op2) \ - do { \ - Expr = MCConstantExpr::Create(op1, getContext()); \ - Operands.push_back( \ - ARM64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ - Operands.push_back( \ - ARM64Operand::CreateSysCR(Cn, S, getLoc(), getContext())); \ - Operands.push_back( \ - ARM64Operand::CreateSysCR(Cm, S, getLoc(), getContext())); \ - Expr = MCConstantExpr::Create(op2, getContext()); \ - Operands.push_back( \ - ARM64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ - } while (0) - - if (Mnemonic == "ic") { - if (!Op.compare_lower("ialluis")) { - // SYS #0, C7, C1, #0 - SYS_ALIAS(0, 7, 1, 0); - } else if (!Op.compare_lower("iallu")) { - // SYS #0, C7, C5, #0 - SYS_ALIAS(0, 7, 5, 0); - } else if (!Op.compare_lower("ivau")) { - // SYS #3, C7, C5, #1 - SYS_ALIAS(3, 7, 5, 1); - } else { - return TokError("invalid operand for IC instruction"); - } - } else if (Mnemonic == "dc") { - if (!Op.compare_lower("zva")) { - // SYS #3, C7, C4, #1 - SYS_ALIAS(3, 7, 4, 1); - } else if (!Op.compare_lower("ivac")) { - // SYS #3, C7, C6, #1 - SYS_ALIAS(0, 7, 6, 1); - } else if (!Op.compare_lower("isw")) { - // SYS #0, C7, C6, #2 - SYS_ALIAS(0, 7, 6, 2); - } else if (!Op.compare_lower("cvac")) { - // SYS #3, C7, C10, #1 - SYS_ALIAS(3, 7, 10, 1); - } else if (!Op.compare_lower("csw")) { - // SYS #0, C7, C10, #2 - SYS_ALIAS(0, 7, 10, 2); - } else if (!Op.compare_lower("cvau")) { - // SYS #3, C7, C11, #1 - SYS_ALIAS(3, 7, 11, 1); - } else if (!Op.compare_lower("civac")) { - // SYS #3, C7, C14, #1 - SYS_ALIAS(3, 7, 14, 1); - } else if (!Op.compare_lower("cisw")) { - // SYS #0, C7, C14, #2 - SYS_ALIAS(0, 7, 14, 2); - } else { - return TokError("invalid operand for DC instruction"); - } - } else if (Mnemonic == "at") { - if (!Op.compare_lower("s1e1r")) { - // SYS #0, C7, C8, #0 - SYS_ALIAS(0, 7, 8, 0); - } else if (!Op.compare_lower("s1e2r")) { - // SYS #4, C7, C8, #0 - SYS_ALIAS(4, 7, 8, 0); - } else if (!Op.compare_lower("s1e3r")) { - // SYS #6, C7, C8, #0 - SYS_ALIAS(6, 7, 8, 0); - } else if (!Op.compare_lower("s1e1w")) { - // SYS #0, C7, C8, #1 - SYS_ALIAS(0, 7, 8, 1); - } else if (!Op.compare_lower("s1e2w")) { - // SYS #4, C7, C8, #1 - SYS_ALIAS(4, 7, 8, 1); - } else if (!Op.compare_lower("s1e3w")) { - // SYS #6, C7, C8, #1 - SYS_ALIAS(6, 7, 8, 1); - } else if (!Op.compare_lower("s1e0r")) { - // SYS #0, C7, C8, #3 - SYS_ALIAS(0, 7, 8, 2); - } else if (!Op.compare_lower("s1e0w")) { - // SYS #0, C7, C8, #3 - SYS_ALIAS(0, 7, 8, 3); - } else if (!Op.compare_lower("s12e1r")) { - // SYS #4, C7, C8, #4 - SYS_ALIAS(4, 7, 8, 4); - } else if (!Op.compare_lower("s12e1w")) { - // SYS #4, C7, C8, #5 - SYS_ALIAS(4, 7, 8, 5); - } else if (!Op.compare_lower("s12e0r")) { - // SYS #4, C7, C8, #6 - SYS_ALIAS(4, 7, 8, 6); - } else if (!Op.compare_lower("s12e0w")) { - // SYS #4, C7, C8, #7 - SYS_ALIAS(4, 7, 8, 7); - } else { - return TokError("invalid operand for AT instruction"); - } - } else if (Mnemonic == "tlbi") { - if (!Op.compare_lower("vmalle1is")) { - // SYS #0, C8, C3, #0 - SYS_ALIAS(0, 8, 3, 0); - } else if (!Op.compare_lower("alle2is")) { - // SYS #4, C8, C3, #0 - SYS_ALIAS(4, 
8, 3, 0); - } else if (!Op.compare_lower("alle3is")) { - // SYS #6, C8, C3, #0 - SYS_ALIAS(6, 8, 3, 0); - } else if (!Op.compare_lower("vae1is")) { - // SYS #0, C8, C3, #1 - SYS_ALIAS(0, 8, 3, 1); - } else if (!Op.compare_lower("vae2is")) { - // SYS #4, C8, C3, #1 - SYS_ALIAS(4, 8, 3, 1); - } else if (!Op.compare_lower("vae3is")) { - // SYS #6, C8, C3, #1 - SYS_ALIAS(6, 8, 3, 1); - } else if (!Op.compare_lower("aside1is")) { - // SYS #0, C8, C3, #2 - SYS_ALIAS(0, 8, 3, 2); - } else if (!Op.compare_lower("vaae1is")) { - // SYS #0, C8, C3, #3 - SYS_ALIAS(0, 8, 3, 3); - } else if (!Op.compare_lower("alle1is")) { - // SYS #4, C8, C3, #4 - SYS_ALIAS(4, 8, 3, 4); - } else if (!Op.compare_lower("vale1is")) { - // SYS #0, C8, C3, #5 - SYS_ALIAS(0, 8, 3, 5); - } else if (!Op.compare_lower("vaale1is")) { - // SYS #0, C8, C3, #7 - SYS_ALIAS(0, 8, 3, 7); - } else if (!Op.compare_lower("vmalle1")) { - // SYS #0, C8, C7, #0 - SYS_ALIAS(0, 8, 7, 0); - } else if (!Op.compare_lower("alle2")) { - // SYS #4, C8, C7, #0 - SYS_ALIAS(4, 8, 7, 0); - } else if (!Op.compare_lower("vale2is")) { - // SYS #4, C8, C3, #5 - SYS_ALIAS(4, 8, 3, 5); - } else if (!Op.compare_lower("vale3is")) { - // SYS #6, C8, C3, #5 - SYS_ALIAS(6, 8, 3, 5); - } else if (!Op.compare_lower("alle3")) { - // SYS #6, C8, C7, #0 - SYS_ALIAS(6, 8, 7, 0); - } else if (!Op.compare_lower("vae1")) { - // SYS #0, C8, C7, #1 - SYS_ALIAS(0, 8, 7, 1); - } else if (!Op.compare_lower("vae2")) { - // SYS #4, C8, C7, #1 - SYS_ALIAS(4, 8, 7, 1); - } else if (!Op.compare_lower("vae3")) { - // SYS #6, C8, C7, #1 - SYS_ALIAS(6, 8, 7, 1); - } else if (!Op.compare_lower("aside1")) { - // SYS #0, C8, C7, #2 - SYS_ALIAS(0, 8, 7, 2); - } else if (!Op.compare_lower("vaae1")) { - // SYS #0, C8, C7, #3 - SYS_ALIAS(0, 8, 7, 3); - } else if (!Op.compare_lower("alle1")) { - // SYS #4, C8, C7, #4 - SYS_ALIAS(4, 8, 7, 4); - } else if (!Op.compare_lower("vale1")) { - // SYS #0, C8, C7, #5 - SYS_ALIAS(0, 8, 7, 5); - } else if (!Op.compare_lower("vale2")) { - // SYS #4, C8, C7, #5 - SYS_ALIAS(4, 8, 7, 5); - } else if (!Op.compare_lower("vale3")) { - // SYS #6, C8, C7, #5 - SYS_ALIAS(6, 8, 7, 5); - } else if (!Op.compare_lower("vaale1")) { - // SYS #0, C8, C7, #7 - SYS_ALIAS(0, 8, 7, 7); - } else if (!Op.compare_lower("ipas2e1")) { - // SYS #4, C8, C4, #1 - SYS_ALIAS(4, 8, 4, 1); - } else if (!Op.compare_lower("ipas2le1")) { - // SYS #4, C8, C4, #5 - SYS_ALIAS(4, 8, 4, 5); - } else if (!Op.compare_lower("vmalls12e1")) { - // SYS #4, C8, C7, #6 - SYS_ALIAS(4, 8, 7, 6); - } else if (!Op.compare_lower("vmalls12e1is")) { - // SYS #4, C8, C3, #6 - SYS_ALIAS(4, 8, 3, 6); - } else { - return TokError("invalid operand for TLBI instruction"); - } - } - -#undef SYS_ALIAS - - Parser.Lex(); // Eat operand. - - // Check for the optional register operand. - if (getLexer().is(AsmToken::Comma)) { - Parser.Lex(); // Eat comma. - - if (Tok.isNot(AsmToken::Identifier) || parseRegister(Operands)) - return TokError("expected register operand"); - } - - if (getLexer().isNot(AsmToken::EndOfStatement)) { - Parser.eatToEndOfStatement(); - return TokError("unexpected token in argument list"); - } - - Parser.Lex(); // Consume the EndOfStatement - return false; -} - -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { - const AsmToken &Tok = Parser.getTok(); - - // Can be either a #imm style literal or an option name - if (Tok.is(AsmToken::Hash)) { - // Immediate operand. 
- Parser.Lex(); // Eat the '#' - const MCExpr *ImmVal; - SMLoc ExprLoc = getLoc(); - if (getParser().parseExpression(ImmVal)) - return MatchOperand_ParseFail; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) { - Error(ExprLoc, "immediate value expected for barrier operand"); - return MatchOperand_ParseFail; - } - if (MCE->getValue() < 0 || MCE->getValue() > 15) { - Error(ExprLoc, "barrier operand out of range"); - return MatchOperand_ParseFail; - } - Operands.push_back( - ARM64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext())); - return MatchOperand_Success; - } - - if (Tok.isNot(AsmToken::Identifier)) { - TokError("invalid operand for instruction"); - return MatchOperand_ParseFail; - } - - unsigned Opt = StringSwitch<unsigned>(Tok.getString()) - .Case("oshld", ARM64SYS::OSHLD) - .Case("oshst", ARM64SYS::OSHST) - .Case("osh", ARM64SYS::OSH) - .Case("nshld", ARM64SYS::NSHLD) - .Case("nshst", ARM64SYS::NSHST) - .Case("nsh", ARM64SYS::NSH) - .Case("ishld", ARM64SYS::ISHLD) - .Case("ishst", ARM64SYS::ISHST) - .Case("ish", ARM64SYS::ISH) - .Case("ld", ARM64SYS::LD) - .Case("st", ARM64SYS::ST) - .Case("sy", ARM64SYS::SY) - .Default(ARM64SYS::InvalidBarrier); - if (Opt == ARM64SYS::InvalidBarrier) { - TokError("invalid barrier option name"); - return MatchOperand_ParseFail; - } - - // The only valid named option for ISB is 'sy' - if (Mnemonic == "isb" && Opt != ARM64SYS::SY) { - TokError("'sy' or #imm operand expected"); - return MatchOperand_ParseFail; - } - - Operands.push_back(ARM64Operand::CreateBarrier(Opt, getLoc(), getContext())); - Parser.Lex(); // Consume the option - - return MatchOperand_Success; -} - -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseSystemRegister(OperandVector &Operands) { - const AsmToken &Tok = Parser.getTok(); - - // It can be specified as a symbolic name. 
- if (Tok.isNot(AsmToken::Identifier)) - return MatchOperand_NoMatch; - - auto ID = Tok.getString().lower(); - ARM64SYS::SystemRegister Reg = - StringSwitch<ARM64SYS::SystemRegister>(ID) - .Case("spsr_el1", ARM64SYS::SPSR_svc) - .Case("spsr_svc", ARM64SYS::SPSR_svc) - .Case("elr_el1", ARM64SYS::ELR_EL1) - .Case("sp_el0", ARM64SYS::SP_EL0) - .Case("spsel", ARM64SYS::SPSel) - .Case("daif", ARM64SYS::DAIF) - .Case("currentel", ARM64SYS::CurrentEL) - .Case("nzcv", ARM64SYS::NZCV) - .Case("fpcr", ARM64SYS::FPCR) - .Case("fpsr", ARM64SYS::FPSR) - .Case("dspsr", ARM64SYS::DSPSR) - .Case("dlr", ARM64SYS::DLR) - .Case("spsr_el2", ARM64SYS::SPSR_hyp) - .Case("spsr_hyp", ARM64SYS::SPSR_hyp) - .Case("elr_el2", ARM64SYS::ELR_EL2) - .Case("sp_el1", ARM64SYS::SP_EL1) - .Case("spsr_irq", ARM64SYS::SPSR_irq) - .Case("spsr_abt", ARM64SYS::SPSR_abt) - .Case("spsr_und", ARM64SYS::SPSR_und) - .Case("spsr_fiq", ARM64SYS::SPSR_fiq) - .Case("spsr_el3", ARM64SYS::SPSR_EL3) - .Case("elr_el3", ARM64SYS::ELR_EL3) - .Case("sp_el2", ARM64SYS::SP_EL2) - .Case("midr_el1", ARM64SYS::MIDR_EL1) - .Case("ctr_el0", ARM64SYS::CTR_EL0) - .Case("mpidr_el1", ARM64SYS::MPIDR_EL1) - .Case("ecoidr_el1", ARM64SYS::ECOIDR_EL1) - .Case("dczid_el0", ARM64SYS::DCZID_EL0) - .Case("mvfr0_el1", ARM64SYS::MVFR0_EL1) - .Case("mvfr1_el1", ARM64SYS::MVFR1_EL1) - .Case("id_aa64pfr0_el1", ARM64SYS::ID_AA64PFR0_EL1) - .Case("id_aa64pfr1_el1", ARM64SYS::ID_AA64PFR1_EL1) - .Case("id_aa64dfr0_el1", ARM64SYS::ID_AA64DFR0_EL1) - .Case("id_aa64dfr1_el1", ARM64SYS::ID_AA64DFR1_EL1) - .Case("id_aa64isar0_el1", ARM64SYS::ID_AA64ISAR0_EL1) - .Case("id_aa64isar1_el1", ARM64SYS::ID_AA64ISAR1_EL1) - .Case("id_aa64mmfr0_el1", ARM64SYS::ID_AA64MMFR0_EL1) - .Case("id_aa64mmfr1_el1", ARM64SYS::ID_AA64MMFR1_EL1) - .Case("ccsidr_el1", ARM64SYS::CCSIDR_EL1) - .Case("clidr_el1", ARM64SYS::CLIDR_EL1) - .Case("aidr_el1", ARM64SYS::AIDR_EL1) - .Case("csselr_el1", ARM64SYS::CSSELR_EL1) - .Case("vpidr_el2", ARM64SYS::VPIDR_EL2) - .Case("vmpidr_el2", ARM64SYS::VMPIDR_EL2) - .Case("sctlr_el1", ARM64SYS::SCTLR_EL1) - .Case("sctlr_el2", ARM64SYS::SCTLR_EL2) - .Case("sctlr_el3", ARM64SYS::SCTLR_EL3) - .Case("actlr_el1", ARM64SYS::ACTLR_EL1) - .Case("actlr_el2", ARM64SYS::ACTLR_EL2) - .Case("actlr_el3", ARM64SYS::ACTLR_EL3) - .Case("cpacr_el1", ARM64SYS::CPACR_EL1) - .Case("cptr_el2", ARM64SYS::CPTR_EL2) - .Case("cptr_el3", ARM64SYS::CPTR_EL3) - .Case("scr_el3", ARM64SYS::SCR_EL3) - .Case("hcr_el2", ARM64SYS::HCR_EL2) - .Case("mdcr_el2", ARM64SYS::MDCR_EL2) - .Case("mdcr_el3", ARM64SYS::MDCR_EL3) - .Case("hstr_el2", ARM64SYS::HSTR_EL2) - .Case("hacr_el2", ARM64SYS::HACR_EL2) - .Case("ttbr0_el1", ARM64SYS::TTBR0_EL1) - .Case("ttbr1_el1", ARM64SYS::TTBR1_EL1) - .Case("ttbr0_el2", ARM64SYS::TTBR0_EL2) - .Case("ttbr0_el3", ARM64SYS::TTBR0_EL3) - .Case("vttbr_el2", ARM64SYS::VTTBR_EL2) - .Case("tcr_el1", ARM64SYS::TCR_EL1) - .Case("tcr_el2", ARM64SYS::TCR_EL2) - .Case("tcr_el3", ARM64SYS::TCR_EL3) - .Case("vtcr_el2", ARM64SYS::VTCR_EL2) - .Case("adfsr_el1", ARM64SYS::ADFSR_EL1) - .Case("aifsr_el1", ARM64SYS::AIFSR_EL1) - .Case("adfsr_el2", ARM64SYS::ADFSR_EL2) - .Case("aifsr_el2", ARM64SYS::AIFSR_EL2) - .Case("adfsr_el3", ARM64SYS::ADFSR_EL3) - .Case("aifsr_el3", ARM64SYS::AIFSR_EL3) - .Case("esr_el1", ARM64SYS::ESR_EL1) - .Case("esr_el2", ARM64SYS::ESR_EL2) - .Case("esr_el3", ARM64SYS::ESR_EL3) - .Case("far_el1", ARM64SYS::FAR_EL1) - .Case("far_el2", ARM64SYS::FAR_EL2) - .Case("far_el3", ARM64SYS::FAR_EL3) - .Case("hpfar_el2", ARM64SYS::HPFAR_EL2) - .Case("par_el1", 
ARM64SYS::PAR_EL1) - .Case("mair_el1", ARM64SYS::MAIR_EL1) - .Case("mair_el2", ARM64SYS::MAIR_EL2) - .Case("mair_el3", ARM64SYS::MAIR_EL3) - .Case("amair_el1", ARM64SYS::AMAIR_EL1) - .Case("amair_el2", ARM64SYS::AMAIR_EL2) - .Case("amair_el3", ARM64SYS::AMAIR_EL3) - .Case("vbar_el1", ARM64SYS::VBAR_EL1) - .Case("vbar_el2", ARM64SYS::VBAR_EL2) - .Case("vbar_el3", ARM64SYS::VBAR_EL3) - .Case("rvbar_el1", ARM64SYS::RVBAR_EL1) - .Case("rvbar_el2", ARM64SYS::RVBAR_EL2) - .Case("rvbar_el3", ARM64SYS::RVBAR_EL3) - .Case("isr_el1", ARM64SYS::ISR_EL1) - .Case("contextidr_el1", ARM64SYS::CONTEXTIDR_EL1) - .Case("tpidr_el0", ARM64SYS::TPIDR_EL0) - .Case("tpidrro_el0", ARM64SYS::TPIDRRO_EL0) - .Case("tpidr_el1", ARM64SYS::TPIDR_EL1) - .Case("tpidr_el2", ARM64SYS::TPIDR_EL2) - .Case("tpidr_el3", ARM64SYS::TPIDR_EL3) - .Case("teecr32_el1", ARM64SYS::TEECR32_EL1) - .Case("cntfrq_el0", ARM64SYS::CNTFRQ_EL0) - .Case("cntpct_el0", ARM64SYS::CNTPCT_EL0) - .Case("cntvct_el0", ARM64SYS::CNTVCT_EL0) - .Case("cntvoff_el2", ARM64SYS::CNTVOFF_EL2) - .Case("cntkctl_el1", ARM64SYS::CNTKCTL_EL1) - .Case("cnthctl_el2", ARM64SYS::CNTHCTL_EL2) - .Case("cntp_tval_el0", ARM64SYS::CNTP_TVAL_EL0) - .Case("cntp_ctl_el0", ARM64SYS::CNTP_CTL_EL0) - .Case("cntp_cval_el0", ARM64SYS::CNTP_CVAL_EL0) - .Case("cntv_tval_el0", ARM64SYS::CNTV_TVAL_EL0) - .Case("cntv_ctl_el0", ARM64SYS::CNTV_CTL_EL0) - .Case("cntv_cval_el0", ARM64SYS::CNTV_CVAL_EL0) - .Case("cnthp_tval_el2", ARM64SYS::CNTHP_TVAL_EL2) - .Case("cnthp_ctl_el2", ARM64SYS::CNTHP_CTL_EL2) - .Case("cnthp_cval_el2", ARM64SYS::CNTHP_CVAL_EL2) - .Case("cntps_tval_el1", ARM64SYS::CNTPS_TVAL_EL1) - .Case("cntps_ctl_el1", ARM64SYS::CNTPS_CTL_EL1) - .Case("cntps_cval_el1", ARM64SYS::CNTPS_CVAL_EL1) - .Case("dacr32_el2", ARM64SYS::DACR32_EL2) - .Case("ifsr32_el2", ARM64SYS::IFSR32_EL2) - .Case("teehbr32_el1", ARM64SYS::TEEHBR32_EL1) - .Case("sder32_el3", ARM64SYS::SDER32_EL3) - .Case("fpexc32_el2", ARM64SYS::FPEXC32_EL2) - .Case("current_el", ARM64SYS::CurrentEL) - .Case("pmevcntr0_el0", ARM64SYS::PMEVCNTR0_EL0) - .Case("pmevcntr1_el0", ARM64SYS::PMEVCNTR1_EL0) - .Case("pmevcntr2_el0", ARM64SYS::PMEVCNTR2_EL0) - .Case("pmevcntr3_el0", ARM64SYS::PMEVCNTR3_EL0) - .Case("pmevcntr4_el0", ARM64SYS::PMEVCNTR4_EL0) - .Case("pmevcntr5_el0", ARM64SYS::PMEVCNTR5_EL0) - .Case("pmevcntr6_el0", ARM64SYS::PMEVCNTR6_EL0) - .Case("pmevcntr7_el0", ARM64SYS::PMEVCNTR7_EL0) - .Case("pmevcntr8_el0", ARM64SYS::PMEVCNTR8_EL0) - .Case("pmevcntr9_el0", ARM64SYS::PMEVCNTR9_EL0) - .Case("pmevcntr10_el0", ARM64SYS::PMEVCNTR10_EL0) - .Case("pmevcntr11_el0", ARM64SYS::PMEVCNTR11_EL0) - .Case("pmevcntr12_el0", ARM64SYS::PMEVCNTR12_EL0) - .Case("pmevcntr13_el0", ARM64SYS::PMEVCNTR13_EL0) - .Case("pmevcntr14_el0", ARM64SYS::PMEVCNTR14_EL0) - .Case("pmevcntr15_el0", ARM64SYS::PMEVCNTR15_EL0) - .Case("pmevcntr16_el0", ARM64SYS::PMEVCNTR16_EL0) - .Case("pmevcntr17_el0", ARM64SYS::PMEVCNTR17_EL0) - .Case("pmevcntr18_el0", ARM64SYS::PMEVCNTR18_EL0) - .Case("pmevcntr19_el0", ARM64SYS::PMEVCNTR19_EL0) - .Case("pmevcntr20_el0", ARM64SYS::PMEVCNTR20_EL0) - .Case("pmevcntr21_el0", ARM64SYS::PMEVCNTR21_EL0) - .Case("pmevcntr22_el0", ARM64SYS::PMEVCNTR22_EL0) - .Case("pmevcntr23_el0", ARM64SYS::PMEVCNTR23_EL0) - .Case("pmevcntr24_el0", ARM64SYS::PMEVCNTR24_EL0) - .Case("pmevcntr25_el0", ARM64SYS::PMEVCNTR25_EL0) - .Case("pmevcntr26_el0", ARM64SYS::PMEVCNTR26_EL0) - .Case("pmevcntr27_el0", ARM64SYS::PMEVCNTR27_EL0) - .Case("pmevcntr28_el0", ARM64SYS::PMEVCNTR28_EL0) - .Case("pmevcntr29_el0", ARM64SYS::PMEVCNTR29_EL0) - 
.Case("pmevcntr30_el0", ARM64SYS::PMEVCNTR30_EL0) - .Case("pmevtyper0_el0", ARM64SYS::PMEVTYPER0_EL0) - .Case("pmevtyper1_el0", ARM64SYS::PMEVTYPER1_EL0) - .Case("pmevtyper2_el0", ARM64SYS::PMEVTYPER2_EL0) - .Case("pmevtyper3_el0", ARM64SYS::PMEVTYPER3_EL0) - .Case("pmevtyper4_el0", ARM64SYS::PMEVTYPER4_EL0) - .Case("pmevtyper5_el0", ARM64SYS::PMEVTYPER5_EL0) - .Case("pmevtyper6_el0", ARM64SYS::PMEVTYPER6_EL0) - .Case("pmevtyper7_el0", ARM64SYS::PMEVTYPER7_EL0) - .Case("pmevtyper8_el0", ARM64SYS::PMEVTYPER8_EL0) - .Case("pmevtyper9_el0", ARM64SYS::PMEVTYPER9_EL0) - .Case("pmevtyper10_el0", ARM64SYS::PMEVTYPER10_EL0) - .Case("pmevtyper11_el0", ARM64SYS::PMEVTYPER11_EL0) - .Case("pmevtyper12_el0", ARM64SYS::PMEVTYPER12_EL0) - .Case("pmevtyper13_el0", ARM64SYS::PMEVTYPER13_EL0) - .Case("pmevtyper14_el0", ARM64SYS::PMEVTYPER14_EL0) - .Case("pmevtyper15_el0", ARM64SYS::PMEVTYPER15_EL0) - .Case("pmevtyper16_el0", ARM64SYS::PMEVTYPER16_EL0) - .Case("pmevtyper17_el0", ARM64SYS::PMEVTYPER17_EL0) - .Case("pmevtyper18_el0", ARM64SYS::PMEVTYPER18_EL0) - .Case("pmevtyper19_el0", ARM64SYS::PMEVTYPER19_EL0) - .Case("pmevtyper20_el0", ARM64SYS::PMEVTYPER20_EL0) - .Case("pmevtyper21_el0", ARM64SYS::PMEVTYPER21_EL0) - .Case("pmevtyper22_el0", ARM64SYS::PMEVTYPER22_EL0) - .Case("pmevtyper23_el0", ARM64SYS::PMEVTYPER23_EL0) - .Case("pmevtyper24_el0", ARM64SYS::PMEVTYPER24_EL0) - .Case("pmevtyper25_el0", ARM64SYS::PMEVTYPER25_EL0) - .Case("pmevtyper26_el0", ARM64SYS::PMEVTYPER26_EL0) - .Case("pmevtyper27_el0", ARM64SYS::PMEVTYPER27_EL0) - .Case("pmevtyper28_el0", ARM64SYS::PMEVTYPER28_EL0) - .Case("pmevtyper29_el0", ARM64SYS::PMEVTYPER29_EL0) - .Case("pmevtyper30_el0", ARM64SYS::PMEVTYPER30_EL0) - .Case("pmccfiltr_el0", ARM64SYS::PMCCFILTR_EL0) - .Case("rmr_el3", ARM64SYS::RMR_EL3) - .Case("rmr_el2", ARM64SYS::RMR_EL2) - .Case("rmr_el1", ARM64SYS::RMR_EL1) - .Case("cpm_ioacc_ctl_el3", ARM64SYS::CPM_IOACC_CTL_EL3) - .Case("mdccsr_el0", ARM64SYS::MDCCSR_EL0) - .Case("mdccint_el1", ARM64SYS::MDCCINT_EL1) - .Case("dbgdtr_el0", ARM64SYS::DBGDTR_EL0) - .Case("dbgdtrrx_el0", ARM64SYS::DBGDTRRX_EL0) - .Case("dbgdtrtx_el0", ARM64SYS::DBGDTRTX_EL0) - .Case("dbgvcr32_el2", ARM64SYS::DBGVCR32_EL2) - .Case("osdtrrx_el1", ARM64SYS::OSDTRRX_EL1) - .Case("mdscr_el1", ARM64SYS::MDSCR_EL1) - .Case("osdtrtx_el1", ARM64SYS::OSDTRTX_EL1) - .Case("oseccr_el11", ARM64SYS::OSECCR_EL11) - .Case("dbgbvr0_el1", ARM64SYS::DBGBVR0_EL1) - .Case("dbgbvr1_el1", ARM64SYS::DBGBVR1_EL1) - .Case("dbgbvr2_el1", ARM64SYS::DBGBVR2_EL1) - .Case("dbgbvr3_el1", ARM64SYS::DBGBVR3_EL1) - .Case("dbgbvr4_el1", ARM64SYS::DBGBVR4_EL1) - .Case("dbgbvr5_el1", ARM64SYS::DBGBVR5_EL1) - .Case("dbgbvr6_el1", ARM64SYS::DBGBVR6_EL1) - .Case("dbgbvr7_el1", ARM64SYS::DBGBVR7_EL1) - .Case("dbgbvr8_el1", ARM64SYS::DBGBVR8_EL1) - .Case("dbgbvr9_el1", ARM64SYS::DBGBVR9_EL1) - .Case("dbgbvr10_el1", ARM64SYS::DBGBVR10_EL1) - .Case("dbgbvr11_el1", ARM64SYS::DBGBVR11_EL1) - .Case("dbgbvr12_el1", ARM64SYS::DBGBVR12_EL1) - .Case("dbgbvr13_el1", ARM64SYS::DBGBVR13_EL1) - .Case("dbgbvr14_el1", ARM64SYS::DBGBVR14_EL1) - .Case("dbgbvr15_el1", ARM64SYS::DBGBVR15_EL1) - .Case("dbgbcr0_el1", ARM64SYS::DBGBCR0_EL1) - .Case("dbgbcr1_el1", ARM64SYS::DBGBCR1_EL1) - .Case("dbgbcr2_el1", ARM64SYS::DBGBCR2_EL1) - .Case("dbgbcr3_el1", ARM64SYS::DBGBCR3_EL1) - .Case("dbgbcr4_el1", ARM64SYS::DBGBCR4_EL1) - .Case("dbgbcr5_el1", ARM64SYS::DBGBCR5_EL1) - .Case("dbgbcr6_el1", ARM64SYS::DBGBCR6_EL1) - .Case("dbgbcr7_el1", ARM64SYS::DBGBCR7_EL1) - .Case("dbgbcr8_el1", ARM64SYS::DBGBCR8_EL1) - 
.Case("dbgbcr9_el1", ARM64SYS::DBGBCR9_EL1) - .Case("dbgbcr10_el1", ARM64SYS::DBGBCR10_EL1) - .Case("dbgbcr11_el1", ARM64SYS::DBGBCR11_EL1) - .Case("dbgbcr12_el1", ARM64SYS::DBGBCR12_EL1) - .Case("dbgbcr13_el1", ARM64SYS::DBGBCR13_EL1) - .Case("dbgbcr14_el1", ARM64SYS::DBGBCR14_EL1) - .Case("dbgbcr15_el1", ARM64SYS::DBGBCR15_EL1) - .Case("dbgwvr0_el1", ARM64SYS::DBGWVR0_EL1) - .Case("dbgwvr1_el1", ARM64SYS::DBGWVR1_EL1) - .Case("dbgwvr2_el1", ARM64SYS::DBGWVR2_EL1) - .Case("dbgwvr3_el1", ARM64SYS::DBGWVR3_EL1) - .Case("dbgwvr4_el1", ARM64SYS::DBGWVR4_EL1) - .Case("dbgwvr5_el1", ARM64SYS::DBGWVR5_EL1) - .Case("dbgwvr6_el1", ARM64SYS::DBGWVR6_EL1) - .Case("dbgwvr7_el1", ARM64SYS::DBGWVR7_EL1) - .Case("dbgwvr8_el1", ARM64SYS::DBGWVR8_EL1) - .Case("dbgwvr9_el1", ARM64SYS::DBGWVR9_EL1) - .Case("dbgwvr10_el1", ARM64SYS::DBGWVR10_EL1) - .Case("dbgwvr11_el1", ARM64SYS::DBGWVR11_EL1) - .Case("dbgwvr12_el1", ARM64SYS::DBGWVR12_EL1) - .Case("dbgwvr13_el1", ARM64SYS::DBGWVR13_EL1) - .Case("dbgwvr14_el1", ARM64SYS::DBGWVR14_EL1) - .Case("dbgwvr15_el1", ARM64SYS::DBGWVR15_EL1) - .Case("dbgwcr0_el1", ARM64SYS::DBGWCR0_EL1) - .Case("dbgwcr1_el1", ARM64SYS::DBGWCR1_EL1) - .Case("dbgwcr2_el1", ARM64SYS::DBGWCR2_EL1) - .Case("dbgwcr3_el1", ARM64SYS::DBGWCR3_EL1) - .Case("dbgwcr4_el1", ARM64SYS::DBGWCR4_EL1) - .Case("dbgwcr5_el1", ARM64SYS::DBGWCR5_EL1) - .Case("dbgwcr6_el1", ARM64SYS::DBGWCR6_EL1) - .Case("dbgwcr7_el1", ARM64SYS::DBGWCR7_EL1) - .Case("dbgwcr8_el1", ARM64SYS::DBGWCR8_EL1) - .Case("dbgwcr9_el1", ARM64SYS::DBGWCR9_EL1) - .Case("dbgwcr10_el1", ARM64SYS::DBGWCR10_EL1) - .Case("dbgwcr11_el1", ARM64SYS::DBGWCR11_EL1) - .Case("dbgwcr12_el1", ARM64SYS::DBGWCR12_EL1) - .Case("dbgwcr13_el1", ARM64SYS::DBGWCR13_EL1) - .Case("dbgwcr14_el1", ARM64SYS::DBGWCR14_EL1) - .Case("dbgwcr15_el1", ARM64SYS::DBGWCR15_EL1) - .Case("mdrar_el1", ARM64SYS::MDRAR_EL1) - .Case("oslar_el1", ARM64SYS::OSLAR_EL1) - .Case("oslsr_el1", ARM64SYS::OSLSR_EL1) - .Case("osdlr_el1", ARM64SYS::OSDLR_EL1) - .Case("dbgprcr_el1", ARM64SYS::DBGPRCR_EL1) - .Case("dbgclaimset_el1", ARM64SYS::DBGCLAIMSET_EL1) - .Case("dbgclaimclr_el1", ARM64SYS::DBGCLAIMCLR_EL1) - .Case("dbgauthstatus_el1", ARM64SYS::DBGAUTHSTATUS_EL1) - .Case("dbgdevid2", ARM64SYS::DBGDEVID2) - .Case("dbgdevid1", ARM64SYS::DBGDEVID1) - .Case("dbgdevid0", ARM64SYS::DBGDEVID0) - .Case("id_pfr0_el1", ARM64SYS::ID_PFR0_EL1) - .Case("id_pfr1_el1", ARM64SYS::ID_PFR1_EL1) - .Case("id_dfr0_el1", ARM64SYS::ID_DFR0_EL1) - .Case("id_afr0_el1", ARM64SYS::ID_AFR0_EL1) - .Case("id_isar0_el1", ARM64SYS::ID_ISAR0_EL1) - .Case("id_isar1_el1", ARM64SYS::ID_ISAR1_EL1) - .Case("id_isar2_el1", ARM64SYS::ID_ISAR2_EL1) - .Case("id_isar3_el1", ARM64SYS::ID_ISAR3_EL1) - .Case("id_isar4_el1", ARM64SYS::ID_ISAR4_EL1) - .Case("id_isar5_el1", ARM64SYS::ID_ISAR5_EL1) - .Case("afsr1_el1", ARM64SYS::AFSR1_EL1) - .Case("afsr0_el1", ARM64SYS::AFSR0_EL1) - .Case("revidr_el1", ARM64SYS::REVIDR_EL1) - .Default(ARM64SYS::InvalidSystemReg); - if (Reg != ARM64SYS::InvalidSystemReg) { - // We matched a reg name, so create the operand. - Operands.push_back( - ARM64Operand::CreateSystemRegister(Reg, getLoc(), getContext())); - Parser.Lex(); // Consume the register name. - return MatchOperand_Success; - } - - // Or we may have an identifier that encodes the sub-operands. - // For example, s3_2_c15_c0_0. 
- unsigned op0, op1, CRn, CRm, op2; - std::string Desc = ID; - if (std::sscanf(Desc.c_str(), "s%u_%u_c%u_c%u_%u", &op0, &op1, &CRn, &CRm, - &op2) != 5) - return MatchOperand_NoMatch; - if ((op0 != 2 && op0 != 3) || op1 > 7 || CRn > 15 || CRm > 15 || op2 > 7) - return MatchOperand_NoMatch; - - unsigned Val = op0 << 14 | op1 << 11 | CRn << 7 | CRm << 3 | op2; - Operands.push_back( - ARM64Operand::CreateSystemRegister(Val, getLoc(), getContext())); - Parser.Lex(); // Consume the register name. - - return MatchOperand_Success; -} - -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseCPSRField(OperandVector &Operands) { - const AsmToken &Tok = Parser.getTok(); - - if (Tok.isNot(AsmToken::Identifier)) - return MatchOperand_NoMatch; - - ARM64SYS::CPSRField Field = - StringSwitch<ARM64SYS::CPSRField>(Tok.getString().lower()) - .Case("spsel", ARM64SYS::cpsr_SPSel) - .Case("daifset", ARM64SYS::cpsr_DAIFSet) - .Case("daifclr", ARM64SYS::cpsr_DAIFClr) - .Default(ARM64SYS::InvalidCPSRField); - if (Field == ARM64SYS::InvalidCPSRField) - return MatchOperand_NoMatch; - Operands.push_back( - ARM64Operand::CreateCPSRField(Field, getLoc(), getContext())); - Parser.Lex(); // Consume the register name. - - return MatchOperand_Success; -} - -/// tryParseVectorRegister - Parse a vector register operand. -bool ARM64AsmParser::tryParseVectorRegister(OperandVector &Operands) { - if (Parser.getTok().isNot(AsmToken::Identifier)) - return true; - - SMLoc S = getLoc(); - // Check for a vector register specifier first. - StringRef Kind; - int64_t Reg = tryMatchVectorRegister(Kind); - if (Reg == -1) - return true; - Operands.push_back( - ARM64Operand::CreateReg(Reg, true, S, getLoc(), getContext())); - // If there was an explicit qualifier, that goes on as a literal text - // operand. - if (!Kind.empty()) - Operands.push_back(ARM64Operand::CreateToken(Kind, false, S, getContext())); - - // If there is an index specifier following the register, parse that too. - if (Parser.getTok().is(AsmToken::LBrac)) { - SMLoc SIdx = getLoc(); - Parser.Lex(); // Eat left bracket token. - - const MCExpr *ImmVal; - if (getParser().parseExpression(ImmVal)) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) { - TokError("immediate value expected for vector index"); - return false; - } - - SMLoc E = getLoc(); - if (Parser.getTok().isNot(AsmToken::RBrac)) { - Error(E, "']' expected"); - return false; - } - - Parser.Lex(); // Eat right bracket token. - - Operands.push_back(ARM64Operand::CreateVectorIndex(MCE->getValue(), SIdx, E, - getContext())); - } - - return false; -} - -/// parseRegister - Parse a non-vector register operand. -bool ARM64AsmParser::parseRegister(OperandVector &Operands) { - SMLoc S = getLoc(); - // Try for a vector register. - if (!tryParseVectorRegister(Operands)) - return false; - - // Try for a scalar register. - int64_t Reg = tryParseRegister(); - if (Reg == -1) - return true; - Operands.push_back( - ARM64Operand::CreateReg(Reg, false, S, getLoc(), getContext())); - - // A small number of instructions (FMOVXDhighr, for example) have "[1]" - // as a string token in the instruction itself. 
- if (getLexer().getKind() == AsmToken::LBrac) { - SMLoc LBracS = getLoc(); - Parser.Lex(); - const AsmToken &Tok = Parser.getTok(); - if (Tok.is(AsmToken::Integer)) { - SMLoc IntS = getLoc(); - int64_t Val = Tok.getIntVal(); - if (Val == 1) { - Parser.Lex(); - if (getLexer().getKind() == AsmToken::RBrac) { - SMLoc RBracS = getLoc(); - Parser.Lex(); - Operands.push_back( - ARM64Operand::CreateToken("[", false, LBracS, getContext())); - Operands.push_back( - ARM64Operand::CreateToken("1", false, IntS, getContext())); - Operands.push_back( - ARM64Operand::CreateToken("]", false, RBracS, getContext())); - return false; - } - } - } - } - - return false; -} - -/// tryParseNoIndexMemory - Custom parser method for memory operands that -/// do not allow base regisrer writeback modes, -/// or those that handle writeback separately from -/// the memory operand (like the AdvSIMD ldX/stX -/// instructions. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseNoIndexMemory(OperandVector &Operands) { - if (Parser.getTok().isNot(AsmToken::LBrac)) - return MatchOperand_NoMatch; - SMLoc S = getLoc(); - Parser.Lex(); // Eat left bracket token. - - const AsmToken &BaseRegTok = Parser.getTok(); - if (BaseRegTok.isNot(AsmToken::Identifier)) { - Error(BaseRegTok.getLoc(), "register expected"); - return MatchOperand_ParseFail; - } - - int64_t Reg = tryParseRegister(); - if (Reg == -1) { - Error(BaseRegTok.getLoc(), "register expected"); - return MatchOperand_ParseFail; - } - - SMLoc E = getLoc(); - if (Parser.getTok().isNot(AsmToken::RBrac)) { - Error(E, "']' expected"); - return MatchOperand_ParseFail; - } - - Parser.Lex(); // Eat right bracket token. - - Operands.push_back(ARM64Operand::CreateMem(Reg, 0, S, E, E, getContext())); - return MatchOperand_Success; -} - -/// parseMemory - Parse a memory operand for a basic load/store instruction. -bool ARM64AsmParser::parseMemory(OperandVector &Operands) { - assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a Left Bracket"); - SMLoc S = getLoc(); - Parser.Lex(); // Eat left bracket token. - - const AsmToken &BaseRegTok = Parser.getTok(); - if (BaseRegTok.isNot(AsmToken::Identifier)) - return Error(BaseRegTok.getLoc(), "register expected"); - - int64_t Reg = tryParseRegister(); - if (Reg == -1) - return Error(BaseRegTok.getLoc(), "register expected"); - - // If there is an offset expression, parse it. - const MCExpr *OffsetExpr = 0; - SMLoc OffsetLoc; - if (Parser.getTok().is(AsmToken::Comma)) { - Parser.Lex(); // Eat the comma. - OffsetLoc = getLoc(); - - // Register offset - const AsmToken &OffsetRegTok = Parser.getTok(); - int Reg2 = OffsetRegTok.is(AsmToken::Identifier) ? tryParseRegister() : -1; - if (Reg2 != -1) { - // Default shift is LSL, with an omitted shift. We use the third bit of - // the extend value to indicate presence/omission of the immediate offset. - ARM64_AM::ExtendType ExtOp = ARM64_AM::UXTX; - int64_t ShiftVal = 0; - bool ExplicitShift = false; - - if (Parser.getTok().is(AsmToken::Comma)) { - // Embedded extend operand. 
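// e.g. the third component of "[x1, x2, lsl #3]" or "[x1, w2, sxtw #2]";
// the extend keyword and its optional #imm are parsed next.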
- Parser.Lex(); // Eat the comma - - SMLoc ExtLoc = getLoc(); - const AsmToken &Tok = Parser.getTok(); - ExtOp = StringSwitch<ARM64_AM::ExtendType>(Tok.getString()) - .Case("uxtw", ARM64_AM::UXTW) - .Case("lsl", ARM64_AM::UXTX) // Alias for UXTX - .Case("sxtw", ARM64_AM::SXTW) - .Case("sxtx", ARM64_AM::SXTX) - .Case("UXTW", ARM64_AM::UXTW) - .Case("LSL", ARM64_AM::UXTX) // Alias for UXTX - .Case("SXTW", ARM64_AM::SXTW) - .Case("SXTX", ARM64_AM::SXTX) - .Default(ARM64_AM::InvalidExtend); - if (ExtOp == ARM64_AM::InvalidExtend) - return Error(ExtLoc, "expected valid extend operation"); - - Parser.Lex(); // Eat the extend op. - - if (getLexer().is(AsmToken::RBrac)) { - // No immediate operand. - if (ExtOp == ARM64_AM::UXTX) - return Error(ExtLoc, "LSL extend requires immediate operand"); - } else if (getLexer().is(AsmToken::Hash)) { - // Immediate operand. - Parser.Lex(); // Eat the '#' - const MCExpr *ImmVal; - SMLoc ExprLoc = getLoc(); - if (getParser().parseExpression(ImmVal)) - return true; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) - return TokError("immediate value expected for extend operand"); - - ExplicitShift = true; - ShiftVal = MCE->getValue(); - if (ShiftVal < 0 || ShiftVal > 4) - return Error(ExprLoc, "immediate operand out of range"); - } else - return Error(getLoc(), "expected immediate operand"); - } - - if (Parser.getTok().isNot(AsmToken::RBrac)) - return Error(getLoc(), "']' expected"); - - Parser.Lex(); // Eat right bracket token. - - SMLoc E = getLoc(); - Operands.push_back(ARM64Operand::CreateRegOffsetMem( - Reg, Reg2, ExtOp, ShiftVal, ExplicitShift, S, E, getContext())); - return false; - - // Immediate expressions. - } else if (Parser.getTok().is(AsmToken::Hash)) { - Parser.Lex(); // Eat hash token. - - if (parseSymbolicImmVal(OffsetExpr)) - return true; - } else { - // FIXME: We really should make sure that we're dealing with a LDR/STR - // instruction that can legally have a symbolic expression here. - // Symbol reference. - if (Parser.getTok().isNot(AsmToken::Identifier) && - Parser.getTok().isNot(AsmToken::String)) - return Error(getLoc(), "identifier or immediate expression expected"); - if (getParser().parseExpression(OffsetExpr)) - return true; - // If this is a plain ref, Make sure a legal variant kind was specified. - // Otherwise, it's a more complicated expression and we have to just - // assume it's OK and let the relocation stuff puke if it's not. - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - const MCConstantExpr *Addend; - if (classifySymbolRef(OffsetExpr, ELFRefKind, DarwinRefKind, Addend) && - Addend == 0) { - assert(ELFRefKind == ARM64MCExpr::VK_INVALID && - "ELF symbol modifiers not supported here yet"); - - switch (DarwinRefKind) { - default: - return Error(getLoc(), "expected @pageoff or @gotpageoff modifier"); - case MCSymbolRefExpr::VK_GOTPAGEOFF: - case MCSymbolRefExpr::VK_PAGEOFF: - case MCSymbolRefExpr::VK_TLVPPAGEOFF: - // These are what we're expecting. - break; - } - } - } - } - - SMLoc E = getLoc(); - if (Parser.getTok().isNot(AsmToken::RBrac)) - return Error(E, "']' expected"); - - Parser.Lex(); // Eat right bracket token. - - // Create the memory operand. - Operands.push_back( - ARM64Operand::CreateMem(Reg, OffsetExpr, S, E, OffsetLoc, getContext())); - - // Check for a '!', indicating pre-indexed addressing with writeback. - if (Parser.getTok().is(AsmToken::Exclaim)) { - // There needs to have been an immediate or wback doesn't make sense. 
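// e.g. "ldr x0, [x1, #8]!" writes x1 + 8 back into x1; a bare "[x1]!" has
// no offset to write back and is rejected below.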
- if (!OffsetExpr) - return Error(E, "missing offset for pre-indexed addressing"); - // Pre-indexed with writeback must have a constant expression for the - // offset. FIXME: Theoretically, we'd like to allow fixups so long - // as they don't require a relocation. - if (!isa<MCConstantExpr>(OffsetExpr)) - return Error(OffsetLoc, "constant immediate expression expected"); - - // Create the Token operand for the '!'. - Operands.push_back(ARM64Operand::CreateToken( - "!", false, Parser.getTok().getLoc(), getContext())); - Parser.Lex(); // Eat the '!' token. - } - - return false; -} - -bool ARM64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { - bool HasELFModifier = false; - ARM64MCExpr::VariantKind RefKind; - - if (Parser.getTok().is(AsmToken::Colon)) { - Parser.Lex(); // Eat ':" - HasELFModifier = true; - - if (Parser.getTok().isNot(AsmToken::Identifier)) { - Error(Parser.getTok().getLoc(), - "expect relocation specifier in operand after ':'"); - return true; - } - - std::string LowerCase = Parser.getTok().getIdentifier().lower(); - RefKind = StringSwitch<ARM64MCExpr::VariantKind>(LowerCase) - .Case("lo12", ARM64MCExpr::VK_LO12) - .Case("abs_g3", ARM64MCExpr::VK_ABS_G3) - .Case("abs_g2", ARM64MCExpr::VK_ABS_G2) - .Case("abs_g2_nc", ARM64MCExpr::VK_ABS_G2_NC) - .Case("abs_g1", ARM64MCExpr::VK_ABS_G1) - .Case("abs_g1_nc", ARM64MCExpr::VK_ABS_G1_NC) - .Case("abs_g0", ARM64MCExpr::VK_ABS_G0) - .Case("abs_g0_nc", ARM64MCExpr::VK_ABS_G0_NC) - .Case("dtprel_g2", ARM64MCExpr::VK_DTPREL_G2) - .Case("dtprel_g1", ARM64MCExpr::VK_DTPREL_G1) - .Case("dtprel_g1_nc", ARM64MCExpr::VK_DTPREL_G1_NC) - .Case("dtprel_g0", ARM64MCExpr::VK_DTPREL_G0) - .Case("dtprel_g0_nc", ARM64MCExpr::VK_DTPREL_G0_NC) - .Case("dtprel_lo12", ARM64MCExpr::VK_DTPREL_LO12) - .Case("dtprel_lo12_nc", ARM64MCExpr::VK_DTPREL_LO12_NC) - .Case("tprel_g2", ARM64MCExpr::VK_TPREL_G2) - .Case("tprel_g1", ARM64MCExpr::VK_TPREL_G1) - .Case("tprel_g1_nc", ARM64MCExpr::VK_TPREL_G1_NC) - .Case("tprel_g0", ARM64MCExpr::VK_TPREL_G0) - .Case("tprel_g0_nc", ARM64MCExpr::VK_TPREL_G0_NC) - .Case("tprel_lo12", ARM64MCExpr::VK_TPREL_LO12) - .Case("tprel_lo12_nc", ARM64MCExpr::VK_TPREL_LO12_NC) - .Case("tlsdesc_lo12", ARM64MCExpr::VK_TLSDESC_LO12) - .Case("got", ARM64MCExpr::VK_GOT_PAGE) - .Case("got_lo12", ARM64MCExpr::VK_GOT_LO12) - .Case("gottprel", ARM64MCExpr::VK_GOTTPREL_PAGE) - .Case("gottprel_lo12", ARM64MCExpr::VK_GOTTPREL_LO12_NC) - .Case("gottprel_g1", ARM64MCExpr::VK_GOTTPREL_G1) - .Case("gottprel_g0_nc", ARM64MCExpr::VK_GOTTPREL_G0_NC) - .Case("tlsdesc", ARM64MCExpr::VK_TLSDESC_PAGE) - .Default(ARM64MCExpr::VK_INVALID); - - if (RefKind == ARM64MCExpr::VK_INVALID) { - Error(Parser.getTok().getLoc(), - "expect relocation specifier in operand after ':'"); - return true; - } - - Parser.Lex(); // Eat identifier - - if (Parser.getTok().isNot(AsmToken::Colon)) { - Error(Parser.getTok().getLoc(), "expect ':' after relocation specifier"); - return true; - } - Parser.Lex(); // Eat ':' - } - - if (getParser().parseExpression(ImmVal)) - return true; - - if (HasELFModifier) - ImmVal = ARM64MCExpr::Create(ImmVal, RefKind, getContext()); - - return false; -} - -/// parseVectorList - Parse a vector list operand for AdvSIMD instructions. -bool ARM64AsmParser::parseVectorList(OperandVector &Operands) { - assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Bracket"); - SMLoc S = getLoc(); - Parser.Lex(); // Eat left bracket token. 
- StringRef Kind; - int64_t FirstReg = tryMatchVectorRegister(Kind); - if (FirstReg == -1) - return Error(getLoc(), "vector register expected"); - int64_t PrevReg = FirstReg; - unsigned Count = 1; - while (Parser.getTok().isNot(AsmToken::RCurly)) { - if (Parser.getTok().is(AsmToken::EndOfStatement)) - Error(getLoc(), "'}' expected"); - - if (Parser.getTok().isNot(AsmToken::Comma)) - return Error(getLoc(), "',' expected"); - Parser.Lex(); // Eat the comma token. - - SMLoc Loc = getLoc(); - StringRef NextKind; - int64_t Reg = tryMatchVectorRegister(NextKind); - if (Reg == -1) - return Error(Loc, "vector register expected"); - // Any Kind suffices must match on all regs in the list. - if (Kind != NextKind) - return Error(Loc, "mismatched register size suffix"); - - // Registers must be incremental (with wraparound at 31) - if (getContext().getRegisterInfo()->getEncodingValue(Reg) != - (getContext().getRegisterInfo()->getEncodingValue(PrevReg) + 1) % 32) - return Error(Loc, "registers must be sequential"); - - PrevReg = Reg; - ++Count; - } - Parser.Lex(); // Eat the '}' token. - - unsigned NumElements = 0; - char ElementKind = 0; - if (!Kind.empty()) - parseValidVectorKind(Kind, NumElements, ElementKind); - - Operands.push_back(ARM64Operand::CreateVectorList( - FirstReg, Count, NumElements, ElementKind, S, getLoc(), getContext())); - - // If there is an index specifier following the list, parse that too. - if (Parser.getTok().is(AsmToken::LBrac)) { - SMLoc SIdx = getLoc(); - Parser.Lex(); // Eat left bracket token. - - const MCExpr *ImmVal; - if (getParser().parseExpression(ImmVal)) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) { - TokError("immediate value expected for vector index"); - return false; - } - - SMLoc E = getLoc(); - if (Parser.getTok().isNot(AsmToken::RBrac)) { - Error(E, "']' expected"); - return false; - } - - Parser.Lex(); // Eat right bracket token. - - Operands.push_back(ARM64Operand::CreateVectorIndex(MCE->getValue(), SIdx, E, - getContext())); - } - return false; -} - -/// parseOperand - Parse a arm instruction operand. For now this parses the -/// operand regardless of the mnemonic. -bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, - bool invertCondCode) { - // Check if the current operand has a custom associated parser, if so, try to - // custom parse the operand, or fallback to the general approach. - OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); - if (ResTy == MatchOperand_Success) - return false; - // If there wasn't a custom match, try the generic matcher below. Otherwise, - // there was a match, but an error occurred, in which case, just return that - // the operand parsing failed. - if (ResTy == MatchOperand_ParseFail) - return true; - - // Nothing custom, so do general case parsing. - SMLoc S, E; - switch (getLexer().getKind()) { - default: { - SMLoc S = getLoc(); - const MCExpr *Expr; - if (parseSymbolicImmVal(Expr)) - return Error(S, "invalid operand"); - - SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext())); - return false; - } - case AsmToken::LBrac: - return parseMemory(Operands); - case AsmToken::LCurly: - return parseVectorList(Operands); - case AsmToken::Identifier: { - // If we're expecting a Condition Code operand, then just parse that. - if (isCondCode) - return parseCondCode(Operands, invertCondCode); - - // If it's a register name, parse it. 
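Editorial aside, not part of the deleted source: the vector-list loop above accepts register lists whose encodings are consecutive modulo 32, so a list may wrap from V31 back to V0, and any gap triggers the "registers must be sequential" diagnostic. A minimal standalone sketch of that rule on raw encoding values follows; the helper name and sample encodings are placeholders, not LLVM API.

  #include <cassert>

  // Consecutive-with-wraparound rule used for AdvSIMD register lists:
  // the next register's encoding must be (previous + 1) mod 32.
  static bool isSequential(unsigned PrevEnc, unsigned NextEnc) {
    return NextEnc == (PrevEnc + 1) % 32;
  }

  int main() {
    assert(isSequential(30, 31)); // { v30.4s, v31.4s, ... }
    assert(isSequential(31, 0));  // ... the list may wrap around to v0.4s
    assert(!isSequential(3, 5));  // gap: rejected by the parser
    return 0;
  }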
- if (!parseRegister(Operands)) - return false; - - // This could be an optional "shift" operand. - if (!parseOptionalShift(Operands)) - return false; - - // Or maybe it could be an optional "extend" operand. - if (!parseOptionalExtend(Operands)) - return false; - - // This was not a register so parse other operands that start with an - // identifier (like labels) as expressions and create them as immediates. - const MCExpr *IdVal; - S = getLoc(); - if (getParser().parseExpression(IdVal)) - return true; - - E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(IdVal, S, E, getContext())); - return false; - } - case AsmToken::Hash: { - // #42 -> immediate. - S = getLoc(); - Parser.Lex(); - - // The only Real that should come through here is a literal #0.0 for - // the fcmp[e] r, #0.0 instructions. They expect raw token operands, - // so convert the value. - const AsmToken &Tok = Parser.getTok(); - if (Tok.is(AsmToken::Real)) { - APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); - uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); - if (IntVal != 0 || (Mnemonic != "fcmp" && Mnemonic != "fcmpe")) - return TokError("unexpected floating point literal"); - Parser.Lex(); // Eat the token. - - Operands.push_back( - ARM64Operand::CreateToken("#0", false, S, getContext())); - Operands.push_back( - ARM64Operand::CreateToken(".0", false, S, getContext())); - return false; - } - - const MCExpr *ImmVal; - if (parseSymbolicImmVal(ImmVal)) - return true; - - E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(ImmVal, S, E, getContext())); - return false; - } - } -} - -/// ParseInstruction - Parse an ARM64 instruction mnemonic followed by its -/// operands. -bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info, - StringRef Name, SMLoc NameLoc, - OperandVector &Operands) { - // Create the leading tokens for the mnemonic, split by '.' characters. - size_t Start = 0, Next = Name.find('.'); - StringRef Head = Name.slice(Start, Next); - - // IC, DC, AT, and TLBI instructions are aliases for the SYS instruction. - if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi") - return parseSysAlias(Head, NameLoc, Operands); - - Operands.push_back( - ARM64Operand::CreateToken(Head, false, NameLoc, getContext())); - Mnemonic = Head; - - // Handle condition codes for a branch mnemonic - if (Head == "b" && Next != StringRef::npos) { - Start = Next; - Next = Name.find('.', Start + 1); - Head = Name.slice(Start + 1, Next); - - SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() + - (Head.data() - Name.data())); - unsigned CC = parseCondCodeString(Head); - if (CC == ~0U) - return Error(SuffixLoc, "invalid condition code"); - const MCExpr *CCExpr = MCConstantExpr::Create(CC, getContext()); - Operands.push_back( - ARM64Operand::CreateImm(CCExpr, NameLoc, NameLoc, getContext())); - } - - // Add the remaining tokens in the mnemonic. - while (Next != StringRef::npos) { - Start = Next; - Next = Name.find('.', Start + 1); - Head = Name.slice(Start, Next); - SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() + - (Head.data() - Name.data()) + 1); - Operands.push_back( - ARM64Operand::CreateToken(Head, true, SuffixLoc, getContext())); - } - - // Conditional compare instructions have a Condition Code operand, which needs - // to be parsed and an immediate operand created. 
- bool condCodeFourthOperand = - (Head == "ccmp" || Head == "ccmn" || Head == "fccmp" || - Head == "fccmpe" || Head == "fcsel" || Head == "csel" || - Head == "csinc" || Head == "csinv" || Head == "csneg"); - - // These instructions are aliases to some of the conditional select - // instructions. However, the condition code is inverted in the aliased - // instruction. - // - // FIXME: Is this the correct way to handle these? Or should the parser - // generate the aliased instructions directly? - bool condCodeSecondOperand = (Head == "cset" || Head == "csetm"); - bool condCodeThirdOperand = - (Head == "cinc" || Head == "cinv" || Head == "cneg"); - - // Read the remaining operands. - if (getLexer().isNot(AsmToken::EndOfStatement)) { - // Read the first operand. - if (parseOperand(Operands, false, false)) { - Parser.eatToEndOfStatement(); - return true; - } - - unsigned N = 2; - while (getLexer().is(AsmToken::Comma)) { - Parser.Lex(); // Eat the comma. - - // Parse and remember the operand. - if (parseOperand(Operands, (N == 4 && condCodeFourthOperand) || - (N == 3 && condCodeThirdOperand) || - (N == 2 && condCodeSecondOperand), - condCodeSecondOperand || condCodeThirdOperand)) { - Parser.eatToEndOfStatement(); - return true; - } - - ++N; - } - } - - if (getLexer().isNot(AsmToken::EndOfStatement)) { - SMLoc Loc = Parser.getTok().getLoc(); - Parser.eatToEndOfStatement(); - return Error(Loc, "unexpected token in argument list"); - } - - Parser.Lex(); // Consume the EndOfStatement - return false; -} - -/// isFPR32Register - Check if a register is in the FPR32 register class. -/// (The parser does not have the target register info to check the register -/// class directly.) -static bool isFPR32Register(unsigned Reg) { - using namespace ARM64; - switch (Reg) { - default: - break; - case S0: case S1: case S2: case S3: case S4: case S5: case S6: - case S7: case S8: case S9: case S10: case S11: case S12: case S13: - case S14: case S15: case S16: case S17: case S18: case S19: case S20: - case S21: case S22: case S23: case S24: case S25: case S26: case S27: - case S28: case S29: case S30: case S31: - return true; - } - return false; -} - -/// isGPR32Register - Check if a register is in the GPR32sp register class. -/// (The parser does not have the target register info to check the register -/// class directly.) -static bool isGPR32Register(unsigned Reg) { - using namespace ARM64; - switch (Reg) { - default: - break; - case W0: case W1: case W2: case W3: case W4: case W5: case W6: - case W7: case W8: case W9: case W10: case W11: case W12: case W13: - case W14: case W15: case W16: case W17: case W18: case W19: case W20: - case W21: case W22: case W23: case W24: case W25: case W26: case W27: - case W28: case W29: case W30: case WSP: - return true; - } - return false; -} - -static bool isGPR64Reg(unsigned Reg) { - using namespace ARM64; - switch (Reg) { - case X0: case X1: case X2: case X3: case X4: case X5: case X6: - case X7: case X8: case X9: case X10: case X11: case X12: case X13: - case X14: case X15: case X16: case X17: case X18: case X19: case X20: - case X21: case X22: case X23: case X24: case X25: case X26: case X27: - case X28: case FP: case LR: case SP: case XZR: - return true; - default: - return false; - } -} - - -// FIXME: This entire function is a giant hack to provide us with decent -// operand range validation/diagnostics until TableGen/MC can be extended -// to support autogeneration of this kind of validation. 
-bool ARM64AsmParser::validateInstruction(MCInst &Inst, - SmallVectorImpl<SMLoc> &Loc) { - const MCRegisterInfo *RI = getContext().getRegisterInfo(); - // Check for indexed addressing modes w/ the base register being the - // same as a destination/source register or pair load where - // the Rt == Rt2. All of those are undefined behaviour. - switch (Inst.getOpcode()) { - case ARM64::LDPSWpre: - case ARM64::LDPWpost: - case ARM64::LDPWpre: - case ARM64::LDPXpost: - case ARM64::LDPXpre: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rt2 = Inst.getOperand(1).getReg(); - unsigned Rn = Inst.getOperand(2).getReg(); - if (RI->isSubRegisterEq(Rn, Rt)) - return Error(Loc[0], "unpredictable LDP instruction, writeback base " - "is also a destination"); - if (RI->isSubRegisterEq(Rn, Rt2)) - return Error(Loc[1], "unpredictable LDP instruction, writeback base " - "is also a destination"); - // FALLTHROUGH - } - case ARM64::LDPDpost: - case ARM64::LDPDpre: - case ARM64::LDPQpost: - case ARM64::LDPQpre: - case ARM64::LDPSpost: - case ARM64::LDPSpre: - case ARM64::LDPSWpost: - case ARM64::LDPDi: - case ARM64::LDPQi: - case ARM64::LDPSi: - case ARM64::LDPSWi: - case ARM64::LDPWi: - case ARM64::LDPXi: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rt2 = Inst.getOperand(1).getReg(); - if (Rt == Rt2) - return Error(Loc[1], "unpredictable LDP instruction, Rt2==Rt"); - break; - } - case ARM64::STPDpost: - case ARM64::STPDpre: - case ARM64::STPQpost: - case ARM64::STPQpre: - case ARM64::STPSpost: - case ARM64::STPSpre: - case ARM64::STPWpost: - case ARM64::STPWpre: - case ARM64::STPXpost: - case ARM64::STPXpre: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rt2 = Inst.getOperand(1).getReg(); - unsigned Rn = Inst.getOperand(2).getReg(); - if (RI->isSubRegisterEq(Rn, Rt)) - return Error(Loc[0], "unpredictable STP instruction, writeback base " - "is also a source"); - if (RI->isSubRegisterEq(Rn, Rt2)) - return Error(Loc[1], "unpredictable STP instruction, writeback base " - "is also a source"); - break; - } - case ARM64::LDRBBpre: - case ARM64::LDRBpre: - case ARM64::LDRHHpre: - case ARM64::LDRHpre: - case ARM64::LDRSBWpre: - case ARM64::LDRSBXpre: - case ARM64::LDRSHWpre: - case ARM64::LDRSHXpre: - case ARM64::LDRSWpre: - case ARM64::LDRWpre: - case ARM64::LDRXpre: - case ARM64::LDRBBpost: - case ARM64::LDRBpost: - case ARM64::LDRHHpost: - case ARM64::LDRHpost: - case ARM64::LDRSBWpost: - case ARM64::LDRSBXpost: - case ARM64::LDRSHWpost: - case ARM64::LDRSHXpost: - case ARM64::LDRSWpost: - case ARM64::LDRWpost: - case ARM64::LDRXpost: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rn = Inst.getOperand(1).getReg(); - if (RI->isSubRegisterEq(Rn, Rt)) - return Error(Loc[0], "unpredictable LDR instruction, writeback base " - "is also a source"); - break; - } - case ARM64::STRBBpost: - case ARM64::STRBpost: - case ARM64::STRHHpost: - case ARM64::STRHpost: - case ARM64::STRWpost: - case ARM64::STRXpost: - case ARM64::STRBBpre: - case ARM64::STRBpre: - case ARM64::STRHHpre: - case ARM64::STRHpre: - case ARM64::STRWpre: - case ARM64::STRXpre: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rn = Inst.getOperand(1).getReg(); - if (RI->isSubRegisterEq(Rn, Rt)) - return Error(Loc[0], "unpredictable STR instruction, writeback base " - "is also a source"); - break; - } - } - - // Now check immediate ranges. Separate from the above as there is overlap - // in the instructions being checked and this keeps the nested conditionals - // to a minimum. 
- switch (Inst.getOpcode()) { - case ARM64::ANDWrs: - case ARM64::ANDSWrs: - case ARM64::EORWrs: - case ARM64::ORRWrs: { - if (!Inst.getOperand(3).isImm()) - return Error(Loc[3], "immediate value expected"); - int64_t shifter = Inst.getOperand(3).getImm(); - ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(shifter); - if (ST == ARM64_AM::LSL && shifter > 31) - return Error(Loc[3], "shift value out of range"); - return false; - } - case ARM64::ADDSWri: - case ARM64::ADDSXri: - case ARM64::ADDWri: - case ARM64::ADDXri: - case ARM64::SUBSWri: - case ARM64::SUBSXri: - case ARM64::SUBWri: - case ARM64::SUBXri: { - if (!Inst.getOperand(3).isImm()) - return Error(Loc[3], "immediate value expected"); - int64_t shifter = Inst.getOperand(3).getImm(); - if (shifter != 0 && shifter != 12) - return Error(Loc[3], "shift value out of range"); - // The imm12 operand can be an expression. Validate that it's legit. - // FIXME: We really, really want to allow arbitrary expressions here - // and resolve the value and validate the result at fixup time, but - // that's hard as we have long since lost any source information we - // need to generate good diagnostics by that point. - if (Inst.getOpcode() == ARM64::ADDXri && Inst.getOperand(2).isExpr()) { - const MCExpr *Expr = Inst.getOperand(2).getExpr(); - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - const MCConstantExpr *Addend; - if (!classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) { - return Error(Loc[2], "invalid immediate expression"); - } - - if (DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF || - DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF || - ELFRefKind == ARM64MCExpr::VK_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12) { - // Note that we don't range-check the addend. It's adjusted - // modulo page size when converted, so there is no "out of range" - // condition when using @pageoff. Any validity checking for the value - // was done in the is*() predicate function. - return false; - } else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF) { - // @gotpageoff can only be used directly, not with an addend. - return Addend != 0; - } - - // Otherwise, we're not sure, so don't allow it for now. - return Error(Loc[2], "invalid immediate expression"); - } - - // If it's anything but an immediate, it's not legit. 
- if (!Inst.getOperand(2).isImm()) - return Error(Loc[2], "invalid immediate expression"); - int64_t imm = Inst.getOperand(2).getImm(); - if (imm > 4095 || imm < 0) - return Error(Loc[2], "immediate value out of range"); - return false; - } - case ARM64::LDRBpre: - case ARM64::LDRHpre: - case ARM64::LDRSBWpre: - case ARM64::LDRSBXpre: - case ARM64::LDRSHWpre: - case ARM64::LDRSHXpre: - case ARM64::LDRWpre: - case ARM64::LDRXpre: - case ARM64::LDRSpre: - case ARM64::LDRDpre: - case ARM64::LDRQpre: - case ARM64::STRBpre: - case ARM64::STRHpre: - case ARM64::STRWpre: - case ARM64::STRXpre: - case ARM64::STRSpre: - case ARM64::STRDpre: - case ARM64::STRQpre: - case ARM64::LDRBpost: - case ARM64::LDRHpost: - case ARM64::LDRSBWpost: - case ARM64::LDRSBXpost: - case ARM64::LDRSHWpost: - case ARM64::LDRSHXpost: - case ARM64::LDRWpost: - case ARM64::LDRXpost: - case ARM64::LDRSpost: - case ARM64::LDRDpost: - case ARM64::LDRQpost: - case ARM64::STRBpost: - case ARM64::STRHpost: - case ARM64::STRWpost: - case ARM64::STRXpost: - case ARM64::STRSpost: - case ARM64::STRDpost: - case ARM64::STRQpost: - case ARM64::LDTRXi: - case ARM64::LDTRWi: - case ARM64::LDTRHi: - case ARM64::LDTRBi: - case ARM64::LDTRSHWi: - case ARM64::LDTRSHXi: - case ARM64::LDTRSBWi: - case ARM64::LDTRSBXi: - case ARM64::LDTRSWi: - case ARM64::STTRWi: - case ARM64::STTRXi: - case ARM64::STTRHi: - case ARM64::STTRBi: - case ARM64::LDURWi: - case ARM64::LDURXi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURHi: - case ARM64::LDURBi: - case ARM64::LDURSHWi: - case ARM64::LDURSHXi: - case ARM64::LDURSBWi: - case ARM64::LDURSBXi: - case ARM64::LDURSWi: - case ARM64::PRFUMi: - case ARM64::STURWi: - case ARM64::STURXi: - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURHi: - case ARM64::STURBi: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. - if (!Inst.getOperand(2).isImm()) - return Error(Loc[1], "immediate value expected"); - int64_t offset = Inst.getOperand(2).getImm(); - if (offset > 255 || offset < -256) - return Error(Loc[1], "offset value out of range"); - return false; - } - case ARM64::LDRSro: - case ARM64::LDRWro: - case ARM64::LDRSWro: - case ARM64::STRWro: - case ARM64::STRSro: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. - if (!Inst.getOperand(3).isImm()) - return Error(Loc[1], "immediate value expected"); - int64_t shift = Inst.getOperand(3).getImm(); - ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift); - if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX && - type != ARM64_AM::SXTW && type != ARM64_AM::SXTX) - return Error(Loc[1], "shift type invalid"); - return false; - } - case ARM64::LDRDro: - case ARM64::LDRQro: - case ARM64::LDRXro: - case ARM64::PRFMro: - case ARM64::STRXro: - case ARM64::STRDro: - case ARM64::STRQro: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. 
- if (!Inst.getOperand(3).isImm()) - return Error(Loc[1], "immediate value expected"); - int64_t shift = Inst.getOperand(3).getImm(); - ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift); - if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX && - type != ARM64_AM::SXTW && type != ARM64_AM::SXTX) - return Error(Loc[1], "shift type invalid"); - return false; - } - case ARM64::LDRHro: - case ARM64::LDRHHro: - case ARM64::LDRSHWro: - case ARM64::LDRSHXro: - case ARM64::STRHro: - case ARM64::STRHHro: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. - if (!Inst.getOperand(3).isImm()) - return Error(Loc[1], "immediate value expected"); - int64_t shift = Inst.getOperand(3).getImm(); - ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift); - if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX && - type != ARM64_AM::SXTW && type != ARM64_AM::SXTX) - return Error(Loc[1], "shift type invalid"); - return false; - } - case ARM64::LDRBro: - case ARM64::LDRBBro: - case ARM64::LDRSBWro: - case ARM64::LDRSBXro: - case ARM64::STRBro: - case ARM64::STRBBro: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. - if (!Inst.getOperand(3).isImm()) - return Error(Loc[1], "immediate value expected"); - int64_t shift = Inst.getOperand(3).getImm(); - ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift); - if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX && - type != ARM64_AM::SXTW && type != ARM64_AM::SXTX) - return Error(Loc[1], "shift type invalid"); - return false; - } - case ARM64::LDPWi: - case ARM64::LDPXi: - case ARM64::LDPSi: - case ARM64::LDPDi: - case ARM64::LDPQi: - case ARM64::LDPSWi: - case ARM64::STPWi: - case ARM64::STPXi: - case ARM64::STPSi: - case ARM64::STPDi: - case ARM64::STPQi: - case ARM64::LDPWpre: - case ARM64::LDPXpre: - case ARM64::LDPSpre: - case ARM64::LDPDpre: - case ARM64::LDPQpre: - case ARM64::LDPSWpre: - case ARM64::STPWpre: - case ARM64::STPXpre: - case ARM64::STPSpre: - case ARM64::STPDpre: - case ARM64::STPQpre: - case ARM64::LDPWpost: - case ARM64::LDPXpost: - case ARM64::LDPSpost: - case ARM64::LDPDpost: - case ARM64::LDPQpost: - case ARM64::LDPSWpost: - case ARM64::STPWpost: - case ARM64::STPXpost: - case ARM64::STPSpost: - case ARM64::STPDpost: - case ARM64::STPQpost: - case ARM64::LDNPWi: - case ARM64::LDNPXi: - case ARM64::LDNPSi: - case ARM64::LDNPDi: - case ARM64::LDNPQi: - case ARM64::STNPWi: - case ARM64::STNPXi: - case ARM64::STNPSi: - case ARM64::STNPDi: - case ARM64::STNPQi: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. 
- if (!Inst.getOperand(3).isImm()) - return Error(Loc[2], "immediate value expected"); - int64_t offset = Inst.getOperand(3).getImm(); - if (offset > 63 || offset < -64) - return Error(Loc[2], "offset value out of range"); - return false; - } - default: - return false; - } -} - -static void rewriteMOV(ARM64AsmParser::OperandVector &Operands, - StringRef mnemonic, uint64_t imm, unsigned shift, - MCContext &Context) { - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[0]); - ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]); - Operands[0] = - ARM64Operand::CreateToken(mnemonic, false, Op->getStartLoc(), Context); - - const MCExpr *NewImm = MCConstantExpr::Create(imm >> shift, Context); - Operands[2] = ARM64Operand::CreateImm(NewImm, Op2->getStartLoc(), - Op2->getEndLoc(), Context); - - Operands.push_back(ARM64Operand::CreateShifter( - ARM64_AM::LSL, shift, Op2->getStartLoc(), Op2->getEndLoc(), Context)); - delete Op2; - delete Op; -} - -bool ARM64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { - switch (ErrCode) { - case Match_MissingFeature: - return Error(Loc, - "instruction requires a CPU feature not currently enabled"); - case Match_InvalidOperand: - return Error(Loc, "invalid operand for instruction"); - case Match_InvalidSuffix: - return Error(Loc, "invalid type suffix for instruction"); - case Match_InvalidMemoryIndexedSImm9: - return Error(Loc, "index must be an integer in range [-256,255]."); - case Match_InvalidMemoryIndexed32SImm7: - return Error(Loc, "index must be a multiple of 4 in range [-256,252]."); - case Match_InvalidMemoryIndexed64SImm7: - return Error(Loc, "index must be a multiple of 8 in range [-512,504]."); - case Match_InvalidMemoryIndexed128SImm7: - return Error(Loc, "index must be a multiple of 16 in range [-1024,1008]."); - case Match_InvalidMemoryIndexed8: - return Error(Loc, "index must be an integer in range [0,4095]."); - case Match_InvalidMemoryIndexed16: - return Error(Loc, "index must be a multiple of 2 in range [0,8190]."); - case Match_InvalidMemoryIndexed32: - return Error(Loc, "index must be a multiple of 4 in range [0,16380]."); - case Match_InvalidMemoryIndexed64: - return Error(Loc, "index must be a multiple of 8 in range [0,32760]."); - case Match_InvalidMemoryIndexed128: - return Error(Loc, "index must be a multiple of 16 in range [0,65520]."); - case Match_InvalidImm1_8: - return Error(Loc, "immediate must be an integer in range [1,8]."); - case Match_InvalidImm1_16: - return Error(Loc, "immediate must be an integer in range [1,16]."); - case Match_InvalidImm1_32: - return Error(Loc, "immediate must be an integer in range [1,32]."); - case Match_InvalidImm1_64: - return Error(Loc, "immediate must be an integer in range [1,64]."); - case Match_MnemonicFail: - return Error(Loc, "unrecognized instruction mnemonic"); - default: - assert(0 && "unexpected error code!"); - return Error(Loc, "invalid instruction format"); - } -} - -bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - OperandVector &Operands, - MCStreamer &Out, - unsigned &ErrorInfo, - bool MatchingInlineAsm) { - assert(!Operands.empty() && "Unexpect empty operand list!"); - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[0]); - assert(Op->isToken() && "Leading operand should always be a mnemonic!"); - - StringRef Tok = Op->getToken(); - // Translate CMN/CMP pseudos to ADDS/SUBS with zero register destination. - // This needs to be done before the special handling of ADD/SUB immediates. 
- if (Tok == "cmp" || Tok == "cmn") { - // Replace the opcode with either ADDS or SUBS. - const char *Repl = StringSwitch<const char *>(Tok) - .Case("cmp", "subs") - .Case("cmn", "adds") - .Default(0); - assert(Repl && "Unknown compare instruction"); - delete Operands[0]; - Operands[0] = ARM64Operand::CreateToken(Repl, false, IDLoc, getContext()); - - // Insert WZR or XZR as destination operand. - ARM64Operand *RegOp = static_cast<ARM64Operand *>(Operands[1]); - unsigned ZeroReg; - if (RegOp->isReg() && - (isGPR32Register(RegOp->getReg()) || RegOp->getReg() == ARM64::WZR)) - ZeroReg = ARM64::WZR; - else - ZeroReg = ARM64::XZR; - Operands.insert( - Operands.begin() + 1, - ARM64Operand::CreateReg(ZeroReg, false, IDLoc, IDLoc, getContext())); - // Update since we modified it above. - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[0]); - Tok = Op->getToken(); - } - - unsigned NumOperands = Operands.size(); - - if (Tok == "mov" && NumOperands == 3) { - // The MOV mnemomic is aliased to movn/movz, depending on the value of - // the immediate being instantiated. - // FIXME: Catching this here is a total hack, and we should use tblgen - // support to implement this instead as soon as it is available. - - ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]); - if (Op2->isImm()) { - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op2->getImm())) { - uint64_t Val = CE->getValue(); - uint64_t NVal = ~Val; - - // If this is a 32-bit register and the value has none of the upper - // set, clear the complemented upper 32-bits so the logic below works - // for 32-bit registers too. - ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]); - if (Op1->isReg() && isGPR32Register(Op1->getReg()) && - (Val & 0xFFFFFFFFULL) == Val) - NVal &= 0x00000000FFFFFFFFULL; - - // MOVK Rd, imm << 0 - if ((Val & 0xFFFF) == Val) - rewriteMOV(Operands, "movz", Val, 0, getContext()); - - // MOVK Rd, imm << 16 - else if ((Val & 0xFFFF0000ULL) == Val) - rewriteMOV(Operands, "movz", Val, 16, getContext()); - - // MOVK Rd, imm << 32 - else if ((Val & 0xFFFF00000000ULL) == Val) - rewriteMOV(Operands, "movz", Val, 32, getContext()); - - // MOVK Rd, imm << 48 - else if ((Val & 0xFFFF000000000000ULL) == Val) - rewriteMOV(Operands, "movz", Val, 48, getContext()); - - // MOVN Rd, (~imm << 0) - else if ((NVal & 0xFFFFULL) == NVal) - rewriteMOV(Operands, "movn", NVal, 0, getContext()); - - // MOVN Rd, ~(imm << 16) - else if ((NVal & 0xFFFF0000ULL) == NVal) - rewriteMOV(Operands, "movn", NVal, 16, getContext()); - - // MOVN Rd, ~(imm << 32) - else if ((NVal & 0xFFFF00000000ULL) == NVal) - rewriteMOV(Operands, "movn", NVal, 32, getContext()); - - // MOVN Rd, ~(imm << 48) - else if ((NVal & 0xFFFF000000000000ULL) == NVal) - rewriteMOV(Operands, "movn", NVal, 48, getContext()); - } - } - } else if (NumOperands == 4) { - if (Tok == "add" || Tok == "adds" || Tok == "sub" || Tok == "subs") { - // Handle the uimm24 immediate form, where the shift is not specified. 
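Editorial aside, not part of the deleted source: the MOV-immediate handling above selects MOVZ when the value occupies a single 16-bit halfword and MOVN when its bitwise complement does, turning the halfword position into an LSL amount of 0, 16, 32 or 48 (the 32-bit register case additionally masks the complement, as noted in the code). A minimal standalone sketch of that selection rule follows; the register name x0 and the sample value are placeholders.

  #include <cstdint>
  #include <cstdio>

  // Returns true and sets Shift (0, 16, 32 or 48) when Val fits entirely in
  // one 16-bit halfword at that position.
  static bool fitsInOneHalfword(uint64_t Val, unsigned &Shift) {
    for (Shift = 0; Shift < 64; Shift += 16)
      if ((Val & (0xFFFFULL << Shift)) == Val)
        return true;
    return false;
  }

  int main() {
    uint64_t Val = 0x12340000ULL; // placeholder immediate
    unsigned Shift;
    if (fitsInOneHalfword(Val, Shift))
      std::printf("movz x0, #0x%llx, lsl #%u\n",
                  (unsigned long long)(Val >> Shift), Shift);
    else if (fitsInOneHalfword(~Val, Shift))
      std::printf("movn x0, #0x%llx, lsl #%u\n",
                  (unsigned long long)(~Val >> Shift), Shift);
    else
      std::printf("needs a MOVZ/MOVN plus MOVK sequence\n");
    return 0; // prints: movz x0, #0x1234, lsl #16
  }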
- ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]); - if (Op3->isImm()) { - if (const MCConstantExpr *CE = - dyn_cast<MCConstantExpr>(Op3->getImm())) { - uint64_t Val = CE->getValue(); - if (Val >= (1 << 24)) { - Error(IDLoc, "immediate value is too large"); - return true; - } - if (Val < (1 << 12)) { - Operands.push_back(ARM64Operand::CreateShifter( - ARM64_AM::LSL, 0, IDLoc, IDLoc, getContext())); - } else if ((Val & 0xfff) == 0) { - delete Operands[3]; - CE = MCConstantExpr::Create(Val >> 12, getContext()); - Operands[3] = - ARM64Operand::CreateImm(CE, IDLoc, IDLoc, getContext()); - Operands.push_back(ARM64Operand::CreateShifter( - ARM64_AM::LSL, 12, IDLoc, IDLoc, getContext())); - } else { - Error(IDLoc, "immediate value is too large"); - return true; - } - } else { - Operands.push_back(ARM64Operand::CreateShifter( - ARM64_AM::LSL, 0, IDLoc, IDLoc, getContext())); - } - } - - // FIXME: Horible hack to handle the LSL -> UBFM alias. - } else if (NumOperands == 4 && Tok == "lsl") { - ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]); - ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]); - if (Op2->isReg() && Op3->isImm()) { - const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm()); - if (Op3CE) { - uint64_t Op3Val = Op3CE->getValue(); - uint64_t NewOp3Val = 0; - uint64_t NewOp4Val = 0; - if (isGPR32Register(Op2->getReg()) || Op2->getReg() == ARM64::WZR) { - NewOp3Val = (32 - Op3Val) & 0x1f; - NewOp4Val = 31 - Op3Val; - } else { - NewOp3Val = (64 - Op3Val) & 0x3f; - NewOp4Val = 63 - Op3Val; - } - - const MCExpr *NewOp3 = - MCConstantExpr::Create(NewOp3Val, getContext()); - const MCExpr *NewOp4 = - MCConstantExpr::Create(NewOp4Val, getContext()); - - Operands[0] = ARM64Operand::CreateToken( - "ubfm", false, Op->getStartLoc(), getContext()); - Operands[3] = ARM64Operand::CreateImm(NewOp3, Op3->getStartLoc(), - Op3->getEndLoc(), getContext()); - Operands.push_back(ARM64Operand::CreateImm( - NewOp4, Op3->getStartLoc(), Op3->getEndLoc(), getContext())); - delete Op3; - delete Op; - } - } - - // FIXME: Horrible hack to handle the optional LSL shift for vector - // instructions. - } else if (NumOperands == 4 && (Tok == "bic" || Tok == "orr")) { - ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]); - ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]); - ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]); - if ((Op1->isToken() && Op2->isVectorReg() && Op3->isImm()) || - (Op1->isVectorReg() && Op2->isToken() && Op3->isImm())) - Operands.push_back(ARM64Operand::CreateShifter(ARM64_AM::LSL, 0, IDLoc, - IDLoc, getContext())); - } else if (NumOperands == 4 && (Tok == "movi" || Tok == "mvni")) { - ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]); - ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]); - ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]); - if ((Op1->isToken() && Op2->isVectorReg() && Op3->isImm()) || - (Op1->isVectorReg() && Op2->isToken() && Op3->isImm())) { - StringRef Suffix = Op1->isToken() ? Op1->getToken() : Op2->getToken(); - // Canonicalize on lower-case for ease of comparison. 
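Editorial aside, not part of the deleted source: the LSL-to-UBFM rewrite above encodes a left shift as a bitfield move with immr = (regsize - shift) mod regsize and imms = regsize - 1 - shift (the 32-bit form uses 32 and 31 in place of 64 and 63). A short worked sketch of the 64-bit case; the register names and shift amount are placeholders.

  #include <cstdio>

  int main() {
    unsigned Shift = 4;                  // lsl x0, x1, #4
    unsigned Immr = (64 - Shift) & 0x3f; // rotate amount: 60
    unsigned Imms = 63 - Shift;          // highest source bit copied: 59
    std::printf("ubfm x0, x1, #%u, #%u\n", Immr, Imms);
    return 0; // prints: ubfm x0, x1, #60, #59
  }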
- std::string CanonicalSuffix = Suffix.lower(); - if (Tok != "movi" || - (CanonicalSuffix != ".1d" && CanonicalSuffix != ".2d" && - CanonicalSuffix != ".8b" && CanonicalSuffix != ".16b")) - Operands.push_back(ARM64Operand::CreateShifter( - ARM64_AM::LSL, 0, IDLoc, IDLoc, getContext())); - } - } - } else if (NumOperands == 5) { - // FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and - // UBFIZ -> UBFM aliases. - if (Tok == "bfi" || Tok == "sbfiz" || Tok == "ubfiz") { - ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]); - ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]); - ARM64Operand *Op4 = static_cast<ARM64Operand *>(Operands[4]); - - if (Op1->isReg() && Op3->isImm() && Op4->isImm()) { - const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm()); - const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm()); - - if (Op3CE && Op4CE) { - uint64_t Op3Val = Op3CE->getValue(); - uint64_t Op4Val = Op4CE->getValue(); - - uint64_t NewOp3Val = 0; - if (isGPR32Register(Op1->getReg())) - NewOp3Val = (32 - Op3Val) & 0x1f; - else - NewOp3Val = (64 - Op3Val) & 0x3f; - - uint64_t NewOp4Val = Op4Val - 1; - - const MCExpr *NewOp3 = - MCConstantExpr::Create(NewOp3Val, getContext()); - const MCExpr *NewOp4 = - MCConstantExpr::Create(NewOp4Val, getContext()); - Operands[3] = ARM64Operand::CreateImm(NewOp3, Op3->getStartLoc(), - Op3->getEndLoc(), getContext()); - Operands[4] = ARM64Operand::CreateImm(NewOp4, Op4->getStartLoc(), - Op4->getEndLoc(), getContext()); - if (Tok == "bfi") - Operands[0] = ARM64Operand::CreateToken( - "bfm", false, Op->getStartLoc(), getContext()); - else if (Tok == "sbfiz") - Operands[0] = ARM64Operand::CreateToken( - "sbfm", false, Op->getStartLoc(), getContext()); - else if (Tok == "ubfiz") - Operands[0] = ARM64Operand::CreateToken( - "ubfm", false, Op->getStartLoc(), getContext()); - else - llvm_unreachable("No valid mnemonic for alias?"); - - delete Op; - delete Op3; - delete Op4; - } - } - - // FIXME: Horrible hack to handle the BFXIL->BFM, SBFX->SBFM, and - // UBFX -> UBFM aliases. - } else if (NumOperands == 5 && - (Tok == "bfxil" || Tok == "sbfx" || Tok == "ubfx")) { - ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]); - ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]); - ARM64Operand *Op4 = static_cast<ARM64Operand *>(Operands[4]); - - if (Op1->isReg() && Op3->isImm() && Op4->isImm()) { - const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm()); - const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm()); - - if (Op3CE && Op4CE) { - uint64_t Op3Val = Op3CE->getValue(); - uint64_t Op4Val = Op4CE->getValue(); - uint64_t NewOp4Val = Op3Val + Op4Val - 1; - - if (NewOp4Val >= Op3Val) { - const MCExpr *NewOp4 = - MCConstantExpr::Create(NewOp4Val, getContext()); - Operands[4] = ARM64Operand::CreateImm( - NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext()); - if (Tok == "bfxil") - Operands[0] = ARM64Operand::CreateToken( - "bfm", false, Op->getStartLoc(), getContext()); - else if (Tok == "sbfx") - Operands[0] = ARM64Operand::CreateToken( - "sbfm", false, Op->getStartLoc(), getContext()); - else if (Tok == "ubfx") - Operands[0] = ARM64Operand::CreateToken( - "ubfm", false, Op->getStartLoc(), getContext()); - else - llvm_unreachable("No valid mnemonic for alias?"); - - delete Op; - delete Op4; - } - } - } - } - } - // FIXME: Horrible hack for tbz and tbnz with Wn register operand. - // InstAlias can't quite handle this since the reg classes aren't - // subclasses. 
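Editorial aside, not part of the deleted source: both alias families handled above lower to the same BFM/SBFM/UBFM encoding, the insert-in-zero forms (bfi, sbfiz, ubfiz) via immr = (regsize - lsb) mod regsize and imms = width - 1, and the extract forms (bfxil, sbfx, ubfx) via immr = lsb and imms = lsb + width - 1. A short worked sketch of the two 64-bit UBFM cases; the operand names and field values are placeholders.

  #include <cstdio>

  int main() {
    unsigned Lsb = 8, Width = 4; // placeholder field position and width
    // ubfiz x0, x1, #8, #4  (insert a 4-bit field at bit 8)
    std::printf("ubfm x0, x1, #%u, #%u\n", (64 - Lsb) & 0x3f, Width - 1); // #56, #3
    // ubfx x0, x1, #8, #4   (extract a 4-bit field starting at bit 8)
    std::printf("ubfm x0, x1, #%u, #%u\n", Lsb, Lsb + Width - 1);         // #8, #11
    return 0;
  }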
- if (NumOperands == 4 && (Tok == "tbz" || Tok == "tbnz")) { - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[2]); - if (Op->isImm()) { - if (const MCConstantExpr *OpCE = dyn_cast<MCConstantExpr>(Op->getImm())) { - if (OpCE->getValue() < 32) { - // The source register can be Wn here, but the matcher expects a - // GPR64. Twiddle it here if necessary. - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[1]); - if (Op->isReg()) { - unsigned Reg = getXRegFromWReg(Op->getReg()); - Operands[1] = ARM64Operand::CreateReg( - Reg, false, Op->getStartLoc(), Op->getEndLoc(), getContext()); - delete Op; - } - } - } - } - } - // FIXME: Horrible hack for sxtw and uxtw with Wn src and Xd dst operands. - // InstAlias can't quite handle this since the reg classes aren't - // subclasses. - if (NumOperands == 3 && (Tok == "sxtw" || Tok == "uxtw")) { - // The source register can be Wn here, but the matcher expects a - // GPR64. Twiddle it here if necessary. - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[2]); - if (Op->isReg()) { - unsigned Reg = getXRegFromWReg(Op->getReg()); - Operands[2] = ARM64Operand::CreateReg(Reg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); - delete Op; - } - } - // FIXME: Likewise for [su]xt[bh] with a Xd dst operand - else if (NumOperands == 3 && - (Tok == "sxtb" || Tok == "uxtb" || Tok == "sxth" || Tok == "uxth")) { - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[1]); - if (Op->isReg() && isGPR64Reg(Op->getReg())) { - // The source register can be Wn here, but the matcher expects a - // GPR64. Twiddle it here if necessary. - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[2]); - if (Op->isReg()) { - unsigned Reg = getXRegFromWReg(Op->getReg()); - Operands[2] = ARM64Operand::CreateReg(Reg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); - delete Op; - } - } - } - - // Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR. - if (NumOperands == 3 && Tok == "fmov") { - ARM64Operand *RegOp = static_cast<ARM64Operand *>(Operands[1]); - ARM64Operand *ImmOp = static_cast<ARM64Operand *>(Operands[2]); - if (RegOp->isReg() && ImmOp->isFPImm() && - ImmOp->getFPImm() == (unsigned)-1) { - unsigned zreg = - isFPR32Register(RegOp->getReg()) ? ARM64::WZR : ARM64::XZR; - Operands[2] = ARM64Operand::CreateReg(zreg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); - delete ImmOp; - } - } - - // FIXME: Horrible hack to handle the literal .d[1] vector index on - // FMOV instructions. The index isn't an actual instruction operand - // but rather syntactic sugar. It really should be part of the mnemonic, - // not the operand, but whatever. - if ((NumOperands == 5) && Tok == "fmov") { - // If the last operand is a vectorindex of '1', then replace it with - // a '[' '1' ']' token sequence, which is what the matcher - // (annoyingly) expects for a literal vector index operand. - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[NumOperands - 1]); - if (Op->isVectorIndexD() && Op->getVectorIndex() == 1) { - SMLoc Loc = Op->getStartLoc(); - Operands.pop_back(); - Operands.push_back( - ARM64Operand::CreateToken("[", false, Loc, getContext())); - Operands.push_back( - ARM64Operand::CreateToken("1", false, Loc, getContext())); - Operands.push_back( - ARM64Operand::CreateToken("]", false, Loc, getContext())); - } else if (Op->isReg()) { - // Similarly, check the destination operand for the GPR->High-lane - // variant. 
- unsigned OpNo = NumOperands - 2; - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[OpNo]); - if (Op->isVectorIndexD() && Op->getVectorIndex() == 1) { - SMLoc Loc = Op->getStartLoc(); - Operands[OpNo] = - ARM64Operand::CreateToken("[", false, Loc, getContext()); - Operands.insert( - Operands.begin() + OpNo + 1, - ARM64Operand::CreateToken("1", false, Loc, getContext())); - Operands.insert( - Operands.begin() + OpNo + 2, - ARM64Operand::CreateToken("]", false, Loc, getContext())); - } - } - } - - MCInst Inst; - // First try to match against the secondary set of tables containing the - // short-form NEON instructions (e.g. "fadd.2s v0, v1, v2"). - unsigned MatchResult = - MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 1); - - // If that fails, try against the alternate table containing long-form NEON: - // "fadd v0.2s, v1.2s, v2.2s" - if (MatchResult != Match_Success) - MatchResult = - MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 0); - - switch (MatchResult) { - case Match_Success: { - // Perform range checking and other semantic validations - SmallVector<SMLoc, 8> OperandLocs; - NumOperands = Operands.size(); - for (unsigned i = 1; i < NumOperands; ++i) - OperandLocs.push_back(Operands[i]->getStartLoc()); - if (validateInstruction(Inst, OperandLocs)) - return true; - - Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, STI); - return false; - } - case Match_MissingFeature: - case Match_MnemonicFail: - return showMatchError(IDLoc, MatchResult); - case Match_InvalidOperand: { - SMLoc ErrorLoc = IDLoc; - if (ErrorInfo != ~0U) { - if (ErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); - - ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc(); - if (ErrorLoc == SMLoc()) - ErrorLoc = IDLoc; - } - // If the match failed on a suffix token operand, tweak the diagnostic - // accordingly. - if (((ARM64Operand *)Operands[ErrorInfo])->isToken() && - ((ARM64Operand *)Operands[ErrorInfo])->isTokenSuffix()) - MatchResult = Match_InvalidSuffix; - - return showMatchError(ErrorLoc, MatchResult); - } - case Match_InvalidMemoryIndexedSImm9: { - // If there is not a '!' after the memory operand that failed, we really - // want the diagnostic for the non-pre-indexed instruction variant instead. - // Be careful to check for the post-indexed variant as well, which also - // uses this match diagnostic. Also exclude the explicitly unscaled - // mnemonics, as they want the unscaled diagnostic as well. - if (Operands.size() == ErrorInfo + 1 && - !((ARM64Operand *)Operands[ErrorInfo])->isImm() && - !Tok.startswith("stur") && !Tok.startswith("ldur")) { - // whether we want an Indexed64 or Indexed32 diagnostic depends on - // the register class of the previous operand. Default to 64 in case - // we see something unexpected. - MatchResult = Match_InvalidMemoryIndexed64; - if (ErrorInfo) { - ARM64Operand *PrevOp = (ARM64Operand *)Operands[ErrorInfo - 1]; - if (PrevOp->isReg() && ARM64MCRegisterClasses[ARM64::GPR32RegClassID] - .contains(PrevOp->getReg())) - MatchResult = Match_InvalidMemoryIndexed32; - } - } - SMLoc ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc(); - if (ErrorLoc == SMLoc()) - ErrorLoc = IDLoc; - return showMatchError(ErrorLoc, MatchResult); - } - case Match_InvalidMemoryIndexed32: - case Match_InvalidMemoryIndexed64: - case Match_InvalidMemoryIndexed128: - // If there is a '!' after the memory operand that failed, we really - // want the diagnostic for the pre-indexed instruction variant instead. 
- if (Operands.size() > ErrorInfo + 1 && - ((ARM64Operand *)Operands[ErrorInfo + 1])->isTokenEqual("!")) - MatchResult = Match_InvalidMemoryIndexedSImm9; - // FALL THROUGH - case Match_InvalidMemoryIndexed8: - case Match_InvalidMemoryIndexed16: - case Match_InvalidMemoryIndexed32SImm7: - case Match_InvalidMemoryIndexed64SImm7: - case Match_InvalidMemoryIndexed128SImm7: - case Match_InvalidImm1_8: - case Match_InvalidImm1_16: - case Match_InvalidImm1_32: - case Match_InvalidImm1_64: { - // Any time we get here, there's nothing fancy to do. Just get the - // operand SMLoc and display the diagnostic. - SMLoc ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc(); - // If it's a memory operand, the error is with the offset immediate, - // so get that location instead. - if (((ARM64Operand *)Operands[ErrorInfo])->isMem()) - ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getOffsetLoc(); - if (ErrorLoc == SMLoc()) - ErrorLoc = IDLoc; - return showMatchError(ErrorLoc, MatchResult); - } - } - - llvm_unreachable("Implement any new match types added!"); - return true; -} - -/// ParseDirective parses the arm specific directives -bool ARM64AsmParser::ParseDirective(AsmToken DirectiveID) { - StringRef IDVal = DirectiveID.getIdentifier(); - SMLoc Loc = DirectiveID.getLoc(); - if (IDVal == ".hword") - return parseDirectiveWord(2, Loc); - if (IDVal == ".word") - return parseDirectiveWord(4, Loc); - if (IDVal == ".xword") - return parseDirectiveWord(8, Loc); - if (IDVal == ".tlsdesccall") - return parseDirectiveTLSDescCall(Loc); - - return parseDirectiveLOH(IDVal, Loc); -} - -/// parseDirectiveWord -/// ::= .word [ expression (, expression)* ] -bool ARM64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { - if (getLexer().isNot(AsmToken::EndOfStatement)) { - for (;;) { - const MCExpr *Value; - if (getParser().parseExpression(Value)) - return true; - - getParser().getStreamer().EmitValue(Value, Size); - - if (getLexer().is(AsmToken::EndOfStatement)) - break; - - // FIXME: Improve diagnostic. - if (getLexer().isNot(AsmToken::Comma)) - return Error(L, "unexpected token in directive"); - Parser.Lex(); - } - } - - Parser.Lex(); - return false; -} - -// parseDirectiveTLSDescCall: -// ::= .tlsdesccall symbol -bool ARM64AsmParser::parseDirectiveTLSDescCall(SMLoc L) { - StringRef Name; - if (getParser().parseIdentifier(Name)) - return Error(L, "expected symbol after directive"); - - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext()); - Expr = ARM64MCExpr::Create(Expr, ARM64MCExpr::VK_TLSDESC, getContext()); - - MCInst Inst; - Inst.setOpcode(ARM64::TLSDESCCALL); - Inst.addOperand(MCOperand::CreateExpr(Expr)); - - getParser().getStreamer().EmitInstruction(Inst, STI); - return false; -} - -/// ::= .loh <lohName | lohId> label1, ..., labelN -/// The number of arguments depends on the loh identifier. -bool ARM64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) { - if (IDVal != MCLOHDirectiveName()) - return true; - MCLOHType Kind; - if (getParser().getTok().isNot(AsmToken::Identifier)) { - if (getParser().getTok().isNot(AsmToken::Integer)) - return TokError("expected an identifier or a number in directive"); - // We successfully get a numeric value for the identifier. - // Check if it is valid. - int64_t Id = getParser().getTok().getIntVal(); - Kind = (MCLOHType)Id; - // Check that Id does not overflow MCLOHType. 
- if (!isValidMCLOHType(Kind) || Id != Kind) - return TokError("invalid numeric identifier in directive"); - } else { - StringRef Name = getTok().getIdentifier(); - // We successfully parse an identifier. - // Check if it is a recognized one. - int Id = MCLOHNameToId(Name); - - if (Id == -1) - return TokError("invalid identifier in directive"); - Kind = (MCLOHType)Id; - } - // Consume the identifier. - Lex(); - // Get the number of arguments of this LOH. - int NbArgs = MCLOHIdToNbArgs(Kind); - - assert(NbArgs != -1 && "Invalid number of arguments"); - - SmallVector<MCSymbol *, 3> Args; - for (int Idx = 0; Idx < NbArgs; ++Idx) { - StringRef Name; - if (getParser().parseIdentifier(Name)) - return TokError("expected identifier in directive"); - Args.push_back(getContext().GetOrCreateSymbol(Name)); - - if (Idx + 1 == NbArgs) - break; - if (getLexer().isNot(AsmToken::Comma)) - return TokError("unexpected token in '" + Twine(IDVal) + "' directive"); - Lex(); - } - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '" + Twine(IDVal) + "' directive"); - - getStreamer().EmitLOHDirective((MCLOHType)Kind, Args); - return false; -} - -bool -ARM64AsmParser::classifySymbolRef(const MCExpr *Expr, - ARM64MCExpr::VariantKind &ELFRefKind, - MCSymbolRefExpr::VariantKind &DarwinRefKind, - const MCConstantExpr *&Addend) { - ELFRefKind = ARM64MCExpr::VK_INVALID; - DarwinRefKind = MCSymbolRefExpr::VK_None; - - if (const ARM64MCExpr *AE = dyn_cast<ARM64MCExpr>(Expr)) { - ELFRefKind = AE->getKind(); - Expr = AE->getSubExpr(); - } - - const MCSymbolRefExpr *SE = dyn_cast<MCSymbolRefExpr>(Expr); - if (SE) { - // It's a simple symbol reference with no addend. - DarwinRefKind = SE->getKind(); - Addend = 0; - return true; - } - - const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr); - if (!BE) - return false; - - SE = dyn_cast<MCSymbolRefExpr>(BE->getLHS()); - if (!SE) - return false; - DarwinRefKind = SE->getKind(); - - if (BE->getOpcode() != MCBinaryExpr::Add) - return false; - - // See if the addend is is a constant, otherwise there's more going - // on here than we can deal with. - Addend = dyn_cast<MCConstantExpr>(BE->getRHS()); - if (!Addend) - return false; - - // It's some symbol reference + a constant addend, but really - // shouldn't use both Darwin and ELF syntax. - return ELFRefKind == ARM64MCExpr::VK_INVALID || - DarwinRefKind == MCSymbolRefExpr::VK_None; -} - -/// Force static initialization. -extern "C" void LLVMInitializeARM64AsmParser() { - RegisterMCAsmParser<ARM64AsmParser> X(TheARM64Target); -} - -#define GET_REGISTER_MATCHER -#define GET_MATCHER_IMPLEMENTATION -#include "ARM64GenAsmMatcher.inc" - -// Define this matcher function after the auto-generated include so we -// have the match class enum definitions. -unsigned ARM64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, - unsigned Kind) { - ARM64Operand *Op = static_cast<ARM64Operand *>(AsmOp); - // If the kind is a token for a literal immediate, check if our asm - // operand matches. This is for InstAliases which have a fixed-value - // immediate in the syntax. 
- int64_t ExpectedVal; - switch (Kind) { - default: - return Match_InvalidOperand; - case MCK__35_0: - ExpectedVal = 0; - break; - case MCK__35_1: - ExpectedVal = 1; - break; - case MCK__35_12: - ExpectedVal = 12; - break; - case MCK__35_16: - ExpectedVal = 16; - break; - case MCK__35_2: - ExpectedVal = 2; - break; - case MCK__35_24: - ExpectedVal = 24; - break; - case MCK__35_3: - ExpectedVal = 3; - break; - case MCK__35_32: - ExpectedVal = 32; - break; - case MCK__35_4: - ExpectedVal = 4; - break; - case MCK__35_48: - ExpectedVal = 48; - break; - case MCK__35_6: - ExpectedVal = 6; - break; - case MCK__35_64: - ExpectedVal = 64; - break; - case MCK__35_8: - ExpectedVal = 8; - break; - } - if (!Op->isImm()) - return Match_InvalidOperand; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); - if (!CE) - return Match_InvalidOperand; - if (CE->getValue() == ExpectedVal) - return Match_Success; - return Match_InvalidOperand; -} diff --git a/lib/Target/ARM64/AsmParser/CMakeLists.txt b/lib/Target/ARM64/AsmParser/CMakeLists.txt deleted file mode 100644 index 826158b..0000000 --- a/lib/Target/ARM64/AsmParser/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARM64AsmParser - ARM64AsmParser.cpp - ) - diff --git a/lib/Target/ARM64/AsmParser/LLVMBuild.txt b/lib/Target/ARM64/AsmParser/LLVMBuild.txt deleted file mode 100644 index 2c8fafe..0000000 --- a/lib/Target/ARM64/AsmParser/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/ARM64/AsmParser/LLVMBuild.txt ---------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ARM64AsmParser -parent = ARM64 -required_libraries = ARM64Desc ARM64Info MC MCParser Support -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/AsmParser/Makefile b/lib/Target/ARM64/AsmParser/Makefile deleted file mode 100644 index d25c47f..0000000 --- a/lib/Target/ARM64/AsmParser/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64AsmParser - -# Hack: we need to include 'main' ARM target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/CMakeLists.txt b/lib/Target/ARM64/CMakeLists.txt deleted file mode 100644 index 6de861c..0000000 --- a/lib/Target/ARM64/CMakeLists.txt +++ /dev/null @@ -1,50 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS ARM64.td) - -tablegen(LLVM ARM64GenRegisterInfo.inc -gen-register-info) -tablegen(LLVM ARM64GenInstrInfo.inc -gen-instr-info) -tablegen(LLVM ARM64GenMCCodeEmitter.inc -gen-emitter -mc-emitter) -tablegen(LLVM ARM64GenMCPseudoLowering.inc -gen-pseudo-lowering) -tablegen(LLVM ARM64GenAsmWriter.inc -gen-asm-writer) -tablegen(LLVM ARM64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) -tablegen(LLVM ARM64GenAsmMatcher.inc -gen-asm-matcher) -tablegen(LLVM ARM64GenDAGISel.inc -gen-dag-isel) -tablegen(LLVM ARM64GenFastISel.inc -gen-fast-isel) -tablegen(LLVM ARM64GenCallingConv.inc -gen-callingconv) -tablegen(LLVM ARM64GenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM ARM64GenDisassemblerTables.inc -gen-disassembler) -add_public_tablegen_target(ARM64CommonTableGen) - -add_llvm_target(ARM64CodeGen - ARM64AddressTypePromotion.cpp - ARM64AdvSIMDScalarPass.cpp - ARM64AsmPrinter.cpp - ARM64BranchRelaxation.cpp - ARM64CleanupLocalDynamicTLSPass.cpp - ARM64CollectLOH.cpp - ARM64ConditionalCompares.cpp - ARM64DeadRegisterDefinitionsPass.cpp - ARM64ExpandPseudoInsts.cpp - ARM64FastISel.cpp - ARM64FrameLowering.cpp - ARM64ISelDAGToDAG.cpp - ARM64ISelLowering.cpp - ARM64InstrInfo.cpp - ARM64LoadStoreOptimizer.cpp - ARM64MCInstLower.cpp - ARM64PromoteConstant.cpp - ARM64RegisterInfo.cpp - ARM64SelectionDAGInfo.cpp - ARM64StorePairSuppress.cpp - ARM64Subtarget.cpp - ARM64TargetMachine.cpp - ARM64TargetObjectFile.cpp - ARM64TargetTransformInfo.cpp -) - -add_dependencies(LLVMARM64CodeGen intrinsics_gen) - -add_subdirectory(TargetInfo) -add_subdirectory(AsmParser) -add_subdirectory(Disassembler) -add_subdirectory(InstPrinter) -add_subdirectory(MCTargetDesc) diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp b/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp deleted file mode 100644 index 44c501f..0000000 --- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp +++ /dev/null @@ -1,2142 +0,0 @@ -//===- ARM64Disassembler.cpp - Disassembler for ARM64 -----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-disassembler" - -#include "ARM64Disassembler.h" -#include "ARM64Subtarget.h" -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCFixedLenDisassembler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MemoryObject.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" - -// Pull DecodeStatus and its enum values into the global namespace. -typedef llvm::MCDisassembler::DecodeStatus DecodeStatus; - -// Forward declare these because the autogenerated code will reference them. -// Definitions are further down. 
-static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR128_loRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPR64spRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPR32spRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeQQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeQQQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeDDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeDDDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeFixedPointScaleImm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeCondBranchTarget(llvm::MCInst &Inst, unsigned Imm, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeSystemRegister(llvm::MCInst &Inst, unsigned Imm, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, - uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeRegOffsetLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Address, - const void *Decoder); -static 
DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeSystemCPSRInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSIMDLdStPost(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeSIMDLdStSingle(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeSIMDLdStSingleTied(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder); - -static DecodeStatus DecodeVecShiftR64Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeVecShiftR64ImmNarrow(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder); -static DecodeStatus DecodeVecShiftR32Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeVecShiftR32ImmNarrow(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder); -static DecodeStatus DecodeVecShiftR16Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeVecShiftR16ImmNarrow(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder); -static DecodeStatus DecodeVecShiftR8Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeVecShiftL64Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeVecShiftL32Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); - -#include "ARM64GenDisassemblerTables.inc" -#include "ARM64GenInstrInfo.inc" - -using namespace llvm; - -#define Success llvm::MCDisassembler::Success -#define Fail llvm::MCDisassembler::Fail - -static MCDisassembler *createARM64Disassembler(const Target &T, - const MCSubtargetInfo &STI) { - return new ARM64Disassembler(STI); -} - -DecodeStatus ARM64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, - const MemoryObject &Region, - uint64_t Address, - raw_ostream &os, - raw_ostream &cs) const { - CommentStream = &cs; - - uint8_t bytes[4]; - - Size = 0; - // We want to read exactly 4 bytes of data. - if (Region.readBytes(Address, 4, (uint8_t *)bytes) == -1) - return Fail; - Size = 4; - - // Encoded as a small-endian 32-bit word in the stream. - uint32_t insn = - (bytes[3] << 24) | (bytes[2] << 16) | (bytes[1] << 8) | (bytes[0] << 0); - - // Calling the auto-generated decoder function. 
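As a concrete illustration of the byte-to-word assembly in getInstruction above (the example bytes and their disassembly are mine, not from this commit): ARM64 instructions are fixed 4-byte words stored little-endian, so the lowest-addressed byte ends up in the least significant byte of insn before the word is handed to the generated decoder table.

#include <cstdint>
#include <cstdio>

int main() {
  // Bytes as they would appear in memory for "mov x0, xzr" (an ORR with the
  // zero register); believed correct, but treat the encoding as illustrative.
  uint8_t bytes[4] = {0xE0, 0x03, 0x1F, 0xAA};
  uint32_t insn = ((uint32_t)bytes[3] << 24) | ((uint32_t)bytes[2] << 16) |
                  ((uint32_t)bytes[1] << 8) | (uint32_t)bytes[0];
  printf("0x%08X\n", insn); // prints 0xAA1F03E0, the value passed to decodeInstruction
  return 0;
}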
- DecodeStatus result = - decodeInstruction(DecoderTable32, MI, insn, Address, this, STI); - if (!result) - return Fail; - - return Success; -} - -static MCSymbolRefExpr::VariantKind -getVariant(uint64_t LLVMDisassembler_VariantKind) { - switch (LLVMDisassembler_VariantKind) { - case LLVMDisassembler_VariantKind_None: - return MCSymbolRefExpr::VK_None; - case LLVMDisassembler_VariantKind_ARM64_PAGE: - return MCSymbolRefExpr::VK_PAGE; - case LLVMDisassembler_VariantKind_ARM64_PAGEOFF: - return MCSymbolRefExpr::VK_PAGEOFF; - case LLVMDisassembler_VariantKind_ARM64_GOTPAGE: - return MCSymbolRefExpr::VK_GOTPAGE; - case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF: - return MCSymbolRefExpr::VK_GOTPAGEOFF; - case LLVMDisassembler_VariantKind_ARM64_TLVP: - case LLVMDisassembler_VariantKind_ARM64_TLVOFF: - default: - assert(0 && "bad LLVMDisassembler_VariantKind"); - return MCSymbolRefExpr::VK_None; - } -} - -/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic -/// operand in place of the immediate Value in the MCInst. The immediate -/// Value has not had any PC adjustment made by the caller. If the instruction -/// is a branch that adds the PC to the immediate Value then isBranch is -/// Success, else Fail. If the getOpInfo() function was set as part of the -/// setupForSymbolicDisassembly() call then that function is called to get any -/// symbolic information at the Address for this instruction. If that returns -/// non-zero then the symbolic information it returns is used to create an -/// MCExpr and that is added as an operand to the MCInst. If getOpInfo() -/// returns zero and isBranch is Success then a symbol look up for -/// Address + Value is done and if a symbol is found an MCExpr is created with -/// that, else an MCExpr with Address + Value is created. If getOpInfo() -/// returns zero and isBranch is Fail then the Opcode of the MCInst is -/// tested and for ADRP and other instructions that help to load pointers -/// a symbol look up is done to see if it returns a specific reference type -/// to add to the comment stream. This function returns Success if it adds -/// an operand to the MCInst and Fail otherwise.
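The doc comment above describes the disassembler-side half of symbolic operands; the getOpInfo/SymbolLookUp callbacks it refers to are installed by the client, typically through the llvm-c disassembler interface. The sketch below shows one plausible wiring so that a branch target is printed as a symbol name rather than a raw immediate. It is reconstructed from memory of the llvm-c API of that era (LLVMCreateDisasm, LLVMDisasmInstruction, LLVMDisasmDispose); the triple, the byte sequence, and the fake symbol table are assumptions, not taken from this commit.

#include "llvm-c/Disassembler.h"
#include "llvm-c/Target.h"
#include <cstdint>
#include <cstdio>

// Toy symbol lookup: pretend address 0x4000 is a function called "my_func".
// A real client would consult its own symbol table through the DisInfo pointer.
static const char *symbolLookup(void *DisInfo, uint64_t ReferenceValue,
                                uint64_t *ReferenceType, uint64_t ReferencePC,
                                const char **ReferenceName) {
  (void)DisInfo; (void)ReferencePC;
  *ReferenceName = nullptr;
  *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
  return ReferenceValue == 0x4000 ? "my_func" : nullptr;
}

int main() {
  LLVMInitializeAllTargetInfos();
  LLVMInitializeAllTargetMCs();
  LLVMInitializeAllDisassemblers();

  // Triple spelling is an assumption; this tree still names the target "arm64".
  LLVMDisasmContextRef DC = LLVMCreateDisasm("arm64-apple-darwin", nullptr, 0,
                                             nullptr, symbolLookup);
  if (!DC) return 1;

  uint8_t bytes[] = {0x00, 0x10, 0x00, 0x94}; // intended to be "bl 0x4000" at PC 0
  char text[128];
  size_t n = LLVMDisasmInstruction(DC, bytes, sizeof(bytes), /*PC=*/0,
                                   text, sizeof(text));
  if (n)
    printf("%s\n", text); // expected to read roughly "bl my_func" when the lookup hits
  LLVMDisasmDispose(DC);
  return 0;
}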
-bool ARM64Disassembler::tryAddingSymbolicOperand(uint64_t Address, int Value, - bool isBranch, - uint64_t InstSize, MCInst &MI, - uint32_t insn) const { - LLVMOpInfoCallback getOpInfo = getLLVMOpInfoCallback(); - - struct LLVMOpInfo1 SymbolicOp; - memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); - SymbolicOp.Value = Value; - void *DisInfo = getDisInfoBlock(); - uint64_t ReferenceType; - const char *ReferenceName; - const char *Name; - LLVMSymbolLookupCallback SymbolLookUp = getLLVMSymbolLookupCallback(); - if (!getOpInfo || - !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { - if (isBranch) { - if (SymbolLookUp) { - ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; - Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, - &ReferenceName); - if (Name) { - SymbolicOp.AddSymbol.Name = Name; - SymbolicOp.AddSymbol.Present = Success; - SymbolicOp.Value = 0; - } else { - SymbolicOp.Value = Address + Value; - } - if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) - (*CommentStream) << "symbol stub for: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Message) - (*CommentStream) << "Objc message: " << ReferenceName; - } else { - return false; - } - } else if (MI.getOpcode() == ARM64::ADRP) { - if (SymbolLookUp) { - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP; - Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address, - &ReferenceName); - (*CommentStream) << format("0x%llx", - 0xfffffffffffff000LL & (Address + Value)); - } else { - return false; - } - } else if (MI.getOpcode() == ARM64::ADDXri || - MI.getOpcode() == ARM64::LDRXui || - MI.getOpcode() == ARM64::LDRXl || MI.getOpcode() == ARM64::ADR) { - if (SymbolLookUp) { - if (MI.getOpcode() == ARM64::ADDXri) - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri; - else if (MI.getOpcode() == ARM64::LDRXui) - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui; - if (MI.getOpcode() == ARM64::LDRXl) { - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl; - Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, - &ReferenceName); - } else if (MI.getOpcode() == ARM64::ADR) { - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR; - Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, - &ReferenceName); - } else { - Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address, - &ReferenceName); - } - if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) - (*CommentStream) << "literal pool symbol address: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) - (*CommentStream) << "literal pool for: \"" << ReferenceName << "\""; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) - (*CommentStream) << "Objc cfstring ref: @\"" << ReferenceName << "\""; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Message) - (*CommentStream) << "Objc message: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) - (*CommentStream) << "Objc message ref: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) - (*CommentStream) << "Objc selector ref: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) - (*CommentStream) << "Objc class ref: " << ReferenceName; - // For these instructions, the SymbolLookUp() 
above is just to get the - // ReferenceType and ReferenceName. We want to make sure not to - // fall through so we don't build an MCExpr to leave the disassembly - // of the immediate values of these instructions to the InstPrinter. - return false; - } else { - return false; - } - } else { - return false; - } - } - - MCContext *Ctx = getMCContext(); - const MCExpr *Add = NULL; - if (SymbolicOp.AddSymbol.Present) { - if (SymbolicOp.AddSymbol.Name) { - StringRef Name(SymbolicOp.AddSymbol.Name); - MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); - MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind); - if (Variant != MCSymbolRefExpr::VK_None) - Add = MCSymbolRefExpr::Create(Sym, Variant, *Ctx); - else - Add = MCSymbolRefExpr::Create(Sym, *Ctx); - } else { - Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx); - } - } - - const MCExpr *Sub = NULL; - if (SymbolicOp.SubtractSymbol.Present) { - if (SymbolicOp.SubtractSymbol.Name) { - StringRef Name(SymbolicOp.SubtractSymbol.Name); - MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); - Sub = MCSymbolRefExpr::Create(Sym, *Ctx); - } else { - Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx); - } - } - - const MCExpr *Off = NULL; - if (SymbolicOp.Value != 0) - Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); - - const MCExpr *Expr; - if (Sub) { - const MCExpr *LHS; - if (Add) - LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); - else - LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); - if (Off != 0) - Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); - else - Expr = LHS; - } else if (Add) { - if (Off != 0) - Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); - else - Expr = Add; - } else { - if (Off != 0) - Expr = Off; - else - Expr = MCConstantExpr::Create(0, *Ctx); - } - - MI.addOperand(MCOperand::CreateExpr(Expr)); - - return true; -} - -extern "C" void LLVMInitializeARM64Disassembler() { - TargetRegistry::RegisterMCDisassembler(TheARM64Target, - createARM64Disassembler); -} - -static const unsigned FPR128DecoderTable[] = { - ARM64::Q0, ARM64::Q1, ARM64::Q2, ARM64::Q3, ARM64::Q4, ARM64::Q5, - ARM64::Q6, ARM64::Q7, ARM64::Q8, ARM64::Q9, ARM64::Q10, ARM64::Q11, - ARM64::Q12, ARM64::Q13, ARM64::Q14, ARM64::Q15, ARM64::Q16, ARM64::Q17, - ARM64::Q18, ARM64::Q19, ARM64::Q20, ARM64::Q21, ARM64::Q22, ARM64::Q23, - ARM64::Q24, ARM64::Q25, ARM64::Q26, ARM64::Q27, ARM64::Q28, ARM64::Q29, - ARM64::Q30, ARM64::Q31 -}; - -static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = FPR128DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 15) - return Fail; - return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder); -} - -static const unsigned FPR64DecoderTable[] = { - ARM64::D0, ARM64::D1, ARM64::D2, ARM64::D3, ARM64::D4, ARM64::D5, - ARM64::D6, ARM64::D7, ARM64::D8, ARM64::D9, ARM64::D10, ARM64::D11, - ARM64::D12, ARM64::D13, ARM64::D14, ARM64::D15, ARM64::D16, ARM64::D17, - ARM64::D18, ARM64::D19, ARM64::D20, ARM64::D21, ARM64::D22, ARM64::D23, - ARM64::D24, ARM64::D25, ARM64::D26, ARM64::D27, ARM64::D28, ARM64::D29, - ARM64::D30, ARM64::D31 -}; - -static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = 
FPR64DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned FPR32DecoderTable[] = { - ARM64::S0, ARM64::S1, ARM64::S2, ARM64::S3, ARM64::S4, ARM64::S5, - ARM64::S6, ARM64::S7, ARM64::S8, ARM64::S9, ARM64::S10, ARM64::S11, - ARM64::S12, ARM64::S13, ARM64::S14, ARM64::S15, ARM64::S16, ARM64::S17, - ARM64::S18, ARM64::S19, ARM64::S20, ARM64::S21, ARM64::S22, ARM64::S23, - ARM64::S24, ARM64::S25, ARM64::S26, ARM64::S27, ARM64::S28, ARM64::S29, - ARM64::S30, ARM64::S31 -}; - -static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = FPR32DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned FPR16DecoderTable[] = { - ARM64::H0, ARM64::H1, ARM64::H2, ARM64::H3, ARM64::H4, ARM64::H5, - ARM64::H6, ARM64::H7, ARM64::H8, ARM64::H9, ARM64::H10, ARM64::H11, - ARM64::H12, ARM64::H13, ARM64::H14, ARM64::H15, ARM64::H16, ARM64::H17, - ARM64::H18, ARM64::H19, ARM64::H20, ARM64::H21, ARM64::H22, ARM64::H23, - ARM64::H24, ARM64::H25, ARM64::H26, ARM64::H27, ARM64::H28, ARM64::H29, - ARM64::H30, ARM64::H31 -}; - -static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = FPR16DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned FPR8DecoderTable[] = { - ARM64::B0, ARM64::B1, ARM64::B2, ARM64::B3, ARM64::B4, ARM64::B5, - ARM64::B6, ARM64::B7, ARM64::B8, ARM64::B9, ARM64::B10, ARM64::B11, - ARM64::B12, ARM64::B13, ARM64::B14, ARM64::B15, ARM64::B16, ARM64::B17, - ARM64::B18, ARM64::B19, ARM64::B20, ARM64::B21, ARM64::B22, ARM64::B23, - ARM64::B24, ARM64::B25, ARM64::B26, ARM64::B27, ARM64::B28, ARM64::B29, - ARM64::B30, ARM64::B31 -}; - -static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = FPR8DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned GPR64DecoderTable[] = { - ARM64::X0, ARM64::X1, ARM64::X2, ARM64::X3, ARM64::X4, ARM64::X5, - ARM64::X6, ARM64::X7, ARM64::X8, ARM64::X9, ARM64::X10, ARM64::X11, - ARM64::X12, ARM64::X13, ARM64::X14, ARM64::X15, ARM64::X16, ARM64::X17, - ARM64::X18, ARM64::X19, ARM64::X20, ARM64::X21, ARM64::X22, ARM64::X23, - ARM64::X24, ARM64::X25, ARM64::X26, ARM64::X27, ARM64::X28, ARM64::FP, - ARM64::LR, ARM64::XZR -}; - -static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = GPR64DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = GPR64DecoderTable[RegNo]; - if (Register == ARM64::XZR) - Register = ARM64::SP; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned GPR32DecoderTable[] = { - ARM64::W0, ARM64::W1, ARM64::W2, ARM64::W3, ARM64::W4, ARM64::W5, - ARM64::W6, ARM64::W7, ARM64::W8, ARM64::W9, ARM64::W10, ARM64::W11, - ARM64::W12, ARM64::W13, ARM64::W14, ARM64::W15, ARM64::W16, ARM64::W17, - ARM64::W18, 
ARM64::W19, ARM64::W20, ARM64::W21, ARM64::W22, ARM64::W23, - ARM64::W24, ARM64::W25, ARM64::W26, ARM64::W27, ARM64::W28, ARM64::W29, - ARM64::W30, ARM64::WZR -}; - -static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = GPR32DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = GPR32DecoderTable[RegNo]; - if (Register == ARM64::WZR) - Register = ARM64::WSP; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned VectorDecoderTable[] = { - ARM64::Q0, ARM64::Q1, ARM64::Q2, ARM64::Q3, ARM64::Q4, ARM64::Q5, - ARM64::Q6, ARM64::Q7, ARM64::Q8, ARM64::Q9, ARM64::Q10, ARM64::Q11, - ARM64::Q12, ARM64::Q13, ARM64::Q14, ARM64::Q15, ARM64::Q16, ARM64::Q17, - ARM64::Q18, ARM64::Q19, ARM64::Q20, ARM64::Q21, ARM64::Q22, ARM64::Q23, - ARM64::Q24, ARM64::Q25, ARM64::Q26, ARM64::Q27, ARM64::Q28, ARM64::Q29, - ARM64::Q30, ARM64::Q31 -}; - -static DecodeStatus DecodeVectorRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = VectorDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned QQDecoderTable[] = { - ARM64::Q0_Q1, ARM64::Q1_Q2, ARM64::Q2_Q3, ARM64::Q3_Q4, - ARM64::Q4_Q5, ARM64::Q5_Q6, ARM64::Q6_Q7, ARM64::Q7_Q8, - ARM64::Q8_Q9, ARM64::Q9_Q10, ARM64::Q10_Q11, ARM64::Q11_Q12, - ARM64::Q12_Q13, ARM64::Q13_Q14, ARM64::Q14_Q15, ARM64::Q15_Q16, - ARM64::Q16_Q17, ARM64::Q17_Q18, ARM64::Q18_Q19, ARM64::Q19_Q20, - ARM64::Q20_Q21, ARM64::Q21_Q22, ARM64::Q22_Q23, ARM64::Q23_Q24, - ARM64::Q24_Q25, ARM64::Q25_Q26, ARM64::Q26_Q27, ARM64::Q27_Q28, - ARM64::Q28_Q29, ARM64::Q29_Q30, ARM64::Q30_Q31, ARM64::Q31_Q0 -}; - -static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = QQDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned QQQDecoderTable[] = { - ARM64::Q0_Q1_Q2, ARM64::Q1_Q2_Q3, ARM64::Q2_Q3_Q4, - ARM64::Q3_Q4_Q5, ARM64::Q4_Q5_Q6, ARM64::Q5_Q6_Q7, - ARM64::Q6_Q7_Q8, ARM64::Q7_Q8_Q9, ARM64::Q8_Q9_Q10, - ARM64::Q9_Q10_Q11, ARM64::Q10_Q11_Q12, ARM64::Q11_Q12_Q13, - ARM64::Q12_Q13_Q14, ARM64::Q13_Q14_Q15, ARM64::Q14_Q15_Q16, - ARM64::Q15_Q16_Q17, ARM64::Q16_Q17_Q18, ARM64::Q17_Q18_Q19, - ARM64::Q18_Q19_Q20, ARM64::Q19_Q20_Q21, ARM64::Q20_Q21_Q22, - ARM64::Q21_Q22_Q23, ARM64::Q22_Q23_Q24, ARM64::Q23_Q24_Q25, - ARM64::Q24_Q25_Q26, ARM64::Q25_Q26_Q27, ARM64::Q26_Q27_Q28, - ARM64::Q27_Q28_Q29, ARM64::Q28_Q29_Q30, ARM64::Q29_Q30_Q31, - ARM64::Q30_Q31_Q0, ARM64::Q31_Q0_Q1 -}; - -static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = QQQDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned QQQQDecoderTable[] = { - ARM64::Q0_Q1_Q2_Q3, ARM64::Q1_Q2_Q3_Q4, ARM64::Q2_Q3_Q4_Q5, - ARM64::Q3_Q4_Q5_Q6, ARM64::Q4_Q5_Q6_Q7, ARM64::Q5_Q6_Q7_Q8, - ARM64::Q6_Q7_Q8_Q9, ARM64::Q7_Q8_Q9_Q10, ARM64::Q8_Q9_Q10_Q11, - ARM64::Q9_Q10_Q11_Q12, ARM64::Q10_Q11_Q12_Q13, 
ARM64::Q11_Q12_Q13_Q14, - ARM64::Q12_Q13_Q14_Q15, ARM64::Q13_Q14_Q15_Q16, ARM64::Q14_Q15_Q16_Q17, - ARM64::Q15_Q16_Q17_Q18, ARM64::Q16_Q17_Q18_Q19, ARM64::Q17_Q18_Q19_Q20, - ARM64::Q18_Q19_Q20_Q21, ARM64::Q19_Q20_Q21_Q22, ARM64::Q20_Q21_Q22_Q23, - ARM64::Q21_Q22_Q23_Q24, ARM64::Q22_Q23_Q24_Q25, ARM64::Q23_Q24_Q25_Q26, - ARM64::Q24_Q25_Q26_Q27, ARM64::Q25_Q26_Q27_Q28, ARM64::Q26_Q27_Q28_Q29, - ARM64::Q27_Q28_Q29_Q30, ARM64::Q28_Q29_Q30_Q31, ARM64::Q29_Q30_Q31_Q0, - ARM64::Q30_Q31_Q0_Q1, ARM64::Q31_Q0_Q1_Q2 -}; - -static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = QQQQDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned DDDecoderTable[] = { - ARM64::D0_D1, ARM64::D1_D2, ARM64::D2_D3, ARM64::D3_D4, - ARM64::D4_D5, ARM64::D5_D6, ARM64::D6_D7, ARM64::D7_D8, - ARM64::D8_D9, ARM64::D9_D10, ARM64::D10_D11, ARM64::D11_D12, - ARM64::D12_D13, ARM64::D13_D14, ARM64::D14_D15, ARM64::D15_D16, - ARM64::D16_D17, ARM64::D17_D18, ARM64::D18_D19, ARM64::D19_D20, - ARM64::D20_D21, ARM64::D21_D22, ARM64::D22_D23, ARM64::D23_D24, - ARM64::D24_D25, ARM64::D25_D26, ARM64::D26_D27, ARM64::D27_D28, - ARM64::D28_D29, ARM64::D29_D30, ARM64::D30_D31, ARM64::D31_D0 -}; - -static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = DDDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned DDDDecoderTable[] = { - ARM64::D0_D1_D2, ARM64::D1_D2_D3, ARM64::D2_D3_D4, - ARM64::D3_D4_D5, ARM64::D4_D5_D6, ARM64::D5_D6_D7, - ARM64::D6_D7_D8, ARM64::D7_D8_D9, ARM64::D8_D9_D10, - ARM64::D9_D10_D11, ARM64::D10_D11_D12, ARM64::D11_D12_D13, - ARM64::D12_D13_D14, ARM64::D13_D14_D15, ARM64::D14_D15_D16, - ARM64::D15_D16_D17, ARM64::D16_D17_D18, ARM64::D17_D18_D19, - ARM64::D18_D19_D20, ARM64::D19_D20_D21, ARM64::D20_D21_D22, - ARM64::D21_D22_D23, ARM64::D22_D23_D24, ARM64::D23_D24_D25, - ARM64::D24_D25_D26, ARM64::D25_D26_D27, ARM64::D26_D27_D28, - ARM64::D27_D28_D29, ARM64::D28_D29_D30, ARM64::D29_D30_D31, - ARM64::D30_D31_D0, ARM64::D31_D0_D1 -}; - -static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = DDDDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned DDDDDecoderTable[] = { - ARM64::D0_D1_D2_D3, ARM64::D1_D2_D3_D4, ARM64::D2_D3_D4_D5, - ARM64::D3_D4_D5_D6, ARM64::D4_D5_D6_D7, ARM64::D5_D6_D7_D8, - ARM64::D6_D7_D8_D9, ARM64::D7_D8_D9_D10, ARM64::D8_D9_D10_D11, - ARM64::D9_D10_D11_D12, ARM64::D10_D11_D12_D13, ARM64::D11_D12_D13_D14, - ARM64::D12_D13_D14_D15, ARM64::D13_D14_D15_D16, ARM64::D14_D15_D16_D17, - ARM64::D15_D16_D17_D18, ARM64::D16_D17_D18_D19, ARM64::D17_D18_D19_D20, - ARM64::D18_D19_D20_D21, ARM64::D19_D20_D21_D22, ARM64::D20_D21_D22_D23, - ARM64::D21_D22_D23_D24, ARM64::D22_D23_D24_D25, ARM64::D23_D24_D25_D26, - ARM64::D24_D25_D26_D27, ARM64::D25_D26_D27_D28, ARM64::D26_D27_D28_D29, - ARM64::D27_D28_D29_D30, ARM64::D28_D29_D30_D31, ARM64::D29_D30_D31_D0, - ARM64::D30_D31_D0_D1, ARM64::D31_D0_D1_D2 -}; - -static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = 
DDDDDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static DecodeStatus DecodeFixedPointScaleImm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(64 - Imm)); - return Success; -} - -static DecodeStatus DecodeCondBranchTarget(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - int64_t ImmVal = Imm; - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - // Sign-extend 19-bit immediate. - if (ImmVal & (1 << (19 - 1))) - ImmVal |= ~((1LL << 19) - 1); - - if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal << 2, - Inst.getOpcode() != ARM64::LDRXl, 4, Inst)) - Inst.addOperand(MCOperand::CreateImm(ImmVal)); - return Success; -} - -static DecodeStatus DecodeSystemRegister(llvm::MCInst &Inst, unsigned Imm, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(Imm | 0x8000)); - return Success; -} - -static DecodeStatus DecodeVecShiftRImm(llvm::MCInst &Inst, unsigned Imm, - unsigned Add) { - Inst.addOperand(MCOperand::CreateImm(Add - Imm)); - return Success; -} - -static DecodeStatus DecodeVecShiftLImm(llvm::MCInst &Inst, unsigned Imm, - unsigned Add) { - Inst.addOperand(MCOperand::CreateImm((Imm + Add) & (Add - 1))); - return Success; -} - -static DecodeStatus DecodeVecShiftR64Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm, 64); -} - -static DecodeStatus DecodeVecShiftR64ImmNarrow(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm | 0x20, 64); -} - -static DecodeStatus DecodeVecShiftR32Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm, 32); -} - -static DecodeStatus DecodeVecShiftR32ImmNarrow(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm | 0x10, 32); -} - -static DecodeStatus DecodeVecShiftR16Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm, 16); -} - -static DecodeStatus DecodeVecShiftR16ImmNarrow(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm | 0x8, 16); -} - -static DecodeStatus DecodeVecShiftR8Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm, 8); -} - -static DecodeStatus DecodeVecShiftL64Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftLImm(Inst, Imm, 64); -} - -static DecodeStatus DecodeVecShiftL32Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftLImm(Inst, Imm, 32); -} - -static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftLImm(Inst, Imm, 16); -} - -static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftLImm(Inst, Imm, 8); -} - -static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Rm = fieldFromInstruction(insn, 16, 5); - unsigned shiftHi = fieldFromInstruction(insn, 22, 2); - unsigned shiftLo 
= fieldFromInstruction(insn, 10, 6); - unsigned shift = (shiftHi << 6) | shiftLo; - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::ANDWrs: - case ARM64::ANDSWrs: - case ARM64::BICWrs: - case ARM64::BICSWrs: - case ARM64::ORRWrs: - case ARM64::ORNWrs: - case ARM64::EORWrs: - case ARM64::EONWrs: - case ARM64::ADDWrs: - case ARM64::ADDSWrs: - case ARM64::SUBWrs: - case ARM64::SUBSWrs: { - DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); - break; - } - case ARM64::ANDXrs: - case ARM64::ANDSXrs: - case ARM64::BICXrs: - case ARM64::BICSXrs: - case ARM64::ORRXrs: - case ARM64::ORNXrs: - case ARM64::EORXrs: - case ARM64::EONXrs: - case ARM64::ADDXrs: - case ARM64::ADDSXrs: - case ARM64::SUBXrs: - case ARM64::SUBSXrs: - DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - break; - } - - Inst.addOperand(MCOperand::CreateImm(shift)); - return Success; -} - -static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned imm = fieldFromInstruction(insn, 5, 16); - unsigned shift = fieldFromInstruction(insn, 21, 2); - shift <<= 4; - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::MOVZWi: - case ARM64::MOVNWi: - case ARM64::MOVKWi: - DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::MOVZXi: - case ARM64::MOVNXi: - case ARM64::MOVKXi: - DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - break; - } - - if (Inst.getOpcode() == ARM64::MOVKWi || Inst.getOpcode() == ARM64::MOVKXi) - Inst.addOperand(Inst.getOperand(0)); - - Inst.addOperand(MCOperand::CreateImm(imm)); - Inst.addOperand(MCOperand::CreateImm(shift)); - return Success; -} - -static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned offset = fieldFromInstruction(insn, 10, 12); - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::PRFMui: - // Rt is an immediate in prefetch. 
- Inst.addOperand(MCOperand::CreateImm(Rt)); - break; - case ARM64::STRBBui: - case ARM64::LDRBBui: - case ARM64::LDRSBWui: - case ARM64::STRHHui: - case ARM64::LDRHHui: - case ARM64::LDRSHWui: - case ARM64::STRWui: - case ARM64::LDRWui: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSBXui: - case ARM64::LDRSHXui: - case ARM64::LDRSWui: - case ARM64::STRXui: - case ARM64::LDRXui: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRQui: - case ARM64::STRQui: - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRDui: - case ARM64::STRDui: - DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSui: - case ARM64::STRSui: - DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRHui: - case ARM64::STRHui: - DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRBui: - case ARM64::STRBui: - DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder); - break; - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - if (!Dis->tryAddingSymbolicOperand(Addr, offset, Fail, 4, Inst, insn)) - Inst.addOperand(MCOperand::CreateImm(offset)); - return Success; -} - -static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - int64_t offset = fieldFromInstruction(insn, 12, 9); - - // offset is a 9-bit signed immediate, so sign extend it to - // fill the unsigned. - if (offset & (1 << (9 - 1))) - offset |= ~((1LL << 9) - 1); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::PRFUMi: - // Rt is an immediate in prefetch. - Inst.addOperand(MCOperand::CreateImm(Rt)); - break; - case ARM64::STURBBi: - case ARM64::LDURBBi: - case ARM64::LDURSBWi: - case ARM64::STURHHi: - case ARM64::LDURHHi: - case ARM64::LDURSHWi: - case ARM64::STURWi: - case ARM64::LDURWi: - case ARM64::LDTRSBWi: - case ARM64::LDTRSHWi: - case ARM64::STTRWi: - case ARM64::LDTRWi: - case ARM64::STTRHi: - case ARM64::LDTRHi: - case ARM64::LDTRBi: - case ARM64::STTRBi: - case ARM64::LDRSBWpre: - case ARM64::LDRSHWpre: - case ARM64::STRBBpre: - case ARM64::LDRBBpre: - case ARM64::STRHHpre: - case ARM64::LDRHHpre: - case ARM64::STRWpre: - case ARM64::LDRWpre: - case ARM64::LDRSBWpost: - case ARM64::LDRSHWpost: - case ARM64::STRBBpost: - case ARM64::LDRBBpost: - case ARM64::STRHHpost: - case ARM64::LDRHHpost: - case ARM64::STRWpost: - case ARM64::LDRWpost: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDURSBXi: - case ARM64::LDURSHXi: - case ARM64::LDURSWi: - case ARM64::STURXi: - case ARM64::LDURXi: - case ARM64::LDTRSBXi: - case ARM64::LDTRSHXi: - case ARM64::LDTRSWi: - case ARM64::STTRXi: - case ARM64::LDTRXi: - case ARM64::LDRSBXpre: - case ARM64::LDRSHXpre: - case ARM64::STRXpre: - case ARM64::LDRSWpre: - case ARM64::LDRXpre: - case ARM64::LDRSBXpost: - case ARM64::LDRSHXpost: - case ARM64::STRXpost: - case ARM64::LDRSWpost: - case ARM64::LDRXpost: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDURQi: - case ARM64::STURQi: - case ARM64::LDRQpre: - case ARM64::STRQpre: - case ARM64::LDRQpost: - case ARM64::STRQpost: - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDURDi: - case ARM64::STURDi: - case ARM64::LDRDpre: - case ARM64::STRDpre: - case ARM64::LDRDpost: - case ARM64::STRDpost: - DecodeFPR64RegisterClass(Inst, Rt, Addr, 
Decoder); - break; - case ARM64::LDURSi: - case ARM64::STURSi: - case ARM64::LDRSpre: - case ARM64::STRSpre: - case ARM64::LDRSpost: - case ARM64::STRSpost: - DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDURHi: - case ARM64::STURHi: - case ARM64::LDRHpre: - case ARM64::STRHpre: - case ARM64::LDRHpost: - case ARM64::STRHpost: - DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDURBi: - case ARM64::STURBi: - case ARM64::LDRBpre: - case ARM64::STRBpre: - case ARM64::LDRBpost: - case ARM64::STRBpost: - DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder); - break; - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - Inst.addOperand(MCOperand::CreateImm(offset)); - return Success; -} - -static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Rt2 = fieldFromInstruction(insn, 10, 5); - unsigned Rs = fieldFromInstruction(insn, 16, 5); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::STLXRW: - case ARM64::STLXRB: - case ARM64::STLXRH: - case ARM64::STXRW: - case ARM64::STXRB: - case ARM64::STXRH: - DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); - // FALLTHROUGH - case ARM64::LDARW: - case ARM64::LDARB: - case ARM64::LDARH: - case ARM64::LDAXRW: - case ARM64::LDAXRB: - case ARM64::LDAXRH: - case ARM64::LDXRW: - case ARM64::LDXRB: - case ARM64::LDXRH: - case ARM64::STLRW: - case ARM64::STLRB: - case ARM64::STLRH: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::STLXRX: - case ARM64::STXRX: - DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); - // FALLTHROUGH - case ARM64::LDARX: - case ARM64::LDAXRX: - case ARM64::LDXRX: - case ARM64::STLRX: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::STLXPW: - case ARM64::STXPW: - DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); - // FALLTHROUGH - case ARM64::LDAXPW: - case ARM64::LDXPW: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder); - break; - case ARM64::STLXPX: - case ARM64::STXPX: - DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); - // FALLTHROUGH - case ARM64::LDAXPX: - case ARM64::LDXPX: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder); - break; - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - return Success; -} - -static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Rt2 = fieldFromInstruction(insn, 10, 5); - int64_t offset = fieldFromInstruction(insn, 15, 7); - - // offset is a 7-bit signed immediate, so sign extend it to - // fill the unsigned. 
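The comment above, and the same idiom in several other decoders in this file, sign-extends a narrow unsigned field by hand: if the field's top bit is set, OR in ones above it. A standalone trace of the 7-bit case, using my own example value:

#include <cassert>
#include <cstdint>

int main() {
  // The raw 7-bit field 0b1111100 (0x7C) encodes -4 in two's complement.
  int64_t offset = 0x7C;
  if (offset & (1 << (7 - 1)))   // bit 6, the sign bit, is set...
    offset |= ~((1LL << 7) - 1); // ...so OR in ones above bit 6 (~0x7F)
  assert(offset == -4);          // i.e. 0xFFFFFFFFFFFFFFFC
  return 0;
}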
- if (offset & (1 << (7 - 1))) - offset |= ~((1LL << 7) - 1); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::LDNPXi: - case ARM64::STNPXi: - case ARM64::LDPXpost: - case ARM64::STPXpost: - case ARM64::LDPSWpost: - case ARM64::LDPXi: - case ARM64::STPXi: - case ARM64::LDPSWi: - case ARM64::LDPXpre: - case ARM64::STPXpre: - case ARM64::LDPSWpre: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder); - break; - case ARM64::LDNPWi: - case ARM64::STNPWi: - case ARM64::LDPWpost: - case ARM64::STPWpost: - case ARM64::LDPWi: - case ARM64::STPWi: - case ARM64::LDPWpre: - case ARM64::STPWpre: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder); - break; - case ARM64::LDNPQi: - case ARM64::STNPQi: - case ARM64::LDPQpost: - case ARM64::STPQpost: - case ARM64::LDPQi: - case ARM64::STPQi: - case ARM64::LDPQpre: - case ARM64::STPQpre: - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - DecodeFPR128RegisterClass(Inst, Rt2, Addr, Decoder); - break; - case ARM64::LDNPDi: - case ARM64::STNPDi: - case ARM64::LDPDpost: - case ARM64::STPDpost: - case ARM64::LDPDi: - case ARM64::STPDi: - case ARM64::LDPDpre: - case ARM64::STPDpre: - DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); - DecodeFPR64RegisterClass(Inst, Rt2, Addr, Decoder); - break; - case ARM64::LDNPSi: - case ARM64::STNPSi: - case ARM64::LDPSpost: - case ARM64::STPSpost: - case ARM64::LDPSi: - case ARM64::STPSi: - case ARM64::LDPSpre: - case ARM64::STPSpre: - DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); - DecodeFPR32RegisterClass(Inst, Rt2, Addr, Decoder); - break; - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - Inst.addOperand(MCOperand::CreateImm(offset)); - return Success; -} - -static DecodeStatus DecodeRegOffsetLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Rm = fieldFromInstruction(insn, 16, 5); - unsigned extendHi = fieldFromInstruction(insn, 13, 3); - unsigned extendLo = fieldFromInstruction(insn, 12, 1); - unsigned extend = 0; - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::LDRSWro: - extend = (extendHi << 1) | extendLo; - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRXro: - case ARM64::STRXro: - extend = (extendHi << 1) | extendLo; - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRWro: - case ARM64::STRWro: - extend = (extendHi << 1) | extendLo; - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRQro: - case ARM64::STRQro: - extend = (extendHi << 1) | extendLo; - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRDro: - case ARM64::STRDro: - extend = (extendHi << 1) | extendLo; - DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSro: - case ARM64::STRSro: - extend = (extendHi << 1) | extendLo; - DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRHro: - extend = (extendHi << 1) | extendLo; - DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRBro: - extend = (extendHi << 1) | extendLo; - DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRBBro: - case ARM64::STRBBro: - case ARM64::LDRSBWro: - extend = (extendHi << 1) | extendLo; - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case 
ARM64::LDRHHro: - case ARM64::STRHHro: - case ARM64::LDRSHWro: - extend = (extendHi << 1) | extendLo; - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSHXro: - extend = (extendHi << 1) | extendLo; - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSBXro: - extend = (extendHi << 1) | extendLo; - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::PRFMro: - extend = (extendHi << 1) | extendLo; - Inst.addOperand(MCOperand::CreateImm(Rt)); - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - - if (extendHi == 0x3) - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - else - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - - Inst.addOperand(MCOperand::CreateImm(extend)); - return Success; -} - -static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Rm = fieldFromInstruction(insn, 16, 5); - unsigned extend = fieldFromInstruction(insn, 10, 6); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::ADDWrx: - case ARM64::SUBWrx: - DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); - break; - case ARM64::ADDSWrx: - case ARM64::SUBSWrx: - DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); - break; - case ARM64::ADDXrx: - case ARM64::SUBXrx: - DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); - break; - case ARM64::ADDSXrx: - case ARM64::SUBSXrx: - DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); - break; - case ARM64::ADDXrx64: - case ARM64::ADDSXrx64: - case ARM64::SUBXrx64: - case ARM64::SUBSXrx64: - DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - break; - } - - Inst.addOperand(MCOperand::CreateImm(extend)); - return Success; -} - -static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Datasize = fieldFromInstruction(insn, 31, 1); - unsigned imm; - - if (Datasize) { - DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); - imm = fieldFromInstruction(insn, 10, 13); - if (!ARM64_AM::isValidDecodeLogicalImmediate(imm, 64)) - return Fail; - } else { - DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rn, Addr, Decoder); - imm = fieldFromInstruction(insn, 10, 12); - if (!ARM64_AM::isValidDecodeLogicalImmediate(imm, 32)) - return Fail; - } - Inst.addOperand(MCOperand::CreateImm(imm)); - return Success; -} - -static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned cmode = fieldFromInstruction(insn, 12, 4); - unsigned imm = fieldFromInstruction(insn, 16, 3) << 5; - imm |= 
fieldFromInstruction(insn, 5, 5); - - if (Inst.getOpcode() == ARM64::MOVID) - DecodeFPR64RegisterClass(Inst, Rd, Addr, Decoder); - else - DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder); - - Inst.addOperand(MCOperand::CreateImm(imm)); - - switch (Inst.getOpcode()) { - default: - break; - case ARM64::MOVIv4i16: - case ARM64::MOVIv8i16: - case ARM64::MVNIv4i16: - case ARM64::MVNIv8i16: - case ARM64::MOVIv2i32: - case ARM64::MOVIv4i32: - case ARM64::MVNIv2i32: - case ARM64::MVNIv4i32: - Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2)); - break; - case ARM64::MOVIv2s_msl: - case ARM64::MOVIv4s_msl: - case ARM64::MVNIv2s_msl: - case ARM64::MVNIv4s_msl: - Inst.addOperand(MCOperand::CreateImm(cmode & 1 ? 0x110 : 0x108)); - break; - } - - return Success; -} - -static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned cmode = fieldFromInstruction(insn, 12, 4); - unsigned imm = fieldFromInstruction(insn, 16, 3) << 5; - imm |= fieldFromInstruction(insn, 5, 5); - - // Tied operands added twice. - DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder); - DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder); - - Inst.addOperand(MCOperand::CreateImm(imm)); - Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2)); - - return Success; -} - -static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - int64_t imm = fieldFromInstruction(insn, 5, 19) << 2; - imm |= fieldFromInstruction(insn, 29, 2); - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - // Sign-extend the 21-bit immediate. - if (imm & (1 << (21 - 1))) - imm |= ~((1LL << 21) - 1); - - DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - if (!Dis->tryAddingSymbolicOperand(Addr, imm, Fail, 4, Inst, insn)) - Inst.addOperand(MCOperand::CreateImm(imm)); - - return Success; -} - -static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Imm = fieldFromInstruction(insn, 10, 14); - unsigned S = fieldFromInstruction(insn, 29, 1); - unsigned Datasize = fieldFromInstruction(insn, 31, 1); - - unsigned ShifterVal = (Imm >> 12) & 3; - unsigned ImmVal = Imm & 0xFFF; - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - if (ShifterVal != 0 && ShifterVal != 1) - return Fail; - - if (Datasize) { - if (Rd == 31 && !S) - DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); - else - DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - } else { - if (Rd == 31 && !S) - DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder); - else - DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); - } - - if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal, Fail, 4, Inst, insn)) - Inst.addOperand(MCOperand::CreateImm(ImmVal)); - Inst.addOperand(MCOperand::CreateImm(12 * ShifterVal)); - return Success; -} - -static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { - int64_t imm = fieldFromInstruction(insn, 0, 26); - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - // Sign-extend the 26-bit immediate. 
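For orientation, a worked example of what this 26-bit field means once decoded (the example addresses are mine, not from the commit): B/BL store a word offset, so the decoder scales the sign-extended field by four before the symbolic lookup, and the branch target is the instruction's own address plus that byte offset.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Addr = 0x100004000;   // address of the branch instruction (example)
  int64_t imm = 0x3FFFFFF;       // raw 26-bit field: all ones, i.e. -1 words
  if (imm & (1 << (26 - 1)))     // same sign-extension idiom as elsewhere in this file
    imm |= ~((1LL << 26) - 1);
  uint64_t target = Addr + imm * 4; // the deleted code passes (imm << 2) to the lookup
  assert(target == 0x100003FFC);    // one instruction (4 bytes) back
  return 0;
}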
- if (imm & (1 << (26 - 1))) - imm |= ~((1LL << 26) - 1); - - if (!Dis->tryAddingSymbolicOperand(Addr, imm << 2, true, 4, Inst)) - Inst.addOperand(MCOperand::CreateImm(imm)); - - return Success; -} - -static DecodeStatus DecodeSystemCPSRInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - uint64_t op1 = fieldFromInstruction(insn, 16, 3); - uint64_t op2 = fieldFromInstruction(insn, 5, 3); - uint64_t crm = fieldFromInstruction(insn, 8, 4); - - Inst.addOperand(MCOperand::CreateImm((op1 << 3) | op2)); - Inst.addOperand(MCOperand::CreateImm(crm)); - - return Success; -} - -static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder) { - uint64_t Rt = fieldFromInstruction(insn, 0, 5); - uint64_t bit = fieldFromInstruction(insn, 31, 1) << 5; - bit |= fieldFromInstruction(insn, 19, 5); - int64_t dst = fieldFromInstruction(insn, 5, 14); - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - // Sign-extend 14-bit immediate. - if (dst & (1 << (14 - 1))) - dst |= ~((1LL << 14) - 1); - - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - Inst.addOperand(MCOperand::CreateImm(bit)); - if (!Dis->tryAddingSymbolicOperand(Addr, dst << 2, true, 4, Inst)) - Inst.addOperand(MCOperand::CreateImm(dst)); - - return Success; -} - -static DecodeStatus DecodeSIMDLdStPost(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder) { - uint64_t Rd = fieldFromInstruction(insn, 0, 5); - uint64_t Rn = fieldFromInstruction(insn, 5, 5); - uint64_t Rm = fieldFromInstruction(insn, 16, 5); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::ST1Onev8b_POST: - case ARM64::ST1Onev4h_POST: - case ARM64::ST1Onev2s_POST: - case ARM64::ST1Onev1d_POST: - case ARM64::LD1Onev8b_POST: - case ARM64::LD1Onev4h_POST: - case ARM64::LD1Onev2s_POST: - case ARM64::LD1Onev1d_POST: - DecodeFPR64RegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Onev16b_POST: - case ARM64::ST1Onev8h_POST: - case ARM64::ST1Onev4s_POST: - case ARM64::ST1Onev2d_POST: - case ARM64::LD1Onev16b_POST: - case ARM64::LD1Onev8h_POST: - case ARM64::LD1Onev4s_POST: - case ARM64::LD1Onev2d_POST: - DecodeFPR128RegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Twov8b_POST: - case ARM64::ST1Twov4h_POST: - case ARM64::ST1Twov2s_POST: - case ARM64::ST1Twov1d_POST: - case ARM64::ST2Twov8b_POST: - case ARM64::ST2Twov4h_POST: - case ARM64::ST2Twov2s_POST: - case ARM64::LD1Twov8b_POST: - case ARM64::LD1Twov4h_POST: - case ARM64::LD1Twov2s_POST: - case ARM64::LD1Twov1d_POST: - case ARM64::LD2Twov8b_POST: - case ARM64::LD2Twov4h_POST: - case ARM64::LD2Twov2s_POST: - DecodeDDRegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Threev8b_POST: - case ARM64::ST1Threev4h_POST: - case ARM64::ST1Threev2s_POST: - case ARM64::ST1Threev1d_POST: - case ARM64::ST3Threev8b_POST: - case ARM64::ST3Threev4h_POST: - case ARM64::ST3Threev2s_POST: - case ARM64::LD1Threev8b_POST: - case ARM64::LD1Threev4h_POST: - case ARM64::LD1Threev2s_POST: - case ARM64::LD1Threev1d_POST: - case ARM64::LD3Threev8b_POST: - case ARM64::LD3Threev4h_POST: - case ARM64::LD3Threev2s_POST: - DecodeDDDRegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Fourv8b_POST: - case ARM64::ST1Fourv4h_POST: - case ARM64::ST1Fourv2s_POST: - case ARM64::ST1Fourv1d_POST: - case ARM64::ST4Fourv8b_POST: - case ARM64::ST4Fourv4h_POST: - case ARM64::ST4Fourv2s_POST: - case ARM64::LD1Fourv8b_POST: - case ARM64::LD1Fourv4h_POST: - case 
ARM64::LD1Fourv2s_POST: - case ARM64::LD1Fourv1d_POST: - case ARM64::LD4Fourv8b_POST: - case ARM64::LD4Fourv4h_POST: - case ARM64::LD4Fourv2s_POST: - DecodeDDDDRegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Twov16b_POST: - case ARM64::ST1Twov8h_POST: - case ARM64::ST1Twov4s_POST: - case ARM64::ST1Twov2d_POST: - case ARM64::ST2Twov16b_POST: - case ARM64::ST2Twov8h_POST: - case ARM64::ST2Twov4s_POST: - case ARM64::ST2Twov2d_POST: - case ARM64::LD1Twov16b_POST: - case ARM64::LD1Twov8h_POST: - case ARM64::LD1Twov4s_POST: - case ARM64::LD1Twov2d_POST: - case ARM64::LD2Twov16b_POST: - case ARM64::LD2Twov8h_POST: - case ARM64::LD2Twov4s_POST: - case ARM64::LD2Twov2d_POST: - DecodeQQRegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Threev16b_POST: - case ARM64::ST1Threev8h_POST: - case ARM64::ST1Threev4s_POST: - case ARM64::ST1Threev2d_POST: - case ARM64::ST3Threev16b_POST: - case ARM64::ST3Threev8h_POST: - case ARM64::ST3Threev4s_POST: - case ARM64::ST3Threev2d_POST: - case ARM64::LD1Threev16b_POST: - case ARM64::LD1Threev8h_POST: - case ARM64::LD1Threev4s_POST: - case ARM64::LD1Threev2d_POST: - case ARM64::LD3Threev16b_POST: - case ARM64::LD3Threev8h_POST: - case ARM64::LD3Threev4s_POST: - case ARM64::LD3Threev2d_POST: - DecodeQQQRegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Fourv16b_POST: - case ARM64::ST1Fourv8h_POST: - case ARM64::ST1Fourv4s_POST: - case ARM64::ST1Fourv2d_POST: - case ARM64::ST4Fourv16b_POST: - case ARM64::ST4Fourv8h_POST: - case ARM64::ST4Fourv4s_POST: - case ARM64::ST4Fourv2d_POST: - case ARM64::LD1Fourv16b_POST: - case ARM64::LD1Fourv8h_POST: - case ARM64::LD1Fourv4s_POST: - case ARM64::LD1Fourv2d_POST: - case ARM64::LD4Fourv16b_POST: - case ARM64::LD4Fourv8h_POST: - case ARM64::LD4Fourv4s_POST: - case ARM64::LD4Fourv2d_POST: - DecodeQQQQRegisterClass(Inst, Rd, Addr, Decoder); - break; - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - return Success; -} - -static DecodeStatus DecodeSIMDLdStSingle(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder) { - uint64_t Rt = fieldFromInstruction(insn, 0, 5); - uint64_t Rn = fieldFromInstruction(insn, 5, 5); - uint64_t Rm = fieldFromInstruction(insn, 16, 5); - uint64_t size = fieldFromInstruction(insn, 10, 2); - uint64_t S = fieldFromInstruction(insn, 12, 1); - uint64_t Q = fieldFromInstruction(insn, 30, 1); - uint64_t index = 0; - - switch (Inst.getOpcode()) { - case ARM64::ST1i8: - case ARM64::ST1i8_POST: - case ARM64::ST2i8: - case ARM64::ST2i8_POST: - case ARM64::ST3i8_POST: - case ARM64::ST3i8: - case ARM64::ST4i8_POST: - case ARM64::ST4i8: - index = (Q << 3) | (S << 2) | size; - break; - case ARM64::ST1i16: - case ARM64::ST1i16_POST: - case ARM64::ST2i16: - case ARM64::ST2i16_POST: - case ARM64::ST3i16_POST: - case ARM64::ST3i16: - case ARM64::ST4i16_POST: - case ARM64::ST4i16: - index = (Q << 2) | (S << 1) | (size >> 1); - break; - case ARM64::ST1i32: - case ARM64::ST1i32_POST: - case ARM64::ST2i32: - case ARM64::ST2i32_POST: - case ARM64::ST3i32_POST: - case ARM64::ST3i32: - case ARM64::ST4i32_POST: - case ARM64::ST4i32: - index = (Q << 1) | S; - break; - case ARM64::ST1i64: - case ARM64::ST1i64_POST: - case ARM64::ST2i64: - case ARM64::ST2i64_POST: - case ARM64::ST3i64_POST: - case ARM64::ST3i64: - case ARM64::ST4i64_POST: - case ARM64::ST4i64: - index = Q; - break; - } - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::LD1Rv8b: - case ARM64::LD1Rv8b_POST: - case 
ARM64::LD1Rv4h: - case ARM64::LD1Rv4h_POST: - case ARM64::LD1Rv2s: - case ARM64::LD1Rv2s_POST: - case ARM64::LD1Rv1d: - case ARM64::LD1Rv1d_POST: - DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD1Rv16b: - case ARM64::LD1Rv16b_POST: - case ARM64::LD1Rv8h: - case ARM64::LD1Rv8h_POST: - case ARM64::LD1Rv4s: - case ARM64::LD1Rv4s_POST: - case ARM64::LD1Rv2d: - case ARM64::LD1Rv2d_POST: - case ARM64::ST1i8: - case ARM64::ST1i8_POST: - case ARM64::ST1i16: - case ARM64::ST1i16_POST: - case ARM64::ST1i32: - case ARM64::ST1i32_POST: - case ARM64::ST1i64: - case ARM64::ST1i64_POST: - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD2Rv16b: - case ARM64::LD2Rv16b_POST: - case ARM64::LD2Rv8h: - case ARM64::LD2Rv8h_POST: - case ARM64::LD2Rv4s: - case ARM64::LD2Rv4s_POST: - case ARM64::LD2Rv2d: - case ARM64::LD2Rv2d_POST: - case ARM64::ST2i8: - case ARM64::ST2i8_POST: - case ARM64::ST2i16: - case ARM64::ST2i16_POST: - case ARM64::ST2i32: - case ARM64::ST2i32_POST: - case ARM64::ST2i64: - case ARM64::ST2i64_POST: - DecodeQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD2Rv8b: - case ARM64::LD2Rv8b_POST: - case ARM64::LD2Rv4h: - case ARM64::LD2Rv4h_POST: - case ARM64::LD2Rv2s: - case ARM64::LD2Rv2s_POST: - case ARM64::LD2Rv1d: - case ARM64::LD2Rv1d_POST: - DecodeDDRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD3Rv8b: - case ARM64::LD3Rv8b_POST: - case ARM64::LD3Rv4h: - case ARM64::LD3Rv4h_POST: - case ARM64::LD3Rv2s: - case ARM64::LD3Rv2s_POST: - case ARM64::LD3Rv1d: - case ARM64::LD3Rv1d_POST: - DecodeDDDRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD3Rv16b: - case ARM64::LD3Rv16b_POST: - case ARM64::LD3Rv8h: - case ARM64::LD3Rv8h_POST: - case ARM64::LD3Rv4s: - case ARM64::LD3Rv4s_POST: - case ARM64::LD3Rv2d: - case ARM64::LD3Rv2d_POST: - case ARM64::ST3i8: - case ARM64::ST3i8_POST: - case ARM64::ST3i16: - case ARM64::ST3i16_POST: - case ARM64::ST3i32: - case ARM64::ST3i32_POST: - case ARM64::ST3i64: - case ARM64::ST3i64_POST: - DecodeQQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD4Rv8b: - case ARM64::LD4Rv8b_POST: - case ARM64::LD4Rv4h: - case ARM64::LD4Rv4h_POST: - case ARM64::LD4Rv2s: - case ARM64::LD4Rv2s_POST: - case ARM64::LD4Rv1d: - case ARM64::LD4Rv1d_POST: - DecodeDDDDRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD4Rv16b: - case ARM64::LD4Rv16b_POST: - case ARM64::LD4Rv8h: - case ARM64::LD4Rv8h_POST: - case ARM64::LD4Rv4s: - case ARM64::LD4Rv4s_POST: - case ARM64::LD4Rv2d: - case ARM64::LD4Rv2d_POST: - case ARM64::ST4i8: - case ARM64::ST4i8_POST: - case ARM64::ST4i16: - case ARM64::ST4i16_POST: - case ARM64::ST4i32: - case ARM64::ST4i32_POST: - case ARM64::ST4i64: - case ARM64::ST4i64_POST: - DecodeQQQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - } - - switch (Inst.getOpcode()) { - case ARM64::LD1Rv8b: - case ARM64::LD1Rv8b_POST: - case ARM64::LD1Rv16b: - case ARM64::LD1Rv16b_POST: - case ARM64::LD1Rv4h: - case ARM64::LD1Rv4h_POST: - case ARM64::LD1Rv8h: - case ARM64::LD1Rv8h_POST: - case ARM64::LD1Rv4s: - case ARM64::LD1Rv4s_POST: - case ARM64::LD1Rv2s: - case ARM64::LD1Rv2s_POST: - case ARM64::LD1Rv1d: - case ARM64::LD1Rv1d_POST: - case ARM64::LD1Rv2d: - case ARM64::LD1Rv2d_POST: - case ARM64::LD2Rv8b: - case ARM64::LD2Rv8b_POST: - case ARM64::LD2Rv16b: - case ARM64::LD2Rv16b_POST: - case ARM64::LD2Rv4h: - case ARM64::LD2Rv4h_POST: - case ARM64::LD2Rv8h: - case ARM64::LD2Rv8h_POST: - case ARM64::LD2Rv2s: - case ARM64::LD2Rv2s_POST: - case 
ARM64::LD2Rv4s: - case ARM64::LD2Rv4s_POST: - case ARM64::LD2Rv2d: - case ARM64::LD2Rv2d_POST: - case ARM64::LD2Rv1d: - case ARM64::LD2Rv1d_POST: - case ARM64::LD3Rv8b: - case ARM64::LD3Rv8b_POST: - case ARM64::LD3Rv16b: - case ARM64::LD3Rv16b_POST: - case ARM64::LD3Rv4h: - case ARM64::LD3Rv4h_POST: - case ARM64::LD3Rv8h: - case ARM64::LD3Rv8h_POST: - case ARM64::LD3Rv2s: - case ARM64::LD3Rv2s_POST: - case ARM64::LD3Rv4s: - case ARM64::LD3Rv4s_POST: - case ARM64::LD3Rv2d: - case ARM64::LD3Rv2d_POST: - case ARM64::LD3Rv1d: - case ARM64::LD3Rv1d_POST: - case ARM64::LD4Rv8b: - case ARM64::LD4Rv8b_POST: - case ARM64::LD4Rv16b: - case ARM64::LD4Rv16b_POST: - case ARM64::LD4Rv4h: - case ARM64::LD4Rv4h_POST: - case ARM64::LD4Rv8h: - case ARM64::LD4Rv8h_POST: - case ARM64::LD4Rv2s: - case ARM64::LD4Rv2s_POST: - case ARM64::LD4Rv4s: - case ARM64::LD4Rv4s_POST: - case ARM64::LD4Rv2d: - case ARM64::LD4Rv2d_POST: - case ARM64::LD4Rv1d: - case ARM64::LD4Rv1d_POST: - break; - default: - Inst.addOperand(MCOperand::CreateImm(index)); - } - - DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); - - switch (Inst.getOpcode()) { - case ARM64::ST1i8_POST: - case ARM64::ST1i16_POST: - case ARM64::ST1i32_POST: - case ARM64::ST1i64_POST: - case ARM64::LD1Rv8b_POST: - case ARM64::LD1Rv16b_POST: - case ARM64::LD1Rv4h_POST: - case ARM64::LD1Rv8h_POST: - case ARM64::LD1Rv2s_POST: - case ARM64::LD1Rv4s_POST: - case ARM64::LD1Rv1d_POST: - case ARM64::LD1Rv2d_POST: - case ARM64::ST2i8_POST: - case ARM64::ST2i16_POST: - case ARM64::ST2i32_POST: - case ARM64::ST2i64_POST: - case ARM64::LD2Rv8b_POST: - case ARM64::LD2Rv16b_POST: - case ARM64::LD2Rv4h_POST: - case ARM64::LD2Rv8h_POST: - case ARM64::LD2Rv2s_POST: - case ARM64::LD2Rv4s_POST: - case ARM64::LD2Rv2d_POST: - case ARM64::LD2Rv1d_POST: - case ARM64::ST3i8_POST: - case ARM64::ST3i16_POST: - case ARM64::ST3i32_POST: - case ARM64::ST3i64_POST: - case ARM64::LD3Rv8b_POST: - case ARM64::LD3Rv16b_POST: - case ARM64::LD3Rv4h_POST: - case ARM64::LD3Rv8h_POST: - case ARM64::LD3Rv2s_POST: - case ARM64::LD3Rv4s_POST: - case ARM64::LD3Rv2d_POST: - case ARM64::LD3Rv1d_POST: - case ARM64::ST4i8_POST: - case ARM64::ST4i16_POST: - case ARM64::ST4i32_POST: - case ARM64::ST4i64_POST: - case ARM64::LD4Rv8b_POST: - case ARM64::LD4Rv16b_POST: - case ARM64::LD4Rv4h_POST: - case ARM64::LD4Rv8h_POST: - case ARM64::LD4Rv2s_POST: - case ARM64::LD4Rv4s_POST: - case ARM64::LD4Rv2d_POST: - case ARM64::LD4Rv1d_POST: - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - break; - } - return Success; -} - -static DecodeStatus DecodeSIMDLdStSingleTied(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { - uint64_t Rt = fieldFromInstruction(insn, 0, 5); - uint64_t Rn = fieldFromInstruction(insn, 5, 5); - uint64_t Rm = fieldFromInstruction(insn, 16, 5); - uint64_t size = fieldFromInstruction(insn, 10, 2); - uint64_t S = fieldFromInstruction(insn, 12, 1); - uint64_t Q = fieldFromInstruction(insn, 30, 1); - uint64_t index = 0; - - switch (Inst.getOpcode()) { - case ARM64::LD1i8: - case ARM64::LD1i8_POST: - case ARM64::LD2i8: - case ARM64::LD2i8_POST: - case ARM64::LD3i8_POST: - case ARM64::LD3i8: - case ARM64::LD4i8_POST: - case ARM64::LD4i8: - index = (Q << 3) | (S << 2) | size; - break; - case ARM64::LD1i16: - case ARM64::LD1i16_POST: - case ARM64::LD2i16: - case ARM64::LD2i16_POST: - case ARM64::LD3i16_POST: - case ARM64::LD3i16: - case ARM64::LD4i16_POST: - case ARM64::LD4i16: - index = (Q << 2) | (S << 1) | (size >> 1); - break; - case ARM64::LD1i32: - case 
ARM64::LD1i32_POST: - case ARM64::LD2i32: - case ARM64::LD2i32_POST: - case ARM64::LD3i32_POST: - case ARM64::LD3i32: - case ARM64::LD4i32_POST: - case ARM64::LD4i32: - index = (Q << 1) | S; - break; - case ARM64::LD1i64: - case ARM64::LD1i64_POST: - case ARM64::LD2i64: - case ARM64::LD2i64_POST: - case ARM64::LD3i64_POST: - case ARM64::LD3i64: - case ARM64::LD4i64_POST: - case ARM64::LD4i64: - index = Q; - break; - } - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::LD1i8: - case ARM64::LD1i8_POST: - case ARM64::LD1i16: - case ARM64::LD1i16_POST: - case ARM64::LD1i32: - case ARM64::LD1i32_POST: - case ARM64::LD1i64: - case ARM64::LD1i64_POST: - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD2i8: - case ARM64::LD2i8_POST: - case ARM64::LD2i16: - case ARM64::LD2i16_POST: - case ARM64::LD2i32: - case ARM64::LD2i32_POST: - case ARM64::LD2i64: - case ARM64::LD2i64_POST: - DecodeQQRegisterClass(Inst, Rt, Addr, Decoder); - DecodeQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD3i8: - case ARM64::LD3i8_POST: - case ARM64::LD3i16: - case ARM64::LD3i16_POST: - case ARM64::LD3i32: - case ARM64::LD3i32_POST: - case ARM64::LD3i64: - case ARM64::LD3i64_POST: - DecodeQQQRegisterClass(Inst, Rt, Addr, Decoder); - DecodeQQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD4i8: - case ARM64::LD4i8_POST: - case ARM64::LD4i16: - case ARM64::LD4i16_POST: - case ARM64::LD4i32: - case ARM64::LD4i32_POST: - case ARM64::LD4i64: - case ARM64::LD4i64_POST: - DecodeQQQQRegisterClass(Inst, Rt, Addr, Decoder); - DecodeQQQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - } - - Inst.addOperand(MCOperand::CreateImm(index)); - DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); - - switch (Inst.getOpcode()) { - case ARM64::LD1i8_POST: - case ARM64::LD1i16_POST: - case ARM64::LD1i32_POST: - case ARM64::LD1i64_POST: - case ARM64::LD2i8_POST: - case ARM64::LD2i16_POST: - case ARM64::LD2i32_POST: - case ARM64::LD2i64_POST: - case ARM64::LD3i8_POST: - case ARM64::LD3i16_POST: - case ARM64::LD3i32_POST: - case ARM64::LD3i64_POST: - case ARM64::LD4i8_POST: - case ARM64::LD4i16_POST: - case ARM64::LD4i32_POST: - case ARM64::LD4i64_POST: - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - break; - } - return Success; -} diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.h b/lib/Target/ARM64/Disassembler/ARM64Disassembler.h deleted file mode 100644 index 35efc8d..0000000 --- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.h +++ /dev/null @@ -1,54 +0,0 @@ -//===- ARM64Disassembler.h - Disassembler for ARM64 -------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64DISASSEMBLER_H -#define ARM64DISASSEMBLER_H - -#include "llvm/MC/MCDisassembler.h" - -namespace llvm { - -class MCInst; -class MemoryObject; -class raw_ostream; - -class ARM64Disassembler : public MCDisassembler { -public: - ARM64Disassembler(const MCSubtargetInfo &STI) : MCDisassembler(STI) {} - - ~ARM64Disassembler() {} - - /// getInstruction - See MCDisassembler. 
- MCDisassembler::DecodeStatus getInstruction(MCInst &instr, uint64_t &size, - const MemoryObject ®ion, - uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const; - - /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic - /// operand in place of the immediate Value in the MCInst. The immediate - /// Value has not had any PC adjustment made by the caller. If the instruction - /// adds the PC to the immediate Value then InstsAddsAddressToValue is true, - /// else false. If the getOpInfo() function was set as part of the - /// setupForSymbolicDisassembly() call then that function is called to get any - /// symbolic information at the Address for this instrution. If that returns - /// non-zero then the symbolic information it returns is used to create an - /// MCExpr and that is added as an operand to the MCInst. This function - /// returns true if it adds an operand to the MCInst and false otherwise. - bool tryAddingSymbolicOperand(uint64_t Address, int Value, - bool InstsAddsAddressToValue, uint64_t InstSize, - MCInst &MI, uint32_t insn = 0) const; -}; - -} // namespace llvm - -#endif diff --git a/lib/Target/ARM64/Disassembler/CMakeLists.txt b/lib/Target/ARM64/Disassembler/CMakeLists.txt deleted file mode 100644 index ad998c2..0000000 --- a/lib/Target/ARM64/Disassembler/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARM64Disassembler - ARM64Disassembler.cpp - ) -# workaround for hanging compilation on MSVC8, 9 and 10 -#if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) -#set_property( -# SOURCE ARMDisassembler.cpp -# PROPERTY COMPILE_FLAGS "/Od" -# ) -#endif() -add_dependencies(LLVMARM64Disassembler ARM64CommonTableGen) diff --git a/lib/Target/ARM64/Disassembler/LLVMBuild.txt b/lib/Target/ARM64/Disassembler/LLVMBuild.txt deleted file mode 100644 index 5935ee6..0000000 --- a/lib/Target/ARM64/Disassembler/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/ARM64/Disassembler/LLVMBuild.txt ------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ARM64Disassembler -parent = ARM64 -required_libraries = ARM64Desc ARM64Info MC Support -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/Disassembler/Makefile b/lib/Target/ARM64/Disassembler/Makefile deleted file mode 100644 index 479d00c..0000000 --- a/lib/Target/ARM64/Disassembler/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/ARM64/Disassembler/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Disassembler - -# Hack: we need to include 'main' arm target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. 
-I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp b/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp deleted file mode 100644 index bb90707..0000000 --- a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp +++ /dev/null @@ -1,1428 +0,0 @@ -//===-- ARM64InstPrinter.cpp - Convert ARM64 MCInst to assembly syntax ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints an ARM64 MCInst to a .s file. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "ARM64InstPrinter.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -#define GET_INSTRUCTION_NAME -#define PRINT_ALIAS_INSTR -#include "ARM64GenAsmWriter.inc" -#define GET_INSTRUCTION_NAME -#define PRINT_ALIAS_INSTR -#include "ARM64GenAsmWriter1.inc" - -ARM64InstPrinter::ARM64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); -} - -ARM64AppleInstPrinter::ARM64AppleInstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : ARM64InstPrinter(MAI, MII, MRI, STI) {} - -void ARM64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - // This is for .cfi directives. - OS << getRegisterName(RegNo); -} - -void ARM64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - // Check for special encodings and print the cannonical alias instead. - - unsigned Opcode = MI->getOpcode(); - - if (Opcode == ARM64::SYS || Opcode == ARM64::SYSxt) - if (printSysAlias(MI, O)) { - printAnnotation(O, Annot); - return; - } - - // TBZ/TBNZ should print the register operand as a Wreg if the bit - // number is < 32. - if ((Opcode == ARM64::TBNZ || Opcode == ARM64::TBZ) && - MI->getOperand(1).getImm() < 32) { - MCInst newMI = *MI; - unsigned Reg = MI->getOperand(0).getReg(); - newMI.getOperand(0).setReg(getWRegFromXReg(Reg)); - printInstruction(&newMI, O); - printAnnotation(O, Annot); - return; - } - - // SBFM/UBFM should print to a nicer aliased form if possible. - if (Opcode == ARM64::SBFMXri || Opcode == ARM64::SBFMWri || - Opcode == ARM64::UBFMXri || Opcode == ARM64::UBFMWri) { - const MCOperand &Op0 = MI->getOperand(0); - const MCOperand &Op1 = MI->getOperand(1); - const MCOperand &Op2 = MI->getOperand(2); - const MCOperand &Op3 = MI->getOperand(3); - - if (Op2.isImm() && Op2.getImm() == 0 && Op3.isImm()) { - bool IsSigned = (Opcode == ARM64::SBFMXri || Opcode == ARM64::SBFMWri); - const char *AsmMnemonic = 0; - - switch (Op3.getImm()) { - default: - break; - case 7: - AsmMnemonic = IsSigned ? "sxtb" : "uxtb"; - break; - case 15: - AsmMnemonic = IsSigned ? "sxth" : "uxth"; - break; - case 31: - AsmMnemonic = IsSigned ? 
"sxtw" : "uxtw"; - break; - } - - if (AsmMnemonic) { - O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg()) - << ", " << getRegisterName(Op1.getReg()); - printAnnotation(O, Annot); - return; - } - } - - // All immediate shifts are aliases, implemented using the Bitfield - // instruction. In all cases the immediate shift amount shift must be in - // the range 0 to (reg.size -1). - if (Op2.isImm() && Op3.isImm()) { - const char *AsmMnemonic = 0; - int shift = 0; - int64_t immr = Op2.getImm(); - int64_t imms = Op3.getImm(); - if (Opcode == ARM64::UBFMWri && imms != 0x1F && ((imms + 1) == immr)) { - AsmMnemonic = "lsl"; - shift = 31 - imms; - } else if (Opcode == ARM64::UBFMXri && imms != 0x3f && - ((imms + 1 == immr))) { - AsmMnemonic = "lsl"; - shift = 63 - imms; - } else if (Opcode == ARM64::UBFMWri && imms == 0x1f) { - AsmMnemonic = "lsr"; - shift = immr; - } else if (Opcode == ARM64::UBFMXri && imms == 0x3f) { - AsmMnemonic = "lsr"; - shift = immr; - } else if (Opcode == ARM64::SBFMWri && imms == 0x1f) { - AsmMnemonic = "asr"; - shift = immr; - } else if (Opcode == ARM64::SBFMXri && imms == 0x3f) { - AsmMnemonic = "asr"; - shift = immr; - } - if (AsmMnemonic) { - O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg()) - << ", " << getRegisterName(Op1.getReg()) << ", #" << shift; - printAnnotation(O, Annot); - return; - } - } - } - - // Symbolic operands for MOVZ, MOVN and MOVK already imply a shift - // (e.g. :gottprel_g1: is always going to be "lsl #16") so it should not be - // printed. - if ((Opcode == ARM64::MOVZXi || Opcode == ARM64::MOVZWi || - Opcode == ARM64::MOVNXi || Opcode == ARM64::MOVNWi) && - MI->getOperand(1).isExpr()) { - if (Opcode == ARM64::MOVZXi || Opcode == ARM64::MOVZWi) - O << "\tmovz\t"; - else - O << "\tmovn\t"; - - O << getRegisterName(MI->getOperand(0).getReg()) << ", #" - << *MI->getOperand(1).getExpr(); - return; - } - - if ((Opcode == ARM64::MOVKXi || Opcode == ARM64::MOVKWi) && - MI->getOperand(2).isExpr()) { - O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #" - << *MI->getOperand(2).getExpr(); - return; - } - - // ANDS WZR, Wn, #imm ==> TST Wn, #imm - // ANDS XZR, Xn, #imm ==> TST Xn, #imm - if (Opcode == ARM64::ANDSWri && MI->getOperand(0).getReg() == ARM64::WZR) { - O << "\ttst\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printLogicalImm32(MI, 2, O); - return; - } - if (Opcode == ARM64::ANDSXri && MI->getOperand(0).getReg() == ARM64::XZR) { - O << "\ttst\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printLogicalImm64(MI, 2, O); - return; - } - // ANDS WZR, Wn, Wm{, lshift #imm} ==> TST Wn{, lshift #imm} - // ANDS XZR, Xn, Xm{, lshift #imm} ==> TST Xn{, lshift #imm} - if ((Opcode == ARM64::ANDSWrs && MI->getOperand(0).getReg() == ARM64::WZR) || - (Opcode == ARM64::ANDSXrs && MI->getOperand(0).getReg() == ARM64::XZR)) { - O << "\ttst\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printShiftedRegister(MI, 2, O); - return; - } - - // SUBS WZR, Wn, #imm ==> CMP Wn, #imm - // SUBS XZR, Xn, #imm ==> CMP Xn, #imm - if ((Opcode == ARM64::SUBSWri && MI->getOperand(0).getReg() == ARM64::WZR) || - (Opcode == ARM64::SUBSXri && MI->getOperand(0).getReg() == ARM64::XZR)) { - O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printAddSubImm(MI, 2, O); - return; - } - // SUBS WZR, Wn, Wm{, lshift #imm} ==> CMP Wn, Wm{, lshift #imm} - // SUBS XZR, Xn, Xm{, lshift #imm} ==> CMP Xn, Xm{, lshift #imm} - if ((Opcode == ARM64::SUBSWrs && MI->getOperand(0).getReg() == 
ARM64::WZR) || - (Opcode == ARM64::SUBSXrs && MI->getOperand(0).getReg() == ARM64::XZR)) { - O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printShiftedRegister(MI, 2, O); - return; - } - // SUBS XZR, Xn, Wm, uxtb #imm ==> CMP Xn, uxtb #imm - // SUBS WZR, Wn, Xm, uxtb #imm ==> CMP Wn, uxtb #imm - if ((Opcode == ARM64::SUBSXrx && MI->getOperand(0).getReg() == ARM64::XZR) || - (Opcode == ARM64::SUBSWrx && MI->getOperand(0).getReg() == ARM64::WZR)) { - O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printExtendedRegister(MI, 2, O); - return; - } - // SUBS XZR, Xn, Xm, uxtx #imm ==> CMP Xn, uxtb #imm - if (Opcode == ARM64::SUBSXrx64 && MI->getOperand(0).getReg() == ARM64::XZR) { - O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", " - << getRegisterName(MI->getOperand(2).getReg()); - printExtend(MI, 3, O); - return; - } - - // ADDS WZR, Wn, #imm ==> CMN Wn, #imm - // ADDS XZR, Xn, #imm ==> CMN Xn, #imm - if ((Opcode == ARM64::ADDSWri && MI->getOperand(0).getReg() == ARM64::WZR) || - (Opcode == ARM64::ADDSXri && MI->getOperand(0).getReg() == ARM64::XZR)) { - O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printAddSubImm(MI, 2, O); - return; - } - // ADDS WZR, Wn, Wm{, lshift #imm} ==> CMN Wn, Wm{, lshift #imm} - // ADDS XZR, Xn, Xm{, lshift #imm} ==> CMN Xn, Xm{, lshift #imm} - if ((Opcode == ARM64::ADDSWrs && MI->getOperand(0).getReg() == ARM64::WZR) || - (Opcode == ARM64::ADDSXrs && MI->getOperand(0).getReg() == ARM64::XZR)) { - O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printShiftedRegister(MI, 2, O); - return; - } - // ADDS XZR, Xn, Wm, uxtb #imm ==> CMN Xn, uxtb #imm - if (Opcode == ARM64::ADDSXrx && MI->getOperand(0).getReg() == ARM64::XZR) { - O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printExtendedRegister(MI, 2, O); - return; - } - // ADDS XZR, Xn, Xm, uxtx #imm ==> CMN Xn, uxtb #imm - if (Opcode == ARM64::ADDSXrx64 && MI->getOperand(0).getReg() == ARM64::XZR) { - O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", " - << getRegisterName(MI->getOperand(2).getReg()); - printExtend(MI, 3, O); - return; - } - - if (!printAliasInstr(MI, O)) - printInstruction(MI, O); - - printAnnotation(O, Annot); -} - -static bool isTblTbxInstruction(unsigned Opcode, StringRef &Layout, - bool &IsTbx) { - switch (Opcode) { - case ARM64::TBXv8i8One: - case ARM64::TBXv8i8Two: - case ARM64::TBXv8i8Three: - case ARM64::TBXv8i8Four: - IsTbx = true; - Layout = ".8b"; - return true; - case ARM64::TBLv8i8One: - case ARM64::TBLv8i8Two: - case ARM64::TBLv8i8Three: - case ARM64::TBLv8i8Four: - IsTbx = false; - Layout = ".8b"; - return true; - case ARM64::TBXv16i8One: - case ARM64::TBXv16i8Two: - case ARM64::TBXv16i8Three: - case ARM64::TBXv16i8Four: - IsTbx = true; - Layout = ".16b"; - return true; - case ARM64::TBLv16i8One: - case ARM64::TBLv16i8Two: - case ARM64::TBLv16i8Three: - case ARM64::TBLv16i8Four: - IsTbx = false; - Layout = ".16b"; - return true; - default: - return false; - } -} - -struct LdStNInstrDesc { - unsigned Opcode; - const char *Mnemonic; - const char *Layout; - int LaneOperand; - int NaturalOffset; -}; - -static LdStNInstrDesc LdStNInstInfo[] = { - { ARM64::LD1i8, "ld1", ".b", 2, 0 }, - { ARM64::LD1i16, "ld1", ".h", 2, 0 }, - { ARM64::LD1i32, "ld1", ".s", 2, 0 }, - { ARM64::LD1i64, "ld1", ".d", 2, 0 }, - { ARM64::LD1i8_POST, "ld1", ".b", 2, 1 }, - { ARM64::LD1i16_POST, "ld1", ".h", 2, 2 }, - { ARM64::LD1i32_POST, "ld1", ".s", 2, 
4 }, - { ARM64::LD1i64_POST, "ld1", ".d", 2, 8 }, - { ARM64::LD1Rv16b, "ld1r", ".16b", 0, 0 }, - { ARM64::LD1Rv8h, "ld1r", ".8h", 0, 0 }, - { ARM64::LD1Rv4s, "ld1r", ".4s", 0, 0 }, - { ARM64::LD1Rv2d, "ld1r", ".2d", 0, 0 }, - { ARM64::LD1Rv8b, "ld1r", ".8b", 0, 0 }, - { ARM64::LD1Rv4h, "ld1r", ".4h", 0, 0 }, - { ARM64::LD1Rv2s, "ld1r", ".2s", 0, 0 }, - { ARM64::LD1Rv1d, "ld1r", ".1d", 0, 0 }, - { ARM64::LD1Rv16b_POST, "ld1r", ".16b", 0, 1 }, - { ARM64::LD1Rv8h_POST, "ld1r", ".8h", 0, 2 }, - { ARM64::LD1Rv4s_POST, "ld1r", ".4s", 0, 4 }, - { ARM64::LD1Rv2d_POST, "ld1r", ".2d", 0, 8 }, - { ARM64::LD1Rv8b_POST, "ld1r", ".8b", 0, 1 }, - { ARM64::LD1Rv4h_POST, "ld1r", ".4h", 0, 2 }, - { ARM64::LD1Rv2s_POST, "ld1r", ".2s", 0, 4 }, - { ARM64::LD1Rv1d_POST, "ld1r", ".1d", 0, 8 }, - { ARM64::LD1Onev16b, "ld1", ".16b", 0, 0 }, - { ARM64::LD1Onev8h, "ld1", ".8h", 0, 0 }, - { ARM64::LD1Onev4s, "ld1", ".4s", 0, 0 }, - { ARM64::LD1Onev2d, "ld1", ".2d", 0, 0 }, - { ARM64::LD1Onev8b, "ld1", ".8b", 0, 0 }, - { ARM64::LD1Onev4h, "ld1", ".4h", 0, 0 }, - { ARM64::LD1Onev2s, "ld1", ".2s", 0, 0 }, - { ARM64::LD1Onev1d, "ld1", ".1d", 0, 0 }, - { ARM64::LD1Onev16b_POST, "ld1", ".16b", 0, 16 }, - { ARM64::LD1Onev8h_POST, "ld1", ".8h", 0, 16 }, - { ARM64::LD1Onev4s_POST, "ld1", ".4s", 0, 16 }, - { ARM64::LD1Onev2d_POST, "ld1", ".2d", 0, 16 }, - { ARM64::LD1Onev8b_POST, "ld1", ".8b", 0, 8 }, - { ARM64::LD1Onev4h_POST, "ld1", ".4h", 0, 8 }, - { ARM64::LD1Onev2s_POST, "ld1", ".2s", 0, 8 }, - { ARM64::LD1Onev1d_POST, "ld1", ".1d", 0, 8 }, - { ARM64::LD1Twov16b, "ld1", ".16b", 0, 0 }, - { ARM64::LD1Twov8h, "ld1", ".8h", 0, 0 }, - { ARM64::LD1Twov4s, "ld1", ".4s", 0, 0 }, - { ARM64::LD1Twov2d, "ld1", ".2d", 0, 0 }, - { ARM64::LD1Twov8b, "ld1", ".8b", 0, 0 }, - { ARM64::LD1Twov4h, "ld1", ".4h", 0, 0 }, - { ARM64::LD1Twov2s, "ld1", ".2s", 0, 0 }, - { ARM64::LD1Twov1d, "ld1", ".1d", 0, 0 }, - { ARM64::LD1Twov16b_POST, "ld1", ".16b", 0, 32 }, - { ARM64::LD1Twov8h_POST, "ld1", ".8h", 0, 32 }, - { ARM64::LD1Twov4s_POST, "ld1", ".4s", 0, 32 }, - { ARM64::LD1Twov2d_POST, "ld1", ".2d", 0, 32 }, - { ARM64::LD1Twov8b_POST, "ld1", ".8b", 0, 16 }, - { ARM64::LD1Twov4h_POST, "ld1", ".4h", 0, 16 }, - { ARM64::LD1Twov2s_POST, "ld1", ".2s", 0, 16 }, - { ARM64::LD1Twov1d_POST, "ld1", ".1d", 0, 16 }, - { ARM64::LD1Threev16b, "ld1", ".16b", 0, 0 }, - { ARM64::LD1Threev8h, "ld1", ".8h", 0, 0 }, - { ARM64::LD1Threev4s, "ld1", ".4s", 0, 0 }, - { ARM64::LD1Threev2d, "ld1", ".2d", 0, 0 }, - { ARM64::LD1Threev8b, "ld1", ".8b", 0, 0 }, - { ARM64::LD1Threev4h, "ld1", ".4h", 0, 0 }, - { ARM64::LD1Threev2s, "ld1", ".2s", 0, 0 }, - { ARM64::LD1Threev1d, "ld1", ".1d", 0, 0 }, - { ARM64::LD1Threev16b_POST, "ld1", ".16b", 0, 48 }, - { ARM64::LD1Threev8h_POST, "ld1", ".8h", 0, 48 }, - { ARM64::LD1Threev4s_POST, "ld1", ".4s", 0, 48 }, - { ARM64::LD1Threev2d_POST, "ld1", ".2d", 0, 48 }, - { ARM64::LD1Threev8b_POST, "ld1", ".8b", 0, 24 }, - { ARM64::LD1Threev4h_POST, "ld1", ".4h", 0, 24 }, - { ARM64::LD1Threev2s_POST, "ld1", ".2s", 0, 24 }, - { ARM64::LD1Threev1d_POST, "ld1", ".1d", 0, 24 }, - { ARM64::LD1Fourv16b, "ld1", ".16b", 0, 0 }, - { ARM64::LD1Fourv8h, "ld1", ".8h", 0, 0 }, - { ARM64::LD1Fourv4s, "ld1", ".4s", 0, 0 }, - { ARM64::LD1Fourv2d, "ld1", ".2d", 0, 0 }, - { ARM64::LD1Fourv8b, "ld1", ".8b", 0, 0 }, - { ARM64::LD1Fourv4h, "ld1", ".4h", 0, 0 }, - { ARM64::LD1Fourv2s, "ld1", ".2s", 0, 0 }, - { ARM64::LD1Fourv1d, "ld1", ".1d", 0, 0 }, - { ARM64::LD1Fourv16b_POST, "ld1", ".16b", 0, 64 }, - { ARM64::LD1Fourv8h_POST, "ld1", ".8h", 0, 64 }, - { 
ARM64::LD1Fourv4s_POST, "ld1", ".4s", 0, 64 }, - { ARM64::LD1Fourv2d_POST, "ld1", ".2d", 0, 64 }, - { ARM64::LD1Fourv8b_POST, "ld1", ".8b", 0, 32 }, - { ARM64::LD1Fourv4h_POST, "ld1", ".4h", 0, 32 }, - { ARM64::LD1Fourv2s_POST, "ld1", ".2s", 0, 32 }, - { ARM64::LD1Fourv1d_POST, "ld1", ".1d", 0, 32 }, - { ARM64::LD2i8, "ld2", ".b", 2, 0 }, - { ARM64::LD2i16, "ld2", ".h", 2, 0 }, - { ARM64::LD2i32, "ld2", ".s", 2, 0 }, - { ARM64::LD2i64, "ld2", ".d", 2, 0 }, - { ARM64::LD2i8_POST, "ld2", ".b", 2, 2 }, - { ARM64::LD2i16_POST, "ld2", ".h", 2, 4 }, - { ARM64::LD2i32_POST, "ld2", ".s", 2, 8 }, - { ARM64::LD2i64_POST, "ld2", ".d", 2, 16 }, - { ARM64::LD2Rv16b, "ld2r", ".16b", 0, 0 }, - { ARM64::LD2Rv8h, "ld2r", ".8h", 0, 0 }, - { ARM64::LD2Rv4s, "ld2r", ".4s", 0, 0 }, - { ARM64::LD2Rv2d, "ld2r", ".2d", 0, 0 }, - { ARM64::LD2Rv8b, "ld2r", ".8b", 0, 0 }, - { ARM64::LD2Rv4h, "ld2r", ".4h", 0, 0 }, - { ARM64::LD2Rv2s, "ld2r", ".2s", 0, 0 }, - { ARM64::LD2Rv1d, "ld2r", ".1d", 0, 0 }, - { ARM64::LD2Rv16b_POST, "ld2r", ".16b", 0, 2 }, - { ARM64::LD2Rv8h_POST, "ld2r", ".8h", 0, 4 }, - { ARM64::LD2Rv4s_POST, "ld2r", ".4s", 0, 8 }, - { ARM64::LD2Rv2d_POST, "ld2r", ".2d", 0, 16 }, - { ARM64::LD2Rv8b_POST, "ld2r", ".8b", 0, 2 }, - { ARM64::LD2Rv4h_POST, "ld2r", ".4h", 0, 4 }, - { ARM64::LD2Rv2s_POST, "ld2r", ".2s", 0, 8 }, - { ARM64::LD2Rv1d_POST, "ld2r", ".1d", 0, 16 }, - { ARM64::LD2Twov16b, "ld2", ".16b", 0, 0 }, - { ARM64::LD2Twov8h, "ld2", ".8h", 0, 0 }, - { ARM64::LD2Twov4s, "ld2", ".4s", 0, 0 }, - { ARM64::LD2Twov2d, "ld2", ".2d", 0, 0 }, - { ARM64::LD2Twov8b, "ld2", ".8b", 0, 0 }, - { ARM64::LD2Twov4h, "ld2", ".4h", 0, 0 }, - { ARM64::LD2Twov2s, "ld2", ".2s", 0, 0 }, - { ARM64::LD2Twov16b_POST, "ld2", ".16b", 0, 32 }, - { ARM64::LD2Twov8h_POST, "ld2", ".8h", 0, 32 }, - { ARM64::LD2Twov4s_POST, "ld2", ".4s", 0, 32 }, - { ARM64::LD2Twov2d_POST, "ld2", ".2d", 0, 32 }, - { ARM64::LD2Twov8b_POST, "ld2", ".8b", 0, 16 }, - { ARM64::LD2Twov4h_POST, "ld2", ".4h", 0, 16 }, - { ARM64::LD2Twov2s_POST, "ld2", ".2s", 0, 16 }, - { ARM64::LD3i8, "ld3", ".b", 2, 0 }, - { ARM64::LD3i16, "ld3", ".h", 2, 0 }, - { ARM64::LD3i32, "ld3", ".s", 2, 0 }, - { ARM64::LD3i64, "ld3", ".d", 2, 0 }, - { ARM64::LD3i8_POST, "ld3", ".b", 2, 3 }, - { ARM64::LD3i16_POST, "ld3", ".h", 2, 6 }, - { ARM64::LD3i32_POST, "ld3", ".s", 2, 12 }, - { ARM64::LD3i64_POST, "ld3", ".d", 2, 24 }, - { ARM64::LD3Rv16b, "ld3r", ".16b", 0, 0 }, - { ARM64::LD3Rv8h, "ld3r", ".8h", 0, 0 }, - { ARM64::LD3Rv4s, "ld3r", ".4s", 0, 0 }, - { ARM64::LD3Rv2d, "ld3r", ".2d", 0, 0 }, - { ARM64::LD3Rv8b, "ld3r", ".8b", 0, 0 }, - { ARM64::LD3Rv4h, "ld3r", ".4h", 0, 0 }, - { ARM64::LD3Rv2s, "ld3r", ".2s", 0, 0 }, - { ARM64::LD3Rv1d, "ld3r", ".1d", 0, 0 }, - { ARM64::LD3Rv16b_POST, "ld3r", ".16b", 0, 3 }, - { ARM64::LD3Rv8h_POST, "ld3r", ".8h", 0, 6 }, - { ARM64::LD3Rv4s_POST, "ld3r", ".4s", 0, 12 }, - { ARM64::LD3Rv2d_POST, "ld3r", ".2d", 0, 24 }, - { ARM64::LD3Rv8b_POST, "ld3r", ".8b", 0, 3 }, - { ARM64::LD3Rv4h_POST, "ld3r", ".4h", 0, 6 }, - { ARM64::LD3Rv2s_POST, "ld3r", ".2s", 0, 12 }, - { ARM64::LD3Rv1d_POST, "ld3r", ".1d", 0, 24 }, - { ARM64::LD3Threev16b, "ld3", ".16b", 0, 0 }, - { ARM64::LD3Threev8h, "ld3", ".8h", 0, 0 }, - { ARM64::LD3Threev4s, "ld3", ".4s", 0, 0 }, - { ARM64::LD3Threev2d, "ld3", ".2d", 0, 0 }, - { ARM64::LD3Threev8b, "ld3", ".8b", 0, 0 }, - { ARM64::LD3Threev4h, "ld3", ".4h", 0, 0 }, - { ARM64::LD3Threev2s, "ld3", ".2s", 0, 0 }, - { ARM64::LD3Threev16b_POST, "ld3", ".16b", 0, 48 }, - { ARM64::LD3Threev8h_POST, "ld3", ".8h", 0, 48 }, - { 
ARM64::LD3Threev4s_POST, "ld3", ".4s", 0, 48 }, - { ARM64::LD3Threev2d_POST, "ld3", ".2d", 0, 48 }, - { ARM64::LD3Threev8b_POST, "ld3", ".8b", 0, 24 }, - { ARM64::LD3Threev4h_POST, "ld3", ".4h", 0, 24 }, - { ARM64::LD3Threev2s_POST, "ld3", ".2s", 0, 24 }, - { ARM64::LD4i8, "ld4", ".b", 2, 0 }, - { ARM64::LD4i16, "ld4", ".h", 2, 0 }, - { ARM64::LD4i32, "ld4", ".s", 2, 0 }, - { ARM64::LD4i64, "ld4", ".d", 2, 0 }, - { ARM64::LD4i8_POST, "ld4", ".b", 2, 4 }, - { ARM64::LD4i16_POST, "ld4", ".h", 2, 8 }, - { ARM64::LD4i32_POST, "ld4", ".s", 2, 16 }, - { ARM64::LD4i64_POST, "ld4", ".d", 2, 32 }, - { ARM64::LD4Rv16b, "ld4r", ".16b", 0, 0 }, - { ARM64::LD4Rv8h, "ld4r", ".8h", 0, 0 }, - { ARM64::LD4Rv4s, "ld4r", ".4s", 0, 0 }, - { ARM64::LD4Rv2d, "ld4r", ".2d", 0, 0 }, - { ARM64::LD4Rv8b, "ld4r", ".8b", 0, 0 }, - { ARM64::LD4Rv4h, "ld4r", ".4h", 0, 0 }, - { ARM64::LD4Rv2s, "ld4r", ".2s", 0, 0 }, - { ARM64::LD4Rv1d, "ld4r", ".1d", 0, 0 }, - { ARM64::LD4Rv16b_POST, "ld4r", ".16b", 0, 4 }, - { ARM64::LD4Rv8h_POST, "ld4r", ".8h", 0, 8 }, - { ARM64::LD4Rv4s_POST, "ld4r", ".4s", 0, 16 }, - { ARM64::LD4Rv2d_POST, "ld4r", ".2d", 0, 32 }, - { ARM64::LD4Rv8b_POST, "ld4r", ".8b", 0, 4 }, - { ARM64::LD4Rv4h_POST, "ld4r", ".4h", 0, 8 }, - { ARM64::LD4Rv2s_POST, "ld4r", ".2s", 0, 16 }, - { ARM64::LD4Rv1d_POST, "ld4r", ".1d", 0, 32 }, - { ARM64::LD4Fourv16b, "ld4", ".16b", 0, 0 }, - { ARM64::LD4Fourv8h, "ld4", ".8h", 0, 0 }, - { ARM64::LD4Fourv4s, "ld4", ".4s", 0, 0 }, - { ARM64::LD4Fourv2d, "ld4", ".2d", 0, 0 }, - { ARM64::LD4Fourv8b, "ld4", ".8b", 0, 0 }, - { ARM64::LD4Fourv4h, "ld4", ".4h", 0, 0 }, - { ARM64::LD4Fourv2s, "ld4", ".2s", 0, 0 }, - { ARM64::LD4Fourv16b_POST, "ld4", ".16b", 0, 64 }, - { ARM64::LD4Fourv8h_POST, "ld4", ".8h", 0, 64 }, - { ARM64::LD4Fourv4s_POST, "ld4", ".4s", 0, 64 }, - { ARM64::LD4Fourv2d_POST, "ld4", ".2d", 0, 64 }, - { ARM64::LD4Fourv8b_POST, "ld4", ".8b", 0, 32 }, - { ARM64::LD4Fourv4h_POST, "ld4", ".4h", 0, 32 }, - { ARM64::LD4Fourv2s_POST, "ld4", ".2s", 0, 32 }, - { ARM64::ST1i8, "st1", ".b", 1, 0 }, - { ARM64::ST1i16, "st1", ".h", 1, 0 }, - { ARM64::ST1i32, "st1", ".s", 1, 0 }, - { ARM64::ST1i64, "st1", ".d", 1, 0 }, - { ARM64::ST1i8_POST, "st1", ".b", 1, 1 }, - { ARM64::ST1i16_POST, "st1", ".h", 1, 2 }, - { ARM64::ST1i32_POST, "st1", ".s", 1, 4 }, - { ARM64::ST1i64_POST, "st1", ".d", 1, 8 }, - { ARM64::ST1Onev16b, "st1", ".16b", 0, 0 }, - { ARM64::ST1Onev8h, "st1", ".8h", 0, 0 }, - { ARM64::ST1Onev4s, "st1", ".4s", 0, 0 }, - { ARM64::ST1Onev2d, "st1", ".2d", 0, 0 }, - { ARM64::ST1Onev8b, "st1", ".8b", 0, 0 }, - { ARM64::ST1Onev4h, "st1", ".4h", 0, 0 }, - { ARM64::ST1Onev2s, "st1", ".2s", 0, 0 }, - { ARM64::ST1Onev1d, "st1", ".1d", 0, 0 }, - { ARM64::ST1Onev16b_POST, "st1", ".16b", 0, 16 }, - { ARM64::ST1Onev8h_POST, "st1", ".8h", 0, 16 }, - { ARM64::ST1Onev4s_POST, "st1", ".4s", 0, 16 }, - { ARM64::ST1Onev2d_POST, "st1", ".2d", 0, 16 }, - { ARM64::ST1Onev8b_POST, "st1", ".8b", 0, 8 }, - { ARM64::ST1Onev4h_POST, "st1", ".4h", 0, 8 }, - { ARM64::ST1Onev2s_POST, "st1", ".2s", 0, 8 }, - { ARM64::ST1Onev1d_POST, "st1", ".1d", 0, 8 }, - { ARM64::ST1Twov16b, "st1", ".16b", 0, 0 }, - { ARM64::ST1Twov8h, "st1", ".8h", 0, 0 }, - { ARM64::ST1Twov4s, "st1", ".4s", 0, 0 }, - { ARM64::ST1Twov2d, "st1", ".2d", 0, 0 }, - { ARM64::ST1Twov8b, "st1", ".8b", 0, 0 }, - { ARM64::ST1Twov4h, "st1", ".4h", 0, 0 }, - { ARM64::ST1Twov2s, "st1", ".2s", 0, 0 }, - { ARM64::ST1Twov1d, "st1", ".1d", 0, 0 }, - { ARM64::ST1Twov16b_POST, "st1", ".16b", 0, 32 }, - { ARM64::ST1Twov8h_POST, "st1", ".8h", 0, 32 }, 
- { ARM64::ST1Twov4s_POST, "st1", ".4s", 0, 32 }, - { ARM64::ST1Twov2d_POST, "st1", ".2d", 0, 32 }, - { ARM64::ST1Twov8b_POST, "st1", ".8b", 0, 16 }, - { ARM64::ST1Twov4h_POST, "st1", ".4h", 0, 16 }, - { ARM64::ST1Twov2s_POST, "st1", ".2s", 0, 16 }, - { ARM64::ST1Twov1d_POST, "st1", ".1d", 0, 16 }, - { ARM64::ST1Threev16b, "st1", ".16b", 0, 0 }, - { ARM64::ST1Threev8h, "st1", ".8h", 0, 0 }, - { ARM64::ST1Threev4s, "st1", ".4s", 0, 0 }, - { ARM64::ST1Threev2d, "st1", ".2d", 0, 0 }, - { ARM64::ST1Threev8b, "st1", ".8b", 0, 0 }, - { ARM64::ST1Threev4h, "st1", ".4h", 0, 0 }, - { ARM64::ST1Threev2s, "st1", ".2s", 0, 0 }, - { ARM64::ST1Threev1d, "st1", ".1d", 0, 0 }, - { ARM64::ST1Threev16b_POST, "st1", ".16b", 0, 48 }, - { ARM64::ST1Threev8h_POST, "st1", ".8h", 0, 48 }, - { ARM64::ST1Threev4s_POST, "st1", ".4s", 0, 48 }, - { ARM64::ST1Threev2d_POST, "st1", ".2d", 0, 48 }, - { ARM64::ST1Threev8b_POST, "st1", ".8b", 0, 24 }, - { ARM64::ST1Threev4h_POST, "st1", ".4h", 0, 24 }, - { ARM64::ST1Threev2s_POST, "st1", ".2s", 0, 24 }, - { ARM64::ST1Threev1d_POST, "st1", ".1d", 0, 24 }, - { ARM64::ST1Fourv16b, "st1", ".16b", 0, 0 }, - { ARM64::ST1Fourv8h, "st1", ".8h", 0, 0 }, - { ARM64::ST1Fourv4s, "st1", ".4s", 0, 0 }, - { ARM64::ST1Fourv2d, "st1", ".2d", 0, 0 }, - { ARM64::ST1Fourv8b, "st1", ".8b", 0, 0 }, - { ARM64::ST1Fourv4h, "st1", ".4h", 0, 0 }, - { ARM64::ST1Fourv2s, "st1", ".2s", 0, 0 }, - { ARM64::ST1Fourv1d, "st1", ".1d", 0, 0 }, - { ARM64::ST1Fourv16b_POST, "st1", ".16b", 0, 64 }, - { ARM64::ST1Fourv8h_POST, "st1", ".8h", 0, 64 }, - { ARM64::ST1Fourv4s_POST, "st1", ".4s", 0, 64 }, - { ARM64::ST1Fourv2d_POST, "st1", ".2d", 0, 64 }, - { ARM64::ST1Fourv8b_POST, "st1", ".8b", 0, 32 }, - { ARM64::ST1Fourv4h_POST, "st1", ".4h", 0, 32 }, - { ARM64::ST1Fourv2s_POST, "st1", ".2s", 0, 32 }, - { ARM64::ST1Fourv1d_POST, "st1", ".1d", 0, 32 }, - { ARM64::ST2i8, "st2", ".b", 1, 0 }, - { ARM64::ST2i16, "st2", ".h", 1, 0 }, - { ARM64::ST2i32, "st2", ".s", 1, 0 }, - { ARM64::ST2i64, "st2", ".d", 1, 0 }, - { ARM64::ST2i8_POST, "st2", ".b", 1, 2 }, - { ARM64::ST2i16_POST, "st2", ".h", 1, 4 }, - { ARM64::ST2i32_POST, "st2", ".s", 1, 8 }, - { ARM64::ST2i64_POST, "st2", ".d", 1, 16 }, - { ARM64::ST2Twov16b, "st2", ".16b", 0, 0 }, - { ARM64::ST2Twov8h, "st2", ".8h", 0, 0 }, - { ARM64::ST2Twov4s, "st2", ".4s", 0, 0 }, - { ARM64::ST2Twov2d, "st2", ".2d", 0, 0 }, - { ARM64::ST2Twov8b, "st2", ".8b", 0, 0 }, - { ARM64::ST2Twov4h, "st2", ".4h", 0, 0 }, - { ARM64::ST2Twov2s, "st2", ".2s", 0, 0 }, - { ARM64::ST2Twov16b_POST, "st2", ".16b", 0, 32 }, - { ARM64::ST2Twov8h_POST, "st2", ".8h", 0, 32 }, - { ARM64::ST2Twov4s_POST, "st2", ".4s", 0, 32 }, - { ARM64::ST2Twov2d_POST, "st2", ".2d", 0, 32 }, - { ARM64::ST2Twov8b_POST, "st2", ".8b", 0, 16 }, - { ARM64::ST2Twov4h_POST, "st2", ".4h", 0, 16 }, - { ARM64::ST2Twov2s_POST, "st2", ".2s", 0, 16 }, - { ARM64::ST3i8, "st3", ".b", 1, 0 }, - { ARM64::ST3i16, "st3", ".h", 1, 0 }, - { ARM64::ST3i32, "st3", ".s", 1, 0 }, - { ARM64::ST3i64, "st3", ".d", 1, 0 }, - { ARM64::ST3i8_POST, "st3", ".b", 1, 3 }, - { ARM64::ST3i16_POST, "st3", ".h", 1, 6 }, - { ARM64::ST3i32_POST, "st3", ".s", 1, 12 }, - { ARM64::ST3i64_POST, "st3", ".d", 1, 24 }, - { ARM64::ST3Threev16b, "st3", ".16b", 0, 0 }, - { ARM64::ST3Threev8h, "st3", ".8h", 0, 0 }, - { ARM64::ST3Threev4s, "st3", ".4s", 0, 0 }, - { ARM64::ST3Threev2d, "st3", ".2d", 0, 0 }, - { ARM64::ST3Threev8b, "st3", ".8b", 0, 0 }, - { ARM64::ST3Threev4h, "st3", ".4h", 0, 0 }, - { ARM64::ST3Threev2s, "st3", ".2s", 0, 0 }, - { ARM64::ST3Threev16b_POST, 
"st3", ".16b", 0, 48 }, - { ARM64::ST3Threev8h_POST, "st3", ".8h", 0, 48 }, - { ARM64::ST3Threev4s_POST, "st3", ".4s", 0, 48 }, - { ARM64::ST3Threev2d_POST, "st3", ".2d", 0, 48 }, - { ARM64::ST3Threev8b_POST, "st3", ".8b", 0, 24 }, - { ARM64::ST3Threev4h_POST, "st3", ".4h", 0, 24 }, - { ARM64::ST3Threev2s_POST, "st3", ".2s", 0, 24 }, - { ARM64::ST4i8, "st4", ".b", 1, 0 }, - { ARM64::ST4i16, "st4", ".h", 1, 0 }, - { ARM64::ST4i32, "st4", ".s", 1, 0 }, - { ARM64::ST4i64, "st4", ".d", 1, 0 }, - { ARM64::ST4i8_POST, "st4", ".b", 1, 4 }, - { ARM64::ST4i16_POST, "st4", ".h", 1, 8 }, - { ARM64::ST4i32_POST, "st4", ".s", 1, 16 }, - { ARM64::ST4i64_POST, "st4", ".d", 1, 32 }, - { ARM64::ST4Fourv16b, "st4", ".16b", 0, 0 }, - { ARM64::ST4Fourv8h, "st4", ".8h", 0, 0 }, - { ARM64::ST4Fourv4s, "st4", ".4s", 0, 0 }, - { ARM64::ST4Fourv2d, "st4", ".2d", 0, 0 }, - { ARM64::ST4Fourv8b, "st4", ".8b", 0, 0 }, - { ARM64::ST4Fourv4h, "st4", ".4h", 0, 0 }, - { ARM64::ST4Fourv2s, "st4", ".2s", 0, 0 }, - { ARM64::ST4Fourv16b_POST, "st4", ".16b", 0, 64 }, - { ARM64::ST4Fourv8h_POST, "st4", ".8h", 0, 64 }, - { ARM64::ST4Fourv4s_POST, "st4", ".4s", 0, 64 }, - { ARM64::ST4Fourv2d_POST, "st4", ".2d", 0, 64 }, - { ARM64::ST4Fourv8b_POST, "st4", ".8b", 0, 32 }, - { ARM64::ST4Fourv4h_POST, "st4", ".4h", 0, 32 }, - { ARM64::ST4Fourv2s_POST, "st4", ".2s", 0, 32 }, -}; - -static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) { - unsigned Idx; - for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx) - if (LdStNInstInfo[Idx].Opcode == Opcode) - return &LdStNInstInfo[Idx]; - - return 0; -} - -void ARM64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - unsigned Opcode = MI->getOpcode(); - StringRef Layout, Mnemonic; - - bool IsTbx; - if (isTblTbxInstruction(MI->getOpcode(), Layout, IsTbx)) { - O << "\t" << (IsTbx ? "tbx" : "tbl") << Layout << '\t' - << getRegisterName(MI->getOperand(0).getReg(), ARM64::vreg) << ", "; - - unsigned ListOpNum = IsTbx ? 2 : 1; - printVectorList(MI, ListOpNum, O, ""); - - O << ", " - << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), ARM64::vreg); - printAnnotation(O, Annot); - return; - } - - if (LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) { - O << "\t" << LdStDesc->Mnemonic << LdStDesc->Layout << '\t'; - - // Now onto the operands: first a vector list with possible lane - // specifier. E.g. { v0 }[2] - printVectorList(MI, 0, O, ""); - - if (LdStDesc->LaneOperand != 0) - O << '[' << MI->getOperand(LdStDesc->LaneOperand).getImm() << ']'; - - // Next the address: [xN] - unsigned AddrOpNum = LdStDesc->LaneOperand + 1; - unsigned AddrReg = MI->getOperand(AddrOpNum).getReg(); - O << ", [" << getRegisterName(AddrReg) << ']'; - - // Finally, there might be a post-indexed offset. 
- if (LdStDesc->NaturalOffset != 0) { - unsigned Reg = MI->getOperand(AddrOpNum + 1).getReg(); - if (Reg != ARM64::XZR) - O << ", " << getRegisterName(Reg); - else { - assert(LdStDesc->NaturalOffset && "no offset on post-inc instruction?"); - O << ", #" << LdStDesc->NaturalOffset; - } - } - - printAnnotation(O, Annot); - return; - } - - ARM64InstPrinter::printInst(MI, O, Annot); -} - -bool ARM64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { -#ifndef NDEBUG - unsigned Opcode = MI->getOpcode(); - assert((Opcode == ARM64::SYS || Opcode == ARM64::SYSxt) && - "Invalid opcode for SYS alias!"); -#endif - - const char *Asm = 0; - const MCOperand &Op1 = MI->getOperand(0); - const MCOperand &Cn = MI->getOperand(1); - const MCOperand &Cm = MI->getOperand(2); - const MCOperand &Op2 = MI->getOperand(3); - - unsigned Op1Val = Op1.getImm(); - unsigned CnVal = Cn.getImm(); - unsigned CmVal = Cm.getImm(); - unsigned Op2Val = Op2.getImm(); - - if (CnVal == 7) { - switch (CmVal) { - default: - break; - - // IC aliases - case 1: - if (Op1Val == 0 && Op2Val == 0) - Asm = "ic\tialluis"; - break; - case 5: - if (Op1Val == 0 && Op2Val == 0) - Asm = "ic\tiallu"; - else if (Op1Val == 3 && Op2Val == 1) - Asm = "ic\tivau"; - break; - - // DC aliases - case 4: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tzva"; - break; - case 6: - if (Op1Val == 0 && Op2Val == 1) - Asm = "dc\tivac"; - if (Op1Val == 0 && Op2Val == 2) - Asm = "dc\tisw"; - break; - case 10: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tcvac"; - else if (Op1Val == 0 && Op2Val == 2) - Asm = "dc\tcsw"; - break; - case 11: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tcvau"; - break; - case 14: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tcivac"; - else if (Op1Val == 0 && Op2Val == 2) - Asm = "dc\tcisw"; - break; - - // AT aliases - case 8: - switch (Op1Val) { - default: - break; - case 0: - switch (Op2Val) { - default: - break; - case 0: Asm = "at\ts1e1r"; break; - case 1: Asm = "at\ts1e1w"; break; - case 2: Asm = "at\ts1e0r"; break; - case 3: Asm = "at\ts1e0w"; break; - } - break; - case 4: - switch (Op2Val) { - default: - break; - case 0: Asm = "at\ts1e2r"; break; - case 1: Asm = "at\ts1e2w"; break; - case 4: Asm = "at\ts12e1r"; break; - case 5: Asm = "at\ts12e1w"; break; - case 6: Asm = "at\ts12e0r"; break; - case 7: Asm = "at\ts12e0w"; break; - } - break; - case 6: - switch (Op2Val) { - default: - break; - case 0: Asm = "at\ts1e3r"; break; - case 1: Asm = "at\ts1e3w"; break; - } - break; - } - break; - } - } else if (CnVal == 8) { - // TLBI aliases - switch (CmVal) { - default: - break; - case 3: - switch (Op1Val) { - default: - break; - case 0: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\tvmalle1is"; break; - case 1: Asm = "tlbi\tvae1is"; break; - case 2: Asm = "tlbi\taside1is"; break; - case 3: Asm = "tlbi\tvaae1is"; break; - case 5: Asm = "tlbi\tvale1is"; break; - case 7: Asm = "tlbi\tvaale1is"; break; - } - break; - case 4: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle2is"; break; - case 1: Asm = "tlbi\tvae2is"; break; - case 4: Asm = "tlbi\talle1is"; break; - case 5: Asm = "tlbi\tvale2is"; break; - case 6: Asm = "tlbi\tvmalls12e1is"; break; - } - break; - case 6: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle3is"; break; - case 1: Asm = "tlbi\tvae3is"; break; - case 5: Asm = "tlbi\tvale3is"; break; - } - break; - } - break; - case 4: - switch (Op1Val) { - default: - break; - case 4: - switch (Op2Val) { - default: - break; - case 1: Asm = "tlbi\tipas2e1"; break; - case 
5: Asm = "tlbi\tipas2le1"; break; - } - break; - } - break; - case 7: - switch (Op1Val) { - default: - break; - case 0: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\tvmalle1"; break; - case 1: Asm = "tlbi\tvae1"; break; - case 2: Asm = "tlbi\taside1"; break; - case 3: Asm = "tlbi\tvaae1"; break; - case 5: Asm = "tlbi\tvale1"; break; - case 7: Asm = "tlbi\tvaale1"; break; - } - break; - case 4: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle2"; break; - case 1: Asm = "tlbi\tvae2"; break; - case 4: Asm = "tlbi\talle1"; break; - case 5: Asm = "tlbi\tvale2"; break; - case 6: Asm = "tlbi\tvmalls12e1"; break; - } - break; - case 6: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle3"; break; - case 1: Asm = "tlbi\tvae3"; break; - case 5: Asm = "tlbi\tvale3"; break; - } - break; - } - break; - } - } - - if (Asm) { - O << '\t' << Asm; - if (MI->getNumOperands() == 5) - O << ", " << getRegisterName(MI->getOperand(4).getReg()); - } - - return Asm != 0; -} - -void ARM64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - unsigned Reg = Op.getReg(); - O << getRegisterName(Reg); - } else if (Op.isImm()) { - O << '#' << Op.getImm(); - } else { - assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << *Op.getExpr(); - } -} - -void ARM64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo, - unsigned Imm, raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - unsigned Reg = Op.getReg(); - if (Reg == ARM64::XZR) - O << "#" << Imm; - else - O << getRegisterName(Reg); - } else - assert(0 && "unknown operand kind in printPostIncOperand64"); -} - -void ARM64InstPrinter::printPostIncOperand1(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 1, O); -} - -void ARM64InstPrinter::printPostIncOperand2(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 2, O); -} - -void ARM64InstPrinter::printPostIncOperand3(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 3, O); -} - -void ARM64InstPrinter::printPostIncOperand4(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 4, O); -} - -void ARM64InstPrinter::printPostIncOperand6(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 6, O); -} - -void ARM64InstPrinter::printPostIncOperand8(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 8, O); -} - -void ARM64InstPrinter::printPostIncOperand12(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 12, O); -} - -void ARM64InstPrinter::printPostIncOperand16(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 16, O); -} - -void ARM64InstPrinter::printPostIncOperand24(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 24, O); -} - -void ARM64InstPrinter::printPostIncOperand32(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 32, O); -} - -void ARM64InstPrinter::printPostIncOperand48(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 48, O); -} - -void ARM64InstPrinter::printPostIncOperand64(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 64, O); -} - -void ARM64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, - 
raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - assert(Op.isReg() && "Non-register vreg operand!"); - unsigned Reg = Op.getReg(); - O << getRegisterName(Reg, ARM64::vreg); -} - -void ARM64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - assert(Op.isImm() && "System instruction C[nm] operands must be immediates!"); - O << "c" << Op.getImm(); -} - -void ARM64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - if (MO.isImm()) { - unsigned Val = (MO.getImm() & 0xfff); - assert(Val == MO.getImm() && "Add/sub immediate out of range!"); - unsigned Shift = - ARM64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm()); - O << '#' << (Val << Shift); - // Distinguish "0, lsl #12" from "0, lsl #0". - if (Val == 0 && Shift != 0) - printShifter(MI, OpNum + 1, O); - } else { - assert(MO.isExpr() && "Unexpected operand type!"); - O << *MO.getExpr(); - printShifter(MI, OpNum + 1, O); - } -} - -void ARM64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - uint64_t Val = MI->getOperand(OpNum).getImm(); - O << "#0x"; - O.write_hex(ARM64_AM::decodeLogicalImmediate(Val, 32)); -} - -void ARM64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - uint64_t Val = MI->getOperand(OpNum).getImm(); - O << "#0x"; - O.write_hex(ARM64_AM::decodeLogicalImmediate(Val, 64)); -} - -void ARM64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNum).getImm(); - // LSL #0 should not be printed. - if (ARM64_AM::getShiftType(Val) == ARM64_AM::LSL && - ARM64_AM::getShiftValue(Val) == 0) - return; - O << ", " << ARM64_AM::getShiftName(ARM64_AM::getShiftType(Val)) << " #" - << ARM64_AM::getShiftValue(Val); -} - -void ARM64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << getRegisterName(MI->getOperand(OpNum).getReg()); - printShifter(MI, OpNum + 1, O); -} - -void ARM64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << getRegisterName(MI->getOperand(OpNum).getReg()); - printExtend(MI, OpNum + 1, O); -} - -void ARM64InstPrinter::printExtend(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNum).getImm(); - ARM64_AM::ExtendType ExtType = ARM64_AM::getArithExtendType(Val); - unsigned ShiftVal = ARM64_AM::getArithShiftValue(Val); - - // If the destination or first source register operand is [W]SP, print - // UXTW/UXTX as LSL, and if the shift amount is also zero, print nothing at - // all. - if (ExtType == ARM64_AM::UXTW || ExtType == ARM64_AM::UXTX) { - unsigned Dest = MI->getOperand(0).getReg(); - unsigned Src1 = MI->getOperand(1).getReg(); - if (Dest == ARM64::SP || Dest == ARM64::WSP || Src1 == ARM64::SP || - Src1 == ARM64::WSP) { - if (ShiftVal != 0) - O << ", lsl #" << ShiftVal; - return; - } - } - O << ", " << ARM64_AM::getExtendName(ExtType); - if (ShiftVal != 0) - O << " #" << ShiftVal; -} - -void ARM64InstPrinter::printDotCondCode(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(OpNum).getImm(); - if (CC != ARM64CC::AL) - O << '.' 
<< ARM64CC::getCondCodeName(CC); -} - -void ARM64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(OpNum).getImm(); - O << ARM64CC::getCondCodeName(CC); -} - -void ARM64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']'; -} - -void ARM64InstPrinter::printImmScale4(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '#' << 4 * MI->getOperand(OpNum).getImm(); -} - -void ARM64InstPrinter::printImmScale8(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '#' << 8 * MI->getOperand(OpNum).getImm(); -} - -void ARM64InstPrinter::printImmScale16(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '#' << 16 * MI->getOperand(OpNum).getImm(); -} - -void ARM64InstPrinter::printAMIndexed(const MCInst *MI, unsigned OpNum, - unsigned Scale, raw_ostream &O) { - const MCOperand MO1 = MI->getOperand(OpNum + 1); - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()); - if (MO1.isImm()) { - if (MO1.getImm() != 0) - O << ", #" << (MO1.getImm() * Scale); - } else { - assert(MO1.isExpr() && "Unexpected operand type!"); - O << ", " << *MO1.getExpr(); - } - O << ']'; -} - -void ARM64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned prfop = MI->getOperand(OpNum).getImm(); - if (ARM64_AM::isNamedPrefetchOp(prfop)) - O << ARM64_AM::getPrefetchOpName((ARM64_AM::PrefetchOp)prfop); - else - O << '#' << prfop; -} - -void ARM64InstPrinter::printMemoryPostIndexed32(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #" - << 4 * MI->getOperand(OpNum + 1).getImm(); -} - -void ARM64InstPrinter::printMemoryPostIndexed64(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #" - << 8 * MI->getOperand(OpNum + 1).getImm(); -} - -void ARM64InstPrinter::printMemoryPostIndexed128(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #" - << 16 * MI->getOperand(OpNum + 1).getImm(); -} - -void ARM64InstPrinter::printMemoryPostIndexed(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #" - << MI->getOperand(OpNum + 1).getImm(); -} - -void ARM64InstPrinter::printMemoryRegOffset(const MCInst *MI, unsigned OpNum, - raw_ostream &O, int LegalShiftAmt) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " - << getRegisterName(MI->getOperand(OpNum + 1).getReg()); - - unsigned Val = MI->getOperand(OpNum + 2).getImm(); - ARM64_AM::ExtendType ExtType = ARM64_AM::getMemExtendType(Val); - bool DoShift = ARM64_AM::getMemDoShift(Val); - - if (ExtType == ARM64_AM::UXTX) { - if (DoShift) - O << ", lsl"; - } else - O << ", " << ARM64_AM::getExtendName(ExtType); - - if (DoShift) - O << " #" << LegalShiftAmt; - - O << "]"; -} - -void ARM64InstPrinter::printMemoryRegOffset8(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printMemoryRegOffset(MI, OpNum, O, 0); -} - -void ARM64InstPrinter::printMemoryRegOffset16(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printMemoryRegOffset(MI, OpNum, O, 1); -} - -void ARM64InstPrinter::printMemoryRegOffset32(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printMemoryRegOffset(MI, OpNum, O, 2); -} - -void 
ARM64InstPrinter::printMemoryRegOffset64(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printMemoryRegOffset(MI, OpNum, O, 3); -} - -void ARM64InstPrinter::printMemoryRegOffset128(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printMemoryRegOffset(MI, OpNum, O, 4); -} - -void ARM64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - O << '#'; - if (MO.isFPImm()) - // FIXME: Should this ever happen? - O << MO.getFPImm(); - else - O << ARM64_AM::getFPImmFloat(MO.getImm()); -} - -static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { - while (Stride--) { - switch (Reg) { - default: - assert(0 && "Vector register expected!"); - case ARM64::Q0: Reg = ARM64::Q1; break; - case ARM64::Q1: Reg = ARM64::Q2; break; - case ARM64::Q2: Reg = ARM64::Q3; break; - case ARM64::Q3: Reg = ARM64::Q4; break; - case ARM64::Q4: Reg = ARM64::Q5; break; - case ARM64::Q5: Reg = ARM64::Q6; break; - case ARM64::Q6: Reg = ARM64::Q7; break; - case ARM64::Q7: Reg = ARM64::Q8; break; - case ARM64::Q8: Reg = ARM64::Q9; break; - case ARM64::Q9: Reg = ARM64::Q10; break; - case ARM64::Q10: Reg = ARM64::Q11; break; - case ARM64::Q11: Reg = ARM64::Q12; break; - case ARM64::Q12: Reg = ARM64::Q13; break; - case ARM64::Q13: Reg = ARM64::Q14; break; - case ARM64::Q14: Reg = ARM64::Q15; break; - case ARM64::Q15: Reg = ARM64::Q16; break; - case ARM64::Q16: Reg = ARM64::Q17; break; - case ARM64::Q17: Reg = ARM64::Q18; break; - case ARM64::Q18: Reg = ARM64::Q19; break; - case ARM64::Q19: Reg = ARM64::Q20; break; - case ARM64::Q20: Reg = ARM64::Q21; break; - case ARM64::Q21: Reg = ARM64::Q22; break; - case ARM64::Q22: Reg = ARM64::Q23; break; - case ARM64::Q23: Reg = ARM64::Q24; break; - case ARM64::Q24: Reg = ARM64::Q25; break; - case ARM64::Q25: Reg = ARM64::Q26; break; - case ARM64::Q26: Reg = ARM64::Q27; break; - case ARM64::Q27: Reg = ARM64::Q28; break; - case ARM64::Q28: Reg = ARM64::Q29; break; - case ARM64::Q29: Reg = ARM64::Q30; break; - case ARM64::Q30: Reg = ARM64::Q31; break; - // Vector lists can wrap around. - case ARM64::Q31: - Reg = ARM64::Q0; - break; - } - } - return Reg; -} - -void ARM64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, - raw_ostream &O, StringRef LayoutSuffix) { - unsigned Reg = MI->getOperand(OpNum).getReg(); - - O << "{ "; - - // Work out how many registers there are in the list (if there is an actual - // list). - unsigned NumRegs = 1; - if (MRI.getRegClass(ARM64::DDRegClassID).contains(Reg) || - MRI.getRegClass(ARM64::QQRegClassID).contains(Reg)) - NumRegs = 2; - else if (MRI.getRegClass(ARM64::DDDRegClassID).contains(Reg) || - MRI.getRegClass(ARM64::QQQRegClassID).contains(Reg)) - NumRegs = 3; - else if (MRI.getRegClass(ARM64::DDDDRegClassID).contains(Reg) || - MRI.getRegClass(ARM64::QQQQRegClassID).contains(Reg)) - NumRegs = 4; - - // Now forget about the list and find out what the first register is. - if (unsigned FirstReg = MRI.getSubReg(Reg, ARM64::dsub0)) - Reg = FirstReg; - else if (unsigned FirstReg = MRI.getSubReg(Reg, ARM64::qsub0)) - Reg = FirstReg; - - // If it's a D-reg, we need to promote it to the equivalent Q-reg before - // printing (otherwise getRegisterName fails). 
- if (MRI.getRegClass(ARM64::FPR64RegClassID).contains(Reg)) { - const MCRegisterClass &FPR128RC = MRI.getRegClass(ARM64::FPR128RegClassID); - Reg = MRI.getMatchingSuperReg(Reg, ARM64::dsub, &FPR128RC); - } - - for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) { - O << getRegisterName(Reg, ARM64::vreg) << LayoutSuffix; - if (i + 1 != NumRegs) - O << ", "; - } - - O << " }"; -} - -void ARM64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - printVectorList(MI, OpNum, O, ""); -} - -template <unsigned NumLanes, char LaneKind> -void ARM64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - std::string Suffix("."); - if (NumLanes) - Suffix += itostr(NumLanes) + LaneKind; - else - Suffix += LaneKind; - - printVectorList(MI, OpNum, O, Suffix); -} - -void ARM64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "[" << MI->getOperand(OpNum).getImm() << "]"; -} - -void ARM64InstPrinter::printAlignedBranchTarget(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNum); - - // If the label has already been resolved to an immediate offset (say, when - // we're running the disassembler), just print the immediate. - if (Op.isImm()) { - O << "#" << (Op.getImm() << 2); - return; - } - - // If the branch target is simply an address then print it in hex. - const MCConstantExpr *BranchTarget = - dyn_cast<MCConstantExpr>(MI->getOperand(OpNum).getExpr()); - int64_t Address; - if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { - O << "0x"; - O.write_hex(Address); - } else { - // Otherwise, just print the expression. - O << *MI->getOperand(OpNum).getExpr(); - } -} - -void ARM64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNum); - - // If the label has already been resolved to an immediate offset (say, when - // we're running the disassembler), just print the immediate. - if (Op.isImm()) { - O << "#" << (Op.getImm() << 12); - return; - } - - // Otherwise, just print the expression. 
- O << *MI->getOperand(OpNum).getExpr(); -} - -void ARM64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - const char *Name = ARM64SYS::getBarrierOptName((ARM64SYS::BarrierOption)Val); - if (Name) - O << Name; - else - O << "#" << Val; -} - -void ARM64InstPrinter::printSystemRegister(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - const char *Name = - ARM64SYS::getSystemRegisterName((ARM64SYS::SystemRegister)Val); - if (Name) { - O << Name; - return; - } - - unsigned Op0 = 2 | ((Val >> 14) & 1); - unsigned Op1 = (Val >> 11) & 7; - unsigned CRn = (Val >> 7) & 0xf; - unsigned CRm = (Val >> 3) & 0xf; - unsigned Op2 = Val & 7; - - O << 'S' << Op0 << '_' << Op1 << "_C" << CRn << "_C" << CRm << '_' << Op2; -} - -void ARM64InstPrinter::printSystemCPSRField(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - const char *Name = ARM64SYS::getCPSRFieldName((ARM64SYS::CPSRField)Val); - O << Name; -} - -void ARM64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned RawVal = MI->getOperand(OpNo).getImm(); - uint64_t Val = ARM64_AM::decodeAdvSIMDModImmType10(RawVal); - O << format("#%#016llx", Val); -} diff --git a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h b/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h deleted file mode 100644 index ff66ff0..0000000 --- a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h +++ /dev/null @@ -1,157 +0,0 @@ -//===-- ARM64InstPrinter.h - Convert ARM64 MCInst to assembly syntax ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints an ARM64 MCInst to a .s file. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64INSTPRINTER_H -#define ARM64INSTPRINTER_H - -#include "MCTargetDesc/ARM64MCTargetDesc.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCSubtargetInfo.h" - -namespace llvm { - -class MCOperand; - -class ARM64InstPrinter : public MCInstPrinter { -public: - ARM64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); - - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - - // Autogenerated by tblgen. 
- virtual void printInstruction(const MCInst *MI, raw_ostream &O); - virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O); - virtual StringRef getRegName(unsigned RegNo) const { - return getRegisterName(RegNo); - } - static const char *getRegisterName(unsigned RegNo, - unsigned AltIdx = ARM64::NoRegAltName); - -protected: - bool printSysAlias(const MCInst *MI, raw_ostream &O); - // Operand printers - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm, - raw_ostream &O); - void printPostIncOperand1(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand2(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand3(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand4(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand6(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand8(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand12(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand16(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand24(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand32(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand48(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand64(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printDotCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAlignedBranchTarget(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printAMIndexed(const MCInst *MI, unsigned OpNum, unsigned Scale, - raw_ostream &O); - void printAMIndexed128(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 16, O); - } - - void printAMIndexed64(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 8, O); - } - - void printAMIndexed32(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 4, O); - } - - void printAMIndexed16(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 2, O); - } - - void printAMIndexed8(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 1, O); - } - void printAMUnscaled(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 1, O); - } - void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printImmScale4(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printImmScale8(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printImmScale16(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printPrefetchOp(const MCInst *MI, unsigned 
OpNum, raw_ostream &O); - void printMemoryPostIndexed(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMemoryPostIndexed32(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printMemoryPostIndexed64(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printMemoryPostIndexed128(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printMemoryRegOffset(const MCInst *MI, unsigned OpNum, raw_ostream &O, - int LegalShiftAmt); - void printMemoryRegOffset8(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMemoryRegOffset16(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMemoryRegOffset32(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMemoryRegOffset64(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMemoryRegOffset128(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O, - StringRef LayoutSuffix); - - /// Print a list of vector registers where the type suffix is implicit - /// (i.e. attached to the instruction rather than the registers). - void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - template <unsigned NumLanes, char LaneKind> - void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSystemCPSRField(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); -}; - -class ARM64AppleInstPrinter : public ARM64InstPrinter { -public: - ARM64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); - - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - - virtual void printInstruction(const MCInst *MI, raw_ostream &O); - virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O); - virtual StringRef getRegName(unsigned RegNo) const { - return getRegisterName(RegNo); - } - static const char *getRegisterName(unsigned RegNo, - unsigned AltIdx = ARM64::NoRegAltName); -}; -} - -#endif diff --git a/lib/Target/ARM64/InstPrinter/CMakeLists.txt b/lib/Target/ARM64/InstPrinter/CMakeLists.txt deleted file mode 100644 index b8ee12c..0000000 --- a/lib/Target/ARM64/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARM64AsmPrinter - ARM64InstPrinter.cpp - ) - -add_dependencies(LLVMARM64AsmPrinter ARM64CommonTableGen) diff --git a/lib/Target/ARM64/InstPrinter/LLVMBuild.txt b/lib/Target/ARM64/InstPrinter/LLVMBuild.txt deleted file mode 100644 index 2ec83d2..0000000 --- a/lib/Target/ARM64/InstPrinter/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/ARM64/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. 
-; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ARM64AsmPrinter -parent = ARM64 -required_libraries = MC Support -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/InstPrinter/Makefile b/lib/Target/ARM64/InstPrinter/Makefile deleted file mode 100644 index a59efb0..0000000 --- a/lib/Target/ARM64/InstPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM64/AsmPrinter/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64AsmPrinter - -# Hack: we need to include 'main' arm target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/LLVMBuild.txt b/lib/Target/ARM64/LLVMBuild.txt deleted file mode 100644 index 45b0628..0000000 --- a/lib/Target/ARM64/LLVMBuild.txt +++ /dev/null @@ -1,36 +0,0 @@ -;===- ./lib/Target/ARM64/LLVMBuild.txt -------------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[common] -subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo - -[component_0] -type = TargetGroup -name = ARM64 -parent = Target -has_asmparser = 1 -has_asmprinter = 1 -has_disassembler = 1 -has_jit = 1 - -[component_1] -type = Library -name = ARM64CodeGen -parent = ARM64 -required_libraries = ARM64AsmPrinter ARM64Desc ARM64Info Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h b/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h deleted file mode 100644 index 7717743..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h +++ /dev/null @@ -1,758 +0,0 @@ -//===- ARM64AddressingModes.h - ARM64 Addressing Modes ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM64 addressing mode implementation stuff. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H -#define LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H - -#include "llvm/ADT/APFloat.h" -#include "llvm/ADT/APInt.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include <cassert> - -namespace llvm { - -/// ARM64_AM - ARM64 Addressing Mode Stuff -namespace ARM64_AM { - -//===----------------------------------------------------------------------===// -// Shifts -// - -enum ShiftType { - InvalidShift = -1, - LSL = 0, - LSR = 1, - ASR = 2, - ROR = 3, - MSL = 4 -}; - -/// getShiftName - Get the string encoding for the shift type. -static inline const char *getShiftName(ARM64_AM::ShiftType ST) { - switch (ST) { - default: assert(false && "unhandled shift type!"); - case ARM64_AM::LSL: return "lsl"; - case ARM64_AM::LSR: return "lsr"; - case ARM64_AM::ASR: return "asr"; - case ARM64_AM::ROR: return "ror"; - case ARM64_AM::MSL: return "msl"; - } - return 0; -} - -/// getShiftType - Extract the shift type. -static inline ARM64_AM::ShiftType getShiftType(unsigned Imm) { - return ARM64_AM::ShiftType((Imm >> 6) & 0x7); -} - -/// getShiftValue - Extract the shift value. -static inline unsigned getShiftValue(unsigned Imm) { - return Imm & 0x3f; -} - -/// getShifterImm - Encode the shift type and amount: -/// imm: 6-bit shift amount -/// shifter: 000 ==> lsl -/// 001 ==> lsr -/// 010 ==> asr -/// 011 ==> ror -/// 100 ==> msl -/// {8-6} = shifter -/// {5-0} = imm -static inline unsigned getShifterImm(ARM64_AM::ShiftType ST, unsigned Imm) { - assert((Imm & 0x3f) == Imm && "Illegal shifted immedate value!"); - return (unsigned(ST) << 6) | (Imm & 0x3f); -} - -//===----------------------------------------------------------------------===// -// Extends -// - -enum ExtendType { - InvalidExtend = -1, - UXTB = 0, - UXTH = 1, - UXTW = 2, - UXTX = 3, - SXTB = 4, - SXTH = 5, - SXTW = 6, - SXTX = 7 -}; - -/// getExtendName - Get the string encoding for the extend type. -static inline const char *getExtendName(ARM64_AM::ExtendType ET) { - switch (ET) { - default: assert(false && "unhandled extend type!"); - case ARM64_AM::UXTB: return "uxtb"; - case ARM64_AM::UXTH: return "uxth"; - case ARM64_AM::UXTW: return "uxtw"; - case ARM64_AM::UXTX: return "uxtx"; - case ARM64_AM::SXTB: return "sxtb"; - case ARM64_AM::SXTH: return "sxth"; - case ARM64_AM::SXTW: return "sxtw"; - case ARM64_AM::SXTX: return "sxtx"; - } - return 0; -} - -/// getArithShiftValue - get the arithmetic shift value. -static inline unsigned getArithShiftValue(unsigned Imm) { - return Imm & 0x7; -} - -/// getExtendType - Extract the extend type for operands of arithmetic ops. -static inline ARM64_AM::ExtendType getArithExtendType(unsigned Imm) { - return ARM64_AM::ExtendType((Imm >> 3) & 0x7); -} - -/// getArithExtendImm - Encode the extend type and shift amount for an -/// arithmetic instruction: -/// imm: 3-bit extend amount -/// shifter: 000 ==> uxtb -/// 001 ==> uxth -/// 010 ==> uxtw -/// 011 ==> uxtx -/// 100 ==> sxtb -/// 101 ==> sxth -/// 110 ==> sxtw -/// 111 ==> sxtx -/// {5-3} = shifter -/// {2-0} = imm3 -static inline unsigned getArithExtendImm(ARM64_AM::ExtendType ET, - unsigned Imm) { - assert((Imm & 0x7) == Imm && "Illegal shifted immedate value!"); - return (unsigned(ET) << 3) | (Imm & 0x7); -} - -/// getMemDoShift - Extract the "do shift" flag value for load/store -/// instructions. 
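// For illustration, assuming the encoders above behave as written: the
// shifted-register operand packs the shift kind into bits {8-6} and the
// amount into bits {5-0}, so
//   getShifterImm(ARM64_AM::ASR, 3) == (2 << 6) | 3 == 0x83,
// and getShiftType(0x83)/getShiftValue(0x83) recover ASR and 3. The
// extended-register operand is analogous:
//   getArithExtendImm(ARM64_AM::SXTW, 2) == (6 << 3) | 2 == 0x32,
// which getArithExtendType/getArithShiftValue decode back to SXTW and 2.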
-static inline bool getMemDoShift(unsigned Imm) { - return (Imm & 0x1) != 0; -} - -/// getExtendType - Extract the extend type for the offset operand of -/// loads/stores. -static inline ARM64_AM::ExtendType getMemExtendType(unsigned Imm) { - return ARM64_AM::ExtendType((Imm >> 1) & 0x7); -} - -/// getExtendImm - Encode the extend type and amount for a load/store inst: -/// doshift: should the offset be scaled by the access size -/// shifter: 000 ==> uxtb -/// 001 ==> uxth -/// 010 ==> uxtw -/// 011 ==> uxtx -/// 100 ==> sxtb -/// 101 ==> sxth -/// 110 ==> sxtw -/// 111 ==> sxtx -/// {3-1} = shifter -/// {0} = doshift -static inline unsigned getMemExtendImm(ARM64_AM::ExtendType ET, bool DoShift) { - return (unsigned(ET) << 1) | unsigned(DoShift); -} - -//===----------------------------------------------------------------------===// -// Prefetch -// - -/// Pre-fetch operator names. -/// The enum values match the encoding values: -/// prfop<4:3> 00=preload data, 10=prepare for store -/// prfop<2:1> 00=target L1 cache, 01=target L2 cache, 10=target L3 cache, -/// prfop<0> 0=non-streaming (temporal), 1=streaming (non-temporal) -enum PrefetchOp { - InvalidPrefetchOp = -1, - PLDL1KEEP = 0x00, - PLDL1STRM = 0x01, - PLDL2KEEP = 0x02, - PLDL2STRM = 0x03, - PLDL3KEEP = 0x04, - PLDL3STRM = 0x05, - PSTL1KEEP = 0x10, - PSTL1STRM = 0x11, - PSTL2KEEP = 0x12, - PSTL2STRM = 0x13, - PSTL3KEEP = 0x14, - PSTL3STRM = 0x15 -}; - -/// isNamedPrefetchOp - Check if the prefetch-op 5-bit value has a name. -static inline bool isNamedPrefetchOp(unsigned prfop) { - switch (prfop) { - default: return false; - case ARM64_AM::PLDL1KEEP: case ARM64_AM::PLDL1STRM: case ARM64_AM::PLDL2KEEP: - case ARM64_AM::PLDL2STRM: case ARM64_AM::PLDL3KEEP: case ARM64_AM::PLDL3STRM: - case ARM64_AM::PSTL1KEEP: case ARM64_AM::PSTL1STRM: case ARM64_AM::PSTL2KEEP: - case ARM64_AM::PSTL2STRM: case ARM64_AM::PSTL3KEEP: case ARM64_AM::PSTL3STRM: - return true; - } -} - - -/// getPrefetchOpName - Get the string encoding for the prefetch operator. -static inline const char *getPrefetchOpName(ARM64_AM::PrefetchOp prfop) { - switch (prfop) { - default: assert(false && "unhandled prefetch-op type!"); - case ARM64_AM::PLDL1KEEP: return "pldl1keep"; - case ARM64_AM::PLDL1STRM: return "pldl1strm"; - case ARM64_AM::PLDL2KEEP: return "pldl2keep"; - case ARM64_AM::PLDL2STRM: return "pldl2strm"; - case ARM64_AM::PLDL3KEEP: return "pldl3keep"; - case ARM64_AM::PLDL3STRM: return "pldl3strm"; - case ARM64_AM::PSTL1KEEP: return "pstl1keep"; - case ARM64_AM::PSTL1STRM: return "pstl1strm"; - case ARM64_AM::PSTL2KEEP: return "pstl2keep"; - case ARM64_AM::PSTL2STRM: return "pstl2strm"; - case ARM64_AM::PSTL3KEEP: return "pstl3keep"; - case ARM64_AM::PSTL3STRM: return "pstl3strm"; - } - return 0; -} - -static inline uint64_t ror(uint64_t elt, unsigned size) { - return ((elt & 1) << (size-1)) | (elt >> 1); -} - -/// processLogicalImmediate - Determine if an immediate value can be encoded -/// as the immediate operand of a logical instruction for the given register -/// size. If so, return true with "encoding" set to the encoded value in -/// the form N:immr:imms. -static inline bool processLogicalImmediate(uint64_t imm, unsigned regSize, - uint64_t &encoding) { - if (imm == 0ULL || imm == ~0ULL || - (regSize != 64 && (imm >> regSize != 0 || imm == ~0U))) - return false; - - unsigned size = 2; - uint64_t eltVal = imm; - - // First, determine the element size. 
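// For example, assuming this search behaves as written: with
// imm == 0x00FF00FF00FF00FFULL and regSize == 64, element sizes 2, 4 and 8
// all fail the replication check (the low 0xFF byte is not mirrored in the
// byte above it), so the loop settles on size == 16 with eltVal == 0x00FF,
// i.e. a 16-bit element replicated four times across the register.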
- while (size < regSize) { - unsigned numElts = regSize / size; - unsigned mask = (1ULL << size) - 1; - uint64_t lowestEltVal = imm & mask; - - bool allMatched = true; - for (unsigned i = 1; i < numElts; ++i) { - uint64_t currEltVal = (imm >> (i*size)) & mask; - if (currEltVal != lowestEltVal) { - allMatched = false; - break; - } - } - - if (allMatched) { - eltVal = lowestEltVal; - break; - } - - size *= 2; - } - - // Second, determine the rotation to make the element be: 0^m 1^n. - for (unsigned i = 0; i < size; ++i) { - eltVal = ror(eltVal, size); - uint32_t clz = countLeadingZeros(eltVal) - (64 - size); - uint32_t cto = CountTrailingOnes_64(eltVal); - - if (clz + cto == size) { - // Encode in immr the number of RORs it would take to get *from* this - // element value to our target value, where i+1 is the number of RORs - // to go the opposite direction. - unsigned immr = size - (i + 1); - - // If size has a 1 in the n'th bit, create a value that has zeroes in - // bits [0, n] and ones above that. - uint64_t nimms = ~(size-1) << 1; - - // Or the CTO value into the low bits, which must be below the Nth bit - // bit mentioned above. - nimms |= (cto-1); - - // Extract the seventh bit and toggle it to create the N field. - unsigned N = ((nimms >> 6) & 1) ^ 1; - - encoding = (N << 12) | (immr << 6) | (nimms & 0x3f); - return true; - } - } - - return false; -} - -/// isLogicalImmediate - Return true if the immediate is valid for a logical -/// immediate instruction of the given register size. Return false otherwise. -static inline bool isLogicalImmediate(uint64_t imm, unsigned regSize) { - uint64_t encoding; - return processLogicalImmediate(imm, regSize, encoding); -} - -/// encodeLogicalImmediate - Return the encoded immediate value for a logical -/// immediate instruction of the given register size. -static inline uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize) { - uint64_t encoding = 0; - bool res = processLogicalImmediate(imm, regSize, encoding); - assert(res && "invalid logical immediate"); - (void)res; - return encoding; -} - -/// decodeLogicalImmediate - Decode a logical immediate value in the form -/// "N:immr:imms" (where the immr and imms fields are each 6 bits) into the -/// integer value it represents with regSize bits. -static inline uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize) { - // Extract the N, imms, and immr fields. - unsigned N = (val >> 12) & 1; - unsigned immr = (val >> 6) & 0x3f; - unsigned imms = val & 0x3f; - - assert((regSize == 64 || N == 0) && "undefined logical immediate encoding"); - int len = 31 - countLeadingZeros((N << 6) | (~imms & 0x3f)); - assert(len >= 0 && "undefined logical immediate encoding"); - unsigned size = (1 << len); - unsigned R = immr & (size - 1); - unsigned S = imms & (size - 1); - assert(S != size - 1 && "undefined logical immediate encoding"); - uint64_t pattern = (1ULL << (S + 1)) - 1; - for (unsigned i = 0; i < R; ++i) - pattern = ror(pattern, size); - - // Replicate the pattern to fill the regSize. - while (size != regSize) { - pattern |= (pattern << size); - size *= 2; - } - return pattern; -} - -/// isValidDecodeLogicalImmediate - Check to see if the logical immediate value -/// in the form "N:immr:imms" (where the immr and imms fields are each 6 bits) -/// is a valid encoding for an integer value with regSize bits. -static inline bool isValidDecodeLogicalImmediate(uint64_t val, - unsigned regSize) { - // Extract the N and imms fields needed for checking. 
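// Usage sketch (assuming the helpers above): isLogicalImmediate accepts a
// replicated mask such as 0x00FF00FF00FF00FFULL for regSize == 64, and
//   decodeLogicalImmediate(encodeLogicalImmediate(Imm, 64), 64) == Imm
// should hold for any immediate the encoder accepts; a value like
// 0x0000000012345678ULL, which has no single rotated run of ones, is rejected.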
- unsigned N = (val >> 12) & 1; - unsigned imms = val & 0x3f; - - if (regSize == 32 && N != 0) // undefined logical immediate encoding - return false; - int len = 31 - countLeadingZeros((N << 6) | (~imms & 0x3f)); - if (len < 0) // undefined logical immediate encoding - return false; - unsigned size = (1 << len); - unsigned S = imms & (size - 1); - if (S == size - 1) // undefined logical immediate encoding - return false; - - return true; -} - -//===----------------------------------------------------------------------===// -// Floating-point Immediates -// -static inline float getFPImmFloat(unsigned Imm) { - // We expect an 8-bit binary encoding of a floating-point number here. - union { - uint32_t I; - float F; - } FPUnion; - - uint8_t Sign = (Imm >> 7) & 0x1; - uint8_t Exp = (Imm >> 4) & 0x7; - uint8_t Mantissa = Imm & 0xf; - - // 8-bit FP iEEEE Float Encoding - // abcd efgh aBbbbbbc defgh000 00000000 00000000 - // - // where B = NOT(b); - - FPUnion.I = 0; - FPUnion.I |= Sign << 31; - FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30; - FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25; - FPUnion.I |= (Exp & 0x3) << 23; - FPUnion.I |= Mantissa << 19; - return FPUnion.F; -} - -/// getFP32Imm - Return an 8-bit floating-point version of the 32-bit -/// floating-point value. If the value cannot be represented as an 8-bit -/// floating-point value, then return -1. -static inline int getFP32Imm(const APInt &Imm) { - uint32_t Sign = Imm.lshr(31).getZExtValue() & 1; - int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127 - int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits - - // We can handle 4 bits of mantissa. - // mantissa = (16+UInt(e:f:g:h))/16. - if (Mantissa & 0x7ffff) - return -1; - Mantissa >>= 19; - if ((Mantissa & 0xf) != Mantissa) - return -1; - - // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 - if (Exp < -3 || Exp > 4) - return -1; - Exp = ((Exp+3) & 0x7) ^ 4; - - return ((int)Sign << 7) | (Exp << 4) | Mantissa; -} - -static inline int getFP32Imm(const APFloat &FPImm) { - return getFP32Imm(FPImm.bitcastToAPInt()); -} - -/// getFP64Imm - Return an 8-bit floating-point version of the 64-bit -/// floating-point value. If the value cannot be represented as an 8-bit -/// floating-point value, then return -1. -static inline int getFP64Imm(const APInt &Imm) { - uint64_t Sign = Imm.lshr(63).getZExtValue() & 1; - int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023 - uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL; - - // We can handle 4 bits of mantissa. - // mantissa = (16+UInt(e:f:g:h))/16. 
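// Worked example, assuming this encoder and getFPImmFloat above are inverses:
// 2.0f (sign 0, unbiased exponent 1, zero mantissa) encodes as imm8 0x00 and
// 1.0f encodes as 0x70; feeding 0x00 and 0x70 back through getFPImmFloat
// should reproduce 2.0f and 1.0f respectively.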
- if (Mantissa & 0xffffffffffffULL) - return -1; - Mantissa >>= 48; - if ((Mantissa & 0xf) != Mantissa) - return -1; - - // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 - if (Exp < -3 || Exp > 4) - return -1; - Exp = ((Exp+3) & 0x7) ^ 4; - - return ((int)Sign << 7) | (Exp << 4) | Mantissa; -} - -static inline int getFP64Imm(const APFloat &FPImm) { - return getFP64Imm(FPImm.bitcastToAPInt()); -} - -//===--------------------------------------------------------------------===// -// AdvSIMD Modified Immediates -//===--------------------------------------------------------------------===// - -// 0x00 0x00 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh -static inline bool isAdvSIMDModImmType1(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0xffffff00ffffff00ULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType1(uint64_t Imm) { - return (Imm & 0xffULL); -} - -static inline uint64_t decodeAdvSIMDModImmType1(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 32) | EncVal; -} - -// 0x00 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh 0x00 -static inline bool isAdvSIMDModImmType2(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0xffff00ffffff00ffULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType2(uint64_t Imm) { - return (Imm & 0xff00ULL) >> 8; -} - -static inline uint64_t decodeAdvSIMDModImmType2(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 40) | (EncVal << 8); -} - -// 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh 0x00 0x00 -static inline bool isAdvSIMDModImmType3(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0xff00ffffff00ffffULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType3(uint64_t Imm) { - return (Imm & 0xff0000ULL) >> 16; -} - -static inline uint64_t decodeAdvSIMDModImmType3(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 48) | (EncVal << 16); -} - -// abcdefgh 0x00 0x00 0x00 abcdefgh 0x00 0x00 0x00 -static inline bool isAdvSIMDModImmType4(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0x00ffffff00ffffffULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType4(uint64_t Imm) { - return (Imm & 0xff000000ULL) >> 24; -} - -static inline uint64_t decodeAdvSIMDModImmType4(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 56) | (EncVal << 24); -} - -// 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh -static inline bool isAdvSIMDModImmType5(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - (((Imm & 0x00ff0000ULL) >> 16) == (Imm & 0x000000ffULL)) && - ((Imm & 0xff00ff00ff00ff00ULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType5(uint64_t Imm) { - return (Imm & 0xffULL); -} - -static inline uint64_t decodeAdvSIMDModImmType5(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 48) | (EncVal << 32) | (EncVal << 16) | EncVal; -} - -// abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00 -static inline bool isAdvSIMDModImmType6(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - (((Imm & 0xff000000ULL) >> 16) == (Imm & 0x0000ff00ULL)) && - ((Imm & 0x00ff00ff00ff00ffULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType6(uint64_t Imm) { - return (Imm & 0xff00ULL) >> 8; -} - -static inline uint64_t decodeAdvSIMDModImmType6(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 56) | (EncVal << 40) | (EncVal << 24) | (EncVal << 8); -} - -// 0x00 0x00 abcdefgh 0xFF 0x00 0x00 abcdefgh 0xFF -static inline bool isAdvSIMDModImmType7(uint64_t 
Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0xffff00ffffff00ffULL) == 0x000000ff000000ffULL); -} - -static inline uint8_t encodeAdvSIMDModImmType7(uint64_t Imm) { - return (Imm & 0xff00ULL) >> 8; -} - -static inline uint64_t decodeAdvSIMDModImmType7(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 40) | (EncVal << 8) | 0x000000ff000000ffULL; -} - -// 0x00 abcdefgh 0xFF 0xFF 0x00 abcdefgh 0xFF 0xFF -static inline bool isAdvSIMDModImmType8(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0xff00ffffff00ffffULL) == 0x0000ffff0000ffffULL); -} - -static inline uint64_t decodeAdvSIMDModImmType8(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 48) | (EncVal << 16) | 0x0000ffff0000ffffULL; -} - -static inline uint8_t encodeAdvSIMDModImmType8(uint64_t Imm) { - return (Imm & 0x00ff0000ULL) >> 16; -} - -// abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh -static inline bool isAdvSIMDModImmType9(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm >> 48) == (Imm & 0x0000ffffULL)) && - ((Imm >> 56) == (Imm & 0x000000ffULL)); -} - -static inline uint8_t encodeAdvSIMDModImmType9(uint64_t Imm) { - return (Imm & 0xffULL); -} - -static inline uint64_t decodeAdvSIMDModImmType9(uint8_t Imm) { - uint64_t EncVal = Imm; - EncVal |= (EncVal << 8); - EncVal |= (EncVal << 16); - EncVal |= (EncVal << 32); - return EncVal; -} - -// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh -// cmode: 1110, op: 1 -static inline bool isAdvSIMDModImmType10(uint64_t Imm) { - uint64_t ByteA = Imm & 0xff00000000000000ULL; - uint64_t ByteB = Imm & 0x00ff000000000000ULL; - uint64_t ByteC = Imm & 0x0000ff0000000000ULL; - uint64_t ByteD = Imm & 0x000000ff00000000ULL; - uint64_t ByteE = Imm & 0x00000000ff000000ULL; - uint64_t ByteF = Imm & 0x0000000000ff0000ULL; - uint64_t ByteG = Imm & 0x000000000000ff00ULL; - uint64_t ByteH = Imm & 0x00000000000000ffULL; - - return (ByteA == 0ULL || ByteA == 0xff00000000000000ULL) && - (ByteB == 0ULL || ByteB == 0x00ff000000000000ULL) && - (ByteC == 0ULL || ByteC == 0x0000ff0000000000ULL) && - (ByteD == 0ULL || ByteD == 0x000000ff00000000ULL) && - (ByteE == 0ULL || ByteE == 0x00000000ff000000ULL) && - (ByteF == 0ULL || ByteF == 0x0000000000ff0000ULL) && - (ByteG == 0ULL || ByteG == 0x000000000000ff00ULL) && - (ByteH == 0ULL || ByteH == 0x00000000000000ffULL); -} - -static inline uint8_t encodeAdvSIMDModImmType10(uint64_t Imm) { - uint8_t BitA = (Imm & 0xff00000000000000ULL) != 0; - uint8_t BitB = (Imm & 0x00ff000000000000ULL) != 0; - uint8_t BitC = (Imm & 0x0000ff0000000000ULL) != 0; - uint8_t BitD = (Imm & 0x000000ff00000000ULL) != 0; - uint8_t BitE = (Imm & 0x00000000ff000000ULL) != 0; - uint8_t BitF = (Imm & 0x0000000000ff0000ULL) != 0; - uint8_t BitG = (Imm & 0x000000000000ff00ULL) != 0; - uint8_t BitH = (Imm & 0x00000000000000ffULL) != 0; - - uint8_t EncVal = BitA; - EncVal <<= 1; - EncVal |= BitB; - EncVal <<= 1; - EncVal |= BitC; - EncVal <<= 1; - EncVal |= BitD; - EncVal <<= 1; - EncVal |= BitE; - EncVal <<= 1; - EncVal |= BitF; - EncVal <<= 1; - EncVal |= BitG; - EncVal <<= 1; - EncVal |= BitH; - return EncVal; -} - -static inline uint64_t decodeAdvSIMDModImmType10(uint8_t Imm) { - uint64_t EncVal = 0; - if (Imm & 0x80) EncVal |= 0xff00000000000000ULL; - if (Imm & 0x40) EncVal |= 0x00ff000000000000ULL; - if (Imm & 0x20) EncVal |= 0x0000ff0000000000ULL; - if (Imm & 0x10) EncVal |= 0x000000ff00000000ULL; - if (Imm & 0x08) EncVal |= 0x00000000ff000000ULL; - 
if (Imm & 0x04) EncVal |= 0x0000000000ff0000ULL; - if (Imm & 0x02) EncVal |= 0x000000000000ff00ULL; - if (Imm & 0x01) EncVal |= 0x00000000000000ffULL; - return EncVal; -} - -// aBbbbbbc defgh000 0x00 0x00 aBbbbbbc defgh000 0x00 0x00 -static inline bool isAdvSIMDModImmType11(uint64_t Imm) { - uint64_t BString = (Imm & 0x7E000000ULL) >> 25; - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - (BString == 0x1f || BString == 0x20) && - ((Imm & 0x0007ffff0007ffffULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType11(uint64_t Imm) { - uint8_t BitA = (Imm & 0x80000000ULL) != 0; - uint8_t BitB = (Imm & 0x20000000ULL) != 0; - uint8_t BitC = (Imm & 0x01000000ULL) != 0; - uint8_t BitD = (Imm & 0x00800000ULL) != 0; - uint8_t BitE = (Imm & 0x00400000ULL) != 0; - uint8_t BitF = (Imm & 0x00200000ULL) != 0; - uint8_t BitG = (Imm & 0x00100000ULL) != 0; - uint8_t BitH = (Imm & 0x00080000ULL) != 0; - - uint8_t EncVal = BitA; - EncVal <<= 1; - EncVal |= BitB; - EncVal <<= 1; - EncVal |= BitC; - EncVal <<= 1; - EncVal |= BitD; - EncVal <<= 1; - EncVal |= BitE; - EncVal <<= 1; - EncVal |= BitF; - EncVal <<= 1; - EncVal |= BitG; - EncVal <<= 1; - EncVal |= BitH; - return EncVal; -} - -static inline uint64_t decodeAdvSIMDModImmType11(uint8_t Imm) { - uint64_t EncVal = 0; - if (Imm & 0x80) EncVal |= 0x80000000ULL; - if (Imm & 0x40) EncVal |= 0x3e000000ULL; - else EncVal |= 0x40000000ULL; - if (Imm & 0x20) EncVal |= 0x01000000ULL; - if (Imm & 0x10) EncVal |= 0x00800000ULL; - if (Imm & 0x08) EncVal |= 0x00400000ULL; - if (Imm & 0x04) EncVal |= 0x00200000ULL; - if (Imm & 0x02) EncVal |= 0x00100000ULL; - if (Imm & 0x01) EncVal |= 0x00080000ULL; - return (EncVal << 32) | EncVal; -} - -// aBbbbbbb bbcdefgh 0x00 0x00 0x00 0x00 0x00 0x00 -static inline bool isAdvSIMDModImmType12(uint64_t Imm) { - uint64_t BString = (Imm & 0x7fc0000000000000ULL) >> 54; - return ((BString == 0xff || BString == 0x100) && - ((Imm & 0x0000ffffffffffffULL) == 0)); -} - -static inline uint8_t encodeAdvSIMDModImmType12(uint64_t Imm) { - uint8_t BitA = (Imm & 0x8000000000000000ULL) != 0; - uint8_t BitB = (Imm & 0x0040000000000000ULL) != 0; - uint8_t BitC = (Imm & 0x0020000000000000ULL) != 0; - uint8_t BitD = (Imm & 0x0010000000000000ULL) != 0; - uint8_t BitE = (Imm & 0x0008000000000000ULL) != 0; - uint8_t BitF = (Imm & 0x0004000000000000ULL) != 0; - uint8_t BitG = (Imm & 0x0002000000000000ULL) != 0; - uint8_t BitH = (Imm & 0x0001000000000000ULL) != 0; - - uint8_t EncVal = BitA; - EncVal <<= 1; - EncVal |= BitB; - EncVal <<= 1; - EncVal |= BitC; - EncVal <<= 1; - EncVal |= BitD; - EncVal <<= 1; - EncVal |= BitE; - EncVal <<= 1; - EncVal |= BitF; - EncVal <<= 1; - EncVal |= BitG; - EncVal <<= 1; - EncVal |= BitH; - return EncVal; -} - -static inline uint64_t decodeAdvSIMDModImmType12(uint8_t Imm) { - uint64_t EncVal = 0; - if (Imm & 0x80) EncVal |= 0x8000000000000000ULL; - if (Imm & 0x40) EncVal |= 0x3fc0000000000000ULL; - else EncVal |= 0x4000000000000000ULL; - if (Imm & 0x20) EncVal |= 0x0020000000000000ULL; - if (Imm & 0x10) EncVal |= 0x0010000000000000ULL; - if (Imm & 0x08) EncVal |= 0x0008000000000000ULL; - if (Imm & 0x04) EncVal |= 0x0004000000000000ULL; - if (Imm & 0x02) EncVal |= 0x0002000000000000ULL; - if (Imm & 0x01) EncVal |= 0x0001000000000000ULL; - return (EncVal << 32) | EncVal; -} - -} // end namespace ARM64_AM - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp deleted file mode 100644 index 26813e2..0000000 --- 
a/lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp +++ /dev/null @@ -1,533 +0,0 @@ -//===-- ARM64AsmBackend.cpp - ARM64 Assembler Backend ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "ARM64.h" -#include "ARM64RegisterInfo.h" -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "llvm/ADT/Triple.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCDirectives.h" -#include "llvm/MC/MCFixupKindInfo.h" -#include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" -using namespace llvm; - -namespace { - -class ARM64AsmBackend : public MCAsmBackend { - static const unsigned PCRelFlagVal = - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits | MCFixupKindInfo::FKF_IsPCRel; - -public: - ARM64AsmBackend(const Target &T) : MCAsmBackend() {} - - unsigned getNumFixupKinds() const { return ARM64::NumTargetFixupKinds; } - - const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { - const static MCFixupKindInfo Infos[ARM64::NumTargetFixupKinds] = { - // This table *must* be in the order that the fixup_* kinds are defined in - // ARM64FixupKinds.h. - // - // Name Offset (bits) Size (bits) Flags - { "fixup_arm64_pcrel_adr_imm21", 0, 32, PCRelFlagVal }, - { "fixup_arm64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal }, - { "fixup_arm64_add_imm12", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale1", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale2", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale4", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale8", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale16", 10, 12, 0 }, - { "fixup_arm64_movw", 5, 16, 0 }, - { "fixup_arm64_pcrel_branch14", 5, 14, PCRelFlagVal }, - { "fixup_arm64_pcrel_imm19", 5, 19, PCRelFlagVal }, - { "fixup_arm64_pcrel_branch26", 0, 26, PCRelFlagVal }, - { "fixup_arm64_pcrel_call26", 0, 26, PCRelFlagVal }, - { "fixup_arm64_tlsdesc_call", 0, 0, 0 } - }; - - if (Kind < FirstTargetFixupKind) - return MCAsmBackend::getFixupKindInfo(Kind); - - assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && - "Invalid kind!"); - return Infos[Kind - FirstTargetFixupKind]; - } - - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel) const; - - bool mayNeedRelaxation(const MCInst &Inst) const; - bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const; - void relaxInstruction(const MCInst &Inst, MCInst &Res) const; - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const; - - void HandleAssemblerFlag(MCAssemblerFlag Flag) {} - - unsigned getPointerSize() const { return 8; } -}; - -} // end anonymous namespace - -/// \brief The number of bytes the fixup may change. 
-static unsigned getFixupKindNumBytes(unsigned Kind) { - switch (Kind) { - default: - assert(0 && "Unknown fixup kind!"); - - case ARM64::fixup_arm64_tlsdesc_call: - return 0; - - case FK_Data_1: - return 1; - - case FK_Data_2: - case ARM64::fixup_arm64_movw: - return 2; - - case ARM64::fixup_arm64_pcrel_branch14: - case ARM64::fixup_arm64_add_imm12: - case ARM64::fixup_arm64_ldst_imm12_scale1: - case ARM64::fixup_arm64_ldst_imm12_scale2: - case ARM64::fixup_arm64_ldst_imm12_scale4: - case ARM64::fixup_arm64_ldst_imm12_scale8: - case ARM64::fixup_arm64_ldst_imm12_scale16: - case ARM64::fixup_arm64_pcrel_imm19: - return 3; - - case ARM64::fixup_arm64_pcrel_adr_imm21: - case ARM64::fixup_arm64_pcrel_adrp_imm21: - case ARM64::fixup_arm64_pcrel_branch26: - case ARM64::fixup_arm64_pcrel_call26: - case FK_Data_4: - return 4; - - case FK_Data_8: - return 8; - } -} - -static unsigned AdrImmBits(unsigned Value) { - unsigned lo2 = Value & 0x3; - unsigned hi19 = (Value & 0x1ffffc) >> 2; - return (hi19 << 5) | (lo2 << 29); -} - -static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { - int64_t SignedValue = static_cast<int64_t>(Value); - switch (Kind) { - default: - assert(false && "Unknown fixup kind!"); - case ARM64::fixup_arm64_pcrel_adr_imm21: - if (SignedValue > 2097151 || SignedValue < -2097152) - report_fatal_error("fixup value out of range"); - return AdrImmBits(Value & 0x1fffffULL); - case ARM64::fixup_arm64_pcrel_adrp_imm21: - return AdrImmBits((Value & 0x1fffff000ULL) >> 12); - case ARM64::fixup_arm64_pcrel_imm19: - // Signed 21-bit immediate - if (SignedValue > 2097151 || SignedValue < -2097152) - report_fatal_error("fixup value out of range"); - // Low two bits are not encoded. - return (Value >> 2) & 0x7ffff; - case ARM64::fixup_arm64_add_imm12: - case ARM64::fixup_arm64_ldst_imm12_scale1: - // Unsigned 12-bit immediate - if (Value >= 0x1000) - report_fatal_error("invalid imm12 fixup value"); - return Value; - case ARM64::fixup_arm64_ldst_imm12_scale2: - // Unsigned 12-bit immediate which gets multiplied by 2 - if (Value & 1 || Value >= 0x2000) - report_fatal_error("invalid imm12 fixup value"); - return Value >> 1; - case ARM64::fixup_arm64_ldst_imm12_scale4: - // Unsigned 12-bit immediate which gets multiplied by 4 - if (Value & 3 || Value >= 0x4000) - report_fatal_error("invalid imm12 fixup value"); - return Value >> 2; - case ARM64::fixup_arm64_ldst_imm12_scale8: - // Unsigned 12-bit immediate which gets multiplied by 8 - if (Value & 7 || Value >= 0x8000) - report_fatal_error("invalid imm12 fixup value"); - return Value >> 3; - case ARM64::fixup_arm64_ldst_imm12_scale16: - // Unsigned 12-bit immediate which gets multiplied by 16 - if (Value & 15 || Value >= 0x10000) - report_fatal_error("invalid imm12 fixup value"); - return Value >> 4; - case ARM64::fixup_arm64_movw: - report_fatal_error("no resolvable MOVZ/MOVK fixups supported yet"); - return Value; - case ARM64::fixup_arm64_pcrel_branch14: - // Signed 16-bit immediate - if (SignedValue > 32767 || SignedValue < -32768) - report_fatal_error("fixup value out of range"); - // Low two bits are not encoded (4-byte alignment assumed). - if (Value & 0x3) - report_fatal_error("fixup not sufficiently aligned"); - return (Value >> 2) & 0x3fff; - case ARM64::fixup_arm64_pcrel_branch26: - case ARM64::fixup_arm64_pcrel_call26: - // Signed 28-bit immediate - if (SignedValue > 134217727 || SignedValue < -134217728) - report_fatal_error("fixup value out of range"); - // Low two bits are not encoded (4-byte alignment assumed). 
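// For instance, assuming this adjustment runs as written: a branch26
// displacement of +0x2000 bytes passes the range and alignment checks and is
// encoded as (0x2000 >> 2) & 0x3ffffff == 0x800. ADR/ADRP payloads are
// repacked by AdrImmBits above, e.g. AdrImmBits(0x1000) == 0x8000, with the
// low two bits of the value placed at bits 30:29 of the instruction word.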
- if (Value & 0x3) - report_fatal_error("fixup not sufficiently aligned"); - return (Value >> 2) & 0x3ffffff; - case FK_Data_1: - case FK_Data_2: - case FK_Data_4: - case FK_Data_8: - return Value; - } -} - -void ARM64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel) const { - unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); - if (!Value) - return; // Doesn't change encoding. - MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind()); - // Apply any target-specific value adjustments. - Value = adjustFixupValue(Fixup.getKind(), Value); - - // Shift the value into position. - Value <<= Info.TargetOffset; - - unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); - - // For each byte of the fragment that the fixup touches, mask in the - // bits from the fixup value. - for (unsigned i = 0; i != NumBytes; ++i) - Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); -} - -bool ARM64AsmBackend::mayNeedRelaxation(const MCInst &Inst) const { - return false; -} - -bool ARM64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const { - // FIXME: This isn't correct for ARM64. Just moving the "generic" logic - // into the targets for now. - // - // Relax if the value is too big for a (signed) i8. - return int64_t(Value) != int64_t(int8_t(Value)); -} - -void ARM64AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { - assert(false && "ARM64AsmBackend::relaxInstruction() unimplemented"); -} - -bool ARM64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { - // If the count is not 4-byte aligned, we must be writing data into the text - // section (otherwise we have unaligned instructions, and thus have far - // bigger problems), so just write zeros instead. - if ((Count & 3) != 0) { - for (uint64_t i = 0, e = (Count & 3); i != e; ++i) - OW->Write8(0); - } - - // We are properly aligned, so write NOPs as requested. - Count /= 4; - for (uint64_t i = 0; i != Count; ++i) - OW->Write32(0xd503201f); - return true; -} - -namespace { - -namespace CU { - -/// \brief Compact unwind encoding values. -enum CompactUnwindEncodings { - /// \brief A "frameless" leaf function, where no non-volatile registers are - /// saved. The return remains in LR throughout the function. - UNWIND_ARM64_MODE_FRAMELESS = 0x02000000, - - /// \brief No compact unwind encoding available. Instead the low 23-bits of - /// the compact unwind encoding is the offset of the DWARF FDE in the - /// __eh_frame section. This mode is never used in object files. It is only - /// generated by the linker in final linked images, which have only DWARF info - /// for a function. - UNWIND_ARM64_MODE_DWARF = 0x03000000, - - /// \brief This is a standard arm64 prologue where FP/LR are immediately - /// pushed on the stack, then SP is copied to FP. If there are any - /// non-volatile register saved, they are copied into the stack fame in pairs - /// in a contiguous ranger right below the saved FP/LR pair. Any subset of the - /// five X pairs and four D pairs can be saved, but the memory layout must be - /// in register number order. - UNWIND_ARM64_MODE_FRAME = 0x04000000, - - /// \brief Frame register pair encodings. 
- UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001, - UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002, - UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004, - UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008, - UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010, - UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100, - UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200, - UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400, - UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800 -}; - -} // end CU namespace - -// FIXME: This should be in a separate file. -class DarwinARM64AsmBackend : public ARM64AsmBackend { - const MCRegisterInfo &MRI; - - /// \brief Encode compact unwind stack adjustment for frameless functions. - /// See UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK in compact_unwind_encoding.h. - /// The stack size always needs to be 16 byte aligned. - uint32_t encodeStackAdjustment(uint32_t StackSize) const { - return (StackSize / 16) << 12; - } - -public: - DarwinARM64AsmBackend(const Target &T, const MCRegisterInfo &MRI) - : ARM64AsmBackend(T), MRI(MRI) {} - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createARM64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64, - MachO::CPU_SUBTYPE_ARM64_ALL); - } - - virtual bool doesSectionRequireSymbols(const MCSection &Section) const { - // Any section for which the linker breaks things into atoms needs to - // preserve symbols, including assembler local symbols, to identify - // those atoms. These sections are: - // Sections of type: - // - // S_CSTRING_LITERALS (e.g. __cstring) - // S_LITERAL_POINTERS (e.g. objc selector pointers) - // S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS - // - // Sections named: - // - // __TEXT,__eh_frame - // __TEXT,__ustring - // __DATA,__cfstring - // __DATA,__objc_classrefs - // __DATA,__objc_catlist - // - // FIXME: It would be better if the compiler used actual linker local - // symbols for each of these sections rather than preserving what - // are ostensibly assembler local symbols. - const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section); - return (SMO.getType() == MachO::S_CSTRING_LITERALS || - SMO.getType() == MachO::S_4BYTE_LITERALS || - SMO.getType() == MachO::S_8BYTE_LITERALS || - SMO.getType() == MachO::S_16BYTE_LITERALS || - SMO.getType() == MachO::S_LITERAL_POINTERS || - (SMO.getSegmentName() == "__TEXT" && - (SMO.getSectionName() == "__eh_frame" || - SMO.getSectionName() == "__ustring")) || - (SMO.getSegmentName() == "__DATA" && - (SMO.getSectionName() == "__cfstring" || - SMO.getSectionName() == "__objc_classrefs" || - SMO.getSectionName() == "__objc_catlist"))); - } - - /// \brief Generate the compact unwind encoding from the CFI directives. - virtual uint32_t - generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const - override { - if (Instrs.empty()) - return CU::UNWIND_ARM64_MODE_FRAMELESS; - - bool HasFP = false; - unsigned StackSize = 0; - - uint32_t CompactUnwindEncoding = 0; - for (size_t i = 0, e = Instrs.size(); i != e; ++i) { - const MCCFIInstruction &Inst = Instrs[i]; - - switch (Inst.getOperation()) { - default: - // Cannot handle this directive: bail out. - return CU::UNWIND_ARM64_MODE_DWARF; - case MCCFIInstruction::OpDefCfa: { - // Defines a frame pointer. 
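// The directive sequence this case expects is the usual AArch64 framed
// prologue, roughly:
//   .cfi_def_cfa w29, 16
//   .cfi_offset w30, -8
//   .cfi_offset w29, -16
// i.e. an OpDefCfa on the frame pointer followed immediately by the LR and
// FP pushes (checked by the asserts below); any directive this switch does
// not recognize hits the default case and returns UNWIND_ARM64_MODE_DWARF.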
- assert(getXRegFromWReg(MRI.getLLVMRegNum(Inst.getRegister(), true)) == - ARM64::FP && - "Invalid frame pointer!"); - assert(i + 2 < e && "Insufficient CFI instructions to define a frame!"); - - const MCCFIInstruction &LRPush = Instrs[++i]; - assert(LRPush.getOperation() == MCCFIInstruction::OpOffset && - "Link register not pushed!"); - const MCCFIInstruction &FPPush = Instrs[++i]; - assert(FPPush.getOperation() == MCCFIInstruction::OpOffset && - "Frame pointer not pushed!"); - - unsigned LRReg = MRI.getLLVMRegNum(LRPush.getRegister(), true); - unsigned FPReg = MRI.getLLVMRegNum(FPPush.getRegister(), true); - - LRReg = getXRegFromWReg(LRReg); - FPReg = getXRegFromWReg(FPReg); - - assert(LRReg == ARM64::LR && FPReg == ARM64::FP && - "Pushing invalid registers for frame!"); - - // Indicate that the function has a frame. - CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAME; - HasFP = true; - break; - } - case MCCFIInstruction::OpDefCfaOffset: { - assert(StackSize == 0 && "We already have the CFA offset!"); - StackSize = std::abs(Inst.getOffset()); - break; - } - case MCCFIInstruction::OpOffset: { - // Registers are saved in pairs. We expect there to be two consecutive - // `.cfi_offset' instructions with the appropriate registers specified. - unsigned Reg1 = MRI.getLLVMRegNum(Inst.getRegister(), true); - if (i + 1 == e) - return CU::UNWIND_ARM64_MODE_DWARF; - - const MCCFIInstruction &Inst2 = Instrs[++i]; - if (Inst2.getOperation() != MCCFIInstruction::OpOffset) - return CU::UNWIND_ARM64_MODE_DWARF; - unsigned Reg2 = MRI.getLLVMRegNum(Inst2.getRegister(), true); - - // N.B. The encodings must be in register number order, and the X - // registers before the D registers. - - // X19/X20 pair = 0x00000001, - // X21/X22 pair = 0x00000002, - // X23/X24 pair = 0x00000004, - // X25/X26 pair = 0x00000008, - // X27/X28 pair = 0x00000010 - Reg1 = getXRegFromWReg(Reg1); - Reg2 = getXRegFromWReg(Reg2); - - if (Reg1 == ARM64::X19 && Reg2 == ARM64::X20 && - (CompactUnwindEncoding & 0xF1E) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X19_X20_PAIR; - else if (Reg1 == ARM64::X21 && Reg2 == ARM64::X22 && - (CompactUnwindEncoding & 0xF1C) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X21_X22_PAIR; - else if (Reg1 == ARM64::X23 && Reg2 == ARM64::X24 && - (CompactUnwindEncoding & 0xF18) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X23_X24_PAIR; - else if (Reg1 == ARM64::X25 && Reg2 == ARM64::X26 && - (CompactUnwindEncoding & 0xF10) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X25_X26_PAIR; - else if (Reg1 == ARM64::X27 && Reg2 == ARM64::X28 && - (CompactUnwindEncoding & 0xF00) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X27_X28_PAIR; - else { - Reg1 = getDRegFromBReg(Reg1); - Reg2 = getDRegFromBReg(Reg2); - - // D8/D9 pair = 0x00000100, - // D10/D11 pair = 0x00000200, - // D12/D13 pair = 0x00000400, - // D14/D15 pair = 0x00000800 - if (Reg1 == ARM64::D8 && Reg2 == ARM64::D9 && - (CompactUnwindEncoding & 0xE00) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D8_D9_PAIR; - else if (Reg1 == ARM64::D10 && Reg2 == ARM64::D11 && - (CompactUnwindEncoding & 0xC00) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D10_D11_PAIR; - else if (Reg1 == ARM64::D12 && Reg2 == ARM64::D13 && - (CompactUnwindEncoding & 0x800) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D12_D13_PAIR; - else if (Reg1 == ARM64::D14 && Reg2 == ARM64::D15) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D14_D15_PAIR; - else - // A pair was pushed which we cannot handle. 
- return CU::UNWIND_ARM64_MODE_DWARF; - } - - break; - } - } - } - - if (!HasFP) { - // With compact unwind info we can only represent stack adjustments of up - // to 65520 bytes. - if (StackSize > 65520) - return CU::UNWIND_ARM64_MODE_DWARF; - - CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAMELESS; - CompactUnwindEncoding |= encodeStackAdjustment(StackSize); - } - - return CompactUnwindEncoding; - } -}; - -} // end anonymous namespace - -namespace { - -class ELFARM64AsmBackend : public ARM64AsmBackend { -public: - uint8_t OSABI; - - ELFARM64AsmBackend(const Target &T, uint8_t OSABI) - : ARM64AsmBackend(T), OSABI(OSABI) {} - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createARM64ELFObjectWriter(OS, OSABI); - } - - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; -}; - -void ELFARM64AsmBackend::processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, - const MCFragment *DF, - const MCValue &Target, - uint64_t &Value, bool &IsResolved) { - // The ADRP instruction adds some multiple of 0x1000 to the current PC & - // ~0xfff. This means that the required offset to reach a symbol can vary by - // up to one step depending on where the ADRP is in memory. For example: - // - // ADRP x0, there - // there: - // - // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and - // we'll need that as an offset. At any other address "there" will be in the - // same page as the ADRP and the instruction should encode 0x0. Assuming the - // section isn't 0x1000-aligned, we therefore need to delegate this decision - // to the linker -- a relocation! - if ((uint32_t)Fixup.getKind() == ARM64::fixup_arm64_pcrel_adrp_imm21) - IsResolved = false; -} -} - -MCAsmBackend *llvm::createARM64AsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin()) - return new DarwinARM64AsmBackend(T, MRI); - - assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target"); - return new ELFARM64AsmBackend(T, TheTriple.getOS()); -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64BaseInfo.h b/lib/Target/ARM64/MCTargetDesc/ARM64BaseInfo.h deleted file mode 100644 index d3c2cf7..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64BaseInfo.h +++ /dev/null @@ -1,998 +0,0 @@ -//===-- ARM64BaseInfo.h - Top level definitions for ARM64 -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains small standalone helper functions and enum definitions for -// the ARM64 target useful for the compiler back-end and the MC libraries. -// As such, it deliberately does not include references to LLVM core -// code gen types, passes, etc.. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ARM64BASEINFO_H -#define ARM64BASEINFO_H - -#include "ARM64MCTargetDesc.h" -#include "llvm/Support/ErrorHandling.h" - -namespace llvm { - -inline static unsigned getWRegFromXReg(unsigned Reg) { - switch (Reg) { - case ARM64::X0: return ARM64::W0; - case ARM64::X1: return ARM64::W1; - case ARM64::X2: return ARM64::W2; - case ARM64::X3: return ARM64::W3; - case ARM64::X4: return ARM64::W4; - case ARM64::X5: return ARM64::W5; - case ARM64::X6: return ARM64::W6; - case ARM64::X7: return ARM64::W7; - case ARM64::X8: return ARM64::W8; - case ARM64::X9: return ARM64::W9; - case ARM64::X10: return ARM64::W10; - case ARM64::X11: return ARM64::W11; - case ARM64::X12: return ARM64::W12; - case ARM64::X13: return ARM64::W13; - case ARM64::X14: return ARM64::W14; - case ARM64::X15: return ARM64::W15; - case ARM64::X16: return ARM64::W16; - case ARM64::X17: return ARM64::W17; - case ARM64::X18: return ARM64::W18; - case ARM64::X19: return ARM64::W19; - case ARM64::X20: return ARM64::W20; - case ARM64::X21: return ARM64::W21; - case ARM64::X22: return ARM64::W22; - case ARM64::X23: return ARM64::W23; - case ARM64::X24: return ARM64::W24; - case ARM64::X25: return ARM64::W25; - case ARM64::X26: return ARM64::W26; - case ARM64::X27: return ARM64::W27; - case ARM64::X28: return ARM64::W28; - case ARM64::FP: return ARM64::W29; - case ARM64::LR: return ARM64::W30; - case ARM64::SP: return ARM64::WSP; - case ARM64::XZR: return ARM64::WZR; - } - // For anything else, return it unchanged. - return Reg; -} - -inline static unsigned getXRegFromWReg(unsigned Reg) { - switch (Reg) { - case ARM64::W0: return ARM64::X0; - case ARM64::W1: return ARM64::X1; - case ARM64::W2: return ARM64::X2; - case ARM64::W3: return ARM64::X3; - case ARM64::W4: return ARM64::X4; - case ARM64::W5: return ARM64::X5; - case ARM64::W6: return ARM64::X6; - case ARM64::W7: return ARM64::X7; - case ARM64::W8: return ARM64::X8; - case ARM64::W9: return ARM64::X9; - case ARM64::W10: return ARM64::X10; - case ARM64::W11: return ARM64::X11; - case ARM64::W12: return ARM64::X12; - case ARM64::W13: return ARM64::X13; - case ARM64::W14: return ARM64::X14; - case ARM64::W15: return ARM64::X15; - case ARM64::W16: return ARM64::X16; - case ARM64::W17: return ARM64::X17; - case ARM64::W18: return ARM64::X18; - case ARM64::W19: return ARM64::X19; - case ARM64::W20: return ARM64::X20; - case ARM64::W21: return ARM64::X21; - case ARM64::W22: return ARM64::X22; - case ARM64::W23: return ARM64::X23; - case ARM64::W24: return ARM64::X24; - case ARM64::W25: return ARM64::X25; - case ARM64::W26: return ARM64::X26; - case ARM64::W27: return ARM64::X27; - case ARM64::W28: return ARM64::X28; - case ARM64::W29: return ARM64::FP; - case ARM64::W30: return ARM64::LR; - case ARM64::WSP: return ARM64::SP; - case ARM64::WZR: return ARM64::XZR; - } - // For anything else, return it unchanged. 
- return Reg; -} - -static inline unsigned getBRegFromDReg(unsigned Reg) { - switch (Reg) { - case ARM64::D0: return ARM64::B0; - case ARM64::D1: return ARM64::B1; - case ARM64::D2: return ARM64::B2; - case ARM64::D3: return ARM64::B3; - case ARM64::D4: return ARM64::B4; - case ARM64::D5: return ARM64::B5; - case ARM64::D6: return ARM64::B6; - case ARM64::D7: return ARM64::B7; - case ARM64::D8: return ARM64::B8; - case ARM64::D9: return ARM64::B9; - case ARM64::D10: return ARM64::B10; - case ARM64::D11: return ARM64::B11; - case ARM64::D12: return ARM64::B12; - case ARM64::D13: return ARM64::B13; - case ARM64::D14: return ARM64::B14; - case ARM64::D15: return ARM64::B15; - case ARM64::D16: return ARM64::B16; - case ARM64::D17: return ARM64::B17; - case ARM64::D18: return ARM64::B18; - case ARM64::D19: return ARM64::B19; - case ARM64::D20: return ARM64::B20; - case ARM64::D21: return ARM64::B21; - case ARM64::D22: return ARM64::B22; - case ARM64::D23: return ARM64::B23; - case ARM64::D24: return ARM64::B24; - case ARM64::D25: return ARM64::B25; - case ARM64::D26: return ARM64::B26; - case ARM64::D27: return ARM64::B27; - case ARM64::D28: return ARM64::B28; - case ARM64::D29: return ARM64::B29; - case ARM64::D30: return ARM64::B30; - case ARM64::D31: return ARM64::B31; - } - // For anything else, return it unchanged. - return Reg; -} - - -static inline unsigned getDRegFromBReg(unsigned Reg) { - switch (Reg) { - case ARM64::B0: return ARM64::D0; - case ARM64::B1: return ARM64::D1; - case ARM64::B2: return ARM64::D2; - case ARM64::B3: return ARM64::D3; - case ARM64::B4: return ARM64::D4; - case ARM64::B5: return ARM64::D5; - case ARM64::B6: return ARM64::D6; - case ARM64::B7: return ARM64::D7; - case ARM64::B8: return ARM64::D8; - case ARM64::B9: return ARM64::D9; - case ARM64::B10: return ARM64::D10; - case ARM64::B11: return ARM64::D11; - case ARM64::B12: return ARM64::D12; - case ARM64::B13: return ARM64::D13; - case ARM64::B14: return ARM64::D14; - case ARM64::B15: return ARM64::D15; - case ARM64::B16: return ARM64::D16; - case ARM64::B17: return ARM64::D17; - case ARM64::B18: return ARM64::D18; - case ARM64::B19: return ARM64::D19; - case ARM64::B20: return ARM64::D20; - case ARM64::B21: return ARM64::D21; - case ARM64::B22: return ARM64::D22; - case ARM64::B23: return ARM64::D23; - case ARM64::B24: return ARM64::D24; - case ARM64::B25: return ARM64::D25; - case ARM64::B26: return ARM64::D26; - case ARM64::B27: return ARM64::D27; - case ARM64::B28: return ARM64::D28; - case ARM64::B29: return ARM64::D29; - case ARM64::B30: return ARM64::D30; - case ARM64::B31: return ARM64::D31; - } - // For anything else, return it unchanged. - return Reg; -} - -namespace ARM64CC { - -// The CondCodes constants map directly to the 4-bit encoding of the condition -// field for predicated instructions. 
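// Aside, a minimal stand-alone sketch (helper/test code invented for
// illustration, not from the original sources): in the CondCode encoding
// below, each condition and its inverse form an even/odd pair, so the
// getInvertedCondCode() table further down is equivalent to flipping bit 0
// (AL, 0xe, has no inverse and is excluded).
  #include <cassert>
  int main() {
    // Values mirror the CondCode encodings below.
    const unsigned EQ = 0x0, NE = 0x1, CS = 0x2, CC = 0x3,
                   GE = 0xa, LT = 0xb, GT = 0xc, LE = 0xd;
    assert((EQ ^ 1) == NE && (CS ^ 1) == CC); // eq<->ne, cs<->cc
    assert((GE ^ 1) == LT && (GT ^ 1) == LE); // ge<->lt, gt<->le
    return 0;
  }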
-enum CondCode { // Meaning (integer) Meaning (floating-point) - EQ = 0x0, // Equal Equal - NE = 0x1, // Not equal Not equal, or unordered - CS = 0x2, // Carry set >, ==, or unordered - CC = 0x3, // Carry clear Less than - MI = 0x4, // Minus, negative Less than - PL = 0x5, // Plus, positive or zero >, ==, or unordered - VS = 0x6, // Overflow Unordered - VC = 0x7, // No overflow Not unordered - HI = 0x8, // Unsigned higher Greater than, or unordered - LS = 0x9, // Unsigned lower or same Less than or equal - GE = 0xa, // Greater than or equal Greater than or equal - LT = 0xb, // Less than Less than, or unordered - GT = 0xc, // Greater than Greater than - LE = 0xd, // Less than or equal <, ==, or unordered - AL = 0xe // Always (unconditional) Always (unconditional) -}; - -inline static const char *getCondCodeName(CondCode Code) { - // cond<0> is ignored when cond<3:1> = 111, where 1110 is 0xe (aka AL). - if ((Code & AL) == AL) - Code = AL; - switch (Code) { - case EQ: return "eq"; - case NE: return "ne"; - case CS: return "cs"; - case CC: return "cc"; - case MI: return "mi"; - case PL: return "pl"; - case VS: return "vs"; - case VC: return "vc"; - case HI: return "hi"; - case LS: return "ls"; - case GE: return "ge"; - case LT: return "lt"; - case GT: return "gt"; - case LE: return "le"; - case AL: return "al"; - } - llvm_unreachable("Unknown condition code"); -} - -inline static CondCode getInvertedCondCode(CondCode Code) { - switch (Code) { - default: llvm_unreachable("Unknown condition code"); - case EQ: return NE; - case NE: return EQ; - case CS: return CC; - case CC: return CS; - case MI: return PL; - case PL: return MI; - case VS: return VC; - case VC: return VS; - case HI: return LS; - case LS: return HI; - case GE: return LT; - case LT: return GE; - case GT: return LE; - case LE: return GT; - } -} - -/// Given a condition code, return NZCV flags that would satisfy that condition. -/// The flag bits are in the format expected by the ccmp instructions. -/// Note that many different flag settings can satisfy a given condition code, -/// this function just returns one of them. -inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) { - // NZCV flags encoded as expected by ccmp instructions, ARMv8 ISA 5.5.7. 
- enum { N = 8, Z = 4, C = 2, V = 1 }; - switch (Code) { - default: llvm_unreachable("Unknown condition code"); - case EQ: return Z; // Z == 1 - case NE: return 0; // Z == 0 - case CS: return C; // C == 1 - case CC: return 0; // C == 0 - case MI: return N; // N == 1 - case PL: return 0; // N == 0 - case VS: return V; // V == 1 - case VC: return 0; // V == 0 - case HI: return C; // C == 1 && Z == 0 - case LS: return 0; // C == 0 || Z == 1 - case GE: return 0; // N == V - case LT: return N; // N != V - case GT: return 0; // Z == 0 && N == V - case LE: return Z; // Z == 1 || N != V - } -} -} // end namespace ARM64CC - -namespace ARM64SYS { -enum BarrierOption { - InvalidBarrier = 0xff, - OSHLD = 0x1, - OSHST = 0x2, - OSH = 0x3, - NSHLD = 0x5, - NSHST = 0x6, - NSH = 0x7, - ISHLD = 0x9, - ISHST = 0xa, - ISH = 0xb, - LD = 0xd, - ST = 0xe, - SY = 0xf -}; - -inline static const char *getBarrierOptName(BarrierOption Opt) { - switch (Opt) { - default: return NULL; - case 0x1: return "oshld"; - case 0x2: return "oshst"; - case 0x3: return "osh"; - case 0x5: return "nshld"; - case 0x6: return "nshst"; - case 0x7: return "nsh"; - case 0x9: return "ishld"; - case 0xa: return "ishst"; - case 0xb: return "ish"; - case 0xd: return "ld"; - case 0xe: return "st"; - case 0xf: return "sy"; - } -} - -#define A64_SYSREG_ENC(op0,CRn,op2,CRm,op1) ((op0) << 14 | (op1) << 11 | \ - (CRn) << 7 | (CRm) << 3 | (op2)) -enum SystemRegister { - InvalidSystemReg = 0, - // Table in section 3.10.3 - SPSR_EL1 = 0xc200, - SPSR_svc = SPSR_EL1, - ELR_EL1 = 0xc201, - SP_EL0 = 0xc208, - SPSel = 0xc210, - CurrentEL = 0xc212, - DAIF = 0xda11, - NZCV = 0xda10, - FPCR = 0xda20, - FPSR = 0xda21, - DSPSR = 0xda28, - DLR = 0xda29, - SPSR_EL2 = 0xe200, - SPSR_hyp = SPSR_EL2, - ELR_EL2 = 0xe201, - SP_EL1 = 0xe208, - SPSR_irq = 0xe218, - SPSR_abt = 0xe219, - SPSR_und = 0xe21a, - SPSR_fiq = 0xe21b, - SPSR_EL3 = 0xf200, - ELR_EL3 = 0xf201, - SP_EL2 = 0xf208, - - - // Table in section 3.10.8 - MIDR_EL1 = 0xc000, - CTR_EL0 = 0xd801, - MPIDR_EL1 = 0xc005, - ECOIDR_EL1 = 0xc006, - DCZID_EL0 = 0xd807, - MVFR0_EL1 = 0xc018, - MVFR1_EL1 = 0xc019, - ID_AA64PFR0_EL1 = 0xc020, - ID_AA64PFR1_EL1 = 0xc021, - ID_AA64DFR0_EL1 = 0xc028, - ID_AA64DFR1_EL1 = 0xc029, - ID_AA64ISAR0_EL1 = 0xc030, - ID_AA64ISAR1_EL1 = 0xc031, - ID_AA64MMFR0_EL1 = 0xc038, - ID_AA64MMFR1_EL1 = 0xc039, - CCSIDR_EL1 = 0xc800, - CLIDR_EL1 = 0xc801, - AIDR_EL1 = 0xc807, - CSSELR_EL1 = 0xd000, - VPIDR_EL2 = 0xe000, - VMPIDR_EL2 = 0xe005, - SCTLR_EL1 = 0xc080, - SCTLR_EL2 = 0xe080, - SCTLR_EL3 = 0xf080, - ACTLR_EL1 = 0xc081, - ACTLR_EL2 = 0xe081, - ACTLR_EL3 = 0xf081, - CPACR_EL1 = 0xc082, - CPTR_EL2 = 0xe08a, - CPTR_EL3 = 0xf08a, - SCR_EL3 = 0xf088, - HCR_EL2 = 0xe088, - MDCR_EL2 = 0xe089, - MDCR_EL3 = 0xf099, - HSTR_EL2 = 0xe08b, - HACR_EL2 = 0xe08f, - TTBR0_EL1 = 0xc100, - TTBR1_EL1 = 0xc101, - TTBR0_EL2 = 0xe100, - TTBR0_EL3 = 0xf100, - VTTBR_EL2 = 0xe108, - TCR_EL1 = 0xc102, - TCR_EL2 = 0xe102, - TCR_EL3 = 0xf102, - VTCR_EL2 = 0xe10a, - ADFSR_EL1 = 0xc288, - AIFSR_EL1 = 0xc289, - ADFSR_EL2 = 0xe288, - AIFSR_EL2 = 0xe289, - ADFSR_EL3 = 0xf288, - AIFSR_EL3 = 0xf289, - ESR_EL1 = 0xc290, - ESR_EL2 = 0xe290, - ESR_EL3 = 0xf290, - FAR_EL1 = 0xc300, - FAR_EL2 = 0xe300, - FAR_EL3 = 0xf300, - HPFAR_EL2 = 0xe304, - PAR_EL1 = 0xc3a0, - MAIR_EL1 = 0xc510, - MAIR_EL2 = 0xe510, - MAIR_EL3 = 0xf510, - AMAIR_EL1 = 0xc518, - AMAIR_EL2 = 0xe518, - AMAIR_EL3 = 0xf518, - VBAR_EL1 = 0xc600, - VBAR_EL2 = 0xe600, - VBAR_EL3 = 0xf600, - RVBAR_EL1 = 0xc601, - RVBAR_EL2 = 0xe601, - RVBAR_EL3 = 0xf601, - 
ISR_EL1 = 0xc608, - CONTEXTIDR_EL1 = 0xc681, - TPIDR_EL0 = 0xde82, - TPIDRRO_EL0 = 0xde83, - TPIDR_EL1 = 0xc684, - TPIDR_EL2 = 0xe682, - TPIDR_EL3 = 0xf682, - TEECR32_EL1 = 0x9000, - CNTFRQ_EL0 = 0xdf00, - CNTPCT_EL0 = 0xdf01, - CNTVCT_EL0 = 0xdf02, - CNTVOFF_EL2 = 0xe703, - CNTKCTL_EL1 = 0xc708, - CNTHCTL_EL2 = 0xe708, - CNTP_TVAL_EL0 = 0xdf10, - CNTP_CTL_EL0 = 0xdf11, - CNTP_CVAL_EL0 = 0xdf12, - CNTV_TVAL_EL0 = 0xdf18, - CNTV_CTL_EL0 = 0xdf19, - CNTV_CVAL_EL0 = 0xdf1a, - CNTHP_TVAL_EL2 = 0xe710, - CNTHP_CTL_EL2 = 0xe711, - CNTHP_CVAL_EL2 = 0xe712, - CNTPS_TVAL_EL1 = 0xff10, - CNTPS_CTL_EL1 = 0xff11, - CNTPS_CVAL_EL1= 0xff12, - - PMEVCNTR0_EL0 = 0xdf40, - PMEVCNTR1_EL0 = 0xdf41, - PMEVCNTR2_EL0 = 0xdf42, - PMEVCNTR3_EL0 = 0xdf43, - PMEVCNTR4_EL0 = 0xdf44, - PMEVCNTR5_EL0 = 0xdf45, - PMEVCNTR6_EL0 = 0xdf46, - PMEVCNTR7_EL0 = 0xdf47, - PMEVCNTR8_EL0 = 0xdf48, - PMEVCNTR9_EL0 = 0xdf49, - PMEVCNTR10_EL0 = 0xdf4a, - PMEVCNTR11_EL0 = 0xdf4b, - PMEVCNTR12_EL0 = 0xdf4c, - PMEVCNTR13_EL0 = 0xdf4d, - PMEVCNTR14_EL0 = 0xdf4e, - PMEVCNTR15_EL0 = 0xdf4f, - PMEVCNTR16_EL0 = 0xdf50, - PMEVCNTR17_EL0 = 0xdf51, - PMEVCNTR18_EL0 = 0xdf52, - PMEVCNTR19_EL0 = 0xdf53, - PMEVCNTR20_EL0 = 0xdf54, - PMEVCNTR21_EL0 = 0xdf55, - PMEVCNTR22_EL0 = 0xdf56, - PMEVCNTR23_EL0 = 0xdf57, - PMEVCNTR24_EL0 = 0xdf58, - PMEVCNTR25_EL0 = 0xdf59, - PMEVCNTR26_EL0 = 0xdf5a, - PMEVCNTR27_EL0 = 0xdf5b, - PMEVCNTR28_EL0 = 0xdf5c, - PMEVCNTR29_EL0 = 0xdf5d, - PMEVCNTR30_EL0 = 0xdf5e, - - PMEVTYPER0_EL0 = 0xdf60, - PMEVTYPER1_EL0 = 0xdf61, - PMEVTYPER2_EL0 = 0xdf62, - PMEVTYPER3_EL0 = 0xdf63, - PMEVTYPER4_EL0 = 0xdf64, - PMEVTYPER5_EL0 = 0xdf65, - PMEVTYPER6_EL0 = 0xdf66, - PMEVTYPER7_EL0 = 0xdf67, - PMEVTYPER8_EL0 = 0xdf68, - PMEVTYPER9_EL0 = 0xdf69, - PMEVTYPER10_EL0 = 0xdf6a, - PMEVTYPER11_EL0 = 0xdf6b, - PMEVTYPER12_EL0 = 0xdf6c, - PMEVTYPER13_EL0 = 0xdf6d, - PMEVTYPER14_EL0 = 0xdf6e, - PMEVTYPER15_EL0 = 0xdf6f, - PMEVTYPER16_EL0 = 0xdf70, - PMEVTYPER17_EL0 = 0xdf71, - PMEVTYPER18_EL0 = 0xdf72, - PMEVTYPER19_EL0 = 0xdf73, - PMEVTYPER20_EL0 = 0xdf74, - PMEVTYPER21_EL0 = 0xdf75, - PMEVTYPER22_EL0 = 0xdf76, - PMEVTYPER23_EL0 = 0xdf77, - PMEVTYPER24_EL0 = 0xdf78, - PMEVTYPER25_EL0 = 0xdf79, - PMEVTYPER26_EL0 = 0xdf7a, - PMEVTYPER27_EL0 = 0xdf7b, - PMEVTYPER28_EL0 = 0xdf7c, - PMEVTYPER29_EL0 = 0xdf7d, - PMEVTYPER30_EL0 = 0xdf7e, - - PMCCFILTR_EL0 = 0xdf7f, - - RMR_EL3 = 0xf602, - RMR_EL2 = 0xd602, - RMR_EL1 = 0xce02, - - // Debug Architecture 5.3, Table 17. 
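// Aside, a small stand-alone sketch of the encoding formula (the enc() helper
// is invented for illustration, not from the original sources): the
// A64_SYSREG_ENC macro above packs the five MRS/MSR fields into 16 bits as
// op0[15:14] | op1[13:11] | CRn[10:7] | CRm[6:3] | op2[2:0]; note the macro
// takes its arguments in the order (op0, CRn, op2, CRm, op1). The literal
// constants earlier in this enum use the same packing.
  #include <cstdint>
  // Hypothetical helper mirroring A64_SYSREG_ENC, fields in op0..op2 order.
  constexpr uint16_t enc(unsigned op0, unsigned op1, unsigned CRn,
                         unsigned CRm, unsigned op2) {
    return (op0 << 14) | (op1 << 11) | (CRn << 7) | (CRm << 3) | op2;
  }
  static_assert(enc(3, 0, 4, 0, 0) == 0xc200, "matches SPSR_EL1 above");
  static_assert(enc(2, 3, 0, 1, 0) == 0x9808, "matches MDCCSR_EL0 below");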
- MDCCSR_EL0 = A64_SYSREG_ENC(2, 0, 0, 1, 3), - MDCCINT_EL1 = A64_SYSREG_ENC(2, 0, 0, 2, 0), - DBGDTR_EL0 = A64_SYSREG_ENC(2, 0, 0, 4, 3), - DBGDTRRX_EL0 = A64_SYSREG_ENC(2, 0, 0, 5, 3), - DBGDTRTX_EL0 = DBGDTRRX_EL0, - DBGVCR32_EL2 = A64_SYSREG_ENC(2, 0, 0, 7, 4), - OSDTRRX_EL1 = A64_SYSREG_ENC(2, 0, 2, 0, 0), - MDSCR_EL1 = A64_SYSREG_ENC(2, 0, 2, 2, 0), - OSDTRTX_EL1 = A64_SYSREG_ENC(2, 0, 2, 3, 0), - OSECCR_EL11 = A64_SYSREG_ENC(2, 0, 2, 6, 0), - - DBGBVR0_EL1 = A64_SYSREG_ENC(2, 0, 4, 0, 0), - DBGBVR1_EL1 = A64_SYSREG_ENC(2, 0, 4, 1, 0), - DBGBVR2_EL1 = A64_SYSREG_ENC(2, 0, 4, 2, 0), - DBGBVR3_EL1 = A64_SYSREG_ENC(2, 0, 4, 3, 0), - DBGBVR4_EL1 = A64_SYSREG_ENC(2, 0, 4, 4, 0), - DBGBVR5_EL1 = A64_SYSREG_ENC(2, 0, 4, 5, 0), - DBGBVR6_EL1 = A64_SYSREG_ENC(2, 0, 4, 6, 0), - DBGBVR7_EL1 = A64_SYSREG_ENC(2, 0, 4, 7, 0), - DBGBVR8_EL1 = A64_SYSREG_ENC(2, 0, 4, 8, 0), - DBGBVR9_EL1 = A64_SYSREG_ENC(2, 0, 4, 9, 0), - DBGBVR10_EL1 = A64_SYSREG_ENC(2, 0, 4, 10, 0), - DBGBVR11_EL1 = A64_SYSREG_ENC(2, 0, 4, 11, 0), - DBGBVR12_EL1 = A64_SYSREG_ENC(2, 0, 4, 12, 0), - DBGBVR13_EL1 = A64_SYSREG_ENC(2, 0, 4, 13, 0), - DBGBVR14_EL1 = A64_SYSREG_ENC(2, 0, 4, 14, 0), - DBGBVR15_EL1 = A64_SYSREG_ENC(2, 0, 4, 15, 0), - - DBGBCR0_EL1 = A64_SYSREG_ENC(2, 0, 5, 0, 0), - DBGBCR1_EL1 = A64_SYSREG_ENC(2, 0, 5, 1, 0), - DBGBCR2_EL1 = A64_SYSREG_ENC(2, 0, 5, 2, 0), - DBGBCR3_EL1 = A64_SYSREG_ENC(2, 0, 5, 3, 0), - DBGBCR4_EL1 = A64_SYSREG_ENC(2, 0, 5, 4, 0), - DBGBCR5_EL1 = A64_SYSREG_ENC(2, 0, 5, 5, 0), - DBGBCR6_EL1 = A64_SYSREG_ENC(2, 0, 5, 6, 0), - DBGBCR7_EL1 = A64_SYSREG_ENC(2, 0, 5, 7, 0), - DBGBCR8_EL1 = A64_SYSREG_ENC(2, 0, 5, 8, 0), - DBGBCR9_EL1 = A64_SYSREG_ENC(2, 0, 5, 9, 0), - DBGBCR10_EL1 = A64_SYSREG_ENC(2, 0, 5, 10, 0), - DBGBCR11_EL1 = A64_SYSREG_ENC(2, 0, 5, 11, 0), - DBGBCR12_EL1 = A64_SYSREG_ENC(2, 0, 5, 12, 0), - DBGBCR13_EL1 = A64_SYSREG_ENC(2, 0, 5, 13, 0), - DBGBCR14_EL1 = A64_SYSREG_ENC(2, 0, 5, 14, 0), - DBGBCR15_EL1 = A64_SYSREG_ENC(2, 0, 5, 15, 0), - - DBGWVR0_EL1 = A64_SYSREG_ENC(2, 0, 6, 0, 0), - DBGWVR1_EL1 = A64_SYSREG_ENC(2, 0, 6, 1, 0), - DBGWVR2_EL1 = A64_SYSREG_ENC(2, 0, 6, 2, 0), - DBGWVR3_EL1 = A64_SYSREG_ENC(2, 0, 6, 3, 0), - DBGWVR4_EL1 = A64_SYSREG_ENC(2, 0, 6, 4, 0), - DBGWVR5_EL1 = A64_SYSREG_ENC(2, 0, 6, 5, 0), - DBGWVR6_EL1 = A64_SYSREG_ENC(2, 0, 6, 6, 0), - DBGWVR7_EL1 = A64_SYSREG_ENC(2, 0, 6, 7, 0), - DBGWVR8_EL1 = A64_SYSREG_ENC(2, 0, 6, 8, 0), - DBGWVR9_EL1 = A64_SYSREG_ENC(2, 0, 6, 9, 0), - DBGWVR10_EL1 = A64_SYSREG_ENC(2, 0, 6, 10, 0), - DBGWVR11_EL1 = A64_SYSREG_ENC(2, 0, 6, 11, 0), - DBGWVR12_EL1 = A64_SYSREG_ENC(2, 0, 6, 12, 0), - DBGWVR13_EL1 = A64_SYSREG_ENC(2, 0, 6, 13, 0), - DBGWVR14_EL1 = A64_SYSREG_ENC(2, 0, 6, 14, 0), - DBGWVR15_EL1 = A64_SYSREG_ENC(2, 0, 6, 15, 0), - - DBGWCR0_EL1 = A64_SYSREG_ENC(2, 0, 7, 0, 0), - DBGWCR1_EL1 = A64_SYSREG_ENC(2, 0, 7, 1, 0), - DBGWCR2_EL1 = A64_SYSREG_ENC(2, 0, 7, 2, 0), - DBGWCR3_EL1 = A64_SYSREG_ENC(2, 0, 7, 3, 0), - DBGWCR4_EL1 = A64_SYSREG_ENC(2, 0, 7, 4, 0), - DBGWCR5_EL1 = A64_SYSREG_ENC(2, 0, 7, 5, 0), - DBGWCR6_EL1 = A64_SYSREG_ENC(2, 0, 7, 6, 0), - DBGWCR7_EL1 = A64_SYSREG_ENC(2, 0, 7, 7, 0), - DBGWCR8_EL1 = A64_SYSREG_ENC(2, 0, 7, 8, 0), - DBGWCR9_EL1 = A64_SYSREG_ENC(2, 0, 7, 9, 0), - DBGWCR10_EL1 = A64_SYSREG_ENC(2, 0, 7, 10, 0), - DBGWCR11_EL1 = A64_SYSREG_ENC(2, 0, 7, 11, 0), - DBGWCR12_EL1 = A64_SYSREG_ENC(2, 0, 7, 12, 0), - DBGWCR13_EL1 = A64_SYSREG_ENC(2, 0, 7, 13, 0), - DBGWCR14_EL1 = A64_SYSREG_ENC(2, 0, 7, 14, 0), - DBGWCR15_EL1 = A64_SYSREG_ENC(2, 0, 7, 15, 0), - - MDRAR_EL1 = A64_SYSREG_ENC(2, 1, 
0, 0, 0), - OSLAR_EL1 = A64_SYSREG_ENC(2, 1, 4, 0, 0), - OSLSR_EL1 = A64_SYSREG_ENC(2, 1, 4, 1, 0), - OSDLR_EL1 = A64_SYSREG_ENC(2, 1, 4, 3, 0), - DBGPRCR_EL1 = A64_SYSREG_ENC(2, 1, 4, 4, 0), - - DBGCLAIMSET_EL1 = A64_SYSREG_ENC(2, 7, 6, 8, 0), - DBGCLAIMCLR_EL1 = A64_SYSREG_ENC(2, 7, 6, 9, 0), - DBGAUTHSTATUS_EL1 = A64_SYSREG_ENC(2, 7, 6, 14, 0), - - DBGDEVID2 = A64_SYSREG_ENC(2, 7, 7, 0, 0), - DBGDEVID1 = A64_SYSREG_ENC(2, 7, 7, 1, 0), - DBGDEVID0 = A64_SYSREG_ENC(2, 7, 7, 2, 0), - - // The following registers are defined to allow access from AArch64 to - // registers which are only used in the AArch32 architecture. - DACR32_EL2 = 0xe180, - IFSR32_EL2 = 0xe281, - TEEHBR32_EL1 = 0x9080, - SDER32_EL3 = 0xf089, - FPEXC32_EL2 = 0xe298, - - // Cyclone specific system registers - CPM_IOACC_CTL_EL3 = 0xff90, - - // Architectural system registers - ID_PFR0_EL1 = 0xc008, - ID_PFR1_EL1 = 0xc009, - ID_DFR0_EL1 = 0xc00a, - ID_AFR0_EL1 = 0xc00b, - ID_ISAR0_EL1 = 0xc010, - ID_ISAR1_EL1 = 0xc011, - ID_ISAR2_EL1 = 0xc012, - ID_ISAR3_EL1 = 0xc013, - ID_ISAR4_EL1 = 0xc014, - ID_ISAR5_EL1 = 0xc015, - AFSR1_EL1 = 0xc289, // note same as old AIFSR_EL1 - AFSR0_EL1 = 0xc288, // note same as old ADFSR_EL1 - REVIDR_EL1 = 0xc006 // note same as old ECOIDR_EL1 - -}; -#undef A64_SYSREG_ENC - -static inline const char *getSystemRegisterName(SystemRegister Reg) { - switch(Reg) { - default: return NULL; // Caller is responsible for handling invalid value. - case SPSR_EL1: return "SPSR_EL1"; - case ELR_EL1: return "ELR_EL1"; - case SP_EL0: return "SP_EL0"; - case SPSel: return "SPSel"; - case DAIF: return "DAIF"; - case CurrentEL: return "CurrentEL"; - case NZCV: return "NZCV"; - case FPCR: return "FPCR"; - case FPSR: return "FPSR"; - case DSPSR: return "DSPSR"; - case DLR: return "DLR"; - case SPSR_EL2: return "SPSR_EL2"; - case ELR_EL2: return "ELR_EL2"; - case SP_EL1: return "SP_EL1"; - case SPSR_irq: return "SPSR_irq"; - case SPSR_abt: return "SPSR_abt"; - case SPSR_und: return "SPSR_und"; - case SPSR_fiq: return "SPSR_fiq"; - case SPSR_EL3: return "SPSR_EL3"; - case ELR_EL3: return "ELR_EL3"; - case SP_EL2: return "SP_EL2"; - case MIDR_EL1: return "MIDR_EL1"; - case CTR_EL0: return "CTR_EL0"; - case MPIDR_EL1: return "MPIDR_EL1"; - case DCZID_EL0: return "DCZID_EL0"; - case MVFR0_EL1: return "MVFR0_EL1"; - case MVFR1_EL1: return "MVFR1_EL1"; - case ID_AA64PFR0_EL1: return "ID_AA64PFR0_EL1"; - case ID_AA64PFR1_EL1: return "ID_AA64PFR1_EL1"; - case ID_AA64DFR0_EL1: return "ID_AA64DFR0_EL1"; - case ID_AA64DFR1_EL1: return "ID_AA64DFR1_EL1"; - case ID_AA64ISAR0_EL1: return "ID_AA64ISAR0_EL1"; - case ID_AA64ISAR1_EL1: return "ID_AA64ISAR1_EL1"; - case ID_AA64MMFR0_EL1: return "ID_AA64MMFR0_EL1"; - case ID_AA64MMFR1_EL1: return "ID_AA64MMFR1_EL1"; - case CCSIDR_EL1: return "CCSIDR_EL1"; - case CLIDR_EL1: return "CLIDR_EL1"; - case AIDR_EL1: return "AIDR_EL1"; - case CSSELR_EL1: return "CSSELR_EL1"; - case VPIDR_EL2: return "VPIDR_EL2"; - case VMPIDR_EL2: return "VMPIDR_EL2"; - case SCTLR_EL1: return "SCTLR_EL1"; - case SCTLR_EL2: return "SCTLR_EL2"; - case SCTLR_EL3: return "SCTLR_EL3"; - case ACTLR_EL1: return "ACTLR_EL1"; - case ACTLR_EL2: return "ACTLR_EL2"; - case ACTLR_EL3: return "ACTLR_EL3"; - case CPACR_EL1: return "CPACR_EL1"; - case CPTR_EL2: return "CPTR_EL2"; - case CPTR_EL3: return "CPTR_EL3"; - case SCR_EL3: return "SCR_EL3"; - case HCR_EL2: return "HCR_EL2"; - case MDCR_EL2: return "MDCR_EL2"; - case MDCR_EL3: return "MDCR_EL3"; - case HSTR_EL2: return "HSTR_EL2"; - case HACR_EL2: return "HACR_EL2"; - 
case TTBR0_EL1: return "TTBR0_EL1"; - case TTBR1_EL1: return "TTBR1_EL1"; - case TTBR0_EL2: return "TTBR0_EL2"; - case TTBR0_EL3: return "TTBR0_EL3"; - case VTTBR_EL2: return "VTTBR_EL2"; - case TCR_EL1: return "TCR_EL1"; - case TCR_EL2: return "TCR_EL2"; - case TCR_EL3: return "TCR_EL3"; - case VTCR_EL2: return "VTCR_EL2"; - case ADFSR_EL2: return "ADFSR_EL2"; - case AIFSR_EL2: return "AIFSR_EL2"; - case ADFSR_EL3: return "ADFSR_EL3"; - case AIFSR_EL3: return "AIFSR_EL3"; - case ESR_EL1: return "ESR_EL1"; - case ESR_EL2: return "ESR_EL2"; - case ESR_EL3: return "ESR_EL3"; - case FAR_EL1: return "FAR_EL1"; - case FAR_EL2: return "FAR_EL2"; - case FAR_EL3: return "FAR_EL3"; - case HPFAR_EL2: return "HPFAR_EL2"; - case PAR_EL1: return "PAR_EL1"; - case MAIR_EL1: return "MAIR_EL1"; - case MAIR_EL2: return "MAIR_EL2"; - case MAIR_EL3: return "MAIR_EL3"; - case AMAIR_EL1: return "AMAIR_EL1"; - case AMAIR_EL2: return "AMAIR_EL2"; - case AMAIR_EL3: return "AMAIR_EL3"; - case VBAR_EL1: return "VBAR_EL1"; - case VBAR_EL2: return "VBAR_EL2"; - case VBAR_EL3: return "VBAR_EL3"; - case RVBAR_EL1: return "RVBAR_EL1"; - case RVBAR_EL2: return "RVBAR_EL2"; - case RVBAR_EL3: return "RVBAR_EL3"; - case ISR_EL1: return "ISR_EL1"; - case CONTEXTIDR_EL1: return "CONTEXTIDR_EL1"; - case TPIDR_EL0: return "TPIDR_EL0"; - case TPIDRRO_EL0: return "TPIDRRO_EL0"; - case TPIDR_EL1: return "TPIDR_EL1"; - case TPIDR_EL2: return "TPIDR_EL2"; - case TPIDR_EL3: return "TPIDR_EL3"; - case TEECR32_EL1: return "TEECR32_EL1"; - case CNTFRQ_EL0: return "CNTFRQ_EL0"; - case CNTPCT_EL0: return "CNTPCT_EL0"; - case CNTVCT_EL0: return "CNTVCT_EL0"; - case CNTVOFF_EL2: return "CNTVOFF_EL2"; - case CNTKCTL_EL1: return "CNTKCTL_EL1"; - case CNTHCTL_EL2: return "CNTHCTL_EL2"; - case CNTP_TVAL_EL0: return "CNTP_TVAL_EL0"; - case CNTP_CTL_EL0: return "CNTP_CTL_EL0"; - case CNTP_CVAL_EL0: return "CNTP_CVAL_EL0"; - case CNTV_TVAL_EL0: return "CNTV_TVAL_EL0"; - case CNTV_CTL_EL0: return "CNTV_CTL_EL0"; - case CNTV_CVAL_EL0: return "CNTV_CVAL_EL0"; - case CNTHP_TVAL_EL2: return "CNTHP_TVAL_EL2"; - case CNTHP_CTL_EL2: return "CNTHP_CTL_EL2"; - case CNTHP_CVAL_EL2: return "CNTHP_CVAL_EL2"; - case CNTPS_TVAL_EL1: return "CNTPS_TVAL_EL1"; - case CNTPS_CTL_EL1: return "CNTPS_CTL_EL1"; - case CNTPS_CVAL_EL1: return "CNTPS_CVAL_EL1"; - case DACR32_EL2: return "DACR32_EL2"; - case IFSR32_EL2: return "IFSR32_EL2"; - case TEEHBR32_EL1: return "TEEHBR32_EL1"; - case SDER32_EL3: return "SDER32_EL3"; - case FPEXC32_EL2: return "FPEXC32_EL2"; - case PMEVCNTR0_EL0: return "PMEVCNTR0_EL0"; - case PMEVCNTR1_EL0: return "PMEVCNTR1_EL0"; - case PMEVCNTR2_EL0: return "PMEVCNTR2_EL0"; - case PMEVCNTR3_EL0: return "PMEVCNTR3_EL0"; - case PMEVCNTR4_EL0: return "PMEVCNTR4_EL0"; - case PMEVCNTR5_EL0: return "PMEVCNTR5_EL0"; - case PMEVCNTR6_EL0: return "PMEVCNTR6_EL0"; - case PMEVCNTR7_EL0: return "PMEVCNTR7_EL0"; - case PMEVCNTR8_EL0: return "PMEVCNTR8_EL0"; - case PMEVCNTR9_EL0: return "PMEVCNTR9_EL0"; - case PMEVCNTR10_EL0: return "PMEVCNTR10_EL0"; - case PMEVCNTR11_EL0: return "PMEVCNTR11_EL0"; - case PMEVCNTR12_EL0: return "PMEVCNTR12_EL0"; - case PMEVCNTR13_EL0: return "PMEVCNTR13_EL0"; - case PMEVCNTR14_EL0: return "PMEVCNTR14_EL0"; - case PMEVCNTR15_EL0: return "PMEVCNTR15_EL0"; - case PMEVCNTR16_EL0: return "PMEVCNTR16_EL0"; - case PMEVCNTR17_EL0: return "PMEVCNTR17_EL0"; - case PMEVCNTR18_EL0: return "PMEVCNTR18_EL0"; - case PMEVCNTR19_EL0: return "PMEVCNTR19_EL0"; - case PMEVCNTR20_EL0: return "PMEVCNTR20_EL0"; - case PMEVCNTR21_EL0: return 
"PMEVCNTR21_EL0"; - case PMEVCNTR22_EL0: return "PMEVCNTR22_EL0"; - case PMEVCNTR23_EL0: return "PMEVCNTR23_EL0"; - case PMEVCNTR24_EL0: return "PMEVCNTR24_EL0"; - case PMEVCNTR25_EL0: return "PMEVCNTR25_EL0"; - case PMEVCNTR26_EL0: return "PMEVCNTR26_EL0"; - case PMEVCNTR27_EL0: return "PMEVCNTR27_EL0"; - case PMEVCNTR28_EL0: return "PMEVCNTR28_EL0"; - case PMEVCNTR29_EL0: return "PMEVCNTR29_EL0"; - case PMEVCNTR30_EL0: return "PMEVCNTR30_EL0"; - case PMEVTYPER0_EL0: return "PMEVTYPER0_EL0"; - case PMEVTYPER1_EL0: return "PMEVTYPER1_EL0"; - case PMEVTYPER2_EL0: return "PMEVTYPER2_EL0"; - case PMEVTYPER3_EL0: return "PMEVTYPER3_EL0"; - case PMEVTYPER4_EL0: return "PMEVTYPER4_EL0"; - case PMEVTYPER5_EL0: return "PMEVTYPER5_EL0"; - case PMEVTYPER6_EL0: return "PMEVTYPER6_EL0"; - case PMEVTYPER7_EL0: return "PMEVTYPER7_EL0"; - case PMEVTYPER8_EL0: return "PMEVTYPER8_EL0"; - case PMEVTYPER9_EL0: return "PMEVTYPER9_EL0"; - case PMEVTYPER10_EL0: return "PMEVTYPER10_EL0"; - case PMEVTYPER11_EL0: return "PMEVTYPER11_EL0"; - case PMEVTYPER12_EL0: return "PMEVTYPER12_EL0"; - case PMEVTYPER13_EL0: return "PMEVTYPER13_EL0"; - case PMEVTYPER14_EL0: return "PMEVTYPER14_EL0"; - case PMEVTYPER15_EL0: return "PMEVTYPER15_EL0"; - case PMEVTYPER16_EL0: return "PMEVTYPER16_EL0"; - case PMEVTYPER17_EL0: return "PMEVTYPER17_EL0"; - case PMEVTYPER18_EL0: return "PMEVTYPER18_EL0"; - case PMEVTYPER19_EL0: return "PMEVTYPER19_EL0"; - case PMEVTYPER20_EL0: return "PMEVTYPER20_EL0"; - case PMEVTYPER21_EL0: return "PMEVTYPER21_EL0"; - case PMEVTYPER22_EL0: return "PMEVTYPER22_EL0"; - case PMEVTYPER23_EL0: return "PMEVTYPER23_EL0"; - case PMEVTYPER24_EL0: return "PMEVTYPER24_EL0"; - case PMEVTYPER25_EL0: return "PMEVTYPER25_EL0"; - case PMEVTYPER26_EL0: return "PMEVTYPER26_EL0"; - case PMEVTYPER27_EL0: return "PMEVTYPER27_EL0"; - case PMEVTYPER28_EL0: return "PMEVTYPER28_EL0"; - case PMEVTYPER29_EL0: return "PMEVTYPER29_EL0"; - case PMEVTYPER30_EL0: return "PMEVTYPER30_EL0"; - case PMCCFILTR_EL0: return "PMCCFILTR_EL0"; - case RMR_EL3: return "RMR_EL3"; - case RMR_EL2: return "RMR_EL2"; - case RMR_EL1: return "RMR_EL1"; - case CPM_IOACC_CTL_EL3: return "CPM_IOACC_CTL_EL3"; - case MDCCSR_EL0: return "MDCCSR_EL0"; - case MDCCINT_EL1: return "MDCCINT_EL1"; - case DBGDTR_EL0: return "DBGDTR_EL0"; - case DBGDTRRX_EL0: return "DBGDTRRX_EL0"; - case DBGVCR32_EL2: return "DBGVCR32_EL2"; - case OSDTRRX_EL1: return "OSDTRRX_EL1"; - case MDSCR_EL1: return "MDSCR_EL1"; - case OSDTRTX_EL1: return "OSDTRTX_EL1"; - case OSECCR_EL11: return "OSECCR_EL11"; - case DBGBVR0_EL1: return "DBGBVR0_EL1"; - case DBGBVR1_EL1: return "DBGBVR1_EL1"; - case DBGBVR2_EL1: return "DBGBVR2_EL1"; - case DBGBVR3_EL1: return "DBGBVR3_EL1"; - case DBGBVR4_EL1: return "DBGBVR4_EL1"; - case DBGBVR5_EL1: return "DBGBVR5_EL1"; - case DBGBVR6_EL1: return "DBGBVR6_EL1"; - case DBGBVR7_EL1: return "DBGBVR7_EL1"; - case DBGBVR8_EL1: return "DBGBVR8_EL1"; - case DBGBVR9_EL1: return "DBGBVR9_EL1"; - case DBGBVR10_EL1: return "DBGBVR10_EL1"; - case DBGBVR11_EL1: return "DBGBVR11_EL1"; - case DBGBVR12_EL1: return "DBGBVR12_EL1"; - case DBGBVR13_EL1: return "DBGBVR13_EL1"; - case DBGBVR14_EL1: return "DBGBVR14_EL1"; - case DBGBVR15_EL1: return "DBGBVR15_EL1"; - case DBGBCR0_EL1: return "DBGBCR0_EL1"; - case DBGBCR1_EL1: return "DBGBCR1_EL1"; - case DBGBCR2_EL1: return "DBGBCR2_EL1"; - case DBGBCR3_EL1: return "DBGBCR3_EL1"; - case DBGBCR4_EL1: return "DBGBCR4_EL1"; - case DBGBCR5_EL1: return "DBGBCR5_EL1"; - case DBGBCR6_EL1: return "DBGBCR6_EL1"; - case DBGBCR7_EL1: 
return "DBGBCR7_EL1"; - case DBGBCR8_EL1: return "DBGBCR8_EL1"; - case DBGBCR9_EL1: return "DBGBCR9_EL1"; - case DBGBCR10_EL1: return "DBGBCR10_EL1"; - case DBGBCR11_EL1: return "DBGBCR11_EL1"; - case DBGBCR12_EL1: return "DBGBCR12_EL1"; - case DBGBCR13_EL1: return "DBGBCR13_EL1"; - case DBGBCR14_EL1: return "DBGBCR14_EL1"; - case DBGBCR15_EL1: return "DBGBCR15_EL1"; - case DBGWVR0_EL1: return "DBGWVR0_EL1"; - case DBGWVR1_EL1: return "DBGWVR1_EL1"; - case DBGWVR2_EL1: return "DBGWVR2_EL1"; - case DBGWVR3_EL1: return "DBGWVR3_EL1"; - case DBGWVR4_EL1: return "DBGWVR4_EL1"; - case DBGWVR5_EL1: return "DBGWVR5_EL1"; - case DBGWVR6_EL1: return "DBGWVR6_EL1"; - case DBGWVR7_EL1: return "DBGWVR7_EL1"; - case DBGWVR8_EL1: return "DBGWVR8_EL1"; - case DBGWVR9_EL1: return "DBGWVR9_EL1"; - case DBGWVR10_EL1: return "DBGWVR10_EL1"; - case DBGWVR11_EL1: return "DBGWVR11_EL1"; - case DBGWVR12_EL1: return "DBGWVR12_EL1"; - case DBGWVR13_EL1: return "DBGWVR13_EL1"; - case DBGWVR14_EL1: return "DBGWVR14_EL1"; - case DBGWVR15_EL1: return "DBGWVR15_EL1"; - case DBGWCR0_EL1: return "DBGWCR0_EL1"; - case DBGWCR1_EL1: return "DBGWCR1_EL1"; - case DBGWCR2_EL1: return "DBGWCR2_EL1"; - case DBGWCR3_EL1: return "DBGWCR3_EL1"; - case DBGWCR4_EL1: return "DBGWCR4_EL1"; - case DBGWCR5_EL1: return "DBGWCR5_EL1"; - case DBGWCR6_EL1: return "DBGWCR6_EL1"; - case DBGWCR7_EL1: return "DBGWCR7_EL1"; - case DBGWCR8_EL1: return "DBGWCR8_EL1"; - case DBGWCR9_EL1: return "DBGWCR9_EL1"; - case DBGWCR10_EL1: return "DBGWCR10_EL1"; - case DBGWCR11_EL1: return "DBGWCR11_EL1"; - case DBGWCR12_EL1: return "DBGWCR12_EL1"; - case DBGWCR13_EL1: return "DBGWCR13_EL1"; - case DBGWCR14_EL1: return "DBGWCR14_EL1"; - case DBGWCR15_EL1: return "DBGWCR15_EL1"; - case MDRAR_EL1: return "MDRAR_EL1"; - case OSLAR_EL1: return "OSLAR_EL1"; - case OSLSR_EL1: return "OSLSR_EL1"; - case OSDLR_EL1: return "OSDLR_EL1"; - case DBGPRCR_EL1: return "DBGPRCR_EL1"; - case DBGCLAIMSET_EL1: return "DBGCLAIMSET_EL1"; - case DBGCLAIMCLR_EL1: return "DBGCLAIMCLR_EL1"; - case DBGAUTHSTATUS_EL1: return "DBGAUTHSTATUS_EL1"; - case DBGDEVID2: return "DBGDEVID2"; - case DBGDEVID1: return "DBGDEVID1"; - case DBGDEVID0: return "DBGDEVID0"; - case ID_PFR0_EL1: return "ID_PFR0_EL1"; - case ID_PFR1_EL1: return "ID_PFR1_EL1"; - case ID_DFR0_EL1: return "ID_DFR0_EL1"; - case ID_AFR0_EL1: return "ID_AFR0_EL1"; - case ID_ISAR0_EL1: return "ID_ISAR0_EL1"; - case ID_ISAR1_EL1: return "ID_ISAR1_EL1"; - case ID_ISAR2_EL1: return "ID_ISAR2_EL1"; - case ID_ISAR3_EL1: return "ID_ISAR3_EL1"; - case ID_ISAR4_EL1: return "ID_ISAR4_EL1"; - case ID_ISAR5_EL1: return "ID_ISAR5_EL1"; - case AFSR1_EL1: return "AFSR1_EL1"; - case AFSR0_EL1: return "AFSR0_EL1"; - case REVIDR_EL1: return "REVIDR_EL1"; - } -} - -enum CPSRField { - InvalidCPSRField = 0xff, - cpsr_SPSel = 0x5, - cpsr_DAIFSet = 0x1e, - cpsr_DAIFClr = 0x1f -}; - -static inline const char *getCPSRFieldName(CPSRField Val) { - switch(Val) { - default: assert(0 && "Invalid system register value!"); - case cpsr_SPSel: return "SPSel"; - case cpsr_DAIFSet: return "DAIFSet"; - case cpsr_DAIFClr: return "DAIFClr"; - } -} - -} // end namespace ARM64SYS - -namespace ARM64II { - /// Target Operand Flag enum. - enum TOF { - //===------------------------------------------------------------------===// - // ARM64 Specific MachineOperand flags. - - MO_NO_FLAG, - - MO_FRAGMENT = 0x7, - - /// MO_PAGE - A symbol operand with this flag represents the pc-relative - /// offset of the 4K page containing the symbol. 
This is used with the - /// ADRP instruction. - MO_PAGE = 1, - - /// MO_PAGEOFF - A symbol operand with this flag represents the offset of - /// that symbol within a 4K page. This offset is added to the page address - /// to produce the complete address. - MO_PAGEOFF = 2, - - /// MO_G3 - A symbol operand with this flag (granule 3) represents the high - /// 16-bits of a 64-bit address, used in a MOVZ or MOVK instruction - MO_G3 = 3, - - /// MO_G2 - A symbol operand with this flag (granule 2) represents the bits - /// 32-47 of a 64-bit address, used in a MOVZ or MOVK instruction - MO_G2 = 4, - - /// MO_G1 - A symbol operand with this flag (granule 1) represents the bits - /// 16-31 of a 64-bit address, used in a MOVZ or MOVK instruction - MO_G1 = 5, - - /// MO_G0 - A symbol operand with this flag (granule 0) represents the bits - /// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction - MO_G0 = 6, - - /// MO_GOT - This flag indicates that a symbol operand represents the - /// address of the GOT entry for the symbol, rather than the address of - /// the symbol itself. - MO_GOT = 8, - - /// MO_NC - Indicates whether the linker is expected to check the symbol - /// reference for overflow. For example in an ADRP/ADD pair of relocations - /// the ADRP usually does check, but not the ADD. - MO_NC = 0x10, - - /// MO_TLS - Indicates that the operand being accessed is some kind of - /// thread-local symbol. On Darwin, only one type of thread-local access - /// exists (pre linker-relaxation), but on ELF the TLSModel used for the - /// referee will affect interpretation. - MO_TLS = 0x20 - }; -} // end namespace ARM64II - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp deleted file mode 100644 index 1a132a1..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp +++ /dev/null @@ -1,237 +0,0 @@ -//===-- ARM64ELFObjectWriter.cpp - ARM64 ELF Writer -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file handles ELF-specific object emission, converting LLVM's internal -// fixups into the appropriate relocations. 
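// Aside, a minimal stand-alone sketch (page/pageoff/granule are hypothetical
// helpers for illustration, not from the original sources): the MO_PAGE,
// MO_PAGEOFF and MO_G0..MO_G3 flags above describe how a 64-bit address is
// decomposed for ADRP/ADD and MOVZ/MOVK sequences; the relocation selection
// below then maps the corresponding fixups onto ELF relocation types.
  #include <cstdint>
  // 4 KB page address an ADRP result points at (MO_PAGE).
  constexpr uint64_t page(uint64_t A) { return A & ~UINT64_C(0xfff); }
  // Low 12 bits supplied by the following ADD or load/store (MO_PAGEOFF).
  constexpr uint64_t pageoff(uint64_t A) { return A & UINT64_C(0xfff); }
  // 16-bit slice N consumed by MOVZ/MOVK (MO_G0..MO_G3).
  constexpr uint64_t granule(uint64_t A, unsigned N) { return (A >> (16 * N)) & 0xffff; }
  static_assert(page(0x123456789) + pageoff(0x123456789) == 0x123456789, "");
  static_assert(granule(0x0011223344556677, 3) == 0x0011, "high 16 bits (MO_G3)");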
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "MCTargetDesc/ARM64MCTargetDesc.h" -#include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/MC/MCValue.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; - -namespace { -class ARM64ELFObjectWriter : public MCELFObjectTargetWriter { -public: - ARM64ELFObjectWriter(uint8_t OSABI); - - virtual ~ARM64ELFObjectWriter(); - -protected: - unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel) const override; - -private: -}; -} - -ARM64ELFObjectWriter::ARM64ELFObjectWriter(uint8_t OSABI) - : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64, - /*HasRelocationAddend*/ true) {} - -ARM64ELFObjectWriter::~ARM64ELFObjectWriter() {} - -unsigned ARM64ELFObjectWriter::GetRelocType(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const { - ARM64MCExpr::VariantKind RefKind = - static_cast<ARM64MCExpr::VariantKind>(Target.getRefKind()); - ARM64MCExpr::VariantKind SymLoc = ARM64MCExpr::getSymbolLoc(RefKind); - bool IsNC = ARM64MCExpr::isNotChecked(RefKind); - - assert((!Target.getSymA() || - Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None) && - "Should only be expression-level modifiers here"); - - assert((!Target.getSymB() || - Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None) && - "Should only be expression-level modifiers here"); - - if (IsPCRel) { - switch ((unsigned)Fixup.getKind()) { - case FK_Data_2: - return ELF::R_AARCH64_PREL16; - case FK_Data_4: - return ELF::R_AARCH64_PREL32; - case FK_Data_8: - return ELF::R_AARCH64_PREL64; - case ARM64::fixup_arm64_pcrel_adr_imm21: - llvm_unreachable("No ELF relocations supported for ADR at the moment"); - case ARM64::fixup_arm64_pcrel_adrp_imm21: - if (SymLoc == ARM64MCExpr::VK_ABS && !IsNC) - return ELF::R_AARCH64_ADR_PREL_PG_HI21; - if (SymLoc == ARM64MCExpr::VK_GOT && !IsNC) - return ELF::R_AARCH64_ADR_GOT_PAGE; - if (SymLoc == ARM64MCExpr::VK_GOTTPREL && !IsNC) - return ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21; - if (SymLoc == ARM64MCExpr::VK_TLSDESC && !IsNC) - return ELF::R_AARCH64_TLSDESC_ADR_PAGE; - llvm_unreachable("invalid symbol kind for ADRP relocation"); - case ARM64::fixup_arm64_pcrel_branch26: - return ELF::R_AARCH64_JUMP26; - case ARM64::fixup_arm64_pcrel_call26: - return ELF::R_AARCH64_CALL26; - case ARM64::fixup_arm64_pcrel_imm19: - return ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19; - default: - llvm_unreachable("Unsupported pc-relative fixup kind"); - } - } else { - switch ((unsigned)Fixup.getKind()) { - case FK_Data_2: - return ELF::R_AARCH64_ABS16; - case FK_Data_4: - return ELF::R_AARCH64_ABS32; - case FK_Data_8: - return ELF::R_AARCH64_ABS64; - case ARM64::fixup_arm64_add_imm12: - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) - return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) - return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) - return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) - return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TLSDESC && IsNC) - return ELF::R_AARCH64_TLSDESC_ADD_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) - return ELF::R_AARCH64_ADD_ABS_LO12_NC; - - report_fatal_error("invalid fixup for add (uimm12) instruction"); - return 0; - case ARM64::fixup_arm64_ldst_imm12_scale1: - if (SymLoc == 
ARM64MCExpr::VK_ABS && IsNC) - return ELF::R_AARCH64_LDST8_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) - return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) - return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) - return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) - return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; - - report_fatal_error("invalid fixup for 8-bit load/store instruction"); - return 0; - case ARM64::fixup_arm64_ldst_imm12_scale2: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) - return ELF::R_AARCH64_LDST16_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) - return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) - return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) - return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) - return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; - - report_fatal_error("invalid fixup for 16-bit load/store instruction"); - return 0; - case ARM64::fixup_arm64_ldst_imm12_scale4: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) - return ELF::R_AARCH64_LDST32_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) - return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) - return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) - return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) - return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; - - report_fatal_error("invalid fixup for 32-bit load/store instruction"); - return 0; - case ARM64::fixup_arm64_ldst_imm12_scale8: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) - return ELF::R_AARCH64_LDST64_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_GOT && IsNC) - return ELF::R_AARCH64_LD64_GOT_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) - return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) - return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) - return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) - return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_GOTTPREL && IsNC) - return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TLSDESC && IsNC) - return ELF::R_AARCH64_TLSDESC_LD64_LO12_NC; - - report_fatal_error("invalid fixup for 64-bit load/store instruction"); - return 0; - case ARM64::fixup_arm64_ldst_imm12_scale16: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) - return ELF::R_AARCH64_LDST128_ABS_LO12_NC; - - report_fatal_error("invalid fixup for 128-bit load/store instruction"); - return 0; - case ARM64::fixup_arm64_movw: - if (RefKind == ARM64MCExpr::VK_ABS_G3) - return ELF::R_AARCH64_MOVW_UABS_G3; - if (RefKind == ARM64MCExpr::VK_ABS_G2) - return ELF::R_AARCH64_MOVW_UABS_G2; - if (RefKind == ARM64MCExpr::VK_ABS_G2_NC) - return ELF::R_AARCH64_MOVW_UABS_G2_NC; - if (RefKind == ARM64MCExpr::VK_ABS_G1) - return ELF::R_AARCH64_MOVW_UABS_G1; - if (RefKind == ARM64MCExpr::VK_ABS_G1_NC) - return ELF::R_AARCH64_MOVW_UABS_G1_NC; - if (RefKind == ARM64MCExpr::VK_ABS_G0) - return ELF::R_AARCH64_MOVW_UABS_G0; - if (RefKind == ARM64MCExpr::VK_ABS_G0_NC) - return ELF::R_AARCH64_MOVW_UABS_G0_NC; - if (RefKind == 
ARM64MCExpr::VK_DTPREL_G2) - return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2; - if (RefKind == ARM64MCExpr::VK_DTPREL_G1) - return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1; - if (RefKind == ARM64MCExpr::VK_DTPREL_G1_NC) - return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC; - if (RefKind == ARM64MCExpr::VK_DTPREL_G0) - return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0; - if (RefKind == ARM64MCExpr::VK_DTPREL_G0_NC) - return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC; - if (RefKind == ARM64MCExpr::VK_TPREL_G2) - return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2; - if (RefKind == ARM64MCExpr::VK_TPREL_G1) - return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1; - if (RefKind == ARM64MCExpr::VK_TPREL_G1_NC) - return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC; - if (RefKind == ARM64MCExpr::VK_TPREL_G0) - return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0; - if (RefKind == ARM64MCExpr::VK_TPREL_G0_NC) - return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC; - if (RefKind == ARM64MCExpr::VK_GOTTPREL_G1) - return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1; - if (RefKind == ARM64MCExpr::VK_GOTTPREL_G0_NC) - return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC; - report_fatal_error("invalid fixup for movz/movk instruction"); - return 0; - case ARM64::fixup_arm64_tlsdesc_call: - return ELF::R_AARCH64_TLSDESC_CALL; - default: - llvm_unreachable("Unknown ELF relocation type"); - } - } - - llvm_unreachable("Unimplemented fixup -> relocation"); -} - -MCObjectWriter *llvm::createARM64ELFObjectWriter(raw_ostream &OS, - uint8_t OSABI) { - MCELFObjectTargetWriter *MOTW = new ARM64ELFObjectWriter(OSABI); - return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp deleted file mode 100644 index 97a3493..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp +++ /dev/null @@ -1,158 +0,0 @@ -//===- lib/MC/ARM64ELFStreamer.cpp - ELF Object Output for ARM64 ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file assembles .s files and emits AArch64 ELF .o object files. Different -// from generic ELF streamer in emitting mapping symbols ($x and $d) to delimit -// regions of data and code. -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCELFStreamer.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" -#include "llvm/MC/MCELFStreamer.h" -#include "llvm/MC/MCELFSymbolFlags.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCObjectStreamer.h" -#include "llvm/MC/MCSection.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCValue.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { - -/// Extend the generic ELFStreamer class so that it can emit mapping symbols at -/// the appropriate points in the object files. 
These symbols are defined in the -/// AArch64 ELF ABI: -/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf -/// -/// In brief: $x or $d should be emitted at the start of each contiguous region -/// of A64 code or data in a section. In practice, this emission does not rely -/// on explicit assembler directives but on inherent properties of the -/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an -/// instruction). -/// -/// As a result this system is orthogonal to the DataRegion infrastructure used -/// by MachO. Beware! -class ARM64ELFStreamer : public MCELFStreamer { -public: - ARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, - MCCodeEmitter *Emitter) - : MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0), - LastEMS(EMS_None) {} - - ~ARM64ELFStreamer() {} - - virtual void ChangeSection(const MCSection *Section, - const MCExpr *Subsection) { - // We have to keep track of the mapping symbol state of any sections we - // use. Each one should start off as EMS_None, which is provided as the - // default constructor by DenseMap::lookup. - LastMappingSymbols[getPreviousSection().first] = LastEMS; - LastEMS = LastMappingSymbols.lookup(Section); - - MCELFStreamer::ChangeSection(Section, Subsection); - } - - /// This function is the one used to emit instruction data into the ELF - /// streamer. We override it to add the appropriate mapping symbol if - /// necessary. - virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) { - EmitA64MappingSymbol(); - MCELFStreamer::EmitInstruction(Inst, STI); - } - - /// This is one of the functions used to emit data into an ELF section, so the - /// ARM64 streamer overrides it to add the appropriate mapping symbol ($d) - /// if necessary. - virtual void EmitBytes(StringRef Data) { - EmitDataMappingSymbol(); - MCELFStreamer::EmitBytes(Data); - } - - /// This is one of the functions used to emit data into an ELF section, so the - /// ARM64 streamer overrides it to add the appropriate mapping symbol ($d) - /// if necessary. - virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) { - EmitDataMappingSymbol(); - MCELFStreamer::EmitValueImpl(Value, Size); - } - -private: - enum ElfMappingSymbol { - EMS_None, - EMS_A64, - EMS_Data - }; - - void EmitDataMappingSymbol() { - if (LastEMS == EMS_Data) - return; - EmitMappingSymbol("$d"); - LastEMS = EMS_Data; - } - - void EmitA64MappingSymbol() { - if (LastEMS == EMS_A64) - return; - EmitMappingSymbol("$x"); - LastEMS = EMS_A64; - } - - void EmitMappingSymbol(StringRef Name) { - MCSymbol *Start = getContext().CreateTempSymbol(); - EmitLabel(Start); - - MCSymbol *Symbol = getContext().GetOrCreateSymbol( - Name + "." 
+ Twine(MappingSymbolCounter++)); - - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - MCELF::SetType(SD, ELF::STT_NOTYPE); - MCELF::SetBinding(SD, ELF::STB_LOCAL); - SD.setExternal(false); - Symbol->setSection(*getCurrentSection().first); - - const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); - Symbol->setVariableValue(Value); - } - - int64_t MappingSymbolCounter; - - DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols; - ElfMappingSymbol LastEMS; - - /// @} -}; -} - -namespace llvm { -MCELFStreamer *createARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack) { - ARM64ELFStreamer *S = new ARM64ELFStreamer(Context, TAB, OS, Emitter); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); - if (NoExecStack) - S->getAssembler().setNoExecStack(true); - return S; -} -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h b/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h deleted file mode 100644 index 72dadbc..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h +++ /dev/null @@ -1,26 +0,0 @@ -//===-- ARM64ELFStreamer.h - ELF Streamer for ARM64 -------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF streamer information for the ARM64 backend. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64_ELF_STREAMER_H -#define LLVM_AARCH64_ELF_STREAMER_H - -#include "llvm/MC/MCELFStreamer.h" - -namespace llvm { - -MCELFStreamer *createARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack); -} - -#endif // ARM64_ELF_STREAMER_H diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h b/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h deleted file mode 100644 index 02eb91f..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h +++ /dev/null @@ -1,72 +0,0 @@ -//===-- ARM64FixupKinds.h - ARM64 Specific Fixup Entries --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ARM64FIXUPKINDS_H -#define LLVM_ARM64FIXUPKINDS_H - -#include "llvm/MC/MCFixup.h" - -namespace llvm { -namespace ARM64 { - -enum Fixups { - // fixup_arm64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into - // an ADR instruction. - fixup_arm64_pcrel_adr_imm21 = FirstTargetFixupKind, - - // fixup_arm64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into - // an ADRP instruction. - fixup_arm64_pcrel_adrp_imm21, - - // fixup_arm64_imm12 - 12-bit fixup for add/sub instructions. - // No alignment adjustment. All value bits are encoded. - fixup_arm64_add_imm12, - - // fixup_arm64_ldst_imm12_* - unsigned 12-bit fixups for load and - // store instructions. - fixup_arm64_ldst_imm12_scale1, - fixup_arm64_ldst_imm12_scale2, - fixup_arm64_ldst_imm12_scale4, - fixup_arm64_ldst_imm12_scale8, - fixup_arm64_ldst_imm12_scale16, - - // FIXME: comment - fixup_arm64_movw, - - // fixup_arm64_pcrel_imm14 - The high 14 bits of a 21-bit pc-relative - // immediate. 
- fixup_arm64_pcrel_branch14, - - // fixup_arm64_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative - // immediate. Same encoding as fixup_arm64_pcrel_adrhi, except this - // is not used as part of a lo/hi pair and thus generates relocations - // directly when necessary. - fixup_arm64_pcrel_imm19, - - // fixup_arm64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative - // immediate. - fixup_arm64_pcrel_branch26, - - // fixup_arm64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative - // immediate. Distinguished from branch26 only on ELF. - fixup_arm64_pcrel_call26, - - // fixup_arm64_tlsdesc_call - zero-space placeholder for the ELF - // R_AARCH64_TLSDESC_CALL relocation. - fixup_arm64_tlsdesc_call, - - // Marker - LastTargetFixupKind, - NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind -}; - -} // end namespace ARM64 -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp deleted file mode 100644 index 97e0d3c..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp +++ /dev/null @@ -1,92 +0,0 @@ -//===-- ARM64MCAsmInfo.cpp - ARM64 asm properties -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declarations of the ARM64MCAsmInfo properties. -// -//===----------------------------------------------------------------------===// - -#include "ARM64MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/CommandLine.h" -using namespace llvm; - -enum AsmWriterVariantTy { - Default = -1, - Generic = 0, - Apple = 1 -}; - -static cl::opt<AsmWriterVariantTy> AsmWriterVariant( - "arm64-neon-syntax", cl::init(Default), - cl::desc("Choose style of NEON code to emit from ARM64 backend:"), - cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"), - clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly"), - clEnumValEnd)); - -ARM64MCAsmInfoDarwin::ARM64MCAsmInfoDarwin() { - // We prefer NEON instructions to be printed in the short form. - AssemblerDialect = AsmWriterVariant == Default ? 1 : AsmWriterVariant; - - PrivateGlobalPrefix = "L"; - SeparatorString = "%%"; - CommentString = ";"; - PointerSize = CalleeSaveStackSlotSize = 8; - - AlignmentIsInBytes = false; - UsesELFSectionDirectiveForBSS = true; - SupportsDebugInformation = true; - UseDataRegionDirectives = true; - - ExceptionsType = ExceptionHandling::DwarfCFI; -} - -const MCExpr *ARM64MCAsmInfoDarwin::getExprForPersonalitySymbol( - const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const { - // On Darwin, we can reference dwarf symbols with foo@GOT-., which - // is an indirect pc-relative reference. The default implementation - // won't reference using the GOT, so we need this target-specific - // version. - MCContext &Context = Streamer.getContext(); - const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Context); - MCSymbol *PCSym = Context.CreateTempSymbol(); - Streamer.EmitLabel(PCSym); - const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context); - return MCBinaryExpr::CreateSub(Res, PC, Context); -} - -ARM64MCAsmInfoELF::ARM64MCAsmInfoELF() { - // We prefer NEON instructions to be printed in the short form. 
- AssemblerDialect = AsmWriterVariant == Default ? 0 : AsmWriterVariant; - - PointerSize = 8; - - // ".comm align is in bytes but .align is pow-2." - AlignmentIsInBytes = false; - - CommentString = "//"; - PrivateGlobalPrefix = ".L"; - Code32Directive = ".code\t32"; - - Data16bitsDirective = "\t.hword\t"; - Data32bitsDirective = "\t.word\t"; - Data64bitsDirective = "\t.xword\t"; - - UseDataRegionDirectives = false; - - WeakRefDirective = "\t.weak\t"; - - HasLEB128 = true; - SupportsDebugInformation = true; - - // Exceptions handling - ExceptionsType = ExceptionHandling::DwarfCFI; -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h b/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h deleted file mode 100644 index f2d33a7..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h +++ /dev/null @@ -1,36 +0,0 @@ -//=====-- ARM64MCAsmInfo.h - ARM64 asm properties -----------*- C++ -*--====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the ARM64MCAsmInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64TARGETASMINFO_H -#define ARM64TARGETASMINFO_H - -#include "llvm/MC/MCAsmInfoDarwin.h" - -namespace llvm { -class Target; -class StringRef; -class MCStreamer; -struct ARM64MCAsmInfoDarwin : public MCAsmInfoDarwin { - explicit ARM64MCAsmInfoDarwin(); - virtual const MCExpr *getExprForPersonalitySymbol(const MCSymbol *Sym, - unsigned Encoding, - MCStreamer &Streamer) const; -}; - -struct ARM64MCAsmInfoELF : public MCAsmInfo { - explicit ARM64MCAsmInfoELF(); -}; - -} // namespace llvm - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp deleted file mode 100644 index 19559f8..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp +++ /dev/null @@ -1,563 +0,0 @@ -//===-- ARM64/ARM64MCCodeEmitter.cpp - Convert ARM64 code to machine code -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ARM64MCCodeEmitter class. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "mccodeemitter" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -STATISTIC(MCNumEmitted, "Number of MC instructions emitted."); -STATISTIC(MCNumFixups, "Number of MC fixups created."); - -namespace { - -class ARM64MCCodeEmitter : public MCCodeEmitter { - MCContext &Ctx; - - ARM64MCCodeEmitter(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT -public: - ARM64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) - : Ctx(ctx) {} - - ~ARM64MCCodeEmitter() {} - - // getBinaryCodeForInstr - TableGen'erated function for getting the - // binary encoding for an instruction. - uint64_t getBinaryCodeForInstr(const MCInst &MI, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getMachineOpValue - Return binary encoding of operand. If the machine - /// operand requires relocation, record the relocation and return zero. - unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getAMIndexed8OpValue - Return encoding info for base register - /// and 12-bit unsigned immediate attached to a load, store or prfm - /// instruction. If operand requires a relocation, record it and - /// return zero in that part of the encoding. - template <uint32_t FixupKind> - uint32_t getAMIndexed8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label - /// target. - uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getAddSubImmOpValue - Return encoding for the 12-bit immediate value and - /// the 2-bit shift field. - uint32_t getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getCondBranchTargetOpValue - Return the encoded value for a conditional - /// branch target. - uint32_t getCondBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and- - /// branch target. - uint32_t getTestBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getBranchTargetOpValue - Return the encoded value for an unconditional - /// branch target. - uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getMoveWideImmOpValue - Return the encoded value for the immediate operand - /// of a MOVZ or MOVK instruction. 
- uint32_t getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getVecShifterOpValue - Return the encoded value for the vector shifter. - uint32_t getVecShifterOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getMoveVecShifterOpValue - Return the encoded value for the vector move - /// shifter (MSL). - uint32_t getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getFixedPointScaleOpValue - Return the encoded value for the - // FP-to-fixed-point scale factor. - uint32_t getFixedPointScaleOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - uint32_t getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getSIMDShift64OpValue - Return the encoded value for the - // shift-by-immediate AdvSIMD instructions. - uint32_t getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - uint32_t getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - uint32_t getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - uint32_t getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const; - - void EmitByte(unsigned char C, raw_ostream &OS) const { OS << (char)C; } - - void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const { - // Output the constant in little endian byte order. - for (unsigned i = 0; i != Size; ++i) { - EmitByte(Val & 255, OS); - Val >>= 8; - } - } - - void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; -}; - -} // end anonymous namespace - -MCCodeEmitter *llvm::createARM64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new ARM64MCCodeEmitter(MCII, STI, Ctx); -} - -/// getMachineOpValue - Return binary encoding of operand. 
If the machine -/// operand requires relocation, record the relocation and return zero. -unsigned -ARM64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - if (MO.isReg()) - return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); - else { - assert(MO.isImm() && "did not expect relocated expression"); - return static_cast<unsigned>(MO.getImm()); - } - - assert(0 && "Unable to encode MCOperand!"); - return 0; -} - -template <uint32_t FixupKind> -uint32_t -ARM64MCCodeEmitter::getAMIndexed8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - unsigned BaseReg = MI.getOperand(OpIdx).getReg(); - BaseReg = Ctx.getRegisterInfo()->getEncodingValue(BaseReg); - - const MCOperand &MO = MI.getOperand(OpIdx + 1); - uint32_t ImmVal = 0; - - if (MO.isImm()) - ImmVal = static_cast<uint32_t>(MO.getImm()); - else { - assert(MO.isExpr() && "unable to encode load/store imm operand"); - MCFixupKind Kind = MCFixupKind(FixupKind); - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); - ++MCNumFixups; - } - - return BaseReg | (ImmVal << 5); -} - -/// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label -/// target. -uint32_t -ARM64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) - return MO.getImm(); - assert(MO.isExpr() && "Unexpected ADR target type!"); - const MCExpr *Expr = MO.getExpr(); - - MCFixupKind Kind = MI.getOpcode() == ARM64::ADR - ? MCFixupKind(ARM64::fixup_arm64_pcrel_adr_imm21) - : MCFixupKind(ARM64::fixup_arm64_pcrel_adrp_imm21); - Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); - - MCNumFixups += 1; - - // All of the information is in the fixup. - return 0; -} - -/// getAddSubImmOpValue - Return encoding for the 12-bit immediate value and -/// the 2-bit shift field. The shift field is stored in bits 13-14 of the -/// return value. -uint32_t -ARM64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - // Suboperands are [imm, shifter]. - const MCOperand &MO = MI.getOperand(OpIdx); - const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - assert(ARM64_AM::getShiftType(MO1.getImm()) == ARM64_AM::LSL && - "unexpected shift type for add/sub immediate"); - unsigned ShiftVal = ARM64_AM::getShiftValue(MO1.getImm()); - assert((ShiftVal == 0 || ShiftVal == 12) && - "unexpected shift value for add/sub immediate"); - if (MO.isImm()) - return MO.getImm() | (ShiftVal == 0 ? 0 : (1 << 12)); - assert(MO.isExpr() && "Unable to encode MCOperand!"); - const MCExpr *Expr = MO.getExpr(); - assert(ShiftVal == 0 && "shift not allowed on add/sub immediate with fixup"); - - // Encode the 12 bits of the fixup. - MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_add_imm12); - Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); - - ++MCNumFixups; - - return 0; -} - -/// getCondBranchTargetOpValue - Return the encoded value for a conditional -/// branch target. 
-uint32_t ARM64MCCodeEmitter::getCondBranchTargetOpValue( - const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) - return MO.getImm(); - assert(MO.isExpr() && "Unexpected target type!"); - - MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_imm19); - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); - - ++MCNumFixups; - - // All of the information is in the fixup. - return 0; -} - -uint32_t -ARM64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - if (MO.isImm()) - return MO.getImm(); - assert(MO.isExpr() && "Unexpected movz/movk immediate"); - - Fixups.push_back(MCFixup::Create( - 0, MO.getExpr(), MCFixupKind(ARM64::fixup_arm64_movw), MI.getLoc())); - - ++MCNumFixups; - - return 0; -} - -/// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and- -/// branch target. -uint32_t ARM64MCCodeEmitter::getTestBranchTargetOpValue( - const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) - return MO.getImm(); - assert(MO.isExpr() && "Unexpected ADR target type!"); - - MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_branch14); - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); - - ++MCNumFixups; - - // All of the information is in the fixup. - return 0; -} - -/// getBranchTargetOpValue - Return the encoded value for an unconditional -/// branch target. -uint32_t -ARM64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) - return MO.getImm(); - assert(MO.isExpr() && "Unexpected ADR target type!"); - - MCFixupKind Kind = MI.getOpcode() == ARM64::BL - ? MCFixupKind(ARM64::fixup_arm64_pcrel_call26) - : MCFixupKind(ARM64::fixup_arm64_pcrel_branch26); - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); - - ++MCNumFixups; - - // All of the information is in the fixup. 
- return 0; -} - -/// getVecShifterOpValue - Return the encoded value for the vector shifter: -/// -/// 00 -> 0 -/// 01 -> 8 -/// 10 -> 16 -/// 11 -> 24 -uint32_t -ARM64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the shift amount!"); - - switch (MO.getImm()) { - default: - break; - case 0: - return 0; - case 8: - return 1; - case 16: - return 2; - case 24: - return 3; - } - - assert(false && "Invalid value for vector shift amount!"); - return 0; -} - -uint32_t -ARM64MCCodeEmitter::getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the shift amount!"); - return 64 - (MO.getImm()); -} - -uint32_t -ARM64MCCodeEmitter::getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the shift amount!"); - return 64 - (MO.getImm() | 32); -} - -uint32_t -ARM64MCCodeEmitter::getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the shift amount!"); - return 32 - (MO.getImm() | 16); -} - -uint32_t -ARM64MCCodeEmitter::getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the shift amount!"); - return 16 - (MO.getImm() | 8); -} - -/// getFixedPointScaleOpValue - Return the encoded value for the -// FP-to-fixed-point scale factor. 
-uint32_t ARM64MCCodeEmitter::getFixedPointScaleOpValue( - const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return 64 - MO.getImm(); -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return 64 - MO.getImm(); -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return 32 - MO.getImm(); -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return 16 - MO.getImm(); -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return 8 - MO.getImm(); -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return MO.getImm() - 64; -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return MO.getImm() - 32; -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return MO.getImm() - 16; -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return MO.getImm() - 8; -} - -/// getMoveVecShifterOpValue - Return the encoded value for the vector move -/// shifter (MSL). -uint32_t -ARM64MCCodeEmitter::getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && - "Expected an immediate value for the move shift amount!"); - unsigned ShiftVal = ARM64_AM::getShiftValue(MO.getImm()); - assert((ShiftVal == 8 || ShiftVal == 16) && "Invalid shift amount!"); - return ShiftVal == 8 ? 
0 : 1; -} - -unsigned ARM64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const { - // If one of the signed fixup kinds is applied to a MOVZ instruction, the - // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's - // job to ensure that any bits possibly affected by this are 0. This means we - // must zero out bit 30 (essentially emitting a MOVN). - MCOperand UImm16MO = MI.getOperand(1); - - // Nothing to do if there's no fixup. - if (UImm16MO.isImm()) - return EncodedValue; - - return EncodedValue & ~(1u << 30); -} - -void ARM64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - if (MI.getOpcode() == ARM64::TLSDESCCALL) { - // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the - // following (BLR) instruction. It doesn't emit any code itself so it - // doesn't go through the normal TableGenerated channels. - MCFixupKind Fixup = MCFixupKind(ARM64::fixup_arm64_tlsdesc_call); - Fixups.push_back(MCFixup::Create(0, MI.getOperand(0).getExpr(), Fixup)); - return; - } - - uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); - EmitConstant(Binary, 4, OS); - ++MCNumEmitted; // Keep track of the # of mi's emitted. -} - -#include "ARM64GenMCCodeEmitter.inc" diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp deleted file mode 100644 index d4ab140..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp +++ /dev/null @@ -1,168 +0,0 @@ -//===-- ARM64MCExpr.cpp - ARM64 specific MC expression classes --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the implementation of the assembly expression modifiers -// accepted by the AArch64 architecture (e.g. ":lo12:", ":gottprel_g1:", ...). 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "aarch64symbolrefexpr" -#include "ARM64MCExpr.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCValue.h" -#include "llvm/Object/ELF.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; - -const ARM64MCExpr *ARM64MCExpr::Create(const MCExpr *Expr, VariantKind Kind, - MCContext &Ctx) { - return new (Ctx) ARM64MCExpr(Expr, Kind); -} - -StringRef ARM64MCExpr::getVariantKindName() const { - switch (static_cast<uint32_t>(getKind())) { - case VK_CALL: return ""; - case VK_LO12: return ":lo12:"; - case VK_ABS_G3: return ":abs_g3:"; - case VK_ABS_G2: return ":abs_g2:"; - case VK_ABS_G2_NC: return ":abs_g2_nc:"; - case VK_ABS_G1: return ":abs_g1:"; - case VK_ABS_G1_NC: return ":abs_g1_nc:"; - case VK_ABS_G0: return ":abs_g0:"; - case VK_ABS_G0_NC: return ":abs_g0_nc:"; - case VK_DTPREL_G2: return ":dtprel_g2:"; - case VK_DTPREL_G1: return ":dtprel_g1:"; - case VK_DTPREL_G1_NC: return ":dtprel_g1_nc:"; - case VK_DTPREL_G0: return ":dtprel_g0:"; - case VK_DTPREL_G0_NC: return ":dtprel_g0_nc:"; - case VK_DTPREL_LO12: return ":dtprel_lo12:"; - case VK_DTPREL_LO12_NC: return ":dtprel_lo12_nc:"; - case VK_TPREL_G2: return ":tprel_g2:"; - case VK_TPREL_G1: return ":tprel_g1:"; - case VK_TPREL_G1_NC: return ":tprel_g1_nc:"; - case VK_TPREL_G0: return ":tprel_g0:"; - case VK_TPREL_G0_NC: return ":tprel_g0_nc:"; - case VK_TPREL_LO12: return ":tprel_lo12:"; - case VK_TPREL_LO12_NC: return ":tprel_lo12_nc:"; - case VK_TLSDESC_LO12: return ":tlsdesc_lo12:"; - case VK_ABS_PAGE: return ""; - case VK_GOT_PAGE: return ":got:"; - case VK_GOT_LO12: return ":got_lo12:"; - case VK_GOTTPREL_PAGE: return ":gottprel:"; - case VK_GOTTPREL_LO12_NC: return ":gottprel_lo12:"; - case VK_GOTTPREL_G1: return ":gottprel_g1:"; - case VK_GOTTPREL_G0_NC: return ":gottprel_g0_nc:"; - case VK_TLSDESC: return ""; - case VK_TLSDESC_PAGE: return ":tlsdesc:"; - default: - llvm_unreachable("Invalid ELF symbol kind"); - } -} - -void ARM64MCExpr::PrintImpl(raw_ostream &OS) const { - if (getKind() != VK_NONE) - OS << getVariantKindName(); - OS << *Expr; -} - -// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps -// that method should be made public? -// FIXME: really do above: now that two backends are using it. 
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) { - switch (Value->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expr!"); - break; - - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); - AddValueSymbolsImpl(BE->getLHS(), Asm); - AddValueSymbolsImpl(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: - Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); - break; - - case MCExpr::Unary: - AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm); - break; - } -} - -void ARM64MCExpr::AddValueSymbols(MCAssembler *Asm) const { - AddValueSymbolsImpl(getSubExpr(), Asm); -} - -const MCSection *ARM64MCExpr::FindAssociatedSection() const { - llvm_unreachable("FIXME: what goes here?"); -} - -bool ARM64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const { - if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout)) - return false; - - Res = - MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind()); - - return true; -} - -static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { - switch (Expr->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expression"); - break; - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr); - fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); - fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: { - // We're known to be under a TLS fixup, so any symbol should be - // modified. There should be only one. - const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr); - MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol()); - MCELF::SetType(SD, ELF::STT_TLS); - break; - } - - case MCExpr::Unary: - fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm); - break; - } -} - -void ARM64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { - switch (getSymbolLoc(Kind)) { - default: - return; - case VK_DTPREL: - case VK_GOTTPREL: - case VK_TPREL: - case VK_TLSDESC: - break; - } - - fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h b/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h deleted file mode 100644 index a33fe43..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h +++ /dev/null @@ -1,162 +0,0 @@ -//=---- ARM64MCExpr.h - ARM64 specific MC expression classes ------*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes ARM64-specific MCExprs, used for modifiers like -// ":lo12:" or ":gottprel_g1:". -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ARM64MCEXPR_H -#define LLVM_ARM64MCEXPR_H - -#include "llvm/MC/MCExpr.h" -#include "llvm/Support/ErrorHandling.h" - -namespace llvm { - -class ARM64MCExpr : public MCTargetExpr { -public: - enum VariantKind { - VK_NONE = 0x000, - - // Symbol locations specifying (roughly speaking) what calculation should be - // performed to construct the final address for the relocated - // symbol. E.g. direct, via the GOT, ... 
- VK_ABS = 0x001, - VK_SABS = 0x002, - VK_GOT = 0x003, - VK_DTPREL = 0x004, - VK_GOTTPREL = 0x005, - VK_TPREL = 0x006, - VK_TLSDESC = 0x007, - VK_SymLocBits = 0x00f, - - // Variants specifying which part of the final address calculation is - // used. E.g. the low 12 bits for an ADD/LDR, the middle 16 bits for a - // MOVZ/MOVK. - VK_PAGE = 0x010, - VK_PAGEOFF = 0x020, - VK_G0 = 0x030, - VK_G1 = 0x040, - VK_G2 = 0x050, - VK_G3 = 0x060, - VK_AddressFragBits = 0x0f0, - - // Whether the final relocation is a checked one (where a linker should - // perform a range-check on the final address) or not. Note that this field - // is unfortunately sometimes omitted from the assembly syntax. E.g. :lo12: - // on its own is a non-checked relocation. We side with ELF on being - // explicit about this! - VK_NC = 0x100, - - // Convenience definitions for referring to specific textual representations - // of relocation specifiers. Note that this means the "_NC" is sometimes - // omitted in line with assembly syntax here (VK_LO12 rather than VK_LO12_NC - // since a user would write ":lo12:"). - VK_CALL = VK_ABS, - VK_ABS_PAGE = VK_ABS | VK_PAGE, - VK_ABS_G3 = VK_ABS | VK_G3, - VK_ABS_G2 = VK_ABS | VK_G2, - VK_ABS_G2_NC = VK_ABS | VK_G2 | VK_NC, - VK_ABS_G1 = VK_ABS | VK_G1, - VK_ABS_G1_NC = VK_ABS | VK_G1 | VK_NC, - VK_ABS_G0 = VK_ABS | VK_G0, - VK_ABS_G0_NC = VK_ABS | VK_G0 | VK_NC, - VK_LO12 = VK_ABS | VK_PAGEOFF | VK_NC, - VK_GOT_LO12 = VK_GOT | VK_PAGEOFF | VK_NC, - VK_GOT_PAGE = VK_GOT | VK_PAGE, - VK_DTPREL_G2 = VK_DTPREL | VK_G2, - VK_DTPREL_G1 = VK_DTPREL | VK_G1, - VK_DTPREL_G1_NC = VK_DTPREL | VK_G1 | VK_NC, - VK_DTPREL_G0 = VK_DTPREL | VK_G0, - VK_DTPREL_G0_NC = VK_DTPREL | VK_G0 | VK_NC, - VK_DTPREL_LO12 = VK_DTPREL | VK_PAGEOFF, - VK_DTPREL_LO12_NC = VK_DTPREL | VK_PAGEOFF | VK_NC, - VK_GOTTPREL_PAGE = VK_GOTTPREL | VK_PAGE, - VK_GOTTPREL_LO12_NC = VK_GOTTPREL | VK_PAGEOFF | VK_NC, - VK_GOTTPREL_G1 = VK_GOTTPREL | VK_G1, - VK_GOTTPREL_G0_NC = VK_GOTTPREL | VK_G0 | VK_NC, - VK_TPREL_G2 = VK_TPREL | VK_G2, - VK_TPREL_G1 = VK_TPREL | VK_G1, - VK_TPREL_G1_NC = VK_TPREL | VK_G1 | VK_NC, - VK_TPREL_G0 = VK_TPREL | VK_G0, - VK_TPREL_G0_NC = VK_TPREL | VK_G0 | VK_NC, - VK_TPREL_LO12 = VK_TPREL | VK_PAGEOFF, - VK_TPREL_LO12_NC = VK_TPREL | VK_PAGEOFF | VK_NC, - VK_TLSDESC_LO12 = VK_TLSDESC | VK_PAGEOFF | VK_NC, - VK_TLSDESC_PAGE = VK_TLSDESC | VK_PAGE, - - VK_INVALID = 0xfff - }; - -private: - const MCExpr *Expr; - const VariantKind Kind; - - explicit ARM64MCExpr(const MCExpr *Expr, VariantKind Kind) - : Expr(Expr), Kind(Kind) {} - -public: - /// @name Construction - /// @{ - - static const ARM64MCExpr *Create(const MCExpr *Expr, VariantKind Kind, - MCContext &Ctx); - - /// @} - /// @name Accessors - /// @{ - - /// Get the kind of this expression. - VariantKind getKind() const { return static_cast<VariantKind>(Kind); } - - /// Get the expression this modifier applies to. - const MCExpr *getSubExpr() const { return Expr; } - - /// @} - /// @name VariantKind information extractors. - /// @{ - - static VariantKind getSymbolLoc(VariantKind Kind) { - return static_cast<VariantKind>(Kind & VK_SymLocBits); - } - - static VariantKind getAddressFrag(VariantKind Kind) { - return static_cast<VariantKind>(Kind & VK_AddressFragBits); - } - - static bool isNotChecked(VariantKind Kind) { return Kind & VK_NC; } - - /// @} - - /// Convert the variant kind into an ELF-appropriate modifier - /// (e.g. ":got:", ":lo12:"). 
- StringRef getVariantKindName() const; - - void PrintImpl(raw_ostream &OS) const; - - void AddValueSymbols(MCAssembler *) const; - - const MCSection *FindAssociatedSection() const; - - bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const; - - void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const; - - static bool classof(const MCExpr *E) { - return E->getKind() == MCExpr::Target; - } - - static bool classof(const ARM64MCExpr *) { return true; } - -}; -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp deleted file mode 100644 index 8d54412..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp +++ /dev/null @@ -1,167 +0,0 @@ -//===-- ARM64MCTargetDesc.cpp - ARM64 Target Descriptions -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides ARM64 specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#include "ARM64MCTargetDesc.h" -#include "ARM64ELFStreamer.h" -#include "ARM64MCAsmInfo.h" -#include "InstPrinter/ARM64InstPrinter.h" -#include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_INSTRINFO_MC_DESC -#include "ARM64GenInstrInfo.inc" - -#define GET_SUBTARGETINFO_MC_DESC -#include "ARM64GenSubtargetInfo.inc" - -#define GET_REGINFO_MC_DESC -#include "ARM64GenRegisterInfo.inc" - -using namespace llvm; - -static MCInstrInfo *createARM64MCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitARM64MCInstrInfo(X); - return X; -} - -static MCSubtargetInfo *createARM64MCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitARM64MCSubtargetInfo(X, TT, CPU, FS); - return X; -} - -static MCRegisterInfo *createARM64MCRegisterInfo(StringRef Triple) { - MCRegisterInfo *X = new MCRegisterInfo(); - InitARM64MCRegisterInfo(X, ARM64::LR); - return X; -} - -static MCAsmInfo *createARM64MCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { - Triple TheTriple(TT); - - MCAsmInfo *MAI; - if (TheTriple.isOSDarwin()) - MAI = new ARM64MCAsmInfoDarwin(); - else { - assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF"); - MAI = new ARM64MCAsmInfoELF(); - } - - // Initial state of the frame pointer is SP. - unsigned Reg = MRI.getDwarfRegNum(ARM64::SP, true); - MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 0); - MAI->addInitialFrameState(Inst); - - return MAI; -} - -static MCCodeGenInfo *createARM64MCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { - Triple TheTriple(TT); - assert((TheTriple.isOSBinFormatELF() || TheTriple.isOSBinFormatMachO()) && - "Only expect Darwin and ELF targets"); - - if (CM == CodeModel::Default) - CM = CodeModel::Small; - // The default MCJIT memory managers make no guarantees about where they can - // find an executable page; JITed code needs to be able to refer to globals - // no matter how far away they are. 
- else if (CM == CodeModel::JITDefault) - CM = CodeModel::Large; - else if (CM != CodeModel::Small && CM != CodeModel::Large) - report_fatal_error("Only small and large code models are allowed on ARM64"); - - // ARM64 Darwin is always PIC. - if (TheTriple.isOSDarwin()) - RM = Reloc::PIC_; - // On ELF platforms the default static relocation model has a smart enough - // linker to cope with referencing external symbols defined in a shared - // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. - else if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) - RM = Reloc::Static; - - MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM, OL); - return X; -} - -static MCInstPrinter *createARM64MCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - if (SyntaxVariant == 0) - return new ARM64InstPrinter(MAI, MII, MRI, STI); - if (SyntaxVariant == 1) - return new ARM64AppleInstPrinter(MAI, MII, MRI, STI); - - return 0; -} - -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll, - bool NoExecStack) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin()) - return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, - /*LabelSections*/ true); - - return createARM64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack); -} - -// Force static initialization. -extern "C" void LLVMInitializeARM64TargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn X(TheARM64Target, createARM64MCAsmInfo); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target, - createARM64MCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheARM64Target, createARM64MCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheARM64Target, createARM64MCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target, - createARM64MCSubtargetInfo); - - // Register the asm backend. - TargetRegistry::RegisterMCAsmBackend(TheARM64Target, createARM64AsmBackend); - - // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(TheARM64Target, - createARM64MCCodeEmitter); - - // Register the object streamer. - TargetRegistry::RegisterMCObjectStreamer(TheARM64Target, createMCStreamer); - - // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(TheARM64Target, - createARM64MCInstPrinter); -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h b/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h deleted file mode 100644 index 0db2b22..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h +++ /dev/null @@ -1,62 +0,0 @@ -//===-- ARM64MCTargetDesc.h - ARM64 Target Descriptions ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides ARM64 specific target descriptions. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ARM64MCTARGETDESC_H -#define ARM64MCTARGETDESC_H - -#include "llvm/Support/DataTypes.h" -#include <string> - -namespace llvm { -class MCAsmBackend; -class MCCodeEmitter; -class MCContext; -class MCInstrInfo; -class MCRegisterInfo; -class MCObjectWriter; -class MCSubtargetInfo; -class StringRef; -class Target; -class raw_ostream; - -extern Target TheARM64Target; - -MCCodeEmitter *createARM64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx); -MCAsmBackend *createARM64AsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU); - -MCObjectWriter *createARM64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI); - -MCObjectWriter *createARM64MachObjectWriter(raw_ostream &OS, uint32_t CPUType, - uint32_t CPUSubtype); - -} // End llvm namespace - -// Defines symbolic names for ARM64 registers. This defines a mapping from -// register name to register number. -// -#define GET_REGINFO_ENUM -#include "ARM64GenRegisterInfo.inc" - -// Defines symbolic names for the ARM64 instructions. -// -#define GET_INSTRINFO_ENUM -#include "ARM64GenInstrInfo.inc" - -#define GET_SUBTARGETINFO_ENUM -#include "ARM64GenSubtargetInfo.inc" - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp deleted file mode 100644 index 1733dc5..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp +++ /dev/null @@ -1,396 +0,0 @@ -//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "MCTargetDesc/ARM64MCTargetDesc.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCAsmLayout.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCFixup.h" -#include "llvm/MC/MCMachObjectWriter.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCValue.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" -using namespace llvm; - -namespace { -class ARM64MachObjectWriter : public MCMachObjectTargetWriter { - bool getARM64FixupKindMachOInfo(const MCFixup &Fixup, unsigned &RelocType, - const MCSymbolRefExpr *Sym, - unsigned &Log2Size, const MCAssembler &Asm); - -public: - ARM64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype) - : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype, - /*UseAggressiveSymbolFolding=*/true) {} - - void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, - const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue); -}; -} - -bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo( - const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym, - unsigned &Log2Size, const MCAssembler &Asm) { - RelocType = unsigned(MachO::ARM64_RELOC_UNSIGNED); - Log2Size = ~0U; - - switch ((unsigned)Fixup.getKind()) { - default: - return false; - - case FK_Data_1: - Log2Size = llvm::Log2_32(1); - return true; - case FK_Data_2: - Log2Size = llvm::Log2_32(2); - return true; - case FK_Data_4: - Log2Size = llvm::Log2_32(4); - if (Sym->getKind() == MCSymbolRefExpr::VK_GOT) - RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT); - return true; - case FK_Data_8: - Log2Size = llvm::Log2_32(8); - if (Sym->getKind() == MCSymbolRefExpr::VK_GOT) - RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT); - return true; - case ARM64::fixup_arm64_add_imm12: - case ARM64::fixup_arm64_ldst_imm12_scale1: - case ARM64::fixup_arm64_ldst_imm12_scale2: - case ARM64::fixup_arm64_ldst_imm12_scale4: - case ARM64::fixup_arm64_ldst_imm12_scale8: - case ARM64::fixup_arm64_ldst_imm12_scale16: - Log2Size = llvm::Log2_32(4); - switch (Sym->getKind()) { - default: - assert(0 && "Unexpected symbol reference variant kind!"); - case MCSymbolRefExpr::VK_PAGEOFF: - RelocType = unsigned(MachO::ARM64_RELOC_PAGEOFF12); - return true; - case MCSymbolRefExpr::VK_GOTPAGEOFF: - RelocType = unsigned(MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12); - return true; - case MCSymbolRefExpr::VK_TLVPPAGEOFF: - RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12); - return true; - } - case ARM64::fixup_arm64_pcrel_adrp_imm21: - Log2Size = llvm::Log2_32(4); - // This encompasses the relocation for the whole 21-bit value. 
- switch (Sym->getKind()) { - default: - Asm.getContext().FatalError(Fixup.getLoc(), - "ADR/ADRP relocations must be GOT relative"); - case MCSymbolRefExpr::VK_PAGE: - RelocType = unsigned(MachO::ARM64_RELOC_PAGE21); - return true; - case MCSymbolRefExpr::VK_GOTPAGE: - RelocType = unsigned(MachO::ARM64_RELOC_GOT_LOAD_PAGE21); - return true; - case MCSymbolRefExpr::VK_TLVPPAGE: - RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGE21); - return true; - } - return true; - case ARM64::fixup_arm64_pcrel_branch26: - case ARM64::fixup_arm64_pcrel_call26: - Log2Size = llvm::Log2_32(4); - RelocType = unsigned(MachO::ARM64_RELOC_BRANCH26); - return true; - } -} - -void ARM64MachObjectWriter::RecordRelocation( - MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue) { - unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); - - // See <reloc.h>. - uint32_t FixupOffset = Layout.getFragmentOffset(Fragment); - unsigned Log2Size = 0; - int64_t Value = 0; - unsigned Index = 0; - unsigned IsExtern = 0; - unsigned Type = 0; - unsigned Kind = Fixup.getKind(); - - FixupOffset += Fixup.getOffset(); - - // ARM64 pcrel relocation addends do not include the section offset. - if (IsPCRel) - FixedValue += FixupOffset; - - // ADRP fixups use relocations for the whole symbol value and only - // put the addend in the instruction itself. Clear out any value the - // generic code figured out from the sybmol definition. - if (Kind == ARM64::fixup_arm64_pcrel_adrp_imm21 || - Kind == ARM64::fixup_arm64_pcrel_imm19) - FixedValue = 0; - - // imm19 relocations are for conditional branches, which require - // assembler local symbols. If we got here, that's not what we have, - // so complain loudly. - if (Kind == ARM64::fixup_arm64_pcrel_imm19) { - Asm.getContext().FatalError(Fixup.getLoc(), - "conditional branch requires assembler-local" - " label. '" + - Target.getSymA()->getSymbol().getName() + - "' is external."); - return; - } - - // 14-bit branch relocations should only target internal labels, and so - // should never get here. - if (Kind == ARM64::fixup_arm64_pcrel_branch14) { - Asm.getContext().FatalError(Fixup.getLoc(), - "Invalid relocation on conditional branch!"); - return; - } - - if (!getARM64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size, - Asm)) { - Asm.getContext().FatalError(Fixup.getLoc(), "unknown ARM64 fixup kind!"); - return; - } - - Value = Target.getConstant(); - - if (Target.isAbsolute()) { // constant - // FIXME: Should this always be extern? - // SymbolNum of 0 indicates the absolute section. - Type = MachO::ARM64_RELOC_UNSIGNED; - Index = 0; - - if (IsPCRel) { - IsExtern = 1; - Asm.getContext().FatalError(Fixup.getLoc(), - "PC relative absolute relocation!"); - - // FIXME: x86_64 sets the type to a branch reloc here. Should we do - // something similar? - } - } else if (Target.getSymB()) { // A - B + constant - const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData &A_SD = Asm.getSymbolData(*A); - const MCSymbolData *A_Base = Asm.getAtom(&A_SD); - - const MCSymbol *B = &Target.getSymB()->getSymbol(); - MCSymbolData &B_SD = Asm.getSymbolData(*B); - const MCSymbolData *B_Base = Asm.getAtom(&B_SD); - - // Check for "_foo@got - .", which comes through here as: - // Ltmp0: - // ... 
_foo@got - Ltmp0 - if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOT && - Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None && - Layout.getSymbolOffset(&B_SD) == - Layout.getFragmentOffset(Fragment) + Fixup.getOffset()) { - // SymB is the PC, so use a PC-rel pointer-to-GOT relocation. - Index = A_Base->getIndex(); - IsExtern = 1; - Type = MachO::ARM64_RELOC_POINTER_TO_GOT; - IsPCRel = 1; - MachO::any_relocation_info MRE; - MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | - (IsExtern << 27) | (Type << 28)); - Writer->addRelocation(Fragment->getParent(), MRE); - return; - } else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || - Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) - // Otherwise, neither symbol can be modified. - Asm.getContext().FatalError(Fixup.getLoc(), - "unsupported relocation of modified symbol"); - - // We don't support PCrel relocations of differences. - if (IsPCRel) - Asm.getContext().FatalError(Fixup.getLoc(), - "unsupported pc-relative relocation of " - "difference"); - - // ARM64 always uses external relocations. If there is no symbol to use as - // a base address (a local symbol with no preceeding non-local symbol), - // error out. - // - // FIXME: We should probably just synthesize an external symbol and use - // that. - if (!A_Base) - Asm.getContext().FatalError( - Fixup.getLoc(), - "unsupported relocation of local symbol '" + A->getName() + - "'. Must have non-local symbol earlier in section."); - if (!B_Base) - Asm.getContext().FatalError( - Fixup.getLoc(), - "unsupported relocation of local symbol '" + B->getName() + - "'. Must have non-local symbol earlier in section."); - - if (A_Base == B_Base && A_Base) - Asm.getContext().FatalError(Fixup.getLoc(), - "unsupported relocation with identical base"); - - Value += (A_SD.getFragment() == NULL ? 0 : Writer->getSymbolAddress( - &A_SD, Layout)) - - (A_Base == NULL || A_Base->getFragment() == NULL - ? 0 - : Writer->getSymbolAddress(A_Base, Layout)); - Value -= (B_SD.getFragment() == NULL ? 0 : Writer->getSymbolAddress( - &B_SD, Layout)) - - (B_Base == NULL || B_Base->getFragment() == NULL - ? 0 - : Writer->getSymbolAddress(B_Base, Layout)); - - Index = A_Base->getIndex(); - IsExtern = 1; - Type = MachO::ARM64_RELOC_UNSIGNED; - - MachO::any_relocation_info MRE; - MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | - (IsExtern << 27) | (Type << 28)); - Writer->addRelocation(Fragment->getParent(), MRE); - - Index = B_Base->getIndex(); - IsExtern = 1; - Type = MachO::ARM64_RELOC_SUBTRACTOR; - } else { // A + constant - const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); - MCSymbolData &SD = Asm.getSymbolData(*Symbol); - const MCSymbolData *Base = Asm.getAtom(&SD); - const MCSectionMachO &Section = static_cast<const MCSectionMachO &>( - Fragment->getParent()->getSection()); - - // If the symbol is a variable and we weren't able to get a Base for it - // (i.e., it's not in the symbol table associated with a section) resolve - // the relocation based its expansion instead. - if (Symbol->isVariable() && !Base) { - // If the evaluation is an absolute value, just use that directly - // to keep things easy. - int64_t Res; - if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute( - Res, Layout, Writer->getSectionAddressMap())) { - FixedValue = Res; - return; - } - - // FIXME: Will the Target we already have ever have any data in it - // we need to preserve and merge with the new Target? 
How about - // the FixedValue? - if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target, &Layout)) - Asm.getContext().FatalError(Fixup.getLoc(), - "unable to resolve variable '" + - Symbol->getName() + "'"); - return RecordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, - FixedValue); - } - - // Relocations inside debug sections always use local relocations when - // possible. This seems to be done because the debugger doesn't fully - // understand relocation entries and expects to find values that - // have already been fixed up. - if (Symbol->isInSection()) { - if (Section.hasAttribute(MachO::S_ATTR_DEBUG)) - Base = 0; - } - - // ARM64 uses external relocations as much as possible. For debug sections, - // and for pointer-sized relocations (.quad), we allow section relocations. - // It's code sections that run into trouble. - if (Base) { - Index = Base->getIndex(); - IsExtern = 1; - - // Add the local offset, if needed. - if (Base != &SD) - Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); - } else if (Symbol->isInSection()) { - // Pointer-sized relocations can use a local relocation. Otherwise, - // we have to be in a debug info section. - if (!Section.hasAttribute(MachO::S_ATTR_DEBUG) && Log2Size != 3) - Asm.getContext().FatalError( - Fixup.getLoc(), - "unsupported relocation of local symbol '" + Symbol->getName() + - "'. Must have non-local symbol earlier in section."); - // Adjust the relocation to be section-relative. - // The index is the section ordinal (1-based). - const MCSectionData &SymSD = - Asm.getSectionData(SD.getSymbol().getSection()); - Index = SymSD.getOrdinal() + 1; - IsExtern = 0; - Value += Writer->getSymbolAddress(&SD, Layout); - - if (IsPCRel) - Value -= Writer->getFragmentAddress(Fragment, Layout) + - Fixup.getOffset() + (1ULL << Log2Size); - } else { - // Resolve constant variables. - if (SD.getSymbol().isVariable()) { - int64_t Res; - if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute( - Res, Layout, Writer->getSectionAddressMap())) { - FixedValue = Res; - return; - } - } - Asm.getContext().FatalError(Fixup.getLoc(), - "unsupported relocation of variable '" + - Symbol->getName() + "'"); - } - } - - // If the relocation kind is Branch26, Page21, or Pageoff12, any addend - // is represented via an Addend relocation, not encoded directly into - // the instruction. - if ((Type == MachO::ARM64_RELOC_BRANCH26 || - Type == MachO::ARM64_RELOC_PAGE21 || - Type == MachO::ARM64_RELOC_PAGEOFF12) && - Value) { - assert((Value & 0xff000000) == 0 && "Added relocation out of range!"); - - MachO::any_relocation_info MRE; - MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | - (IsExtern << 27) | (Type << 28)); - Writer->addRelocation(Fragment->getParent(), MRE); - - // Now set up the Addend relocation. - Type = MachO::ARM64_RELOC_ADDEND; - Index = Value; - IsPCRel = 0; - Log2Size = 2; - IsExtern = 0; - - // Put zero into the instruction itself. The addend is in the relocation. - Value = 0; - } - - // If there's any addend left to handle, encode it in the instruction. 
- FixedValue = Value; - - // struct relocation_info (8 bytes) - MachO::any_relocation_info MRE; - MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | - (IsExtern << 27) | (Type << 28)); - Writer->addRelocation(Fragment->getParent(), MRE); -} - -MCObjectWriter *llvm::createARM64MachObjectWriter(raw_ostream &OS, - uint32_t CPUType, - uint32_t CPUSubtype) { - return createMachObjectWriter(new ARM64MachObjectWriter(CPUType, CPUSubtype), - OS, /*IsLittleEndian=*/true); -} diff --git a/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index f8665bc..0000000 --- a/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -add_llvm_library(LLVMARM64Desc - ARM64AsmBackend.cpp - ARM64ELFObjectWriter.cpp - ARM64ELFStreamer.cpp - ARM64MCAsmInfo.cpp - ARM64MCCodeEmitter.cpp - ARM64MCExpr.cpp - ARM64MCTargetDesc.cpp - ARM64MachObjectWriter.cpp -) -add_dependencies(LLVMARM64Desc ARM64CommonTableGen) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt deleted file mode 100644 index e4c74d2..0000000 --- a/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ARM64Desc -parent = ARM64 -required_libraries = ARM64AsmPrinter ARM64Info MC Support -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/MCTargetDesc/Makefile b/lib/Target/ARM64/MCTargetDesc/Makefile deleted file mode 100644 index 013cc63..0000000 --- a/lib/Target/ARM64/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/ARM64/TargetDesc/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Desc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/Makefile b/lib/Target/ARM64/Makefile deleted file mode 100644 index 5f0f307..0000000 --- a/lib/Target/ARM64/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -##===- lib/Target/ARM64/Makefile ---------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMARM64CodeGen -TARGET = ARM64 - -# Make sure that tblgen is run, first thing. -BUILT_SOURCES = ARM64GenRegisterInfo.inc ARM64GenInstrInfo.inc \ - ARM64GenAsmWriter.inc ARM64GenAsmWriter1.inc \ - ARM64GenDAGISel.inc \ - ARM64GenCallingConv.inc ARM64GenAsmMatcher.inc \ - ARM64GenSubtargetInfo.inc ARM64GenMCCodeEmitter.inc \ - ARM64GenFastISel.inc ARM64GenDisassemblerTables.inc \ - ARM64GenMCPseudoLowering.inc - -DIRS = TargetInfo InstPrinter AsmParser Disassembler MCTargetDesc - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp b/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp deleted file mode 100644 index dec09ed..0000000 --- a/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp +++ /dev/null @@ -1,21 +0,0 @@ -//===-- ARM64TargetInfo.cpp - ARM64 Target Implementation -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/Triple.h" -#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -namespace llvm { -Target TheARM64Target; -} // end namespace llvm - -extern "C" void LLVMInitializeARM64TargetInfo() { - RegisterTarget<Triple::arm64, /*HasJIT=*/true> X(TheARM64Target, "arm64", - "ARM64"); -} diff --git a/lib/Target/ARM64/TargetInfo/CMakeLists.txt b/lib/Target/ARM64/TargetInfo/CMakeLists.txt deleted file mode 100644 index a0142c4..0000000 --- a/lib/Target/ARM64/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARM64Info - ARM64TargetInfo.cpp - ) - -add_dependencies(LLVMARM64Info ARM64CommonTableGen) diff --git a/lib/Target/ARM64/TargetInfo/LLVMBuild.txt b/lib/Target/ARM64/TargetInfo/LLVMBuild.txt deleted file mode 100644 index 5bea694..0000000 --- a/lib/Target/ARM64/TargetInfo/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/ARM64/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ARM64Info -parent = ARM64 -required_libraries = MC Support -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/TargetInfo/Makefile b/lib/Target/ARM64/TargetInfo/Makefile deleted file mode 100644 index 2d5a1a0..0000000 --- a/lib/Target/ARM64/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM64/TargetInfo/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Info - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common