author | Stephen Hines <srhines@google.com> | 2014-05-29 02:49:00 -0700
---|---|---
committer | Stephen Hines <srhines@google.com> | 2014-05-29 02:49:00 -0700
commit | dce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch)
tree | dcebc53f2b182f145a2e659393bf9a0472cedf23 /lib/Target/ARM64
parent | 220b921aed042f9e520c26cffd8282a94c66c3d5 (diff)
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'lib/Target/ARM64')
84 files changed, 0 insertions, 58692 deletions
diff --git a/lib/Target/ARM64/ARM64.h b/lib/Target/ARM64/ARM64.h deleted file mode 100644 index f2c5e60..0000000 --- a/lib/Target/ARM64/ARM64.h +++ /dev/null @@ -1,48 +0,0 @@ -//===-- ARM64.h - Top-level interface for ARM64 representation --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the entry points for global functions defined in the LLVM -// ARM64 back-end. -// -//===----------------------------------------------------------------------===// - -#ifndef TARGET_ARM64_H -#define TARGET_ARM64_H - -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64MCTargetDesc.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/DataTypes.h" - -namespace llvm { - -class ARM64TargetMachine; -class FunctionPass; -class MachineFunctionPass; - -FunctionPass *createARM64DeadRegisterDefinitions(); -FunctionPass *createARM64ConditionalCompares(); -FunctionPass *createARM64AdvSIMDScalar(); -FunctionPass *createARM64BranchRelaxation(); -FunctionPass *createARM64ISelDag(ARM64TargetMachine &TM, - CodeGenOpt::Level OptLevel); -FunctionPass *createARM64StorePairSuppressPass(); -FunctionPass *createARM64ExpandPseudoPass(); -FunctionPass *createARM64LoadStoreOptimizationPass(); -ModulePass *createARM64PromoteConstantPass(); -FunctionPass *createARM64AddressTypePromotionPass(); -/// \brief Creates an ARM-specific Target Transformation Info pass. -ImmutablePass *createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM); - -FunctionPass *createARM64CleanupLocalDynamicTLSPass(); - -FunctionPass *createARM64CollectLOHPass(); -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/ARM64.td b/lib/Target/ARM64/ARM64.td deleted file mode 100644 index 3eef8b2..0000000 --- a/lib/Target/ARM64/ARM64.td +++ /dev/null @@ -1,95 +0,0 @@ -//===- ARM64.td - Describe the ARM64 Target Machine --------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Target-independent interfaces which we are implementing -//===----------------------------------------------------------------------===// - -include "llvm/Target/Target.td" - -//===----------------------------------------------------------------------===// -// ARM64 Subtarget features. -// - -/// Cyclone has register move instructions which are "free". -def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", - "Has zereo-cycle register moves">; - -/// Cyclone has instructions which zero registers for "free". 
-def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", - "Has zero-cycle zeroing instructions">; - -//===----------------------------------------------------------------------===// -// Register File Description -//===----------------------------------------------------------------------===// - -include "ARM64RegisterInfo.td" -include "ARM64CallingConvention.td" - -//===----------------------------------------------------------------------===// -// Instruction Descriptions -//===----------------------------------------------------------------------===// - -include "ARM64Schedule.td" -include "ARM64InstrInfo.td" - -def ARM64InstrInfo : InstrInfo; - -//===----------------------------------------------------------------------===// -// ARM64 Processors supported. -// -include "ARM64SchedCyclone.td" - -def : ProcessorModel<"arm64-generic", NoSchedModel, []>; - -def : ProcessorModel<"cyclone", CycloneModel, [FeatureZCRegMove, FeatureZCZeroing]>; - -//===----------------------------------------------------------------------===// -// Assembly parser -//===----------------------------------------------------------------------===// - -def GenericAsmParserVariant : AsmParserVariant { - int Variant = 0; - string Name = "generic"; -} - -def AppleAsmParserVariant : AsmParserVariant { - int Variant = 1; - string Name = "apple-neon"; -} - -//===----------------------------------------------------------------------===// -// Assembly printer -//===----------------------------------------------------------------------===// -// ARM64 Uses the MC printer for asm output, so make sure the TableGen -// AsmWriter bits get associated with the correct class. -def GenericAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - int Variant = 0; - bit isMCAsmWriter = 1; -} - -def AppleAsmWriter : AsmWriter { - let AsmWriterClassName = "AppleInstPrinter"; - int Variant = 1; - int isMCAsmWriter = 1; -} - -//===----------------------------------------------------------------------===// -// Target Declaration -//===----------------------------------------------------------------------===// - -def ARM64 : Target { - let InstructionSet = ARM64InstrInfo; - let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant]; - let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter]; -} diff --git a/lib/Target/ARM64/ARM64AddressTypePromotion.cpp b/lib/Target/ARM64/ARM64AddressTypePromotion.cpp deleted file mode 100644 index 72fa6af..0000000 --- a/lib/Target/ARM64/ARM64AddressTypePromotion.cpp +++ /dev/null @@ -1,496 +0,0 @@ - -//===-- ARM64AddressTypePromotion.cpp --- Promote type for addr accesses -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass tries to promote the computations use to obtained a sign extended -// value used into memory accesses. -// E.g. -// a = add nsw i32 b, 3 -// d = sext i32 a to i64 -// e = getelementptr ..., i64 d -// -// => -// f = sext i32 b to i64 -// a = add nsw i64 f, 3 -// e = getelementptr ..., i64 a -// -// This is legal to do so if the computations are markers with either nsw or nuw -// markers. -// Moreover, the current heuristic is simple: it does not create new sext -// operations, i.e., it gives up when a sext would have forked (e.g., if -// a = add i32 b, c, two sexts are required to promote the computation). 
-// -// FIXME: This pass may be useful for other targets too. -// ===---------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-type-promotion" -#include "ARM64.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" - -using namespace llvm; - -static cl::opt<bool> -EnableAddressTypePromotion("arm64-type-promotion", cl::Hidden, - cl::desc("Enable the type promotion pass"), - cl::init(true)); -static cl::opt<bool> -EnableMerge("arm64-type-promotion-merge", cl::Hidden, - cl::desc("Enable merging of redundant sexts when one is dominating" - " the other."), - cl::init(true)); - -//===----------------------------------------------------------------------===// -// ARM64AddressTypePromotion -//===----------------------------------------------------------------------===// - -namespace llvm { -void initializeARM64AddressTypePromotionPass(PassRegistry &); -} - -namespace { -class ARM64AddressTypePromotion : public FunctionPass { - -public: - static char ID; - ARM64AddressTypePromotion() - : FunctionPass(ID), Func(NULL), ConsideredSExtType(NULL) { - initializeARM64AddressTypePromotionPass(*PassRegistry::getPassRegistry()); - } - - virtual const char *getPassName() const { - return "ARM64 Address Type Promotion"; - } - - /// Iterate over the functions and promote the computation of interesting - // sext instructions. - bool runOnFunction(Function &F); - -private: - /// The current function. - Function *Func; - /// Filter out all sexts that does not have this type. - /// Currently initialized with Int64Ty. - Type *ConsideredSExtType; - - // This transformation requires dominator info. - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - FunctionPass::getAnalysisUsage(AU); - } - - typedef SmallPtrSet<Instruction *, 32> SetOfInstructions; - typedef SmallVector<Instruction *, 16> Instructions; - typedef DenseMap<Value *, Instructions> ValueToInsts; - - /// Check if it is profitable to move a sext through this instruction. - /// Currently, we consider it is profitable if: - /// - Inst is used only once (no need to insert truncate). - /// - Inst has only one operand that will require a sext operation (we do - /// do not create new sext operation). - bool shouldGetThrough(const Instruction *Inst); - - /// Check if it is possible and legal to move a sext through this - /// instruction. - /// Current heuristic considers that we can get through: - /// - Arithmetic operation marked with the nsw or nuw flag. - /// - Other sext operation. - /// - Truncate operation if it was just dropping sign extended bits. - bool canGetThrough(const Instruction *Inst); - - /// Move sext operations through safe to sext instructions. - bool propagateSignExtension(Instructions &SExtInsts); - - /// Is this sext should be considered for code motion. - /// We look for sext with ConsideredSExtType and uses in at least one - // GetElementPtrInst. - bool shouldConsiderSExt(const Instruction *SExt) const; - - /// Collect all interesting sext operations, i.e., the ones with the right - /// type and used in memory accesses. 
- /// More precisely, a sext instruction is considered as interesting if it - /// is used in a "complex" getelementptr or it exits at least another - /// sext instruction that sign extended the same initial value. - /// A getelementptr is considered as "complex" if it has more than 2 - // operands. - void analyzeSExtension(Instructions &SExtInsts); - - /// Merge redundant sign extension operations in common dominator. - void mergeSExts(ValueToInsts &ValToSExtendedUses, - SetOfInstructions &ToRemove); -}; -} // end anonymous namespace. - -char ARM64AddressTypePromotion::ID = 0; - -INITIALIZE_PASS_BEGIN(ARM64AddressTypePromotion, "arm64-type-promotion", - "ARM64 Type Promotion Pass", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(ARM64AddressTypePromotion, "arm64-type-promotion", - "ARM64 Type Promotion Pass", false, false) - -FunctionPass *llvm::createARM64AddressTypePromotionPass() { - return new ARM64AddressTypePromotion(); -} - -bool ARM64AddressTypePromotion::canGetThrough(const Instruction *Inst) { - if (isa<SExtInst>(Inst)) - return true; - - const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); - if (BinOp && isa<OverflowingBinaryOperator>(BinOp) && - (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap())) - return true; - - // sext(trunc(sext)) --> sext - if (isa<TruncInst>(Inst) && isa<SExtInst>(Inst->getOperand(0))) { - const Instruction *Opnd = cast<Instruction>(Inst->getOperand(0)); - // Check that the truncate just drop sign extended bits. - if (Inst->getType()->getIntegerBitWidth() >= - Opnd->getOperand(0)->getType()->getIntegerBitWidth() && - Inst->getOperand(0)->getType()->getIntegerBitWidth() <= - ConsideredSExtType->getIntegerBitWidth()) - return true; - } - - return false; -} - -bool ARM64AddressTypePromotion::shouldGetThrough(const Instruction *Inst) { - // If the type of the sext is the same as the considered one, this sext - // will become useless. - // Otherwise, we will have to do something to preserve the original value, - // unless it is used once. - if (isa<SExtInst>(Inst) && - (Inst->getType() == ConsideredSExtType || Inst->hasOneUse())) - return true; - - // If the Inst is used more that once, we may need to insert truncate - // operations and we don't do that at the moment. - if (!Inst->hasOneUse()) - return false; - - // This truncate is used only once, thus if we can get thourgh, it will become - // useless. - if (isa<TruncInst>(Inst)) - return true; - - // If both operands are not constant, a new sext will be created here. - // Current heuristic is: each step should be profitable. - // Therefore we don't allow to increase the number of sext even if it may - // be profitable later on. - if (isa<BinaryOperator>(Inst) && isa<ConstantInt>(Inst->getOperand(1))) - return true; - - return false; -} - -static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) { - if (isa<SelectInst>(Inst) && OpIdx == 0) - return false; - return true; -} - -bool -ARM64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const { - if (SExt->getType() != ConsideredSExtType) - return false; - - for (Value::const_use_iterator UseIt = SExt->use_begin(), - EndUseIt = SExt->use_end(); - UseIt != EndUseIt; ++UseIt) { - if (isa<GetElementPtrInst>(*UseIt)) - return true; - } - - return false; -} - -// Input: -// - SExtInsts contains all the sext instructions that are use direclty in -// GetElementPtrInst, i.e., access to memory. -// Algorithm: -// - For each sext operation in SExtInsts: -// Let var be the operand of sext. 
-// while it is profitable (see shouldGetThrough), legal, and safe -// (see canGetThrough) to move sext through var's definition: -// * promote the type of var's definition. -// * fold var into sext uses. -// * move sext above var's definition. -// * update sext operand to use the operand of var that should be sign -// extended (by construction there is only one). -// -// E.g., -// a = ... i32 c, 3 -// b = sext i32 a to i64 <- is it legal/safe/profitable to get through 'a' -// ... -// = b -// => Yes, update the code -// b = sext i32 c to i64 -// a = ... i64 b, 3 -// ... -// = a -// Iterate on 'c'. -bool -ARM64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) { - DEBUG(dbgs() << "*** Propagate Sign Extension ***\n"); - - bool LocalChange = false; - SetOfInstructions ToRemove; - ValueToInsts ValToSExtendedUses; - while (!SExtInsts.empty()) { - // Get through simple chain. - Instruction *SExt = SExtInsts.pop_back_val(); - - DEBUG(dbgs() << "Consider:\n" << *SExt << '\n'); - - // If this SExt has already been merged continue. - if (SExt->use_empty() && ToRemove.count(SExt)) { - DEBUG(dbgs() << "No uses => marked as delete\n"); - continue; - } - - // Now try to get through the chain of definitions. - while (isa<Instruction>(SExt->getOperand(0))) { - Instruction *Inst = dyn_cast<Instruction>(SExt->getOperand(0)); - DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n'); - if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) { - // We cannot get through something that is not an Instruction - // or not safe to SExt. - DEBUG(dbgs() << "Cannot get through\n"); - break; - } - - LocalChange = true; - // If this is a sign extend, it becomes useless. - if (isa<SExtInst>(Inst) || isa<TruncInst>(Inst)) { - DEBUG(dbgs() << "SExt or trunc, mark it as to remove\n"); - // We cannot use replaceAllUsesWith here because we may trigger some - // assertion on the type as all involved sext operation may have not - // been moved yet. - while (!Inst->use_empty()) { - Value::use_iterator UseIt = Inst->use_begin(); - Instruction *UseInst = dyn_cast<Instruction>(*UseIt); - assert(UseInst && "Use of sext is not an Instruction!"); - UseInst->setOperand(UseIt->getOperandNo(), SExt); - } - ToRemove.insert(Inst); - SExt->setOperand(0, Inst->getOperand(0)); - SExt->moveBefore(Inst); - continue; - } - - // Get through the Instruction: - // 1. Update its type. - // 2. Replace the uses of SExt by Inst. - // 3. Sign extend each operand that needs to be sign extended. - - // Step #1. - Inst->mutateType(SExt->getType()); - // Step #2. - SExt->replaceAllUsesWith(Inst); - // Step #3. - Instruction *SExtForOpnd = SExt; - - DEBUG(dbgs() << "Propagate SExt to operands\n"); - for (int OpIdx = 0, EndOpIdx = Inst->getNumOperands(); OpIdx != EndOpIdx; - ++OpIdx) { - DEBUG(dbgs() << "Operand:\n" << *(Inst->getOperand(OpIdx)) << '\n'); - if (Inst->getOperand(OpIdx)->getType() == SExt->getType() || - !shouldSExtOperand(Inst, OpIdx)) { - DEBUG(dbgs() << "No need to propagate\n"); - continue; - } - // Check if we can statically sign extend the operand. - Value *Opnd = Inst->getOperand(OpIdx); - if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) { - DEBUG(dbgs() << "Statically sign extend\n"); - Inst->setOperand(OpIdx, ConstantInt::getSigned(SExt->getType(), - Cst->getSExtValue())); - continue; - } - // UndefValue are typed, so we have to statically sign extend them. 
- if (isa<UndefValue>(Opnd)) { - DEBUG(dbgs() << "Statically sign extend\n"); - Inst->setOperand(OpIdx, UndefValue::get(SExt->getType())); - continue; - } - - // Otherwise we have to explicity sign extend it. - assert(SExtForOpnd && - "Only one operand should have been sign extended"); - - SExtForOpnd->setOperand(0, Opnd); - - DEBUG(dbgs() << "Move before:\n" << *Inst << "\nSign extend\n"); - // Move the sign extension before the insertion point. - SExtForOpnd->moveBefore(Inst); - Inst->setOperand(OpIdx, SExtForOpnd); - // If more sext are required, new instructions will have to be created. - SExtForOpnd = NULL; - } - if (SExtForOpnd == SExt) { - DEBUG(dbgs() << "Sign extension is useless now\n"); - ToRemove.insert(SExt); - break; - } - } - - // If the use is already of the right type, connect its uses to its argument - // and delete it. - // This can happen for an Instruction which all uses are sign extended. - if (!ToRemove.count(SExt) && - SExt->getType() == SExt->getOperand(0)->getType()) { - DEBUG(dbgs() << "Sign extension is useless, attach its use to " - "its argument\n"); - SExt->replaceAllUsesWith(SExt->getOperand(0)); - ToRemove.insert(SExt); - } else - ValToSExtendedUses[SExt->getOperand(0)].push_back(SExt); - } - - if (EnableMerge) - mergeSExts(ValToSExtendedUses, ToRemove); - - // Remove all instructions marked as ToRemove. - for (Instruction *I: ToRemove) - I->eraseFromParent(); - return LocalChange; -} - -void ARM64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses, - SetOfInstructions &ToRemove) { - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - - for (auto &Entry: ValToSExtendedUses) { - Instructions &Insts = Entry.second; - Instructions CurPts; - for (Instruction *Inst : Insts) { - if (ToRemove.count(Inst)) - continue; - bool inserted = false; - for (auto Pt : CurPts) { - if (DT.dominates(Inst, Pt)) { - DEBUG(dbgs() << "Replace all uses of:\n" << *Pt << "\nwith:\n" - << *Inst << '\n'); - (Pt)->replaceAllUsesWith(Inst); - ToRemove.insert(Pt); - Pt = Inst; - inserted = true; - break; - } - if (!DT.dominates(Pt, Inst)) - // Give up if we need to merge in a common dominator as the - // expermients show it is not profitable. - continue; - - DEBUG(dbgs() << "Replace all uses of:\n" << *Inst << "\nwith:\n" - << *Pt << '\n'); - Inst->replaceAllUsesWith(Pt); - ToRemove.insert(Inst); - inserted = true; - break; - } - if (!inserted) - CurPts.push_back(Inst); - } - } -} - -void ARM64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) { - DEBUG(dbgs() << "*** Analyze Sign Extensions ***\n"); - - DenseMap<Value *, Instruction *> SeenChains; - - for (auto &BB : *Func) { - for (auto &II: BB) { - Instruction *SExt = &II; - - // Collect all sext operation per type. - if (!isa<SExtInst>(SExt) || !shouldConsiderSExt(SExt)) - continue; - - DEBUG(dbgs() << "Found:\n" << (*SExt) << '\n'); - - // Cases where we actually perform the optimization: - // 1. SExt is used in a getelementptr with more than 2 operand => - // likely we can merge some computation if they are done on 64 bits. - // 2. The beginning of the SExt chain is SExt several time. => - // code sharing is possible. - - bool insert = false; - // #1. 
- for (Value::use_iterator UseIt = SExt->use_begin(), - EndUseIt = SExt->use_end(); - UseIt != EndUseIt; ++UseIt) { - const Instruction *Inst = dyn_cast<GetElementPtrInst>(*UseIt); - if (Inst && Inst->getNumOperands() > 2) { - DEBUG(dbgs() << "Interesting use in GetElementPtrInst\n" << *Inst - << '\n'); - insert = true; - break; - } - } - - // #2. - // Check the head of the chain. - Instruction *Inst = SExt; - Value *Last; - do { - int OpdIdx = 0; - const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); - if (BinOp && isa<ConstantInt>(BinOp->getOperand(0))) - OpdIdx = 1; - Last = Inst->getOperand(OpdIdx); - Inst = dyn_cast<Instruction>(Last); - } while (Inst && canGetThrough(Inst) && shouldGetThrough(Inst)); - - DEBUG(dbgs() << "Head of the chain:\n" << *Last << '\n'); - DenseMap<Value *, Instruction *>::iterator AlreadySeen = - SeenChains.find(Last); - if (insert || AlreadySeen != SeenChains.end()) { - DEBUG(dbgs() << "Insert\n"); - SExtInsts.push_back(SExt); - if (AlreadySeen != SeenChains.end() && AlreadySeen->second != NULL) { - DEBUG(dbgs() << "Insert chain member\n"); - SExtInsts.push_back(AlreadySeen->second); - SeenChains[Last] = NULL; - } - } else { - DEBUG(dbgs() << "Record its chain membership\n"); - SeenChains[Last] = SExt; - } - } - } -} - -bool ARM64AddressTypePromotion::runOnFunction(Function &F) { - if (!EnableAddressTypePromotion || F.isDeclaration()) - return false; - Func = &F; - ConsideredSExtType = Type::getInt64Ty(Func->getContext()); - - DEBUG(dbgs() << "*** " << getPassName() << ": " << Func->getName() << '\n'); - - Instructions SExtInsts; - analyzeSExtension(SExtInsts); - return propagateSignExtension(SExtInsts); -} diff --git a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp b/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp deleted file mode 100644 index 83f8cda..0000000 --- a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp +++ /dev/null @@ -1,392 +0,0 @@ -//===-- ARM64AdvSIMDScalar.cpp - Replace dead defs w/ zero reg --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// When profitable, replace GPR targeting i64 instructions with their -// AdvSIMD scalar equivalents. Generally speaking, "profitable" is defined -// as minimizing the number of cross-class register copies. -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// TODO: Graph based predicate heuristics. -// Walking the instruction list linearly will get many, perhaps most, of -// the cases, but to do a truly throrough job of this, we need a more -// wholistic approach. -// -// This optimization is very similar in spirit to the register allocator's -// spill placement, only here we're determining where to place cross-class -// register copies rather than spills. As such, a similar approach is -// called for. -// -// We want to build up a set of graphs of all instructions which are candidates -// for transformation along with instructions which generate their inputs and -// consume their outputs. For each edge in the graph, we assign a weight -// based on whether there is a copy required there (weight zero if not) and -// the block frequency of the block containing the defining or using -// instruction, whichever is less. 
Our optimization is then a graph problem -// to minimize the total weight of all the graphs, then transform instructions -// and add or remove copy instructions as called for to implement the -// solution. -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-simd-scalar" -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64RegisterInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -static cl::opt<bool> -AdvSIMDScalar("arm64-simd-scalar", - cl::desc("enable use of AdvSIMD scalar integer instructions"), - cl::init(false), cl::Hidden); -// Allow forcing all i64 operations with equivalent SIMD instructions to use -// them. For stress-testing the transformation function. -static cl::opt<bool> -TransformAll("arm64-simd-scalar-force-all", - cl::desc("Force use of AdvSIMD scalar instructions everywhere"), - cl::init(false), cl::Hidden); - -STATISTIC(NumScalarInsnsUsed, "Number of scalar instructions used"); -STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted"); -STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted"); - -namespace { -class ARM64AdvSIMDScalar : public MachineFunctionPass { - MachineRegisterInfo *MRI; - const ARM64InstrInfo *TII; - -private: - // isProfitableToTransform - Predicate function to determine whether an - // instruction should be transformed to its equivalent AdvSIMD scalar - // instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example. - bool isProfitableToTransform(const MachineInstr *MI) const; - - // tranformInstruction - Perform the transformation of an instruction - // to its equivalant AdvSIMD scalar instruction. Update inputs and outputs - // to be the correct register class, minimizing cross-class copies. - void transformInstruction(MachineInstr *MI); - - // processMachineBasicBlock - Main optimzation loop. - bool processMachineBasicBlock(MachineBasicBlock *MBB); - -public: - static char ID; // Pass identification, replacement for typeid. - explicit ARM64AdvSIMDScalar() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &F); - - const char *getPassName() const { - return "AdvSIMD scalar operation optimization"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } -}; -char ARM64AdvSIMDScalar::ID = 0; -} // end anonymous namespace - -static bool isGPR64(unsigned Reg, unsigned SubReg, - const MachineRegisterInfo *MRI) { - if (SubReg) - return false; - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::GPR64RegClass); - return ARM64::GPR64RegClass.contains(Reg); -} - -static bool isFPR64(unsigned Reg, unsigned SubReg, - const MachineRegisterInfo *MRI) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR64RegClass) && - SubReg == 0) || - (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR128RegClass) && - SubReg == ARM64::dsub); - // Physical register references just check the regist class directly. 
- return (ARM64::FPR64RegClass.contains(Reg) && SubReg == 0) || - (ARM64::FPR128RegClass.contains(Reg) && SubReg == ARM64::dsub); -} - -// getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64 -// copy instruction. Return zero_reg if the instruction is not a copy. -static unsigned getSrcFromCopy(const MachineInstr *MI, - const MachineRegisterInfo *MRI, - unsigned &SubReg) { - SubReg = 0; - // The "FMOV Xd, Dn" instruction is the typical form. - if (MI->getOpcode() == ARM64::FMOVDXr || MI->getOpcode() == ARM64::FMOVXDr) - return MI->getOperand(1).getReg(); - // A lane zero extract "UMOV.d Xd, Vn[0]" is equivalent. We shouldn't see - // these at this stage, but it's easy to check for. - if (MI->getOpcode() == ARM64::UMOVvi64 && MI->getOperand(2).getImm() == 0) { - SubReg = ARM64::dsub; - return MI->getOperand(1).getReg(); - } - // Or just a plain COPY instruction. This can be directly to/from FPR64, - // or it can be a dsub subreg reference to an FPR128. - if (MI->getOpcode() == ARM64::COPY) { - if (isFPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(), - MRI) && - isGPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), MRI)) - return MI->getOperand(1).getReg(); - if (isGPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(), - MRI) && - isFPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), - MRI)) { - SubReg = ARM64::dsub; - return MI->getOperand(1).getReg(); - } - } - - // Otherwise, this is some other kind of instruction. - return 0; -} - -// getTransformOpcode - For any opcode for which there is an AdvSIMD equivalent -// that we're considering transforming to, return that AdvSIMD opcode. For all -// others, return the original opcode. -static int getTransformOpcode(unsigned Opc) { - switch (Opc) { - default: - break; - // FIXME: Lots more possibilities. - case ARM64::ADDXrr: - return ARM64::ADDv1i64; - case ARM64::SUBXrr: - return ARM64::SUBv1i64; - } - // No AdvSIMD equivalent, so just return the original opcode. - return Opc; -} - -static bool isTransformable(const MachineInstr *MI) { - int Opc = MI->getOpcode(); - return Opc != getTransformOpcode(Opc); -} - -// isProfitableToTransform - Predicate function to determine whether an -// instruction should be transformed to its equivalent AdvSIMD scalar -// instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example. -bool ARM64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const { - // If this instruction isn't eligible to be transformed (no SIMD equivalent), - // early exit since that's the common case. - if (!isTransformable(MI)) - return false; - - // Count the number of copies we'll need to add and approximate the number - // of copies that a transform will enable us to remove. - unsigned NumNewCopies = 3; - unsigned NumRemovableCopies = 0; - - unsigned OrigSrc0 = MI->getOperand(1).getReg(); - unsigned OrigSrc1 = MI->getOperand(2).getReg(); - unsigned Src0 = 0, SubReg0; - unsigned Src1 = 0, SubReg1; - if (!MRI->def_empty(OrigSrc0)) { - MachineRegisterInfo::def_instr_iterator Def = - MRI->def_instr_begin(OrigSrc0); - assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); - Src0 = getSrcFromCopy(&*Def, MRI, SubReg0); - // If the source was from a copy, we don't need to insert a new copy. - if (Src0) - --NumNewCopies; - // If there are no other users of the original source, we can delete - // that instruction. 
- if (Src0 && MRI->hasOneNonDBGUse(OrigSrc0)) - ++NumRemovableCopies; - } - if (!MRI->def_empty(OrigSrc1)) { - MachineRegisterInfo::def_instr_iterator Def = - MRI->def_instr_begin(OrigSrc1); - assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); - Src1 = getSrcFromCopy(&*Def, MRI, SubReg1); - if (Src1) - --NumNewCopies; - // If there are no other users of the original source, we can delete - // that instruction. - if (Src1 && MRI->hasOneNonDBGUse(OrigSrc1)) - ++NumRemovableCopies; - } - - // If any of the uses of the original instructions is a cross class copy, - // that's a copy that will be removable if we transform. Likewise, if - // any of the uses is a transformable instruction, it's likely the tranforms - // will chain, enabling us to save a copy there, too. This is an aggressive - // heuristic that approximates the graph based cost analysis described above. - unsigned Dst = MI->getOperand(0).getReg(); - bool AllUsesAreCopies = true; - for (MachineRegisterInfo::use_instr_nodbg_iterator - Use = MRI->use_instr_nodbg_begin(Dst), - E = MRI->use_instr_nodbg_end(); - Use != E; ++Use) { - unsigned SubReg; - if (getSrcFromCopy(&*Use, MRI, SubReg) || isTransformable(&*Use)) - ++NumRemovableCopies; - // If the use is an INSERT_SUBREG, that's still something that can - // directly use the FPR64, so we don't invalidate AllUsesAreCopies. It's - // preferable to have it use the FPR64 in most cases, as if the source - // vector is an IMPLICIT_DEF, the INSERT_SUBREG just goes away entirely. - // Ditto for a lane insert. - else if (Use->getOpcode() == ARM64::INSERT_SUBREG || - Use->getOpcode() == ARM64::INSvi64gpr) - ; - else - AllUsesAreCopies = false; - } - // If all of the uses of the original destination register are copies to - // FPR64, then we won't end up having a new copy back to GPR64 either. - if (AllUsesAreCopies) - --NumNewCopies; - - // If a tranform will not increase the number of cross-class copies required, - // return true. - if (NumNewCopies <= NumRemovableCopies) - return true; - - // Finally, even if we otherwise wouldn't transform, check if we're forcing - // transformation of everything. - return TransformAll; -} - -static MachineInstr *insertCopy(const ARM64InstrInfo *TII, MachineInstr *MI, - unsigned Dst, unsigned Src, bool IsKill) { - MachineInstrBuilder MIB = - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(ARM64::COPY), - Dst) - .addReg(Src, getKillRegState(IsKill)); - DEBUG(dbgs() << " adding copy: " << *MIB); - ++NumCopiesInserted; - return MIB; -} - -// tranformInstruction - Perform the transformation of an instruction -// to its equivalant AdvSIMD scalar instruction. Update inputs and outputs -// to be the correct register class, minimizing cross-class copies. -void ARM64AdvSIMDScalar::transformInstruction(MachineInstr *MI) { - DEBUG(dbgs() << "Scalar transform: " << *MI); - - MachineBasicBlock *MBB = MI->getParent(); - int OldOpc = MI->getOpcode(); - int NewOpc = getTransformOpcode(OldOpc); - assert(OldOpc != NewOpc && "transform an instruction to itself?!"); - - // Check if we need a copy for the source registers. 
- unsigned OrigSrc0 = MI->getOperand(1).getReg(); - unsigned OrigSrc1 = MI->getOperand(2).getReg(); - unsigned Src0 = 0, SubReg0; - unsigned Src1 = 0, SubReg1; - if (!MRI->def_empty(OrigSrc0)) { - MachineRegisterInfo::def_instr_iterator Def = - MRI->def_instr_begin(OrigSrc0); - assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); - Src0 = getSrcFromCopy(&*Def, MRI, SubReg0); - // If there are no other users of the original source, we can delete - // that instruction. - if (Src0 && MRI->hasOneNonDBGUse(OrigSrc0)) { - assert(Src0 && "Can't delete copy w/o a valid original source!"); - Def->eraseFromParent(); - ++NumCopiesDeleted; - } - } - if (!MRI->def_empty(OrigSrc1)) { - MachineRegisterInfo::def_instr_iterator Def = - MRI->def_instr_begin(OrigSrc1); - assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); - Src1 = getSrcFromCopy(&*Def, MRI, SubReg1); - // If there are no other users of the original source, we can delete - // that instruction. - if (Src1 && MRI->hasOneNonDBGUse(OrigSrc1)) { - assert(Src1 && "Can't delete copy w/o a valid original source!"); - Def->eraseFromParent(); - ++NumCopiesDeleted; - } - } - // If we weren't able to reference the original source directly, create a - // copy. - if (!Src0) { - SubReg0 = 0; - Src0 = MRI->createVirtualRegister(&ARM64::FPR64RegClass); - insertCopy(TII, MI, Src0, OrigSrc0, true); - } - if (!Src1) { - SubReg1 = 0; - Src1 = MRI->createVirtualRegister(&ARM64::FPR64RegClass); - insertCopy(TII, MI, Src1, OrigSrc1, true); - } - - // Create a vreg for the destination. - // FIXME: No need to do this if the ultimate user expects an FPR64. - // Check for that and avoid the copy if possible. - unsigned Dst = MRI->createVirtualRegister(&ARM64::FPR64RegClass); - - // For now, all of the new instructions have the same simple three-register - // form, so no need to special case based on what instruction we're - // building. - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(NewOpc), Dst) - .addReg(Src0, getKillRegState(true), SubReg0) - .addReg(Src1, getKillRegState(true), SubReg1); - - // Now copy the result back out to a GPR. - // FIXME: Try to avoid this if all uses could actually just use the FPR64 - // directly. - insertCopy(TII, MI, MI->getOperand(0).getReg(), Dst, true); - - // Erase the old instruction. - MI->eraseFromParent(); - - ++NumScalarInsnsUsed; -} - -// processMachineBasicBlock - Main optimzation loop. -bool ARM64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) { - bool Changed = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) { - MachineInstr *MI = I; - ++I; - if (isProfitableToTransform(MI)) { - transformInstruction(MI); - Changed = true; - } - } - return Changed; -} - -// runOnMachineFunction - Pass entry point from PassManager. -bool ARM64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) { - // Early exit if pass disabled. - if (!AdvSIMDScalar) - return false; - - bool Changed = false; - DEBUG(dbgs() << "***** ARM64AdvSIMDScalar *****\n"); - - const TargetMachine &TM = mf.getTarget(); - MRI = &mf.getRegInfo(); - TII = static_cast<const ARM64InstrInfo *>(TM.getInstrInfo()); - - // Just check things on a one-block-at-a-time basis. - for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) - if (processMachineBasicBlock(I)) - Changed = true; - return Changed; -} - -// createARM64AdvSIMDScalar - Factory function used by ARM64TargetMachine -// to add the pass to the PassManager. 
-FunctionPass *llvm::createARM64AdvSIMDScalar() { - return new ARM64AdvSIMDScalar(); -} diff --git a/lib/Target/ARM64/ARM64AsmPrinter.cpp b/lib/Target/ARM64/ARM64AsmPrinter.cpp deleted file mode 100644 index d0aa6af..0000000 --- a/lib/Target/ARM64/ARM64AsmPrinter.cpp +++ /dev/null @@ -1,563 +0,0 @@ -//===-- ARM64AsmPrinter.cpp - ARM64 LLVM assembly writer ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to the ARM64 assembly language. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "ARM64.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64MCInstLower.h" -#include "ARM64RegisterInfo.h" -#include "InstPrinter/ARM64InstPrinter.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/StackMaps.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstBuilder.h" -#include "llvm/MC/MCLinkerOptimizationHint.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -namespace { - -class ARM64AsmPrinter : public AsmPrinter { - ARM64MCInstLower MCInstLowering; - StackMaps SM; - -public: - ARM64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), MCInstLowering(OutContext, *Mang, *this), - SM(*this), ARM64FI(NULL), LOHLabelCounter(0) {} - - virtual const char *getPassName() const { return "ARM64 Assembly Printer"; } - - /// \brief Wrapper for MCInstLowering.lowerOperand() for the - /// tblgen'erated pseudo lowering. - bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const { - return MCInstLowering.lowerOperand(MO, MCOp); - } - - void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI); - void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI); - /// \brief tblgen'erated driver function for lowering simple MI->MC - /// pseudo instructions. 
- bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, - const MachineInstr *MI); - - void EmitInstruction(const MachineInstr *MI); - - void getAnalysisUsage(AnalysisUsage &AU) const { - AsmPrinter::getAnalysisUsage(AU); - AU.setPreservesAll(); - } - - bool runOnMachineFunction(MachineFunction &F) { - ARM64FI = F.getInfo<ARM64FunctionInfo>(); - return AsmPrinter::runOnMachineFunction(F); - } - -private: - MachineLocation getDebugValueLocation(const MachineInstr *MI) const; - void printOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O); - bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O); - bool printAsmRegInClass(const MachineOperand &MO, - const TargetRegisterClass *RC, bool isVector, - raw_ostream &O); - - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - - void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); - - void EmitFunctionBodyEnd(); - - MCSymbol *GetCPISymbol(unsigned CPID) const; - void EmitEndOfAsmFile(Module &M); - ARM64FunctionInfo *ARM64FI; - - /// \brief Emit the LOHs contained in ARM64FI. - void EmitLOHs(); - - typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol; - MInstToMCSymbol LOHInstToLabel; - unsigned LOHLabelCounter; -}; - -} // end of anonymous namespace - -//===----------------------------------------------------------------------===// - -void ARM64AsmPrinter::EmitEndOfAsmFile(Module &M) { - // Funny Darwin hack: This flag tells the linker that no global symbols - // contain code that falls through to other global symbols (e.g. the obvious - // implementation of multiple entry points). If this doesn't occur, the - // linker can safely perform dead code stripping. Since LLVM never - // generates code that does this, it is always safe to set. - OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); - SM.serializeToStackMapSection(); -} - -MachineLocation -ARM64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { - MachineLocation Location; - assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); - // Frame address. Currently handles register +- offset only. - if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) - Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); - else { - DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); - } - return Location; -} - -void ARM64AsmPrinter::EmitLOHs() { - SmallVector<MCSymbol *, 3> MCArgs; - - for (const auto &D : ARM64FI->getLOHContainer()) { - for (const MachineInstr *MI : D.getArgs()) { - MInstToMCSymbol::iterator LabelIt = LOHInstToLabel.find(MI); - assert(LabelIt != LOHInstToLabel.end() && - "Label hasn't been inserted for LOH related instruction"); - MCArgs.push_back(LabelIt->second); - } - OutStreamer.EmitLOHDirective(D.getKind(), MCArgs); - MCArgs.clear(); - } -} - -void ARM64AsmPrinter::EmitFunctionBodyEnd() { - if (!ARM64FI->getLOHRelated().empty()) - EmitLOHs(); -} - -/// GetCPISymbol - Return the symbol for the specified constant pool entry. -MCSymbol *ARM64AsmPrinter::GetCPISymbol(unsigned CPID) const { - // Darwin uses a linker-private symbol name for constant-pools (to - // avoid addends on the relocation?), ELF has no such concept and - // uses a normal private symbol. 
- if (getDataLayout().getLinkerPrivateGlobalPrefix()[0]) - return OutContext.GetOrCreateSymbol( - Twine(getDataLayout().getLinkerPrivateGlobalPrefix()) + "CPI" + - Twine(getFunctionNumber()) + "_" + Twine(CPID)); - - return OutContext.GetOrCreateSymbol( - Twine(getDataLayout().getPrivateGlobalPrefix()) + "CPI" + - Twine(getFunctionNumber()) + "_" + Twine(CPID)); -} - -void ARM64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNum); - switch (MO.getType()) { - default: - assert(0 && "<unknown operand type>"); - case MachineOperand::MO_Register: { - unsigned Reg = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); - assert(!MO.getSubReg() && "Subregs should be eliminated!"); - O << ARM64InstPrinter::getRegisterName(Reg); - break; - } - case MachineOperand::MO_Immediate: { - int64_t Imm = MO.getImm(); - O << '#' << Imm; - break; - } - } -} - -bool ARM64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, - raw_ostream &O) { - unsigned Reg = MO.getReg(); - switch (Mode) { - default: - return true; // Unknown mode. - case 'w': - Reg = getWRegFromXReg(Reg); - break; - case 'x': - Reg = getXRegFromWReg(Reg); - break; - } - - O << ARM64InstPrinter::getRegisterName(Reg); - return false; -} - -// Prints the register in MO using class RC using the offset in the -// new register class. This should not be used for cross class -// printing. -bool ARM64AsmPrinter::printAsmRegInClass(const MachineOperand &MO, - const TargetRegisterClass *RC, - bool isVector, raw_ostream &O) { - assert(MO.isReg() && "Should only get here with a register!"); - const ARM64RegisterInfo *RI = - static_cast<const ARM64RegisterInfo *>(TM.getRegisterInfo()); - unsigned Reg = MO.getReg(); - unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg)); - assert(RI->regsOverlap(RegToPrint, Reg)); - O << ARM64InstPrinter::getRegisterName( - RegToPrint, isVector ? ARM64::vreg : ARM64::NoRegAltName); - return false; -} - -bool ARM64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNum); - // Does this asm operand have a single letter operand modifier? - if (ExtraCode && ExtraCode[0]) { - if (ExtraCode[1] != 0) - return true; // Unknown modifier. - - switch (ExtraCode[0]) { - default: - return true; // Unknown modifier. - case 'w': // Print W register - case 'x': // Print X register - if (MO.isReg()) - return printAsmMRegister(MO, ExtraCode[0], O); - if (MO.isImm() && MO.getImm() == 0) { - unsigned Reg = ExtraCode[0] == 'w' ? ARM64::WZR : ARM64::XZR; - O << ARM64InstPrinter::getRegisterName(Reg); - return false; - } - printOperand(MI, OpNum, O); - return false; - case 'b': // Print B register. - case 'h': // Print H register. - case 's': // Print S register. - case 'd': // Print D register. - case 'q': // Print Q register. - if (MO.isReg()) { - const TargetRegisterClass *RC; - switch (ExtraCode[0]) { - case 'b': - RC = &ARM64::FPR8RegClass; - break; - case 'h': - RC = &ARM64::FPR16RegClass; - break; - case 's': - RC = &ARM64::FPR32RegClass; - break; - case 'd': - RC = &ARM64::FPR64RegClass; - break; - case 'q': - RC = &ARM64::FPR128RegClass; - break; - default: - return true; - } - return printAsmRegInClass(MO, RC, false /* vector */, O); - } - printOperand(MI, OpNum, O); - return false; - } - } - - // According to ARM, we should emit x and v registers unless we have a - // modifier. 
- if (MO.isReg()) { - unsigned Reg = MO.getReg(); - - // If this is a w or x register, print an x register. - if (ARM64::GPR32allRegClass.contains(Reg) || - ARM64::GPR64allRegClass.contains(Reg)) - return printAsmMRegister(MO, 'x', O); - - // If this is a b, h, s, d, or q register, print it as a v register. - return printAsmRegInClass(MO, &ARM64::FPR128RegClass, true /* vector */, O); - } - - printOperand(MI, OpNum, O); - return false; -} - -bool ARM64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNum, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. - - const MachineOperand &MO = MI->getOperand(OpNum); - assert(MO.isReg() && "unexpected inline asm memory operand"); - O << "[" << ARM64InstPrinter::getRegisterName(MO.getReg()) << "]"; - return false; -} - -void ARM64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, - raw_ostream &OS) { - unsigned NOps = MI->getNumOperands(); - assert(NOps == 4); - OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata())); - OS << V.getName(); - OS << " <- "; - // Frame address. Currently handles register +- offset only. - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); - OS << '['; - printOperand(MI, 0, OS); - OS << '+'; - printOperand(MI, 1, OS); - OS << ']'; - OS << "+"; - printOperand(MI, NOps - 2, OS); -} - -void ARM64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI) { - unsigned NumNOPBytes = MI.getOperand(1).getImm(); - - SM.recordStackMap(MI); - // Emit padding. - assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); - for (unsigned i = 0; i < NumNOPBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0)); -} - -// Lower a patchpoint of the form: -// [<def>], <id>, <numBytes>, <target>, <numArgs> -void ARM64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI) { - SM.recordPatchPoint(MI); - - PatchPointOpers Opers(&MI); - - int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm(); - unsigned EncodedBytes = 0; - if (CallTarget) { - assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget && - "High 16 bits of call target should be zero."); - unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); - EncodedBytes = 16; - // Materialize the jump address: - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVZWi) - .addReg(ScratchReg) - .addImm((CallTarget >> 32) & 0xFFFF) - .addImm(32)); - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi) - .addReg(ScratchReg) - .addReg(ScratchReg) - .addImm((CallTarget >> 16) & 0xFFFF) - .addImm(16)); - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi) - .addReg(ScratchReg) - .addReg(ScratchReg) - .addImm(CallTarget & 0xFFFF) - .addImm(0)); - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::BLR).addReg(ScratchReg)); - } - // Emit padding. 
- unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm(); - assert(NumBytes >= EncodedBytes && - "Patchpoint can't request size less than the length of a call."); - assert((NumBytes - EncodedBytes) % 4 == 0 && - "Invalid number of NOP bytes requested!"); - for (unsigned i = EncodedBytes; i < NumBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0)); -} - -// Simple pseudo-instructions have their lowering (with expansion to real -// instructions) auto-generated. -#include "ARM64GenMCPseudoLowering.inc" - -static unsigned getRealIndexedOpcode(unsigned Opc) { - switch (Opc) { - case ARM64::LDRXpre_isel: return ARM64::LDRXpre; - case ARM64::LDRWpre_isel: return ARM64::LDRWpre; - case ARM64::LDRDpre_isel: return ARM64::LDRDpre; - case ARM64::LDRSpre_isel: return ARM64::LDRSpre; - case ARM64::LDRBBpre_isel: return ARM64::LDRBBpre; - case ARM64::LDRHHpre_isel: return ARM64::LDRHHpre; - case ARM64::LDRSBWpre_isel: return ARM64::LDRSBWpre; - case ARM64::LDRSBXpre_isel: return ARM64::LDRSBXpre; - case ARM64::LDRSHWpre_isel: return ARM64::LDRSHWpre; - case ARM64::LDRSHXpre_isel: return ARM64::LDRSHXpre; - case ARM64::LDRSWpre_isel: return ARM64::LDRSWpre; - - case ARM64::LDRDpost_isel: return ARM64::LDRDpost; - case ARM64::LDRSpost_isel: return ARM64::LDRSpost; - case ARM64::LDRXpost_isel: return ARM64::LDRXpost; - case ARM64::LDRWpost_isel: return ARM64::LDRWpost; - case ARM64::LDRHHpost_isel: return ARM64::LDRHHpost; - case ARM64::LDRBBpost_isel: return ARM64::LDRBBpost; - case ARM64::LDRSWpost_isel: return ARM64::LDRSWpost; - case ARM64::LDRSHWpost_isel: return ARM64::LDRSHWpost; - case ARM64::LDRSHXpost_isel: return ARM64::LDRSHXpost; - case ARM64::LDRSBWpost_isel: return ARM64::LDRSBWpost; - case ARM64::LDRSBXpost_isel: return ARM64::LDRSBXpost; - - case ARM64::STRXpre_isel: return ARM64::STRXpre; - case ARM64::STRWpre_isel: return ARM64::STRWpre; - case ARM64::STRHHpre_isel: return ARM64::STRHHpre; - case ARM64::STRBBpre_isel: return ARM64::STRBBpre; - case ARM64::STRDpre_isel: return ARM64::STRDpre; - case ARM64::STRSpre_isel: return ARM64::STRSpre; - } - llvm_unreachable("Unexpected pre-indexed opcode!"); -} - -void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { - // Do any auto-generated pseudo lowerings. - if (emitPseudoExpansionLowering(OutStreamer, MI)) - return; - - if (ARM64FI->getLOHRelated().count(MI)) { - // Generate a label for LOH related instruction - MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++); - // Associate the instruction with the label - LOHInstToLabel[MI] = LOHLabel; - OutStreamer.EmitLabel(LOHLabel); - } - - // Do any manual lowerings. - switch (MI->getOpcode()) { - default: - break; - case ARM64::DBG_VALUE: { - if (isVerbose() && OutStreamer.hasRawTextSupport()) { - SmallString<128> TmpStr; - raw_svector_ostream OS(TmpStr); - PrintDebugValueComment(MI, OS); - OutStreamer.EmitRawText(StringRef(OS.str())); - } - return; - } - // Indexed loads and stores use a pseudo to handle complex operand - // tricks and writeback to the base register. We strip off the writeback - // operand and switch the opcode here. Post-indexed stores were handled by the - // tablegen'erated pseudos above. (The complex operand <--> simple - // operand isel is beyond tablegen's ability, so we do these manually). 
- case ARM64::LDRHHpre_isel: - case ARM64::LDRBBpre_isel: - case ARM64::LDRXpre_isel: - case ARM64::LDRWpre_isel: - case ARM64::LDRDpre_isel: - case ARM64::LDRSpre_isel: - case ARM64::LDRSBWpre_isel: - case ARM64::LDRSBXpre_isel: - case ARM64::LDRSHWpre_isel: - case ARM64::LDRSHXpre_isel: - case ARM64::LDRSWpre_isel: - case ARM64::LDRDpost_isel: - case ARM64::LDRSpost_isel: - case ARM64::LDRXpost_isel: - case ARM64::LDRWpost_isel: - case ARM64::LDRHHpost_isel: - case ARM64::LDRBBpost_isel: - case ARM64::LDRSWpost_isel: - case ARM64::LDRSHWpost_isel: - case ARM64::LDRSHXpost_isel: - case ARM64::LDRSBWpost_isel: - case ARM64::LDRSBXpost_isel: { - MCInst TmpInst; - // For loads, the writeback operand to be skipped is the second. - TmpInst.setOpcode(getRealIndexedOpcode(MI->getOpcode())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(2).getReg())); - TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm())); - EmitToStreamer(OutStreamer, TmpInst); - return; - } - case ARM64::STRXpre_isel: - case ARM64::STRWpre_isel: - case ARM64::STRHHpre_isel: - case ARM64::STRBBpre_isel: - case ARM64::STRDpre_isel: - case ARM64::STRSpre_isel: { - MCInst TmpInst; - // For loads, the writeback operand to be skipped is the first. - TmpInst.setOpcode(getRealIndexedOpcode(MI->getOpcode())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(2).getReg())); - TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm())); - EmitToStreamer(OutStreamer, TmpInst); - return; - } - - // Tail calls use pseudo instructions so they have the proper code-gen - // attributes (isCall, isReturn, etc.). We lower them to the real - // instruction here. - case ARM64::TCRETURNri: { - MCInst TmpInst; - TmpInst.setOpcode(ARM64::BR); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - EmitToStreamer(OutStreamer, TmpInst); - return; - } - case ARM64::TCRETURNdi: { - MCOperand Dest; - MCInstLowering.lowerOperand(MI->getOperand(0), Dest); - MCInst TmpInst; - TmpInst.setOpcode(ARM64::B); - TmpInst.addOperand(Dest); - EmitToStreamer(OutStreamer, TmpInst); - return; - } - case ARM64::TLSDESC_BLR: { - MCOperand Callee, Sym; - MCInstLowering.lowerOperand(MI->getOperand(0), Callee); - MCInstLowering.lowerOperand(MI->getOperand(1), Sym); - - // First emit a relocation-annotation. This expands to no code, but requests - // the following instruction gets an R_AARCH64_TLSDESC_CALL. - MCInst TLSDescCall; - TLSDescCall.setOpcode(ARM64::TLSDESCCALL); - TLSDescCall.addOperand(Sym); - EmitToStreamer(OutStreamer, TLSDescCall); - - // Other than that it's just a normal indirect call to the function loaded - // from the descriptor. - MCInst BLR; - BLR.setOpcode(ARM64::BLR); - BLR.addOperand(Callee); - EmitToStreamer(OutStreamer, BLR); - - return; - } - - case TargetOpcode::STACKMAP: - return LowerSTACKMAP(OutStreamer, SM, *MI); - - case TargetOpcode::PATCHPOINT: - return LowerPATCHPOINT(OutStreamer, SM, *MI); - } - - // Finally, do the automated lowerings for everything else. - MCInst TmpInst; - MCInstLowering.Lower(MI, TmpInst); - EmitToStreamer(OutStreamer, TmpInst); -} - -// Force static initialization. 
-extern "C" void LLVMInitializeARM64AsmPrinter() { - RegisterAsmPrinter<ARM64AsmPrinter> X(TheARM64Target); -} diff --git a/lib/Target/ARM64/ARM64BranchRelaxation.cpp b/lib/Target/ARM64/ARM64BranchRelaxation.cpp deleted file mode 100644 index a9bbef5..0000000 --- a/lib/Target/ARM64/ARM64BranchRelaxation.cpp +++ /dev/null @@ -1,505 +0,0 @@ -//===-- ARM64BranchRelaxation.cpp - ARM64 branch relaxation ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-branch-relax" -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" -using namespace llvm; - -static cl::opt<bool> -BranchRelaxation("arm64-branch-relax", cl::Hidden, cl::init(true), - cl::desc("Relax out of range conditional branches")); - -static cl::opt<unsigned> -TBZDisplacementBits("arm64-tbz-offset-bits", cl::Hidden, cl::init(14), - cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); - -static cl::opt<unsigned> -CBZDisplacementBits("arm64-cbz-offset-bits", cl::Hidden, cl::init(19), - cl::desc("Restrict range of CB[N]Z instructions (DEBUG)")); - -static cl::opt<unsigned> -BCCDisplacementBits("arm64-bcc-offset-bits", cl::Hidden, cl::init(19), - cl::desc("Restrict range of Bcc instructions (DEBUG)")); - -STATISTIC(NumSplit, "Number of basic blocks split"); -STATISTIC(NumRelaxed, "Number of conditional branches relaxed"); - -namespace { -class ARM64BranchRelaxation : public MachineFunctionPass { - /// BasicBlockInfo - Information about the offset and size of a single - /// basic block. - struct BasicBlockInfo { - /// Offset - Distance from the beginning of the function to the beginning - /// of this basic block. - /// - /// The offset is always aligned as required by the basic block. - unsigned Offset; - - /// Size - Size of the basic block in bytes. If the block contains - /// inline assembly, this is a worst case estimate. - /// - /// The size does not include any alignment padding whether from the - /// beginning of the block, or from an aligned jump table at the end. - unsigned Size; - - BasicBlockInfo() : Offset(0), Size(0) {} - - /// Compute the offset immediately following this block. If LogAlign is - /// specified, return the offset the successor block will get if it has - /// this alignment. 
- unsigned postOffset(unsigned LogAlign = 0) const { - unsigned PO = Offset + Size; - unsigned Align = 1 << LogAlign; - return (PO + Align - 1) / Align * Align; - } - }; - - SmallVector<BasicBlockInfo, 16> BlockInfo; - - MachineFunction *MF; - const ARM64InstrInfo *TII; - - bool relaxBranchInstructions(); - void scanFunction(); - MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); - void adjustBlockOffsets(MachineBasicBlock *BB); - bool isBlockInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); - bool fixupConditionalBranch(MachineInstr *MI); - void computeBlockSize(MachineBasicBlock *MBB); - unsigned getInstrOffset(MachineInstr *MI) const; - void dumpBBs(); - void verify(); - -public: - static char ID; - ARM64BranchRelaxation() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual const char *getPassName() const { - return "ARM64 branch relaxation pass"; - } -}; -char ARM64BranchRelaxation::ID = 0; -} - -/// verify - check BBOffsets, BBSizes, alignment of islands -void ARM64BranchRelaxation::verify() { -#ifndef NDEBUG - unsigned PrevNum = MF->begin()->getNumber(); - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; - ++MBBI) { - MachineBasicBlock *MBB = MBBI; - unsigned Align = MBB->getAlignment(); - unsigned Num = MBB->getNumber(); - assert(BlockInfo[Num].Offset % (1u << Align) == 0); - assert(!Num || BlockInfo[PrevNum].postOffset() <= BlockInfo[Num].Offset); - PrevNum = Num; - } -#endif -} - -/// print block size and offset information - debugging -void ARM64BranchRelaxation::dumpBBs() { - for (auto &MBB: *MF) { - const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()]; - dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset) - << format("size=%#x\n", BBI.Size); - } -} - -/// BBHasFallthrough - Return true if the specified basic block can fallthrough -/// into the block immediately after it. -static bool BBHasFallthrough(MachineBasicBlock *MBB) { - // Get the next machine basic block in the function. - MachineFunction::iterator MBBI = MBB; - // Can't fall off end of function. - if (std::next(MBBI) == MBB->getParent()->end()) - return false; - - MachineBasicBlock *NextBB = std::next(MBBI); - for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); - I != E; ++I) - if (*I == NextBB) - return true; - - return false; -} - -/// scanFunction - Do the initial scan of the function, building up -/// information about each block. -void ARM64BranchRelaxation::scanFunction() { - BlockInfo.clear(); - BlockInfo.resize(MF->getNumBlockIDs()); - - // First thing, compute the size of all basic blocks, and see if the function - // has any inline assembly in it. If so, we have to be conservative about - // alignment assumptions, as we don't know for sure the size of any - // instructions in the inline assembly. - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - computeBlockSize(I); - - // Compute block offsets and known bits. - adjustBlockOffsets(MF->begin()); -} - -/// computeBlockSize - Compute the size for MBB. -/// This function updates BlockInfo directly. -void ARM64BranchRelaxation::computeBlockSize(MachineBasicBlock *MBB) { - unsigned Size = 0; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) - Size += TII->GetInstSizeInBytes(I); - BlockInfo[MBB->getNumber()].Size = Size; -} - -/// getInstrOffset - Return the current offset of the specified machine -/// instruction from the start of the function. 
This offset changes as stuff is -/// moved around inside the function. -unsigned ARM64BranchRelaxation::getInstrOffset(MachineInstr *MI) const { - MachineBasicBlock *MBB = MI->getParent(); - - // The offset is composed of two things: the sum of the sizes of all MBB's - // before this instruction's block, and the offset from the start of the block - // it is in. - unsigned Offset = BlockInfo[MBB->getNumber()].Offset; - - // Sum instructions before MI in MBB. - for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { - assert(I != MBB->end() && "Didn't find MI in its own basic block?"); - Offset += TII->GetInstSizeInBytes(I); - } - return Offset; -} - -void ARM64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock *Start) { - unsigned PrevNum = Start->getNumber(); - MachineFunction::iterator MBBI = Start, E = MF->end(); - for (++MBBI; MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = MBBI; - unsigned Num = MBB->getNumber(); - if (!Num) // block zero is never changed from offset zero. - continue; - // Get the offset and known bits at the end of the layout predecessor. - // Include the alignment of the current block. - unsigned LogAlign = MBBI->getAlignment(); - BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(LogAlign); - PrevNum = Num; - } -} - -/// Split the basic block containing MI into two blocks, which are joined by -/// an unconditional branch. Update data structures and renumber blocks to -/// account for this change and returns the newly created block. -/// NOTE: Successor list of the original BB is out of date after this function, -/// and must be updated by the caller! Other transforms follow using this -/// utility function, so no point updating now rather than waiting. -MachineBasicBlock * -ARM64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) { - MachineBasicBlock *OrigBB = MI->getParent(); - - // Create a new MBB for the code after the OrigBB. - MachineBasicBlock *NewBB = - MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); - MachineFunction::iterator MBBI = OrigBB; - ++MBBI; - MF->insert(MBBI, NewBB); - - // Splice the instructions starting with MI over to NewBB. - NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); - - // Add an unconditional branch from OrigBB to NewBB. - // Note the new unconditional branch is not being recorded. - // There doesn't seem to be meaningful DebugInfo available; this doesn't - // correspond to anything in the source. - BuildMI(OrigBB, DebugLoc(), TII->get(ARM64::B)).addMBB(NewBB); - - // Insert an entry into BlockInfo to align it properly with the block numbers. - BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); - - // Figure out how large the OrigBB is. As the first half of the original - // block, it cannot contain a tablejump. The size includes - // the new jump we added. (It should be possible to do this without - // recounting everything, but it's very confusing, and this is rarely - // executed.) - computeBlockSize(OrigBB); - - // Figure out how large the NewMBB is. As the second half of the original - // block, it may contain a tablejump. - computeBlockSize(NewBB); - - // All BBOffsets following these blocks must be modified. - adjustBlockOffsets(OrigBB); - - ++NumSplit; - - return NewBB; -} - -/// isBlockInRange - Returns true if the distance between specific MI and -/// specific BB can fit in MI's displacement field. 
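isBlockInRange below reduces to a single formula, so it may help to see the displacement limit computed stand-alone. The conditional-branch immediate counts 4-byte words and one bit is the sign, which is where ((1 << (Bits - 1)) - 1) << 2 comes from; with the defaults above (14 bits for TB(N)Z, 19 for CB(N)Z and Bcc) that gives roughly +/-32 KiB and +/-1 MiB of reach. A minimal sketch (illustrative helper name, plain C++):

#include <cstdio>

// Largest byte displacement encodable by a conditional branch with the
// given number of immediate bits: drop the sign bit, scale by 4.
static unsigned maxBranchOffset(unsigned Bits) {
  return ((1u << (Bits - 1)) - 1) << 2;
}

int main() {
  printf("TB(N)Z     (14 bits): %u bytes\n", maxBranchOffset(14)); // 32764
  printf("CB(N)Z/Bcc (19 bits): %u bytes\n", maxBranchOffset(19)); // 1048572
}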
-bool ARM64BranchRelaxation::isBlockInRange(MachineInstr *MI, - MachineBasicBlock *DestBB, - unsigned Bits) { - unsigned MaxOffs = ((1 << (Bits - 1)) - 1) << 2; - unsigned BrOffset = getInstrOffset(MI); - unsigned DestOffset = BlockInfo[DestBB->getNumber()].Offset; - - DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() - << " from BB#" << MI->getParent()->getNumber() - << " max delta=" << MaxOffs << " from " << getInstrOffset(MI) - << " to " << DestOffset << " offset " - << int(DestOffset - BrOffset) << "\t" << *MI); - - // Branch before the Dest. - if (BrOffset <= DestOffset) - return (DestOffset - BrOffset <= MaxOffs); - return (BrOffset - DestOffset <= MaxOffs); -} - -static bool isConditionalBranch(unsigned Opc) { - switch (Opc) { - default: - return false; - case ARM64::TBZ: - case ARM64::TBNZ: - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: - case ARM64::Bcc: - return true; - } -} - -static MachineBasicBlock *getDestBlock(MachineInstr *MI) { - switch (MI->getOpcode()) { - default: - assert(0 && "unexpected opcode!"); - case ARM64::TBZ: - case ARM64::TBNZ: - return MI->getOperand(2).getMBB(); - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: - case ARM64::Bcc: - return MI->getOperand(1).getMBB(); - } -} - -static unsigned getOppositeConditionOpcode(unsigned Opc) { - switch (Opc) { - default: - assert(0 && "unexpected opcode!"); - case ARM64::TBNZ: return ARM64::TBZ; - case ARM64::TBZ: return ARM64::TBNZ; - case ARM64::CBNZW: return ARM64::CBZW; - case ARM64::CBNZX: return ARM64::CBZX; - case ARM64::CBZW: return ARM64::CBNZW; - case ARM64::CBZX: return ARM64::CBNZX; - case ARM64::Bcc: return ARM64::Bcc; // Condition is an operand for Bcc. - } -} - -static unsigned getBranchDisplacementBits(unsigned Opc) { - switch (Opc) { - default: - assert(0 && "unexpected opcode!"); - case ARM64::TBNZ: - case ARM64::TBZ: - return TBZDisplacementBits; - case ARM64::CBNZW: - case ARM64::CBZW: - case ARM64::CBNZX: - case ARM64::CBZX: - return CBZDisplacementBits; - case ARM64::Bcc: - return BCCDisplacementBits; - } -} - -static inline void invertBccCondition(MachineInstr *MI) { - assert(MI->getOpcode() == ARM64::Bcc && "Unexpected opcode!"); - ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(0).getImm(); - CC = ARM64CC::getInvertedCondCode(CC); - MI->getOperand(0).setImm((int64_t)CC); -} - -/// fixupConditionalBranch - Fix up a conditional branch whose destination is -/// too far away to fit in its displacement field. It is converted to an inverse -/// conditional branch + an unconditional branch to the destination. -bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { - MachineBasicBlock *DestBB = getDestBlock(MI); - - // Add an unconditional branch to the destination and invert the branch - // condition to jump over it: - // tbz L1 - // => - // tbnz L2 - // b L1 - // L2: - - // If the branch is at the end of its MBB and that has a fall-through block, - // direct the updated conditional branch to the fall-through block. Otherwise, - // split the MBB before the next instruction. - MachineBasicBlock *MBB = MI->getParent(); - MachineInstr *BMI = &MBB->back(); - bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); - - if (BMI != MI) { - if (std::next(MachineBasicBlock::iterator(MI)) == - std::prev(MBB->getLastNonDebugInstr()) && - BMI->getOpcode() == ARM64::B) { - // Last MI in the BB is an unconditional branch. 
Can we simply invert the - // condition and swap destinations: - // beq L1 - // b L2 - // => - // bne L2 - // b L1 - MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); - if (isBlockInRange(MI, NewDest, - getBranchDisplacementBits(MI->getOpcode()))) { - DEBUG(dbgs() << " Invert condition and swap its destination with " - << *BMI); - BMI->getOperand(0).setMBB(DestBB); - unsigned OpNum = - (MI->getOpcode() == ARM64::TBZ || MI->getOpcode() == ARM64::TBNZ) - ? 2 - : 1; - MI->getOperand(OpNum).setMBB(NewDest); - MI->setDesc(TII->get(getOppositeConditionOpcode(MI->getOpcode()))); - if (MI->getOpcode() == ARM64::Bcc) - invertBccCondition(MI); - return true; - } - } - } - - if (NeedSplit) { - // Analyze the branch so we know how to update the successor lists. - MachineBasicBlock *TBB, *FBB; - SmallVector<MachineOperand, 2> Cond; - TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, false); - - MachineBasicBlock *NewBB = splitBlockBeforeInstr(MI); - // No need for the branch to the next block. We're adding an unconditional - // branch to the destination. - int delta = TII->GetInstSizeInBytes(&MBB->back()); - BlockInfo[MBB->getNumber()].Size -= delta; - MBB->back().eraseFromParent(); - // BlockInfo[SplitBB].Offset is wrong temporarily, fixed below - - // Update the successor lists according to the transformation to follow. - // Do it here since if there's no split, no update is needed. - MBB->replaceSuccessor(FBB, NewBB); - NewBB->addSuccessor(FBB); - } - MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB)); - - DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() - << ", invert condition and change dest. to BB#" - << NextBB->getNumber() << "\n"); - - // Insert a new conditional branch and a new unconditional branch. - MachineInstrBuilder MIB = BuildMI( - MBB, DebugLoc(), TII->get(getOppositeConditionOpcode(MI->getOpcode()))) - .addOperand(MI->getOperand(0)); - if (MI->getOpcode() == ARM64::TBZ || MI->getOpcode() == ARM64::TBNZ) - MIB.addOperand(MI->getOperand(1)); - if (MI->getOpcode() == ARM64::Bcc) - invertBccCondition(MIB); - MIB.addMBB(NextBB); - BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); - BuildMI(MBB, DebugLoc(), TII->get(ARM64::B)).addMBB(DestBB); - BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); - - // Remove the old conditional branch. It may or may not still be in MBB. - BlockInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI); - MI->eraseFromParent(); - - // Finally, keep the block offsets up to date. - adjustBlockOffsets(MBB); - return true; -} - -bool ARM64BranchRelaxation::relaxBranchInstructions() { - bool Changed = false; - // Relaxing branches involves creating new basic blocks, so re-eval - // end() for termination. - for (auto &MBB : *MF) { - MachineInstr *MI = MBB.getFirstTerminator(); - if (isConditionalBranch(MI->getOpcode()) && - !isBlockInRange(MI, getDestBlock(MI), - getBranchDisplacementBits(MI->getOpcode()))) { - fixupConditionalBranch(MI); - ++NumRelaxed; - Changed = true; - } - } - return Changed; -} - -bool ARM64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) { - MF = &mf; - - // If the pass is disabled, just bail early. - if (!BranchRelaxation) - return false; - - DEBUG(dbgs() << "***** ARM64BranchRelaxation *****\n"); - - TII = (const ARM64InstrInfo *)MF->getTarget().getInstrInfo(); - - // Renumber all of the machine basic blocks in the function, guaranteeing that - // the numbers agree with the position of the block in the function. 
- MF->RenumberBlocks(); - - // Do the initial scan of the function, building up information about the - // sizes of each block. - scanFunction(); - - DEBUG(dbgs() << " Basic blocks before relaxation\n"); - DEBUG(dumpBBs()); - - bool MadeChange = false; - while (relaxBranchInstructions()) - MadeChange = true; - - // After a while, this might be made debug-only, but it is not expensive. - verify(); - - DEBUG(dbgs() << " Basic blocks after relaxation\n"); - DEBUG(dbgs() << '\n'; dumpBBs()); - - BlockInfo.clear(); - - return MadeChange; -} - -/// createARM64BranchRelaxation - returns an instance of the constpool -/// island pass. -FunctionPass *llvm::createARM64BranchRelaxation() { - return new ARM64BranchRelaxation(); -} diff --git a/lib/Target/ARM64/ARM64CallingConv.h b/lib/Target/ARM64/ARM64CallingConv.h deleted file mode 100644 index 0128236..0000000 --- a/lib/Target/ARM64/ARM64CallingConv.h +++ /dev/null @@ -1,94 +0,0 @@ -//=== ARM64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the custom routines for the ARM64 Calling Convention that -// aren't done by tablegen. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64CALLINGCONV_H -#define ARM64CALLINGCONV_H - -#include "ARM64InstrInfo.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/Target/TargetInstrInfo.h" - -namespace llvm { - -/// CC_ARM64_Custom_i1i8i16_Reg - customized handling of passing i1/i8/i16 via -/// register. Here, ValVT can be i1/i8/i16 or i32 depending on whether the -/// argument is already promoted and LocVT is i1/i8/i16. We only promote the -/// argument to i32 if we are sure this argument will be passed in register. -static bool CC_ARM64_Custom_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, - CCState &State, - bool IsWebKitJS = false) { - static const uint16_t RegList1[] = { ARM64::W0, ARM64::W1, ARM64::W2, - ARM64::W3, ARM64::W4, ARM64::W5, - ARM64::W6, ARM64::W7 }; - static const uint16_t RegList2[] = { ARM64::X0, ARM64::X1, ARM64::X2, - ARM64::X3, ARM64::X4, ARM64::X5, - ARM64::X6, ARM64::X7 }; - static const uint16_t WebKitRegList1[] = { ARM64::W0 }; - static const uint16_t WebKitRegList2[] = { ARM64::X0 }; - - const uint16_t *List1 = IsWebKitJS ? WebKitRegList1 : RegList1; - const uint16_t *List2 = IsWebKitJS ? WebKitRegList2 : RegList2; - - if (unsigned Reg = State.AllocateReg(List1, List2, 8)) { - // Customized extra section for handling i1/i8/i16: - // We need to promote the argument to i32 if it is not done already. - if (ValVT != MVT::i32) { - if (ArgFlags.isSExt()) - LocInfo = CCValAssign::SExt; - else if (ArgFlags.isZExt()) - LocInfo = CCValAssign::ZExt; - else - LocInfo = CCValAssign::AExt; - ValVT = MVT::i32; - } - // Set LocVT to i32 as well if passing via register. - LocVT = MVT::i32; - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return true; - } - return false; -} - -/// CC_ARM64_WebKit_JS_i1i8i16_Reg - customized handling of passing i1/i8/i16 -/// via register. This behaves the same as CC_ARM64_Custom_i1i8i16_Reg, but only -/// uses the first register. 
-static bool CC_ARM64_WebKit_JS_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, - CCState &State) { - return CC_ARM64_Custom_i1i8i16_Reg(ValNo, ValVT, LocVT, LocInfo, ArgFlags, - State, true); -} - -/// CC_ARM64_Custom_i1i8i16_Stack: customized handling of passing i1/i8/i16 on -/// stack. Here, ValVT can be i1/i8/i16 or i32 depending on whether the argument -/// is already promoted and LocVT is i1/i8/i16. If ValVT is already promoted, -/// it will be truncated back to i1/i8/i16. -static bool CC_ARM64_Custom_i1i8i16_Stack(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, - CCState &State) { - unsigned Space = ((LocVT == MVT::i1 || LocVT == MVT::i8) ? 1 : 2); - unsigned Offset12 = State.AllocateStack(Space, Space); - ValVT = LocVT; - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset12, LocVT, LocInfo)); - return true; -} - -} // End llvm namespace - -#endif diff --git a/lib/Target/ARM64/ARM64CallingConvention.td b/lib/Target/ARM64/ARM64CallingConvention.td deleted file mode 100644 index 9ac888f..0000000 --- a/lib/Target/ARM64/ARM64CallingConvention.td +++ /dev/null @@ -1,210 +0,0 @@ -//===- ARM64CallingConv.td - Calling Conventions for ARM64 -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This describes the calling conventions for ARM64 architecture. -// -//===----------------------------------------------------------------------===// - -/// CCIfAlign - Match of the original alignment of the arg -class CCIfAlign<string Align, CCAction A> : - CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>; - -//===----------------------------------------------------------------------===// -// ARM AAPCS64 Calling Convention -//===----------------------------------------------------------------------===// - -def CC_ARM64_AAPCS : CallingConv<[ - CCIfType<[v2f32], CCBitConvertToType<v2i32>>, - CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>, - - // An SRet is passed in X8, not X0 like a normal pointer parameter. - CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>, - - // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, - // up to eight each of GPR and FPR. - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Reg">>, - CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], - [X0, X1, X2, X3, X4, X5, X6, X7]>>, - // i128 is split to two i64s, we can't fit half to register X7. - CCIfType<[i64], CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], - [X0, X1, X3, X5]>>>, - - // i128 is split to two i64s, and its stack alignment is 16 bytes. 
- CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>, - - CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], - [W0, W1, W2, W3, W4, W5, W6, W7]>>, - CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], - CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], - CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - - // If more than will fit in registers, pass them on the stack instead. - CCIfType<[i1, i8, i16], CCAssignToStack<8, 8>>, - CCIfType<[i32, f32], CCAssignToStack<8, 8>>, - CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8], - CCAssignToStack<8, 8>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>> -]>; - -def RetCC_ARM64_AAPCS : CallingConv<[ - CCIfType<[v2f32], CCBitConvertToType<v2i32>>, - CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>, - - CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], - [X0, X1, X2, X3, X4, X5, X6, X7]>>, - CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], - [W0, W1, W2, W3, W4, W5, W6, W7]>>, - CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], - CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], - CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> -]>; - - -// Darwin uses a calling convention which differs in only two ways -// from the standard one at this level: -// + i128s (i.e. split i64s) don't need even registers. -// + Stack slots are sized as needed rather than being at least 64-bit. -def CC_ARM64_DarwinPCS : CallingConv<[ - CCIfType<[v2f32], CCBitConvertToType<v2i32>>, - CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>, - - // An SRet is passed in X8, not X0 like a normal pointer parameter. - CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>, - - // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, - // up to eight each of GPR and FPR. - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Reg">>, - CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], - [X0, X1, X2, X3, X4, X5, X6, X7]>>, - // i128 is split to two i64s, we can't fit half to register X7. - CCIfType<[i64], - CCIfSplit<CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6], - [W0, W1, W2, W3, W4, W5, W6]>>>, - // i128 is split to two i64s, and its stack alignment is 16 bytes. 
- CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>, - - CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], - [W0, W1, W2, W3, W4, W5, W6, W7]>>, - CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], - CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], - CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - - // If more than will fit in registers, pass them on the stack instead. - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Stack">>, - CCIfType<[i32, f32], CCAssignToStack<4, 4>>, - CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8], - CCAssignToStack<8, 8>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>> -]>; - -def CC_ARM64_DarwinPCS_VarArg : CallingConv<[ - CCIfType<[v2f32], CCBitConvertToType<v2i32>>, - CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>, - - // Handle all scalar types as either i64 or f64. - CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, - CCIfType<[f32], CCPromoteToType<f64>>, - - // Everything is on the stack. - // i128 is split to two i64s, and its stack alignment is 16 bytes. - CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>, - CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], CCAssignToStack<8, 8>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>> -]>; - -// The WebKit_JS calling convention only passes the first argument (the callee) -// in register and the remaining arguments on stack. We allow 32bit stack slots, -// so that WebKit can write partial values in the stack and define the other -// 32bit quantity as undef. -def CC_ARM64_WebKit_JS : CallingConv<[ - // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_WebKit_JS_i1i8i16_Reg">>, - CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>, - CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>, - - // Pass the remaining arguments on the stack instead. - CCIfType<[i1, i8, i16], CCAssignToStack<4, 4>>, - CCIfType<[i32, f32], CCAssignToStack<4, 4>>, - CCIfType<[i64, f64], CCAssignToStack<8, 8>> -]>; - -def RetCC_ARM64_WebKit_JS : CallingConv<[ - CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], - [X0, X1, X2, X3, X4, X5, X6, X7]>>, - CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], - [W0, W1, W2, W3, W4, W5, W6, W7]>>, - CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> -]>; - -// FIXME: LR is only callee-saved in the sense that *we* preserve it and are -// presumably a callee to someone. External functions may not do so, but this -// is currently safe since BL has LR as an implicit-def and what happens after a -// tail call doesn't matter. -// -// It would be better to model its preservation semantics properly (create a -// vreg on entry, use it in RET & tail call generation; make that vreg def if we -// end up saving LR as part of a call frame). Watch this space... 
-def CSR_ARM64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, - X23, X24, X25, X26, X27, X28, - D8, D9, D10, D11, - D12, D13, D14, D15)>; - -// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since -// 'this' and the pointer return value are both passed in X0 in these cases, -// this can be partially modelled by treating X0 as a callee-saved register; -// only the resulting RegMask is used; the SaveList is ignored -// -// (For generic ARM 64-bit ABI code, clang will not generate constructors or -// destructors with 'this' returns, so this RegMask will not be used in that -// case) -def CSR_ARM64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_ARM64_AAPCS, X0)>; - -// The function used by Darwin to obtain the address of a thread-local variable -// guarantees more than a normal AAPCS function. x16 and x17 are used on the -// fast path for calculation, but other registers except X0 (argument/return) -// and LR (it is a call, after all) are preserved. -def CSR_ARM64_TLS_Darwin - : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17), - FP, - (sequence "Q%u", 0, 31))>; - -// The ELF stub used for TLS-descriptor access saves every feasible -// register. Only X0 and LR are clobbered. -def CSR_ARM64_TLS_ELF - : CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP, - (sequence "Q%u", 0, 31))>; - -def CSR_ARM64_AllRegs - : CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP, - (sequence "X%u", 0, 28), FP, LR, SP, - (sequence "B%u", 0, 31), (sequence "H%u", 0, 31), - (sequence "S%u", 0, 31), (sequence "D%u", 0, 31), - (sequence "Q%u", 0, 31))>; - diff --git a/lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp b/lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp deleted file mode 100644 index e3f8248..0000000 --- a/lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp +++ /dev/null @@ -1,147 +0,0 @@ -//===-- ARM64CleanupLocalDynamicTLSPass.cpp -----------------------*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Local-dynamic access to thread-local variables proceeds in three stages. -// -// 1. The offset of this Module's thread-local area from TPIDR_EL0 is calculated -// in much the same way as a general-dynamic TLS-descriptor access against -// the special symbol _TLS_MODULE_BASE. -// 2. The variable's offset from _TLS_MODULE_BASE_ is calculated using -// instructions with "dtprel" modifiers. -// 3. These two are added, together with TPIDR_EL0, to obtain the variable's -// true address. -// -// This is only better than general-dynamic access to the variable if two or -// more of the first stage TLS-descriptor calculations can be combined. This -// pass looks through a function and performs such combinations. 
-// -//===----------------------------------------------------------------------===// -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64TargetMachine.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -using namespace llvm; - -namespace { -struct LDTLSCleanup : public MachineFunctionPass { - static char ID; - LDTLSCleanup() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF) { - ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>(); - if (AFI->getNumLocalDynamicTLSAccesses() < 2) { - // No point folding accesses if there isn't at least two. - return false; - } - - MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>(); - return VisitNode(DT->getRootNode(), 0); - } - - // Visit the dominator subtree rooted at Node in pre-order. - // If TLSBaseAddrReg is non-null, then use that to replace any - // TLS_base_addr instructions. Otherwise, create the register - // when the first such instruction is seen, and then use it - // as we encounter more instructions. - bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) { - MachineBasicBlock *BB = Node->getBlock(); - bool Changed = false; - - // Traverse the current block. - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; - ++I) { - switch (I->getOpcode()) { - case ARM64::TLSDESC_BLR: - // Make sure it's a local dynamic access. - if (!I->getOperand(1).isSymbol() || - strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_")) - break; - - if (TLSBaseAddrReg) - I = replaceTLSBaseAddrCall(I, TLSBaseAddrReg); - else - I = setRegister(I, &TLSBaseAddrReg); - Changed = true; - break; - default: - break; - } - } - - // Visit the children of this block in the dominator tree. - for (MachineDomTreeNode *N : *Node) { - Changed |= VisitNode(N, TLSBaseAddrReg); - } - - return Changed; - } - - // Replace the TLS_base_addr instruction I with a copy from - // TLSBaseAddrReg, returning the new instruction. - MachineInstr *replaceTLSBaseAddrCall(MachineInstr *I, - unsigned TLSBaseAddrReg) { - MachineFunction *MF = I->getParent()->getParent(); - const ARM64TargetMachine *TM = - static_cast<const ARM64TargetMachine *>(&MF->getTarget()); - const ARM64InstrInfo *TII = TM->getInstrInfo(); - - // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the - // code sequence assumes the address will be. - MachineInstr *Copy = - BuildMI(*I->getParent(), I, I->getDebugLoc(), - TII->get(TargetOpcode::COPY), ARM64::X0).addReg(TLSBaseAddrReg); - - // Erase the TLS_base_addr instruction. - I->eraseFromParent(); - - return Copy; - } - - // Create a virtal register in *TLSBaseAddrReg, and populate it by - // inserting a copy instruction after I. Returns the new instruction. - MachineInstr *setRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) { - MachineFunction *MF = I->getParent()->getParent(); - const ARM64TargetMachine *TM = - static_cast<const ARM64TargetMachine *>(&MF->getTarget()); - const ARM64InstrInfo *TII = TM->getInstrInfo(); - - // Create a virtual register for the TLS base address. - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - *TLSBaseAddrReg = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); - - // Insert a copy from X0 to TLSBaseAddrReg for later. 
- MachineInstr *Next = I->getNextNode(); - MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), - TII->get(TargetOpcode::COPY), - *TLSBaseAddrReg).addReg(ARM64::X0); - - return Copy; - } - - virtual const char *getPassName() const { - return "Local Dynamic TLS Access Clean-up"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<MachineDominatorTree>(); - MachineFunctionPass::getAnalysisUsage(AU); - } -}; -} - -char LDTLSCleanup::ID = 0; -FunctionPass *llvm::createARM64CleanupLocalDynamicTLSPass() { - return new LDTLSCleanup(); -} diff --git a/lib/Target/ARM64/ARM64CollectLOH.cpp b/lib/Target/ARM64/ARM64CollectLOH.cpp deleted file mode 100644 index f52778f..0000000 --- a/lib/Target/ARM64/ARM64CollectLOH.cpp +++ /dev/null @@ -1,1157 +0,0 @@ -//===-------------- ARM64CollectLOH.cpp - ARM64 collect LOH pass --*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass that collect the Linker Optimization Hint (LOH). -// This pass should be run at the very end of the compilation flow, just before -// assembly printer. -// To be useful for the linker, the LOH must be printed into the assembly file. -// -// A LOH describes a sequence of instructions that may be optimized by the -// linker. -// This same sequence cannot be optimized by the compiler because some of -// the information will be known at link time. -// For instance, consider the following sequence: -// L1: adrp xA, sym@PAGE -// L2: add xB, xA, sym@PAGEOFF -// L3: ldr xC, [xB, #imm] -// This sequence can be turned into: -// A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB: -// L3: ldr xC, sym+#imm -// It may also be turned into either the following more efficient -// code sequences: -// - If sym@PAGEOFF + #imm fits the encoding space of L3. -// L1: adrp xA, sym@PAGE -// L3: ldr xC, [xB, sym@PAGEOFF + #imm] -// - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB: -// L1: adr xA, sym -// L3: ldr xC, [xB, #imm] -// -// To be valid a LOH must meet all the requirements needed by all the related -// possible linker transformations. -// For instance, using the running example, the constraints to emit -// ".loh AdrpAddLdr" are: -// - L1, L2, and L3 instructions are of the expected type, i.e., -// respectively ADRP, ADD (immediate), and LD. -// - The result of L1 is used only by L2. -// - The register argument (xA) used in the ADD instruction is defined -// only by L1. -// - The result of L2 is used only by L3. -// - The base address (xB) in L3 is defined only L2. 
-// - The ADRP in L1 and the ADD in L2 must reference the same symbol using -// @PAGE/@PAGEOFF with no additional constants -// -// Currently supported LOHs are: -// * So called non-ADRP-related: -// - .loh AdrpAddLdr L1, L2, L3: -// L1: adrp xA, sym@PAGE -// L2: add xB, xA, sym@PAGEOFF -// L3: ldr xC, [xB, #imm] -// - .loh AdrpLdrGotLdr L1, L2, L3: -// L1: adrp xA, sym@GOTPAGE -// L2: ldr xB, [xA, sym@GOTPAGEOFF] -// L3: ldr xC, [xB, #imm] -// - .loh AdrpLdr L1, L3: -// L1: adrp xA, sym@PAGE -// L3: ldr xC, [xA, sym@PAGEOFF] -// - .loh AdrpAddStr L1, L2, L3: -// L1: adrp xA, sym@PAGE -// L2: add xB, xA, sym@PAGEOFF -// L3: str xC, [xB, #imm] -// - .loh AdrpLdrGotStr L1, L2, L3: -// L1: adrp xA, sym@GOTPAGE -// L2: ldr xB, [xA, sym@GOTPAGEOFF] -// L3: str xC, [xB, #imm] -// - .loh AdrpAdd L1, L2: -// L1: adrp xA, sym@PAGE -// L2: add xB, xA, sym@PAGEOFF -// For all these LOHs, L1, L2, L3 form a simple chain: -// L1 result is used only by L2 and L2 result by L3. -// L3 LOH-related argument is defined only by L2 and L2 LOH-related argument -// by L1. -// All these LOHs aim at using more efficient load/store patterns by folding -// some instructions used to compute the address directly into the load/store. -// -// * So called ADRP-related: -// - .loh AdrpAdrp L2, L1: -// L2: ADRP xA, sym1@PAGE -// L1: ADRP xA, sym2@PAGE -// L2 dominates L1 and xA is not redifined between L2 and L1 -// This LOH aims at getting rid of redundant ADRP instructions. -// -// The overall design for emitting the LOHs is: -// 1. ARM64CollectLOH (this pass) records the LOHs in the ARM64FunctionInfo. -// 2. ARM64AsmPrinter reads the LOHs from ARM64FunctionInfo and it: -// 1. Associates them a label. -// 2. Emits them in a MCStreamer (EmitLOHDirective). -// - The MCMachOStreamer records them into the MCAssembler. -// - The MCAsmStreamer prints them. -// - Other MCStreamers ignore them. -// 3. Closes the MCStreamer: -// - The MachObjectWriter gets them from the MCAssembler and writes -// them in the object file. -// - Other ObjectWriters ignore them. 
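To make the hand-off between the two steps concrete: once a chain such as AdrpAddLdr has been validated, the pass records the involved instructions in ARM64FunctionInfo, in the same way computeADRP does later in this file for the AdrpAdrp case. A minimal sketch of that recording step (a fragment only; the helper name is invented, and it assumes the pass's existing includes such as ARM64MachineFunctionInfo.h):

// Record an AdrpAddLdr hint for a validated L1 (ADRP), L2 (ADD), L3 (LDR)
// chain; the AsmPrinter later attaches labels and emits ".loh AdrpAddLdr".
static void recordAdrpAddLdr(ARM64FunctionInfo &ARM64FI,
                             const MachineInstr *L1, const MachineInstr *L2,
                             const MachineInstr *L3) {
  SmallVector<const MachineInstr *, 3> Args;
  Args.push_back(L1);
  Args.push_back(L2);
  Args.push_back(L3);
  ARM64FI.addLOHDirective(MCLOH_AdrpAddLdr, Args);
}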
-//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-collect-loh" -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -static cl::opt<bool> -PreCollectRegister("arm64-collect-loh-pre-collect-register", cl::Hidden, - cl::desc("Restrict analysis to registers invovled" - " in LOHs"), - cl::init(true)); - -static cl::opt<bool> -BasicBlockScopeOnly("arm64-collect-loh-bb-only", cl::Hidden, - cl::desc("Restrict analysis at basic block scope"), - cl::init(true)); - -STATISTIC(NumADRPSimpleCandidate, - "Number of simplifiable ADRP dominate by another"); -STATISTIC(NumADRPComplexCandidate2, - "Number of simplifiable ADRP reachable by 2 defs"); -STATISTIC(NumADRPComplexCandidate3, - "Number of simplifiable ADRP reachable by 3 defs"); -STATISTIC(NumADRPComplexCandidateOther, - "Number of simplifiable ADRP reachable by 4 or more defs"); -STATISTIC(NumADDToSTRWithImm, - "Number of simplifiable STR with imm reachable by ADD"); -STATISTIC(NumLDRToSTRWithImm, - "Number of simplifiable STR with imm reachable by LDR"); -STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD"); -STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR"); -STATISTIC(NumADDToLDRWithImm, - "Number of simplifiable LDR with imm reachable by ADD"); -STATISTIC(NumLDRToLDRWithImm, - "Number of simplifiable LDR with imm reachable by LDR"); -STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD"); -STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR"); -STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP"); -STATISTIC(NumCplxLvl1, "Number of complex case of level 1"); -STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1"); -STATISTIC(NumCplxLvl2, "Number of complex case of level 2"); -STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2"); -STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD"); -STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD"); - -namespace llvm { -void initializeARM64CollectLOHPass(PassRegistry &); -} - -namespace { -struct ARM64CollectLOH : public MachineFunctionPass { - static char ID; - ARM64CollectLOH() : MachineFunctionPass(ID) { - initializeARM64CollectLOHPass(*PassRegistry::getPassRegistry()); - } - - virtual bool runOnMachineFunction(MachineFunction &Fn); - - virtual const char *getPassName() const { - return "ARM64 Collect Linker Optimization Hint (LOH)"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired<MachineDominatorTree>(); - } - -private: -}; - -/// A set of MachineInstruction. 
-typedef SetVector<const MachineInstr *> SetOfMachineInstr; -/// Map a basic block to a set of instructions per register. -/// This is used to represent the exposed uses of a basic block -/// per register. -typedef MapVector<const MachineBasicBlock *, SetOfMachineInstr *> -BlockToSetOfInstrsPerColor; -/// Map a basic block to an instruction per register. -/// This is used to represent the live-out definitions of a basic block -/// per register. -typedef MapVector<const MachineBasicBlock *, const MachineInstr **> -BlockToInstrPerColor; -/// Map an instruction to a set of instructions. Used to represent the -/// mapping def to reachable uses or use to definitions. -typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs; -/// Map a basic block to a BitVector. -/// This is used to record the kill registers per basic block. -typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet; - -/// Map a register to a dense id. -typedef DenseMap<unsigned, unsigned> MapRegToId; -/// Map a dense id to a register. Used for debug purposes. -typedef SmallVector<unsigned, 32> MapIdToReg; -} // end anonymous namespace. - -char ARM64CollectLOH::ID = 0; - -INITIALIZE_PASS_BEGIN(ARM64CollectLOH, "arm64-collect-loh", - "ARM64 Collect Linker Optimization Hint (LOH)", false, - false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_END(ARM64CollectLOH, "arm64-collect-loh", - "ARM64 Collect Linker Optimization Hint (LOH)", false, - false) - -/// Given a couple (MBB, reg) get the corresponding set of instruction from -/// the given "sets". -/// If this couple does not reference any set, an empty set is added to "sets" -/// for this couple and returned. -/// \param nbRegs is used internally allocate some memory. It must be consistent -/// with the way sets is used. -static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets, - const MachineBasicBlock *MBB, unsigned reg, - unsigned nbRegs) { - SetOfMachineInstr *result; - BlockToSetOfInstrsPerColor::iterator it = sets.find(MBB); - if (it != sets.end()) { - result = it->second; - } else { - result = sets[MBB] = new SetOfMachineInstr[nbRegs]; - } - - return result[reg]; -} - -/// Given a couple (reg, MI) get the corresponding set of instructions from the -/// the given "sets". -/// This is used to get the uses record in sets of a definition identified by -/// MI and reg, i.e., MI defines reg. -/// If the couple does not reference anything, an empty set is added to -/// "sets[reg]". -/// \pre set[reg] is valid. -static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg, - const MachineInstr *MI) { - return sets[reg][MI]; -} - -/// Same as getUses but does not modify the input map: sets. -/// \return NULL if the couple (reg, MI) is not in sets. -static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg, - const MachineInstr *MI) { - InstrToInstrs::const_iterator Res = sets[reg].find(MI); - if (Res != sets[reg].end()) - return &(Res->second); - return NULL; -} - -/// Initialize the reaching definition algorithm: -/// For each basic block BB in MF, record: -/// - its kill set. -/// - its reachable uses (uses that are exposed to BB's predecessors). -/// - its the generated definitions. -/// \param DummyOp if not NULL, specifies a Dummy Operation to be added to -/// the list of uses of exposed defintions. -/// \param ADRPMode specifies to only consider ADRP instructions for generated -/// definition. It also consider definitions of ADRP instructions as uses and -/// ignore other uses. 
The ADRPMode is used to collect the information for LHO -/// that involve ADRP operation only. -static void initReachingDef(MachineFunction *MF, - InstrToInstrs *ColorOpToReachedUses, - BlockToInstrPerColor &Gen, BlockToRegSet &Kill, - BlockToSetOfInstrsPerColor &ReachableUses, - const MapRegToId &RegToId, - const MachineInstr *DummyOp, bool ADRPMode) { - const TargetMachine &TM = MF->getTarget(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - - unsigned NbReg = RegToId.size(); - - for (MachineFunction::const_iterator IMBB = MF->begin(), IMBBEnd = MF->end(); - IMBB != IMBBEnd; ++IMBB) { - const MachineBasicBlock *MBB = &(*IMBB); - const MachineInstr **&BBGen = Gen[MBB]; - BBGen = new const MachineInstr *[NbReg]; - memset(BBGen, 0, sizeof(const MachineInstr *) * NbReg); - - BitVector &BBKillSet = Kill[MBB]; - BBKillSet.resize(NbReg); - for (MachineBasicBlock::const_iterator II = MBB->begin(), IEnd = MBB->end(); - II != IEnd; ++II) { - bool IsADRP = II->getOpcode() == ARM64::ADRP; - - // Process uses first. - if (IsADRP || !ADRPMode) - for (MachineInstr::const_mop_iterator IO = II->operands_begin(), - IOEnd = II->operands_end(); - IO != IOEnd; ++IO) { - // Treat ADRP def as use, as the goal of the analysis is to find - // ADRP defs reached by other ADRP defs. - if (!IO->isReg() || (!ADRPMode && !IO->isUse()) || - (ADRPMode && (!IsADRP || !IO->isDef()))) - continue; - unsigned CurReg = IO->getReg(); - MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); - if (ItCurRegId == RegToId.end()) - continue; - CurReg = ItCurRegId->second; - - // if CurReg has not been defined, this use is reachable. - if (!BBGen[CurReg] && !BBKillSet.test(CurReg)) - getSet(ReachableUses, MBB, CurReg, NbReg).insert(&(*II)); - // current basic block definition for this color, if any, is in Gen. - if (BBGen[CurReg]) - getUses(ColorOpToReachedUses, CurReg, BBGen[CurReg]).insert(&(*II)); - } - - // Process clobbers. - for (MachineInstr::const_mop_iterator IO = II->operands_begin(), - IOEnd = II->operands_end(); - IO != IOEnd; ++IO) { - if (!IO->isRegMask()) - continue; - // Clobbers kill the related colors. - const uint32_t *PreservedRegs = IO->getRegMask(); - - // Set generated regs. - for (const auto Entry : RegToId) { - unsigned Reg = Entry.second; - // Use the global register ID when querying APIs external to this - // pass. - if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) { - // Do not register clobbered definition for no ADRP. - // This definition is not used anyway (otherwise register - // allocation is wrong). - BBGen[Reg] = ADRPMode ? II : NULL; - BBKillSet.set(Reg); - } - } - } - - // Process defs - for (MachineInstr::const_mop_iterator IO = II->operands_begin(), - IOEnd = II->operands_end(); - IO != IOEnd; ++IO) { - if (!IO->isReg() || !IO->isDef()) - continue; - unsigned CurReg = IO->getReg(); - MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); - if (ItCurRegId == RegToId.end()) - continue; - - for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) { - MapRegToId::const_iterator ItRegId = RegToId.find(*AI); - assert(ItRegId != RegToId.end() && - "Sub-register of an " - "involved register, not recorded as involved!"); - BBKillSet.set(ItRegId->second); - BBGen[ItRegId->second] = &(*II); - } - BBGen[ItCurRegId->second] = &(*II); - } - } - - // If we restrict our analysis to basic block scope, conservatively add a - // dummy - // use for each generated value. 
- if (!ADRPMode && DummyOp && !MBB->succ_empty()) - for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) - if (BBGen[CurReg]) - getUses(ColorOpToReachedUses, CurReg, BBGen[CurReg]).insert(DummyOp); - } -} - -/// Reaching def core algorithm: -/// while an Out has changed -/// for each bb -/// for each color -/// In[bb][color] = U Out[bb.predecessors][color] -/// insert reachableUses[bb][color] in each in[bb][color] -/// op.reachedUses -/// -/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) -static void reachingDefAlgorithm(MachineFunction *MF, - InstrToInstrs *ColorOpToReachedUses, - BlockToSetOfInstrsPerColor &In, - BlockToSetOfInstrsPerColor &Out, - BlockToInstrPerColor &Gen, BlockToRegSet &Kill, - BlockToSetOfInstrsPerColor &ReachableUses, - unsigned NbReg) { - bool HasChanged; - do { - HasChanged = false; - for (MachineFunction::const_iterator IMBB = MF->begin(), - IMBBEnd = MF->end(); - IMBB != IMBBEnd; ++IMBB) { - const MachineBasicBlock *MBB = &(*IMBB); - unsigned CurReg; - for (CurReg = 0; CurReg < NbReg; ++CurReg) { - SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg); - SetOfMachineInstr &BBReachableUses = - getSet(ReachableUses, MBB, CurReg, NbReg); - SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg); - unsigned Size = BBOutSet.size(); - // In[bb][color] = U Out[bb.predecessors][color] - for (MachineBasicBlock::const_pred_iterator - PredMBB = MBB->pred_begin(), - EndPredMBB = MBB->pred_end(); - PredMBB != EndPredMBB; ++PredMBB) { - SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg); - BBInSet.insert(PredOutSet.begin(), PredOutSet.end()); - } - // insert reachableUses[bb][color] in each in[bb][color] op.reachedses - for (const MachineInstr *MI: BBInSet) { - SetOfMachineInstr &OpReachedUses = - getUses(ColorOpToReachedUses, CurReg, MI); - OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end()); - } - // Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) - if (!Kill[MBB].test(CurReg)) - BBOutSet.insert(BBInSet.begin(), BBInSet.end()); - if (Gen[MBB][CurReg]) - BBOutSet.insert(Gen[MBB][CurReg]); - HasChanged |= BBOutSet.size() != Size; - } - } - } while (HasChanged); -} - -/// Release all memory dynamically allocated during the reaching -/// definition algorithm. -static void finitReachingDef(BlockToSetOfInstrsPerColor &In, - BlockToSetOfInstrsPerColor &Out, - BlockToInstrPerColor &Gen, - BlockToSetOfInstrsPerColor &ReachableUses) { - for (BlockToSetOfInstrsPerColor::const_iterator IT = Out.begin(), - End = Out.end(); - IT != End; ++IT) - delete[] IT->second; - for (BlockToSetOfInstrsPerColor::const_iterator IT = In.begin(), - End = In.end(); - IT != End; ++IT) - delete[] IT->second; - for (BlockToSetOfInstrsPerColor::const_iterator IT = ReachableUses.begin(), - End = ReachableUses.end(); - IT != End; ++IT) - delete[] IT->second; - for (BlockToInstrPerColor::const_iterator IT = Gen.begin(), End = Gen.end(); - IT != End; ++IT) - delete[] IT->second; -} - -/// Reaching definiton algorithm. -/// \param MF function on which the algorithm will operate. -/// \param[out] ColorOpToReachedUses will contain the result of the reaching -/// def algorithm. -/// \param ADRPMode specify whether the reaching def algorithm should be tuned -/// for ADRP optimization. \see initReachingDef for more details. -/// \param DummyOp if not NULL, the algorithm will work at -/// basic block scope and will set for every exposed defintion a use to -/// @p DummyOp. -/// \pre ColorOpToReachedUses is an array of at least number of registers of -/// InstrToInstrs. 
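Stripped of the per-register arrays and MachineInstr bookkeeping, the fixed-point iteration spelled out in the comment above (In[bb] = union of the predecessors' Out; Out[bb] = Gen[bb] U (In[bb] - Kill[bb])) looks like the self-contained sketch below. Block and definition ids are invented for illustration; the real pass tracks sets of MachineInstr* per register and also propagates ReachableUses into each reached definition's use set.

#include <set>
#include <vector>

// One register ("color") only: defs are small integers, and each block knows
// its predecessors, the defs it generates, and whether it kills the color.
struct Block {
  std::vector<int> Preds;
  std::set<int> Gen;
  bool Kill = false;
};

// Iterate until no Out set changes, as in reachingDefAlgorithm above.
static std::vector<std::set<int>> reachingDefs(const std::vector<Block> &CFG) {
  std::vector<std::set<int>> Out(CFG.size());
  bool Changed = true;
  while (Changed) {
    Changed = false;
    for (size_t BB = 0; BB < CFG.size(); ++BB) {
      std::set<int> In;
      for (int P : CFG[BB].Preds)
        In.insert(Out[P].begin(), Out[P].end());
      std::set<int> NewOut = CFG[BB].Kill ? std::set<int>() : In;
      NewOut.insert(CFG[BB].Gen.begin(), CFG[BB].Gen.end());
      if (NewOut != Out[BB]) {
        Out[BB] = NewOut;
        Changed = true;
      }
    }
  }
  return Out;
}

int main() {
  // bb0 defines d0; bb1 (successor of bb0) kills the color and defines d1;
  // bb2 joins bb0 and bb1, so both d0 and d1 reach it.
  std::vector<Block> CFG(3);
  CFG[0].Gen = {0};
  CFG[1].Preds = {0}; CFG[1].Gen = {1}; CFG[1].Kill = true;
  CFG[2].Preds = {0, 1};
  return reachingDefs(CFG)[2] == std::set<int>({0, 1}) ? 0 : 1;
}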
-static void reachingDef(MachineFunction *MF, - InstrToInstrs *ColorOpToReachedUses, - const MapRegToId &RegToId, bool ADRPMode = false, - const MachineInstr *DummyOp = NULL) { - // structures: - // For each basic block. - // Out: a set per color of definitions that reach the - // out boundary of this block. - // In: Same as Out but for in boundary. - // Gen: generated color in this block (one operation per color). - // Kill: register set of killed color in this block. - // ReachableUses: a set per color of uses (operation) reachable - // for "In" definitions. - BlockToSetOfInstrsPerColor Out, In, ReachableUses; - BlockToInstrPerColor Gen; - BlockToRegSet Kill; - - // Initialize Gen, kill and reachableUses. - initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId, - DummyOp, ADRPMode); - - // Algo. - if (!DummyOp) - reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill, - ReachableUses, RegToId.size()); - - // finit. - finitReachingDef(In, Out, Gen, ReachableUses); -} - -#ifndef NDEBUG -/// print the result of the reaching definition algorithm. -static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses, - unsigned NbReg, const TargetRegisterInfo *TRI, - const MapIdToReg &IdToReg) { - unsigned CurReg; - for (CurReg = 0; CurReg < NbReg; ++CurReg) { - if (ColorOpToReachedUses[CurReg].empty()) - continue; - DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n"); - - InstrToInstrs::const_iterator DefsIt = ColorOpToReachedUses[CurReg].begin(); - InstrToInstrs::const_iterator DefsItEnd = - ColorOpToReachedUses[CurReg].end(); - for (; DefsIt != DefsItEnd; ++DefsIt) { - DEBUG(dbgs() << "Def:\n"); - DEBUG(DefsIt->first->print(dbgs())); - DEBUG(dbgs() << "Reachable uses:\n"); - for (SetOfMachineInstr::const_iterator UsesIt = DefsIt->second.begin(), - UsesItEnd = DefsIt->second.end(); - UsesIt != UsesItEnd; ++UsesIt) { - DEBUG((*UsesIt)->print(dbgs())); - } - } - } -} -#endif // NDEBUG - -/// Answer the following question: Can Def be one of the definition -/// involved in a part of a LOH? -static bool canDefBePartOfLOH(const MachineInstr *Def) { - unsigned Opc = Def->getOpcode(); - // Accept ADRP, ADDLow and LOADGot. - switch (Opc) { - default: - return false; - case ARM64::ADRP: - return true; - case ARM64::ADDXri: - // Check immediate to see if the immediate is an address. - switch (Def->getOperand(2).getType()) { - default: - return false; - case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_JumpTableIndex: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_BlockAddress: - return true; - } - case ARM64::LDRXui: - // Check immediate to see if the immediate is an address. - switch (Def->getOperand(2).getType()) { - default: - return false; - case MachineOperand::MO_GlobalAddress: - return true; - } - } - // Unreachable. - return false; -} - -/// Check whether the given instruction can the end of a LOH chain involving a -/// store. -static bool isCandidateStore(const MachineInstr *Instr) { - switch (Instr->getOpcode()) { - default: - return false; - case ARM64::STRBui: - case ARM64::STRHui: - case ARM64::STRWui: - case ARM64::STRXui: - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - // In case we have str xA, [xA, #imm], this is two different uses - // of xA and we cannot fold, otherwise the xA stored may be wrong, - // even if #imm == 0. 
- if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg()) - return true; - } - return false; -} - -/// Given the result of a reaching defintion algorithm in ColorOpToReachedUses, -/// Build the Use to Defs information and filter out obvious non-LOH candidates. -/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions. -/// In non-ADRPMode, non-LOH candidates are "uses" with several definition, -/// i.e., no simple chain. -/// \param ADRPMode -- \see initReachingDef. -static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs, - const InstrToInstrs *ColorOpToReachedUses, - const MapRegToId &RegToId, - bool ADRPMode = false) { - - SetOfMachineInstr NotCandidate; - unsigned NbReg = RegToId.size(); - MapRegToId::const_iterator EndIt = RegToId.end(); - for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) { - // If this color is never defined, continue. - if (ColorOpToReachedUses[CurReg].empty()) - continue; - - InstrToInstrs::const_iterator DefsIt = ColorOpToReachedUses[CurReg].begin(); - InstrToInstrs::const_iterator DefsItEnd = - ColorOpToReachedUses[CurReg].end(); - for (; DefsIt != DefsItEnd; ++DefsIt) { - for (SetOfMachineInstr::const_iterator UsesIt = DefsIt->second.begin(), - UsesItEnd = DefsIt->second.end(); - UsesIt != UsesItEnd; ++UsesIt) { - const MachineInstr *Def = DefsIt->first; - MapRegToId::const_iterator It; - // if all the reaching defs are not adrp, this use will not be - // simplifiable. - if ((ADRPMode && Def->getOpcode() != ARM64::ADRP) || - (!ADRPMode && !canDefBePartOfLOH(Def)) || - (!ADRPMode && isCandidateStore(*UsesIt) && - // store are LOH candidate iff the end of the chain is used as - // base. - ((It = RegToId.find((*UsesIt)->getOperand(1).getReg())) == EndIt || - It->second != CurReg))) { - NotCandidate.insert(*UsesIt); - continue; - } - // Do not consider self reaching as a simplifiable case for ADRP. - if (!ADRPMode || *UsesIt != DefsIt->first) { - UseToReachingDefs[*UsesIt].insert(DefsIt->first); - // If UsesIt has several reaching definitions, it is not - // candidate for simplificaton in non-ADRPMode. - if (!ADRPMode && UseToReachingDefs[*UsesIt].size() > 1) - NotCandidate.insert(*UsesIt); - } - } - } - } - for (const MachineInstr *Elem : NotCandidate) { - DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n"); - // It would have been better if we could just remove the entry - // from the map. Because of that, we have to filter the garbage - // (second.empty) in the subsequence analysis. - UseToReachingDefs[Elem].clear(); - } -} - -/// Based on the use to defs information (in ADRPMode), compute the -/// opportunities of LOH ADRP-related. 
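[Editor's aside] reachedUsesToDefs above inverts the def-to-reached-uses map produced by the dataflow pass into a use-to-reaching-defs map and disqualifies uses with more than one reaching definition. Before computeADRP below consumes that map, here is a standalone sketch of the inversion with plain STL containers and illustrative names:

    #include <cstdio>
    #include <map>
    #include <set>
    #include <string>

    int main() {
      // Def -> set of uses it reaches (what the reaching-def pass produces).
      std::map<std::string, std::set<std::string>> DefToUses = {
          {"adrp1", {"add1", "ldr1"}}, {"adrp2", {"ldr1"}}};

      // Invert into Use -> set of reaching defs.
      std::map<std::string, std::set<std::string>> UseToDefs;
      for (const auto &Entry : DefToUses)
        for (const std::string &Use : Entry.second)
          UseToDefs[Use].insert(Entry.first);

      // A use with several reaching defs has no simple chain: not a candidate.
      for (auto &Entry : UseToDefs)
        if (Entry.second.size() > 1)
          Entry.second.clear(); // keep the key, like the pass keeps "garbage" entries

      for (const auto &Entry : UseToDefs)
        std::printf("%s: %zu reaching def(s)\n", Entry.first.c_str(),
                    Entry.second.size());
    }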
-static void computeADRP(const InstrToInstrs &UseToDefs, - ARM64FunctionInfo &ARM64FI, - const MachineDominatorTree *MDT) { - DEBUG(dbgs() << "*** Compute LOH for ADRP\n"); - for (const auto &Entry: UseToDefs) { - unsigned Size = Entry.second.size(); - if (Size == 0) - continue; - if (Size == 1) { - const MachineInstr *L2 = *Entry.second.begin(); - const MachineInstr *L1 = Entry.first; - if (!MDT->dominates(L2, L1)) { - DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1 - << '\n'); - continue; - } - DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n'); - SmallVector<const MachineInstr *, 2> Args; - Args.push_back(L2); - Args.push_back(L1); - ARM64FI.addLOHDirective(MCLOH_AdrpAdrp, Args); - ++NumADRPSimpleCandidate; - } -#ifdef DEBUG - else if (Size == 2) - ++NumADRPComplexCandidate2; - else if (Size == 3) - ++NumADRPComplexCandidate3; - else - ++NumADRPComplexCandidateOther; -#endif - // if Size < 1, the use should have been removed from the candidates - assert(Size >= 1 && "No reaching defs for that use!"); - } -} - -/// Check whether the given instruction can be the end of a LOH chain -/// involving a load. -static bool isCandidateLoad(const MachineInstr *Instr) { - switch (Instr->getOpcode()) { - default: - return false; - case ARM64::LDRSBWui: - case ARM64::LDRSBXui: - case ARM64::LDRSHWui: - case ARM64::LDRSHXui: - case ARM64::LDRSWui: - case ARM64::LDRBui: - case ARM64::LDRHui: - case ARM64::LDRWui: - case ARM64::LDRXui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - if (Instr->getOperand(2).getTargetFlags() & ARM64II::MO_GOT) - return false; - return true; - } - // Unreachable. - return false; -} - -/// Check whether the given instruction can load a litteral. -static bool supportLoadFromLiteral(const MachineInstr *Instr) { - switch (Instr->getOpcode()) { - default: - return false; - case ARM64::LDRSWui: - case ARM64::LDRWui: - case ARM64::LDRXui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - return true; - } - // Unreachable. - return false; -} - -/// Check whether the given instruction is a LOH candidate. -/// \param UseToDefs is used to check that Instr is at the end of LOH supported -/// chain. -/// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are -/// already been filtered out. -static bool isCandidate(const MachineInstr *Instr, - const InstrToInstrs &UseToDefs, - const MachineDominatorTree *MDT) { - if (!isCandidateLoad(Instr) && !isCandidateStore(Instr)) - return false; - - const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin(); - if (Def->getOpcode() != ARM64::ADRP) { - // At this point, Def is ADDXri or LDRXui of the right type of - // symbol, because we filtered out the uses that were not defined - // by these kind of instructions (+ ADRP). - - // Check if this forms a simple chain: each intermediate node must - // dominates the next one. - if (!MDT->dominates(Def, Instr)) - return false; - // Move one node up in the simple chain. - if (UseToDefs.find(Def) == UseToDefs.end() - // The map may contain garbage we have to ignore. - || - UseToDefs.find(Def)->second.empty()) - return false; - Instr = Def; - Def = *UseToDefs.find(Def)->second.begin(); - } - // Check if we reached the top of the simple chain: - // - top is ADRP. - // - check the simple chain property: each intermediate node must - // dominates the next one. 
- if (Def->getOpcode() == ARM64::ADRP) - return MDT->dominates(Def, Instr); - return false; -} - -static bool registerADRCandidate(const MachineInstr *Use, - const InstrToInstrs &UseToDefs, - const InstrToInstrs *DefsPerColorToUses, - ARM64FunctionInfo &ARM64FI, - SetOfMachineInstr *InvolvedInLOHs, - const MapRegToId &RegToId) { - // Look for opportunities to turn ADRP -> ADD or - // ADRP -> LDR GOTPAGEOFF into ADR. - // If ADRP has more than one use. Give up. - if (Use->getOpcode() != ARM64::ADDXri && - (Use->getOpcode() != ARM64::LDRXui || - !(Use->getOperand(2).getTargetFlags() & ARM64II::MO_GOT))) - return false; - InstrToInstrs::const_iterator It = UseToDefs.find(Use); - // The map may contain garbage that we need to ignore. - if (It == UseToDefs.end() || It->second.empty()) - return false; - const MachineInstr *Def = *It->second.begin(); - if (Def->getOpcode() != ARM64::ADRP) - return false; - // Check the number of users of ADRP. - const SetOfMachineInstr *Users = - getUses(DefsPerColorToUses, - RegToId.find(Def->getOperand(0).getReg())->second, Def); - if (Users->size() > 1) { - ++NumADRComplexCandidate; - return false; - } - ++NumADRSimpleCandidate; - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Def)) && - "ADRP already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Use)) && - "ADD already involved in LOH."); - DEBUG(dbgs() << "Record AdrpAdd\n" << *Def << '\n' << *Use << '\n'); - - SmallVector<const MachineInstr *, 2> Args; - Args.push_back(Def); - Args.push_back(Use); - - ARM64FI.addLOHDirective(Use->getOpcode() == ARM64::ADDXri ? MCLOH_AdrpAdd - : MCLOH_AdrpLdrGot, - Args); - return true; -} - -/// Based on the use to defs information (in non-ADRPMode), compute the -/// opportunities of LOH non-ADRP-related -static void computeOthers(const InstrToInstrs &UseToDefs, - const InstrToInstrs *DefsPerColorToUses, - ARM64FunctionInfo &ARM64FI, const MapRegToId &RegToId, - const MachineDominatorTree *MDT) { - SetOfMachineInstr *InvolvedInLOHs = NULL; -#ifdef DEBUG - SetOfMachineInstr InvolvedInLOHsStorage; - InvolvedInLOHs = &InvolvedInLOHsStorage; -#endif // DEBUG - DEBUG(dbgs() << "*** Compute LOH for Others\n"); - // ADRP -> ADD/LDR -> LDR/STR pattern. - // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern. - - // FIXME: When the statistics are not important, - // This initial filtering loop can be merged into the next loop. - // Currently, we didn't do it to have the same code for both DEBUG and - // NDEBUG builds. Indeed, the iterator of the second loop would need - // to be changed. - SetOfMachineInstr PotentialCandidates; - SetOfMachineInstr PotentialADROpportunities; - for (InstrToInstrs::const_iterator UseIt = UseToDefs.begin(), - EndUseIt = UseToDefs.end(); - UseIt != EndUseIt; ++UseIt) { - // If no definition is available, this is a non candidate. - if (UseIt->second.empty()) - continue; - // Keep only instructions that are load or store and at the end of - // a ADRP -> ADD/LDR/Nothing chain. - // We already filtered out the no-chain cases. - if (!isCandidate(UseIt->first, UseToDefs, MDT)) { - PotentialADROpportunities.insert(UseIt->first); - continue; - } - PotentialCandidates.insert(UseIt->first); - } - - // Make the following distinctions for statistics as the linker does - // know how to decode instructions: - // - ADD/LDR/Nothing make there different patterns. - // - LDR/STR make two different patterns. - // Hence, 6 - 1 base patterns. 
- // (because ADRP-> Nothing -> STR is not simplifiable) - - // The linker is only able to have a simple semantic, i.e., if pattern A - // do B. - // However, we want to see the opportunity we may miss if we were able to - // catch more complex cases. - - // PotentialCandidates are result of a chain ADRP -> ADD/LDR -> - // A potential candidate becomes a candidate, if its current immediate - // operand is zero and all nodes of the chain have respectively only one user - SetOfMachineInstr::const_iterator CandidateIt, EndCandidateIt; -#ifdef DEBUG - SetOfMachineInstr DefsOfPotentialCandidates; -#endif - for (CandidateIt = PotentialCandidates.begin(), - EndCandidateIt = PotentialCandidates.end(); - CandidateIt != EndCandidateIt; ++CandidateIt) { - const MachineInstr *Candidate = *CandidateIt; - // Get the definition of the candidate i.e., ADD or LDR. - const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin(); - // Record the elements of the chain. - const MachineInstr *L1 = Def; - const MachineInstr *L2 = NULL; - unsigned ImmediateDefOpc = Def->getOpcode(); - if (Def->getOpcode() != ARM64::ADRP) { - // Check the number of users of this node. - const SetOfMachineInstr *Users = - getUses(DefsPerColorToUses, - RegToId.find(Def->getOperand(0).getReg())->second, Def); - if (Users->size() > 1) { -#ifdef DEBUG - // if all the uses of this def are in potential candidate, this is - // a complex candidate of level 2. - SetOfMachineInstr::const_iterator UseIt = Users->begin(); - SetOfMachineInstr::const_iterator EndUseIt = Users->end(); - for (; UseIt != EndUseIt; ++UseIt) { - if (!PotentialCandidates.count(*UseIt)) { - ++NumTooCplxLvl2; - break; - } - } - if (UseIt == EndUseIt) - ++NumCplxLvl2; -#endif // DEBUG - PotentialADROpportunities.insert(Def); - continue; - } - L2 = Def; - Def = *UseToDefs.find(Def)->second.begin(); - L1 = Def; - } // else the element in the middle of the chain is nothing, thus - // Def already contains the first element of the chain. - - // Check the number of users of the first node in the chain, i.e., ADRP - const SetOfMachineInstr *Users = - getUses(DefsPerColorToUses, - RegToId.find(Def->getOperand(0).getReg())->second, Def); - if (Users->size() > 1) { -#ifdef DEBUG - // if all the uses of this def are in the defs of the potential candidate, - // this is a complex candidate of level 1 - if (DefsOfPotentialCandidates.empty()) { - // lazy init - DefsOfPotentialCandidates = PotentialCandidates; - for (const MachineInstr *Candidate : PotentialCandidates) { - if (!UseToDefs.find(Candidate)->second.empty()) - DefsOfPotentialCandidates.insert( - *UseToDefs.find(Candidate)->second.begin()); - } - } - bool Found = false; - for (auto &Use: *Users) { - if (!DefsOfPotentialCandidates.count(Use)) { - ++NumTooCplxLvl1; - Found = true; - break; - } - } - if (!Found) - ++NumCplxLvl1; -#endif // DEBUG - continue; - } - - bool IsL2Add = (ImmediateDefOpc == ARM64::ADDXri); - // If the chain is three instructions long and ldr is the second element, - // then this ldr must load form GOT, otherwise this is not a correct chain. - if (L2 && !IsL2Add && L2->getOperand(2).getTargetFlags() != ARM64II::MO_GOT) - continue; - SmallVector<const MachineInstr *, 3> Args; - MCLOHType Kind; - if (isCandidateLoad(Candidate)) { - if (L2 == NULL) { - // At this point, the candidate LOH indicates that the ldr instruction - // may use a direct access to the symbol. There is not such encoding - // for loads of byte and half. 
- if (!supportLoadFromLiteral(Candidate)) - continue; - - DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate - << '\n'); - Kind = MCLOH_AdrpLdr; - Args.push_back(L1); - Args.push_back(Candidate); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && - "L1 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && - "Candidate already involved in LOH."); - ++NumADRPToLDR; - } else { - DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot") - << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate - << '\n'); - - Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr; - Args.push_back(L1); - Args.push_back(L2); - Args.push_back(Candidate); - - PotentialADROpportunities.remove(L2); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && - "L1 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) && - "L2 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && - "Candidate already involved in LOH."); -#ifdef DEBUG - // get the immediate of the load - if (Candidate->getOperand(2).getImm() == 0) - if (ImmediateDefOpc == ARM64::ADDXri) - ++NumADDToLDR; - else - ++NumLDRToLDR; - else if (ImmediateDefOpc == ARM64::ADDXri) - ++NumADDToLDRWithImm; - else - ++NumLDRToLDRWithImm; -#endif // DEBUG - } - } else { - if (ImmediateDefOpc == ARM64::ADRP) - continue; - else { - - DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot") - << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate - << '\n'); - - Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr; - Args.push_back(L1); - Args.push_back(L2); - Args.push_back(Candidate); - - PotentialADROpportunities.remove(L2); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && - "L1 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) && - "L2 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && - "Candidate already involved in LOH."); -#ifdef DEBUG - // get the immediate of the store - if (Candidate->getOperand(2).getImm() == 0) - if (ImmediateDefOpc == ARM64::ADDXri) - ++NumADDToSTR; - else - ++NumLDRToSTR; - else if (ImmediateDefOpc == ARM64::ADDXri) - ++NumADDToSTRWithImm; - else - ++NumLDRToSTRWithImm; -#endif // DEBUG - } - } - ARM64FI.addLOHDirective(Kind, Args); - } - - // Now, we grabbed all the big patterns, check ADR opportunities. - for (const MachineInstr *Candidate: PotentialADROpportunities) - registerADRCandidate(Candidate, UseToDefs, DefsPerColorToUses, ARM64FI, - InvolvedInLOHs, RegToId); -} - -/// Look for every register defined by potential LOHs candidates. -/// Map these registers with dense id in @p RegToId and vice-versa in -/// @p IdToReg. @p IdToReg is populated only in DEBUG mode. 
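[Editor's aside] collectInvolvedReg below assigns a dense index to every register (and register alias) that an LOH candidate can define, so the per-color dataflow arrays are sized by the handful of involved registers instead of the whole register file. A standalone sketch of the dense-numbering idea, without alias iteration and with made-up register encodings:

    #include <cstdio>
    #include <unordered_map>
    #include <vector>

    int main() {
      // Registers named by their (sparse) target encoding.
      std::vector<unsigned> DefinedRegs = {34, 7, 34, 129, 7};

      std::unordered_map<unsigned, unsigned> RegToId; // sparse reg -> dense id
      std::vector<unsigned> IdToReg;                  // dense id  -> sparse reg
      for (unsigned Reg : DefinedRegs)
        if (RegToId.find(Reg) == RegToId.end()) {
          RegToId[Reg] = IdToReg.size();
          IdToReg.push_back(Reg);
        }

      // Dataflow arrays can now be sized by IdToReg.size() rather than by
      // the total number of target registers.
      for (unsigned Id = 0; Id < IdToReg.size(); ++Id)
        std::printf("id %u -> reg %u\n", Id, IdToReg[Id]);
    }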
-static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId, - MapIdToReg &IdToReg, - const TargetRegisterInfo *TRI) { - unsigned CurRegId = 0; - if (!PreCollectRegister) { - unsigned NbReg = TRI->getNumRegs(); - for (; CurRegId < NbReg; ++CurRegId) { - RegToId[CurRegId] = CurRegId; - DEBUG(IdToReg.push_back(CurRegId)); - DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches")); - } - return; - } - - DEBUG(dbgs() << "** Collect Involved Register\n"); - for (MachineFunction::const_iterator IMBB = MF.begin(), IMBBEnd = MF.end(); - IMBB != IMBBEnd; ++IMBB) - for (MachineBasicBlock::const_iterator II = IMBB->begin(), - IEnd = IMBB->end(); - II != IEnd; ++II) { - - if (!canDefBePartOfLOH(II)) - continue; - - // Process defs - for (MachineInstr::const_mop_iterator IO = II->operands_begin(), - IOEnd = II->operands_end(); - IO != IOEnd; ++IO) { - if (!IO->isReg() || !IO->isDef()) - continue; - unsigned CurReg = IO->getReg(); - for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) - if (RegToId.find(*AI) == RegToId.end()) { - DEBUG(IdToReg.push_back(*AI); - assert(IdToReg[CurRegId] == *AI && - "Reg index mismatches insertion index.")); - RegToId[*AI] = CurRegId++; - DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n'); - } - } - } -} - -bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &Fn) { - const TargetMachine &TM = Fn.getTarget(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>(); - - MapRegToId RegToId; - MapIdToReg IdToReg; - ARM64FunctionInfo *ARM64FI = Fn.getInfo<ARM64FunctionInfo>(); - assert(ARM64FI && "No MachineFunctionInfo for this function!"); - - DEBUG(dbgs() << "Looking for LOH in " << Fn.getName() << '\n'); - - collectInvolvedReg(Fn, RegToId, IdToReg, TRI); - if (RegToId.empty()) - return false; - - MachineInstr *DummyOp = NULL; - if (BasicBlockScopeOnly) { - const ARM64InstrInfo *TII = - static_cast<const ARM64InstrInfo *>(TM.getInstrInfo()); - // For local analysis, create a dummy operation to record uses that are not - // local. - DummyOp = Fn.CreateMachineInstr(TII->get(ARM64::COPY), DebugLoc()); - } - - unsigned NbReg = RegToId.size(); - bool Modified = false; - - // Start with ADRP. - InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg]; - - // Compute the reaching def in ADRP mode, meaning ADRP definitions - // are first considered as uses. - reachingDef(&Fn, ColorOpToReachedUses, RegToId, true, DummyOp); - DEBUG(dbgs() << "ADRP reaching defs\n"); - DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); - - // Translate the definition to uses map into a use to definitions map to ease - // statistic computation. - InstrToInstrs ADRPToReachingDefs; - reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true); - - // Compute LOH for ADRP. - computeADRP(ADRPToReachingDefs, *ARM64FI, MDT); - delete[] ColorOpToReachedUses; - - // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern. - ColorOpToReachedUses = new InstrToInstrs[NbReg]; - - // first perform a regular reaching def analysis. - reachingDef(&Fn, ColorOpToReachedUses, RegToId, false, DummyOp); - DEBUG(dbgs() << "All reaching defs\n"); - DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); - - // Turn that into a use to defs to ease statistic computation. - InstrToInstrs UsesToReachingDefs; - reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false); - - // Compute other than AdrpAdrp LOH. 
- computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *ARM64FI, RegToId, - MDT); - delete[] ColorOpToReachedUses; - - if (BasicBlockScopeOnly) - Fn.DeleteMachineInstr(DummyOp); - - return Modified; -} - -/// createARM64CollectLOHPass - returns an instance of the Statistic for -/// linker optimization pass. -FunctionPass *llvm::createARM64CollectLOHPass() { - return new ARM64CollectLOH(); -} diff --git a/lib/Target/ARM64/ARM64ConditionalCompares.cpp b/lib/Target/ARM64/ARM64ConditionalCompares.cpp deleted file mode 100644 index b495afa..0000000 --- a/lib/Target/ARM64/ARM64ConditionalCompares.cpp +++ /dev/null @@ -1,918 +0,0 @@ -//===-- ARM64ConditionalCompares.cpp --- CCMP formation for ARM64 ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ARM64ConditionalCompares pass which reduces -// branching and code size by using the conditional compare instructions CCMP, -// CCMN, and FCMP. -// -// The CFG transformations for forming conditional compares are very similar to -// if-conversion, and this pass should run immediately before the early -// if-conversion pass. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-ccmp" -#include "ARM64.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SparseSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineTraceMetrics.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" - -using namespace llvm; - -// Absolute maximum number of instructions allowed per speculated block. -// This bypasses all other heuristics, so it should be set fairly high. -static cl::opt<unsigned> BlockInstrLimit( - "arm64-ccmp-limit", cl::init(30), cl::Hidden, - cl::desc("Maximum number of instructions per speculated block.")); - -// Stress testing mode - disable heuristics. 
-static cl::opt<bool> Stress("arm64-stress-ccmp", cl::Hidden, - cl::desc("Turn all knobs to 11")); - -STATISTIC(NumConsidered, "Number of ccmps considered"); -STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)"); -STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)"); -STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)"); -STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)"); -STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)"); -STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)"); -STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)"); -STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)"); -STATISTIC(NumMultCPSRUses, "Number of ccmps rejected (CPSR used)"); -STATISTIC(NumUnknCPSRDefs, "Number of ccmps rejected (CPSR def unknown)"); - -STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)"); - -STATISTIC(NumConverted, "Number of ccmp instructions created"); -STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted"); - -//===----------------------------------------------------------------------===// -// SSACCmpConv -//===----------------------------------------------------------------------===// -// -// The SSACCmpConv class performs ccmp-conversion on SSA form machine code -// after determining if it is possible. The class contains no heuristics; -// external code should be used to determine when ccmp-conversion is a good -// idea. -// -// CCmp-formation works on a CFG representing chained conditions, typically -// from C's short-circuit || and && operators: -// -// From: Head To: Head -// / | CmpBB -// / | / | -// | CmpBB / | -// | / | Tail | -// | / | | | -// Tail | | | -// | | | | -// ... ... ... ... -// -// The Head block is terminated by a br.cond instruction, and the CmpBB block -// contains compare + br.cond. Tail must be a successor of both. -// -// The cmp-conversion turns the compare instruction in CmpBB into a conditional -// compare, and merges CmpBB into Head, speculatively executing its -// instructions. The ARM64 conditional compare instructions have an immediate -// operand that specifies the NZCV flag values when the condition is false and -// the compare isn't executed. This makes it possible to chain compares with -// different condition codes. -// -// Example: -// -// if (a == 5 || b == 17) -// foo(); -// -// Head: -// cmp w0, #5 -// b.eq Tail -// CmpBB: -// cmp w1, #17 -// b.eq Tail -// ... -// Tail: -// bl _foo -// -// Becomes: -// -// Head: -// cmp w0, #5 -// ccmp w1, #17, 4, ne ; 4 = nZcv -// b.eq Tail -// ... -// Tail: -// bl _foo -// -// The ccmp condition code is the one that would cause the Head terminator to -// branch to CmpBB. -// -// FIXME: It should also be possible to speculate a block on the critical edge -// between Head and Tail, just like if-converting a diamond. -// -// FIXME: Handle PHIs in Tail by turning them into selects (if-conversion). - -namespace { -class SSACCmpConv { - MachineFunction *MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - MachineRegisterInfo *MRI; - -public: - /// The first block containing a conditional branch, dominating everything - /// else. - MachineBasicBlock *Head; - - /// The block containing cmp+br.cond with a sucessor shared with Head. - MachineBasicBlock *CmpBB; - - /// The common successor for Head and CmpBB. - MachineBasicBlock *Tail; - - /// The compare instruction in CmpBB that can be converted to a ccmp. 
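[Editor's aside] The worked example above ("ccmp w1, #17, 4, ne ; 4 = nZcv") relies on choosing an NZCV immediate whose flag values satisfy the CmpBB-to-Tail condition when the ccmp condition fails. The sketch below illustrates that mapping for a few unambiguous condition codes only (N, Z, C, V occupy bits 3..0 of the immediate); it is an illustration, not the pass's full getNZCVToSatisfyCondCode table:

    #include <cstdio>

    enum CondCode { EQ, NE, HS, LO, MI, PL, VS, VC };

    // Return an NZCV immediate (N=bit3, Z=bit2, C=bit1, V=bit0) whose flag
    // values make the given condition evaluate to true.
    unsigned nzcvToSatisfy(CondCode CC) {
      switch (CC) {
      case EQ: return 0b0100; // Z set
      case NE: return 0b0000; // Z clear
      case HS: return 0b0010; // C set
      case LO: return 0b0000; // C clear
      case MI: return 0b1000; // N set
      case PL: return 0b0000; // N clear
      case VS: return 0b0001; // V set
      case VC: return 0b0000; // V clear
      }
      return 0;
    }

    int main() {
      // For "b.eq Tail" after the ccmp, the immediate must set Z, i.e. 4,
      // matching the "4 = nZcv" annotation in the example above.
      std::printf("EQ -> %u\n", nzcvToSatisfy(EQ));
    }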
- MachineInstr *CmpMI; - -private: - /// The branch condition in Head as determined by AnalyzeBranch. - SmallVector<MachineOperand, 4> HeadCond; - - /// The condition code that makes Head branch to CmpBB. - ARM64CC::CondCode HeadCmpBBCC; - - /// The branch condition in CmpBB. - SmallVector<MachineOperand, 4> CmpBBCond; - - /// The condition code that makes CmpBB branch to Tail. - ARM64CC::CondCode CmpBBTailCC; - - /// Check if the Tail PHIs are trivially convertible. - bool trivialTailPHIs(); - - /// Remove CmpBB from the Tail PHIs. - void updateTailPHIs(); - - /// Check if an operand defining DstReg is dead. - bool isDeadDef(unsigned DstReg); - - /// Find the compare instruction in MBB that controls the conditional branch. - /// Return NULL if a convertible instruction can't be found. - MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB); - - /// Return true if all non-terminator instructions in MBB can be safely - /// speculated. - bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI); - -public: - /// runOnMachineFunction - Initialize per-function data structures. - void runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); - MRI = &MF.getRegInfo(); - } - - /// If the sub-CFG headed by MBB can be cmp-converted, initialize the - /// internal state, and return true. - bool canConvert(MachineBasicBlock *MBB); - - /// Cmo-convert the last block passed to canConvertCmp(), assuming - /// it is possible. Add any erased blocks to RemovedBlocks. - void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks); - - /// Return the expected code size delta if the conversion into a - /// conditional compare is performed. - int expectedCodeSizeDelta() const; -}; -} // end anonymous namespace - -// Check that all PHIs in Tail are selecting the same value from Head and CmpBB. -// This means that no if-conversion is required when merging CmpBB into Head. -bool SSACCmpConv::trivialTailPHIs() { - for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); - I != E && I->isPHI(); ++I) { - unsigned HeadReg = 0, CmpBBReg = 0; - // PHI operands come in (VReg, MBB) pairs. - for (unsigned oi = 1, oe = I->getNumOperands(); oi != oe; oi += 2) { - MachineBasicBlock *MBB = I->getOperand(oi + 1).getMBB(); - unsigned Reg = I->getOperand(oi).getReg(); - if (MBB == Head) { - assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands"); - HeadReg = Reg; - } - if (MBB == CmpBB) { - assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands"); - CmpBBReg = Reg; - } - } - if (HeadReg != CmpBBReg) - return false; - } - return true; -} - -// Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply -// removing the CmpBB operands. The Head operands will be identical. -void SSACCmpConv::updateTailPHIs() { - for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); - I != E && I->isPHI(); ++I) { - // I is a PHI. It can have multiple entries for CmpBB. - for (unsigned oi = I->getNumOperands(); oi > 2; oi -= 2) { - // PHI operands are (Reg, MBB) at (oi-2, oi-1). - if (I->getOperand(oi - 1).getMBB() == CmpBB) { - I->RemoveOperand(oi - 1); - I->RemoveOperand(oi - 2); - } - } - } -} - -// This pass runs before the ARM64DeadRegisterDefinitions pass, so compares are -// still writing virtual registers without any uses. -bool SSACCmpConv::isDeadDef(unsigned DstReg) { - // Writes to the zero register are dead. 
- if (DstReg == ARM64::WZR || DstReg == ARM64::XZR) - return true; - if (!TargetRegisterInfo::isVirtualRegister(DstReg)) - return false; - // A virtual register def without any uses will be marked dead later, and - // eventually replaced by the zero register. - return MRI->use_nodbg_empty(DstReg); -} - -// Parse a condition code returned by AnalyzeBranch, and compute the CondCode -// corresponding to TBB. -// Return -static bool parseCond(ArrayRef<MachineOperand> Cond, ARM64CC::CondCode &CC) { - // A normal br.cond simply has the condition code. - if (Cond[0].getImm() != -1) { - assert(Cond.size() == 1 && "Unknown Cond array format"); - CC = (ARM64CC::CondCode)(int)Cond[0].getImm(); - return true; - } - // For tbz and cbz instruction, the opcode is next. - switch (Cond[1].getImm()) { - default: - // This includes tbz / tbnz branches which can't be converted to - // ccmp + br.cond. - return false; - case ARM64::CBZW: - case ARM64::CBZX: - assert(Cond.size() == 3 && "Unknown Cond array format"); - CC = ARM64CC::EQ; - return true; - case ARM64::CBNZW: - case ARM64::CBNZX: - assert(Cond.size() == 3 && "Unknown Cond array format"); - CC = ARM64CC::NE; - return true; - } -} - -MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { - MachineBasicBlock::iterator I = MBB->getFirstTerminator(); - if (I == MBB->end()) - return 0; - // The terminator must be controlled by the flags. - if (!I->readsRegister(ARM64::CPSR)) { - switch (I->getOpcode()) { - case ARM64::CBZW: - case ARM64::CBZX: - case ARM64::CBNZW: - case ARM64::CBNZX: - // These can be converted into a ccmp against #0. - return I; - } - ++NumCmpTermRejs; - DEBUG(dbgs() << "Flags not used by terminator: " << *I); - return 0; - } - - // Now find the instruction controlling the terminator. - for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) { - --I; - assert(!I->isTerminator() && "Spurious terminator"); - switch (I->getOpcode()) { - // cmp is an alias for subs with a dead destination register. - case ARM64::SUBSWri: - case ARM64::SUBSXri: - // cmn is an alias for adds with a dead destination register. - case ARM64::ADDSWri: - case ARM64::ADDSXri: - // Check that the immediate operand is within range, ccmp wants a uimm5. - // Rd = SUBSri Rn, imm, shift - if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) { - DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I); - ++NumImmRangeRejs; - return 0; - } - // Fall through. - case ARM64::SUBSWrr: - case ARM64::SUBSXrr: - case ARM64::ADDSWrr: - case ARM64::ADDSXrr: - if (isDeadDef(I->getOperand(0).getReg())) - return I; - DEBUG(dbgs() << "Can't convert compare with live destination: " << *I); - ++NumLiveDstRejs; - return 0; - case ARM64::FCMPSrr: - case ARM64::FCMPDrr: - case ARM64::FCMPESrr: - case ARM64::FCMPEDrr: - return I; - } - - // Check for flag reads and clobbers. - MIOperands::PhysRegInfo PRI = - MIOperands(I).analyzePhysReg(ARM64::CPSR, TRI); - - if (PRI.Reads) { - // The ccmp doesn't produce exactly the same flags as the original - // compare, so reject the transform if there are uses of the flags - // besides the terminators. - DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I); - ++NumMultCPSRUses; - return 0; - } - - if (PRI.Clobbers) { - DEBUG(dbgs() << "Not convertible compare: " << *I); - ++NumUnknCPSRDefs; - return 0; - } - } - DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n'); - return 0; -} - -/// Determine if all the instructions in MBB can safely -/// be speculated. 
The terminators are not considered. -/// -/// Only CmpMI is allowed to clobber the flags. -/// -bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB, - const MachineInstr *CmpMI) { - // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to - // get right. - if (!MBB->livein_empty()) { - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n"); - return false; - } - - unsigned InstrCount = 0; - - // Check all instructions, except the terminators. It is assumed that - // terminators never have side effects or define any used register values. - for (MachineBasicBlock::iterator I = MBB->begin(), - E = MBB->getFirstTerminator(); - I != E; ++I) { - if (I->isDebugValue()) - continue; - - if (++InstrCount > BlockInstrLimit && !Stress) { - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than " - << BlockInstrLimit << " instructions.\n"); - return false; - } - - // There shouldn't normally be any phis in a single-predecessor block. - if (I->isPHI()) { - DEBUG(dbgs() << "Can't hoist: " << *I); - return false; - } - - // Don't speculate loads. Note that it may be possible and desirable to - // speculate GOT or constant pool loads that are guaranteed not to trap, - // but we don't support that for now. - if (I->mayLoad()) { - DEBUG(dbgs() << "Won't speculate load: " << *I); - return false; - } - - // We never speculate stores, so an AA pointer isn't necessary. - bool DontMoveAcrossStore = true; - if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) { - DEBUG(dbgs() << "Can't speculate: " << *I); - return false; - } - - // Only CmpMI is alowed to clobber the flags. - if (&*I != CmpMI && I->modifiesRegister(ARM64::CPSR, TRI)) { - DEBUG(dbgs() << "Clobbers flags: " << *I); - return false; - } - } - return true; -} - -/// Analyze the sub-cfg rooted in MBB, and return true if it is a potential -/// candidate for cmp-conversion. Fill out the internal state. -/// -bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) { - Head = MBB; - Tail = CmpBB = 0; - - if (Head->succ_size() != 2) - return false; - MachineBasicBlock *Succ0 = Head->succ_begin()[0]; - MachineBasicBlock *Succ1 = Head->succ_begin()[1]; - - // CmpBB can only have a single predecessor. Tail is allowed many. - if (Succ0->pred_size() != 1) - std::swap(Succ0, Succ1); - - // Succ0 is our candidate for CmpBB. - if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2) - return false; - - CmpBB = Succ0; - Tail = Succ1; - - if (!CmpBB->isSuccessor(Tail)) - return false; - - // The CFG topology checks out. - DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() << " -> BB#" - << CmpBB->getNumber() << " -> BB#" << Tail->getNumber() << '\n'); - ++NumConsidered; - - // Tail is allowed to have many predecessors, but we can't handle PHIs yet. - // - // FIXME: Real PHIs could be if-converted as long as the CmpBB values are - // defined before The CmpBB cmp clobbers the flags. Alternatively, it should - // always be safe to sink the ccmp down to immediately before the CmpBB - // terminators. - if (!trivialTailPHIs()) { - DEBUG(dbgs() << "Can't handle phis in Tail.\n"); - ++NumPhiRejs; - return false; - } - - if (!Tail->livein_empty()) { - DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n"); - ++NumPhysRejs; - return false; - } - - // CmpBB should never have PHIs since Head is its only predecessor. - // FIXME: Clean them up if it happens. 
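[Editor's aside] The topology test at the top of canConvert above corresponds to the triangle drawn in the earlier comment block: Head has exactly two successors, one of them (CmpBB) has Head as its only predecessor and shares a successor (Tail) with Head. A toy version of that check over plain adjacency lists, with made-up block numbers:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Succs[b] / Preds[b]: successor and predecessor lists of block b.
    bool isCcmpTriangle(const std::vector<std::vector<int>> &Succs,
                        const std::vector<std::vector<int>> &Preds, int Head,
                        int &CmpBB, int &Tail) {
      if (Succs[Head].size() != 2)
        return false;
      int S0 = Succs[Head][0], S1 = Succs[Head][1];
      if (Preds[S0].size() != 1)       // CmpBB must have Head as its only pred
        std::swap(S0, S1);
      if (Preds[S0].size() != 1 || Succs[S0].size() != 2)
        return false;
      CmpBB = S0;
      Tail = S1;
      // Tail must also be a successor of CmpBB.
      return std::find(Succs[CmpBB].begin(), Succs[CmpBB].end(), Tail) !=
             Succs[CmpBB].end();
    }

    int main() {
      // Head=0 branches to CmpBB=1 and Tail=2; CmpBB branches to Tail and 3.
      std::vector<std::vector<int>> Succs = {{1, 2}, {2, 3}, {}, {}};
      std::vector<std::vector<int>> Preds = {{}, {0}, {0, 1}, {1}};
      int CmpBB, Tail;
      std::printf("triangle: %d\n", isCcmpTriangle(Succs, Preds, 0, CmpBB, Tail));
    }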
- if (!CmpBB->empty() && CmpBB->front().isPHI()) { - DEBUG(dbgs() << "Can't handle phis in CmpBB.\n"); - ++NumPhi2Rejs; - return false; - } - - if (!CmpBB->livein_empty()) { - DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n"); - ++NumPhysRejs; - return false; - } - - // The branch we're looking to eliminate must be analyzable. - HeadCond.clear(); - MachineBasicBlock *TBB = 0, *FBB = 0; - if (TII->AnalyzeBranch(*Head, TBB, FBB, HeadCond)) { - DEBUG(dbgs() << "Head branch not analyzable.\n"); - ++NumHeadBranchRejs; - return false; - } - - // This is weird, probably some sort of degenerate CFG, or an edge to a - // landing pad. - if (!TBB || HeadCond.empty()) { - DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in Head.\n"); - ++NumHeadBranchRejs; - return false; - } - - if (!parseCond(HeadCond, HeadCmpBBCC)) { - DEBUG(dbgs() << "Unsupported branch type on Head\n"); - ++NumHeadBranchRejs; - return false; - } - - // Make sure the branch direction is right. - if (TBB != CmpBB) { - assert(TBB == Tail && "Unexpected TBB"); - HeadCmpBBCC = ARM64CC::getInvertedCondCode(HeadCmpBBCC); - } - - CmpBBCond.clear(); - TBB = FBB = 0; - if (TII->AnalyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) { - DEBUG(dbgs() << "CmpBB branch not analyzable.\n"); - ++NumCmpBranchRejs; - return false; - } - - if (!TBB || CmpBBCond.empty()) { - DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in CmpBB.\n"); - ++NumCmpBranchRejs; - return false; - } - - if (!parseCond(CmpBBCond, CmpBBTailCC)) { - DEBUG(dbgs() << "Unsupported branch type on CmpBB\n"); - ++NumCmpBranchRejs; - return false; - } - - if (TBB != Tail) - CmpBBTailCC = ARM64CC::getInvertedCondCode(CmpBBTailCC); - - DEBUG(dbgs() << "Head->CmpBB on " << ARM64CC::getCondCodeName(HeadCmpBBCC) - << ", CmpBB->Tail on " << ARM64CC::getCondCodeName(CmpBBTailCC) - << '\n'); - - CmpMI = findConvertibleCompare(CmpBB); - if (!CmpMI) - return false; - - if (!canSpeculateInstrs(CmpBB, CmpMI)) { - ++NumSpeculateRejs; - return false; - } - return true; -} - -void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) { - DEBUG(dbgs() << "Merging BB#" << CmpBB->getNumber() << " into BB#" - << Head->getNumber() << ":\n" << *CmpBB); - - // All CmpBB instructions are moved into Head, and CmpBB is deleted. - // Update the CFG first. - updateTailPHIs(); - Head->removeSuccessor(CmpBB); - CmpBB->removeSuccessor(Tail); - Head->transferSuccessorsAndUpdatePHIs(CmpBB); - DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc(); - TII->RemoveBranch(*Head); - - // If the Head terminator was one of the cbz / tbz branches with built-in - // compare, we need to insert an explicit compare instruction in its place. - if (HeadCond[0].getImm() == -1) { - ++NumCompBranches; - unsigned Opc = 0; - switch (HeadCond[1].getImm()) { - case ARM64::CBZW: - case ARM64::CBNZW: - Opc = ARM64::SUBSWri; - break; - case ARM64::CBZX: - case ARM64::CBNZX: - Opc = ARM64::SUBSXri; - break; - default: - llvm_unreachable("Cannot convert Head branch"); - } - const MCInstrDesc &MCID = TII->get(Opc); - // Create a dummy virtual register for the SUBS def. - unsigned DestReg = - MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF)); - // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz. - BuildMI(*Head, Head->end(), TermDL, MCID) - .addReg(DestReg, RegState::Define | RegState::Dead) - .addOperand(HeadCond[2]) - .addImm(0) - .addImm(0); - // SUBS uses the GPR*sp register classes. 
- MRI->constrainRegClass(HeadCond[2].getReg(), - TII->getRegClass(MCID, 1, TRI, *MF)); - } - - Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end()); - - // Now replace CmpMI with a ccmp instruction that also considers the incoming - // flags. - unsigned Opc = 0; - unsigned FirstOp = 1; // First CmpMI operand to copy. - bool isZBranch = false; // CmpMI is a cbz/cbnz instruction. - switch (CmpMI->getOpcode()) { - default: - llvm_unreachable("Unknown compare opcode"); - case ARM64::SUBSWri: Opc = ARM64::CCMPWi; break; - case ARM64::SUBSWrr: Opc = ARM64::CCMPWr; break; - case ARM64::SUBSXri: Opc = ARM64::CCMPXi; break; - case ARM64::SUBSXrr: Opc = ARM64::CCMPXr; break; - case ARM64::ADDSWri: Opc = ARM64::CCMNWi; break; - case ARM64::ADDSWrr: Opc = ARM64::CCMNWr; break; - case ARM64::ADDSXri: Opc = ARM64::CCMNXi; break; - case ARM64::ADDSXrr: Opc = ARM64::CCMNXr; break; - case ARM64::FCMPSrr: Opc = ARM64::FCCMPSrr; FirstOp = 0; break; - case ARM64::FCMPDrr: Opc = ARM64::FCCMPDrr; FirstOp = 0; break; - case ARM64::FCMPESrr: Opc = ARM64::FCCMPESrr; FirstOp = 0; break; - case ARM64::FCMPEDrr: Opc = ARM64::FCCMPEDrr; FirstOp = 0; break; - case ARM64::CBZW: - case ARM64::CBNZW: - Opc = ARM64::CCMPWi; - FirstOp = 0; - isZBranch = true; - break; - case ARM64::CBZX: - case ARM64::CBNZX: - Opc = ARM64::CCMPXi; - FirstOp = 0; - isZBranch = true; - break; - } - - // The ccmp instruction should set the flags according to the comparison when - // Head would have branched to CmpBB. - // The NZCV immediate operand should provide flags for the case where Head - // would have branched to Tail. These flags should cause the new Head - // terminator to branch to tail. - unsigned NZCV = ARM64CC::getNZCVToSatisfyCondCode(CmpBBTailCC); - const MCInstrDesc &MCID = TII->get(Opc); - MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(), - TII->getRegClass(MCID, 0, TRI, *MF)); - if (CmpMI->getOperand(FirstOp + 1).isReg()) - MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(), - TII->getRegClass(MCID, 1, TRI, *MF)); - MachineInstrBuilder MIB = - BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID) - .addOperand(CmpMI->getOperand(FirstOp)); // Register Rn - if (isZBranch) - MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0 - else - MIB.addOperand(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate - MIB.addImm(NZCV).addImm(HeadCmpBBCC); - - // If CmpMI was a terminator, we need a new conditional branch to replace it. - // This now becomes a Head terminator. - if (isZBranch) { - bool isNZ = CmpMI->getOpcode() == ARM64::CBNZW || - CmpMI->getOpcode() == ARM64::CBNZX; - BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(ARM64::Bcc)) - .addImm(isNZ ? ARM64CC::NE : ARM64CC::EQ) - .addOperand(CmpMI->getOperand(1)); // Branch target. - } - CmpMI->eraseFromParent(); - Head->updateTerminator(); - - RemovedBlocks.push_back(CmpBB); - CmpBB->eraseFromParent(); - DEBUG(dbgs() << "Result:\n" << *Head); - ++NumConverted; -} - -int SSACCmpConv::expectedCodeSizeDelta() const { - int delta = 0; - // If the Head terminator was one of the cbz / tbz branches with built-in - // compare, we need to insert an explicit compare instruction in its place - // plus a branch instruction. 
- if (HeadCond[0].getImm() == -1) { - switch (HeadCond[1].getImm()) { - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: - // Therefore delta += 1 - delta = 1; - break; - default: - llvm_unreachable("Cannot convert Head branch"); - } - } - // If the Cmp terminator was one of the cbz / tbz branches with - // built-in compare, it will be turned into a compare instruction - // into Head, but we do not save any instruction. - // Otherwise, we save the branch instruction. - switch (CmpMI->getOpcode()) { - default: - --delta; - break; - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: - break; - } - return delta; -} - -//===----------------------------------------------------------------------===// -// ARM64ConditionalCompares Pass -//===----------------------------------------------------------------------===// - -namespace { -class ARM64ConditionalCompares : public MachineFunctionPass { - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - const MCSchedModel *SchedModel; - // Does the proceeded function has Oz attribute. - bool MinSize; - MachineRegisterInfo *MRI; - MachineDominatorTree *DomTree; - MachineLoopInfo *Loops; - MachineTraceMetrics *Traces; - MachineTraceMetrics::Ensemble *MinInstr; - SSACCmpConv CmpConv; - -public: - static char ID; - ARM64ConditionalCompares() : MachineFunctionPass(ID) {} - void getAnalysisUsage(AnalysisUsage &AU) const; - bool runOnMachineFunction(MachineFunction &MF); - const char *getPassName() const { return "ARM64 Conditional Compares"; } - -private: - bool tryConvert(MachineBasicBlock *); - void updateDomTree(ArrayRef<MachineBasicBlock *> Removed); - void updateLoops(ArrayRef<MachineBasicBlock *> Removed); - void invalidateTraces(); - bool shouldConvert(); -}; -} // end anonymous namespace - -char ARM64ConditionalCompares::ID = 0; - -namespace llvm { -void initializeARM64ConditionalComparesPass(PassRegistry &); -} - -INITIALIZE_PASS_BEGIN(ARM64ConditionalCompares, "arm64-ccmp", "ARM64 CCMP Pass", - false, false) -INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) -INITIALIZE_PASS_END(ARM64ConditionalCompares, "arm64-ccmp", "ARM64 CCMP Pass", - false, false) - -FunctionPass *llvm::createARM64ConditionalCompares() { - return new ARM64ConditionalCompares(); -} - -void ARM64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineBranchProbabilityInfo>(); - AU.addRequired<MachineDominatorTree>(); - AU.addPreserved<MachineDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); - AU.addRequired<MachineTraceMetrics>(); - AU.addPreserved<MachineTraceMetrics>(); - MachineFunctionPass::getAnalysisUsage(AU); -} - -/// Update the dominator tree after if-conversion erased some blocks. -void -ARM64ConditionalCompares::updateDomTree(ArrayRef<MachineBasicBlock *> Removed) { - // convert() removes CmpBB which was previously dominated by Head. - // CmpBB children should be transferred to Head. 
- MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head); - for (unsigned i = 0, e = Removed.size(); i != e; ++i) { - MachineDomTreeNode *Node = DomTree->getNode(Removed[i]); - assert(Node != HeadNode && "Cannot erase the head node"); - assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head"); - while (Node->getNumChildren()) - DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); - DomTree->eraseNode(Removed[i]); - } -} - -/// Update LoopInfo after if-conversion. -void -ARM64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) { - if (!Loops) - return; - for (unsigned i = 0, e = Removed.size(); i != e; ++i) - Loops->removeBlock(Removed[i]); -} - -/// Invalidate MachineTraceMetrics before if-conversion. -void ARM64ConditionalCompares::invalidateTraces() { - Traces->invalidate(CmpConv.Head); - Traces->invalidate(CmpConv.CmpBB); -} - -/// Apply cost model and heuristics to the if-conversion in IfConv. -/// Return true if the conversion is a good idea. -/// -bool ARM64ConditionalCompares::shouldConvert() { - // Stress testing mode disables all cost considerations. - if (Stress) - return true; - if (!MinInstr) - MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); - - // Head dominates CmpBB, so it is always included in its trace. - MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB); - - // If code size is the main concern - if (MinSize) { - int CodeSizeDelta = CmpConv.expectedCodeSizeDelta(); - DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n'); - // If we are minimizing the code size, do the conversion whatever - // the cost is. - if (CodeSizeDelta < 0) - return true; - if (CodeSizeDelta > 0) { - DEBUG(dbgs() << "Code size is increasing, give up on this one.\n"); - return false; - } - // CodeSizeDelta == 0, continue with the regular heuristics - } - - // Heuristic: The compare conversion delays the execution of the branch - // instruction because we must wait for the inputs to the second compare as - // well. The branch has no dependent instructions, but delaying it increases - // the cost of a misprediction. - // - // Set a limit on the delay we will accept. - unsigned DelayLimit = SchedModel->MispredictPenalty * 3 / 4; - - // Instruction depths can be computed for all trace instructions above CmpBB. - unsigned HeadDepth = - Trace.getInstrCycles(CmpConv.Head->getFirstTerminator()).Depth; - unsigned CmpBBDepth = - Trace.getInstrCycles(CmpConv.CmpBB->getFirstTerminator()).Depth; - DEBUG(dbgs() << "Head depth: " << HeadDepth - << "\nCmpBB depth: " << CmpBBDepth << '\n'); - if (CmpBBDepth > HeadDepth + DelayLimit) { - DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit - << " cycles.\n"); - return false; - } - - // Check the resource depth at the bottom of CmpBB - these instructions will - // be speculated. - unsigned ResDepth = Trace.getResourceDepth(true); - DEBUG(dbgs() << "Resources: " << ResDepth << '\n'); - - // Heuristic: The speculatively executed instructions must all be able to - // merge into the Head block. The Head critical path should dominate the - // resource cost of the speculated instructions. 
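[Editor's aside] shouldConvert, above and continuing below, combines two trace-based tests: the merged compare may delay the branch by at most three quarters of the misprediction penalty, and the speculated instructions must fit under Head's critical path. A small numeric sketch of both tests; the cycle counts are made up:

    #include <cstdio>

    int main() {
      unsigned MispredictPenalty = 16; // cycles, illustrative value
      unsigned HeadDepth = 10, CmpBBDepth = 14, ResDepth = 8;

      unsigned DelayLimit = MispredictPenalty * 3 / 4; // 12 cycles
      bool TooMuchDelay  = CmpBBDepth > HeadDepth + DelayLimit;
      bool TooManyInstrs = ResDepth > HeadDepth;

      std::printf("delay ok: %d, resources ok: %d, convert: %d\n",
                  !TooMuchDelay, !TooManyInstrs, !TooMuchDelay && !TooManyInstrs);
    }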
- if (ResDepth > HeadDepth) { - DEBUG(dbgs() << "Too many instructions to speculate.\n"); - return false; - } - return true; -} - -bool ARM64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) { - bool Changed = false; - while (CmpConv.canConvert(MBB) && shouldConvert()) { - invalidateTraces(); - SmallVector<MachineBasicBlock *, 4> RemovedBlocks; - CmpConv.convert(RemovedBlocks); - Changed = true; - updateDomTree(RemovedBlocks); - updateLoops(RemovedBlocks); - } - return Changed; -} - -bool ARM64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "********** ARM64 Conditional Compares **********\n" - << "********** Function: " << MF.getName() << '\n'); - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); - SchedModel = - MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel(); - MRI = &MF.getRegInfo(); - DomTree = &getAnalysis<MachineDominatorTree>(); - Loops = getAnalysisIfAvailable<MachineLoopInfo>(); - Traces = &getAnalysis<MachineTraceMetrics>(); - MinInstr = 0; - MinSize = MF.getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::MinSize); - - bool Changed = false; - CmpConv.runOnMachineFunction(MF); - - // Visit blocks in dominator tree pre-order. The pre-order enables multiple - // cmp-conversions from the same head block. - // Note that updateDomTree() modifies the children of the DomTree node - // currently being visited. The df_iterator supports that, it doesn't look at - // child_begin() / child_end() until after a node has been visited. - for (df_iterator<MachineDominatorTree *> I = df_begin(DomTree), - E = df_end(DomTree); - I != E; ++I) - if (tryConvert(I->getBlock())) - Changed = true; - - return Changed; -} diff --git a/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp b/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp deleted file mode 100644 index 3e410e5..0000000 --- a/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp +++ /dev/null @@ -1,104 +0,0 @@ -//===-- ARM64DeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// When allowed by the instruction, replace a dead definition of a GPR with -// the zero register. This makes the code a bit friendlier towards the -// hardware's register renamer. -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-dead-defs" -#include "ARM64.h" -#include "ARM64RegisterInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced"); - -namespace { -class ARM64DeadRegisterDefinitions : public MachineFunctionPass { -private: - bool processMachineBasicBlock(MachineBasicBlock *MBB); - -public: - static char ID; // Pass identification, replacement for typeid. 
- explicit ARM64DeadRegisterDefinitions() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &F); - - const char *getPassName() const { return "Dead register definitions"; } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } -}; -char ARM64DeadRegisterDefinitions::ID = 0; -} // end anonymous namespace - -bool -ARM64DeadRegisterDefinitions::processMachineBasicBlock(MachineBasicBlock *MBB) { - bool Changed = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) { - MachineInstr *MI = I; - for (int i = 0, e = MI->getDesc().getNumDefs(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDead() && MO.isDef()) { - assert(!MO.isImplicit() && "Unexpected implicit def!"); - DEBUG(dbgs() << " Dead def operand #" << i << " in:\n "; - MI->print(dbgs())); - // Be careful not to change the register if it's a tied operand. - if (MI->isRegTiedToUseOperand(i)) { - DEBUG(dbgs() << " Ignoring, def is tied operand.\n"); - continue; - } - // Make sure the instruction take a register class that contains - // the zero register and replace it if so. - unsigned NewReg; - switch (MI->getDesc().OpInfo[i].RegClass) { - default: - DEBUG(dbgs() << " Ignoring, register is not a GPR.\n"); - continue; - case ARM64::GPR32RegClassID: - NewReg = ARM64::WZR; - break; - case ARM64::GPR64RegClassID: - NewReg = ARM64::XZR; - break; - } - DEBUG(dbgs() << " Replacing with zero register. New:\n "); - MO.setReg(NewReg); - DEBUG(MI->print(dbgs())); - ++NumDeadDefsReplaced; - } - } - } - return Changed; -} - -// Scan the function for instructions that have a dead definition of a -// register. Replace that register with the zero register when possible. -bool ARM64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &mf) { - MachineFunction *MF = &mf; - bool Changed = false; - DEBUG(dbgs() << "***** ARM64DeadRegisterDefinitions *****\n"); - - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - if (processMachineBasicBlock(I)) - Changed = true; - return Changed; -} - -FunctionPass *llvm::createARM64DeadRegisterDefinitions() { - return new ARM64DeadRegisterDefinitions(); -} diff --git a/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp b/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp deleted file mode 100644 index e082baf..0000000 --- a/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp +++ /dev/null @@ -1,737 +0,0 @@ -//===-- ARM64ExpandPseudoInsts.cpp - Expand pseudo instructions ---*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass that expands pseudo instructions into target -// instructions to allow proper scheduling and other late optimizations. This -// pass should be run after register allocation but before the post-regalloc -// scheduling pass. 
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "ARM64InstrInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Support/MathExtras.h" -using namespace llvm; - -namespace { -class ARM64ExpandPseudo : public MachineFunctionPass { -public: - static char ID; - ARM64ExpandPseudo() : MachineFunctionPass(ID) {} - - const ARM64InstrInfo *TII; - - virtual bool runOnMachineFunction(MachineFunction &Fn); - - virtual const char *getPassName() const { - return "ARM64 pseudo instruction expansion pass"; - } - -private: - bool expandMBB(MachineBasicBlock &MBB); - bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); - bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - unsigned BitSize); -}; -char ARM64ExpandPseudo::ID = 0; -} - -/// \brief Transfer implicit operands on the pseudo instruction to the -/// instructions created from the expansion. -static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, - MachineInstrBuilder &DefMI) { - const MCInstrDesc &Desc = OldMI.getDesc(); - for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e; - ++i) { - const MachineOperand &MO = OldMI.getOperand(i); - assert(MO.isReg() && MO.getReg()); - if (MO.isUse()) - UseMI.addOperand(MO); - else - DefMI.addOperand(MO); - } -} - -/// \brief Helper function which extracts the specified 16-bit chunk from a -/// 64-bit value. -static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) { - assert(ChunkIdx < 4 && "Out of range chunk index specified!"); - - return (Imm >> (ChunkIdx * 16)) & 0xFFFF; -} - -/// \brief Helper function which replicates a 16-bit chunk within a 64-bit -/// value. Indices correspond to element numbers in a v4i16. -static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) { - assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!"); - const unsigned ShiftAmt = ToIdx * 16; - - // Replicate the source chunk to the destination position. - const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt; - // Clear the destination chunk. - Imm &= ~(0xFFFFLL << ShiftAmt); - // Insert the replicated chunk. - return Imm | Chunk; -} - -/// \brief Helper function which tries to materialize a 64-bit value with an -/// ORR + MOVK instruction sequence. -static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const ARM64InstrInfo *TII, unsigned ChunkIdx) { - assert(ChunkIdx < 4 && "Out of range chunk index specified!"); - const unsigned ShiftAmt = ChunkIdx * 16; - - uint64_t Encoding; - if (ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) { - // Create the ORR-immediate instruction. - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri)) - .addOperand(MI.getOperand(0)) - .addReg(ARM64::XZR) - .addImm(Encoding); - - // Create the MOVK instruction. 
- const unsigned Imm16 = getChunk(UImm, ChunkIdx); - const unsigned DstReg = MI.getOperand(0).getReg(); - const bool DstIsDead = MI.getOperand(0).isDead(); - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg) - .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt)); - - transferImpOps(MI, MIB, MIB1); - MI.eraseFromParent(); - return true; - } - - return false; -} - -/// \brief Check whether the given 16-bit chunk replicated to full 64-bit width -/// can be materialized with an ORR instruction. -static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) { - Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk; - - return ARM64_AM::processLogicalImmediate(Chunk, 64, Encoding); -} - -/// \brief Check for identical 16-bit chunks within the constant and if so -/// materialize them with a single ORR instruction. The remaining one or two -/// 16-bit chunks will be materialized with MOVK instructions. -/// -/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order -/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with -/// an ORR instruction. -/// -static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const ARM64InstrInfo *TII) { - typedef DenseMap<uint64_t, unsigned> CountMap; - CountMap Counts; - - // Scan the constant and count how often every chunk occurs. - for (unsigned Idx = 0; Idx < 4; ++Idx) - ++Counts[getChunk(UImm, Idx)]; - - // Traverse the chunks to find one which occurs more than once. - for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end(); - Chunk != End; ++Chunk) { - const uint64_t ChunkVal = Chunk->first; - const unsigned Count = Chunk->second; - - uint64_t Encoding = 0; - - // We are looking for chunks which have two or three instances and can be - // materialized with an ORR instruction. - if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding)) - continue; - - const bool CountThree = Count == 3; - // Create the ORR-immediate instruction. - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri)) - .addOperand(MI.getOperand(0)) - .addReg(ARM64::XZR) - .addImm(Encoding); - - const unsigned DstReg = MI.getOperand(0).getReg(); - const bool DstIsDead = MI.getOperand(0).isDead(); - - unsigned ShiftAmt = 0; - uint64_t Imm16 = 0; - // Find the first chunk not materialized with the ORR instruction. - for (; ShiftAmt < 64; ShiftAmt += 16) { - Imm16 = (UImm >> ShiftAmt) & 0xFFFF; - - if (Imm16 != ChunkVal) - break; - } - - // Create the first MOVK instruction. - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) - .addReg(DstReg, - RegState::Define | getDeadRegState(DstIsDead && CountThree)) - .addReg(DstReg) - .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt)); - - // In case we have three instances the whole constant is now materialized - // and we can exit. - if (CountThree) { - transferImpOps(MI, MIB, MIB1); - MI.eraseFromParent(); - return true; - } - - // Find the remaining chunk which needs to be materialized. - for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) { - Imm16 = (UImm >> ShiftAmt) & 0xFFFF; - - if (Imm16 != ChunkVal) - break; - } - - // Create the second MOVK instruction. 
- MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg) - .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt)); - - transferImpOps(MI, MIB, MIB2); - MI.eraseFromParent(); - return true; - } - - return false; -} - -/// \brief Check whether this chunk matches the pattern '1...0...'. This pattern -/// starts a contiguous sequence of ones if we look at the bits from the LSB -/// towards the MSB. -static bool isStartChunk(uint64_t Chunk) { - if (Chunk == 0 || Chunk == UINT64_MAX) - return false; - - return (CountLeadingOnes_64(Chunk) + countTrailingZeros(Chunk)) == 64; -} - -/// \brief Check whether this chunk matches the pattern '0...1...' This pattern -/// ends a contiguous sequence of ones if we look at the bits from the LSB -/// towards the MSB. -static bool isEndChunk(uint64_t Chunk) { - if (Chunk == 0 || Chunk == UINT64_MAX) - return false; - - return (countLeadingZeros(Chunk) + CountTrailingOnes_64(Chunk)) == 64; -} - -/// \brief Clear or set all bits in the chunk at the given index. -static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) { - const uint64_t Mask = 0xFFFF; - - if (Clear) - // Clear chunk in the immediate. - Imm &= ~(Mask << (Idx * 16)); - else - // Set all bits in the immediate for the particular chunk. - Imm |= Mask << (Idx * 16); - - return Imm; -} - -/// \brief Check whether the constant contains a sequence of contiguous ones, -/// which might be interrupted by one or two chunks. If so, materialize the -/// sequence of contiguous ones with an ORR instruction. -/// Materialize the chunks which are either interrupting the sequence or outside -/// of the sequence with a MOVK instruction. -/// -/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk -/// which ends the sequence (0...1...). Then we are looking for constants which -/// contain at least one S and E chunk. -/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|. -/// -/// We are also looking for constants like |S|A|B|E| where the contiguous -/// sequence of ones wraps around the MSB into the LSB. -/// -static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const ARM64InstrInfo *TII) { - const int NotSet = -1; - const uint64_t Mask = 0xFFFF; - - int StartIdx = NotSet; - int EndIdx = NotSet; - // Try to find the chunks which start/end a contiguous sequence of ones. - for (int Idx = 0; Idx < 4; ++Idx) { - int64_t Chunk = getChunk(UImm, Idx); - // Sign extend the 16-bit chunk to 64-bit. - Chunk = (Chunk << 48) >> 48; - - if (isStartChunk(Chunk)) - StartIdx = Idx; - else if (isEndChunk(Chunk)) - EndIdx = Idx; - } - - // Early exit in case we can't find a start/end chunk. - if (StartIdx == NotSet || EndIdx == NotSet) - return false; - - // Outside of the contiguous sequence of ones everything needs to be zero. - uint64_t Outside = 0; - // Chunks between the start and end chunk need to have all their bits set. - uint64_t Inside = Mask; - - // If our contiguous sequence of ones wraps around from the MSB into the LSB, - // just swap indices and pretend we are materializing a contiguous sequence - // of zeros surrounded by a contiguous sequence of ones. 
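As a worked illustration of the non-wrapping case (the constant is hypothetical, not taken from the original source): for 0x00FF1234FFFFF000, chunk 0 (0xF000) starts the run of ones, chunk 3 (0x00FF) ends it, chunk 1 (0xFFFF) lies fully inside it, and chunk 2 (0x1234) interrupts it. This path would therefore emit an ORR with the contiguous-ones logical immediate 0x00FFFFFFFFFFF000 followed by a single MOVK of 0x1234 at LSL #32.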
- if (StartIdx > EndIdx) { - std::swap(StartIdx, EndIdx); - std::swap(Outside, Inside); - } - - uint64_t OrrImm = UImm; - int FirstMovkIdx = NotSet; - int SecondMovkIdx = NotSet; - - // Find out which chunks we need to patch up to obtain a contiguous sequence - // of ones. - for (int Idx = 0; Idx < 4; ++Idx) { - const uint64_t Chunk = getChunk(UImm, Idx); - - // Check whether we are looking at a chunk which is not part of the - // contiguous sequence of ones. - if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) { - OrrImm = updateImm(OrrImm, Idx, Outside == 0); - - // Remember the index we need to patch. - if (FirstMovkIdx == NotSet) - FirstMovkIdx = Idx; - else - SecondMovkIdx = Idx; - - // Check whether we are looking a chunk which is part of the contiguous - // sequence of ones. - } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) { - OrrImm = updateImm(OrrImm, Idx, Inside != Mask); - - // Remember the index we need to patch. - if (FirstMovkIdx == NotSet) - FirstMovkIdx = Idx; - else - SecondMovkIdx = Idx; - } - } - assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!"); - - // Create the ORR-immediate instruction. - uint64_t Encoding = 0; - ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding); - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri)) - .addOperand(MI.getOperand(0)) - .addReg(ARM64::XZR) - .addImm(Encoding); - - const unsigned DstReg = MI.getOperand(0).getReg(); - const bool DstIsDead = MI.getOperand(0).isDead(); - - const bool SingleMovk = SecondMovkIdx == NotSet; - // Create the first MOVK instruction. - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) - .addReg(DstReg, - RegState::Define | getDeadRegState(DstIsDead && SingleMovk)) - .addReg(DstReg) - .addImm(getChunk(UImm, FirstMovkIdx)) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, FirstMovkIdx * 16)); - - // Early exit in case we only need to emit a single MOVK instruction. - if (SingleMovk) { - transferImpOps(MI, MIB, MIB1); - MI.eraseFromParent(); - return true; - } - - // Create the second MOVK instruction. - MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg) - .addImm(getChunk(UImm, SecondMovkIdx)) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, SecondMovkIdx * 16)); - - transferImpOps(MI, MIB, MIB2); - MI.eraseFromParent(); - return true; -} - -/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more -/// real move-immediate instructions to synthesize the immediate. -bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned BitSize) { - MachineInstr &MI = *MBBI; - uint64_t Imm = MI.getOperand(1).getImm(); - const unsigned Mask = 0xFFFF; - - // Try a MOVI instruction (aka ORR-immediate with the zero register). - uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize); - uint64_t Encoding; - if (ARM64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { - unsigned Opc = (BitSize == 32 ? ARM64::ORRWri : ARM64::ORRXri); - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)) - .addOperand(MI.getOperand(0)) - .addReg(BitSize == 32 ? ARM64::WZR : ARM64::XZR) - .addImm(Encoding); - transferImpOps(MI, MIB, MIB); - MI.eraseFromParent(); - return true; - } - - // Scan the immediate and count the number of 16-bit chunks which are either - // all ones or all zeros. 
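A rough standalone sketch of the chunk scan performed next (not part of the original tree; the immediate value is a hypothetical example):

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t Imm = 0x0000123400005678ULL; // hypothetical immediate
  unsigned OneChunks = 0, ZeroChunks = 0;
  for (unsigned Shift = 0; Shift < 64; Shift += 16) {
    const uint64_t Chunk = (Imm >> Shift) & 0xFFFF;
    if (Chunk == 0xFFFF)
      ++OneChunks;
    else if (Chunk == 0)
      ++ZeroChunks;
  }
  // Two all-zero chunks here, so the expansion ends up as a MOVZ plus a
  // single MOVK for the two non-zero chunks.
  std::printf("one chunks: %u, zero chunks: %u\n", OneChunks, ZeroChunks);
  return 0;
}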
- unsigned OneChunks = 0; - unsigned ZeroChunks = 0; - for (unsigned Shift = 0; Shift < BitSize; Shift += 16) { - const unsigned Chunk = (Imm >> Shift) & Mask; - if (Chunk == Mask) - OneChunks++; - else if (Chunk == 0) - ZeroChunks++; - } - - // Since we can't materialize the constant with a single ORR instruction, - // let's see whether we can materialize 3/4 of the constant with an ORR - // instruction and use an additional MOVK instruction to materialize the - // remaining 1/4. - // - // We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|. - // - // E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR, - // we would create the following instruction sequence: - // - // ORR x0, xzr, |A|X|A|X| - // MOVK x0, |B|, LSL #16 - // - // Only look at 64-bit constants which can't be materialized with a single - // instruction e.g. which have less than either three all zero or all one - // chunks. - // - // Ignore 32-bit constants here, they always can be materialized with a - // MOVZ/MOVN + MOVK pair. Since the 32-bit constant can't be materialized - // with a single ORR, the best sequence we can achieve is a ORR + MOVK pair. - // Thus we fall back to the default code below which in the best case creates - // a single MOVZ/MOVN instruction (in case one chunk is all zero or all one). - // - if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) { - // If we interpret the 64-bit constant as a v4i16, are elements 0 and 2 - // identical? - if (getChunk(UImm, 0) == getChunk(UImm, 2)) { - // See if we can come up with a constant which can be materialized with - // ORR-immediate by replicating element 3 into element 1. - uint64_t OrrImm = replicateChunk(UImm, 3, 1); - if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1)) - return true; - - // See if we can come up with a constant which can be materialized with - // ORR-immediate by replicating element 1 into element 3. - OrrImm = replicateChunk(UImm, 1, 3); - if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3)) - return true; - - // If we interpret the 64-bit constant as a v4i16, are elements 1 and 3 - // identical? - } else if (getChunk(UImm, 1) == getChunk(UImm, 3)) { - // See if we can come up with a constant which can be materialized with - // ORR-immediate by replicating element 2 into element 0. - uint64_t OrrImm = replicateChunk(UImm, 2, 0); - if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0)) - return true; - - // See if we can come up with a constant which can be materialized with - // ORR-immediate by replicating element 1 into element 3. - OrrImm = replicateChunk(UImm, 0, 2); - if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2)) - return true; - } - } - - // Check for identical 16-bit chunks within the constant and if so materialize - // them with a single ORR instruction. The remaining one or two 16-bit chunks - // will be materialized with MOVK instructions. - if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII)) - return true; - - // Check whether the constant contains a sequence of contiguous ones, which - // might be interrupted by one or two chunks. If so, materialize the sequence - // of contiguous ones with an ORR instruction. Materialize the chunks which - // are either interrupting the sequence or outside of the sequence with a - // MOVK instruction. 
- if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII)) - return true; - - // Use a MOVZ or MOVN instruction to set the high bits, followed by one or - // more MOVK instructions to insert additional 16-bit portions into the - // lower bits. - bool isNeg = false; - - // Use MOVN to materialize the high bits if we have more all one chunks - // than all zero chunks. - if (OneChunks > ZeroChunks) { - isNeg = true; - Imm = ~Imm; - } - - unsigned FirstOpc; - if (BitSize == 32) { - Imm &= (1LL << 32) - 1; - FirstOpc = (isNeg ? ARM64::MOVNWi : ARM64::MOVZWi); - } else { - FirstOpc = (isNeg ? ARM64::MOVNXi : ARM64::MOVZXi); - } - unsigned Shift = 0; // LSL amount for high bits with MOVZ/MOVN - unsigned LastShift = 0; // LSL amount for last MOVK - if (Imm != 0) { - unsigned LZ = countLeadingZeros(Imm); - unsigned TZ = countTrailingZeros(Imm); - Shift = ((63 - LZ) / 16) * 16; - LastShift = (TZ / 16) * 16; - } - unsigned Imm16 = (Imm >> Shift) & Mask; - unsigned DstReg = MI.getOperand(0).getReg(); - bool DstIsDead = MI.getOperand(0).isDead(); - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc)) - .addReg(DstReg, RegState::Define | - getDeadRegState(DstIsDead && Shift == LastShift)) - .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift)); - - // If a MOVN was used for the high bits of a negative value, flip the rest - // of the bits back for use with MOVK. - if (isNeg) - Imm = ~Imm; - - if (Shift == LastShift) { - transferImpOps(MI, MIB1, MIB1); - MI.eraseFromParent(); - return true; - } - - MachineInstrBuilder MIB2; - unsigned Opc = (BitSize == 32 ? ARM64::MOVKWi : ARM64::MOVKXi); - while (Shift != LastShift) { - Shift -= 16; - Imm16 = (Imm >> Shift) & Mask; - if (Imm16 == (isNeg ? Mask : 0)) - continue; // This 16-bit portion is already set correctly. - MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(DstIsDead && Shift == LastShift)) - .addReg(DstReg) - .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift)); - } - - transferImpOps(MI, MIB1, MIB2); - MI.eraseFromParent(); - return true; -} - -/// \brief If MBBI references a pseudo instruction that should be expanded here, -/// do the expansion and return true. Otherwise return false. 
-bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI) { - MachineInstr &MI = *MBBI; - unsigned Opcode = MI.getOpcode(); - switch (Opcode) { - default: - break; - - case ARM64::ADDWrr: - case ARM64::SUBWrr: - case ARM64::ADDXrr: - case ARM64::SUBXrr: - case ARM64::ADDSWrr: - case ARM64::SUBSWrr: - case ARM64::ADDSXrr: - case ARM64::SUBSXrr: - case ARM64::ANDWrr: - case ARM64::ANDXrr: - case ARM64::BICWrr: - case ARM64::BICXrr: - case ARM64::EONWrr: - case ARM64::EONXrr: - case ARM64::EORWrr: - case ARM64::EORXrr: - case ARM64::ORNWrr: - case ARM64::ORNXrr: - case ARM64::ORRWrr: - case ARM64::ORRXrr: { - unsigned Opcode; - switch (MI.getOpcode()) { - default: - return false; - case ARM64::ADDWrr: Opcode = ARM64::ADDWrs; break; - case ARM64::SUBWrr: Opcode = ARM64::SUBWrs; break; - case ARM64::ADDXrr: Opcode = ARM64::ADDXrs; break; - case ARM64::SUBXrr: Opcode = ARM64::SUBXrs; break; - case ARM64::ADDSWrr: Opcode = ARM64::ADDSWrs; break; - case ARM64::SUBSWrr: Opcode = ARM64::SUBSWrs; break; - case ARM64::ADDSXrr: Opcode = ARM64::ADDSXrs; break; - case ARM64::SUBSXrr: Opcode = ARM64::SUBSXrs; break; - case ARM64::ANDWrr: Opcode = ARM64::ANDWrs; break; - case ARM64::ANDXrr: Opcode = ARM64::ANDXrs; break; - case ARM64::BICWrr: Opcode = ARM64::BICWrs; break; - case ARM64::BICXrr: Opcode = ARM64::BICXrs; break; - case ARM64::EONWrr: Opcode = ARM64::EONWrs; break; - case ARM64::EONXrr: Opcode = ARM64::EONXrs; break; - case ARM64::EORWrr: Opcode = ARM64::EORWrs; break; - case ARM64::EORXrr: Opcode = ARM64::EORXrs; break; - case ARM64::ORNWrr: Opcode = ARM64::ORNWrs; break; - case ARM64::ORNXrr: Opcode = ARM64::ORNXrs; break; - case ARM64::ORRWrr: Opcode = ARM64::ORRWrs; break; - case ARM64::ORRXrr: Opcode = ARM64::ORRXrs; break; - } - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode), - MI.getOperand(0).getReg()) - .addOperand(MI.getOperand(1)) - .addOperand(MI.getOperand(2)) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - transferImpOps(MI, MIB1, MIB1); - MI.eraseFromParent(); - return true; - } - - case ARM64::FCVTSHpseudo: { - MachineOperand Src = MI.getOperand(1); - Src.setImplicit(); - unsigned SrcH = TII->getRegisterInfo().getSubReg(Src.getReg(), ARM64::hsub); - auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::FCVTSHr)) - .addOperand(MI.getOperand(0)) - .addReg(SrcH, RegState::Undef) - .addOperand(Src); - transferImpOps(MI, MIB, MIB); - MI.eraseFromParent(); - return true; - } - case ARM64::LOADgot: { - // Expand into ADRP + LDR. 
- unsigned DstReg = MI.getOperand(0).getReg(); - const MachineOperand &MO1 = MI.getOperand(1); - unsigned Flags = MO1.getTargetFlags(); - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg); - MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::LDRXui)) - .addOperand(MI.getOperand(0)) - .addReg(DstReg); - - if (MO1.isGlobal()) { - MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | ARM64II::MO_PAGE); - MIB2.addGlobalAddress(MO1.getGlobal(), 0, - Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - } else if (MO1.isSymbol()) { - MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | ARM64II::MO_PAGE); - MIB2.addExternalSymbol(MO1.getSymbolName(), - Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - } else { - assert(MO1.isCPI() && - "Only expect globals, externalsymbols, or constant pools"); - MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), - Flags | ARM64II::MO_PAGE); - MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), - Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - } - - transferImpOps(MI, MIB1, MIB2); - MI.eraseFromParent(); - return true; - } - - case ARM64::MOVaddr: - case ARM64::MOVaddrJT: - case ARM64::MOVaddrCP: - case ARM64::MOVaddrBA: - case ARM64::MOVaddrTLS: - case ARM64::MOVaddrEXT: { - // Expand into ADRP + ADD. - unsigned DstReg = MI.getOperand(0).getReg(); - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg) - .addOperand(MI.getOperand(1)); - - MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADDXri)) - .addOperand(MI.getOperand(0)) - .addReg(DstReg) - .addOperand(MI.getOperand(2)) - .addImm(0); - - transferImpOps(MI, MIB1, MIB2); - MI.eraseFromParent(); - return true; - } - - case ARM64::MOVi32imm: - return expandMOVImm(MBB, MBBI, 32); - case ARM64::MOVi64imm: - return expandMOVImm(MBB, MBBI, 64); - case ARM64::RET_ReallyLR: - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::RET)) - .addReg(ARM64::LR); - MI.eraseFromParent(); - return true; - } - return false; -} - -/// \brief Iterate over the instructions in basic block MBB and expand any -/// pseudo instructions. Return true if anything was modified. -bool ARM64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) { - bool Modified = false; - - MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - while (MBBI != E) { - MachineBasicBlock::iterator NMBBI = std::next(MBBI); - Modified |= expandMI(MBB, MBBI); - MBBI = NMBBI; - } - - return Modified; -} - -bool ARM64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - TII = static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo()); - - bool Modified = false; - for (auto &MBB : MF) - Modified |= expandMBB(MBB); - return Modified; -} - -/// \brief Returns an instance of the pseudo instruction expansion pass. -FunctionPass *llvm::createARM64ExpandPseudoPass() { - return new ARM64ExpandPseudo(); -} diff --git a/lib/Target/ARM64/ARM64FastISel.cpp b/lib/Target/ARM64/ARM64FastISel.cpp deleted file mode 100644 index 51b0f76..0000000 --- a/lib/Target/ARM64/ARM64FastISel.cpp +++ /dev/null @@ -1,1929 +0,0 @@ -//===-- ARM6464FastISel.cpp - ARM64 FastISel implementation ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file defines the ARM64-specific support for the FastISel class. Some -// of the target-specific code is generated by tablegen in the file -// ARM64GenFastISel.inc, which is #included here. -// -//===----------------------------------------------------------------------===// - -#include "ARM64.h" -#include "ARM64TargetMachine.h" -#include "ARM64Subtarget.h" -#include "ARM64CallingConv.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/FastISel.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Operator.h" -#include "llvm/Support/CommandLine.h" -using namespace llvm; - -namespace { - -class ARM64FastISel : public FastISel { - - class Address { - public: - typedef enum { - RegBase, - FrameIndexBase - } BaseKind; - - private: - BaseKind Kind; - union { - unsigned Reg; - int FI; - } Base; - int64_t Offset; - - public: - Address() : Kind(RegBase), Offset(0) { Base.Reg = 0; } - void setKind(BaseKind K) { Kind = K; } - BaseKind getKind() const { return Kind; } - bool isRegBase() const { return Kind == RegBase; } - bool isFIBase() const { return Kind == FrameIndexBase; } - void setReg(unsigned Reg) { - assert(isRegBase() && "Invalid base register access!"); - Base.Reg = Reg; - } - unsigned getReg() const { - assert(isRegBase() && "Invalid base register access!"); - return Base.Reg; - } - void setFI(unsigned FI) { - assert(isFIBase() && "Invalid base frame index access!"); - Base.FI = FI; - } - unsigned getFI() const { - assert(isFIBase() && "Invalid base frame index access!"); - return Base.FI; - } - void setOffset(int64_t O) { Offset = O; } - int64_t getOffset() { return Offset; } - - bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); } - }; - - /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can - /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; - LLVMContext *Context; - -private: - // Selection routines. - bool SelectLoad(const Instruction *I); - bool SelectStore(const Instruction *I); - bool SelectBranch(const Instruction *I); - bool SelectIndirectBr(const Instruction *I); - bool SelectCmp(const Instruction *I); - bool SelectSelect(const Instruction *I); - bool SelectFPExt(const Instruction *I); - bool SelectFPTrunc(const Instruction *I); - bool SelectFPToInt(const Instruction *I, bool Signed); - bool SelectIntToFP(const Instruction *I, bool Signed); - bool SelectRem(const Instruction *I, unsigned ISDOpcode); - bool SelectCall(const Instruction *I, const char *IntrMemName); - bool SelectIntrinsicCall(const IntrinsicInst &I); - bool SelectRet(const Instruction *I); - bool SelectTrunc(const Instruction *I); - bool SelectIntExt(const Instruction *I); - bool SelectMul(const Instruction *I); - - // Utility helper routines. 
- bool isTypeLegal(Type *Ty, MVT &VT); - bool isLoadStoreTypeLegal(Type *Ty, MVT &VT); - bool ComputeAddress(const Value *Obj, Address &Addr); - bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor, - bool UseUnscaled); - void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB, - unsigned Flags, bool UseUnscaled); - bool IsMemCpySmall(uint64_t Len, unsigned Alignment); - bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, - unsigned Alignment); - // Emit functions. - bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt); - bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr, - bool UseUnscaled = false); - bool EmitStore(MVT VT, unsigned SrcReg, Address Addr, - bool UseUnscaled = false); - unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); - unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt); - - unsigned ARM64MaterializeFP(const ConstantFP *CFP, MVT VT); - unsigned ARM64MaterializeGV(const GlobalValue *GV); - - // Call handling routines. -private: - CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const; - bool ProcessCallArgs(SmallVectorImpl<Value *> &Args, - SmallVectorImpl<unsigned> &ArgRegs, - SmallVectorImpl<MVT> &ArgVTs, - SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, - SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC, - unsigned &NumBytes); - bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, - const Instruction *I, CallingConv::ID CC, unsigned &NumBytes); - -public: - // Backend specific FastISel code. - virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); - virtual unsigned TargetMaterializeConstant(const Constant *C); - - explicit ARM64FastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) - : FastISel(funcInfo, libInfo) { - Subtarget = &TM.getSubtarget<ARM64Subtarget>(); - Context = &funcInfo.Fn->getContext(); - } - - virtual bool TargetSelectInstruction(const Instruction *I); - -#include "ARM64GenFastISel.inc" -}; - -} // end anonymous namespace - -#include "ARM64GenCallingConv.inc" - -CCAssignFn *ARM64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { - if (CC == CallingConv::WebKit_JS) - return CC_ARM64_WebKit_JS; - return Subtarget->isTargetDarwin() ? CC_ARM64_DarwinPCS : CC_ARM64_AAPCS; -} - -unsigned ARM64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) { - assert(TLI.getValueType(AI->getType(), true) == MVT::i64 && - "Alloca should always return a pointer."); - - // Don't handle dynamic allocas. - if (!FuncInfo.StaticAllocaMap.count(AI)) - return 0; - - DenseMap<const AllocaInst *, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - - if (SI != FuncInfo.StaticAllocaMap.end()) { - unsigned ResultReg = createResultReg(&ARM64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADDXri), - ResultReg) - .addFrameIndex(SI->second) - .addImm(0) - .addImm(0); - return ResultReg; - } - - return 0; -} - -unsigned ARM64FastISel::ARM64MaterializeFP(const ConstantFP *CFP, MVT VT) { - const APFloat Val = CFP->getValueAPF(); - bool is64bit = (VT == MVT::f64); - - // This checks to see if we can use FMOV instructions to materialize - // a constant, otherwise we have to materialize via the constant pool. 
- if (TLI.isFPImmLegal(Val, VT)) { - int Imm; - unsigned Opc; - if (is64bit) { - Imm = ARM64_AM::getFP64Imm(Val); - Opc = ARM64::FMOVDi; - } else { - Imm = ARM64_AM::getFP32Imm(Val); - Opc = ARM64::FMOVSi; - } - unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addImm(Imm); - return ResultReg; - } - - // Materialize via constant pool. MachineConstantPool wants an explicit - // alignment. - unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); - if (Align == 0) - Align = DL.getTypeAllocSize(CFP->getType()); - - unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); - unsigned ADRPReg = createResultReg(&ARM64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP), - ADRPReg).addConstantPoolIndex(Idx, 0, ARM64II::MO_PAGE); - - unsigned Opc = is64bit ? ARM64::LDRDui : ARM64::LDRSui; - unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(ADRPReg) - .addConstantPoolIndex(Idx, 0, ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - return ResultReg; -} - -unsigned ARM64FastISel::ARM64MaterializeGV(const GlobalValue *GV) { - // We can't handle thread-local variables quickly yet. Unfortunately we have - // to peer through any aliases to find out if that rule applies. - const GlobalValue *TLSGV = GV; - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - TLSGV = GA->getAliasedGlobal(); - - if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(TLSGV)) - if (GVar->isThreadLocal()) - return 0; - - unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); - - EVT DestEVT = TLI.getValueType(GV->getType(), true); - if (!DestEVT.isSimple()) - return 0; - MVT DestVT = DestEVT.getSimpleVT(); - - unsigned ADRPReg = createResultReg(&ARM64::GPR64RegClass); - unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); - - if (OpFlags & ARM64II::MO_GOT) { - // ADRP + LDRX - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP), - ADRPReg) - .addGlobalAddress(GV, 0, ARM64II::MO_GOT | ARM64II::MO_PAGE); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::LDRXui), - ResultReg) - .addReg(ADRPReg) - .addGlobalAddress(GV, 0, ARM64II::MO_GOT | ARM64II::MO_PAGEOFF | - ARM64II::MO_NC); - } else { - // ADRP + ADDX - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP), - ADRPReg).addGlobalAddress(GV, 0, ARM64II::MO_PAGE); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADDXri), - ResultReg) - .addReg(ADRPReg) - .addGlobalAddress(GV, 0, ARM64II::MO_PAGEOFF | ARM64II::MO_NC) - .addImm(0); - } - return ResultReg; -} - -unsigned ARM64FastISel::TargetMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); - - // Only handle simple types. - if (!CEVT.isSimple()) - return 0; - MVT VT = CEVT.getSimpleVT(); - - // FIXME: Handle ConstantInt. - if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) - return ARM64MaterializeFP(CFP, VT); - else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) - return ARM64MaterializeGV(GV); - - return 0; -} - -// Computes the address to get to an object. 
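A minimal standalone sketch of the constant-index folding the GetElementPtr case of ComputeAddress performs below, for sequential types only (struct fields use StructLayout offsets instead); the indices and element sizes are hypothetical stand-ins for DataLayout queries:

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical GEP: constant indices paired with the byte size of each
  // indexed type.
  const int64_t IdxAndSize[][2] = {{3, 8}, {2, 4}};
  int64_t Offset = 0;
  for (const auto &IS : IdxAndSize)
    Offset += IS[0] * IS[1]; // constant index folded into the running offset
  std::printf("folded byte offset: %lld\n", (long long)Offset); // prints 32
  return 0;
}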
-bool ARM64FastISel::ComputeAddress(const Value *Obj, Address &Addr) { - const User *U = NULL; - unsigned Opcode = Instruction::UserOp1; - if (const Instruction *I = dyn_cast<Instruction>(Obj)) { - // Don't walk into other basic blocks unless the object is an alloca from - // another block, otherwise it may not have a virtual register assigned. - if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || - FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { - Opcode = I->getOpcode(); - U = I; - } - } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { - Opcode = C->getOpcode(); - U = C; - } - - if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType())) - if (Ty->getAddressSpace() > 255) - // Fast instruction selection doesn't support the special - // address spaces. - return false; - - switch (Opcode) { - default: - break; - case Instruction::BitCast: { - // Look through bitcasts. - return ComputeAddress(U->getOperand(0), Addr); - } - case Instruction::IntToPtr: { - // Look past no-op inttoptrs. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) - return ComputeAddress(U->getOperand(0), Addr); - break; - } - case Instruction::PtrToInt: { - // Look past no-op ptrtoints. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) - return ComputeAddress(U->getOperand(0), Addr); - break; - } - case Instruction::GetElementPtr: { - Address SavedAddr = Addr; - uint64_t TmpOffset = Addr.getOffset(); - - // Iterate through the GEP folding the constants into offsets where - // we can. - gep_type_iterator GTI = gep_type_begin(U); - for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; - ++i, ++GTI) { - const Value *Op = *i; - if (StructType *STy = dyn_cast<StructType>(*GTI)) { - const StructLayout *SL = DL.getStructLayout(STy); - unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); - TmpOffset += SL->getElementOffset(Idx); - } else { - uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); - for (;;) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { - // Constant-offset addressing. - TmpOffset += CI->getSExtValue() * S; - break; - } - if (canFoldAddIntoGEP(U, Op)) { - // A compatible add with a constant operand. Fold the constant. - ConstantInt *CI = - cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); - TmpOffset += CI->getSExtValue() * S; - // Iterate on the other operand. - Op = cast<AddOperator>(Op)->getOperand(0); - continue; - } - // Unsupported - goto unsupported_gep; - } - } - } - - // Try to grab the base operand now. - Addr.setOffset(TmpOffset); - if (ComputeAddress(U->getOperand(0), Addr)) - return true; - - // We failed, restore everything and try the other options. - Addr = SavedAddr; - - unsupported_gep: - break; - } - case Instruction::Alloca: { - const AllocaInst *AI = cast<AllocaInst>(Obj); - DenseMap<const AllocaInst *, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI != FuncInfo.StaticAllocaMap.end()) { - Addr.setKind(Address::FrameIndexBase); - Addr.setFI(SI->second); - return true; - } - break; - } - } - - // Try to get this in a register if nothing else has worked. - if (!Addr.isValid()) - Addr.setReg(getRegForValue(Obj)); - return Addr.isValid(); -} - -bool ARM64FastISel::isTypeLegal(Type *Ty, MVT &VT) { - EVT evt = TLI.getValueType(Ty, true); - - // Only handle simple types. - if (evt == MVT::Other || !evt.isSimple()) - return false; - VT = evt.getSimpleVT(); - - // Handle all legal types, i.e. a register that will directly hold this - // value. 
- return TLI.isTypeLegal(VT); -} - -bool ARM64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) { - if (isTypeLegal(Ty, VT)) - return true; - - // If this is a type than can be sign or zero-extended to a basic operation - // go ahead and accept it now. For stores, this reflects truncation. - if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) - return true; - - return false; -} - -bool ARM64FastISel::SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor, - bool UseUnscaled) { - bool needsLowering = false; - int64_t Offset = Addr.getOffset(); - switch (VT.SimpleTy) { - default: - return false; - case MVT::i1: - case MVT::i8: - case MVT::i16: - case MVT::i32: - case MVT::i64: - case MVT::f32: - case MVT::f64: - if (!UseUnscaled) - // Using scaled, 12-bit, unsigned immediate offsets. - needsLowering = ((Offset & 0xfff) != Offset); - else - // Using unscaled, 9-bit, signed immediate offsets. - needsLowering = (Offset > 256 || Offset < -256); - break; - } - - // FIXME: If this is a stack pointer and the offset needs to be simplified - // then put the alloca address into a register, set the base type back to - // register and continue. This should almost never happen. - if (needsLowering && Addr.getKind() == Address::FrameIndexBase) { - return false; - } - - // Since the offset is too large for the load/store instruction get the - // reg+offset into a register. - if (needsLowering) { - uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor; - unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false, - UnscaledOffset, MVT::i64); - if (ResultReg == 0) - return false; - Addr.setReg(ResultReg); - Addr.setOffset(0); - } - return true; -} - -void ARM64FastISel::AddLoadStoreOperands(Address &Addr, - const MachineInstrBuilder &MIB, - unsigned Flags, bool UseUnscaled) { - int64_t Offset = Addr.getOffset(); - // Frame base works a bit differently. Handle it separately. - if (Addr.getKind() == Address::FrameIndexBase) { - int FI = Addr.getFI(); - // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size - // and alignment should be based on the VT. - MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI, Offset), Flags, - MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); - // Now add the rest of the operands. - MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO); - } else { - // Now add the rest of the operands. - MIB.addReg(Addr.getReg()); - MIB.addImm(Offset); - } -} - -bool ARM64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr, - bool UseUnscaled) { - // Negative offsets require unscaled, 9-bit, signed immediate offsets. - // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. - if (!UseUnscaled && Addr.getOffset() < 0) - UseUnscaled = true; - - unsigned Opc; - const TargetRegisterClass *RC; - bool VTIsi1 = false; - int64_t ScaleFactor = 0; - switch (VT.SimpleTy) { - default: - return false; - case MVT::i1: - VTIsi1 = true; - // Intentional fall-through. - case MVT::i8: - Opc = UseUnscaled ? ARM64::LDURBBi : ARM64::LDRBBui; - RC = &ARM64::GPR32RegClass; - ScaleFactor = 1; - break; - case MVT::i16: - Opc = UseUnscaled ? ARM64::LDURHHi : ARM64::LDRHHui; - RC = &ARM64::GPR32RegClass; - ScaleFactor = 2; - break; - case MVT::i32: - Opc = UseUnscaled ? ARM64::LDURWi : ARM64::LDRWui; - RC = &ARM64::GPR32RegClass; - ScaleFactor = 4; - break; - case MVT::i64: - Opc = UseUnscaled ? 
ARM64::LDURXi : ARM64::LDRXui; - RC = &ARM64::GPR64RegClass; - ScaleFactor = 8; - break; - case MVT::f32: - Opc = UseUnscaled ? ARM64::LDURSi : ARM64::LDRSui; - RC = TLI.getRegClassFor(VT); - ScaleFactor = 4; - break; - case MVT::f64: - Opc = UseUnscaled ? ARM64::LDURDi : ARM64::LDRDui; - RC = TLI.getRegClassFor(VT); - ScaleFactor = 8; - break; - } - // Scale the offset. - if (!UseUnscaled) { - int64_t Offset = Addr.getOffset(); - if (Offset & (ScaleFactor - 1)) - // Retry using an unscaled, 9-bit, signed immediate offset. - return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true); - - Addr.setOffset(Offset / ScaleFactor); - } - - // Simplify this down to something we can handle. - if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled)) - return false; - - // Create the base instruction, then add the operands. - ResultReg = createResultReg(RC); - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Opc), ResultReg); - AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled); - - // Loading an i1 requires special handling. - if (VTIsi1) { - unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ANDReg) - .addReg(ResultReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); - ResultReg = ANDReg; - } - return true; -} - -bool ARM64FastISel::SelectLoad(const Instruction *I) { - MVT VT; - // Verify we have a legal type before going any further. Currently, we handle - // simple types that will directly fit in a register (i32/f32/i64/f64) or - // those that can be sign or zero-extended to a basic operation (i1/i8/i16). - if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic()) - return false; - - // See if we can handle this address. - Address Addr; - if (!ComputeAddress(I->getOperand(0), Addr)) - return false; - - unsigned ResultReg; - if (!EmitLoad(VT, ResultReg, Addr)) - return false; - - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr, - bool UseUnscaled) { - // Negative offsets require unscaled, 9-bit, signed immediate offsets. - // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. - if (!UseUnscaled && Addr.getOffset() < 0) - UseUnscaled = true; - - unsigned StrOpc; - bool VTIsi1 = false; - int64_t ScaleFactor = 0; - // Using scaled, 12-bit, unsigned immediate offsets. - switch (VT.SimpleTy) { - default: - return false; - case MVT::i1: - VTIsi1 = true; - case MVT::i8: - StrOpc = UseUnscaled ? ARM64::STURBBi : ARM64::STRBBui; - ScaleFactor = 1; - break; - case MVT::i16: - StrOpc = UseUnscaled ? ARM64::STURHHi : ARM64::STRHHui; - ScaleFactor = 2; - break; - case MVT::i32: - StrOpc = UseUnscaled ? ARM64::STURWi : ARM64::STRWui; - ScaleFactor = 4; - break; - case MVT::i64: - StrOpc = UseUnscaled ? ARM64::STURXi : ARM64::STRXui; - ScaleFactor = 8; - break; - case MVT::f32: - StrOpc = UseUnscaled ? ARM64::STURSi : ARM64::STRSui; - ScaleFactor = 4; - break; - case MVT::f64: - StrOpc = UseUnscaled ? ARM64::STURDi : ARM64::STRDui; - ScaleFactor = 8; - break; - } - // Scale the offset. - if (!UseUnscaled) { - int64_t Offset = Addr.getOffset(); - if (Offset & (ScaleFactor - 1)) - // Retry using an unscaled, 9-bit, signed immediate offset. - return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true); - - Addr.setOffset(Offset / ScaleFactor); - } - - // Simplify this down to something we can handle. - if (!SimplifyAddress(Addr, VT, UseUnscaled ? 
1 : ScaleFactor, UseUnscaled)) - return false; - - // Storing an i1 requires special handling. - if (VTIsi1) { - unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ANDReg) - .addReg(SrcReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); - SrcReg = ANDReg; - } - // Create the base instruction, then add the operands. - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(StrOpc)).addReg(SrcReg); - AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled); - return true; -} - -bool ARM64FastISel::SelectStore(const Instruction *I) { - MVT VT; - Value *Op0 = I->getOperand(0); - // Verify we have a legal type before going any further. Currently, we handle - // simple types that will directly fit in a register (i32/f32/i64/f64) or - // those that can be sign or zero-extended to a basic operation (i1/i8/i16). - if (!isLoadStoreTypeLegal(Op0->getType(), VT) || - cast<StoreInst>(I)->isAtomic()) - return false; - - // Get the value to be stored into a register. - unsigned SrcReg = getRegForValue(Op0); - if (SrcReg == 0) - return false; - - // See if we can handle this address. - Address Addr; - if (!ComputeAddress(I->getOperand(1), Addr)) - return false; - - if (!EmitStore(VT, SrcReg, Addr)) - return false; - return true; -} - -static ARM64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { - switch (Pred) { - case CmpInst::FCMP_ONE: - case CmpInst::FCMP_UEQ: - default: - // AL is our "false" for now. The other two need more compares. - return ARM64CC::AL; - case CmpInst::ICMP_EQ: - case CmpInst::FCMP_OEQ: - return ARM64CC::EQ; - case CmpInst::ICMP_SGT: - case CmpInst::FCMP_OGT: - return ARM64CC::GT; - case CmpInst::ICMP_SGE: - case CmpInst::FCMP_OGE: - return ARM64CC::GE; - case CmpInst::ICMP_UGT: - case CmpInst::FCMP_UGT: - return ARM64CC::HI; - case CmpInst::FCMP_OLT: - return ARM64CC::MI; - case CmpInst::ICMP_ULE: - case CmpInst::FCMP_OLE: - return ARM64CC::LS; - case CmpInst::FCMP_ORD: - return ARM64CC::VC; - case CmpInst::FCMP_UNO: - return ARM64CC::VS; - case CmpInst::FCMP_UGE: - return ARM64CC::PL; - case CmpInst::ICMP_SLT: - case CmpInst::FCMP_ULT: - return ARM64CC::LT; - case CmpInst::ICMP_SLE: - case CmpInst::FCMP_ULE: - return ARM64CC::LE; - case CmpInst::FCMP_UNE: - case CmpInst::ICMP_NE: - return ARM64CC::NE; - case CmpInst::ICMP_UGE: - return ARM64CC::CS; - case CmpInst::ICMP_ULT: - return ARM64CC::CC; - } -} - -bool ARM64FastISel::SelectBranch(const Instruction *I) { - const BranchInst *BI = cast<BranchInst>(I); - MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; - MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; - - if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { - if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { - // We may not handle every CC for now. - ARM64CC::CondCode CC = getCompareCC(CI->getPredicate()); - if (CC == ARM64CC::AL) - return false; - - // Emit the cmp. - if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) - return false; - - // Emit the branch. 
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc)) - .addImm(CC) - .addMBB(TBB); - FuncInfo.MBB->addSuccessor(TBB); - - FastEmitBranch(FBB, DbgLoc); - return true; - } - } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) { - MVT SrcVT; - if (TI->hasOneUse() && TI->getParent() == I->getParent() && - (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) { - unsigned CondReg = getRegForValue(TI->getOperand(0)); - if (CondReg == 0) - return false; - - // Issue an extract_subreg to get the lower 32-bits. - if (SrcVT == MVT::i64) - CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true, - ARM64::sub_32); - - unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ANDReg) - .addReg(CondReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri)) - .addReg(ANDReg) - .addReg(ANDReg) - .addImm(0) - .addImm(0); - - unsigned CC = ARM64CC::NE; - if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { - std::swap(TBB, FBB); - CC = ARM64CC::EQ; - } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc)) - .addImm(CC) - .addMBB(TBB); - FuncInfo.MBB->addSuccessor(TBB); - FastEmitBranch(FBB, DbgLoc); - return true; - } - } else if (const ConstantInt *CI = - dyn_cast<ConstantInt>(BI->getCondition())) { - uint64_t Imm = CI->getZExtValue(); - MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::B)) - .addMBB(Target); - FuncInfo.MBB->addSuccessor(Target); - return true; - } - - unsigned CondReg = getRegForValue(BI->getCondition()); - if (CondReg == 0) - return false; - - // We've been divorced from our compare! Our block was split, and - // now our compare lives in a predecessor block. We musn't - // re-compare here, as the children of the compare aren't guaranteed - // live across the block boundary (we *could* check for this). - // Regardless, the compare has been done in the predecessor block, - // and it left a value for us in a virtual register. Ergo, we test - // the one-bit value left in the virtual register. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri), - ARM64::WZR) - .addReg(CondReg) - .addImm(0) - .addImm(0); - - unsigned CC = ARM64CC::NE; - if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { - std::swap(TBB, FBB); - CC = ARM64CC::EQ; - } - - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc)) - .addImm(CC) - .addMBB(TBB); - FuncInfo.MBB->addSuccessor(TBB); - FastEmitBranch(FBB, DbgLoc); - return true; -} - -bool ARM64FastISel::SelectIndirectBr(const Instruction *I) { - const IndirectBrInst *BI = cast<IndirectBrInst>(I); - unsigned AddrReg = getRegForValue(BI->getOperand(0)); - if (AddrReg == 0) - return false; - - // Emit the indirect branch. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BR)) - .addReg(AddrReg); - - // Make sure the CFG is up-to-date. - for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i) - FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]); - - return true; -} - -bool ARM64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) { - Type *Ty = Src1Value->getType(); - EVT SrcEVT = TLI.getValueType(Ty, true); - if (!SrcEVT.isSimple()) - return false; - MVT SrcVT = SrcEVT.getSimpleVT(); - - // Check to see if the 2nd operand is a constant that we can encode directly - // in the compare. 
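A minimal standalone sketch of the immediate screening that follows (shift-encodable forms, noted in the FIXME below, are ignored here as well); the helper name is hypothetical:

#include <cstdint>

// Returns true when Val can be used directly as the 12-bit unsigned
// immediate of a SUBS/ADDS compare; negative values are negated first and
// the caller would switch SUBS to ADDS, mirroring the logic that follows.
static bool fitsCmpImmediate(int64_t Val, bool &UseAdds) {
  uint64_t Imm = static_cast<uint64_t>(Val);
  UseAdds = false;
  if (Val < 0) {
    UseAdds = true;
    Imm = static_cast<uint64_t>(-Val);
  }
  return (Imm & 0xfff) == Imm;
}

int main() {
  bool UseAdds;
  return fitsCmpImmediate(-42, UseAdds) && UseAdds ? 0 : 1;
}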
- uint64_t Imm; - bool UseImm = false; - bool isNegativeImm = false; - if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) { - if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 || - SrcVT == MVT::i8 || SrcVT == MVT::i1) { - const APInt &CIVal = ConstInt->getValue(); - - Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue(); - if (CIVal.isNegative()) { - isNegativeImm = true; - Imm = -Imm; - } - // FIXME: We can handle more immediates using shifts. - UseImm = ((Imm & 0xfff) == Imm); - } - } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) { - if (SrcVT == MVT::f32 || SrcVT == MVT::f64) - if (ConstFP->isZero() && !ConstFP->isNegative()) - UseImm = true; - } - - unsigned ZReg; - unsigned CmpOpc; - bool isICmp = true; - bool needsExt = false; - switch (SrcVT.SimpleTy) { - default: - return false; - case MVT::i1: - case MVT::i8: - case MVT::i16: - needsExt = true; - // Intentional fall-through. - case MVT::i32: - ZReg = ARM64::WZR; - if (UseImm) - CmpOpc = isNegativeImm ? ARM64::ADDSWri : ARM64::SUBSWri; - else - CmpOpc = ARM64::SUBSWrr; - break; - case MVT::i64: - ZReg = ARM64::XZR; - if (UseImm) - CmpOpc = isNegativeImm ? ARM64::ADDSXri : ARM64::SUBSXri; - else - CmpOpc = ARM64::SUBSXrr; - break; - case MVT::f32: - isICmp = false; - CmpOpc = UseImm ? ARM64::FCMPSri : ARM64::FCMPSrr; - break; - case MVT::f64: - isICmp = false; - CmpOpc = UseImm ? ARM64::FCMPDri : ARM64::FCMPDrr; - break; - } - - unsigned SrcReg1 = getRegForValue(Src1Value); - if (SrcReg1 == 0) - return false; - - unsigned SrcReg2; - if (!UseImm) { - SrcReg2 = getRegForValue(Src2Value); - if (SrcReg2 == 0) - return false; - } - - // We have i1, i8, or i16, we need to either zero extend or sign extend. - if (needsExt) { - SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt); - if (SrcReg1 == 0) - return false; - if (!UseImm) { - SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt); - if (SrcReg2 == 0) - return false; - } - } - - if (isICmp) { - if (UseImm) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) - .addReg(ZReg) - .addReg(SrcReg1) - .addImm(Imm) - .addImm(0); - else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) - .addReg(ZReg) - .addReg(SrcReg1) - .addReg(SrcReg2); - } else { - if (UseImm) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) - .addReg(SrcReg1); - else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) - .addReg(SrcReg1) - .addReg(SrcReg2); - } - return true; -} - -bool ARM64FastISel::SelectCmp(const Instruction *I) { - const CmpInst *CI = cast<CmpInst>(I); - - // We may not handle every CC for now. - ARM64CC::CondCode CC = getCompareCC(CI->getPredicate()); - if (CC == ARM64CC::AL) - return false; - - // Emit the cmp. - if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) - return false; - - // Now set a register based on the comparison. 
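The sequence that follows is the standard CSET idiom: CSINC with both sources wired to the zero register and the inverted condition yields 1 when the original condition holds and 0 otherwise.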
- ARM64CC::CondCode invertedCC = getInvertedCondCode(CC); - unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::CSINCWr), - ResultReg) - .addReg(ARM64::WZR) - .addReg(ARM64::WZR) - .addImm(invertedCC); - - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectSelect(const Instruction *I) { - const SelectInst *SI = cast<SelectInst>(I); - - EVT DestEVT = TLI.getValueType(SI->getType(), true); - if (!DestEVT.isSimple()) - return false; - - MVT DestVT = DestEVT.getSimpleVT(); - if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 && - DestVT != MVT::f64) - return false; - - unsigned CondReg = getRegForValue(SI->getCondition()); - if (CondReg == 0) - return false; - unsigned TrueReg = getRegForValue(SI->getTrueValue()); - if (TrueReg == 0) - return false; - unsigned FalseReg = getRegForValue(SI->getFalseValue()); - if (FalseReg == 0) - return false; - - unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ANDReg) - .addReg(CondReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); - - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri)) - .addReg(ANDReg) - .addReg(ANDReg) - .addImm(0) - .addImm(0); - - unsigned SelectOpc; - switch (DestVT.SimpleTy) { - default: - return false; - case MVT::i32: - SelectOpc = ARM64::CSELWr; - break; - case MVT::i64: - SelectOpc = ARM64::CSELXr; - break; - case MVT::f32: - SelectOpc = ARM64::FCSELSrrr; - break; - case MVT::f64: - SelectOpc = ARM64::FCSELDrrr; - break; - } - - unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc), - ResultReg) - .addReg(TrueReg) - .addReg(FalseReg) - .addImm(ARM64CC::NE); - - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectFPExt(const Instruction *I) { - Value *V = I->getOperand(0); - if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) - return false; - - unsigned Op = getRegForValue(V); - if (Op == 0) - return false; - - unsigned ResultReg = createResultReg(&ARM64::FPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::FCVTDSr), - ResultReg).addReg(Op); - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectFPTrunc(const Instruction *I) { - Value *V = I->getOperand(0); - if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) - return false; - - unsigned Op = getRegForValue(V); - if (Op == 0) - return false; - - unsigned ResultReg = createResultReg(&ARM64::FPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::FCVTSDr), - ResultReg).addReg(Op); - UpdateValueMap(I, ResultReg); - return true; -} - -// FPToUI and FPToSI -bool ARM64FastISel::SelectFPToInt(const Instruction *I, bool Signed) { - MVT DestVT; - if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) - return false; - - unsigned SrcReg = getRegForValue(I->getOperand(0)); - if (SrcReg == 0) - return false; - - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); - - unsigned Opc; - if (SrcVT == MVT::f64) { - if (Signed) - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZSUWDr : ARM64::FCVTZSUXDr; - else - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZUUWDr : ARM64::FCVTZUUXDr; - } else { - if (Signed) - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZSUWSr : ARM64::FCVTZSUXSr; - else - Opc = (DestVT == MVT::i32) ? 
ARM64::FCVTZUUWSr : ARM64::FCVTZUUXSr; - } - unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(SrcReg); - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectIntToFP(const Instruction *I, bool Signed) { - MVT DestVT; - if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) - return false; - - unsigned SrcReg = getRegForValue(I->getOperand(0)); - if (SrcReg == 0) - return false; - - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); - - // Handle sign-extension. - if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { - SrcReg = - EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); - if (SrcReg == 0) - return false; - } - - unsigned Opc; - if (SrcVT == MVT::i64) { - if (Signed) - Opc = (DestVT == MVT::f32) ? ARM64::SCVTFUXSri : ARM64::SCVTFUXDri; - else - Opc = (DestVT == MVT::f32) ? ARM64::UCVTFUXSri : ARM64::UCVTFUXDri; - } else { - if (Signed) - Opc = (DestVT == MVT::f32) ? ARM64::SCVTFUWSri : ARM64::SCVTFUWDri; - else - Opc = (DestVT == MVT::f32) ? ARM64::UCVTFUWSri : ARM64::UCVTFUWDri; - } - - unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(SrcReg); - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::ProcessCallArgs(SmallVectorImpl<Value *> &Args, - SmallVectorImpl<unsigned> &ArgRegs, - SmallVectorImpl<MVT> &ArgVTs, - SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, - SmallVectorImpl<unsigned> &RegArgs, - CallingConv::ID CC, unsigned &NumBytes) { - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context); - CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC)); - - // Get a count of how many bytes are to be pushed on the stack. - NumBytes = CCInfo.getNextStackOffset(); - - // Issue CALLSEQ_START - unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) - .addImm(NumBytes); - - // Process the args. - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - unsigned Arg = ArgRegs[VA.getValNo()]; - MVT ArgVT = ArgVTs[VA.getValNo()]; - - // Handle arg promotion: SExt, ZExt, AExt. - switch (VA.getLocInfo()) { - case CCValAssign::Full: - break; - case CCValAssign::SExt: { - MVT DestVT = VA.getLocVT(); - MVT SrcVT = ArgVT; - Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false); - if (Arg == 0) - return false; - ArgVT = DestVT; - break; - } - case CCValAssign::AExt: - // Intentional fall-through. - case CCValAssign::ZExt: { - MVT DestVT = VA.getLocVT(); - MVT SrcVT = ArgVT; - Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true); - if (Arg == 0) - return false; - ArgVT = DestVT; - break; - } - default: - llvm_unreachable("Unknown arg promotion!"); - } - - // Now copy/store arg to correct locations. - if (VA.isRegLoc() && !VA.needsCustom()) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg); - RegArgs.push_back(VA.getLocReg()); - } else if (VA.needsCustom()) { - // FIXME: Handle custom args. - return false; - } else { - assert(VA.isMemLoc() && "Assuming store on stack."); - - // Need to store on the stack. 
- Address Addr; - Addr.setKind(Address::RegBase); - Addr.setReg(ARM64::SP); - Addr.setOffset(VA.getLocMemOffset()); - - if (!EmitStore(ArgVT, Arg, Addr)) - return false; - } - } - return true; -} - -bool ARM64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, - const Instruction *I, CallingConv::ID CC, - unsigned &NumBytes) { - // Issue CALLSEQ_END - unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) - .addImm(NumBytes) - .addImm(0); - - // Now the return value. - if (RetVT != MVT::isVoid) { - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context); - CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC)); - - // Only handle a single return value. - if (RVLocs.size() != 1) - return false; - - // Copy all of the result registers out of their specified physreg. - MVT CopyVT = RVLocs[0].getValVT(); - unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), - ResultReg).addReg(RVLocs[0].getLocReg()); - UsedRegs.push_back(RVLocs[0].getLocReg()); - - // Finally update the result. - UpdateValueMap(I, ResultReg); - } - - return true; -} - -bool ARM64FastISel::SelectCall(const Instruction *I, - const char *IntrMemName = 0) { - const CallInst *CI = cast<CallInst>(I); - const Value *Callee = CI->getCalledValue(); - - // Don't handle inline asm or intrinsics. - if (isa<InlineAsm>(Callee)) - return false; - - // Only handle global variable Callees. - const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); - if (!GV) - return false; - - // Check the calling convention. - ImmutableCallSite CS(CI); - CallingConv::ID CC = CS.getCallingConv(); - - // Let SDISel handle vararg functions. - PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - FunctionType *FTy = cast<FunctionType>(PT->getElementType()); - if (FTy->isVarArg()) - return false; - - // Handle *simple* calls for now. - MVT RetVT; - Type *RetTy = I->getType(); - if (RetTy->isVoidTy()) - RetVT = MVT::isVoid; - else if (!isTypeLegal(RetTy, RetVT)) - return false; - - // Set up the argument vectors. - SmallVector<Value *, 8> Args; - SmallVector<unsigned, 8> ArgRegs; - SmallVector<MVT, 8> ArgVTs; - SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; - Args.reserve(CS.arg_size()); - ArgRegs.reserve(CS.arg_size()); - ArgVTs.reserve(CS.arg_size()); - ArgFlags.reserve(CS.arg_size()); - - for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); - i != e; ++i) { - // If we're lowering a memory intrinsic instead of a regular call, skip the - // last two arguments, which shouldn't be passed to the underlying function. - if (IntrMemName && e - i <= 2) - break; - - unsigned Arg = getRegForValue(*i); - if (Arg == 0) - return false; - - ISD::ArgFlagsTy Flags; - unsigned AttrInd = i - CS.arg_begin() + 1; - if (CS.paramHasAttr(AttrInd, Attribute::SExt)) - Flags.setSExt(); - if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) - Flags.setZExt(); - - // FIXME: Only handle *easy* calls for now. - if (CS.paramHasAttr(AttrInd, Attribute::InReg) || - CS.paramHasAttr(AttrInd, Attribute::StructRet) || - CS.paramHasAttr(AttrInd, Attribute::Nest) || - CS.paramHasAttr(AttrInd, Attribute::ByVal)) - return false; - - MVT ArgVT; - Type *ArgTy = (*i)->getType(); - if (!isTypeLegal(ArgTy, ArgVT) && - !(ArgVT == MVT::i1 || ArgVT == MVT::i8 || ArgVT == MVT::i16)) - return false; - - // We don't handle vector parameters yet. 
- if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64) - return false; - - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - Flags.setOrigAlign(OriginalAlignment); - - Args.push_back(*i); - ArgRegs.push_back(Arg); - ArgVTs.push_back(ArgVT); - ArgFlags.push_back(Flags); - } - - // Handle the arguments now that we've gotten them. - SmallVector<unsigned, 4> RegArgs; - unsigned NumBytes; - if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) - return false; - - // Issue the call. - MachineInstrBuilder MIB; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BL)); - if (!IntrMemName) - MIB.addGlobalAddress(GV, 0, 0); - else - MIB.addExternalSymbol(IntrMemName, 0); - - // Add implicit physical register uses to the call. - for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i], RegState::Implicit); - - // Add a register mask with the call-preserved registers. - // Proper defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv())); - - // Finish off the call including any return values. - SmallVector<unsigned, 4> UsedRegs; - if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) - return false; - - // Set all unused physreg defs as dead. - static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); - - return true; -} - -bool ARM64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) { - if (Alignment) - return Len / Alignment <= 4; - else - return Len < 32; -} - -bool ARM64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, - unsigned Alignment) { - // Make sure we don't bloat code by inlining very large memcpy's. - if (!IsMemCpySmall(Len, Alignment)) - return false; - - int64_t UnscaledOffset = 0; - Address OrigDest = Dest; - Address OrigSrc = Src; - - while (Len) { - MVT VT; - if (!Alignment || Alignment >= 8) { - if (Len >= 8) - VT = MVT::i64; - else if (Len >= 4) - VT = MVT::i32; - else if (Len >= 2) - VT = MVT::i16; - else { - VT = MVT::i8; - } - } else { - // Bound based on alignment. - if (Len >= 4 && Alignment == 4) - VT = MVT::i32; - else if (Len >= 2 && Alignment == 2) - VT = MVT::i16; - else { - VT = MVT::i8; - } - } - - bool RV; - unsigned ResultReg; - RV = EmitLoad(VT, ResultReg, Src); - assert(RV == true && "Should be able to handle this load."); - RV = EmitStore(VT, ResultReg, Dest); - assert(RV == true && "Should be able to handle this store."); - (void)RV; - - int64_t Size = VT.getSizeInBits() / 8; - Len -= Size; - UnscaledOffset += Size; - - // We need to recompute the unscaled offset for each iteration. - Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); - Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); - } - - return true; -} - -bool ARM64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) { - // FIXME: Handle more intrinsics. - switch (I.getIntrinsicID()) { - default: - return false; - case Intrinsic::memcpy: - case Intrinsic::memmove: { - const MemTransferInst &MTI = cast<MemTransferInst>(I); - // Don't handle volatile. - if (MTI.isVolatile()) - return false; - - // Disable inlining for memmove before calls to ComputeAddress. Otherwise, - // we would emit dead code because we don't currently handle memmoves. - bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy); - if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) { - // Small memcpy's are common enough that we want to do them without a call - // if possible. 
- uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue(); - unsigned Alignment = MTI.getAlignment(); - if (IsMemCpySmall(Len, Alignment)) { - Address Dest, Src; - if (!ComputeAddress(MTI.getRawDest(), Dest) || - !ComputeAddress(MTI.getRawSource(), Src)) - return false; - if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment)) - return true; - } - } - - if (!MTI.getLength()->getType()->isIntegerTy(64)) - return false; - - if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255) - // Fast instruction selection doesn't support the special - // address spaces. - return false; - - const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove"; - return SelectCall(&I, IntrMemName); - } - case Intrinsic::memset: { - const MemSetInst &MSI = cast<MemSetInst>(I); - // Don't handle volatile. - if (MSI.isVolatile()) - return false; - - if (!MSI.getLength()->getType()->isIntegerTy(64)) - return false; - - if (MSI.getDestAddressSpace() > 255) - // Fast instruction selection doesn't support the special - // address spaces. - return false; - - return SelectCall(&I, "memset"); - } - case Intrinsic::trap: { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BRK)) - .addImm(1); - return true; - } - } - return false; -} - -bool ARM64FastISel::SelectRet(const Instruction *I) { - const ReturnInst *Ret = cast<ReturnInst>(I); - const Function &F = *I->getParent()->getParent(); - - if (!FuncInfo.CanLowerReturn) - return false; - - if (F.isVarArg()) - return false; - - // Build a list of return value registers. - SmallVector<unsigned, 4> RetRegs; - - if (Ret->getNumOperands() > 0) { - CallingConv::ID CC = F.getCallingConv(); - SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); - - // Analyze operands of the call, assigning locations to each operand. - SmallVector<CCValAssign, 16> ValLocs; - CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, - I->getContext()); - CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; - CCInfo.AnalyzeReturn(Outs, RetCC); - - // Only handle a single return value for now. - if (ValLocs.size() != 1) - return false; - - CCValAssign &VA = ValLocs[0]; - const Value *RV = Ret->getOperand(0); - - // Don't bother handling odd stuff for now. - if (VA.getLocInfo() != CCValAssign::Full) - return false; - // Only handle register returns for now. - if (!VA.isRegLoc()) - return false; - unsigned Reg = getRegForValue(RV); - if (Reg == 0) - return false; - - unsigned SrcReg = Reg + VA.getValNo(); - unsigned DestReg = VA.getLocReg(); - // Avoid a cross-class copy. This is very unlikely. - if (!MRI.getRegClass(SrcReg)->contains(DestReg)) - return false; - - EVT RVEVT = TLI.getValueType(RV->getType()); - if (!RVEVT.isSimple()) - return false; - MVT RVVT = RVEVT.getSimpleVT(); - MVT DestVT = VA.getValVT(); - // Special handling for extended integers. - if (RVVT != DestVT) { - if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) - return false; - - if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) - return false; - - bool isZExt = Outs[0].Flags.isZExt(); - SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt); - if (SrcReg == 0) - return false; - } - - // Make the copy. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); - - // Add register to return instruction. 
- RetRegs.push_back(VA.getLocReg()); - } - - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM64::RET_ReallyLR)); - for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) - MIB.addReg(RetRegs[i], RegState::Implicit); - return true; -} - -bool ARM64FastISel::SelectTrunc(const Instruction *I) { - Type *DestTy = I->getType(); - Value *Op = I->getOperand(0); - Type *SrcTy = Op->getType(); - - EVT SrcEVT = TLI.getValueType(SrcTy, true); - EVT DestEVT = TLI.getValueType(DestTy, true); - if (!SrcEVT.isSimple()) - return false; - if (!DestEVT.isSimple()) - return false; - - MVT SrcVT = SrcEVT.getSimpleVT(); - MVT DestVT = DestEVT.getSimpleVT(); - - if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && - SrcVT != MVT::i8) - return false; - if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && - DestVT != MVT::i1) - return false; - - unsigned SrcReg = getRegForValue(Op); - if (!SrcReg) - return false; - - // If we're truncating from i64 to a smaller non-legal type then generate an - // AND. Otherwise, we know the high bits are undefined and a truncate doesn't - // generate any code. - if (SrcVT == MVT::i64) { - uint64_t Mask = 0; - switch (DestVT.SimpleTy) { - default: - // Trunc i64 to i32 is handled by the target-independent fast-isel. - return false; - case MVT::i1: - Mask = 0x1; - break; - case MVT::i8: - Mask = 0xff; - break; - case MVT::i16: - Mask = 0xffff; - break; - } - // Issue an extract_subreg to get the lower 32-bits. - unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true, - ARM64::sub_32); - // Create the AND instruction which performs the actual truncation. - unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ANDReg) - .addReg(Reg32) - .addImm(ARM64_AM::encodeLogicalImmediate(Mask, 32)); - SrcReg = ANDReg; - } - - UpdateValueMap(I, SrcReg); - return true; -} - -unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) { - assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || - DestVT == MVT::i64) && - "Unexpected value type."); - // Handle i8 and i16 as i32. - if (DestVT == MVT::i8 || DestVT == MVT::i16) - DestVT = MVT::i32; - - if (isZExt) { - unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ResultReg) - .addReg(SrcReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); - - if (DestVT == MVT::i64) { - // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the - // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. - unsigned Reg64 = MRI.createVirtualRegister(&ARM64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM64::SUBREG_TO_REG), Reg64) - .addImm(0) - .addReg(ResultReg) - .addImm(ARM64::sub_32); - ResultReg = Reg64; - } - return ResultReg; - } else { - if (DestVT == MVT::i64) { - // FIXME: We're SExt i1 to i64. 
- return 0; - } - unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SBFMWri), - ResultReg) - .addReg(SrcReg) - .addImm(0) - .addImm(0); - return ResultReg; - } -} - -unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, - bool isZExt) { - assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); - unsigned Opc; - unsigned Imm = 0; - - switch (SrcVT.SimpleTy) { - default: - return 0; - case MVT::i1: - return Emiti1Ext(SrcReg, DestVT, isZExt); - case MVT::i8: - if (DestVT == MVT::i64) - Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri; - else - Opc = isZExt ? ARM64::UBFMWri : ARM64::SBFMWri; - Imm = 7; - break; - case MVT::i16: - if (DestVT == MVT::i64) - Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri; - else - Opc = isZExt ? ARM64::UBFMWri : ARM64::SBFMWri; - Imm = 15; - break; - case MVT::i32: - assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); - Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri; - Imm = 31; - break; - } - - // Handle i8 and i16 as i32. - if (DestVT == MVT::i8 || DestVT == MVT::i16) - DestVT = MVT::i32; - - unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(SrcReg) - .addImm(0) - .addImm(Imm); - - return ResultReg; -} - -bool ARM64FastISel::SelectIntExt(const Instruction *I) { - // On ARM, in general, integer casts don't involve legal types; this code - // handles promotable integers. The high bits for a type smaller than - // the register size are assumed to be undefined. - Type *DestTy = I->getType(); - Value *Src = I->getOperand(0); - Type *SrcTy = Src->getType(); - - bool isZExt = isa<ZExtInst>(I); - unsigned SrcReg = getRegForValue(Src); - if (!SrcReg) - return false; - - EVT SrcEVT = TLI.getValueType(SrcTy, true); - EVT DestEVT = TLI.getValueType(DestTy, true); - if (!SrcEVT.isSimple()) - return false; - if (!DestEVT.isSimple()) - return false; - - MVT SrcVT = SrcEVT.getSimpleVT(); - MVT DestVT = DestEVT.getSimpleVT(); - unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt); - if (ResultReg == 0) - return false; - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) { - EVT DestEVT = TLI.getValueType(I->getType(), true); - if (!DestEVT.isSimple()) - return false; - - MVT DestVT = DestEVT.getSimpleVT(); - if (DestVT != MVT::i64 && DestVT != MVT::i32) - return false; - - unsigned DivOpc; - bool is64bit = (DestVT == MVT::i64); - switch (ISDOpcode) { - default: - return false; - case ISD::SREM: - DivOpc = is64bit ? ARM64::SDIVXr : ARM64::SDIVWr; - break; - case ISD::UREM: - DivOpc = is64bit ? ARM64::UDIVXr : ARM64::UDIVWr; - break; - } - unsigned MSubOpc = is64bit ? ARM64::MSUBXrrr : ARM64::MSUBWrrr; - unsigned Src0Reg = getRegForValue(I->getOperand(0)); - if (!Src0Reg) - return false; - - unsigned Src1Reg = getRegForValue(I->getOperand(1)); - if (!Src1Reg) - return false; - - unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), ResultReg) - .addReg(Src0Reg) - .addReg(Src1Reg); - // The remainder is computed as numerator – (quotient * denominator) using the - // MSUB instruction. 
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg) - .addReg(ResultReg) - .addReg(Src1Reg) - .addReg(Src0Reg); - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectMul(const Instruction *I) { - EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true); - if (!SrcEVT.isSimple()) - return false; - MVT SrcVT = SrcEVT.getSimpleVT(); - - // Must be simple value type. Don't handle vectors. - if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && - SrcVT != MVT::i8) - return false; - - unsigned Opc; - unsigned ZReg; - switch (SrcVT.SimpleTy) { - default: - return false; - case MVT::i8: - case MVT::i16: - case MVT::i32: - ZReg = ARM64::WZR; - Opc = ARM64::MADDWrrr; - break; - case MVT::i64: - ZReg = ARM64::XZR; - Opc = ARM64::MADDXrrr; - break; - } - - unsigned Src0Reg = getRegForValue(I->getOperand(0)); - if (!Src0Reg) - return false; - - unsigned Src1Reg = getRegForValue(I->getOperand(1)); - if (!Src1Reg) - return false; - - // Create the base instruction, then add the operands. - unsigned ResultReg = createResultReg(TLI.getRegClassFor(SrcVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(Src0Reg) - .addReg(Src1Reg) - .addReg(ZReg); - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::TargetSelectInstruction(const Instruction *I) { - switch (I->getOpcode()) { - default: - break; - case Instruction::Load: - return SelectLoad(I); - case Instruction::Store: - return SelectStore(I); - case Instruction::Br: - return SelectBranch(I); - case Instruction::IndirectBr: - return SelectIndirectBr(I); - case Instruction::FCmp: - case Instruction::ICmp: - return SelectCmp(I); - case Instruction::Select: - return SelectSelect(I); - case Instruction::FPExt: - return SelectFPExt(I); - case Instruction::FPTrunc: - return SelectFPTrunc(I); - case Instruction::FPToSI: - return SelectFPToInt(I, /*Signed=*/true); - case Instruction::FPToUI: - return SelectFPToInt(I, /*Signed=*/false); - case Instruction::SIToFP: - return SelectIntToFP(I, /*Signed=*/true); - case Instruction::UIToFP: - return SelectIntToFP(I, /*Signed=*/false); - case Instruction::SRem: - return SelectRem(I, ISD::SREM); - case Instruction::URem: - return SelectRem(I, ISD::UREM); - case Instruction::Call: - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) - return SelectIntrinsicCall(*II); - return SelectCall(I); - case Instruction::Ret: - return SelectRet(I); - case Instruction::Trunc: - return SelectTrunc(I); - case Instruction::ZExt: - case Instruction::SExt: - return SelectIntExt(I); - case Instruction::Mul: - // FIXME: This really should be handled by the target-independent selector. - return SelectMul(I); - } - return false; - // Silence warnings. - (void)&CC_ARM64_DarwinPCS_VarArg; -} - -namespace llvm { -llvm::FastISel *ARM64::createFastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) { - return new ARM64FastISel(funcInfo, libInfo); -} -} diff --git a/lib/Target/ARM64/ARM64FrameLowering.cpp b/lib/Target/ARM64/ARM64FrameLowering.cpp deleted file mode 100644 index 798986c..0000000 --- a/lib/Target/ARM64/ARM64FrameLowering.cpp +++ /dev/null @@ -1,816 +0,0 @@ -//===- ARM64FrameLowering.cpp - ARM64 Frame Lowering -----------*- C++ -*-====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM64 implementation of TargetFrameLowering class. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "frame-info" -#include "ARM64FrameLowering.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64Subtarget.h" -#include "ARM64TargetMachine.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Function.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -static cl::opt<bool> EnableRedZone("arm64-redzone", - cl::desc("enable use of redzone on ARM64"), - cl::init(false), cl::Hidden); - -STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); - -static unsigned estimateStackSize(MachineFunction &MF) { - const MachineFrameInfo *FFI = MF.getFrameInfo(); - int Offset = 0; - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -FFI->getObjectOffset(i); - if (FixedOff > Offset) - Offset = FixedOff; - } - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { - if (FFI->isDeadObjectIndex(i)) - continue; - Offset += FFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset + Align - 1) / Align * Align; - } - // This does not include the 16 bytes used for fp and lr. - return (unsigned)Offset; -} - -bool ARM64FrameLowering::canUseRedZone(const MachineFunction &MF) const { - if (!EnableRedZone) - return false; - // Don't use the red zone if the function explicitly asks us not to. - // This is typically used for kernel code. - if (MF.getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::NoRedZone)) - return false; - - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>(); - unsigned NumBytes = AFI->getLocalStackSize(); - - // Note: currently hasFP() is always true for hasCalls(), but that's an - // implementation detail of the current code, not a strict requirement, - // so stay safe here and check both. - if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128) - return false; - return true; -} - -/// hasFP - Return true if the specified function should have a dedicated frame -/// pointer register. -bool ARM64FrameLowering::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - -#ifndef NDEBUG - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - assert(!RegInfo->needsStackRealignment(MF) && - "No stack realignment on ARM64!"); -#endif - - return (MFI->hasCalls() || MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken()); -} - -/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is -/// not required, we reserve argument space for call sites in the function -/// immediately on entry to the current function. This eliminates the need for -/// add/sub sp brackets around call sites. Returns true if the call frame is -/// included as part of the stack frame. 
-bool ARM64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { - return !MF.getFrameInfo()->hasVarSizedObjects(); -} - -void ARM64FrameLowering::eliminateCallFramePseudoInstr( - MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const ARM64InstrInfo *TII = - static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo()); - if (!TFI->hasReservedCallFrame(MF)) { - // If we have alloca, convert as follows: - // ADJCALLSTACKDOWN -> sub, sp, sp, amount - // ADJCALLSTACKUP -> add, sp, sp, amount - MachineInstr *Old = I; - DebugLoc DL = Old->getDebugLoc(); - unsigned Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = TFI->getStackAlignment(); - Amount = (Amount + Align - 1) / Align * Align; - - // Replace the pseudo instruction with a new instruction... - unsigned Opc = Old->getOpcode(); - if (Opc == ARM64::ADJCALLSTACKDOWN) { - emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, -Amount, TII); - } else { - assert(Opc == ARM64::ADJCALLSTACKUP && "expected ADJCALLSTACKUP"); - emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, Amount, TII); - } - } - } - MBB.erase(I); -} - -void -ARM64FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned FramePtr) const { - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo &MMI = MF.getMMI(); - const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - const ARM64InstrInfo *TII = TM.getInstrInfo(); - DebugLoc DL = MBB.findDebugLoc(MBBI); - - // Add callee saved registers to move list. - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - if (CSI.empty()) - return; - - const DataLayout *TD = MF.getTarget().getDataLayout(); - bool HasFP = hasFP(MF); - - // Calculate amount of bytes used for return address storing. - int stackGrowth = -TD->getPointerSize(0); - - // Calculate offsets. - int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth; - unsigned TotalSkipped = 0; - for (const auto &Info : CSI) { - unsigned Reg = Info.getReg(); - int64_t Offset = MFI->getObjectOffset(Info.getFrameIdx()) - - getOffsetOfLocalArea() + saveAreaOffset; - - // Don't output a new CFI directive if we're re-saving the frame pointer or - // link register. This happens when the PrologEpilogInserter has inserted an - // extra "STP" of the frame pointer and link register -- the "emitPrologue" - // method automatically generates the directives when frame pointers are - // used. If we generate CFI directives for the extra "STP"s, the linker will - // lose track of the correct values for the frame pointer and link register. - if (HasFP && (FramePtr == Reg || Reg == ARM64::LR)) { - TotalSkipped += stackGrowth; - continue; - } - - unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); - unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( - nullptr, DwarfReg, Offset - TotalSkipped)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } -} - -void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. 
- MachineBasicBlock::iterator MBBI = MBB.begin(); - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const Function *Fn = MF.getFunction(); - const ARM64RegisterInfo *RegInfo = TM.getRegisterInfo(); - const ARM64InstrInfo *TII = TM.getInstrInfo(); - MachineModuleInfo &MMI = MF.getMMI(); - ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>(); - bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); - bool HasFP = hasFP(MF); - DebugLoc DL = MBB.findDebugLoc(MBBI); - - int NumBytes = (int)MFI->getStackSize(); - if (!AFI->hasStackFrame()) { - assert(!HasFP && "unexpected function without stack frame but with FP"); - - // All of the stack allocation is for locals. - AFI->setLocalStackSize(NumBytes); - - // Label used to tie together the PROLOG_LABEL and the MachineMoves. - MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - - // REDZONE: If the stack size is less than 128 bytes, we don't need - // to actually allocate. - if (NumBytes && !canUseRedZone(MF)) { - emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); - - // Encode the stack size of the leaf function. - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } else if (NumBytes) { - ++NumRedZoneFunctions; - } - - return; - } - - // Only set up FP if we actually need to. - int FPOffset = 0; - if (HasFP) { - // First instruction must a) allocate the stack and b) have an immediate - // that is a multiple of -2. - assert((MBBI->getOpcode() == ARM64::STPXpre || - MBBI->getOpcode() == ARM64::STPDpre) && - MBBI->getOperand(2).getReg() == ARM64::SP && - MBBI->getOperand(3).getImm() < 0 && - (MBBI->getOperand(3).getImm() & 1) == 0); - - // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space - // required for the callee saved register area we get the frame pointer - // by adding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8. - FPOffset = -(MBBI->getOperand(3).getImm() + 2) * 8; - assert(FPOffset >= 0 && "Bad Framepointer Offset"); - } - - // Move past the saves of the callee-saved registers. - while (MBBI->getOpcode() == ARM64::STPXi || - MBBI->getOpcode() == ARM64::STPDi || - MBBI->getOpcode() == ARM64::STPXpre || - MBBI->getOpcode() == ARM64::STPDpre) { - ++MBBI; - NumBytes -= 16; - } - assert(NumBytes >= 0 && "Negative stack allocation size!?"); - if (HasFP) { - // Issue sub fp, sp, FPOffset or - // mov fp,sp when FPOffset is zero. - // Note: All stores of callee-saved registers are marked as "FrameSetup". - // This code marks the instruction(s) that set the FP also. - emitFrameOffset(MBB, MBBI, DL, ARM64::FP, ARM64::SP, FPOffset, TII, - MachineInstr::FrameSetup); - } - - // All of the remaining stack allocations are for locals. - AFI->setLocalStackSize(NumBytes); - - // Allocate space for the rest of the frame. - if (NumBytes) { - // If we're a leaf function, try using the red zone. - if (!canUseRedZone(MF)) - emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); - } - - // If we need a base pointer, set it up here. It's whatever the value of the - // stack pointer is at this point. Any variable size objects will be allocated - // after this, so we can still use the base pointer to reference locals. - // - // FIXME: Clarify FrameSetup flags here. - // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is - // needed.
- // - if (RegInfo->hasBasePointer(MF)) - TII->copyPhysReg(MBB, MBBI, DL, ARM64::X19, ARM64::SP, false); - - if (needsFrameMoves) { - const DataLayout *TD = MF.getTarget().getDataLayout(); - const int StackGrowth = -TD->getPointerSize(0); - unsigned FramePtr = RegInfo->getFrameRegister(MF); - - // An example of the prologue: - // - // .globl __foo - // .align 2 - // __foo: - // Ltmp0: - // .cfi_startproc - // .cfi_personality 155, ___gxx_personality_v0 - // Leh_func_begin: - // .cfi_lsda 16, Lexception33 - // - // stp xa,bx, [sp, -#offset]! - // ... - // stp x28, x27, [sp, #offset-32] - // stp fp, lr, [sp, #offset-16] - // add fp, sp, #offset - 16 - // sub sp, sp, #1360 - // - // The Stack: - // +-------------------------------------------+ - // 10000 | ........ | ........ | ........ | ........ | - // 10004 | ........ | ........ | ........ | ........ | - // +-------------------------------------------+ - // 10008 | ........ | ........ | ........ | ........ | - // 1000c | ........ | ........ | ........ | ........ | - // +===========================================+ - // 10010 | X28 Register | - // 10014 | X28 Register | - // +-------------------------------------------+ - // 10018 | X27 Register | - // 1001c | X27 Register | - // +===========================================+ - // 10020 | Frame Pointer | - // 10024 | Frame Pointer | - // +-------------------------------------------+ - // 10028 | Link Register | - // 1002c | Link Register | - // +===========================================+ - // 10030 | ........ | ........ | ........ | ........ | - // 10034 | ........ | ........ | ........ | ........ | - // +-------------------------------------------+ - // 10038 | ........ | ........ | ........ | ........ | - // 1003c | ........ | ........ | ........ | ........ | - // +-------------------------------------------+ - // - // [sp] = 10030 :: >>initial value<< - // sp = 10020 :: stp fp, lr, [sp, #-16]! - // fp = sp == 10020 :: mov fp, sp - // [sp] == 10020 :: stp x28, x27, [sp, #-16]! - // sp == 10010 :: >>final value<< - // - // The frame pointer (w29) points to address 10020. If we use an offset of - // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24 - // for w27, and -32 for w28: - // - // Ltmp1: - // .cfi_def_cfa w29, 16 - // Ltmp2: - // .cfi_offset w30, -8 - // Ltmp3: - // .cfi_offset w29, -16 - // Ltmp4: - // .cfi_offset w27, -24 - // Ltmp5: - // .cfi_offset w28, -32 - - if (HasFP) { - // Define the current CFA rule to use the provided FP. - unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - // Record the location of the stored LR - unsigned LR = RegInfo->getDwarfRegNum(ARM64::LR, true); - CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(nullptr, LR, StackGrowth)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - // Record the location of the stored FP - CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } else { - // Encode the stack size of the leaf function. 
- unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize())); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - - // Now emit the moves for whatever callee saved regs we have. - emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr); - } -} - -static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) { - for (unsigned i = 0; CSRegs[i]; ++i) - if (Reg == CSRegs[i]) - return true; - return false; -} - -static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) { - if (MI->getOpcode() == ARM64::LDPXpost || - MI->getOpcode() == ARM64::LDPDpost || MI->getOpcode() == ARM64::LDPXi || - MI->getOpcode() == ARM64::LDPDi) { - if (!isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) || - !isCalleeSavedRegister(MI->getOperand(1).getReg(), CSRegs) || - MI->getOperand(2).getReg() != ARM64::SP) - return false; - return true; - } - - return false; -} - -void ARM64FrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARM64InstrInfo *TII = - static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo()); - const ARM64RegisterInfo *RegInfo = - static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo()); - DebugLoc DL = MBBI->getDebugLoc(); - - int NumBytes = MFI->getStackSize(); - unsigned NumRestores = 0; - // Move past the restores of the callee-saved registers. - MachineBasicBlock::iterator LastPopI = MBBI; - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); - if (LastPopI != MBB.begin()) { - do { - ++NumRestores; - --LastPopI; - } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs)); - if (!isCSRestore(LastPopI, CSRegs)) { - ++LastPopI; - --NumRestores; - } - } - NumBytes -= NumRestores * 16; - assert(NumBytes >= 0 && "Negative stack allocation size!?"); - - if (!hasFP(MF)) { - // If this was a redzone leaf function, we don't need to restore the - // stack pointer. - if (!canUseRedZone(MF)) - emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::SP, NumBytes, TII); - return; - } - - // Restore the original stack pointer. - // FIXME: Rather than doing the math here, we should instead just use - // non-post-indexed loads for the restores if we aren't actually going to - // be able to save any instructions. - if (NumBytes || MFI->hasVarSizedObjects()) - emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::FP, - -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags); -} - -/// getFrameIndexOffset - Returns the displacement from the frame register to -/// the stack frame of the specified index. -int ARM64FrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - unsigned FrameReg; - return getFrameIndexReference(MF, FI, FrameReg); -} - -/// getFrameIndexReference - Provide a base+offset reference to an FI slot for -/// debug info. It's the same as what we use for resolving the code-gen -/// references for now. FIXME: This can go wrong when references are -/// SP-relative and simple call frames aren't used. 
-int ARM64FrameLowering::getFrameIndexReference(const MachineFunction &MF, - int FI, - unsigned &FrameReg) const { - return resolveFrameIndexReference(MF, FI, FrameReg); -} - -int ARM64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, - int FI, unsigned &FrameReg, - bool PreferFP) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARM64RegisterInfo *RegInfo = - static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo()); - const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>(); - int FPOffset = MFI->getObjectOffset(FI) + 16; - int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); - bool isFixed = MFI->isFixedObjectIndex(FI); - - // Use frame pointer to reference fixed objects. Use it for locals if - // there are VLAs (and thus the SP isn't reliable as a base). - // Make sure useFPForScavengingIndex() does the right thing for the emergency - // spill slot. - bool UseFP = false; - if (AFI->hasStackFrame()) { - // Note: Keeping the following as multiple 'if' statements rather than - // merging to a single expression for readability. - // - // Argument access should always use the FP. - if (isFixed) { - UseFP = hasFP(MF); - } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) { - // Use SP or FP, whichever gives us the best chance of the offset - // being in range for direct access. If the FPOffset is positive, - // that'll always be best, as the SP will be even further away. - // If the FPOffset is negative, we have to keep in mind that the - // available offset range for negative offsets is smaller than for - // positive ones. If we have variable sized objects, we're stuck with - // using the FP regardless, though, as the SP offset is unknown - // and we don't have a base pointer available. If an offset is - // available via the FP and the SP, use whichever is closest. - if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 || - (FPOffset >= -256 && Offset > -FPOffset)) - UseFP = true; - } - } - - if (UseFP) { - FrameReg = RegInfo->getFrameRegister(MF); - return FPOffset; - } - - // Use the base pointer if we have one. - if (RegInfo->hasBasePointer(MF)) - FrameReg = RegInfo->getBaseRegister(); - else { - FrameReg = ARM64::SP; - // If we're using the red zone for this function, the SP won't actually - // be adjusted, so the offsets will be negative. They're also all - // within range of the signed 9-bit immediate instructions. - if (canUseRedZone(MF)) - Offset -= AFI->getLocalStackSize(); - } - - return Offset; -} - -static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { - if (Reg != ARM64::LR) - return getKillRegState(true); - - // LR may be referred to later by an @llvm.returnaddress intrinsic.
- bool LRLiveIn = MF.getRegInfo().isLiveIn(ARM64::LR); - bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken()); - return getKillRegState(LRKill); -} - -bool ARM64FrameLowering::spillCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - unsigned Count = CSI.size(); - DebugLoc DL; - assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); - - if (MI != MBB.end()) - DL = MI->getDebugLoc(); - - for (unsigned i = 0; i < Count; i += 2) { - unsigned idx = Count - i - 2; - unsigned Reg1 = CSI[idx].getReg(); - unsigned Reg2 = CSI[idx + 1].getReg(); - // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI - // list to come in sorted by frame index so that we can issue the store - // pair instructions directly. Assert if we see anything otherwise. - // - // The order of the registers in the list is controlled by - // getCalleeSavedRegs(), so they will always be in-order, as well. - assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() && - "Out of order callee saved regs!"); - unsigned StrOpc; - assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); - assert((i & 1) == 0 && "Odd index for callee-saved reg spill!"); - // Issue sequence of non-sp increment and pi sp spills for cs regs. The - // first spill is a pre-increment that allocates the stack. - // For example: - // stp x22, x21, [sp, #-48]! // addImm(-6) - // stp x20, x19, [sp, #16] // addImm(+2) - // stp fp, lr, [sp, #32] // addImm(+4) - // Rationale: This sequence saves uop updates compared to a sequence of - // pre-increment spills like stp xi,xj,[sp,#-16]! - // Note: Similar rationale and sequence for restores in epilog. - if (ARM64::GPR64RegClass.contains(Reg1)) { - assert(ARM64::GPR64RegClass.contains(Reg2) && - "Expected GPR64 callee-saved register pair!"); - // For first spill use pre-increment store. - if (i == 0) - StrOpc = ARM64::STPXpre; - else - StrOpc = ARM64::STPXi; - } else if (ARM64::FPR64RegClass.contains(Reg1)) { - assert(ARM64::FPR64RegClass.contains(Reg2) && - "Expected FPR64 callee-saved register pair!"); - // For first spill use pre-increment store. - if (i == 0) - StrOpc = ARM64::STPDpre; - else - StrOpc = ARM64::STPDi; - } else - llvm_unreachable("Unexpected callee saved register!"); - DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", " - << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx() - << ", " << CSI[idx + 1].getFrameIdx() << ")\n"); - // Compute offset: i = 0 => offset = -Count; - // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc. - const int Offset = (i == 0) ?
-Count : i; - assert((Offset >= -64 && Offset <= 63) && - "Offset out of bounds for STP immediate"); - BuildMI(MBB, MI, DL, TII.get(StrOpc)) - .addReg(Reg2, getPrologueDeath(MF, Reg2)) - .addReg(Reg1, getPrologueDeath(MF, Reg1)) - .addReg(ARM64::SP) - .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit - .setMIFlag(MachineInstr::FrameSetup); - } - return true; -} - -bool ARM64FrameLowering::restoreCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - unsigned Count = CSI.size(); - DebugLoc DL; - assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); - - if (MI != MBB.end()) - DL = MI->getDebugLoc(); - - for (unsigned i = 0; i < Count; i += 2) { - unsigned Reg1 = CSI[i].getReg(); - unsigned Reg2 = CSI[i + 1].getReg(); - // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI - // list to come in sorted by frame index so that we can issue the store - // pair instructions directly. Assert if we see anything otherwise. - assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() && - "Out of order callee saved regs!"); - // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only - // the last load is sp-pi post-increment and de-allocates the stack: - // For example: - // ldp fp, lr, [sp, #32] // addImm(+4) - // ldp x20, x19, [sp, #16] // addImm(+2) - // ldp x22, x21, [sp], #48 // addImm(+6) - // Note: see comment in spillCalleeSavedRegisters() - unsigned LdrOpc; - - assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); - assert((i & 1) == 0 && "Odd index for callee-saved reg spill!"); - if (ARM64::GPR64RegClass.contains(Reg1)) { - assert(ARM64::GPR64RegClass.contains(Reg2) && - "Expected GPR64 callee-saved register pair!"); - if (i == Count - 2) - LdrOpc = ARM64::LDPXpost; - else - LdrOpc = ARM64::LDPXi; - } else if (ARM64::FPR64RegClass.contains(Reg1)) { - assert(ARM64::FPR64RegClass.contains(Reg2) && - "Expected FPR64 callee-saved register pair!"); - if (i == Count - 2) - LdrOpc = ARM64::LDPDpost; - else - LdrOpc = ARM64::LDPDi; - } else - llvm_unreachable("Unexpected callee saved register!"); - DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", " - << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx() - << ", " << CSI[i + 1].getFrameIdx() << ")\n"); - - // Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4; - // etc. - const int Offset = (i == Count - 2) ? 
Count : Count - i - 2; - assert((Offset >= -64 && Offset <= 63) && - "Offset out of bounds for LDP immediate"); - BuildMI(MBB, MI, DL, TII.get(LdrOpc)) - .addReg(Reg2, getDefRegState(true)) - .addReg(Reg1, getDefRegState(true)) - .addReg(ARM64::SP) - .addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8] - // where the factor * 8 is implicit - } - return true; -} - -void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan( - MachineFunction &MF, RegScavenger *RS) const { - const ARM64RegisterInfo *RegInfo = - static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo()); - ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>(); - MachineRegisterInfo *MRI = &MF.getRegInfo(); - SmallVector<unsigned, 4> UnspilledCSGPRs; - SmallVector<unsigned, 4> UnspilledCSFPRs; - - // The frame record needs to be created by saving the appropriate registers - if (hasFP(MF)) { - MRI->setPhysRegUsed(ARM64::FP); - MRI->setPhysRegUsed(ARM64::LR); - } - - // Spill the BasePtr if it's used. Do this first thing so that the - // getCalleeSavedRegs() below will get the right answer. - if (RegInfo->hasBasePointer(MF)) - MRI->setPhysRegUsed(RegInfo->getBaseRegister()); - - // If any callee-saved registers are used, the frame cannot be eliminated. - unsigned NumGPRSpilled = 0; - unsigned NumFPRSpilled = 0; - bool ExtraCSSpill = false; - bool CanEliminateFrame = true; - DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:"); - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); - - // Check pairs of consecutive callee-saved registers. - for (unsigned i = 0; CSRegs[i]; i += 2) { - assert(CSRegs[i + 1] && "Odd number of callee-saved registers!"); - - const unsigned OddReg = CSRegs[i]; - const unsigned EvenReg = CSRegs[i + 1]; - assert((ARM64::GPR64RegClass.contains(OddReg) && - ARM64::GPR64RegClass.contains(EvenReg)) ^ - (ARM64::FPR64RegClass.contains(OddReg) && - ARM64::FPR64RegClass.contains(EvenReg)) && - "Register class mismatch!"); - - const bool OddRegUsed = MRI->isPhysRegUsed(OddReg); - const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg); - - // Early exit if none of the registers in the register pair is actually - // used. - if (!OddRegUsed && !EvenRegUsed) { - if (ARM64::GPR64RegClass.contains(OddReg)) { - UnspilledCSGPRs.push_back(OddReg); - UnspilledCSGPRs.push_back(EvenReg); - } else { - UnspilledCSFPRs.push_back(OddReg); - UnspilledCSFPRs.push_back(EvenReg); - } - continue; - } - - unsigned Reg = ARM64::NoRegister; - // If only one of the registers of the register pair is used, make sure to - // mark the other one as used as well. - if (OddRegUsed ^ EvenRegUsed) { - // Find out which register is the additional spill. - Reg = OddRegUsed ? EvenReg : OddReg; - MRI->setPhysRegUsed(Reg); - } - - DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo)); - DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo)); - - assert(((OddReg == ARM64::LR && EvenReg == ARM64::FP) || - (RegInfo->getEncodingValue(OddReg) + 1 == - RegInfo->getEncodingValue(EvenReg))) && - "Register pair of non-adjacent registers!"); - if (ARM64::GPR64RegClass.contains(OddReg)) { - NumGPRSpilled += 2; - // If it's not a reserved register, we can use it in lieu of an - // emergency spill slot for the register scavenger. - // FIXME: It would be better to instead keep looking and choose another - // unspilled register that isn't reserved, if there is one. 
- if (Reg != ARM64::NoRegister && !RegInfo->isReservedReg(MF, Reg)) - ExtraCSSpill = true; - } else - NumFPRSpilled += 2; - - CanEliminateFrame = false; - } - - // FIXME: Set BigStack if any stack slot references may be out of range. - // For now, just conservatively guestimate based on unscaled indexing - // range. We'll end up allocating an unnecessary spill slot a lot, but - // realistically that's not a big deal at this stage of the game. - // The CSR spill slots have not been allocated yet, so estimateStackSize - // won't include them. - MachineFrameInfo *MFI = MF.getFrameInfo(); - unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); - DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); - bool BigStack = (CFSize >= 256); - if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) - AFI->setHasStackFrame(true); - - // Estimate if we might need to scavenge a register at some point in order - // to materialize a stack offset. If so, either spill one additional - // callee-saved register or reserve a special spill slot to facilitate - // register scavenging. If we already spilled an extra callee-saved register - // above to keep the number of spills even, we don't need to do anything else - // here. - if (BigStack && !ExtraCSSpill) { - - // If we're adding a register to spill here, we have to add two of them - // to keep the number of regs to spill even. - assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!"); - unsigned Count = 0; - while (!UnspilledCSGPRs.empty() && Count < 2) { - unsigned Reg = UnspilledCSGPRs.back(); - UnspilledCSGPRs.pop_back(); - DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo) - << " to get a scratch register.\n"); - MRI->setPhysRegUsed(Reg); - ExtraCSSpill = true; - ++Count; - } - - // If we didn't find an extra callee-saved register to spill, create - // an emergency spill slot. - if (!ExtraCSSpill) { - const TargetRegisterClass *RC = &ARM64::GPR64RegClass; - int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false); - RS->addScavengingFrameIndex(FI); - DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI - << " as the emergency spill slot.\n"); - } - } -} diff --git a/lib/Target/ARM64/ARM64FrameLowering.h b/lib/Target/ARM64/ARM64FrameLowering.h deleted file mode 100644 index 02edcdb..0000000 --- a/lib/Target/ARM64/ARM64FrameLowering.h +++ /dev/null @@ -1,75 +0,0 @@ -//===-- ARM64FrameLowering.h - TargetFrameLowering for ARM64 ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64_FRAMELOWERING_H -#define ARM64_FRAMELOWERING_H - -#include "llvm/Target/TargetFrameLowering.h" - -namespace llvm { - -class ARM64Subtarget; -class ARM64TargetMachine; - -class ARM64FrameLowering : public TargetFrameLowering { - const ARM64TargetMachine &TM; - -public: - explicit ARM64FrameLowering(const ARM64TargetMachine &TM, - const ARM64Subtarget &STI) - : TargetFrameLowering(StackGrowsDown, 16, 0, 16, - false /*StackRealignable*/), - TM(TM) {} - - void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned FramePtr) const; - - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into - /// the function. - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - - int getFrameIndexOffset(const MachineFunction &MF, int FI) const; - int getFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg) const; - int resolveFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg, - bool PreferFP = false) const; - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; - - bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; - - /// \brief Can this function use the red zone for local allocations. - bool canUseRedZone(const MachineFunction &MF) const; - - bool hasFP(const MachineFunction &MF) const; - bool hasReservedCallFrame(const MachineFunction &MF) const; - - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const; -}; - -} // End llvm namespace - -#endif diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp deleted file mode 100644 index 2e234c9..0000000 --- a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp +++ /dev/null @@ -1,2381 +0,0 @@ -//===-- ARM64ISelDAGToDAG.cpp - A dag to dag inst selector for ARM64 ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines an instruction selector for the ARM64 target. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-isel" -#include "ARM64TargetMachine.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/IR/Function.h" // To access function attributes. -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -//===--------------------------------------------------------------------===// -/// ARM64DAGToDAGISel - ARM64 specific code to select ARM64 machine -/// instructions for SelectionDAG operations. 
-/// -namespace { - -class ARM64DAGToDAGISel : public SelectionDAGISel { - ARM64TargetMachine &TM; - - /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can - /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; - - bool ForCodeSize; - -public: - explicit ARM64DAGToDAGISel(ARM64TargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), TM(tm), - Subtarget(&TM.getSubtarget<ARM64Subtarget>()), ForCodeSize(false) {} - - virtual const char *getPassName() const { - return "ARM64 Instruction Selection"; - } - - virtual bool runOnMachineFunction(MachineFunction &MF) { - AttributeSet FnAttrs = MF.getFunction()->getAttributes(); - ForCodeSize = - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize) || - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); - return SelectionDAGISel::runOnMachineFunction(MF); - } - - SDNode *Select(SDNode *Node); - - /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for - /// inline asm expressions. - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps); - - SDNode *SelectMLAV64LaneV128(SDNode *N); - SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N); - bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); - bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); - bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); - bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { - return SelectShiftedRegister(N, false, Reg, Shift); - } - bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { - return SelectShiftedRegister(N, true, Reg, Shift); - } - bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeIndexed(N, 1, Base, OffImm); - } - bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeIndexed(N, 2, Base, OffImm); - } - bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeIndexed(N, 4, Base, OffImm); - } - bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeIndexed(N, 8, Base, OffImm); - } - bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeIndexed(N, 16, Base, OffImm); - } - bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeUnscaled(N, 1, Base, OffImm); - } - bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeUnscaled(N, 2, Base, OffImm); - } - bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeUnscaled(N, 4, Base, OffImm); - } - bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeUnscaled(N, 8, Base, OffImm); - } - bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeUnscaled(N, 16, Base, OffImm); - } - - bool SelectAddrModeRO8(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 1, Base, Offset, Imm); - } - bool SelectAddrModeRO16(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 2, Base, Offset, Imm); - } - bool SelectAddrModeRO32(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 4, Base, 
Offset, Imm); - } - bool SelectAddrModeRO64(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 8, Base, Offset, Imm); - } - bool SelectAddrModeRO128(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 16, Base, Offset, Imm); - } - bool SelectAddrModeNoIndex(SDValue N, SDValue &Val); - - /// Form sequences of consecutive 64/128-bit registers for use in NEON - /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have - /// between 1 and 4 elements. If it contains a single element that is returned - /// unchanged; otherwise a REG_SEQUENCE value is returned. - SDValue createDTuple(ArrayRef<SDValue> Vecs); - SDValue createQTuple(ArrayRef<SDValue> Vecs); - - /// Generic helper for the createDTuple/createQTuple - /// functions. Those should almost always be called instead. - SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[], - unsigned SubRegs[]); - - SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); - - SDNode *SelectIndexedLoad(SDNode *N, bool &Done); - - SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, - unsigned SubRegIdx); - SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); - - SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); - SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); - - SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node); - SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node); - - SDNode *SelectAtomic(SDNode *Node, unsigned Op8, unsigned Op16, unsigned Op32, - unsigned Op64); - - SDNode *SelectBitfieldExtractOp(SDNode *N); - SDNode *SelectBitfieldInsertOp(SDNode *N); - - SDNode *SelectLIBM(SDNode *N); - -// Include the pieces autogenerated from the target description. -#include "ARM64GenDAGISel.inc" - -private: - bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, - SDValue &Shift); - bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, - SDValue &OffImm); - bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, - SDValue &OffImm); - bool SelectAddrModeRO(SDValue N, unsigned Size, SDValue &Base, - SDValue &Offset, SDValue &Imm); - bool isWorthFolding(SDValue V) const; - bool SelectExtendedSHL(SDValue N, unsigned Size, SDValue &Offset, - SDValue &Imm); -}; -} // end anonymous namespace - -/// isIntImmediate - This method tests to see if the node is a constant -/// operand. If so Imm will receive the 32-bit value. -static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { - if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) { - Imm = C->getZExtValue(); - return true; - } - return false; -} - -// isIntImmediate - This method tests to see if a constant operand. -// If so Imm will receive the value. -static bool isIntImmediate(SDValue N, uint64_t &Imm) { - return isIntImmediate(N.getNode(), Imm); -} - -// isOpcWithIntImmediate - This method tests to see if the node is a specific -// opcode and that it has a immediate integer right operand. -// If so Imm will receive the 32 bit value. 
-static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, - uint64_t &Imm) { - return N->getOpcode() == Opc && - isIntImmediate(N->getOperand(1).getNode(), Imm); -} - -bool ARM64DAGToDAGISel::SelectAddrModeNoIndex(SDValue N, SDValue &Val) { - EVT ValTy = N.getValueType(); - if (ValTy != MVT::i64) - return false; - Val = N; - return true; -} - -bool ARM64DAGToDAGISel::SelectInlineAsmMemoryOperand( - const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { - assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); - // Require the address to be in a register. That is safe for all ARM64 - // variants and it is hard to do anything much smarter without knowing - // how the operand is used. - OutOps.push_back(Op); - return false; -} - -/// SelectArithImmed - Select an immediate value that can be represented as -/// a 12-bit value shifted left by either 0 or 12. If so, return true with -/// Val set to the 12-bit value and Shift set to the shifter operand. -bool ARM64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, - SDValue &Shift) { - // This function is called from the addsub_shifted_imm ComplexPattern, - // which lists [imm] as the list of opcode it's interested in, however - // we still need to check whether the operand is actually an immediate - // here because the ComplexPattern opcode list is only used in - // root-level opcode matching. - if (!isa<ConstantSDNode>(N.getNode())) - return false; - - uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); - unsigned ShiftAmt; - - if (Immed >> 12 == 0) { - ShiftAmt = 0; - } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { - ShiftAmt = 12; - Immed = Immed >> 12; - } else - return false; - - unsigned ShVal = ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt); - Val = CurDAG->getTargetConstant(Immed, MVT::i32); - Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); - return true; -} - -/// SelectNegArithImmed - As above, but negates the value before trying to -/// select it. -bool ARM64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, - SDValue &Shift) { - // This function is called from the addsub_shifted_imm ComplexPattern, - // which lists [imm] as the list of opcode it's interested in, however - // we still need to check whether the operand is actually an immediate - // here because the ComplexPattern opcode list is only used in - // root-level opcode matching. - if (!isa<ConstantSDNode>(N.getNode())) - return false; - - // The immediate operand must be a 24-bit zero-extended immediate. - uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); - - // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" - // have the opposite effect on the C flag, so this pattern mustn't match under - // those circumstances. - if (Immed == 0) - return false; - - if (N.getValueType() == MVT::i32) - Immed = ~((uint32_t)Immed) + 1; - else - Immed = ~Immed + 1ULL; - if (Immed & 0xFFFFFFFFFF000000ULL) - return false; - - Immed &= 0xFFFFFFULL; - return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift); -} - -/// getShiftTypeForNode - Translate a shift node to the corresponding -/// ShiftType value. 
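The arithmetic-immediate selection above accepts only values expressible as a 12-bit quantity optionally shifted left by 12, which is what ADD/SUB (immediate) can encode. A self-contained restatement of that predicate, independent of the SelectionDAG machinery (the function name is illustrative):

  #include <cstdint>

  // Returns true if Imm fits the AArch64 add/sub immediate form: a 12-bit
  // value shifted left by 0 or by 12.  On success, Encoded holds the 12-bit
  // payload and ShiftAmt the shift amount (0 or 12).
  static bool isAddSubImmediate(uint64_t Imm, unsigned &Encoded,
                                unsigned &ShiftAmt) {
    if ((Imm >> 12) == 0) {                       // fits in the low 12 bits
      Encoded = static_cast<unsigned>(Imm);
      ShiftAmt = 0;
      return true;
    }
    if ((Imm & 0xfff) == 0 && (Imm >> 24) == 0) { // 12 bits, shifted by 12
      Encoded = static_cast<unsigned>(Imm >> 12);
      ShiftAmt = 12;
      return true;
    }
    return false;            // needs a MOV/MOVK sequence or negated form
  }

For instance, 0x456000 passes as #0x456 with LSL #12, while 0x456001 fails both checks and falls back to materializing the constant.
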
-static ARM64_AM::ShiftType getShiftTypeForNode(SDValue N) { - switch (N.getOpcode()) { - default: - return ARM64_AM::InvalidShift; - case ISD::SHL: - return ARM64_AM::LSL; - case ISD::SRL: - return ARM64_AM::LSR; - case ISD::SRA: - return ARM64_AM::ASR; - case ISD::ROTR: - return ARM64_AM::ROR; - } -} - -/// \brief Determine wether it is worth to fold V into an extended register. -bool ARM64DAGToDAGISel::isWorthFolding(SDValue V) const { - // it hurts if the a value is used at least twice, unless we are optimizing - // for code size. - if (ForCodeSize || V.hasOneUse()) - return true; - return false; -} - -/// SelectShiftedRegister - Select a "shifted register" operand. If the value -/// is not shifted, set the Shift operand to default of "LSL 0". The logical -/// instructions allow the shifted register to be rotated, but the arithmetic -/// instructions do not. The AllowROR parameter specifies whether ROR is -/// supported. -bool ARM64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, - SDValue &Reg, SDValue &Shift) { - ARM64_AM::ShiftType ShType = getShiftTypeForNode(N); - if (ShType == ARM64_AM::InvalidShift) - return false; - if (!AllowROR && ShType == ARM64_AM::ROR) - return false; - - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - unsigned BitSize = N.getValueType().getSizeInBits(); - unsigned Val = RHS->getZExtValue() & (BitSize - 1); - unsigned ShVal = ARM64_AM::getShifterImm(ShType, Val); - - Reg = N.getOperand(0); - Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); - return isWorthFolding(N); - } - - return false; -} - -/// getExtendTypeForNode - Translate an extend node to the corresponding -/// ExtendType value. -static ARM64_AM::ExtendType getExtendTypeForNode(SDValue N, - bool IsLoadStore = false) { - if (N.getOpcode() == ISD::SIGN_EXTEND || - N.getOpcode() == ISD::SIGN_EXTEND_INREG) { - EVT SrcVT; - if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) - SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); - else - SrcVT = N.getOperand(0).getValueType(); - - if (!IsLoadStore && SrcVT == MVT::i8) - return ARM64_AM::SXTB; - else if (!IsLoadStore && SrcVT == MVT::i16) - return ARM64_AM::SXTH; - else if (SrcVT == MVT::i32) - return ARM64_AM::SXTW; - else if (SrcVT == MVT::i64) - return ARM64_AM::SXTX; - - return ARM64_AM::InvalidExtend; - } else if (N.getOpcode() == ISD::ZERO_EXTEND || - N.getOpcode() == ISD::ANY_EXTEND) { - EVT SrcVT = N.getOperand(0).getValueType(); - if (!IsLoadStore && SrcVT == MVT::i8) - return ARM64_AM::UXTB; - else if (!IsLoadStore && SrcVT == MVT::i16) - return ARM64_AM::UXTH; - else if (SrcVT == MVT::i32) - return ARM64_AM::UXTW; - else if (SrcVT == MVT::i64) - return ARM64_AM::UXTX; - - return ARM64_AM::InvalidExtend; - } else if (N.getOpcode() == ISD::AND) { - ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); - if (!CSD) - return ARM64_AM::InvalidExtend; - uint64_t AndMask = CSD->getZExtValue(); - - switch (AndMask) { - default: - return ARM64_AM::InvalidExtend; - case 0xFF: - return !IsLoadStore ? ARM64_AM::UXTB : ARM64_AM::InvalidExtend; - case 0xFFFF: - return !IsLoadStore ? ARM64_AM::UXTH : ARM64_AM::InvalidExtend; - case 0xFFFFFFFF: - return ARM64_AM::UXTW; - } - } - - return ARM64_AM::InvalidExtend; -} - -// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. 
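Note that getExtendTypeForNode above also treats an AND with certain masks as an implicit zero-extend, since ANDing with 0xFF, 0xFFFF, or 0xFFFFFFFF is equivalent to UXTB, UXTH, or UXTW. A standalone sketch of that mask-to-extend mapping, restating the checks shown above with illustrative names:

  #include <cstdint>

  enum class ExtendKind { Invalid, UXTB, UXTH, UXTW };

  // An AND whose mask is a contiguous low-bit run of 8, 16, or 32 ones behaves
  // like the corresponding zero-extend, so it can fold into the extended-
  // register operand form.  For load/store addressing the 8- and 16-bit forms
  // are rejected, mirroring the IsLoadStore cases above.
  static ExtendKind extendFromAndMask(uint64_t Mask, bool IsLoadStore) {
    switch (Mask) {
    case 0xFFu:       return IsLoadStore ? ExtendKind::Invalid : ExtendKind::UXTB;
    case 0xFFFFu:     return IsLoadStore ? ExtendKind::Invalid : ExtendKind::UXTH;
    case 0xFFFFFFFFu: return ExtendKind::UXTW;
    default:          return ExtendKind::Invalid;
    }
  }
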
-static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { - if (DL->getOpcode() != ARM64ISD::DUPLANE16 && - DL->getOpcode() != ARM64ISD::DUPLANE32) - return false; - - SDValue SV = DL->getOperand(0); - if (SV.getOpcode() != ISD::INSERT_SUBVECTOR) - return false; - - SDValue EV = SV.getOperand(1); - if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR) - return false; - - ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode()); - ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode()); - LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue(); - LaneOp = EV.getOperand(0); - - return true; -} - -// Helper for SelectOpcV64LaneV128 - Recogzine operatinos where one operand is a -// high lane extract. -static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, - SDValue &LaneOp, int &LaneIdx) { - - if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) { - std::swap(Op0, Op1); - if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) - return false; - } - StdOp = Op1; - return true; -} - -/// SelectMLAV64LaneV128 - ARM64 supports 64-bit vector MLAs (v4i16 and v2i32) -/// where one multiplicand is a lane in the upper half of a 128-bit vector. -/// Recognize and select this so that we don't emit unnecessary lane extracts. -SDNode *ARM64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); - SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. - SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA. - int LaneIdx = -1; // Will hold the lane index. - - if (Op1.getOpcode() != ISD::MUL || - !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, - LaneIdx)) { - std::swap(Op0, Op1); - if (Op1.getOpcode() != ISD::MUL || - !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, - LaneIdx)) - return 0; - } - - SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64); - - SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; - - unsigned MLAOpc = ~0U; - - switch (N->getSimpleValueType(0).SimpleTy) { - default: - llvm_unreachable("Unrecognized MLA."); - case MVT::v4i16: - MLAOpc = ARM64::MLAv4i16_indexed; - break; - case MVT::v2i32: - MLAOpc = ARM64::MLAv2i32_indexed; - break; - } - - return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops); -} - -SDNode *ARM64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { - SDValue SMULLOp0; - SDValue SMULLOp1; - int LaneIdx; - - if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1, - LaneIdx)) - return 0; - - SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64); - - SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; - - unsigned SMULLOpc = ~0U; - - if (IntNo == Intrinsic::arm64_neon_smull) { - switch (N->getSimpleValueType(0).SimpleTy) { - default: - llvm_unreachable("Unrecognized SMULL."); - case MVT::v4i32: - SMULLOpc = ARM64::SMULLv4i16_indexed; - break; - case MVT::v2i64: - SMULLOpc = ARM64::SMULLv2i32_indexed; - break; - } - } else if (IntNo == Intrinsic::arm64_neon_umull) { - switch (N->getSimpleValueType(0).SimpleTy) { - default: - llvm_unreachable("Unrecognized SMULL."); - case MVT::v4i32: - SMULLOpc = ARM64::UMULLv4i16_indexed; - break; - case MVT::v2i64: - SMULLOpc = ARM64::UMULLv2i32_indexed; - break; - } - } else - llvm_unreachable("Unrecognized intrinsic."); - - return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops); -} - -/// SelectArithExtendedRegister - Select a "extended register" 
operand. This -/// operand folds in an extend followed by an optional left shift. -bool ARM64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, - SDValue &Shift) { - unsigned ShiftVal = 0; - ARM64_AM::ExtendType Ext; - - if (N.getOpcode() == ISD::SHL) { - ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); - if (!CSD) - return false; - ShiftVal = CSD->getZExtValue(); - if ((ShiftVal & 0x3) != ShiftVal) - return false; - - Ext = getExtendTypeForNode(N.getOperand(0)); - if (Ext == ARM64_AM::InvalidExtend) - return false; - - Reg = N.getOperand(0).getOperand(0); - } else { - Ext = getExtendTypeForNode(N); - if (Ext == ARM64_AM::InvalidExtend) - return false; - - Reg = N.getOperand(0); - } - - // ARM64 mandates that the RHS of the operation must use the smallest - // register classs that could contain the size being extended from. Thus, - // if we're folding a (sext i8), we need the RHS to be a GPR32, even though - // there might not be an actual 32-bit value in the program. We can - // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. - if (Reg.getValueType() == MVT::i64 && Ext != ARM64_AM::UXTX && - Ext != ARM64_AM::SXTX) { - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); - MachineSDNode *Node = CurDAG->getMachineNode( - TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, Reg, SubReg); - Reg = SDValue(Node, 0); - } - - Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32); - return isWorthFolding(N); -} - -/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit -/// immediate" address. The "Size" argument is the size in bytes of the memory -/// reference, which determines the scale. -bool ARM64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, - SDValue &Base, SDValue &OffImm) { - const TargetLowering *TLI = getTargetLowering(); - if (N.getOpcode() == ISD::FrameIndex) { - int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); - OffImm = CurDAG->getTargetConstant(0, MVT::i64); - return true; - } - - if (N.getOpcode() == ARM64ISD::ADDlow) { - GlobalAddressSDNode *GAN = - dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); - Base = N.getOperand(0); - OffImm = N.getOperand(1); - if (!GAN) - return true; - - const GlobalValue *GV = GAN->getGlobal(); - unsigned Alignment = GV->getAlignment(); - const DataLayout *DL = TLI->getDataLayout(); - if (Alignment == 0 && !Subtarget->isTargetDarwin()) - Alignment = DL->getABITypeAlignment(GV->getType()->getElementType()); - - if (Alignment >= Size) - return true; - } - - if (CurDAG->isBaseWithConstantOffset(N)) { - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - int64_t RHSC = (int64_t)RHS->getZExtValue(); - unsigned Scale = Log2_32(Size); - if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { - Base = N.getOperand(0); - if (Base.getOpcode() == ISD::FrameIndex) { - int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); - } - OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64); - return true; - } - } - } - - // Before falling back to our general case, check if the unscaled - // instructions can handle this. If so, that's preferable. - if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) - return false; - - // Base only. The address will be materialized into a register before - // the memory is accessed. 
- // add x0, Xbase, #offset - // ldr x0, [x0] - Base = N; - OffImm = CurDAG->getTargetConstant(0, MVT::i64); - return true; -} - -/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit -/// immediate" address. This should only match when there is an offset that -/// is not valid for a scaled immediate addressing mode. The "Size" argument -/// is the size in bytes of the memory reference, which is needed here to know -/// what is valid for a scaled immediate. -bool ARM64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, - SDValue &Base, SDValue &OffImm) { - if (!CurDAG->isBaseWithConstantOffset(N)) - return false; - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - int64_t RHSC = RHS->getSExtValue(); - // If the offset is valid as a scaled immediate, don't match here. - if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && - RHSC < (0x1000 << Log2_32(Size))) - return false; - if (RHSC >= -256 && RHSC < 256) { - Base = N.getOperand(0); - if (Base.getOpcode() == ISD::FrameIndex) { - int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - const TargetLowering *TLI = getTargetLowering(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); - } - OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64); - return true; - } - } - return false; -} - -static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); - SDValue ImpDef = SDValue( - CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64), - 0); - MachineSDNode *Node = CurDAG->getMachineNode( - TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg); - return SDValue(Node, 0); -} - -static SDValue WidenIfNeeded(SelectionDAG *CurDAG, SDValue N) { - if (N.getValueType() == MVT::i32) { - return Widen(CurDAG, N); - } - - return N; -} - -/// \brief Check if the given SHL node (\p N), can be used to form an -/// extended register for an addressing mode. -bool ARM64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, - SDValue &Offset, SDValue &Imm) { - assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); - ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); - if (CSD && (CSD->getZExtValue() & 0x7) == CSD->getZExtValue()) { - - ARM64_AM::ExtendType Ext = getExtendTypeForNode(N.getOperand(0), true); - if (Ext == ARM64_AM::InvalidExtend) { - Ext = ARM64_AM::UXTX; - Offset = WidenIfNeeded(CurDAG, N.getOperand(0)); - } else { - Offset = WidenIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); - } - - unsigned LegalShiftVal = Log2_32(Size); - unsigned ShiftVal = CSD->getZExtValue(); - - if (ShiftVal != 0 && ShiftVal != LegalShiftVal) - return false; - - Imm = CurDAG->getTargetConstant( - ARM64_AM::getMemExtendImm(Ext, ShiftVal != 0), MVT::i32); - if (isWorthFolding(N)) - return true; - } - return false; -} - -bool ARM64DAGToDAGISel::SelectAddrModeRO(SDValue N, unsigned Size, - SDValue &Base, SDValue &Offset, - SDValue &Imm) { - if (N.getOpcode() != ISD::ADD) - return false; - SDValue LHS = N.getOperand(0); - SDValue RHS = N.getOperand(1); - - // We don't want to match immediate adds here, because they are better lowered - // to the register-immediate addressing modes. - if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) - return false; - - // Check if this particular node is reused in any non-memory related - // operation. If yes, do not try to fold this node into the address - // computation, since the computation will be kept. 
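The two immediate addressing routines above split offsets between the LDR/STR unsigned-immediate form, whose offset is scaled by the access size and held in a 12-bit field, and the LDUR/STUR unscaled form, which takes a signed 9-bit byte offset. A sketch of that classification for a byte offset and access size, with ranges taken from the checks above (names are illustrative):

  #include <cstdint>

  enum class AddrForm { ScaledImm, UnscaledImm, BaseOnly };

  // Size is the access width in bytes (1, 2, 4, 8 or 16).  The scaled form
  // needs a non-negative offset that is a multiple of Size whose scaled value
  // fits in 12 bits; the unscaled form takes any byte offset in [-256, 256).
  static AddrForm classifyOffset(int64_t Offset, unsigned Size) {
    if (Offset >= 0 && (Offset % Size) == 0 && (Offset / Size) < 4096)
      return AddrForm::ScaledImm;    // e.g. ldr x0, [xN, #Offset]
    if (Offset >= -256 && Offset < 256)
      return AddrForm::UnscaledImm;  // e.g. ldur x0, [xN, #Offset]
    return AddrForm::BaseOnly;       // materialize the address first
  }
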
- const SDNode *Node = N.getNode(); - for (SDNode::use_iterator UI = Node->use_begin(), UE = Node->use_end(); - UI != UE; ++UI) { - if (!isa<MemSDNode>(*UI)) - return false; - } - - // Remember if it is worth folding N when it produces extended register. - bool IsExtendedRegisterWorthFolding = isWorthFolding(N); - - // Try to match a shifted extend on the RHS. - if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && - SelectExtendedSHL(RHS, Size, Offset, Imm)) { - Base = LHS; - return true; - } - - // Try to match a shifted extend on the LHS. - if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && - SelectExtendedSHL(LHS, Size, Offset, Imm)) { - Base = RHS; - return true; - } - - ARM64_AM::ExtendType Ext = ARM64_AM::UXTX; - // Try to match an unshifted extend on the LHS. - if (IsExtendedRegisterWorthFolding && - (Ext = getExtendTypeForNode(LHS, true)) != ARM64_AM::InvalidExtend) { - Base = RHS; - Offset = WidenIfNeeded(CurDAG, LHS.getOperand(0)); - Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false), - MVT::i32); - if (isWorthFolding(LHS)) - return true; - } - - // Try to match an unshifted extend on the RHS. - if (IsExtendedRegisterWorthFolding && - (Ext = getExtendTypeForNode(RHS, true)) != ARM64_AM::InvalidExtend) { - Base = LHS; - Offset = WidenIfNeeded(CurDAG, RHS.getOperand(0)); - Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false), - MVT::i32); - if (isWorthFolding(RHS)) - return true; - } - - // Match any non-shifted, non-extend, non-immediate add expression. - Base = LHS; - Offset = WidenIfNeeded(CurDAG, RHS); - Ext = ARM64_AM::UXTX; - Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false), - MVT::i32); - // Reg1 + Reg2 is free: no check needed. - return true; -} - -SDValue ARM64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { - static unsigned RegClassIDs[] = { ARM64::DDRegClassID, ARM64::DDDRegClassID, - ARM64::DDDDRegClassID }; - static unsigned SubRegs[] = { ARM64::dsub0, ARM64::dsub1, - ARM64::dsub2, ARM64::dsub3 }; - - return createTuple(Regs, RegClassIDs, SubRegs); -} - -SDValue ARM64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { - static unsigned RegClassIDs[] = { ARM64::QQRegClassID, ARM64::QQQRegClassID, - ARM64::QQQQRegClassID }; - static unsigned SubRegs[] = { ARM64::qsub0, ARM64::qsub1, - ARM64::qsub2, ARM64::qsub3 }; - - return createTuple(Regs, RegClassIDs, SubRegs); -} - -SDValue ARM64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, - unsigned RegClassIDs[], - unsigned SubRegs[]) { - // There's no special register-class for a vector-list of 1 element: it's just - // a vector. - if (Regs.size() == 1) - return Regs[0]; - - assert(Regs.size() >= 2 && Regs.size() <= 4); - - SDLoc DL(Regs[0].getNode()); - - SmallVector<SDValue, 4> Ops; - - // First operand of REG_SEQUENCE is the desired RegClass. - Ops.push_back( - CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32)); - - // Then we get pairs of source & subregister-position for the components. - for (unsigned i = 0; i < Regs.size(); ++i) { - Ops.push_back(Regs[i]); - Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32)); - } - - SDNode *N = - CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); - return SDValue(N, 0); -} - -SDNode *ARM64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, - unsigned Opc, bool isExt) { - SDLoc dl(N); - EVT VT = N->getValueType(0); - - unsigned ExtOff = isExt; - - // Form a REG_SEQUENCE to force register allocation. 
- unsigned Vec0Off = ExtOff + 1; - SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off, - N->op_begin() + Vec0Off + NumVecs); - SDValue RegSeq = createQTuple(Regs); - - SmallVector<SDValue, 6> Ops; - if (isExt) - Ops.push_back(N->getOperand(1)); - Ops.push_back(RegSeq); - Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); - return CurDAG->getMachineNode(Opc, dl, VT, Ops); -} - -SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { - LoadSDNode *LD = cast<LoadSDNode>(N); - if (LD->isUnindexed()) - return NULL; - EVT VT = LD->getMemoryVT(); - EVT DstVT = N->getValueType(0); - ISD::MemIndexedMode AM = LD->getAddressingMode(); - bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; - - // We're not doing validity checking here. That was done when checking - // if we should mark the load as indexed or not. We're just selecting - // the right instruction. - unsigned Opcode = 0; - - ISD::LoadExtType ExtType = LD->getExtensionType(); - bool InsertTo64 = false; - if (VT == MVT::i64) - Opcode = IsPre ? ARM64::LDRXpre_isel : ARM64::LDRXpost_isel; - else if (VT == MVT::i32) { - if (ExtType == ISD::NON_EXTLOAD) - Opcode = IsPre ? ARM64::LDRWpre_isel : ARM64::LDRWpost_isel; - else if (ExtType == ISD::SEXTLOAD) - Opcode = IsPre ? ARM64::LDRSWpre_isel : ARM64::LDRSWpost_isel; - else { - Opcode = IsPre ? ARM64::LDRWpre_isel : ARM64::LDRWpost_isel; - InsertTo64 = true; - // The result of the load is only i32. It's the subreg_to_reg that makes - // it into an i64. - DstVT = MVT::i32; - } - } else if (VT == MVT::i16) { - if (ExtType == ISD::SEXTLOAD) { - if (DstVT == MVT::i64) - Opcode = IsPre ? ARM64::LDRSHXpre_isel : ARM64::LDRSHXpost_isel; - else - Opcode = IsPre ? ARM64::LDRSHWpre_isel : ARM64::LDRSHWpost_isel; - } else { - Opcode = IsPre ? ARM64::LDRHHpre_isel : ARM64::LDRHHpost_isel; - InsertTo64 = DstVT == MVT::i64; - // The result of the load is only i32. It's the subreg_to_reg that makes - // it into an i64. - DstVT = MVT::i32; - } - } else if (VT == MVT::i8) { - if (ExtType == ISD::SEXTLOAD) { - if (DstVT == MVT::i64) - Opcode = IsPre ? ARM64::LDRSBXpre_isel : ARM64::LDRSBXpost_isel; - else - Opcode = IsPre ? ARM64::LDRSBWpre_isel : ARM64::LDRSBWpost_isel; - } else { - Opcode = IsPre ? ARM64::LDRBBpre_isel : ARM64::LDRBBpost_isel; - InsertTo64 = DstVT == MVT::i64; - // The result of the load is only i32. It's the subreg_to_reg that makes - // it into an i64. - DstVT = MVT::i32; - } - } else if (VT == MVT::f32) { - Opcode = IsPre ? ARM64::LDRSpre_isel : ARM64::LDRSpost_isel; - } else if (VT == MVT::f64) { - Opcode = IsPre ? ARM64::LDRDpre_isel : ARM64::LDRDpost_isel; - } else - return NULL; - SDValue Chain = LD->getChain(); - SDValue Base = LD->getBasePtr(); - ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); - int OffsetVal = (int)OffsetOp->getZExtValue(); - SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64); - SDValue Ops[] = { Base, Offset, Chain }; - SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), DstVT, MVT::i64, - MVT::Other, Ops); - // Either way, we're replacing the node, so tell the caller that. 
- Done = true; - if (InsertTo64) { - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); - SDNode *Sub = CurDAG->getMachineNode( - ARM64::SUBREG_TO_REG, SDLoc(N), MVT::i64, - CurDAG->getTargetConstant(0, MVT::i64), SDValue(Res, 0), SubReg); - ReplaceUses(SDValue(N, 0), SDValue(Sub, 0)); - ReplaceUses(SDValue(N, 1), SDValue(Res, 1)); - ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); - return 0; - } - return Res; -} - -SDNode *ARM64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, - unsigned SubRegIdx) { - SDLoc dl(N); - EVT VT = N->getValueType(0); - SDValue Chain = N->getOperand(0); - - SmallVector<SDValue, 6> Ops; - Ops.push_back(N->getOperand(2)); // Mem operand; - Ops.push_back(Chain); - - std::vector<EVT> ResTys; - ResTys.push_back(MVT::Untyped); - ResTys.push_back(MVT::Other); - - SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); - SDValue SuperReg = SDValue(Ld, 0); - - // MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - // MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); - // cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); - - switch (NumVecs) { - case 4: - ReplaceUses(SDValue(N, 3), CurDAG->getTargetExtractSubreg(SubRegIdx + 3, dl, - VT, SuperReg)); - // FALLTHROUGH - case 3: - ReplaceUses(SDValue(N, 2), CurDAG->getTargetExtractSubreg(SubRegIdx + 2, dl, - VT, SuperReg)); - // FALLTHROUGH - case 2: - ReplaceUses(SDValue(N, 1), CurDAG->getTargetExtractSubreg(SubRegIdx + 1, dl, - VT, SuperReg)); - ReplaceUses(SDValue(N, 0), - CurDAG->getTargetExtractSubreg(SubRegIdx, dl, VT, SuperReg)); - break; - case 1: - ReplaceUses(SDValue(N, 0), SuperReg); - break; - } - - ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); - - return 0; -} - -SDNode *ARM64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, - unsigned Opc) { - SDLoc dl(N); - EVT VT = N->getOperand(2)->getValueType(0); - - // Form a REG_SEQUENCE to force register allocation. - bool Is128Bit = VT.getSizeInBits() == 128; - SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); - SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); - - SmallVector<SDValue, 6> Ops; - Ops.push_back(RegSeq); - Ops.push_back(N->getOperand(NumVecs + 2)); - Ops.push_back(N->getOperand(0)); - SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); - - return St; -} - -/// WidenVector - Given a value in the V64 register class, produce the -/// equivalent value in the V128 register class. -class WidenVector { - SelectionDAG &DAG; - -public: - WidenVector(SelectionDAG &DAG) : DAG(DAG) {} - - SDValue operator()(SDValue V64Reg) { - EVT VT = V64Reg.getValueType(); - unsigned NarrowSize = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType().getSimpleVT(); - MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); - SDLoc DL(V64Reg); - - SDValue Undef = - SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); - return DAG.getTargetInsertSubreg(ARM64::dsub, DL, WideTy, Undef, V64Reg); - } -}; - -/// NarrowVector - Given a value in the V128 register class, produce the -/// equivalent value in the V64 register class. 
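WidenVector above and NarrowVector below move values between the 64-bit D and 128-bit Q views of the same vector register: widening inserts the value into the low half (dsub) of an undefined 128-bit register, narrowing extracts that half back out. The type arithmetic is just doubling or halving the lane count; a minimal sketch of that outside the DAG, with an illustrative stand-in for the value type:

  #include <cassert>

  // Minimal stand-in for a fixed vector type: element width in bits and lane
  // count.  v4i16 is {16, 4} (64 bits total); v8i16 is {16, 8} (128 bits).
  struct VecTy {
    unsigned EltBits;
    unsigned NumElts;
    unsigned totalBits() const { return EltBits * NumElts; }
  };

  static VecTy widenTo128(VecTy V64) {
    assert(V64.totalBits() == 64 && "expected a 64-bit vector");
    return {V64.EltBits, V64.NumElts * 2};   // same element type, twice the lanes
  }

  static VecTy narrowTo64(VecTy V128) {
    assert(V128.totalBits() == 128 && "expected a 128-bit vector");
    return {V128.EltBits, V128.NumElts / 2}; // keep the low half (dsub)
  }
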
-static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { - EVT VT = V128Reg.getValueType(); - unsigned WideSize = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType().getSimpleVT(); - MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); - - return DAG.getTargetExtractSubreg(ARM64::dsub, SDLoc(V128Reg), NarrowTy, - V128Reg); -} - -SDNode *ARM64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, - unsigned Opc) { - SDLoc dl(N); - EVT VT = N->getValueType(0); - bool Narrow = VT.getSizeInBits() == 64; - - // Form a REG_SEQUENCE to force register allocation. - SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); - - if (Narrow) - std::transform(Regs.begin(), Regs.end(), Regs.begin(), - WidenVector(*CurDAG)); - - SDValue RegSeq = createQTuple(Regs); - - std::vector<EVT> ResTys; - ResTys.push_back(MVT::Untyped); - ResTys.push_back(MVT::Other); - - unsigned LaneNo = - cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); - - SmallVector<SDValue, 6> Ops; - Ops.push_back(RegSeq); - Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); - Ops.push_back(N->getOperand(NumVecs + 3)); - Ops.push_back(N->getOperand(0)); - SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); - SDValue SuperReg = SDValue(Ld, 0); - - EVT WideVT = RegSeq.getOperand(1)->getValueType(0); - switch (NumVecs) { - case 4: { - SDValue NV3 = - CurDAG->getTargetExtractSubreg(ARM64::qsub3, dl, WideVT, SuperReg); - if (Narrow) - ReplaceUses(SDValue(N, 3), NarrowVector(NV3, *CurDAG)); - else - ReplaceUses(SDValue(N, 3), NV3); - } - // FALLTHROUGH - case 3: { - SDValue NV2 = - CurDAG->getTargetExtractSubreg(ARM64::qsub2, dl, WideVT, SuperReg); - if (Narrow) - ReplaceUses(SDValue(N, 2), NarrowVector(NV2, *CurDAG)); - else - ReplaceUses(SDValue(N, 2), NV2); - } - // FALLTHROUGH - case 2: { - SDValue NV1 = - CurDAG->getTargetExtractSubreg(ARM64::qsub1, dl, WideVT, SuperReg); - SDValue NV0 = - CurDAG->getTargetExtractSubreg(ARM64::qsub0, dl, WideVT, SuperReg); - if (Narrow) { - ReplaceUses(SDValue(N, 1), NarrowVector(NV1, *CurDAG)); - ReplaceUses(SDValue(N, 0), NarrowVector(NV0, *CurDAG)); - } else { - ReplaceUses(SDValue(N, 1), NV1); - ReplaceUses(SDValue(N, 0), NV0); - } - break; - } - } - - ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); - - return Ld; -} - -SDNode *ARM64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, - unsigned Opc) { - SDLoc dl(N); - EVT VT = N->getOperand(2)->getValueType(0); - bool Narrow = VT.getSizeInBits() == 64; - - // Form a REG_SEQUENCE to force register allocation. - SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); - - if (Narrow) - std::transform(Regs.begin(), Regs.end(), Regs.begin(), - WidenVector(*CurDAG)); - - SDValue RegSeq = createQTuple(Regs); - - unsigned LaneNo = - cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); - - SmallVector<SDValue, 6> Ops; - Ops.push_back(RegSeq); - Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); - Ops.push_back(N->getOperand(NumVecs + 3)); - Ops.push_back(N->getOperand(0)); - SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); - - // Transfer memoperands. 
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); - cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); - - return St; -} - -SDNode *ARM64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, - unsigned Op16, unsigned Op32, - unsigned Op64) { - // Mostly direct translation to the given operations, except that we preserve - // the AtomicOrdering for use later on. - AtomicSDNode *AN = cast<AtomicSDNode>(Node); - EVT VT = AN->getMemoryVT(); - - unsigned Op; - if (VT == MVT::i8) - Op = Op8; - else if (VT == MVT::i16) - Op = Op16; - else if (VT == MVT::i32) - Op = Op32; - else if (VT == MVT::i64) - Op = Op64; - else - llvm_unreachable("Unexpected atomic operation"); - - SmallVector<SDValue, 4> Ops; - for (unsigned i = 1; i < AN->getNumOperands(); ++i) - Ops.push_back(AN->getOperand(i)); - - Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32)); - Ops.push_back(AN->getOperand(0)); // Chain moves to the end - - return CurDAG->SelectNodeTo(Node, Op, AN->getValueType(0), MVT::Other, - &Ops[0], Ops.size()); -} - -static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, - unsigned &Opc, SDValue &Opd0, - unsigned &LSB, unsigned &MSB, - unsigned NumberOfIgnoredLowBits, - bool BiggerPattern) { - assert(N->getOpcode() == ISD::AND && - "N must be a AND operation to call this function"); - - EVT VT = N->getValueType(0); - - // Here we can test the type of VT and return false when the type does not - // match, but since it is done prior to that call in the current context - // we turned that into an assert to avoid redundant code. - assert((VT == MVT::i32 || VT == MVT::i64) && - "Type checking must have been done before calling this function"); - - // FIXME: simplify-demanded-bits in DAGCombine will probably have - // changed the AND node to a 32-bit mask operation. We'll have to - // undo that as part of the transform here if we want to catch all - // the opportunities. - // Currently the NumberOfIgnoredLowBits argument helps to recover - // form these situations when matching bigger pattern (bitfield insert). - - // For unsigned extracts, check for a shift right and mask - uint64_t And_imm = 0; - if (!isOpcWithIntImmediate(N, ISD::AND, And_imm)) - return false; - - const SDNode *Op0 = N->getOperand(0).getNode(); - - // Because of simplify-demanded-bits in DAGCombine, the mask may have been - // simplified. Try to undo that - And_imm |= (1 << NumberOfIgnoredLowBits) - 1; - - // The immediate is a mask of the low bits iff imm & (imm+1) == 0 - if (And_imm & (And_imm + 1)) - return false; - - bool ClampMSB = false; - uint64_t Srl_imm = 0; - // Handle the SRL + ANY_EXTEND case. - if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && - isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) { - // Extend the incoming operand of the SRL to 64-bit. - Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); - // Make sure to clamp the MSB so that we preserve the semantics of the - // original operations. - ClampMSB = true; - } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) { - Opd0 = Op0->getOperand(0); - } else if (BiggerPattern) { - // Let's pretend a 0 shift right has been performed. - // The resulting code will be at least as good as the original one - // plus it may expose more opportunities for bitfield insert pattern. 
- // FIXME: Currently we limit this to the bigger pattern, because - // some optimizations expect AND and not UBFM - Opd0 = N->getOperand(0); - } else - return false; - - assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) && - "bad amount in shift node!"); - - LSB = Srl_imm; - MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm) - : CountTrailingOnes_64(And_imm)) - - 1; - if (ClampMSB) - // Since we're moving the extend before the right shift operation, we need - // to clamp the MSB to make sure we don't shift in undefined bits instead of - // the zeros which would get shifted in with the original right shift - // operation. - MSB = MSB > 31 ? 31 : MSB; - - Opc = VT == MVT::i32 ? ARM64::UBFMWri : ARM64::UBFMXri; - return true; -} - -static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, - unsigned &LSB, unsigned &MSB) { - // We are looking for the following pattern which basically extracts a single - // bit from the source value and places it in the LSB of the destination - // value, all other bits of the destination value or set to zero: - // - // Value2 = AND Value, MaskImm - // SRL Value2, ShiftImm - // - // with MaskImm >> ShiftImm == 1. - // - // This gets selected into a single UBFM: - // - // UBFM Value, ShiftImm, ShiftImm - // - - if (N->getOpcode() != ISD::SRL) - return false; - - uint64_t And_mask = 0; - if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask)) - return false; - - Opd0 = N->getOperand(0).getOperand(0); - - uint64_t Srl_imm = 0; - if (!isIntImmediate(N->getOperand(1), Srl_imm)) - return false; - - // Check whether we really have a one bit extract here. - if (And_mask >> Srl_imm == 0x1) { - if (N->getValueType(0) == MVT::i32) - Opc = ARM64::UBFMWri; - else - Opc = ARM64::UBFMXri; - - LSB = MSB = Srl_imm; - - return true; - } - - return false; -} - -static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, - unsigned &LSB, unsigned &MSB, - bool BiggerPattern) { - assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && - "N must be a SHR/SRA operation to call this function"); - - EVT VT = N->getValueType(0); - - // Here we can test the type of VT and return false when the type does not - // match, but since it is done prior to that call in the current context - // we turned that into an assert to avoid redundant code. - assert((VT == MVT::i32 || VT == MVT::i64) && - "Type checking must have been done before calling this function"); - - // Check for AND + SRL doing a one bit extract. - if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB)) - return true; - - // we're looking for a shift of a shift - uint64_t Shl_imm = 0; - if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { - Opd0 = N->getOperand(0).getOperand(0); - } else if (BiggerPattern) { - // Let's pretend a 0 shift left has been performed. - // FIXME: Currently we limit this to the bigger pattern case, - // because some optimizations expect AND and not UBFM - Opd0 = N->getOperand(0); - } else - return false; - - assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!"); - uint64_t Srl_imm = 0; - if (!isIntImmediate(N->getOperand(1), Srl_imm)) - return false; - - assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && - "bad amount in shift node!"); - // Note: The width operand is encoded as width-1. 
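The one-bit extract recognized above matches (X & Mask) >> Shift where Mask >> Shift == 1, i.e. the shift isolates exactly one surviving bit, and selects it as UBFM with lsb == msb == Shift. A quick standalone check of that predicate and the bit-test it implements (helper names are illustrative):

  #include <cassert>
  #include <cstdint>

  // (X & Mask) >> Shift extracts a single bit exactly when Mask >> Shift == 1;
  // that is the value UBFM Rd, Rn, #Shift, #Shift produces.
  static bool isOneBitExtract(uint64_t Mask, unsigned Shift) {
    return (Mask >> Shift) == 1;
  }

  // Reference semantics: the selected UBFM yields bit number Shift of X.
  static uint64_t extractBit(uint64_t X, unsigned Shift) {
    return (X >> Shift) & 1;
  }

  int main() {
    // 0x4 >> 2 == 1, so "and #0x4; lsr #2" is a one-bit extract of bit 2.
    assert(isOneBitExtract(0x4, 2));
    assert(extractBit(0xF5, 2) == 1 && ((0xF5 & 0x4) >> 2) == 1);
    // 0xC >> 2 == 3: more than one bit survives, so the pattern is rejected.
    assert(!isOneBitExtract(0xC, 2));
    return 0;
  }
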
- unsigned Width = VT.getSizeInBits() - Srl_imm - 1; - int sLSB = Srl_imm - Shl_imm; - if (sLSB < 0) - return false; - LSB = sLSB; - MSB = LSB + Width; - // SRA requires a signed extraction - if (VT == MVT::i32) - Opc = N->getOpcode() == ISD::SRA ? ARM64::SBFMWri : ARM64::UBFMWri; - else - Opc = N->getOpcode() == ISD::SRA ? ARM64::SBFMXri : ARM64::UBFMXri; - return true; -} - -static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, - SDValue &Opd0, unsigned &LSB, unsigned &MSB, - unsigned NumberOfIgnoredLowBits = 0, - bool BiggerPattern = false) { - if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) - return false; - - switch (N->getOpcode()) { - default: - if (!N->isMachineOpcode()) - return false; - break; - case ISD::AND: - return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB, - NumberOfIgnoredLowBits, BiggerPattern); - case ISD::SRL: - case ISD::SRA: - return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern); - } - - unsigned NOpc = N->getMachineOpcode(); - switch (NOpc) { - default: - return false; - case ARM64::SBFMWri: - case ARM64::UBFMWri: - case ARM64::SBFMXri: - case ARM64::UBFMXri: - Opc = NOpc; - Opd0 = N->getOperand(0); - LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); - MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); - return true; - } - // Unreachable - return false; -} - -SDNode *ARM64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { - unsigned Opc, LSB, MSB; - SDValue Opd0; - if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB)) - return NULL; - - EVT VT = N->getValueType(0); - SDValue Ops[] = { Opd0, CurDAG->getTargetConstant(LSB, VT), - CurDAG->getTargetConstant(MSB, VT) }; - return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 3); -} - -// Is mask a i32 or i64 binary sequence 1..10..0 and -// CountTrailingZeros(mask) == ExpectedTrailingZeros -static bool isHighMask(uint64_t Mask, unsigned ExpectedTrailingZeros, - unsigned NumberOfIgnoredHighBits, EVT VT) { - assert((VT == MVT::i32 || VT == MVT::i64) && - "i32 or i64 mask type expected!"); - - uint64_t ExpectedMask; - if (VT == MVT::i32) { - uint32_t ExpectedMaski32 = ~0 << ExpectedTrailingZeros; - ExpectedMask = ExpectedMaski32; - if (NumberOfIgnoredHighBits) { - uint32_t highMask = ~0 << (32 - NumberOfIgnoredHighBits); - Mask |= highMask; - } - } else { - ExpectedMask = ((uint64_t) ~0) << ExpectedTrailingZeros; - if (NumberOfIgnoredHighBits) - Mask |= ((uint64_t) ~0) << (64 - NumberOfIgnoredHighBits); - } - - return Mask == ExpectedMask; -} - -// Look for bits that will be useful for later uses. -// A bit is consider useless as soon as it is dropped and never used -// before it as been dropped. -// E.g., looking for useful bit of x -// 1. y = x & 0x7 -// 2. z = y >> 2 -// After #1, x useful bits are 0x7, then the useful bits of x, live through -// y. -// After #2, the useful bits of x are 0x4. -// However, if x is used on an unpredicatable instruction, then all its bits -// are useful. -// E.g. -// 1. y = x & 0x7 -// 2. z = y >> 2 -// 3. 
str x, [@x] -static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); - -static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, - unsigned Depth) { - uint64_t Imm = - cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); - Imm = ARM64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); - UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); - getUsefulBits(Op, UsefulBits, Depth + 1); -} - -static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, - uint64_t Imm, uint64_t MSB, - unsigned Depth) { - // inherit the bitwidth value - APInt OpUsefulBits(UsefulBits); - OpUsefulBits = 1; - - if (MSB >= Imm) { - OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); - --OpUsefulBits; - // The interesting part will be in the lower part of the result - getUsefulBits(Op, OpUsefulBits, Depth + 1); - // The interesting part was starting at Imm in the argument - OpUsefulBits = OpUsefulBits.shl(Imm); - } else { - OpUsefulBits = OpUsefulBits.shl(MSB + 1); - --OpUsefulBits; - // The interesting part will be shifted in the result - OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm); - getUsefulBits(Op, OpUsefulBits, Depth + 1); - // The interesting part was at zero in the argument - OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm); - } - - UsefulBits &= OpUsefulBits; -} - -static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, - unsigned Depth) { - uint64_t Imm = - cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); - uint64_t MSB = - cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); - - getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); -} - -static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, - unsigned Depth) { - uint64_t ShiftTypeAndValue = - cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); - APInt Mask(UsefulBits); - Mask.clearAllBits(); - Mask.flipAllBits(); - - if (ARM64_AM::getShiftType(ShiftTypeAndValue) == ARM64_AM::LSL) { - // Shift Left - uint64_t ShiftAmt = ARM64_AM::getShiftValue(ShiftTypeAndValue); - Mask = Mask.shl(ShiftAmt); - getUsefulBits(Op, Mask, Depth + 1); - Mask = Mask.lshr(ShiftAmt); - } else if (ARM64_AM::getShiftType(ShiftTypeAndValue) == ARM64_AM::LSR) { - // Shift Right - // We do not handle ARM64_AM::ASR, because the sign will change the - // number of useful bits - uint64_t ShiftAmt = ARM64_AM::getShiftValue(ShiftTypeAndValue); - Mask = Mask.lshr(ShiftAmt); - getUsefulBits(Op, Mask, Depth + 1); - Mask = Mask.shl(ShiftAmt); - } else - return; - - UsefulBits &= Mask; -} - -static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, - unsigned Depth) { - uint64_t Imm = - cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); - uint64_t MSB = - cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); - - if (Op.getOperand(1) == Orig) - return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); - - APInt OpUsefulBits(UsefulBits); - OpUsefulBits = 1; - - if (MSB >= Imm) { - OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); - --OpUsefulBits; - UsefulBits &= ~OpUsefulBits; - getUsefulBits(Op, UsefulBits, Depth + 1); - } else { - OpUsefulBits = OpUsefulBits.shl(MSB + 1); - --OpUsefulBits; - UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm)); - getUsefulBits(Op, UsefulBits, Depth + 1); - } -} - -static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, - SDValue Orig, unsigned 
Depth) { - - // Users of this node should have already been instruction selected - // FIXME: Can we turn that into an assert? - if (!UserNode->isMachineOpcode()) - return; - - switch (UserNode->getMachineOpcode()) { - default: - return; - case ARM64::ANDSWri: - case ARM64::ANDSXri: - case ARM64::ANDWri: - case ARM64::ANDXri: - // We increment Depth only when we call the getUsefulBits - return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, - Depth); - case ARM64::UBFMWri: - case ARM64::UBFMXri: - return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); - - case ARM64::ORRWrs: - case ARM64::ORRXrs: - if (UserNode->getOperand(1) != Orig) - return; - return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, - Depth); - case ARM64::BFMWri: - case ARM64::BFMXri: - return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); - } -} - -static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { - if (Depth >= 6) - return; - // Initialize UsefulBits - if (!Depth) { - unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits(); - // At the beginning, assume every produced bits is useful - UsefulBits = APInt(Bitwidth, 0); - UsefulBits.flipAllBits(); - } - APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); - - for (SDNode::use_iterator UseIt = Op.getNode()->use_begin(), - UseEnd = Op.getNode()->use_end(); - UseIt != UseEnd; ++UseIt) { - // A use cannot produce useful bits - APInt UsefulBitsForUse = APInt(UsefulBits); - getUsefulBitsForUse(*UseIt, UsefulBitsForUse, Op, Depth); - UsersUsefulBits |= UsefulBitsForUse; - } - // UsefulBits contains the produced bits that are meaningful for the - // current definition, thus a user cannot make a bit meaningful at - // this point - UsefulBits &= UsersUsefulBits; -} - -// Given a OR operation, check if we have the following pattern -// ubfm c, b, imm, imm2 (or something that does the same jobs, see -// isBitfieldExtractOp) -// d = e & mask2 ; where mask is a binary sequence of 1..10..0 and -// countTrailingZeros(mask2) == imm2 - imm + 1 -// f = d | c -// if yes, given reference arguments will be update so that one can replace -// the OR instruction with: -// f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2 -static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Opd0, - SDValue &Opd1, unsigned &LSB, - unsigned &MSB, SelectionDAG *CurDAG) { - assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); - - // Set Opc - EVT VT = N->getValueType(0); - if (VT == MVT::i32) - Opc = ARM64::BFMWri; - else if (VT == MVT::i64) - Opc = ARM64::BFMXri; - else - return false; - - // Because of simplify-demanded-bits in DAGCombine, involved masks may not - // have the expected shape. Try to undo that. - APInt UsefulBits; - getUsefulBits(SDValue(N, 0), UsefulBits); - - unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); - unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); - - // OR is commutative, check both possibilities (does llvm provide a - // way to do that directely, e.g., via code matcher?) 
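The BFM-from-OR pattern described above merges a value masked with a high 1..10..0 mask and a bitfield extracted into the low bits, so the OR as a whole behaves like a bitfield insert into the low lanes of the masked operand. A sketch of the reference semantics the selected BFM must reproduce, as plain arithmetic with illustrative helper names (this mirrors what a BFXIL-style insert computes):

  #include <cassert>
  #include <cstdint>

  // Extract bits [Lsb, Msb] of Src into the low bits of the result, the field
  // a UBFM/UBFX with those operands would produce (64-bit variant).
  static uint64_t extractField(uint64_t Src, unsigned Lsb, unsigned Msb) {
    unsigned Width = Msb - Lsb + 1;
    uint64_t Mask = (Width == 64) ? ~0ULL : ((1ULL << Width) - 1);
    return (Src >> Lsb) & Mask;
  }

  // The other OR operand keeps only bits above the field (a 1..10..0 mask),
  // so OR-ing them is an insert of the extracted field into the low Width
  // bits of Dst, leaving the high bits of Dst unchanged.
  static uint64_t bitfieldInsertLow(uint64_t Dst, uint64_t Src, unsigned Lsb,
                                    unsigned Msb) {
    unsigned Width = Msb - Lsb + 1;
    uint64_t LowMask = (Width == 64) ? ~0ULL : ((1ULL << Width) - 1);
    return (Dst & ~LowMask) | extractField(Src, Lsb, Msb);
  }

  int main() {
    // Insert bits [8,15] of 0xABCD into the low byte of 0xFFFF00FF.
    assert(bitfieldInsertLow(0xFFFF00FFULL, 0xABCDULL, 8, 15) == 0xFFFF00ABULL);
    return 0;
  }
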
- SDValue OrOpd1Val = N->getOperand(1); - SDNode *OrOpd0 = N->getOperand(0).getNode(); - SDNode *OrOpd1 = N->getOperand(1).getNode(); - for (int i = 0; i < 2; - ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) { - unsigned BFXOpc; - // Set Opd1, LSB and MSB arguments by looking for - // c = ubfm b, imm, imm2 - if (!isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Opd1, LSB, MSB, - NumberOfIgnoredLowBits, true)) - continue; - - // Check that the returned opcode is compatible with the pattern, - // i.e., same type and zero extended (U and not S) - if ((BFXOpc != ARM64::UBFMXri && VT == MVT::i64) || - (BFXOpc != ARM64::UBFMWri && VT == MVT::i32)) - continue; - - // Compute the width of the bitfield insertion - int sMSB = MSB - LSB + 1; - // FIXME: This constraints is to catch bitfield insertion we may - // want to widen the pattern if we want to grab general bitfied - // move case - if (sMSB <= 0) - continue; - - // Check the second part of the pattern - EVT VT = OrOpd1->getValueType(0); - if (VT != MVT::i32 && VT != MVT::i64) - continue; - - // Compute the Known Zero for the candidate of the first operand. - // This allows to catch more general case than just looking for - // AND with imm. Indeed, simplify-demanded-bits may have removed - // the AND instruction because it proves it was useless. - APInt KnownZero, KnownOne; - CurDAG->ComputeMaskedBits(OrOpd1Val, KnownZero, KnownOne); - - // Check if there is enough room for the second operand to appear - // in the first one - if (KnownZero.countTrailingOnes() < (unsigned)sMSB) - continue; - - // Set the first operand - uint64_t Imm; - if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && - isHighMask(Imm, sMSB, NumberOfIgnoredHighBits, VT)) - // In that case, we can eliminate the AND - Opd0 = OrOpd1->getOperand(0); - else - // Maybe the AND has been removed by simplify-demanded-bits - // or is useful because it discards more bits - Opd0 = OrOpd1Val; - - // both parts match - return true; - } - - return false; -} - -SDNode *ARM64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { - if (N->getOpcode() != ISD::OR) - return NULL; - - unsigned Opc; - unsigned LSB, MSB; - SDValue Opd0, Opd1; - - if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG)) - return NULL; - - EVT VT = N->getValueType(0); - SDValue Ops[] = { Opd0, - Opd1, - CurDAG->getTargetConstant(LSB, VT), - CurDAG->getTargetConstant(MSB, VT) }; - return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 4); -} - -SDNode *ARM64DAGToDAGISel::SelectLIBM(SDNode *N) { - EVT VT = N->getValueType(0); - unsigned Variant; - unsigned Opc; - unsigned FRINTXOpcs[] = { ARM64::FRINTXSr, ARM64::FRINTXDr }; - - if (VT == MVT::f32) { - Variant = 0; - } else if (VT == MVT::f64) { - Variant = 1; - } else - return 0; // Unrecognized argument type. Fall back on default codegen. - - // Pick the FRINTX variant needed to set the flags. - unsigned FRINTXOpc = FRINTXOpcs[Variant]; - - switch (N->getOpcode()) { - default: - return 0; // Unrecognized libm ISD node. Fall back on default codegen. 
- case ISD::FCEIL: { - unsigned FRINTPOpcs[] = { ARM64::FRINTPSr, ARM64::FRINTPDr }; - Opc = FRINTPOpcs[Variant]; - break; - } - case ISD::FFLOOR: { - unsigned FRINTMOpcs[] = { ARM64::FRINTMSr, ARM64::FRINTMDr }; - Opc = FRINTMOpcs[Variant]; - break; - } - case ISD::FTRUNC: { - unsigned FRINTZOpcs[] = { ARM64::FRINTZSr, ARM64::FRINTZDr }; - Opc = FRINTZOpcs[Variant]; - break; - } - case ISD::FROUND: { - unsigned FRINTAOpcs[] = { ARM64::FRINTASr, ARM64::FRINTADr }; - Opc = FRINTAOpcs[Variant]; - break; - } - } - - SDLoc dl(N); - SDValue In = N->getOperand(0); - SmallVector<SDValue, 2> Ops; - Ops.push_back(In); - - if (!TM.Options.UnsafeFPMath) { - SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In); - Ops.push_back(SDValue(FRINTX, 1)); - } - - return CurDAG->getMachineNode(Opc, dl, VT, Ops); -} - -SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { - // Dump information about the Node being selected - DEBUG(errs() << "Selecting: "); - DEBUG(Node->dump(CurDAG)); - DEBUG(errs() << "\n"); - - // If we have a custom node, we already have selected! - if (Node->isMachineOpcode()) { - DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); - Node->setNodeId(-1); - return NULL; - } - - // Few custom selection stuff. - SDNode *ResNode = 0; - EVT VT = Node->getValueType(0); - - switch (Node->getOpcode()) { - default: - break; - - case ISD::ADD: - if (SDNode *I = SelectMLAV64LaneV128(Node)) - return I; - break; - - case ISD::ATOMIC_LOAD_ADD: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_ADD_I8, - ARM64::ATOMIC_LOAD_ADD_I16, ARM64::ATOMIC_LOAD_ADD_I32, - ARM64::ATOMIC_LOAD_ADD_I64); - case ISD::ATOMIC_LOAD_SUB: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_SUB_I8, - ARM64::ATOMIC_LOAD_SUB_I16, ARM64::ATOMIC_LOAD_SUB_I32, - ARM64::ATOMIC_LOAD_SUB_I64); - case ISD::ATOMIC_LOAD_AND: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_AND_I8, - ARM64::ATOMIC_LOAD_AND_I16, ARM64::ATOMIC_LOAD_AND_I32, - ARM64::ATOMIC_LOAD_AND_I64); - case ISD::ATOMIC_LOAD_OR: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_OR_I8, - ARM64::ATOMIC_LOAD_OR_I16, ARM64::ATOMIC_LOAD_OR_I32, - ARM64::ATOMIC_LOAD_OR_I64); - case ISD::ATOMIC_LOAD_XOR: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_XOR_I8, - ARM64::ATOMIC_LOAD_XOR_I16, ARM64::ATOMIC_LOAD_XOR_I32, - ARM64::ATOMIC_LOAD_XOR_I64); - case ISD::ATOMIC_LOAD_NAND: - return SelectAtomic( - Node, ARM64::ATOMIC_LOAD_NAND_I8, ARM64::ATOMIC_LOAD_NAND_I16, - ARM64::ATOMIC_LOAD_NAND_I32, ARM64::ATOMIC_LOAD_NAND_I64); - case ISD::ATOMIC_LOAD_MIN: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_MIN_I8, - ARM64::ATOMIC_LOAD_MIN_I16, ARM64::ATOMIC_LOAD_MIN_I32, - ARM64::ATOMIC_LOAD_MIN_I64); - case ISD::ATOMIC_LOAD_MAX: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_MAX_I8, - ARM64::ATOMIC_LOAD_MAX_I16, ARM64::ATOMIC_LOAD_MAX_I32, - ARM64::ATOMIC_LOAD_MAX_I64); - case ISD::ATOMIC_LOAD_UMIN: - return SelectAtomic( - Node, ARM64::ATOMIC_LOAD_UMIN_I8, ARM64::ATOMIC_LOAD_UMIN_I16, - ARM64::ATOMIC_LOAD_UMIN_I32, ARM64::ATOMIC_LOAD_UMIN_I64); - case ISD::ATOMIC_LOAD_UMAX: - return SelectAtomic( - Node, ARM64::ATOMIC_LOAD_UMAX_I8, ARM64::ATOMIC_LOAD_UMAX_I16, - ARM64::ATOMIC_LOAD_UMAX_I32, ARM64::ATOMIC_LOAD_UMAX_I64); - case ISD::ATOMIC_SWAP: - return SelectAtomic(Node, ARM64::ATOMIC_SWAP_I8, ARM64::ATOMIC_SWAP_I16, - ARM64::ATOMIC_SWAP_I32, ARM64::ATOMIC_SWAP_I64); - case ISD::ATOMIC_CMP_SWAP: - return SelectAtomic(Node, ARM64::ATOMIC_CMP_SWAP_I8, - ARM64::ATOMIC_CMP_SWAP_I16, ARM64::ATOMIC_CMP_SWAP_I32, - ARM64::ATOMIC_CMP_SWAP_I64); - - case ISD::LOAD: { - // Try to 
select as an indexed load. Fall through to normal processing - // if we can't. - bool Done = false; - SDNode *I = SelectIndexedLoad(Node, Done); - if (Done) - return I; - break; - } - - case ISD::SRL: - case ISD::AND: - case ISD::SRA: - if (SDNode *I = SelectBitfieldExtractOp(Node)) - return I; - break; - - case ISD::OR: - if (SDNode *I = SelectBitfieldInsertOp(Node)) - return I; - break; - - case ISD::EXTRACT_VECTOR_ELT: { - // Extracting lane zero is a special case where we can just use a plain - // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for - // the rest of the compiler, especially the register allocator and copyi - // propagation, to reason about, so is preferred when it's possible to - // use it. - ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1)); - // Bail and use the default Select() for non-zero lanes. - if (LaneNode->getZExtValue() != 0) - break; - // If the element type is not the same as the result type, likewise - // bail and use the default Select(), as there's more to do than just - // a cross-class COPY. This catches extracts of i8 and i16 elements - // since they will need an explicit zext. - if (VT != Node->getOperand(0).getValueType().getVectorElementType()) - break; - unsigned SubReg; - switch (Node->getOperand(0) - .getValueType() - .getVectorElementType() - .getSizeInBits()) { - default: - assert(0 && "Unexpected vector element type!"); - case 64: - SubReg = ARM64::dsub; - break; - case 32: - SubReg = ARM64::ssub; - break; - case 16: // FALLTHROUGH - case 8: - llvm_unreachable("unexpected zext-requiring extract element!"); - } - SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT, - Node->getOperand(0)); - DEBUG(dbgs() << "ISEL: Custom selection!\n=> "); - DEBUG(Extract->dumpr(CurDAG)); - DEBUG(dbgs() << "\n"); - return Extract.getNode(); - } - case ISD::Constant: { - // Materialize zero constants as copies from WZR/XZR. This allows - // the coalescer to propagate these into other instructions. - ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); - if (ConstNode->isNullValue()) { - if (VT == MVT::i32) - return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), - ARM64::WZR, MVT::i32).getNode(); - else if (VT == MVT::i64) - return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), - ARM64::XZR, MVT::i64).getNode(); - } - break; - } - - case ISD::FrameIndex: { - // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. - int FI = cast<FrameIndexSDNode>(Node)->getIndex(); - unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0); - const TargetLowering *TLI = getTargetLowering(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); - SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), - CurDAG->getTargetConstant(Shifter, MVT::i32) }; - return CurDAG->SelectNodeTo(Node, ARM64::ADDXri, MVT::i64, Ops, 3); - } - case ISD::INTRINSIC_W_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); - switch (IntNo) { - default: - break; - case Intrinsic::arm64_ldxp: { - SDValue MemAddr = Node->getOperand(2); - SDLoc DL(Node); - SDValue Chain = Node->getOperand(0); - - SDNode *Ld = CurDAG->getMachineNode(ARM64::LDXPX, DL, MVT::i64, MVT::i64, - MVT::Other, MemAddr, Chain); - - // Transfer memoperands. 
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); - cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); - return Ld; - } - case Intrinsic::arm64_stxp: { - SDLoc DL(Node); - SDValue Chain = Node->getOperand(0); - SDValue ValLo = Node->getOperand(2); - SDValue ValHi = Node->getOperand(3); - SDValue MemAddr = Node->getOperand(4); - - // Place arguments in the right order. - SmallVector<SDValue, 7> Ops; - Ops.push_back(ValLo); - Ops.push_back(ValHi); - Ops.push_back(MemAddr); - Ops.push_back(Chain); - - SDNode *St = - CurDAG->getMachineNode(ARM64::STXPX, DL, MVT::i32, MVT::Other, Ops); - // Transfer memoperands. - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); - cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); - - return St; - } - case Intrinsic::arm64_neon_ld1x2: - if (VT == MVT::v8i8) - return SelectLoad(Node, 2, ARM64::LD1Twov8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 2, ARM64::LD1Twov16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 2, ARM64::LD1Twov4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 2, ARM64::LD1Twov8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 2, ARM64::LD1Twov2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 2, ARM64::LD1Twov4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 2, ARM64::LD1Twov1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 2, ARM64::LD1Twov2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld1x3: - if (VT == MVT::v8i8) - return SelectLoad(Node, 3, ARM64::LD1Threev8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 3, ARM64::LD1Threev16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 3, ARM64::LD1Threev4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 3, ARM64::LD1Threev8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 3, ARM64::LD1Threev2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 3, ARM64::LD1Threev4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 3, ARM64::LD1Threev1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 3, ARM64::LD1Threev2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld1x4: - if (VT == MVT::v8i8) - return SelectLoad(Node, 4, ARM64::LD1Fourv8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 4, ARM64::LD1Fourv16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 4, ARM64::LD1Fourv4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 4, ARM64::LD1Fourv8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 4, ARM64::LD1Fourv2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 4, ARM64::LD1Fourv4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 4, ARM64::LD1Fourv1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 4, ARM64::LD1Fourv2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld2: - if (VT == MVT::v8i8) - return SelectLoad(Node, 
2, ARM64::LD2Twov8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 2, ARM64::LD2Twov16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 2, ARM64::LD2Twov4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 2, ARM64::LD2Twov8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 2, ARM64::LD2Twov2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 2, ARM64::LD2Twov4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 2, ARM64::LD1Twov1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 2, ARM64::LD2Twov2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld3: - if (VT == MVT::v8i8) - return SelectLoad(Node, 3, ARM64::LD3Threev8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 3, ARM64::LD3Threev16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 3, ARM64::LD3Threev4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 3, ARM64::LD3Threev8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 3, ARM64::LD3Threev2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 3, ARM64::LD3Threev4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 3, ARM64::LD1Threev1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 3, ARM64::LD3Threev2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld4: - if (VT == MVT::v8i8) - return SelectLoad(Node, 4, ARM64::LD4Fourv8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 4, ARM64::LD4Fourv16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 4, ARM64::LD4Fourv4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 4, ARM64::LD4Fourv8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 4, ARM64::LD4Fourv2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 4, ARM64::LD4Fourv4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 4, ARM64::LD1Fourv1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 4, ARM64::LD4Fourv2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld2r: - if (VT == MVT::v8i8) - return SelectLoad(Node, 2, ARM64::LD2Rv8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 2, ARM64::LD2Rv16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 2, ARM64::LD2Rv4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 2, ARM64::LD2Rv8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 2, ARM64::LD2Rv2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 2, ARM64::LD2Rv4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 2, ARM64::LD2Rv1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 2, ARM64::LD2Rv2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld3r: - if (VT == MVT::v8i8) - return SelectLoad(Node, 3, ARM64::LD3Rv8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 3, ARM64::LD3Rv16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - 
return SelectLoad(Node, 3, ARM64::LD3Rv4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 3, ARM64::LD3Rv8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 3, ARM64::LD3Rv2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 3, ARM64::LD3Rv4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 3, ARM64::LD3Rv1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 3, ARM64::LD3Rv2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld4r: - if (VT == MVT::v8i8) - return SelectLoad(Node, 4, ARM64::LD4Rv8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 4, ARM64::LD4Rv16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 4, ARM64::LD4Rv4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 4, ARM64::LD4Rv8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 4, ARM64::LD4Rv2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 4, ARM64::LD4Rv4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 4, ARM64::LD4Rv1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 4, ARM64::LD4Rv2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld2lane: - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectLoadLane(Node, 2, ARM64::LD2i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectLoadLane(Node, 2, ARM64::LD2i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectLoadLane(Node, 2, ARM64::LD2i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectLoadLane(Node, 2, ARM64::LD2i64); - break; - case Intrinsic::arm64_neon_ld3lane: - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectLoadLane(Node, 3, ARM64::LD3i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectLoadLane(Node, 3, ARM64::LD3i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectLoadLane(Node, 3, ARM64::LD3i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectLoadLane(Node, 3, ARM64::LD3i64); - break; - case Intrinsic::arm64_neon_ld4lane: - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectLoadLane(Node, 4, ARM64::LD4i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectLoadLane(Node, 4, ARM64::LD4i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectLoadLane(Node, 4, ARM64::LD4i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectLoadLane(Node, 4, ARM64::LD4i64); - break; - } - } break; - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); - switch (IntNo) { - default: - break; - case Intrinsic::arm64_neon_tbl2: - return SelectTable(Node, 2, VT == MVT::v8i8 ? ARM64::TBLv8i8Two - : ARM64::TBLv16i8Two, - false); - case Intrinsic::arm64_neon_tbl3: - return SelectTable(Node, 3, VT == MVT::v8i8 ? ARM64::TBLv8i8Three - : ARM64::TBLv16i8Three, - false); - case Intrinsic::arm64_neon_tbl4: - return SelectTable(Node, 4, VT == MVT::v8i8 ? 
ARM64::TBLv8i8Four - : ARM64::TBLv16i8Four, - false); - case Intrinsic::arm64_neon_tbx2: - return SelectTable(Node, 2, VT == MVT::v8i8 ? ARM64::TBXv8i8Two - : ARM64::TBXv16i8Two, - true); - case Intrinsic::arm64_neon_tbx3: - return SelectTable(Node, 3, VT == MVT::v8i8 ? ARM64::TBXv8i8Three - : ARM64::TBXv16i8Three, - true); - case Intrinsic::arm64_neon_tbx4: - return SelectTable(Node, 4, VT == MVT::v8i8 ? ARM64::TBXv8i8Four - : ARM64::TBXv16i8Four, - true); - case Intrinsic::arm64_neon_smull: - case Intrinsic::arm64_neon_umull: - if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node)) - return N; - break; - } - break; - } - case ISD::INTRINSIC_VOID: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); - if (Node->getNumOperands() >= 3) - VT = Node->getOperand(2)->getValueType(0); - switch (IntNo) { - default: - break; - case Intrinsic::arm64_neon_st1x2: { - if (VT == MVT::v8i8) - return SelectStore(Node, 2, ARM64::ST1Twov8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 2, ARM64::ST1Twov16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 2, ARM64::ST1Twov4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 2, ARM64::ST1Twov8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 2, ARM64::ST1Twov2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 2, ARM64::ST1Twov4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 2, ARM64::ST1Twov2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 2, ARM64::ST1Twov1d); - break; - } - case Intrinsic::arm64_neon_st1x3: { - if (VT == MVT::v8i8) - return SelectStore(Node, 3, ARM64::ST1Threev8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 3, ARM64::ST1Threev16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 3, ARM64::ST1Threev4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 3, ARM64::ST1Threev8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 3, ARM64::ST1Threev2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 3, ARM64::ST1Threev4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 3, ARM64::ST1Threev2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 3, ARM64::ST1Threev1d); - break; - } - case Intrinsic::arm64_neon_st1x4: { - if (VT == MVT::v8i8) - return SelectStore(Node, 4, ARM64::ST1Fourv8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 4, ARM64::ST1Fourv16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 4, ARM64::ST1Fourv4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 4, ARM64::ST1Fourv8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 4, ARM64::ST1Fourv2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 4, ARM64::ST1Fourv4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 4, ARM64::ST1Fourv2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 4, ARM64::ST1Fourv1d); - break; - } - case Intrinsic::arm64_neon_st2: { - if (VT == MVT::v8i8) - return SelectStore(Node, 2, ARM64::ST2Twov8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 2, ARM64::ST2Twov16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 2, ARM64::ST2Twov4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 2, ARM64::ST2Twov8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 2, 
ARM64::ST2Twov2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 2, ARM64::ST2Twov4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 2, ARM64::ST2Twov2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 2, ARM64::ST1Twov1d); - break; - } - case Intrinsic::arm64_neon_st3: { - if (VT == MVT::v8i8) - return SelectStore(Node, 3, ARM64::ST3Threev8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 3, ARM64::ST3Threev16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 3, ARM64::ST3Threev4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 3, ARM64::ST3Threev8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 3, ARM64::ST3Threev2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 3, ARM64::ST3Threev4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 3, ARM64::ST3Threev2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 3, ARM64::ST1Threev1d); - break; - } - case Intrinsic::arm64_neon_st4: { - if (VT == MVT::v8i8) - return SelectStore(Node, 4, ARM64::ST4Fourv8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 4, ARM64::ST4Fourv16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 4, ARM64::ST4Fourv4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 4, ARM64::ST4Fourv8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 4, ARM64::ST4Fourv2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 4, ARM64::ST4Fourv4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 4, ARM64::ST4Fourv2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 4, ARM64::ST1Fourv1d); - break; - } - case Intrinsic::arm64_neon_st2lane: { - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectStoreLane(Node, 2, ARM64::ST2i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectStoreLane(Node, 2, ARM64::ST2i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectStoreLane(Node, 2, ARM64::ST2i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectStoreLane(Node, 2, ARM64::ST2i64); - break; - } - case Intrinsic::arm64_neon_st3lane: { - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectStoreLane(Node, 3, ARM64::ST3i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectStoreLane(Node, 3, ARM64::ST3i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectStoreLane(Node, 3, ARM64::ST3i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectStoreLane(Node, 3, ARM64::ST3i64); - break; - } - case Intrinsic::arm64_neon_st4lane: { - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectStoreLane(Node, 4, ARM64::ST4i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectStoreLane(Node, 4, ARM64::ST4i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectStoreLane(Node, 4, ARM64::ST4i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectStoreLane(Node, 4, ARM64::ST4i64); - break; - } - } - } - - case ISD::FCEIL: - case ISD::FFLOOR: - case ISD::FTRUNC: - case ISD::FROUND: - if (SDNode *I = SelectLIBM(Node)) - 
return I; - break; - } - - // Select the default instruction - ResNode = SelectCode(Node); - - DEBUG(errs() << "=> "); - if (ResNode == NULL || ResNode == Node) - DEBUG(Node->dump(CurDAG)); - else - DEBUG(ResNode->dump(CurDAG)); - DEBUG(errs() << "\n"); - - return ResNode; -} - -/// createARM64ISelDag - This pass converts a legalized DAG into a -/// ARM64-specific DAG, ready for instruction scheduling. -FunctionPass *llvm::createARM64ISelDag(ARM64TargetMachine &TM, - CodeGenOpt::Level OptLevel) { - return new ARM64DAGToDAGISel(TM, OptLevel); -} diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp deleted file mode 100644 index 641f591..0000000 --- a/lib/Target/ARM64/ARM64ISelLowering.cpp +++ /dev/null @@ -1,7551 +0,0 @@ -//===-- ARM64ISelLowering.cpp - ARM64 DAG Lowering Implementation --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ARM64TargetLowering class. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-lower" - -#include "ARM64ISelLowering.h" -#include "ARM64PerfectShuffle.h" -#include "ARM64Subtarget.h" -#include "ARM64CallingConv.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64TargetMachine.h" -#include "ARM64TargetObjectFile.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Type.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetOptions.h" -using namespace llvm; - -STATISTIC(NumTailCalls, "Number of tail calls"); -STATISTIC(NumShiftInserts, "Number of vector shift inserts"); - -// This option should go away when tail calls fully work. -static cl::opt<bool> -EnableARM64TailCalls("arm64-tail-calls", cl::Hidden, - cl::desc("Generate ARM64 tail calls (TEMPORARY OPTION)."), - cl::init(true)); - -static cl::opt<bool> -StrictAlign("arm64-strict-align", cl::Hidden, - cl::desc("Disallow all unaligned memory accesses")); - -// Place holder until extr generation is tested fully. -static cl::opt<bool> -EnableARM64ExtrGeneration("arm64-extr-generation", cl::Hidden, - cl::desc("Allow ARM64 (or (shift)(shift))->extract"), - cl::init(true)); - -static cl::opt<bool> -EnableARM64SlrGeneration("arm64-shift-insert-generation", cl::Hidden, - cl::desc("Allow ARM64 SLI/SRI formation"), - cl::init(false)); - -//===----------------------------------------------------------------------===// -// ARM64 Lowering public interface. 
-//===----------------------------------------------------------------------===// -static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { - if (TM.getSubtarget<ARM64Subtarget>().isTargetDarwin()) - return new ARM64_MachoTargetObjectFile(); - - return new ARM64_ELFTargetObjectFile(); -} - -ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)) { - Subtarget = &TM.getSubtarget<ARM64Subtarget>(); - - // ARM64 doesn't have comparisons which set GPRs or setcc instructions, so - // we have to make something up. Arbitrarily, choose ZeroOrOne. - setBooleanContents(ZeroOrOneBooleanContent); - // When comparing vectors the result sets the different elements in the - // vector to all-one or all-zero. - setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - - // Set up the register classes. - addRegisterClass(MVT::i32, &ARM64::GPR32allRegClass); - addRegisterClass(MVT::i64, &ARM64::GPR64allRegClass); - addRegisterClass(MVT::f32, &ARM64::FPR32RegClass); - addRegisterClass(MVT::f64, &ARM64::FPR64RegClass); - addRegisterClass(MVT::f128, &ARM64::FPR128RegClass); - addRegisterClass(MVT::v16i8, &ARM64::FPR8RegClass); - addRegisterClass(MVT::v8i16, &ARM64::FPR16RegClass); - - // Someone set us up the NEON. - addDRTypeForNEON(MVT::v2f32); - addDRTypeForNEON(MVT::v8i8); - addDRTypeForNEON(MVT::v4i16); - addDRTypeForNEON(MVT::v2i32); - addDRTypeForNEON(MVT::v1i64); - addDRTypeForNEON(MVT::v1f64); - - addQRTypeForNEON(MVT::v4f32); - addQRTypeForNEON(MVT::v2f64); - addQRTypeForNEON(MVT::v16i8); - addQRTypeForNEON(MVT::v8i16); - addQRTypeForNEON(MVT::v4i32); - addQRTypeForNEON(MVT::v2i64); - - // Compute derived properties from the register classes - computeRegisterProperties(); - - // Provide all sorts of operation actions - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); - setOperationAction(ISD::SETCC, MVT::i32, Custom); - setOperationAction(ISD::SETCC, MVT::i64, Custom); - setOperationAction(ISD::SETCC, MVT::f32, Custom); - setOperationAction(ISD::SETCC, MVT::f64, Custom); - setOperationAction(ISD::BRCOND, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::i32, Custom); - setOperationAction(ISD::BR_CC, MVT::i64, Custom); - setOperationAction(ISD::BR_CC, MVT::f32, Custom); - setOperationAction(ISD::BR_CC, MVT::f64, Custom); - setOperationAction(ISD::SELECT, MVT::i32, Custom); - setOperationAction(ISD::SELECT, MVT::i64, Custom); - setOperationAction(ISD::SELECT, MVT::f32, Custom); - setOperationAction(ISD::SELECT, MVT::f64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::JumpTable, MVT::i64, Custom); - - setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); - setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); - setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); - - setOperationAction(ISD::FREM, MVT::f32, Expand); - setOperationAction(ISD::FREM, MVT::f64, Expand); - setOperationAction(ISD::FREM, MVT::f80, Expand); - - // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to - // silliness like this: - setOperationAction(ISD::FABS, MVT::v1f64, Expand); - setOperationAction(ISD::FADD, MVT::v1f64, Expand); - setOperationAction(ISD::FCEIL, MVT::v1f64, Expand); - 
setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand); - setOperationAction(ISD::FCOS, MVT::v1f64, Expand); - setOperationAction(ISD::FDIV, MVT::v1f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand); - setOperationAction(ISD::FMA, MVT::v1f64, Expand); - setOperationAction(ISD::FMUL, MVT::v1f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand); - setOperationAction(ISD::FNEG, MVT::v1f64, Expand); - setOperationAction(ISD::FPOW, MVT::v1f64, Expand); - setOperationAction(ISD::FREM, MVT::v1f64, Expand); - setOperationAction(ISD::FROUND, MVT::v1f64, Expand); - setOperationAction(ISD::FRINT, MVT::v1f64, Expand); - setOperationAction(ISD::FSIN, MVT::v1f64, Expand); - setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand); - setOperationAction(ISD::FSQRT, MVT::v1f64, Expand); - setOperationAction(ISD::FSUB, MVT::v1f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand); - setOperationAction(ISD::SETCC, MVT::v1f64, Expand); - setOperationAction(ISD::BR_CC, MVT::v1f64, Expand); - setOperationAction(ISD::SELECT, MVT::v1f64, Expand); - setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand); - setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand); - - setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand); - setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand); - setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand); - - // Custom lowering hooks are needed for XOR - // to fold it into CSINC/CSINV. - setOperationAction(ISD::XOR, MVT::i32, Custom); - setOperationAction(ISD::XOR, MVT::i64, Custom); - - // Virtually no operation on f128 is legal, but LLVM can't expand them when - // there's a valid register class, so we need custom operations in most cases. - setOperationAction(ISD::FABS, MVT::f128, Expand); - setOperationAction(ISD::FADD, MVT::f128, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); - setOperationAction(ISD::FCOS, MVT::f128, Expand); - setOperationAction(ISD::FDIV, MVT::f128, Custom); - setOperationAction(ISD::FMA, MVT::f128, Expand); - setOperationAction(ISD::FMUL, MVT::f128, Custom); - setOperationAction(ISD::FNEG, MVT::f128, Expand); - setOperationAction(ISD::FPOW, MVT::f128, Expand); - setOperationAction(ISD::FREM, MVT::f128, Expand); - setOperationAction(ISD::FRINT, MVT::f128, Expand); - setOperationAction(ISD::FSIN, MVT::f128, Expand); - setOperationAction(ISD::FSINCOS, MVT::f128, Expand); - setOperationAction(ISD::FSQRT, MVT::f128, Expand); - setOperationAction(ISD::FSUB, MVT::f128, Custom); - setOperationAction(ISD::FTRUNC, MVT::f128, Expand); - setOperationAction(ISD::SETCC, MVT::f128, Custom); - setOperationAction(ISD::BR_CC, MVT::f128, Custom); - setOperationAction(ISD::SELECT, MVT::f128, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); - setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); - - // Lowering for many of the conversions is actually specified by the non-f128 - // type. The LowerXXX function will be trivial when f128 isn't involved. 
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); - - // 128-bit atomics - setOperationAction(ISD::ATOMIC_SWAP, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i128, Custom); - // These are surprisingly difficult. The only single-copy atomic 128-bit - // instruction on AArch64 is stxp (when it succeeds). So a store can safely - // become a simple swap, but a load can only be determined to have been atomic - // if storing the same value back succeeds. - setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Expand); - - // Variable arguments. - setOperationAction(ISD::VASTART, MVT::Other, Custom); - setOperationAction(ISD::VAARG, MVT::Other, Custom); - setOperationAction(ISD::VACOPY, MVT::Other, Custom); - setOperationAction(ISD::VAEND, MVT::Other, Expand); - - // Variable-sized objects. - setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); - setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - - // Exception handling. - // FIXME: These are guesses. Has this been defined yet? - setExceptionPointerRegister(ARM64::X0); - setExceptionSelectorRegister(ARM64::X1); - - // Constant pool entries - setOperationAction(ISD::ConstantPool, MVT::i64, Custom); - - // BlockAddress - setOperationAction(ISD::BlockAddress, MVT::i64, Custom); - - // Add/Sub overflow ops with MVT::Glues are lowered to CPSR dependences. - setOperationAction(ISD::ADDC, MVT::i32, Custom); - setOperationAction(ISD::ADDE, MVT::i32, Custom); - setOperationAction(ISD::SUBC, MVT::i32, Custom); - setOperationAction(ISD::SUBE, MVT::i32, Custom); - setOperationAction(ISD::ADDC, MVT::i64, Custom); - setOperationAction(ISD::ADDE, MVT::i64, Custom); - setOperationAction(ISD::SUBC, MVT::i64, Custom); - setOperationAction(ISD::SUBE, MVT::i64, Custom); - - // ARM64 lacks both left-rotate and popcount instructions. 
- setOperationAction(ISD::ROTL, MVT::i32, Expand); - setOperationAction(ISD::ROTL, MVT::i64, Expand); - - // ARM64 doesn't have a direct vector ->f32 conversion instructions for - // elements smaller than i32, so promote the input to i32 first. - setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote); - // Similarly, there is no direct i32 -> f64 vector conversion instruction. - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); - - // ARM64 doesn't have {U|S}MUL_LOHI. - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - - // ARM64 doesn't have MUL.2d: - setOperationAction(ISD::MUL, MVT::v2i64, Expand); - - // Expand the undefined-at-zero variants to cttz/ctlz to their defined-at-zero - // counterparts, which ARM64 supports directly. - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); - - setOperationAction(ISD::CTPOP, MVT::i32, Custom); - setOperationAction(ISD::CTPOP, MVT::i64, Custom); - - setOperationAction(ISD::SDIVREM, MVT::i32, Expand); - setOperationAction(ISD::SDIVREM, MVT::i64, Expand); - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); - - // Custom lower Add/Sub/Mul with overflow. - setOperationAction(ISD::SADDO, MVT::i32, Custom); - setOperationAction(ISD::SADDO, MVT::i64, Custom); - setOperationAction(ISD::UADDO, MVT::i32, Custom); - setOperationAction(ISD::UADDO, MVT::i64, Custom); - setOperationAction(ISD::SSUBO, MVT::i32, Custom); - setOperationAction(ISD::SSUBO, MVT::i64, Custom); - setOperationAction(ISD::USUBO, MVT::i32, Custom); - setOperationAction(ISD::USUBO, MVT::i64, Custom); - setOperationAction(ISD::SMULO, MVT::i32, Custom); - setOperationAction(ISD::SMULO, MVT::i64, Custom); - setOperationAction(ISD::UMULO, MVT::i32, Custom); - setOperationAction(ISD::UMULO, MVT::i64, Custom); - - setOperationAction(ISD::FSIN, MVT::f32, Expand); - setOperationAction(ISD::FSIN, MVT::f64, Expand); - setOperationAction(ISD::FCOS, MVT::f32, Expand); - setOperationAction(ISD::FCOS, MVT::f64, Expand); - setOperationAction(ISD::FPOW, MVT::f32, Expand); - setOperationAction(ISD::FPOW, MVT::f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); - - // ARM64 has implementations of a lot of rounding-like FP operations. 
- static MVT RoundingTypes[] = { MVT::f32, MVT::f64, MVT::v2f32, - MVT::v4f32, MVT::v2f64 }; - for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) { - MVT Ty = RoundingTypes[I]; - setOperationAction(ISD::FFLOOR, Ty, Legal); - setOperationAction(ISD::FNEARBYINT, Ty, Legal); - setOperationAction(ISD::FCEIL, Ty, Legal); - setOperationAction(ISD::FRINT, Ty, Legal); - setOperationAction(ISD::FTRUNC, Ty, Legal); - setOperationAction(ISD::FROUND, Ty, Legal); - } - - setOperationAction(ISD::PREFETCH, MVT::Other, Custom); - - if (Subtarget->isTargetMachO()) { - // For iOS, we don't want to the normal expansion of a libcall to - // sincos. We want to issue a libcall to __sincos_stret to avoid memory - // traffic. - setOperationAction(ISD::FSINCOS, MVT::f64, Custom); - setOperationAction(ISD::FSINCOS, MVT::f32, Custom); - } else { - setOperationAction(ISD::FSINCOS, MVT::f64, Expand); - setOperationAction(ISD::FSINCOS, MVT::f32, Expand); - } - - // ARM64 does not have floating-point extending loads, i1 sign-extending load, - // floating-point truncating stores, or v2i32->v2i16 truncating store. - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand); - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand); - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - setTruncStoreAction(MVT::f128, MVT::f80, Expand); - setTruncStoreAction(MVT::f128, MVT::f64, Expand); - setTruncStoreAction(MVT::f128, MVT::f32, Expand); - setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); - // Indexed loads and stores are supported. - for (unsigned im = (unsigned)ISD::PRE_INC; - im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { - setIndexedLoadAction(im, MVT::i8, Legal); - setIndexedLoadAction(im, MVT::i16, Legal); - setIndexedLoadAction(im, MVT::i32, Legal); - setIndexedLoadAction(im, MVT::i64, Legal); - setIndexedLoadAction(im, MVT::f64, Legal); - setIndexedLoadAction(im, MVT::f32, Legal); - setIndexedStoreAction(im, MVT::i8, Legal); - setIndexedStoreAction(im, MVT::i16, Legal); - setIndexedStoreAction(im, MVT::i32, Legal); - setIndexedStoreAction(im, MVT::i64, Legal); - setIndexedStoreAction(im, MVT::f64, Legal); - setIndexedStoreAction(im, MVT::f32, Legal); - } - - // Likewise, narrowing and extending vector loads/stores aren't handled - // directly. - for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { - - setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, - Expand); - - for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) - setTruncStoreAction((MVT::SimpleValueType)VT, - (MVT::SimpleValueType)InnerVT, Expand); - setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); - setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); - setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); - } - - // Trap. - setOperationAction(ISD::TRAP, MVT::Other, Legal); - setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal); - - // We combine OR nodes for bitfield operations. - setTargetDAGCombine(ISD::OR); - - // Vector add and sub nodes may conceal a high-half opportunity. - // Also, try to fold ADD into CSINC/CSINV.. 
- setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::SUB); - - setTargetDAGCombine(ISD::XOR); - setTargetDAGCombine(ISD::SINT_TO_FP); - setTargetDAGCombine(ISD::UINT_TO_FP); - - setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); - - setTargetDAGCombine(ISD::ANY_EXTEND); - setTargetDAGCombine(ISD::ZERO_EXTEND); - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::BITCAST); - setTargetDAGCombine(ISD::CONCAT_VECTORS); - setTargetDAGCombine(ISD::STORE); - - setTargetDAGCombine(ISD::MUL); - - MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8; - MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4; - MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4; - - setStackPointerRegisterToSaveRestore(ARM64::SP); - - setSchedulingPreference(Sched::Hybrid); - - // Enable TBZ/TBNZ - MaskAndBranchFoldingIsLegal = true; - - setMinFunctionAlignment(2); - - RequireStrictAlign = StrictAlign; -} - -void ARM64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { - if (VT == MVT::v2f32) { - setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); - AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i32); - - setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); - AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i32); - } else if (VT == MVT::v2f64 || VT == MVT::v4f32) { - setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); - AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i64); - - setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); - AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i64); - } - - // Mark vector float intrinsics as expand. - if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) { - setOperationAction(ISD::FSIN, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FCOS, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FPOWI, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FPOW, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FLOG, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FLOG2, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FLOG10, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FEXP, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FEXP2, VT.getSimpleVT(), Expand); - } - - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT.getSimpleVT(), Custom); - setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); - setOperationAction(ISD::AND, VT.getSimpleVT(), Custom); - setOperationAction(ISD::OR, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); - - setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); - setOperationAction(ISD::VSELECT, VT.getSimpleVT(), Expand); - setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand); - - setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand); - 
setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand); - - setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom); -} - -void ARM64TargetLowering::addDRTypeForNEON(MVT VT) { - addRegisterClass(VT, &ARM64::FPR64RegClass); - addTypeForNEON(VT, MVT::v2i32); -} - -void ARM64TargetLowering::addQRTypeForNEON(MVT VT) { - addRegisterClass(VT, &ARM64::FPR128RegClass); - addTypeForNEON(VT, MVT::v4i32); -} - -EVT ARM64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { - if (!VT.isVector()) - return MVT::i32; - return VT.changeVectorElementTypeToInteger(); -} - -/// computeMaskedBitsForTargetNode - Determine which of the bits specified in -/// Mask are known to be either zero or one and return them in the -/// KnownZero/KnownOne bitsets. -void ARM64TargetLowering::computeMaskedBitsForTargetNode( - const SDValue Op, APInt &KnownZero, APInt &KnownOne, - const SelectionDAG &DAG, unsigned Depth) const { - switch (Op.getOpcode()) { - default: - break; - case ARM64ISD::CSEL: { - APInt KnownZero2, KnownOne2; - DAG.ComputeMaskedBits(Op->getOperand(0), KnownZero, KnownOne, Depth + 1); - DAG.ComputeMaskedBits(Op->getOperand(1), KnownZero2, KnownOne2, Depth + 1); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; - break; - } - case ISD::INTRINSIC_W_CHAIN: - break; - case ISD::INTRINSIC_WO_CHAIN: - case ISD::INTRINSIC_VOID: { - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - switch (IntNo) { - default: - break; - case Intrinsic::arm64_neon_umaxv: - case Intrinsic::arm64_neon_uminv: { - // Figure out the datatype of the vector operand. The UMINV instruction - // will zero extend the result, so we can mark as known zero all the - // bits larger than the element datatype. 32-bit or larger doesn't need - // this as those are legal types and will be handled by isel directly. - MVT VT = Op.getOperand(1).getValueType().getSimpleVT(); - unsigned BitWidth = KnownZero.getBitWidth(); - if (VT == MVT::v8i8 || VT == MVT::v16i8) { - assert(BitWidth >= 8 && "Unexpected width!"); - APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8); - KnownZero |= Mask; - } else if (VT == MVT::v4i16 || VT == MVT::v8i16) { - assert(BitWidth >= 16 && "Unexpected width!"); - APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16); - KnownZero |= Mask; - } - break; - } break; - } - } - } -} - -MVT ARM64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const { - return MVT::i64; -} - -unsigned ARM64TargetLowering::getMaximalGlobalOffset() const { - // FIXME: On ARM64, this depends on the type. - // Basically, the addressable offsets are o to 4095 * Ty.getSizeInBytes(). - // and the offset has to be a multiple of the related size in bytes. 
- return 4095; -} - -FastISel * -ARM64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) const { - return ARM64::createFastISel(funcInfo, libInfo); -} - -const char *ARM64TargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: - return 0; - case ARM64ISD::CALL: return "ARM64ISD::CALL"; - case ARM64ISD::ADRP: return "ARM64ISD::ADRP"; - case ARM64ISD::ADDlow: return "ARM64ISD::ADDlow"; - case ARM64ISD::LOADgot: return "ARM64ISD::LOADgot"; - case ARM64ISD::RET_FLAG: return "ARM64ISD::RET_FLAG"; - case ARM64ISD::BRCOND: return "ARM64ISD::BRCOND"; - case ARM64ISD::CSEL: return "ARM64ISD::CSEL"; - case ARM64ISD::FCSEL: return "ARM64ISD::FCSEL"; - case ARM64ISD::CSINV: return "ARM64ISD::CSINV"; - case ARM64ISD::CSNEG: return "ARM64ISD::CSNEG"; - case ARM64ISD::CSINC: return "ARM64ISD::CSINC"; - case ARM64ISD::THREAD_POINTER: return "ARM64ISD::THREAD_POINTER"; - case ARM64ISD::TLSDESC_CALL: return "ARM64ISD::TLSDESC_CALL"; - case ARM64ISD::ADC: return "ARM64ISD::ADC"; - case ARM64ISD::SBC: return "ARM64ISD::SBC"; - case ARM64ISD::ADDS: return "ARM64ISD::ADDS"; - case ARM64ISD::SUBS: return "ARM64ISD::SUBS"; - case ARM64ISD::ADCS: return "ARM64ISD::ADCS"; - case ARM64ISD::SBCS: return "ARM64ISD::SBCS"; - case ARM64ISD::ANDS: return "ARM64ISD::ANDS"; - case ARM64ISD::FCMP: return "ARM64ISD::FCMP"; - case ARM64ISD::FMIN: return "ARM64ISD::FMIN"; - case ARM64ISD::FMAX: return "ARM64ISD::FMAX"; - case ARM64ISD::DUP: return "ARM64ISD::DUP"; - case ARM64ISD::DUPLANE8: return "ARM64ISD::DUPLANE8"; - case ARM64ISD::DUPLANE16: return "ARM64ISD::DUPLANE16"; - case ARM64ISD::DUPLANE32: return "ARM64ISD::DUPLANE32"; - case ARM64ISD::DUPLANE64: return "ARM64ISD::DUPLANE64"; - case ARM64ISD::MOVI: return "ARM64ISD::MOVI"; - case ARM64ISD::MOVIshift: return "ARM64ISD::MOVIshift"; - case ARM64ISD::MOVIedit: return "ARM64ISD::MOVIedit"; - case ARM64ISD::MOVImsl: return "ARM64ISD::MOVImsl"; - case ARM64ISD::FMOV: return "ARM64ISD::FMOV"; - case ARM64ISD::MVNIshift: return "ARM64ISD::MVNIshift"; - case ARM64ISD::MVNImsl: return "ARM64ISD::MVNImsl"; - case ARM64ISD::BICi: return "ARM64ISD::BICi"; - case ARM64ISD::ORRi: return "ARM64ISD::ORRi"; - case ARM64ISD::NEG: return "ARM64ISD::NEG"; - case ARM64ISD::EXTR: return "ARM64ISD::EXTR"; - case ARM64ISD::ZIP1: return "ARM64ISD::ZIP1"; - case ARM64ISD::ZIP2: return "ARM64ISD::ZIP2"; - case ARM64ISD::UZP1: return "ARM64ISD::UZP1"; - case ARM64ISD::UZP2: return "ARM64ISD::UZP2"; - case ARM64ISD::TRN1: return "ARM64ISD::TRN1"; - case ARM64ISD::TRN2: return "ARM64ISD::TRN2"; - case ARM64ISD::REV16: return "ARM64ISD::REV16"; - case ARM64ISD::REV32: return "ARM64ISD::REV32"; - case ARM64ISD::REV64: return "ARM64ISD::REV64"; - case ARM64ISD::EXT: return "ARM64ISD::EXT"; - case ARM64ISD::VSHL: return "ARM64ISD::VSHL"; - case ARM64ISD::VLSHR: return "ARM64ISD::VLSHR"; - case ARM64ISD::VASHR: return "ARM64ISD::VASHR"; - case ARM64ISD::CMEQ: return "ARM64ISD::CMEQ"; - case ARM64ISD::CMGE: return "ARM64ISD::CMGE"; - case ARM64ISD::CMGT: return "ARM64ISD::CMGT"; - case ARM64ISD::CMHI: return "ARM64ISD::CMHI"; - case ARM64ISD::CMHS: return "ARM64ISD::CMHS"; - case ARM64ISD::FCMEQ: return "ARM64ISD::FCMEQ"; - case ARM64ISD::FCMGE: return "ARM64ISD::FCMGE"; - case ARM64ISD::FCMGT: return "ARM64ISD::FCMGT"; - case ARM64ISD::CMEQz: return "ARM64ISD::CMEQz"; - case ARM64ISD::CMGEz: return "ARM64ISD::CMGEz"; - case ARM64ISD::CMGTz: return "ARM64ISD::CMGTz"; - case ARM64ISD::CMLEz: return "ARM64ISD::CMLEz"; - 
case ARM64ISD::CMLTz: return "ARM64ISD::CMLTz"; - case ARM64ISD::FCMEQz: return "ARM64ISD::FCMEQz"; - case ARM64ISD::FCMGEz: return "ARM64ISD::FCMGEz"; - case ARM64ISD::FCMGTz: return "ARM64ISD::FCMGTz"; - case ARM64ISD::FCMLEz: return "ARM64ISD::FCMLEz"; - case ARM64ISD::FCMLTz: return "ARM64ISD::FCMLTz"; - case ARM64ISD::NOT: return "ARM64ISD::NOT"; - case ARM64ISD::BIT: return "ARM64ISD::BIT"; - case ARM64ISD::CBZ: return "ARM64ISD::CBZ"; - case ARM64ISD::CBNZ: return "ARM64ISD::CBNZ"; - case ARM64ISD::TBZ: return "ARM64ISD::TBZ"; - case ARM64ISD::TBNZ: return "ARM64ISD::TBNZ"; - case ARM64ISD::TC_RETURN: return "ARM64ISD::TC_RETURN"; - case ARM64ISD::SITOF: return "ARM64ISD::SITOF"; - case ARM64ISD::UITOF: return "ARM64ISD::UITOF"; - case ARM64ISD::SQSHL_I: return "ARM64ISD::SQSHL_I"; - case ARM64ISD::UQSHL_I: return "ARM64ISD::UQSHL_I"; - case ARM64ISD::SRSHR_I: return "ARM64ISD::SRSHR_I"; - case ARM64ISD::URSHR_I: return "ARM64ISD::URSHR_I"; - case ARM64ISD::SQSHLU_I: return "ARM64ISD::SQSHLU_I"; - case ARM64ISD::WrapperLarge: return "ARM64ISD::WrapperLarge"; - } -} - -static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, - unsigned &LdrOpc, unsigned &StrOpc) { - static unsigned LoadBares[] = { ARM64::LDXRB, ARM64::LDXRH, ARM64::LDXRW, - ARM64::LDXRX, ARM64::LDXPX }; - static unsigned LoadAcqs[] = { ARM64::LDAXRB, ARM64::LDAXRH, ARM64::LDAXRW, - ARM64::LDAXRX, ARM64::LDAXPX }; - static unsigned StoreBares[] = { ARM64::STXRB, ARM64::STXRH, ARM64::STXRW, - ARM64::STXRX, ARM64::STXPX }; - static unsigned StoreRels[] = { ARM64::STLXRB, ARM64::STLXRH, ARM64::STLXRW, - ARM64::STLXRX, ARM64::STLXPX }; - - unsigned *LoadOps, *StoreOps; - if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) - LoadOps = LoadAcqs; - else - LoadOps = LoadBares; - - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) - StoreOps = StoreRels; - else - StoreOps = StoreBares; - - assert(isPowerOf2_32(Size) && Size <= 16 && - "unsupported size for atomic binary op!"); - - LdrOpc = LoadOps[Log2_32(Size)]; - StrOpc = StoreOps[Log2_32(Size)]; -} - -MachineBasicBlock *ARM64TargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size) const { - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned oldval = MI->getOperand(2).getReg(); - unsigned newval = MI->getOperand(3).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm()); - unsigned scratch = BB->getParent()->getRegInfo().createVirtualRegister( - &ARM64::GPR32RegClass); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); - - // FIXME: We currently always generate a seq_cst operation; we should - // be able to relax this in some cases. - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); - - MachineFunction *MF = BB->getParent(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; // insert the new blocks after the current block - - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loop1MBB); - MF->insert(It, loop2MBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. 
- exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // thisMBB: - // ... - // fallthrough --> loop1MBB - BB->addSuccessor(loop1MBB); - - // loop1MBB: - // ldrex dest, [ptr] - // cmp dest, oldval - // bne exitMBB - BB = loop1MBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - BuildMI(BB, dl, TII->get(Size == 8 ? ARM64::SUBSXrr : ARM64::SUBSWrr)) - .addReg(Size == 8 ? ARM64::XZR : ARM64::WZR, RegState::Define) - .addReg(dest) - .addReg(oldval); - BuildMI(BB, dl, TII->get(ARM64::Bcc)).addImm(ARM64CC::NE).addMBB(exitMBB); - BB->addSuccessor(loop2MBB); - BB->addSuccessor(exitMBB); - - // loop2MBB: - // strex scratch, newval, [ptr] - // cmp scratch, #0 - // bne loop1MBB - BB = loop2MBB; - BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); - BuildMI(BB, dl, TII->get(ARM64::CBNZW)).addReg(scratch).addMBB(loop1MBB); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -ARM64TargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size, unsigned BinOpcode) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - unsigned scratch = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass); - unsigned scratch2 = - (!BinOpcode) - ? incr - : RegInfo.createVirtualRegister(Size == 8 ? &ARM64::GPR64RegClass - : &ARM64::GPR32RegClass); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldxr dest, ptr - // <binop> scratch2, dest, incr - // stxr scratch, scratch2, ptr - // cbnz scratch, loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (BinOpcode) { - // operand order needs to go the other way for NAND - if (BinOpcode == ARM64::BICWrr || BinOpcode == ARM64::BICXrr) - BuildMI(BB, dl, TII->get(BinOpcode), scratch2).addReg(incr).addReg(dest); - else - BuildMI(BB, dl, TII->get(BinOpcode), scratch2).addReg(dest).addReg(incr); - } - - BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); - BuildMI(BB, dl, TII->get(ARM64::CBNZW)).addReg(scratch).addMBB(loopMBB); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. 
- - return BB; -} - -MachineBasicBlock *ARM64TargetLowering::EmitAtomicBinary128( - MachineInstr *MI, MachineBasicBlock *BB, unsigned BinOpcodeLo, - unsigned BinOpcodeHi) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned DestLo = MI->getOperand(0).getReg(); - unsigned DestHi = MI->getOperand(1).getReg(); - unsigned Ptr = MI->getOperand(2).getReg(); - unsigned IncrLo = MI->getOperand(3).getReg(); - unsigned IncrHi = MI->getOperand(4).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(5).getImm()); - DebugLoc DL = MI->getDebugLoc(); - - unsigned LdrOpc, StrOpc; - getExclusiveOperation(16, Ord, LdrOpc, StrOpc); - - MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, LoopMBB); - MF->insert(It, ExitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - ExitMBB->splice(ExitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(BB); - - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - unsigned ScratchRes = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass); - unsigned ScratchLo = IncrLo, ScratchHi = IncrHi; - if (BinOpcodeLo) { - assert(BinOpcodeHi && "Expect neither or both opcodes to be defined"); - ScratchLo = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); - ScratchHi = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); - } - - // ThisMBB: - // ... - // fallthrough --> LoopMBB - BB->addSuccessor(LoopMBB); - - // LoopMBB: - // ldxp DestLo, DestHi, Ptr - // <binoplo> ScratchLo, DestLo, IncrLo - // <binophi> ScratchHi, DestHi, IncrHi - // stxp ScratchRes, ScratchLo, ScratchHi, ptr - // cbnz ScratchRes, LoopMBB - // fallthrough --> ExitMBB - BB = LoopMBB; - BuildMI(BB, DL, TII->get(LdrOpc), DestLo) - .addReg(DestHi, RegState::Define) - .addReg(Ptr); - if (BinOpcodeLo) { - // operand order needs to go the other way for NAND - if (BinOpcodeLo == ARM64::BICXrr) { - std::swap(IncrLo, DestLo); - std::swap(IncrHi, DestHi); - } - - BuildMI(BB, DL, TII->get(BinOpcodeLo), ScratchLo).addReg(DestLo).addReg( - IncrLo); - BuildMI(BB, DL, TII->get(BinOpcodeHi), ScratchHi).addReg(DestHi).addReg( - IncrHi); - } - - BuildMI(BB, DL, TII->get(StrOpc), ScratchRes) - .addReg(ScratchLo) - .addReg(ScratchHi) - .addReg(Ptr); - BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(LoopMBB); - - BB->addSuccessor(LoopMBB); - BB->addSuccessor(ExitMBB); - - // ExitMBB: - // ... - BB = ExitMBB; - - MI->eraseFromParent(); // The instruction is gone now. 
- - return BB; -} - -MachineBasicBlock * -ARM64TargetLowering::EmitAtomicCmpSwap128(MachineInstr *MI, - MachineBasicBlock *BB) const { - unsigned DestLo = MI->getOperand(0).getReg(); - unsigned DestHi = MI->getOperand(1).getReg(); - unsigned Ptr = MI->getOperand(2).getReg(); - unsigned OldValLo = MI->getOperand(3).getReg(); - unsigned OldValHi = MI->getOperand(4).getReg(); - unsigned NewValLo = MI->getOperand(5).getReg(); - unsigned NewValHi = MI->getOperand(6).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(7).getImm()); - unsigned ScratchRes = BB->getParent()->getRegInfo().createVirtualRegister( - &ARM64::GPR32RegClass); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - - unsigned LdrOpc, StrOpc; - getExclusiveOperation(16, Ord, LdrOpc, StrOpc); - - MachineFunction *MF = BB->getParent(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; // insert the new blocks after the current block - - MachineBasicBlock *Loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *Loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, Loop1MBB); - MF->insert(It, Loop2MBB); - MF->insert(It, ExitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - ExitMBB->splice(ExitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // ThisMBB: - // ... - // fallthrough --> Loop1MBB - BB->addSuccessor(Loop1MBB); - - // Loop1MBB: - // ldxp DestLo, DestHi, [Ptr] - // cmp DestLo, OldValLo - // sbc xzr, DestHi, OldValHi - // bne ExitMBB - BB = Loop1MBB; - BuildMI(BB, DL, TII->get(LdrOpc), DestLo) - .addReg(DestHi, RegState::Define) - .addReg(Ptr); - BuildMI(BB, DL, TII->get(ARM64::SUBSXrr), ARM64::XZR).addReg(DestLo).addReg( - OldValLo); - BuildMI(BB, DL, TII->get(ARM64::SBCXr), ARM64::XZR).addReg(DestHi).addReg( - OldValHi); - - BuildMI(BB, DL, TII->get(ARM64::Bcc)).addImm(ARM64CC::NE).addMBB(ExitMBB); - BB->addSuccessor(Loop2MBB); - BB->addSuccessor(ExitMBB); - - // Loop2MBB: - // stxp ScratchRes, NewValLo, NewValHi, [Ptr] - // cbnz ScratchRes, Loop1MBB - BB = Loop2MBB; - BuildMI(BB, DL, TII->get(StrOpc), ScratchRes) - .addReg(NewValLo) - .addReg(NewValHi) - .addReg(Ptr); - BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(Loop1MBB); - BB->addSuccessor(Loop1MBB); - BB->addSuccessor(ExitMBB); - - // ExitMBB: - // ... - BB = ExitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock *ARM64TargetLowering::EmitAtomicMinMax128( - MachineInstr *MI, MachineBasicBlock *BB, unsigned CondCode) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned DestLo = MI->getOperand(0).getReg(); - unsigned DestHi = MI->getOperand(1).getReg(); - unsigned Ptr = MI->getOperand(2).getReg(); - unsigned IncrLo = MI->getOperand(3).getReg(); - unsigned IncrHi = MI->getOperand(4).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(5).getImm()); - DebugLoc DL = MI->getDebugLoc(); - - unsigned LdrOpc, StrOpc; - getExclusiveOperation(16, Ord, LdrOpc, StrOpc); - - MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, LoopMBB); - MF->insert(It, ExitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - ExitMBB->splice(ExitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(BB); - - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - unsigned ScratchRes = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass); - unsigned ScratchLo = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); - unsigned ScratchHi = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); - - // ThisMBB: - // ... - // fallthrough --> LoopMBB - BB->addSuccessor(LoopMBB); - - // LoopMBB: - // ldxp DestLo, DestHi, Ptr - // cmp ScratchLo, DestLo, IncrLo - // sbc xzr, ScratchHi, DestHi, IncrHi - // csel ScratchLo, DestLo, IncrLo, <cmp-op> - // csel ScratchHi, DestHi, IncrHi, <cmp-op> - // stxp ScratchRes, ScratchLo, ScratchHi, ptr - // cbnz ScratchRes, LoopMBB - // fallthrough --> ExitMBB - BB = LoopMBB; - BuildMI(BB, DL, TII->get(LdrOpc), DestLo) - .addReg(DestHi, RegState::Define) - .addReg(Ptr); - - BuildMI(BB, DL, TII->get(ARM64::SUBSXrr), ARM64::XZR).addReg(DestLo).addReg( - IncrLo); - BuildMI(BB, DL, TII->get(ARM64::SBCXr), ARM64::XZR).addReg(DestHi).addReg( - IncrHi); - - BuildMI(BB, DL, TII->get(ARM64::CSELXr), ScratchLo) - .addReg(DestLo) - .addReg(IncrLo) - .addImm(CondCode); - BuildMI(BB, DL, TII->get(ARM64::CSELXr), ScratchHi) - .addReg(DestHi) - .addReg(IncrHi) - .addImm(CondCode); - - BuildMI(BB, DL, TII->get(StrOpc), ScratchRes) - .addReg(ScratchLo) - .addReg(ScratchHi) - .addReg(Ptr); - BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(LoopMBB); - - BB->addSuccessor(LoopMBB); - BB->addSuccessor(ExitMBB); - - // ExitMBB: - // ... - BB = ExitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -ARM64TargetLowering::EmitF128CSEL(MachineInstr *MI, - MachineBasicBlock *MBB) const { - // We materialise the F128CSEL pseudo-instruction as some control flow and a - // phi node: - - // OrigBB: - // [... previous instrs leading to comparison ...] 
- // b.ne TrueBB - // b EndBB - // TrueBB: - // ; Fallthrough - // EndBB: - // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB] - - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - MachineFunction *MF = MBB->getParent(); - const BasicBlock *LLVM_BB = MBB->getBasicBlock(); - DebugLoc DL = MI->getDebugLoc(); - MachineFunction::iterator It = MBB; - ++It; - - unsigned DestReg = MI->getOperand(0).getReg(); - unsigned IfTrueReg = MI->getOperand(1).getReg(); - unsigned IfFalseReg = MI->getOperand(2).getReg(); - unsigned CondCode = MI->getOperand(3).getImm(); - bool CPSRKilled = MI->getOperand(4).isKill(); - - MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, TrueBB); - MF->insert(It, EndBB); - - // Transfer rest of current basic-block to EndBB - EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), - MBB->end()); - EndBB->transferSuccessorsAndUpdatePHIs(MBB); - - BuildMI(MBB, DL, TII->get(ARM64::Bcc)).addImm(CondCode).addMBB(TrueBB); - BuildMI(MBB, DL, TII->get(ARM64::B)).addMBB(EndBB); - MBB->addSuccessor(TrueBB); - MBB->addSuccessor(EndBB); - - // TrueBB falls through to the end. - TrueBB->addSuccessor(EndBB); - - if (!CPSRKilled) { - TrueBB->addLiveIn(ARM64::CPSR); - EndBB->addLiveIn(ARM64::CPSR); - } - - BuildMI(*EndBB, EndBB->begin(), DL, TII->get(ARM64::PHI), DestReg) - .addReg(IfTrueReg) - .addMBB(TrueBB) - .addReg(IfFalseReg) - .addMBB(MBB); - - MI->eraseFromParent(); - return EndBB; -} - -MachineBasicBlock * -ARM64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const { - switch (MI->getOpcode()) { - default: -#ifndef NDEBUG - MI->dump(); -#endif - assert(0 && "Unexpected instruction for custom inserter!"); - break; - - case ARM64::ATOMIC_LOAD_ADD_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::ADDWrr); - case ARM64::ATOMIC_LOAD_ADD_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::ADDWrr); - case ARM64::ATOMIC_LOAD_ADD_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::ADDWrr); - case ARM64::ATOMIC_LOAD_ADD_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::ADDXrr); - case ARM64::ATOMIC_LOAD_ADD_I128: - return EmitAtomicBinary128(MI, BB, ARM64::ADDSXrr, ARM64::ADCXr); - - case ARM64::ATOMIC_LOAD_AND_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::ANDWrr); - case ARM64::ATOMIC_LOAD_AND_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::ANDWrr); - case ARM64::ATOMIC_LOAD_AND_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::ANDWrr); - case ARM64::ATOMIC_LOAD_AND_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::ANDXrr); - case ARM64::ATOMIC_LOAD_AND_I128: - return EmitAtomicBinary128(MI, BB, ARM64::ANDXrr, ARM64::ANDXrr); - - case ARM64::ATOMIC_LOAD_OR_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::ORRWrr); - case ARM64::ATOMIC_LOAD_OR_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::ORRWrr); - case ARM64::ATOMIC_LOAD_OR_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::ORRWrr); - case ARM64::ATOMIC_LOAD_OR_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::ORRXrr); - case ARM64::ATOMIC_LOAD_OR_I128: - return EmitAtomicBinary128(MI, BB, ARM64::ORRXrr, ARM64::ORRXrr); - - case ARM64::ATOMIC_LOAD_XOR_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::EORWrr); - case ARM64::ATOMIC_LOAD_XOR_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::EORWrr); - case ARM64::ATOMIC_LOAD_XOR_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::EORWrr); - case ARM64::ATOMIC_LOAD_XOR_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::EORXrr); - case 
ARM64::ATOMIC_LOAD_XOR_I128: - return EmitAtomicBinary128(MI, BB, ARM64::EORXrr, ARM64::EORXrr); - - case ARM64::ATOMIC_LOAD_NAND_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::BICWrr); - case ARM64::ATOMIC_LOAD_NAND_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::BICWrr); - case ARM64::ATOMIC_LOAD_NAND_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::BICWrr); - case ARM64::ATOMIC_LOAD_NAND_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::BICXrr); - case ARM64::ATOMIC_LOAD_NAND_I128: - return EmitAtomicBinary128(MI, BB, ARM64::BICXrr, ARM64::BICXrr); - - case ARM64::ATOMIC_LOAD_SUB_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::SUBWrr); - case ARM64::ATOMIC_LOAD_SUB_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::SUBWrr); - case ARM64::ATOMIC_LOAD_SUB_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::SUBWrr); - case ARM64::ATOMIC_LOAD_SUB_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::SUBXrr); - case ARM64::ATOMIC_LOAD_SUB_I128: - return EmitAtomicBinary128(MI, BB, ARM64::SUBSXrr, ARM64::SBCXr); - - case ARM64::ATOMIC_LOAD_MIN_I128: - return EmitAtomicMinMax128(MI, BB, ARM64CC::LT); - - case ARM64::ATOMIC_LOAD_MAX_I128: - return EmitAtomicMinMax128(MI, BB, ARM64CC::GT); - - case ARM64::ATOMIC_LOAD_UMIN_I128: - return EmitAtomicMinMax128(MI, BB, ARM64CC::CC); - - case ARM64::ATOMIC_LOAD_UMAX_I128: - return EmitAtomicMinMax128(MI, BB, ARM64CC::HI); - - case ARM64::ATOMIC_SWAP_I8: - return EmitAtomicBinary(MI, BB, 1, 0); - case ARM64::ATOMIC_SWAP_I16: - return EmitAtomicBinary(MI, BB, 2, 0); - case ARM64::ATOMIC_SWAP_I32: - return EmitAtomicBinary(MI, BB, 4, 0); - case ARM64::ATOMIC_SWAP_I64: - return EmitAtomicBinary(MI, BB, 8, 0); - case ARM64::ATOMIC_SWAP_I128: - return EmitAtomicBinary128(MI, BB, 0, 0); - - case ARM64::ATOMIC_CMP_SWAP_I8: - return EmitAtomicCmpSwap(MI, BB, 1); - case ARM64::ATOMIC_CMP_SWAP_I16: - return EmitAtomicCmpSwap(MI, BB, 2); - case ARM64::ATOMIC_CMP_SWAP_I32: - return EmitAtomicCmpSwap(MI, BB, 4); - case ARM64::ATOMIC_CMP_SWAP_I64: - return EmitAtomicCmpSwap(MI, BB, 8); - case ARM64::ATOMIC_CMP_SWAP_I128: - return EmitAtomicCmpSwap128(MI, BB); - - case ARM64::F128CSEL: - return EmitF128CSEL(MI, BB); - - case TargetOpcode::STACKMAP: - case TargetOpcode::PATCHPOINT: - return emitPatchPoint(MI, BB); - } - llvm_unreachable("Unexpected instruction for custom inserter!"); -} - -//===----------------------------------------------------------------------===// -// ARM64 Lowering private implementation. -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Lowering Code -//===----------------------------------------------------------------------===// - -/// changeIntCCToARM64CC - Convert a DAG integer condition code to an ARM64 CC -static ARM64CC::CondCode changeIntCCToARM64CC(ISD::CondCode CC) { - switch (CC) { - default: - llvm_unreachable("Unknown condition code!"); - case ISD::SETNE: - return ARM64CC::NE; - case ISD::SETEQ: - return ARM64CC::EQ; - case ISD::SETGT: - return ARM64CC::GT; - case ISD::SETGE: - return ARM64CC::GE; - case ISD::SETLT: - return ARM64CC::LT; - case ISD::SETLE: - return ARM64CC::LE; - case ISD::SETUGT: - return ARM64CC::HI; - case ISD::SETUGE: - return ARM64CC::CS; - case ISD::SETULT: - return ARM64CC::CC; - case ISD::SETULE: - return ARM64CC::LS; - } -} - -/// changeFPCCToARM64CC - Convert a DAG fp condition code to an ARM64 CC. 
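The integer mapping above sends the unsigned predicates to CS, CC, HI and LS, which follows from how SUBS sets the carry flag for a subtraction (carry set means no borrow occurred). A small self-contained check of that correspondence; the flag-model names below are made up for illustration:

#include <cassert>
#include <cstdint>

// NZCV flags as AArch64's SUBS would set them for "a - b" (carry = no borrow).
struct Flags { bool n, z, c, v; };

Flags subsFlags(uint32_t a, uint32_t b) {
  uint32_t r = a - b;
  bool n = (int32_t)r < 0;
  bool z = r == 0;
  bool c = a >= b;                                // carry set <=> no borrow
  bool v = (((a ^ b) & (a ^ r)) >> 31) != 0;      // signed overflow of the subtract
  return {n, z, c, v};
}

int main() {
  // The unsigned rows of the table: SETUGE -> CS, SETULT -> CC,
  // SETUGT -> HI, SETULE -> LS.
  for (uint32_t a : {0u, 1u, 7u, 0xFFFFFFFFu})
    for (uint32_t b : {0u, 1u, 7u, 0xFFFFFFFFu}) {
      Flags f = subsFlags(a, b);
      assert((a >= b) == f.c);             // CS
      assert((a <  b) == !f.c);            // CC
      assert((a >  b) == (f.c && !f.z));   // HI
      assert((a <= b) == (!f.c || f.z));   // LS
    }
}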
-static void changeFPCCToARM64CC(ISD::CondCode CC, ARM64CC::CondCode &CondCode, - ARM64CC::CondCode &CondCode2) { - CondCode2 = ARM64CC::AL; - switch (CC) { - default: - llvm_unreachable("Unknown FP condition!"); - case ISD::SETEQ: - case ISD::SETOEQ: - CondCode = ARM64CC::EQ; - break; - case ISD::SETGT: - case ISD::SETOGT: - CondCode = ARM64CC::GT; - break; - case ISD::SETGE: - case ISD::SETOGE: - CondCode = ARM64CC::GE; - break; - case ISD::SETOLT: - CondCode = ARM64CC::MI; - break; - case ISD::SETOLE: - CondCode = ARM64CC::LS; - break; - case ISD::SETONE: - CondCode = ARM64CC::MI; - CondCode2 = ARM64CC::GT; - break; - case ISD::SETO: - CondCode = ARM64CC::VC; - break; - case ISD::SETUO: - CondCode = ARM64CC::VS; - break; - case ISD::SETUEQ: - CondCode = ARM64CC::EQ; - CondCode2 = ARM64CC::VS; - break; - case ISD::SETUGT: - CondCode = ARM64CC::HI; - break; - case ISD::SETUGE: - CondCode = ARM64CC::PL; - break; - case ISD::SETLT: - case ISD::SETULT: - CondCode = ARM64CC::LT; - break; - case ISD::SETLE: - case ISD::SETULE: - CondCode = ARM64CC::LE; - break; - case ISD::SETNE: - case ISD::SETUNE: - CondCode = ARM64CC::NE; - break; - } -} - -static bool isLegalArithImmed(uint64_t C) { - // Matches ARM64DAGToDAGISel::SelectArithImmed(). - return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); -} - -static SDValue emitComparison(SDValue LHS, SDValue RHS, SDLoc dl, - SelectionDAG &DAG) { - EVT VT = LHS.getValueType(); - - if (VT.isFloatingPoint()) - return DAG.getNode(ARM64ISD::FCMP, dl, VT, LHS, RHS); - - // The CMP instruction is just an alias for SUBS, and representing it as - // SUBS means that it's possible to get CSE with subtract operations. - // A later phase can perform the optimization of setting the destination - // register to WZR/XZR if it ends up being unused. - return DAG.getNode(ARM64ISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS) - .getValue(1); -} - -static SDValue getARM64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARM64cc, SelectionDAG &DAG, SDLoc dl) { - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { - EVT VT = RHS.getValueType(); - uint64_t C = RHSC->getZExtValue(); - if (!isLegalArithImmed(C)) { - // Constant does not fit, try adjusting it by one? - switch (CC) { - default: - break; - case ISD::SETLT: - case ISD::SETGE: - if ((VT == MVT::i32 && C != 0x80000000 && - isLegalArithImmed((uint32_t)(C - 1))) || - (VT == MVT::i64 && C != 0x80000000ULL && - isLegalArithImmed(C - 1ULL))) { - CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; - C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; - RHS = DAG.getConstant(C, VT); - } - break; - case ISD::SETULT: - case ISD::SETUGE: - if ((VT == MVT::i32 && C != 0 && - isLegalArithImmed((uint32_t)(C - 1))) || - (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) { - CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; - C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; - RHS = DAG.getConstant(C, VT); - } - break; - case ISD::SETLE: - case ISD::SETGT: - if ((VT == MVT::i32 && C != 0x7fffffff && - isLegalArithImmed((uint32_t)(C + 1))) || - (VT == MVT::i64 && C != 0x7ffffffffffffffULL && - isLegalArithImmed(C + 1ULL))) { - CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; - C = (VT == MVT::i32) ? 
(uint32_t)(C + 1) : C + 1; - RHS = DAG.getConstant(C, VT); - } - break; - case ISD::SETULE: - case ISD::SETUGT: - if ((VT == MVT::i32 && C != 0xffffffff && - isLegalArithImmed((uint32_t)(C + 1))) || - (VT == MVT::i64 && C != 0xfffffffffffffffULL && - isLegalArithImmed(C + 1ULL))) { - CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; - C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1; - RHS = DAG.getConstant(C, VT); - } - break; - } - } - } - - SDValue Cmp = emitComparison(LHS, RHS, dl, DAG); - ARM64CC::CondCode ARM64CC = changeIntCCToARM64CC(CC); - ARM64cc = DAG.getConstant(ARM64CC, MVT::i32); - return Cmp; -} - -static std::pair<SDValue, SDValue> -getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { - assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) && - "Unsupported value type"); - SDValue Value, Overflow; - SDLoc DL(Op); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - unsigned Opc = 0; - switch (Op.getOpcode()) { - default: - llvm_unreachable("Unknown overflow instruction!"); - case ISD::SADDO: - Opc = ARM64ISD::ADDS; - CC = ARM64CC::VS; - break; - case ISD::UADDO: - Opc = ARM64ISD::ADDS; - CC = ARM64CC::CS; - break; - case ISD::SSUBO: - Opc = ARM64ISD::SUBS; - CC = ARM64CC::VS; - break; - case ISD::USUBO: - Opc = ARM64ISD::SUBS; - CC = ARM64CC::CC; - break; - // Multiply needs a little bit extra work. - case ISD::SMULO: - case ISD::UMULO: { - CC = ARM64CC::NE; - bool IsSigned = (Op.getOpcode() == ISD::SMULO) ? true : false; - if (Op.getValueType() == MVT::i32) { - unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - // For a 32 bit multiply with overflow check we want the instruction - // selector to generate a widening multiply (SMADDL/UMADDL). For that we - // need to generate the following pattern: - // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b)) - LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS); - RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS); - SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); - SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul, - DAG.getConstant(0, MVT::i64)); - // On ARM64 the upper 32 bits are always zero extended for a 32 bit - // operation. We need to clear out the upper 32 bits, because we used a - // widening multiply that wrote all 64 bits. In the end this should be a - // noop. - Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add); - if (IsSigned) { - // The signed overflow check requires more than just a simple check for - // any bit set in the upper 32 bits of the result. These bits could be - // just the sign bits of a negative number. To perform the overflow - // check we have to arithmetic shift right the 32nd bit of the result by - // 31 bits. Then we compare the result to the upper 32 bits. - SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add, - DAG.getConstant(32, MVT::i64)); - UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits); - SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value, - DAG.getConstant(31, MVT::i64)); - // It is important that LowerBits is last, otherwise the arithmetic - // shift will not be folded into the compare (SUBS). - SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32); - Overflow = DAG.getNode(ARM64ISD::SUBS, DL, VTs, UpperBits, LowerBits) - .getValue(1); - } else { - // The overflow check for unsigned multiply is easy. We only need to - // check if any of the upper 32 bits are set. This can be done with a - // CMP (shifted register). 
For that we need to generate the following - // pattern: - // (i64 ARM64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32) - SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, - DAG.getConstant(32, MVT::i64)); - SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); - Overflow = - DAG.getNode(ARM64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), - UpperBits).getValue(1); - } - break; - } - assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type"); - // For the 64 bit multiply - Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); - if (IsSigned) { - SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS); - SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value, - DAG.getConstant(63, MVT::i64)); - // It is important that LowerBits is last, otherwise the arithmetic - // shift will not be folded into the compare (SUBS). - SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); - Overflow = DAG.getNode(ARM64ISD::SUBS, DL, VTs, UpperBits, LowerBits) - .getValue(1); - } else { - SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS); - SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); - Overflow = - DAG.getNode(ARM64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), - UpperBits).getValue(1); - } - break; - } - } // switch (...) - - if (Opc) { - SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32); - - // Emit the ARM64 operation with overflow check. - Value = DAG.getNode(Opc, DL, VTs, LHS, RHS); - Overflow = Value.getValue(1); - } - return std::make_pair(Value, Overflow); -} - -SDValue ARM64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, - RTLIB::Libcall Call) const { - SmallVector<SDValue, 2> Ops; - for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) - Ops.push_back(Op.getOperand(i)); - - return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false, - SDLoc(Op)).first; -} - -static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) { - SDValue Sel = Op.getOperand(0); - SDValue Other = Op.getOperand(1); - - // If neither operand is a SELECT_CC, give up. - if (Sel.getOpcode() != ISD::SELECT_CC) - std::swap(Sel, Other); - if (Sel.getOpcode() != ISD::SELECT_CC) - return Op; - - // The folding we want to perform is: - // (xor x, (select_cc a, b, cc, 0, -1) ) - // --> - // (csel x, (xor x, -1), cc ...) - // - // The latter will get matched to a CSINV instruction. - - ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get(); - SDValue LHS = Sel.getOperand(0); - SDValue RHS = Sel.getOperand(1); - SDValue TVal = Sel.getOperand(2); - SDValue FVal = Sel.getOperand(3); - SDLoc dl(Sel); - - // FIXME: This could be generalized to non-integer comparisons. - if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64) - return Op; - - ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal); - ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal); - - // The the values aren't constants, this isn't the pattern we're looking for. - if (!CFVal || !CTVal) - return Op; - - // We can commute the SELECT_CC by inverting the condition. This - // might be needed to make this fit into a CSINV pattern. - if (CTVal->isAllOnesValue() && CFVal->isNullValue()) { - std::swap(TVal, FVal); - std::swap(CTVal, CFVal); - CC = ISD::getSetCCInverse(CC, true); - } - - // If the constants line up, perform the transform! 
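For 32-bit SMULO/UMULO, the code above widens both operands to 64 bits, multiplies once, and inspects the upper 32 bits of the product: any set bit means unsigned overflow, while signed overflow means the upper half differs from the sign-extension of bit 31 of the low half. The same check restated as standalone C++ (function names are illustrative):

#include <cassert>
#include <cstdint>

// Unsigned 32-bit multiply overflow: any of the upper 32 bits set.
bool umulOverflows(uint32_t a, uint32_t b) {
  uint64_t wide = (uint64_t)a * (uint64_t)b;
  return (wide >> 32) != 0;
}

// Signed 32-bit multiply overflow: the upper 32 bits must equal the
// sign-extension of bit 31 of the low 32 bits (what the SRA #31 produces).
bool smulOverflows(int32_t a, int32_t b) {
  int64_t wide = (int64_t)a * (int64_t)b;
  int32_t lo = (int32_t)wide;
  return (wide >> 32) != (int64_t)(lo >> 31);
}

int main() {
  assert(!umulOverflows(65535, 65537) && umulOverflows(65536, 65536));
  assert(!smulOverflows(-46341, 46340) && smulOverflows(46341, 46341));
  assert(smulOverflows(INT32_MIN, -1));
}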
- if (CTVal->isNullValue() && CFVal->isAllOnesValue()) { - SDValue CCVal; - SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl); - - FVal = Other; - TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other, - DAG.getConstant(-1ULL, Other.getValueType())); - - return DAG.getNode(ARM64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal, - CCVal, Cmp); - } - - return Op; -} - -static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - - // Let legalize expand this if it isn't a legal type yet. - if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) - return SDValue(); - - SDVTList VTs = DAG.getVTList(VT, MVT::i32); - - unsigned Opc; - bool ExtraOp = false; - switch (Op.getOpcode()) { - default: - assert(0 && "Invalid code"); - case ISD::ADDC: - Opc = ARM64ISD::ADDS; - break; - case ISD::SUBC: - Opc = ARM64ISD::SUBS; - break; - case ISD::ADDE: - Opc = ARM64ISD::ADCS; - ExtraOp = true; - break; - case ISD::SUBE: - Opc = ARM64ISD::SBCS; - ExtraOp = true; - break; - } - - if (!ExtraOp) - return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1)); - return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1), - Op.getOperand(2)); -} - -static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { - // Let legalize expand this if it isn't a legal type yet. - if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) - return SDValue(); - - ARM64CC::CondCode CC; - // The actual operation that sets the overflow or carry flag. - SDValue Value, Overflow; - std::tie(Value, Overflow) = getARM64XALUOOp(CC, Op, DAG); - - // We use 0 and 1 as false and true values. - SDValue TVal = DAG.getConstant(1, MVT::i32); - SDValue FVal = DAG.getConstant(0, MVT::i32); - - // We use an inverted condition, because the conditional select is inverted - // too. This will allow it to be selected to a single instruction: - // CSINC Wd, WZR, WZR, invert(cond). - SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), MVT::i32); - Overflow = DAG.getNode(ARM64ISD::CSEL, SDLoc(Op), MVT::i32, FVal, TVal, CCVal, - Overflow); - - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); - return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow); -} - -// Prefetch operands are: -// 1: Address to prefetch -// 2: bool isWrite -// 3: int locality (0 = no locality ... 3 = extreme locality) -// 4: bool isDataCache -static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) { - SDLoc DL(Op); - unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); - unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); - // The data thing is not used. - // unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); - - bool IsStream = !Locality; - // When the locality number is set - if (Locality) { - // The front-end should have filtered out the out-of-range values - assert(Locality <= 3 && "Prefetch locality out-of-range"); - // The locality degree is the opposite of the cache speed. - // Put the number the other way around. - // The encoding starts at 0 for level 1 - Locality = 3 - Locality; - } - - // built the mask value encoding the expected behavior. 
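LowerXOR above rewrites xor(x, select_cc(a, b, cc, 0, -1)) into a conditional select between x and its complement, which the selector can then match to CSINV. The rewrite rests on a simple identity; a quick standalone check (names are illustrative, and no claim is made about the exact CSEL operand order used by the deleted code):

#include <cassert>
#include <cstdint>

// xor with an all-ones/zero mask chosen by a condition is the same as
// selecting between the value and its bitwise complement.
uint64_t beforeFold(bool cond, uint64_t x) {
  uint64_t mask = cond ? 0 : ~0ULL;   // select_cc ..., 0, -1
  return x ^ mask;
}

uint64_t afterFold(bool cond, uint64_t x) {
  return cond ? x : ~x;               // the form CSINV computes
}

int main() {
  for (uint64_t x : {0ULL, 1ULL, 0xDEADBEEFULL, ~0ULL})
    for (bool cond : {false, true})
      assert(beforeFold(cond, x) == afterFold(cond, x));
}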
- unsigned PrfOp = (IsWrite << 4) | // Load/Store bit - (Locality << 1) | // Cache level bits - (unsigned)IsStream; // Stream bit - return DAG.getNode(ARM64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0), - DAG.getConstant(PrfOp, MVT::i32), Op.getOperand(1)); -} - -SDValue ARM64TargetLowering::LowerFP_EXTEND(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); - - RTLIB::Libcall LC; - LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); - - return LowerF128Call(Op, DAG, LC); -} - -SDValue ARM64TargetLowering::LowerFP_ROUND(SDValue Op, - SelectionDAG &DAG) const { - if (Op.getOperand(0).getValueType() != MVT::f128) { - // It's legal except when f128 is involved - return Op; - } - - RTLIB::Libcall LC; - LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); - - // FP_ROUND node has a second operand indicating whether it is known to be - // precise. That doesn't take part in the LibCall so we can't directly use - // LowerF128Call. - SDValue SrcVal = Op.getOperand(0); - return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, - /*isSigned*/ false, SDLoc(Op)).first; -} - -static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { - // Warning: We maintain cost tables in ARM64TargetTransformInfo.cpp. - // Any additional optimization in this function should be recorded - // in the cost tables. - EVT InVT = Op.getOperand(0).getValueType(); - EVT VT = Op.getValueType(); - - // FP_TO_XINT conversion from the same type are legal. - if (VT.getSizeInBits() == InVT.getSizeInBits()) - return Op; - - if (InVT == MVT::v2f64) { - SDLoc dl(Op); - SDValue Cv = DAG.getNode(Op.getOpcode(), dl, MVT::v2i64, Op.getOperand(0)); - return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv); - } - - // Type changing conversions are illegal. - return SDValue(); -} - -SDValue ARM64TargetLowering::LowerFP_TO_INT(SDValue Op, - SelectionDAG &DAG) const { - if (Op.getOperand(0).getValueType().isVector()) - return LowerVectorFP_TO_INT(Op, DAG); - - if (Op.getOperand(0).getValueType() != MVT::f128) { - // It's legal except when f128 is involved - return Op; - } - - RTLIB::Libcall LC; - if (Op.getOpcode() == ISD::FP_TO_SINT) - LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType()); - else - LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); - - SmallVector<SDValue, 2> Ops; - for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) - Ops.push_back(Op.getOperand(i)); - - return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false, - SDLoc(Op)).first; -} - -static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { - // Warning: We maintain cost tables in ARM64TargetTransformInfo.cpp. - // Any additional optimization in this function should be recorded - // in the cost tables. - EVT VT = Op.getValueType(); - SDLoc dl(Op); - SDValue In = Op.getOperand(0); - EVT InVT = In.getValueType(); - - // v2i32 to v2f32 is legal. - if (VT == MVT::v2f32 && InVT == MVT::v2i32) - return Op; - - // This function only handles v2f64 outputs. - if (VT == MVT::v2f64) { - // Extend the input argument to a v2i64 that we can feed into the - // floating point conversion. Zero or sign extend based on whether - // we're doing a signed or unsigned float conversion. - unsigned Opc = - Op.getOpcode() == ISD::UINT_TO_FP ? 
ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; - assert(Op.getNumOperands() == 1 && "FP conversions take one argument"); - SDValue Promoted = DAG.getNode(Opc, dl, MVT::v2i64, Op.getOperand(0)); - return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Promoted); - } - - // Scalarize v2i64 to v2f32 conversions. - std::vector<SDValue> BuildVectorOps; - for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { - SDValue Sclr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, In, - DAG.getConstant(i, MVT::i64)); - Sclr = DAG.getNode(Op->getOpcode(), dl, MVT::f32, Sclr); - BuildVectorOps.push_back(Sclr); - } - - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &BuildVectorOps[0], - BuildVectorOps.size()); -} - -SDValue ARM64TargetLowering::LowerINT_TO_FP(SDValue Op, - SelectionDAG &DAG) const { - if (Op.getValueType().isVector()) - return LowerVectorINT_TO_FP(Op, DAG); - - // i128 conversions are libcalls. - if (Op.getOperand(0).getValueType() == MVT::i128) - return SDValue(); - - // Other conversions are legal, unless it's to the completely software-based - // fp128. - if (Op.getValueType() != MVT::f128) - return Op; - - RTLIB::Libcall LC; - if (Op.getOpcode() == ISD::SINT_TO_FP) - LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); - else - LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); - - return LowerF128Call(Op, DAG, LC); -} - -SDValue ARM64TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { - // For iOS, we want to call an alternative entry point: __sincos_stret, - // which returns the values in two S / D registers. - SDLoc dl(Op); - SDValue Arg = Op.getOperand(0); - EVT ArgVT = Arg.getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - - ArgListTy Args; - ArgListEntry Entry; - - Entry.Node = Arg; - Entry.Ty = ArgTy; - Entry.isSExt = false; - Entry.isZExt = false; - Args.push_back(Entry); - - const char *LibcallName = - (ArgVT == MVT::f64) ? 
"__sincos_stret" : "__sincosf_stret"; - SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); - - StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); - TargetLowering::CallLoweringInfo CLI( - DAG.getEntryNode(), RetTy, false, false, false, false, 0, - CallingConv::Fast, /*isTaillCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed*/ true, Callee, Args, DAG, dl); - std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); - return CallResult.first; -} - -SDValue ARM64TargetLowering::LowerOperation(SDValue Op, - SelectionDAG &DAG) const { - switch (Op.getOpcode()) { - default: - llvm_unreachable("unimplemented operand"); - return SDValue(); - case ISD::GlobalAddress: - return LowerGlobalAddress(Op, DAG); - case ISD::GlobalTLSAddress: - return LowerGlobalTLSAddress(Op, DAG); - case ISD::SETCC: - return LowerSETCC(Op, DAG); - case ISD::BR_CC: - return LowerBR_CC(Op, DAG); - case ISD::SELECT: - return LowerSELECT(Op, DAG); - case ISD::SELECT_CC: - return LowerSELECT_CC(Op, DAG); - case ISD::JumpTable: - return LowerJumpTable(Op, DAG); - case ISD::ConstantPool: - return LowerConstantPool(Op, DAG); - case ISD::BlockAddress: - return LowerBlockAddress(Op, DAG); - case ISD::VASTART: - return LowerVASTART(Op, DAG); - case ISD::VACOPY: - return LowerVACOPY(Op, DAG); - case ISD::VAARG: - return LowerVAARG(Op, DAG); - case ISD::ADDC: - case ISD::ADDE: - case ISD::SUBC: - case ISD::SUBE: - return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); - case ISD::SADDO: - case ISD::UADDO: - case ISD::SSUBO: - case ISD::USUBO: - case ISD::SMULO: - case ISD::UMULO: - return LowerXALUO(Op, DAG); - case ISD::FADD: - return LowerF128Call(Op, DAG, RTLIB::ADD_F128); - case ISD::FSUB: - return LowerF128Call(Op, DAG, RTLIB::SUB_F128); - case ISD::FMUL: - return LowerF128Call(Op, DAG, RTLIB::MUL_F128); - case ISD::FDIV: - return LowerF128Call(Op, DAG, RTLIB::DIV_F128); - case ISD::FP_ROUND: - return LowerFP_ROUND(Op, DAG); - case ISD::FP_EXTEND: - return LowerFP_EXTEND(Op, DAG); - case ISD::FRAMEADDR: - return LowerFRAMEADDR(Op, DAG); - case ISD::RETURNADDR: - return LowerRETURNADDR(Op, DAG); - case ISD::INSERT_VECTOR_ELT: - return LowerINSERT_VECTOR_ELT(Op, DAG); - case ISD::EXTRACT_VECTOR_ELT: - return LowerEXTRACT_VECTOR_ELT(Op, DAG); - case ISD::SCALAR_TO_VECTOR: - return LowerSCALAR_TO_VECTOR(Op, DAG); - case ISD::BUILD_VECTOR: - return LowerBUILD_VECTOR(Op, DAG); - case ISD::VECTOR_SHUFFLE: - return LowerVECTOR_SHUFFLE(Op, DAG); - case ISD::EXTRACT_SUBVECTOR: - return LowerEXTRACT_SUBVECTOR(Op, DAG); - case ISD::SRA: - case ISD::SRL: - case ISD::SHL: - return LowerVectorSRA_SRL_SHL(Op, DAG); - case ISD::SHL_PARTS: - return LowerShiftLeftParts(Op, DAG); - case ISD::SRL_PARTS: - case ISD::SRA_PARTS: - return LowerShiftRightParts(Op, DAG); - case ISD::CTPOP: - return LowerCTPOP(Op, DAG); - case ISD::FCOPYSIGN: - return LowerFCOPYSIGN(Op, DAG); - case ISD::AND: - return LowerVectorAND(Op, DAG); - case ISD::OR: - return LowerVectorOR(Op, DAG); - case ISD::XOR: - return LowerXOR(Op, DAG); - case ISD::PREFETCH: - return LowerPREFETCH(Op, DAG); - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - return LowerINT_TO_FP(Op, DAG); - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - return LowerFP_TO_INT(Op, DAG); - case ISD::FSINCOS: - return LowerFSINCOS(Op, DAG); - } -} - -/// getFunctionAlignment - Return the Log2 alignment of this function. 
-unsigned ARM64TargetLowering::getFunctionAlignment(const Function *F) const { - return 2; -} - -//===----------------------------------------------------------------------===// -// Calling Convention Implementation -//===----------------------------------------------------------------------===// - -#include "ARM64GenCallingConv.inc" - -/// Selects the correct CCAssignFn for a the given CallingConvention -/// value. -CCAssignFn *ARM64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, - bool IsVarArg) const { - switch (CC) { - default: - llvm_unreachable("Unsupported calling convention."); - case CallingConv::WebKit_JS: - return CC_ARM64_WebKit_JS; - case CallingConv::C: - case CallingConv::Fast: - if (!Subtarget->isTargetDarwin()) - return CC_ARM64_AAPCS; - return IsVarArg ? CC_ARM64_DarwinPCS_VarArg : CC_ARM64_DarwinPCS; - } -} - -SDValue ARM64TargetLowering::LowerFormalArguments( - SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - - // Assign locations to all of the incoming arguments. - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - - // At this point, Ins[].VT may already be promoted to i32. To correctly - // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and - // i8 to CC_ARM64_AAPCS with i32 being ValVT and i8 being LocVT. - // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here - // we use a special version of AnalyzeFormalArguments to pass in ValVT and - // LocVT. - unsigned NumArgs = Ins.size(); - Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); - unsigned CurArgIdx = 0; - for (unsigned i = 0; i != NumArgs; ++i) { - MVT ValVT = Ins[i].VT; - std::advance(CurOrigArg, Ins[i].OrigArgIndex - CurArgIdx); - CurArgIdx = Ins[i].OrigArgIndex; - - // Get type of the original argument. - EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true); - MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other; - // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. - MVT LocVT = ValVT; - if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) - LocVT = MVT::i8; - else if (ActualMVT == MVT::i16) - LocVT = MVT::i16; - - CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false); - bool Res = - AssignFn(i, ValVT, LocVT, CCValAssign::Full, Ins[i].Flags, CCInfo); - assert(!Res && "Call operand has unhandled type"); - (void)Res; - } - - SmallVector<SDValue, 16> ArgValues; - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - - // Arguments stored in registers. 
- if (VA.isRegLoc()) { - EVT RegVT = VA.getLocVT(); - - SDValue ArgValue; - const TargetRegisterClass *RC; - - if (RegVT == MVT::i32) - RC = &ARM64::GPR32RegClass; - else if (RegVT == MVT::i64) - RC = &ARM64::GPR64RegClass; - else if (RegVT == MVT::f32) - RC = &ARM64::FPR32RegClass; - else if (RegVT == MVT::f64 || RegVT == MVT::v1i64 || - RegVT == MVT::v1f64 || RegVT == MVT::v2i32 || - RegVT == MVT::v4i16 || RegVT == MVT::v8i8) - RC = &ARM64::FPR64RegClass; - else if (RegVT == MVT::v2i64 || RegVT == MVT::v4i32 || - RegVT == MVT::v8i16 || RegVT == MVT::v16i8) - RC = &ARM64::FPR128RegClass; - else - llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); - - // Transform the arguments in physical registers into virtual ones. - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); - ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); - - // If this is an 8, 16 or 32-bit value, it is really passed promoted - // to 64 bits. Insert an assert[sz]ext to capture this, then - // truncate to the right size. - switch (VA.getLocInfo()) { - default: - llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: - break; - case CCValAssign::BCvt: - ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue); - break; - case CCValAssign::SExt: - ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue, - DAG.getValueType(VA.getValVT())); - ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue); - break; - case CCValAssign::ZExt: - ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue, - DAG.getValueType(VA.getValVT())); - ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue); - break; - } - - InVals.push_back(ArgValue); - - } else { // VA.isRegLoc() - assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem"); - unsigned ArgOffset = VA.getLocMemOffset(); - unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; - int FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true); - - // Create load nodes to retrieve arguments from the stack. - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), false, - false, false, 0)); - } - } - - // varargs - if (isVarArg) { - if (!Subtarget->isTargetDarwin()) { - // The AAPCS variadic function ABI is identical to the non-variadic - // one. As a result there may be more arguments in registers and we should - // save them for future reference. - saveVarArgRegisters(CCInfo, DAG, DL, Chain); - } - - ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>(); - // This will point to the next argument passed via stack. - unsigned StackOffset = CCInfo.getNextStackOffset(); - // We currently pass all varargs at 8-byte alignment. 
- StackOffset = ((StackOffset + 7) & ~7); - AFI->setVarArgsStackIndex(MFI->CreateFixedObject(4, StackOffset, true)); - } - - return Chain; -} - -void ARM64TargetLowering::saveVarArgRegisters(CCState &CCInfo, - SelectionDAG &DAG, SDLoc DL, - SDValue &Chain) const { - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - ARM64FunctionInfo *FuncInfo = MF.getInfo<ARM64FunctionInfo>(); - - SmallVector<SDValue, 8> MemOps; - - static const uint16_t GPRArgRegs[] = { ARM64::X0, ARM64::X1, ARM64::X2, - ARM64::X3, ARM64::X4, ARM64::X5, - ARM64::X6, ARM64::X7 }; - static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs); - unsigned FirstVariadicGPR = - CCInfo.getFirstUnallocated(GPRArgRegs, NumGPRArgRegs); - - static const uint16_t FPRArgRegs[] = { ARM64::Q0, ARM64::Q1, ARM64::Q2, - ARM64::Q3, ARM64::Q4, ARM64::Q5, - ARM64::Q6, ARM64::Q7 }; - static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs); - unsigned FirstVariadicFPR = - CCInfo.getFirstUnallocated(FPRArgRegs, NumFPRArgRegs); - - unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR); - int GPRIdx = 0; - if (GPRSaveSize != 0) { - GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false); - - SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy()); - - for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) { - unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &ARM64::GPR64RegClass); - SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); - SDValue Store = - DAG.getStore(Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(i * 8), false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(8, getPointerTy())); - } - } - - unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR); - int FPRIdx = 0; - if (FPRSaveSize != 0) { - FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false); - - SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy()); - - for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { - unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &ARM64::FPR128RegClass); - SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::v2i64); - SDValue Store = - DAG.getStore(Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(i * 16), false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(16, getPointerTy())); - } - } - - FuncInfo->setVarArgsGPRIndex(GPRIdx); - FuncInfo->setVarArgsGPRSize(GPRSaveSize); - FuncInfo->setVarArgsFPRIndex(FPRIdx); - FuncInfo->setVarArgsFPRSize(FPRSaveSize); - - if (!MemOps.empty()) { - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], - MemOps.size()); - } -} - -/// LowerCallResult - Lower the result values of a call into the -/// appropriate copies out of appropriate physical registers. -SDValue ARM64TargetLowering::LowerCallResult( - SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals, bool isThisReturn, - SDValue ThisVal) const { - CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; - // Assign locations to each value returned by this call. - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, RetCC); - - // Copy all of the result registers out of their specified physreg. 
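saveVarArgRegisters above spills the still-unallocated argument registers X0-X7 and Q0-Q7 into fixed stack slots so that va_arg can later walk them, and the next variadic stack offset is rounded up to 8-byte alignment. Nothing backend-specific is needed to exercise that machinery from source; a minimal variadic consumer, assuming only the usual C varargs rules:

#include <cassert>
#include <cstdarg>

// Sums 'n' further argument pairs passed as int and double.
// On AArch64 the ints come out of the spilled GPR save area and the
// doubles out of the FPR save area that the deleted code sets up.
double sumMixed(int n, ...) {
  va_list ap;
  va_start(ap, n);
  double total = 0;
  for (int i = 0; i < n; ++i) {
    total += va_arg(ap, int);
    total += va_arg(ap, double);
  }
  va_end(ap);
  return total;
}

int main() {
  assert(sumMixed(2, 1, 2.5, 3, 4.5) == 11.0);
}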
- for (unsigned i = 0; i != RVLocs.size(); ++i) { - CCValAssign VA = RVLocs[i]; - - // Pass 'this' value directly from the argument to return value, to avoid - // reg unit interference - if (i == 0 && isThisReturn) { - assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 && - "unexpected return calling convention register assignment"); - InVals.push_back(ThisVal); - continue; - } - - SDValue Val = - DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag); - Chain = Val.getValue(1); - InFlag = Val.getValue(2); - - switch (VA.getLocInfo()) { - default: - llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: - break; - case CCValAssign::BCvt: - Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); - break; - } - - InVals.push_back(Val); - } - - return Chain; -} - -bool ARM64TargetLowering::isEligibleForTailCallOptimization( - SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, - bool isCalleeStructRet, bool isCallerStructRet, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const { - // Look for obvious safe cases to perform tail call optimization that do not - // require ABI changes. This is what gcc calls sibcall. - - // Do not sibcall optimize vararg calls unless the call site is not passing - // any arguments. - if (isVarArg && !Outs.empty()) - return false; - - // Also avoid sibcall optimization if either caller or callee uses struct - // return semantics. - if (isCalleeStructRet || isCallerStructRet) - return false; - - // Note that currently ARM64 "C" calling convention and "Fast" calling - // convention are compatible. If/when that ever changes, we'll need to - // add checks here to make sure any interactions are OK. - - // If the callee takes no arguments then go on to check the results of the - // call. - if (!Outs.empty()) { - // Check if stack adjustment is needed. For now, do not do this if any - // argument is passed on the stack. - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - CCAssignFn *AssignFn = CCAssignFnForCall(CalleeCC, /*IsVarArg=*/false); - CCInfo.AnalyzeCallOperands(Outs, AssignFn); - if (CCInfo.getNextStackOffset()) { - // Check if the arguments are already laid out in the right way as - // the caller's fixed stack objects. - for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; - ++i, ++realArgIdx) { - CCValAssign &VA = ArgLocs[i]; - if (VA.getLocInfo() == CCValAssign::Indirect) - return false; - if (VA.needsCustom()) { - // Just don't handle anything that needs custom adjustments for now. - // If need be, we can revisit later, but we shouldn't ever end up - // here. - return false; - } else if (!VA.isRegLoc()) { - // Likewise, don't try to handle stack based arguments for the - // time being. - return false; - } - } - } - } - - return true; -} -/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain, -/// and add input and output parameter nodes. 
-SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const { - SelectionDAG &DAG = CLI.DAG; - SDLoc &DL = CLI.DL; - SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; - SmallVector<SDValue, 32> &OutVals = CLI.OutVals; - SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; - SDValue Chain = CLI.Chain; - SDValue Callee = CLI.Callee; - bool &IsTailCall = CLI.IsTailCall; - CallingConv::ID CallConv = CLI.CallConv; - bool IsVarArg = CLI.IsVarArg; - - MachineFunction &MF = DAG.getMachineFunction(); - bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); - bool IsThisReturn = false; - - // If tail calls are explicitly disabled, make sure not to use them. - if (!EnableARM64TailCalls) - IsTailCall = false; - - if (IsTailCall) { - // Check if it's really possible to do a tail call. - IsTailCall = isEligibleForTailCallOptimization( - Callee, CallConv, IsVarArg, IsStructRet, - MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG); - // We don't support GuaranteedTailCallOpt, only automatically - // detected sibcalls. - // FIXME: Re-evaluate. Is this true? Should it be true? - if (IsTailCall) - ++NumTailCalls; - } - - // Analyze operands of the call, assigning locations to each operand. - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - - if (IsVarArg) { - // Handle fixed and variable vector arguments differently. - // Variable vector arguments always go into memory. - unsigned NumArgs = Outs.size(); - - for (unsigned i = 0; i != NumArgs; ++i) { - MVT ArgVT = Outs[i].VT; - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, - /*IsVarArg=*/ !Outs[i].IsFixed); - bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); - assert(!Res && "Call operand has unhandled type"); - (void)Res; - } - } else { - // At this point, Outs[].VT may already be promoted to i32. To correctly - // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and - // i8 to CC_ARM64_AAPCS with i32 being ValVT and i8 being LocVT. - // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here - // we use a special version of AnalyzeCallOperands to pass in ValVT and - // LocVT. - unsigned NumArgs = Outs.size(); - for (unsigned i = 0; i != NumArgs; ++i) { - MVT ValVT = Outs[i].VT; - // Get type of the original argument. - EVT ActualVT = getValueType(CLI.Args[Outs[i].OrigArgIndex].Ty, - /*AllowUnknown*/ true); - MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT; - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. - MVT LocVT = ValVT; - if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) - LocVT = MVT::i8; - else if (ActualMVT == MVT::i16) - LocVT = MVT::i16; - - CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false); - bool Res = AssignFn(i, ValVT, LocVT, CCValAssign::Full, ArgFlags, CCInfo); - assert(!Res && "Call operand has unhandled type"); - (void)Res; - } - } - - // Get a count of how many bytes are to be pushed on the stack. - unsigned NumBytes = CCInfo.getNextStackOffset(); - - // Adjust the stack pointer for the new arguments... 
- // These operations are automatically eliminated by the prolog/epilog pass - if (!IsTailCall) - Chain = - DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), DL); - - SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, ARM64::SP, getPointerTy()); - - SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; - SmallVector<SDValue, 8> MemOpChains; - - // Walk the register/memloc assignments, inserting copies/loads. - for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; - ++i, ++realArgIdx) { - CCValAssign &VA = ArgLocs[i]; - SDValue Arg = OutVals[realArgIdx]; - ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; - - // Promote the value if needed. - switch (VA.getLocInfo()) { - default: - llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: - break; - case CCValAssign::SExt: - Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg); - break; - case CCValAssign::ZExt: - Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); - break; - case CCValAssign::AExt: - Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); - break; - case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); - break; - case CCValAssign::FPExt: - Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg); - break; - } - - if (VA.isRegLoc()) { - if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i64) { - assert(VA.getLocVT() == MVT::i64 && - "unexpected calling convention register assignment"); - assert(!Ins.empty() && Ins[0].VT == MVT::i64 && - "unexpected use of 'returned'"); - IsThisReturn = true; - } - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } else { - assert(VA.isMemLoc()); - // There's no reason we can't support stack args w/ tailcall, but - // we currently don't, so assert if we see one. - assert(!IsTailCall && "stack argument with tail call!?"); - unsigned LocMemOffset = VA.getLocMemOffset(); - SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); - PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff); - - // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already - // promoted to a legal register type i32, we should truncate Arg back to - // i1/i8/i16. - if (Arg.getValueType().isSimple() && - Arg.getValueType().getSimpleVT() == MVT::i32 && - (VA.getLocVT() == MVT::i1 || VA.getLocVT() == MVT::i8 || - VA.getLocVT() == MVT::i16)) - Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getLocVT(), Arg); - - SDValue Store = DAG.getStore(Chain, DL, Arg, PtrOff, - MachinePointerInfo::getStack(LocMemOffset), - false, false, 0); - MemOpChains.push_back(Store); - } - } - - if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOpChains[0], - MemOpChains.size()); - - // Build a sequence of copy-to-reg nodes chained together with token chain - // and flag operands which copy the outgoing args into the appropriate regs. - SDValue InFlag; - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every - // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol - // node so that legalize doesn't hack it. 
- if (getTargetMachine().getCodeModel() == CodeModel::Large && - Subtarget->isTargetMachO()) { - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - const GlobalValue *GV = G->getGlobal(); - bool InternalLinkage = GV->hasInternalLinkage(); - if (InternalLinkage) - Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0); - else { - Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, - ARM64II::MO_GOT); - Callee = DAG.getNode(ARM64ISD::LOADgot, DL, getPointerTy(), Callee); - } - } else if (ExternalSymbolSDNode *S = - dyn_cast<ExternalSymbolSDNode>(Callee)) { - const char *Sym = S->getSymbol(); - Callee = - DAG.getTargetExternalSymbol(Sym, getPointerTy(), ARM64II::MO_GOT); - Callee = DAG.getNode(ARM64ISD::LOADgot, DL, getPointerTy(), Callee); - } - } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - const GlobalValue *GV = G->getGlobal(); - Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0); - } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - const char *Sym = S->getSymbol(); - Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), 0); - } - - std::vector<SDValue> Ops; - Ops.push_back(Chain); - Ops.push_back(Callee); - - // Add argument registers to the end of the list so that they are known live - // into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - // Add a register mask operand representing the call-preserved registers. - const uint32_t *Mask; - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const ARM64RegisterInfo *ARI = static_cast<const ARM64RegisterInfo *>(TRI); - if (IsThisReturn) { - // For 'this' returns, use the X0-preserving mask if applicable - Mask = ARI->getThisReturnPreservedMask(CallConv); - if (!Mask) { - IsThisReturn = false; - Mask = ARI->getCallPreservedMask(CallConv); - } - } else - Mask = ARI->getCallPreservedMask(CallConv); - - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); - - if (InFlag.getNode()) - Ops.push_back(InFlag); - - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - - // If we're doing a tall call, use a TC_RETURN here rather than an - // actual call instruction. - if (IsTailCall) - return DAG.getNode(ARM64ISD::TC_RETURN, DL, NodeTys, &Ops[0], Ops.size()); - - // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(ARM64ISD::CALL, DL, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); - - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag, DL); - if (!Ins.empty()) - InFlag = Chain.getValue(1); - - // Handle result values, copying them out of physregs into vregs that we - // return. - return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG, - InVals, IsThisReturn, - IsThisReturn ? OutVals[0] : SDValue()); -} - -bool ARM64TargetLowering::CanLowerReturn( - CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { - CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? 
RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context); - return CCInfo.CheckReturn(Outs, RetCC); -} - -SDValue -ARM64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - SDLoc DL, SelectionDAG &DAG) const { - CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeReturn(Outs, RetCC); - - // Copy the result values into the output registers. - SDValue Flag; - SmallVector<SDValue, 4> RetOps(1, Chain); - for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); - ++i, ++realRVLocIdx) { - CCValAssign &VA = RVLocs[i]; - assert(VA.isRegLoc() && "Can only return in registers!"); - SDValue Arg = OutVals[realRVLocIdx]; - - switch (VA.getLocInfo()) { - default: - llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: - break; - case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); - break; - } - - Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag); - Flag = Chain.getValue(1); - RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); - } - - RetOps[0] = Chain; // Update chain. - - // Add the flag if we have it. - if (Flag.getNode()) - RetOps.push_back(Flag); - - return DAG.getNode(ARM64ISD::RET_FLAG, DL, MVT::Other, &RetOps[0], - RetOps.size()); -} - -//===----------------------------------------------------------------------===// -// Other Lowering Code -//===----------------------------------------------------------------------===// - -SDValue ARM64TargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); - SDLoc DL(Op); - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - unsigned char OpFlags = - Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); - - assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 && - "unexpected offset in global node"); - - // This also catched the large code model case for Darwin. - if ((OpFlags & ARM64II::MO_GOT) != 0) { - SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); - // FIXME: Once remat is capable of dealing with instructions with register - // operands, expand this into two nodes instead of using a wrapper node. - return DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, GotAddr); - } - - if (getTargetMachine().getCodeModel() == CodeModel::Large) { - const unsigned char MO_NC = ARM64II::MO_NC; - return DAG.getNode( - ARM64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G3), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G2 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G1 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G0 | MO_NC)); - } else { - // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and - // the only correct model on Darwin. 
- SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - OpFlags | ARM64II::MO_PAGE); - unsigned char LoFlags = OpFlags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC; - SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags); - - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); - } -} - -/// \brief Convert a TLS address reference into the correct sequence of loads -/// and calls to compute the variable's address (for Darwin, currently) and -/// return an SDValue containing the final node. - -/// Darwin only has one TLS scheme which must be capable of dealing with the -/// fully general situation, in the worst case. This means: -/// + "extern __thread" declaration. -/// + Defined in a possibly unknown dynamic library. -/// -/// The general system is that each __thread variable has a [3 x i64] descriptor -/// which contains information used by the runtime to calculate the address. The -/// only part of this the compiler needs to know about is the first xword, which -/// contains a function pointer that must be called with the address of the -/// entire descriptor in "x0". -/// -/// Since this descriptor may be in a different unit, in general even the -/// descriptor must be accessed via an indirect load. The "ideal" code sequence -/// is: -/// adrp x0, _var@TLVPPAGE -/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor -/// ldr x1, [x0] ; x1 contains 1st entry of descriptor, -/// ; the function pointer -/// blr x1 ; Uses descriptor address in x0 -/// ; Address of _var is now in x0. -/// -/// If the address of _var's descriptor *is* known to the linker, then it can -/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for -/// a slight efficiency gain. -SDValue -ARM64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { - assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin"); - - SDLoc DL(Op); - MVT PtrVT = getPointerTy(); - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - - SDValue TLVPAddr = - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS); - SDValue DescAddr = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, TLVPAddr); - - // The first entry in the descriptor is a function pointer that we must call - // to obtain the address of the variable. - SDValue Chain = DAG.getEntryNode(); - SDValue FuncTLVGet = - DAG.getLoad(MVT::i64, DL, Chain, DescAddr, MachinePointerInfo::getGOT(), - false, true, true, 8); - Chain = FuncTLVGet.getValue(1); - - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - MFI->setAdjustsStack(true); - - // TLS calls preserve all registers except those that absolutely must be - // trashed: X0 (it takes an argument), LR (it's a call) and CPSR (let's not be - // silly). - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const ARM64RegisterInfo *ARI = static_cast<const ARM64RegisterInfo *>(TRI); - const uint32_t *Mask = ARI->getTLSCallPreservedMask(); - - // Finally, we can make the call. This is just a degenerate version of a - // normal ARM64 call node: x0 takes the address of the descriptor, and returns - // the address of the variable in this thread. 
- Chain = DAG.getCopyToReg(Chain, DL, ARM64::X0, DescAddr, SDValue()); - Chain = DAG.getNode(ARM64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue), - Chain, FuncTLVGet, DAG.getRegister(ARM64::X0, MVT::i64), - DAG.getRegisterMask(Mask), Chain.getValue(1)); - return DAG.getCopyFromReg(Chain, DL, ARM64::X0, PtrVT, Chain.getValue(1)); -} - -/// When accessing thread-local variables under either the general-dynamic or -/// local-dynamic system, we make a "TLS-descriptor" call. The variable will -/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry -/// is a function pointer to carry out the resolution. This function takes the -/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All -/// other registers (except LR, CPSR) are preserved. -/// -/// Thus, the ideal call sequence on AArch64 is: -/// -/// adrp x0, :tlsdesc:thread_var -/// ldr x8, [x0, :tlsdesc_lo12:thread_var] -/// add x0, x0, :tlsdesc_lo12:thread_var -/// .tlsdesccall thread_var -/// blr x8 -/// (TPIDR_EL0 offset now in x0). -/// -/// The ".tlsdesccall" directive instructs the assembler to insert a particular -/// relocation to help the linker relax this sequence if it turns out to be too -/// conservative. -/// -/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this -/// is harmless. -SDValue ARM64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr, - SDValue DescAddr, SDLoc DL, - SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); - - // The function we need to call is simply the first entry in the GOT for this - // descriptor, load it in preparation. - SDValue Func = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, SymAddr); - - // TLS calls preserve all registers except those that absolutely must be - // trashed: X0 (it takes an argument), LR (it's a call) and CPSR (let's not be - // silly). - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const ARM64RegisterInfo *ARI = static_cast<const ARM64RegisterInfo *>(TRI); - const uint32_t *Mask = ARI->getTLSCallPreservedMask(); - - // The function takes only one argument: the address of the descriptor itself - // in X0. 
- SDValue Glue, Chain; - Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM64::X0, DescAddr, Glue); - Glue = Chain.getValue(1); - - // We're now ready to populate the argument list, as with a normal call: - SmallVector<SDValue, 6> Ops; - Ops.push_back(Chain); - Ops.push_back(Func); - Ops.push_back(SymAddr); - Ops.push_back(DAG.getRegister(ARM64::X0, PtrVT)); - Ops.push_back(DAG.getRegisterMask(Mask)); - Ops.push_back(Glue); - - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(ARM64ISD::TLSDESC_CALL, DL, NodeTys, &Ops[0], Ops.size()); - Glue = Chain.getValue(1); - - return DAG.getCopyFromReg(Chain, DL, ARM64::X0, PtrVT, Glue); -} - -SDValue ARM64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { - assert(Subtarget->isTargetELF() && "This function expects an ELF target"); - assert(getTargetMachine().getCodeModel() == CodeModel::Small && - "ELF TLS only supported in small memory model"); - const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); - - TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); - - SDValue TPOff; - EVT PtrVT = getPointerTy(); - SDLoc DL(Op); - const GlobalValue *GV = GA->getGlobal(); - - SDValue ThreadBase = DAG.getNode(ARM64ISD::THREAD_POINTER, DL, PtrVT); - - if (Model == TLSModel::LocalExec) { - SDValue HiVar = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_G1); - SDValue LoVar = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_G0 | ARM64II::MO_NC); - - TPOff = SDValue(DAG.getMachineNode(ARM64::MOVZXi, DL, PtrVT, HiVar, - DAG.getTargetConstant(16, MVT::i32)), - 0); - TPOff = SDValue(DAG.getMachineNode(ARM64::MOVKXi, DL, PtrVT, TPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), - 0); - } else if (Model == TLSModel::InitialExec) { - TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS); - TPOff = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, TPOff); - } else if (Model == TLSModel::LocalDynamic) { - // Local-dynamic accesses proceed in two phases. A general-dynamic TLS - // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate - // the beginning of the module's TLS region, followed by a DTPREL offset - // calculation. - - // These accesses will need deduplicating if there's more than one. - ARM64FunctionInfo *MFI = - DAG.getMachineFunction().getInfo<ARM64FunctionInfo>(); - MFI->incNumLocalDynamicTLSAccesses(); - - // Accesses used in this sequence go via the TLS descriptor which lives in - // the GOT. Prepare an address we can use to handle this. - SDValue HiDesc = DAG.getTargetExternalSymbol( - "_TLS_MODULE_BASE_", PtrVT, ARM64II::MO_TLS | ARM64II::MO_PAGE); - SDValue LoDesc = DAG.getTargetExternalSymbol( - "_TLS_MODULE_BASE_", PtrVT, - ARM64II::MO_TLS | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - - // First argument to the descriptor call is the address of the descriptor - // itself. - SDValue DescAddr = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, HiDesc); - DescAddr = DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); - - // The call needs a relocation too for linker relaxation. It doesn't make - // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of - // the address. - SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, - ARM64II::MO_TLS); - - // Now we can calculate the offset from TPIDR_EL0 to this module's - // thread-local area. 
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG); - - // Now use :dtprel_whatever: operations to calculate this variable's offset - // in its thread-storage area. - SDValue HiVar = DAG.getTargetGlobalAddress( - GV, DL, MVT::i64, 0, ARM64II::MO_TLS | ARM64II::MO_G1); - SDValue LoVar = DAG.getTargetGlobalAddress( - GV, DL, MVT::i64, 0, ARM64II::MO_TLS | ARM64II::MO_G0 | ARM64II::MO_NC); - - SDValue DTPOff = - SDValue(DAG.getMachineNode(ARM64::MOVZXi, DL, PtrVT, HiVar, - DAG.getTargetConstant(16, MVT::i32)), - 0); - DTPOff = SDValue(DAG.getMachineNode(ARM64::MOVKXi, DL, PtrVT, DTPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), - 0); - - TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff); - } else if (Model == TLSModel::GeneralDynamic) { - // Accesses used in this sequence go via the TLS descriptor which lives in - // the GOT. Prepare an address we can use to handle this. - SDValue HiDesc = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_PAGE); - SDValue LoDesc = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, - ARM64II::MO_TLS | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - - // First argument to the descriptor call is the address of the descriptor - // itself. - SDValue DescAddr = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, HiDesc); - DescAddr = DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); - - // The call needs a relocation too for linker relaxation. It doesn't make - // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of - // the address. - SDValue SymAddr = - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS); - - // Finally we can make a call to calculate the offset from tpidr_el0. - TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG); - } else - llvm_unreachable("Unsupported ELF TLS access model"); - - return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff); -} - -SDValue ARM64TargetLowering::LowerGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { - if (Subtarget->isTargetDarwin()) - return LowerDarwinGlobalTLSAddress(Op, DAG); - else if (Subtarget->isTargetELF()) - return LowerELFGlobalTLSAddress(Op, DAG); - - llvm_unreachable("Unexpected platform trying to use TLS"); -} -SDValue ARM64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); - SDValue LHS = Op.getOperand(2); - SDValue RHS = Op.getOperand(3); - SDValue Dest = Op.getOperand(4); - SDLoc dl(Op); - - // Handle f128 first, since lowering it will result in comparing the return - // value of a libcall against zero, which is just what the rest of LowerBR_CC - // is expecting to deal with. - if (LHS.getValueType() == MVT::f128) { - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); - - // If softenSetCCOperands returned a scalar, we need to compare the result - // against zero to select between true and false values. - if (RHS.getNode() == 0) { - RHS = DAG.getConstant(0, LHS.getValueType()); - CC = ISD::SETNE; - } - } - - // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch - // instruction. - unsigned Opc = LHS.getOpcode(); - if (LHS.getResNo() == 1 && isa<ConstantSDNode>(RHS) && - cast<ConstantSDNode>(RHS)->isOne() && - (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || - Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { - assert((CC == ISD::SETEQ || CC == ISD::SETNE) && - "Unexpected condition code."); - // Only lower legal XALUO ops. 
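// (Illustrative source that reaches this path, names being examples only:
//   int r;
//   if (__builtin_sadd_overflow(a, b, &r))   // llvm.sadd.with.overflow
//     ...
// which is typically selected as an ADDS followed by a single B.VS, rather
// than materializing the overflow bit and branching on it.)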
- if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0))) - return SDValue(); - - // The actual operation with overflow check. - ARM64CC::CondCode OFCC; - SDValue Value, Overflow; - std::tie(Value, Overflow) = getARM64XALUOOp(OFCC, LHS.getValue(0), DAG); - - if (CC == ISD::SETNE) - OFCC = getInvertedCondCode(OFCC); - SDValue CCVal = DAG.getConstant(OFCC, MVT::i32); - - return DAG.getNode(ARM64ISD::BRCOND, SDLoc(LHS), MVT::Other, Chain, Dest, - CCVal, Overflow); - } - - if (LHS.getValueType().isInteger()) { - assert((LHS.getValueType() == RHS.getValueType()) && - (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64)); - - // If the RHS of the comparison is zero, we can potentially fold this - // to a specialized branch. - const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS); - if (RHSC && RHSC->getZExtValue() == 0) { - if (CC == ISD::SETEQ) { - // See if we can use a TBZ to fold in an AND as well. - // TBZ has a smaller branch displacement than CBZ. If the offset is - // out of bounds, a late MI-layer pass rewrites branches. - // 403.gcc is an example that hits this case. - if (LHS.getOpcode() == ISD::AND && - isa<ConstantSDNode>(LHS.getOperand(1)) && - isPowerOf2_64(LHS.getConstantOperandVal(1))) { - SDValue Test = LHS.getOperand(0); - uint64_t Mask = LHS.getConstantOperandVal(1); - - // TBZ only operates on i64's, but the ext should be free. - if (Test.getValueType() == MVT::i32) - Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64); - - return DAG.getNode(ARM64ISD::TBZ, dl, MVT::Other, Chain, Test, - DAG.getConstant(Log2_64(Mask), MVT::i64), Dest); - } - - return DAG.getNode(ARM64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest); - } else if (CC == ISD::SETNE) { - // See if we can use a TBZ to fold in an AND as well. - // TBZ has a smaller branch displacement than CBZ. If the offset is - // out of bounds, a late MI-layer pass rewrites branches. - // 403.gcc is an example that hits this case. - if (LHS.getOpcode() == ISD::AND && - isa<ConstantSDNode>(LHS.getOperand(1)) && - isPowerOf2_64(LHS.getConstantOperandVal(1))) { - SDValue Test = LHS.getOperand(0); - uint64_t Mask = LHS.getConstantOperandVal(1); - - // TBNZ only operates on i64's, but the ext should be free. - if (Test.getValueType() == MVT::i32) - Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64); - - return DAG.getNode(ARM64ISD::TBNZ, dl, MVT::Other, Chain, Test, - DAG.getConstant(Log2_64(Mask), MVT::i64), Dest); - } - - return DAG.getNode(ARM64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest); - } - } - - SDValue CCVal; - SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl); - return DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, - Cmp); - } - - assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); - - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally - // clean. Some of them require two branches to implement. 
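// (For example, SETUEQ, "unordered or equal", has no single ARM64 condition
// code, so it is emitted as two branches to the same destination, roughly
// "fcmp; b.eq dest; b.vs dest", where VS covers the NaN case. This sketch is
// illustrative only.)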
- SDValue Cmp = emitComparison(LHS, RHS, dl, DAG);
- ARM64CC::CondCode CC1, CC2;
- changeFPCCToARM64CC(CC, CC1, CC2);
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
- SDValue BR1 =
- DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
- if (CC2 != ARM64CC::AL) {
- SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
- return DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
- Cmp);
- }
-
- return BR1;
-}
-
-SDValue ARM64TargetLowering::LowerFCOPYSIGN(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
-
- SDValue In1 = Op.getOperand(0);
- SDValue In2 = Op.getOperand(1);
- EVT SrcVT = In2.getValueType();
- if (SrcVT != VT) {
- if (SrcVT == MVT::f32 && VT == MVT::f64)
- In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
- else if (SrcVT == MVT::f64 && VT == MVT::f32)
- In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0));
- else
- // FIXME: Src type is different, bail out for now. Can VT really be a
- // vector type?
- return SDValue();
- }
-
- EVT VecVT;
- EVT EltVT;
- SDValue EltMask, VecVal1, VecVal2;
- if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
- EltVT = MVT::i32;
- VecVT = MVT::v4i32;
- EltMask = DAG.getConstant(0x80000000ULL, EltVT);
-
- if (!VT.isVector()) {
- VecVal1 = DAG.getTargetInsertSubreg(ARM64::ssub, DL, VecVT,
- DAG.getUNDEF(VecVT), In1);
- VecVal2 = DAG.getTargetInsertSubreg(ARM64::ssub, DL, VecVT,
- DAG.getUNDEF(VecVT), In2);
- } else {
- VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
- VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
- }
- } else if (VT == MVT::f64 || VT == MVT::v2f64) {
- EltVT = MVT::i64;
- VecVT = MVT::v2i64;
-
- // We want to materialize a mask with the high bit set, but the AdvSIMD
- // immediate moves cannot materialize that in a single instruction for
- // 64-bit elements. Instead, materialize zero and then negate it.
- EltMask = DAG.getConstant(0, EltVT);
-
- if (!VT.isVector()) {
- VecVal1 = DAG.getTargetInsertSubreg(ARM64::dsub, DL, VecVT,
- DAG.getUNDEF(VecVT), In1);
- VecVal2 = DAG.getTargetInsertSubreg(ARM64::dsub, DL, VecVT,
- DAG.getUNDEF(VecVT), In2);
- } else {
- VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
- VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
- }
- } else {
- llvm_unreachable("Invalid type for copysign!");
- }
-
- std::vector<SDValue> BuildVectorOps;
- for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i)
- BuildVectorOps.push_back(EltMask);
-
- SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT,
- &BuildVectorOps[0], BuildVectorOps.size());
-
- // If we couldn't materialize the mask above, then the mask vector will be
- // the zero vector, and we need to negate it here.
- if (VT == MVT::f64 || VT == MVT::v2f64) { - BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec); - BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec); - BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec); - } - - SDValue Sel = - DAG.getNode(ARM64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec); - - if (VT == MVT::f32) - return DAG.getTargetExtractSubreg(ARM64::ssub, DL, VT, Sel); - else if (VT == MVT::f64) - return DAG.getTargetExtractSubreg(ARM64::dsub, DL, VT, Sel); - else - return DAG.getNode(ISD::BITCAST, DL, VT, Sel); -} - -SDValue ARM64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { - if (DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::NoImplicitFloat)) - return SDValue(); - - // While there is no integer popcount instruction, it can - // be more efficiently lowered to the following sequence that uses - // AdvSIMD registers/instructions as long as the copies to/from - // the AdvSIMD registers are cheap. - // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd - // CNT V0.8B, V0.8B // 8xbyte pop-counts - // ADDV B0, V0.8B // sum 8xbyte pop-counts - // UMOV X0, V0.B[0] // copy byte result back to integer reg - SDValue Val = Op.getOperand(0); - SDLoc DL(Op); - EVT VT = Op.getValueType(); - SDValue ZeroVec = DAG.getUNDEF(MVT::v8i8); - - SDValue VecVal; - if (VT == MVT::i32) { - VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); - VecVal = - DAG.getTargetInsertSubreg(ARM64::ssub, DL, MVT::v8i8, ZeroVec, VecVal); - } else { - VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val); - } - - SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, VecVal); - SDValue UaddLV = DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, - DAG.getConstant(Intrinsic::arm64_neon_uaddlv, MVT::i32), CtPop); - - if (VT == MVT::i64) - UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV); - return UaddLV; -} - -SDValue ARM64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - - if (Op.getValueType().isVector()) - return LowerVSETCC(Op, DAG); - - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - SDLoc dl(Op); - - // We chose ZeroOrOneBooleanContents, so use zero and one. - EVT VT = Op.getValueType(); - SDValue TVal = DAG.getConstant(1, VT); - SDValue FVal = DAG.getConstant(0, VT); - - // Handle f128 first, since one possible outcome is a normal integer - // comparison which gets picked up by the next if statement. - if (LHS.getValueType() == MVT::f128) { - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); - - // If softenSetCCOperands returned a scalar, use it. - if (RHS.getNode() == 0) { - assert(LHS.getValueType() == Op.getValueType() && - "Unexpected setcc expansion!"); - return LHS; - } - } - - if (LHS.getValueType().isInteger()) { - SDValue CCVal; - SDValue Cmp = - getARM64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl); - - // Note that we inverted the condition above, so we reverse the order of - // the true and false operands here. This will allow the setcc to be - // matched to a single CSINC instruction. - return DAG.getNode(ARM64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp); - } - - // Now we know we're dealing with FP values. - assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); - - // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead - // and do the comparison. 
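// (Illustrative result for a simple ordered compare such as "a < b" on f32,
// register names being examples only:
//   fcmp s0, s1
//   cset w0, mi
// CSET is the CSINC-with-WZR form that the inverted-condition CSEL below is
// intended to match; conditions needing two ARM64 condition codes take the
// two-CSEL path further down.)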
- SDValue Cmp = emitComparison(LHS, RHS, dl, DAG); - - ARM64CC::CondCode CC1, CC2; - changeFPCCToARM64CC(CC, CC1, CC2); - if (CC2 == ARM64CC::AL) { - changeFPCCToARM64CC(ISD::getSetCCInverse(CC, false), CC1, CC2); - SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); - - // Note that we inverted the condition above, so we reverse the order of - // the true and false operands here. This will allow the setcc to be - // matched to a single CSINC instruction. - return DAG.getNode(ARM64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp); - } else { - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally - // clean. Some of them require two CSELs to implement. As is in this case, - // we emit the first CSEL and then emit a second using the output of the - // first as the RHS. We're effectively OR'ing the two CC's together. - - // FIXME: It would be nice if we could match the two CSELs to two CSINCs. - SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); - SDValue CS1 = DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); - - SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); - return DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); - } -} - -/// A SELECT_CC operation is really some kind of max or min if both values being -/// compared are, in some sense, equal to the results in either case. However, -/// it is permissible to compare f32 values and produce directly extended f64 -/// values. -/// -/// Extending the comparison operands would also be allowed, but is less likely -/// to happen in practice since their use is right here. Note that truncate -/// operations would *not* be semantically equivalent. -static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) { - if (Cmp == Result) - return true; - - ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp); - ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result); - if (CCmp && CResult && Cmp.getValueType() == MVT::f32 && - Result.getValueType() == MVT::f64) { - bool Lossy; - APFloat CmpVal = CCmp->getValueAPF(); - CmpVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &Lossy); - return CResult->getValueAPF().bitwiseIsEqual(CmpVal); - } - - return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp; -} - -SDValue ARM64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { - SDValue CC = Op->getOperand(0); - SDValue TVal = Op->getOperand(1); - SDValue FVal = Op->getOperand(2); - SDLoc DL(Op); - - unsigned Opc = CC.getOpcode(); - // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select - // instruction. - if (CC.getResNo() == 1 && - (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || - Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { - // Only lower legal XALUO ops. 
- if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0)))
- return SDValue();
-
- ARM64CC::CondCode OFCC;
- SDValue Value, Overflow;
- std::tie(Value, Overflow) = getARM64XALUOOp(OFCC, CC.getValue(0), DAG);
- SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
-
- return DAG.getNode(ARM64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, CCVal,
- Overflow);
- }
-
- if (CC.getOpcode() == ISD::SETCC)
- return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal,
- cast<CondCodeSDNode>(CC.getOperand(2))->get());
- else
- return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal,
- FVal, ISD::SETNE);
-}
-
-SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op,
- SelectionDAG &DAG) const {
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue TVal = Op.getOperand(2);
- SDValue FVal = Op.getOperand(3);
- SDLoc dl(Op);
-
- // Handle f128 first, because it will result in a comparison of some RTLIB
- // call result against zero.
- if (LHS.getValueType() == MVT::f128) {
- softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
-
- // If softenSetCCOperands returned a scalar, we need to compare the result
- // against zero to select between true and false values.
- if (RHS.getNode() == 0) {
- RHS = DAG.getConstant(0, LHS.getValueType());
- CC = ISD::SETNE;
- }
- }
-
- // Handle integers first.
- if (LHS.getValueType().isInteger()) {
- assert((LHS.getValueType() == RHS.getValueType()) &&
- (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
-
- unsigned Opcode = ARM64ISD::CSEL;
-
- // If both the TVal and the FVal are constants, see if we can swap them in
- // order to form a CSINV or CSINC out of them.
- ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
- ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
-
- if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
- std::swap(TVal, FVal);
- std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
- } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
- std::swap(TVal, FVal);
- std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
- } else if (TVal.getOpcode() == ISD::XOR) {
- // If TVal is a NOT we want to swap TVal and FVal so that we can match
- // with a CSINV rather than a CSEL.
- ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(1));
-
- if (CVal && CVal->isAllOnesValue()) {
- std::swap(TVal, FVal);
- std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
- }
- } else if (TVal.getOpcode() == ISD::SUB) {
- // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
- // that we can match with a CSNEG rather than a CSEL.
- ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(0));
-
- if (CVal && CVal->isNullValue()) {
- std::swap(TVal, FVal);
- std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
- }
- } else if (CTVal && CFVal) {
- const int64_t TrueVal = CTVal->getSExtValue();
- const int64_t FalseVal = CFVal->getSExtValue();
- bool Swap = false;
-
- // If both TVal and FVal are constants, see if FVal is the
- // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
- // instead of a CSEL in that case.
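// (For reference, the conditional-select family targeted here computes, per
// the ARMv8 ISA:
//   CSEL  Rd = cond ? Rn : Rm
//   CSINV Rd = cond ? Rn : ~Rm
//   CSNEG Rd = cond ? Rn : -Rm
//   CSINC Rd = cond ? Rn : Rm + 1
// so, for example, "cond ? 1 : 0" needs only a CSINC of WZR with itself, the
// CSET alias, which is why constant TVal/FVal pairs are special-cased in the
// surrounding code.)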
- if (TrueVal == ~FalseVal) { - Opcode = ARM64ISD::CSINV; - } else if (TrueVal == -FalseVal) { - Opcode = ARM64ISD::CSNEG; - } else if (TVal.getValueType() == MVT::i32) { - // If our operands are only 32-bit wide, make sure we use 32-bit - // arithmetic for the check whether we can use CSINC. This ensures that - // the addition in the check will wrap around properly in case there is - // an overflow (which would not be the case if we do the check with - // 64-bit arithmetic). - const uint32_t TrueVal32 = CTVal->getZExtValue(); - const uint32_t FalseVal32 = CFVal->getZExtValue(); - - if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) { - Opcode = ARM64ISD::CSINC; - - if (TrueVal32 > FalseVal32) { - Swap = true; - } - } - // 64-bit check whether we can use CSINC. - } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) { - Opcode = ARM64ISD::CSINC; - - if (TrueVal > FalseVal) { - Swap = true; - } - } - - // Swap TVal and FVal if necessary. - if (Swap) { - std::swap(TVal, FVal); - std::swap(CTVal, CFVal); - CC = ISD::getSetCCInverse(CC, true); - } - - if (Opcode != ARM64ISD::CSEL) { - // Drop FVal since we can get its value by simply inverting/negating - // TVal. - FVal = TVal; - } - } - - SDValue CCVal; - SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl); - - EVT VT = Op.getValueType(); - return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp); - } - - // Now we know we're dealing with FP values. - assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); - assert(LHS.getValueType() == RHS.getValueType()); - EVT VT = Op.getValueType(); - - // Try to match this select into a max/min operation, which have dedicated - // opcode in the instruction set. - // NOTE: This is not correct in the presence of NaNs, so we only enable this - // in no-NaNs mode. - if (getTargetMachine().Options.NoNaNsFPMath) { - if (selectCCOpsAreFMaxCompatible(LHS, FVal) && - selectCCOpsAreFMaxCompatible(RHS, TVal)) { - CC = ISD::getSetCCSwappedOperands(CC); - std::swap(TVal, FVal); - } - - if (selectCCOpsAreFMaxCompatible(LHS, TVal) && - selectCCOpsAreFMaxCompatible(RHS, FVal)) { - switch (CC) { - default: - break; - case ISD::SETGT: - case ISD::SETGE: - case ISD::SETUGT: - case ISD::SETUGE: - case ISD::SETOGT: - case ISD::SETOGE: - return DAG.getNode(ARM64ISD::FMAX, dl, VT, TVal, FVal); - break; - case ISD::SETLT: - case ISD::SETLE: - case ISD::SETULT: - case ISD::SETULE: - case ISD::SETOLT: - case ISD::SETOLE: - return DAG.getNode(ARM64ISD::FMIN, dl, VT, TVal, FVal); - break; - } - } - } - - // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead - // and do the comparison. - SDValue Cmp = emitComparison(LHS, RHS, dl, DAG); - - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally - // clean. Some of them require two CSELs to implement. - ARM64CC::CondCode CC1, CC2; - changeFPCCToARM64CC(CC, CC1, CC2); - SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); - SDValue CS1 = DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); - - // If we need a second CSEL, emit it, using the output of the first as the - // RHS. We're effectively OR'ing the two CC's together. - if (CC2 != ARM64CC::AL) { - SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); - return DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); - } - - // Otherwise, return the output of the first CSEL. - return CS1; -} - -SDValue ARM64TargetLowering::LowerJumpTable(SDValue Op, - SelectionDAG &DAG) const { - // Jump table entries as PC relative offsets. 
No additional tweaking - // is necessary here. Just get the address of the jump table. - JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - EVT PtrVT = getPointerTy(); - SDLoc DL(Op); - - SDValue Hi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, ARM64II::MO_PAGE); - SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); -} - -SDValue ARM64TargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) const { - ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - EVT PtrVT = getPointerTy(); - SDLoc DL(Op); - - if (getTargetMachine().getCodeModel() == CodeModel::Large) { - // Use the GOT for the large code model on iOS. - if (Subtarget->isTargetMachO()) { - SDValue GotAddr = DAG.getTargetConstantPool( - CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - ARM64II::MO_GOT); - return DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, GotAddr); - } - - const unsigned char MO_NC = ARM64II::MO_NC; - return DAG.getNode( - ARM64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G3), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G2 | MO_NC), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G1 | MO_NC), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G0 | MO_NC)); - } else { - // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on - // ELF, the only valid one on Darwin. - SDValue Hi = - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_PAGE); - SDValue Lo = DAG.getTargetConstantPool( - CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); - } -} - -SDValue ARM64TargetLowering::LowerBlockAddress(SDValue Op, - SelectionDAG &DAG) const { - const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - EVT PtrVT = getPointerTy(); - SDLoc DL(Op); - if (getTargetMachine().getCodeModel() == CodeModel::Large && - !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = ARM64II::MO_NC; - return DAG.getNode( - ARM64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G3), - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G2 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G1 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G0 | MO_NC)); - } else { - SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_PAGE); - SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_PAGEOFF | - ARM64II::MO_NC); - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); - } -} - -SDValue ARM64TargetLowering::LowerDarwin_VASTART(SDValue Op, - SelectionDAG &DAG) const { - ARM64FunctionInfo *FuncInfo = - DAG.getMachineFunction().getInfo<ARM64FunctionInfo>(); - - SDLoc DL(Op); - SDValue FR = - DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy()); - const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), - 
MachinePointerInfo(SV), false, false, 0); -} - -SDValue ARM64TargetLowering::LowerAAPCS_VASTART(SDValue Op, - SelectionDAG &DAG) const { - // The layout of the va_list struct is specified in the AArch64 Procedure Call - // Standard, section B.3. - MachineFunction &MF = DAG.getMachineFunction(); - ARM64FunctionInfo *FuncInfo = MF.getInfo<ARM64FunctionInfo>(); - SDLoc DL(Op); - - SDValue Chain = Op.getOperand(0); - SDValue VAList = Op.getOperand(1); - const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - SmallVector<SDValue, 4> MemOps; - - // void *__stack at offset 0 - SDValue Stack = - DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy()); - MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList, - MachinePointerInfo(SV), false, false, 8)); - - // void *__gr_top at offset 8 - int GPRSize = FuncInfo->getVarArgsGPRSize(); - if (GPRSize > 0) { - SDValue GRTop, GRTopAddr; - - GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(8, getPointerTy())); - - GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), getPointerTy()); - GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop, - DAG.getConstant(GPRSize, getPointerTy())); - - MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr, - MachinePointerInfo(SV, 8), false, false, 8)); - } - - // void *__vr_top at offset 16 - int FPRSize = FuncInfo->getVarArgsFPRSize(); - if (FPRSize > 0) { - SDValue VRTop, VRTopAddr; - VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(16, getPointerTy())); - - VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), getPointerTy()); - VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop, - DAG.getConstant(FPRSize, getPointerTy())); - - MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr, - MachinePointerInfo(SV, 16), false, false, 8)); - } - - // int __gr_offs at offset 24 - SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(24, getPointerTy())); - MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32), - GROffsAddr, MachinePointerInfo(SV, 24), false, - false, 4)); - - // int __vr_offs at offset 28 - SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(28, getPointerTy())); - MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32), - VROffsAddr, MachinePointerInfo(SV, 28), false, - false, 4)); - - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], - MemOps.size()); -} - -SDValue ARM64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { - return Subtarget->isTargetDarwin() ? LowerDarwin_VASTART(Op, DAG) - : LowerAAPCS_VASTART(Op, DAG); -} - -SDValue ARM64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { - // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single - // pointer. - unsigned VaListSize = Subtarget->isTargetDarwin() ? 
8 : 32; - const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); - const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - - return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1), - Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32), - 8, false, false, MachinePointerInfo(DestSV), - MachinePointerInfo(SrcSV)); -} - -SDValue ARM64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { - assert(Subtarget->isTargetDarwin() && - "automatic va_arg instruction only works on Darwin"); - - const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - EVT VT = Op.getValueType(); - SDLoc DL(Op); - SDValue Chain = Op.getOperand(0); - SDValue Addr = Op.getOperand(1); - unsigned Align = Op.getConstantOperandVal(3); - - SDValue VAList = DAG.getLoad(getPointerTy(), DL, Chain, Addr, - MachinePointerInfo(V), false, false, false, 0); - Chain = VAList.getValue(1); - - if (Align > 8) { - assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2"); - VAList = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(Align - 1, getPointerTy())); - VAList = DAG.getNode(ISD::AND, DL, getPointerTy(), VAList, - DAG.getConstant(-(int64_t)Align, getPointerTy())); - } - - Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); - uint64_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy); - - // Scalar integer and FP values smaller than 64 bits are implicitly extended - // up to 64 bits. At the very least, we have to increase the striding of the - // vaargs list to match this, and for FP values we need to introduce - // FP_ROUND nodes as well. - if (VT.isInteger() && !VT.isVector()) - ArgSize = 8; - bool NeedFPTrunc = false; - if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) { - ArgSize = 8; - NeedFPTrunc = true; - } - - // Increment the pointer, VAList, to the next vaarg - SDValue VANext = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(ArgSize, getPointerTy())); - // Store the incremented VAList to the legalized pointer - SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V), - false, false, 0); - - // Load the actual argument out of the pointer VAList - if (NeedFPTrunc) { - // Load the value as an f64. - SDValue WideFP = DAG.getLoad(MVT::f64, DL, APStore, VAList, - MachinePointerInfo(), false, false, false, 0); - // Round the value down to an f32. - SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0), - DAG.getIntPtrConstant(1)); - SDValue Ops[] = { NarrowFP, WideFP.getValue(1) }; - // Merge the rounded value with the chain output of the load. 
- return DAG.getMergeValues(Ops, 2, DL); - } - - return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo(), false, - false, false, 0); -} - -SDValue ARM64TargetLowering::LowerFRAMEADDR(SDValue Op, - SelectionDAG &DAG) const { - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - MFI->setFrameAddressIsTaken(true); - - EVT VT = Op.getValueType(); - SDLoc DL(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, ARM64::FP, VT); - while (Depth--) - FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr, - MachinePointerInfo(), false, false, false, 0); - return FrameAddr; -} - -SDValue ARM64TargetLowering::LowerRETURNADDR(SDValue Op, - SelectionDAG &DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MFI->setReturnAddressIsTaken(true); - - EVT VT = Op.getValueType(); - SDLoc DL(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - if (Depth) { - SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); - SDValue Offset = DAG.getConstant(8, getPointerTy()); - return DAG.getLoad(VT, DL, DAG.getEntryNode(), - DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), - MachinePointerInfo(), false, false, false, 0); - } - - // Return LR, which contains the return address. Mark it an implicit live-in. - unsigned Reg = MF.addLiveIn(ARM64::LR, &ARM64::GPR64RegClass); - return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT); -} - -/// LowerShiftRightParts - Lower SRA_PARTS, which returns two -/// i64 values and take a 2 x i64 value to shift plus a shift amount. -SDValue ARM64TargetLowering::LowerShiftRightParts(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getNumOperands() == 3 && "Not a double-shift!"); - EVT VT = Op.getValueType(); - unsigned VTBits = VT.getSizeInBits(); - SDLoc dl(Op); - SDValue ShOpLo = Op.getOperand(0); - SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); - SDValue ARMcc; - unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; - - assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); - - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, - DAG.getConstant(VTBits, MVT::i64), ShAmt); - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, - DAG.getConstant(VTBits, MVT::i64)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); - - SDValue Cmp = - emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64), dl, DAG); - SDValue CCVal = DAG.getConstant(ARM64CC::GE, MVT::i32); - - SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); - SDValue Lo = - DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); - - // ARM64 shifts larger than the register width are wrapped rather than - // clamped, so we can't just emit "hi >> x". - SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); - SDValue TrueValHi = Opc == ISD::SRA - ? DAG.getNode(Opc, dl, VT, ShOpHi, - DAG.getConstant(VTBits - 1, MVT::i64)) - : DAG.getConstant(0, VT); - SDValue Hi = - DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp); - - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); -} - -/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two -/// i64 values and take a 2 x i64 value to shift plus a shift amount. 
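// (Conceptually, for a 128-bit value hi:lo shifted left by "a", 0 <= a < 128,
// the lowering below computes, as a rough sketch with the CSELs selecting
// between the two cases because hardware shifts wrap at 64:
//   if (a >= 64) { Hi = lo << (a - 64);               Lo = 0;       }
//   else         { Hi = (hi << a) | (lo >> (64 - a)); Lo = lo << a; } )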
-SDValue ARM64TargetLowering::LowerShiftLeftParts(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getNumOperands() == 3 && "Not a double-shift!"); - EVT VT = Op.getValueType(); - unsigned VTBits = VT.getSizeInBits(); - SDLoc dl(Op); - SDValue ShOpLo = Op.getOperand(0); - SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); - SDValue ARMcc; - - assert(Op.getOpcode() == ISD::SHL_PARTS); - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, - DAG.getConstant(VTBits, MVT::i64), ShAmt); - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, - DAG.getConstant(VTBits, MVT::i64)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); - SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); - - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - - SDValue Cmp = - emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64), dl, DAG); - SDValue CCVal = DAG.getConstant(ARM64CC::GE, MVT::i32); - SDValue Hi = DAG.getNode(ARM64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp); - - // ARM64 shifts of larger than register sizes are wrapped rather than clamped, - // so we can't just emit "lo << a" if a is too big. - SDValue TrueValLo = DAG.getConstant(0, VT); - SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); - SDValue Lo = - DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); - - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); -} - -bool -ARM64TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { - // The ARM64 target doesn't support folding offsets into global addresses. - return false; -} - -bool ARM64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { - // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases. - // FIXME: We should be able to handle f128 as well with a clever lowering. - if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32)) - return true; - - if (VT == MVT::f64) - return ARM64_AM::getFP64Imm(Imm) != -1; - else if (VT == MVT::f32) - return ARM64_AM::getFP32Imm(Imm) != -1; - return false; -} - -//===----------------------------------------------------------------------===// -// ARM64 Optimization Hooks -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// ARM64 Inline Assembly Support -//===----------------------------------------------------------------------===// - -// Table of Constraints -// TODO: This is the current set of constraints supported by ARM for the -// compiler, not all of them may make sense, e.g. S may be difficult to support. 
-// -// r - A general register -// w - An FP/SIMD register of some size in the range v0-v31 -// x - An FP/SIMD register of some size in the range v0-v15 -// I - Constant that can be used with an ADD instruction -// J - Constant that can be used with a SUB instruction -// K - Constant that can be used with a 32-bit logical instruction -// L - Constant that can be used with a 64-bit logical instruction -// M - Constant that can be used as a 32-bit MOV immediate -// N - Constant that can be used as a 64-bit MOV immediate -// Q - A memory reference with base register and no offset -// S - A symbolic address -// Y - Floating point constant zero -// Z - Integer constant zero -// -// Note that general register operands will be output using their 64-bit x -// register name, whatever the size of the variable, unless the asm operand -// is prefixed by the %w modifier. Floating-point and SIMD register operands -// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or -// %q modifier. - -/// getConstraintType - Given a constraint letter, return the type of -/// constraint it is for this target. -ARM64TargetLowering::ConstraintType -ARM64TargetLowering::getConstraintType(const std::string &Constraint) const { - if (Constraint.size() == 1) { - switch (Constraint[0]) { - default: - break; - case 'z': - return C_Other; - case 'x': - case 'w': - return C_RegisterClass; - // An address with a single base register. Due to the way we - // currently handle addresses it is the same as 'r'. - case 'Q': - return C_Memory; - } - } - return TargetLowering::getConstraintType(Constraint); -} - -/// Examine constraint type and operand type and determine a weight value. -/// This object must already have been set up with the operand type -/// and the current alternative constraint selected. -TargetLowering::ConstraintWeight -ARM64TargetLowering::getSingleConstraintMatchWeight( - AsmOperandInfo &info, const char *constraint) const { - ConstraintWeight weight = CW_Invalid; - Value *CallOperandVal = info.CallOperandVal; - // If we don't have a value, we can't do a match, - // but allow it at the lowest weight. - if (CallOperandVal == NULL) - return CW_Default; - Type *type = CallOperandVal->getType(); - // Look at the constraint type. - switch (*constraint) { - default: - weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); - break; - case 'x': - case 'w': - if (type->isFloatingPointTy() || type->isVectorTy()) - weight = CW_Register; - break; - case 'z': - weight = CW_Constant; - break; - } - return weight; -} - -std::pair<unsigned, const TargetRegisterClass *> -ARM64TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { - if (Constraint.size() == 1) { - switch (Constraint[0]) { - case 'r': - if (VT.getSizeInBits() == 64) - return std::make_pair(0U, &ARM64::GPR64commonRegClass); - return std::make_pair(0U, &ARM64::GPR32commonRegClass); - case 'w': - if (VT == MVT::f32) - return std::make_pair(0U, &ARM64::FPR32RegClass); - if (VT.getSizeInBits() == 64) - return std::make_pair(0U, &ARM64::FPR64RegClass); - if (VT.getSizeInBits() == 128) - return std::make_pair(0U, &ARM64::FPR128RegClass); - break; - // The instructions that this constraint is designed for can - // only take 128-bit registers so just use that regclass. 
- case 'x': - if (VT.getSizeInBits() == 128) - return std::make_pair(0U, &ARM64::FPR128_loRegClass); - break; - } - } - if (StringRef("{cc}").equals_lower(Constraint)) - return std::make_pair(unsigned(ARM64::CPSR), &ARM64::CCRRegClass); - - // Use the default implementation in TargetLowering to convert the register - // constraint into a member of a register class. - std::pair<unsigned, const TargetRegisterClass *> Res; - Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); - - // Not found as a standard register? - if (Res.second == 0) { - unsigned Size = Constraint.size(); - if ((Size == 4 || Size == 5) && Constraint[0] == '{' && - tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') { - const std::string Reg = - std::string(&Constraint[2], &Constraint[Size - 1]); - int RegNo = atoi(Reg.c_str()); - if (RegNo >= 0 && RegNo <= 31) { - // v0 - v31 are aliases of q0 - q31. - // By default we'll emit v0-v31 for this unless there's a modifier where - // we'll emit the correct register as well. - Res.first = ARM64::FPR128RegClass.getRegister(RegNo); - Res.second = &ARM64::FPR128RegClass; - } - } - } - - return Res; -} - -/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops -/// vector. If it is invalid, don't add anything to Ops. -void ARM64TargetLowering::LowerAsmOperandForConstraint( - SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, - SelectionDAG &DAG) const { - SDValue Result(0, 0); - - // Currently only support length 1 constraints. - if (Constraint.length() != 1) - return; - - char ConstraintLetter = Constraint[0]; - switch (ConstraintLetter) { - default: - break; - - // This set of constraints deal with valid constants for various instructions. - // Validate and return a target constant for them if we can. - case 'z': { - // 'z' maps to xzr or wzr so it needs an input of 0. - ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); - if (!C || C->getZExtValue() != 0) - return; - - if (Op.getValueType() == MVT::i64) - Result = DAG.getRegister(ARM64::XZR, MVT::i64); - else - Result = DAG.getRegister(ARM64::WZR, MVT::i32); - break; - } - - case 'I': - case 'J': - case 'K': - case 'L': - case 'M': - case 'N': - ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); - if (!C) - return; - - // Grab the value and do some validation. - uint64_t CVal = C->getZExtValue(); - switch (ConstraintLetter) { - // The I constraint applies only to simple ADD or SUB immediate operands: - // i.e. 0 to 4095 with optional shift by 12 - // The J constraint applies only to ADD or SUB immediates that would be - // valid when negated, i.e. if [an add pattern] were to be output as a SUB - // instruction [or vice versa], in other words -1 to -4095 with optional - // left shift by 12. - case 'I': - if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal)) - break; - return; - case 'J': { - uint64_t NVal = -C->getSExtValue(); - if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) - break; - return; - } - // The K and L constraints apply *only* to logical immediates, including - // what used to be the MOVI alias for ORR (though the MOVI alias has now - // been removed and MOV should be used). So these constraints have to - // distinguish between bit patterns that are valid 32-bit or 64-bit - // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but - // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice - // versa. 
- case 'K': - if (ARM64_AM::isLogicalImmediate(CVal, 32)) - break; - return; - case 'L': - if (ARM64_AM::isLogicalImmediate(CVal, 64)) - break; - return; - // The M and N constraints are a superset of K and L respectively, for use - // with the MOV (immediate) alias. As well as the logical immediates they - // also match 32 or 64-bit immediates that can be loaded either using a - // *single* MOVZ or MOVN , such as 32-bit 0x12340000, 0x00001234, 0xffffedca - // (M) or 64-bit 0x1234000000000000 (N) etc. - // As a note some of this code is liberally stolen from the asm parser. - case 'M': { - if (!isUInt<32>(CVal)) - return; - if (ARM64_AM::isLogicalImmediate(CVal, 32)) - break; - if ((CVal & 0xFFFF) == CVal) - break; - if ((CVal & 0xFFFF0000ULL) == CVal) - break; - uint64_t NCVal = ~(uint32_t)CVal; - if ((NCVal & 0xFFFFULL) == NCVal) - break; - if ((NCVal & 0xFFFF0000ULL) == NCVal) - break; - return; - } - case 'N': { - if (ARM64_AM::isLogicalImmediate(CVal, 64)) - break; - if ((CVal & 0xFFFFULL) == CVal) - break; - if ((CVal & 0xFFFF0000ULL) == CVal) - break; - if ((CVal & 0xFFFF00000000ULL) == CVal) - break; - if ((CVal & 0xFFFF000000000000ULL) == CVal) - break; - uint64_t NCVal = ~CVal; - if ((NCVal & 0xFFFFULL) == NCVal) - break; - if ((NCVal & 0xFFFF0000ULL) == NCVal) - break; - if ((NCVal & 0xFFFF00000000ULL) == NCVal) - break; - if ((NCVal & 0xFFFF000000000000ULL) == NCVal) - break; - return; - } - default: - return; - } - - // All assembler immediates are 64-bit integers. - Result = DAG.getTargetConstant(CVal, MVT::i64); - break; - } - - if (Result.getNode()) { - Ops.push_back(Result); - return; - } - - return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); -} - -//===----------------------------------------------------------------------===// -// ARM64 Advanced SIMD Support -//===----------------------------------------------------------------------===// - -/// WidenVector - Given a value in the V64 register class, produce the -/// equivalent value in the V128 register class. -static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) { - EVT VT = V64Reg.getValueType(); - unsigned NarrowSize = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType().getSimpleVT(); - MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); - SDLoc DL(V64Reg); - - return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy), - V64Reg, DAG.getConstant(0, MVT::i32)); -} - -/// getExtFactor - Determine the adjustment factor for the position when -/// generating an "extract from vector registers" instruction. -static unsigned getExtFactor(SDValue &V) { - EVT EltType = V.getValueType().getVectorElementType(); - return EltType.getSizeInBits() / 8; -} - -/// NarrowVector - Given a value in the V128 register class, produce the -/// equivalent value in the V64 register class. -static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { - EVT VT = V128Reg.getValueType(); - unsigned WideSize = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType().getSimpleVT(); - MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); - SDLoc DL(V128Reg); - - return DAG.getTargetExtractSubreg(ARM64::dsub, DL, NarrowTy, V128Reg); -} - -// Gather data to see if the operation can be modelled as a -// shuffle in combination with VEXTs. 
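// (For reference, the EXT instruction this relies on extracts a contiguous
// run of bytes from the concatenation of its two sources, starting at an
// immediate byte offset; illustratively,
//   ext v0.16b, v1.16b, v2.16b, #4
// yields bytes 4..15 of v1 followed by bytes 0..3 of v2, which is what lets
// a BUILD_VECTOR drawing from two halves of a wider source become an EXT
// plus a shuffle.)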
-SDValue ARM64TargetLowering::ReconstructShuffle(SDValue Op, - SelectionDAG &DAG) const { - SDLoc dl(Op); - EVT VT = Op.getValueType(); - unsigned NumElts = VT.getVectorNumElements(); - - SmallVector<SDValue, 2> SourceVecs; - SmallVector<unsigned, 2> MinElts; - SmallVector<unsigned, 2> MaxElts; - - for (unsigned i = 0; i < NumElts; ++i) { - SDValue V = Op.getOperand(i); - if (V.getOpcode() == ISD::UNDEF) - continue; - else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { - // A shuffle can only come from building a vector from various - // elements of other vectors. - return SDValue(); - } - - // Record this extraction against the appropriate vector if possible... - SDValue SourceVec = V.getOperand(0); - unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); - bool FoundSource = false; - for (unsigned j = 0; j < SourceVecs.size(); ++j) { - if (SourceVecs[j] == SourceVec) { - if (MinElts[j] > EltNo) - MinElts[j] = EltNo; - if (MaxElts[j] < EltNo) - MaxElts[j] = EltNo; - FoundSource = true; - break; - } - } - - // Or record a new source if not... - if (!FoundSource) { - SourceVecs.push_back(SourceVec); - MinElts.push_back(EltNo); - MaxElts.push_back(EltNo); - } - } - - // Currently only do something sane when at most two source vectors - // involved. - if (SourceVecs.size() > 2) - return SDValue(); - - SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; - int VEXTOffsets[2] = { 0, 0 }; - - // This loop extracts the usage patterns of the source vectors - // and prepares appropriate SDValues for a shuffle if possible. - for (unsigned i = 0; i < SourceVecs.size(); ++i) { - if (SourceVecs[i].getValueType() == VT) { - // No VEXT necessary - ShuffleSrcs[i] = SourceVecs[i]; - VEXTOffsets[i] = 0; - continue; - } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { - // It probably isn't worth padding out a smaller vector just to - // break it down again in a shuffle. - return SDValue(); - } - - // Don't attempt to extract subvectors from BUILD_VECTOR sources - // that expand or trunc the original value. - // TODO: We can try to bitcast and ANY_EXTEND the result but - // we need to consider the cost of vector ANY_EXTEND, and the - // legality of all the types. - if (SourceVecs[i].getValueType().getVectorElementType() != - VT.getVectorElementType()) - return SDValue(); - - // Since only 64-bit and 128-bit vectors are legal on ARM and - // we've eliminated the other cases... 
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts && - "unexpected vector sizes in ReconstructShuffle"); - - if (MaxElts[i] - MinElts[i] >= NumElts) { - // Span too large for a VEXT to cope - return SDValue(); - } - - if (MinElts[i] >= NumElts) { - // The extraction can just take the second half - VEXTOffsets[i] = NumElts; - ShuffleSrcs[i] = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], - DAG.getIntPtrConstant(NumElts)); - } else if (MaxElts[i] < NumElts) { - // The extraction can just take the first half - VEXTOffsets[i] = 0; - ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], DAG.getIntPtrConstant(0)); - } else { - // An actual VEXT is needed - VEXTOffsets[i] = MinElts[i]; - SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], DAG.getIntPtrConstant(0)); - SDValue VEXTSrc2 = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], - DAG.getIntPtrConstant(NumElts)); - unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1); - ShuffleSrcs[i] = DAG.getNode(ARM64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2, - DAG.getConstant(Imm, MVT::i32)); - } - } - - SmallVector<int, 8> Mask; - - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Entry = Op.getOperand(i); - if (Entry.getOpcode() == ISD::UNDEF) { - Mask.push_back(-1); - continue; - } - - SDValue ExtractVec = Entry.getOperand(0); - int ExtractElt = - cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue(); - if (ExtractVec == SourceVecs[0]) { - Mask.push_back(ExtractElt - VEXTOffsets[0]); - } else { - Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); - } - } - - // Final check before we try to produce nonsense... - if (isShuffleMaskLegal(Mask, VT)) - return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], - &Mask[0]); - - return SDValue(); -} - -// check if an EXT instruction can handle the shuffle mask when the -// vector sources of the shuffle are the same. -static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) { - unsigned NumElts = VT.getVectorNumElements(); - - // Assume that the first shuffle index is not UNDEF. Fail if it is. - if (M[0] < 0) - return false; - - Imm = M[0]; - - // If this is a VEXT shuffle, the immediate value is the index of the first - // element. The other shuffle indices must be the successive elements after - // the first one. - unsigned ExpectedElt = Imm; - for (unsigned i = 1; i < NumElts; ++i) { - // Increment the expected index. If it wraps around, just follow it - // back to index zero and keep going. - ++ExpectedElt; - if (ExpectedElt == NumElts) - ExpectedElt = 0; - - if (M[i] < 0) - continue; // ignore UNDEF indices - if (ExpectedElt != static_cast<unsigned>(M[i])) - return false; - } - - return true; -} - -// check if an EXT instruction can handle the shuffle mask when the -// vector sources of the shuffle are different. -static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT, - unsigned &Imm) { - unsigned NumElts = VT.getVectorNumElements(); - ReverseEXT = false; - - // Assume that the first shuffle index is not UNDEF. Fail if it is. - if (M[0] < 0) - return false; - - Imm = M[0]; - - // If this is a VEXT shuffle, the immediate value is the index of the first - // element. The other shuffle indices must be the successive elements after - // the first one. - unsigned ExpectedElt = Imm; - for (unsigned i = 1; i < NumElts; ++i) { - // Increment the expected index. If it wraps around, it may still be - // a VEXT but the source vectors must be swapped. 
- ExpectedElt += 1; - if (ExpectedElt == NumElts * 2) { - ExpectedElt = 0; - ReverseEXT = true; - } - - if (M[i] < 0) - continue; // ignore UNDEF indices - if (ExpectedElt != static_cast<unsigned>(M[i])) - return false; - } - - // Adjust the index value if the source operands will be swapped. - if (ReverseEXT) - Imm -= NumElts; - - return true; -} - -/// isREVMask - Check if a vector shuffle corresponds to a REV -/// instruction with the specified blocksize. (The order of the elements -/// within each block of the vector is reversed.) -static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { - assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && - "Only possible block sizes for REV are: 16, 32, 64"); - - unsigned EltSz = VT.getVectorElementType().getSizeInBits(); - if (EltSz == 64) - return false; - - unsigned NumElts = VT.getVectorNumElements(); - unsigned BlockElts = M[0] + 1; - // If the first shuffle index is UNDEF, be optimistic. - if (M[0] < 0) - BlockElts = BlockSize / EltSz; - - if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) - return false; - - for (unsigned i = 0; i < NumElts; ++i) { - if (M[i] < 0) - continue; // ignore UNDEF indices - if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) - return false; - } - - return true; -} - -static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { - unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - unsigned Idx = WhichResult * NumElts / 2; - for (unsigned i = 0; i != NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned)M[i] != Idx) || - (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts)) - return false; - Idx += 1; - } - - return true; -} - -static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { - unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i != NumElts; ++i) { - if (M[i] < 0) - continue; // ignore UNDEF indices - if ((unsigned)M[i] != 2 * i + WhichResult) - return false; - } - - return true; -} - -static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { - unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i < NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) || - (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult)) - return false; - } - return true; -} - -/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of -/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". -/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. -static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { - unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - unsigned Idx = WhichResult * NumElts / 2; - for (unsigned i = 0; i != NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned)M[i] != Idx) || - (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx)) - return false; - Idx += 1; - } - - return true; -} - -/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of -/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". -/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, -static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { - unsigned Half = VT.getVectorNumElements() / 2; - WhichResult = (M[0] == 0 ? 
0 : 1); - for (unsigned j = 0; j != 2; ++j) { - unsigned Idx = WhichResult; - for (unsigned i = 0; i != Half; ++i) { - int MIdx = M[i + j * Half]; - if (MIdx >= 0 && (unsigned)MIdx != Idx) - return false; - Idx += 2; - } - } - - return true; -} - -/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of -/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". -/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. -static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { - unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i < NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) || - (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult)) - return false; - } - return true; -} - -/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit -/// the specified operations to build the shuffle. -static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, - SDValue RHS, SelectionDAG &DAG, - SDLoc dl) { - unsigned OpNum = (PFEntry >> 26) & 0x0F; - unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1); - unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1); - - enum { - OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> - OP_VREV, - OP_VDUP0, - OP_VDUP1, - OP_VDUP2, - OP_VDUP3, - OP_VEXT1, - OP_VEXT2, - OP_VEXT3, - OP_VUZPL, // VUZP, left result - OP_VUZPR, // VUZP, right result - OP_VZIPL, // VZIP, left result - OP_VZIPR, // VZIP, right result - OP_VTRNL, // VTRN, left result - OP_VTRNR // VTRN, right result - }; - - if (OpNum == OP_COPY) { - if (LHSID == (1 * 9 + 2) * 9 + 3) - return LHS; - assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!"); - return RHS; - } - - SDValue OpLHS, OpRHS; - OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); - OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); - EVT VT = OpLHS.getValueType(); - - switch (OpNum) { - default: - llvm_unreachable("Unknown shuffle opcode!"); - case OP_VREV: - // VREV divides the vector in half and swaps within the half. 
- if (VT.getVectorElementType() == MVT::i32 || - VT.getVectorElementType() == MVT::f32) - return DAG.getNode(ARM64ISD::REV64, dl, VT, OpLHS); - // vrev <4 x i16> -> REV32 - if (VT.getVectorElementType() == MVT::i16) - return DAG.getNode(ARM64ISD::REV32, dl, VT, OpLHS); - // vrev <4 x i8> -> REV16 - assert(VT.getVectorElementType() == MVT::i8); - return DAG.getNode(ARM64ISD::REV16, dl, VT, OpLHS); - case OP_VDUP0: - case OP_VDUP1: - case OP_VDUP2: - case OP_VDUP3: { - EVT EltTy = VT.getVectorElementType(); - unsigned Opcode; - if (EltTy == MVT::i8) - Opcode = ARM64ISD::DUPLANE8; - else if (EltTy == MVT::i16) - Opcode = ARM64ISD::DUPLANE16; - else if (EltTy == MVT::i32 || EltTy == MVT::f32) - Opcode = ARM64ISD::DUPLANE32; - else if (EltTy == MVT::i64 || EltTy == MVT::f64) - Opcode = ARM64ISD::DUPLANE64; - else - llvm_unreachable("Invalid vector element type?"); - - if (VT.getSizeInBits() == 64) - OpLHS = WidenVector(OpLHS, DAG); - SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, MVT::i64); - return DAG.getNode(Opcode, dl, VT, OpLHS, Lane); - } - case OP_VEXT1: - case OP_VEXT2: - case OP_VEXT3: { - unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS); - return DAG.getNode(ARM64ISD::EXT, dl, VT, OpLHS, OpRHS, - DAG.getConstant(Imm, MVT::i32)); - } - case OP_VUZPL: - return DAG.getNode(ARM64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - case OP_VUZPR: - return DAG.getNode(ARM64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - case OP_VZIPL: - return DAG.getNode(ARM64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - case OP_VZIPR: - return DAG.getNode(ARM64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - case OP_VTRNL: - return DAG.getNode(ARM64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - case OP_VTRNR: - return DAG.getNode(ARM64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - } -} - -static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask, - SelectionDAG &DAG) { - // Check to see if we can use the TBL instruction. - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); - SDLoc DL(Op); - - EVT EltVT = Op.getValueType().getVectorElementType(); - unsigned BytesPerElt = EltVT.getSizeInBits() / 8; - - SmallVector<SDValue, 8> TBLMask; - for (int Val : ShuffleMask) { - for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { - unsigned Offset = Byte + Val * BytesPerElt; - TBLMask.push_back(DAG.getConstant(Offset, MVT::i32)); - } - } - - MVT IndexVT = MVT::v8i8; - unsigned IndexLen = 8; - if (Op.getValueType().getSizeInBits() == 128) { - IndexVT = MVT::v16i8; - IndexLen = 16; - } - - SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1); - SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2); - - SDValue Shuffle; - if (V2.getNode()->getOpcode() == ISD::UNDEF) { - if (IndexLen == 8) - V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst); - Shuffle = DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::arm64_neon_tbl1, MVT::i32), V1Cst, - DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, &TBLMask[0], IndexLen)); - } else { - if (IndexLen == 8) { - V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst); - Shuffle = DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::arm64_neon_tbl1, MVT::i32), V1Cst, - DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, &TBLMask[0], IndexLen)); - } else { - // FIXME: We cannot, for the moment, emit a TBL2 instruction because we - // cannot currently represent the register constraints on the input - // table registers. 
- // Shuffle = DAG.getNode(ARM64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst, - // DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, - // &TBLMask[0], IndexLen)); - Shuffle = DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::arm64_neon_tbl2, MVT::i32), V1Cst, V2Cst, - DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, &TBLMask[0], IndexLen)); - } - } - return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle); -} - -static unsigned getDUPLANEOp(EVT EltType) { - if (EltType == MVT::i8) - return ARM64ISD::DUPLANE8; - if (EltType == MVT::i16) - return ARM64ISD::DUPLANE16; - if (EltType == MVT::i32 || EltType == MVT::f32) - return ARM64ISD::DUPLANE32; - if (EltType == MVT::i64 || EltType == MVT::f64) - return ARM64ISD::DUPLANE64; - - llvm_unreachable("Invalid vector element type?"); -} - -SDValue ARM64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, - SelectionDAG &DAG) const { - SDLoc dl(Op); - EVT VT = Op.getValueType(); - - ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); - - // Convert shuffles that are directly supported on NEON to target-specific - // DAG nodes, instead of keeping them as shuffles and matching them again - // during code selection. This is more efficient and avoids the possibility - // of inconsistencies between legalization and selection. - ArrayRef<int> ShuffleMask = SVN->getMask(); - - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); - - if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], - V1.getValueType().getSimpleVT())) { - int Lane = SVN->getSplatIndex(); - // If this is undef splat, generate it via "just" vdup, if possible. - if (Lane == -1) - Lane = 0; - - if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) - return DAG.getNode(ARM64ISD::DUP, dl, V1.getValueType(), - V1.getOperand(0)); - // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non- - // constant. If so, we can just reference the lane's definition directly. - if (V1.getOpcode() == ISD::BUILD_VECTOR && - !isa<ConstantSDNode>(V1.getOperand(Lane))) - return DAG.getNode(ARM64ISD::DUP, dl, VT, V1.getOperand(Lane)); - - // Otherwise, duplicate from the lane of the input vector. - unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType()); - - // SelectionDAGBuilder may have "helpfully" already extracted or conatenated - // to make a vector of the same size as this SHUFFLE. We can ignore the - // extract entirely, and canonicalise the concat using WidenVector. 
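The lane adjustment described in the comment above and carried out in the next hunk remaps a splat lane through the EXTRACT_SUBVECTOR or CONCAT_VECTORS feeding the shuffle. A worked example of the CONCAT_VECTORS case (illustrative arithmetic only):

#include <cstdio>

int main() {
  // Splat of lane 5 from V1 = concat_vectors(A, B), where A and B are v4i16
  // and the shuffle result is v8i16 (NumElts = 8): the lane lives in B.
  unsigned NumElts = 8, Lane = 5;
  unsigned Idx = Lane >= NumElts / 2;    // 1 -> use concat operand B
  Lane -= Idx * (NumElts / 2);           // lane 1 of B
  printf("use concat operand %u, lane %u\n", Idx, Lane);  // operand 1, lane 1
}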
- if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) { - Lane += cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue(); - V1 = V1.getOperand(0); - } else if (V1.getOpcode() == ISD::CONCAT_VECTORS) { - unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2; - Lane -= Idx * VT.getVectorNumElements() / 2; - V1 = WidenVector(V1.getOperand(Idx), DAG); - } else if (VT.getSizeInBits() == 64) - V1 = WidenVector(V1, DAG); - - return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, MVT::i64)); - } - - if (isREVMask(ShuffleMask, VT, 64)) - return DAG.getNode(ARM64ISD::REV64, dl, V1.getValueType(), V1, V2); - if (isREVMask(ShuffleMask, VT, 32)) - return DAG.getNode(ARM64ISD::REV32, dl, V1.getValueType(), V1, V2); - if (isREVMask(ShuffleMask, VT, 16)) - return DAG.getNode(ARM64ISD::REV16, dl, V1.getValueType(), V1, V2); - - bool ReverseEXT = false; - unsigned Imm; - if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) { - if (ReverseEXT) - std::swap(V1, V2); - Imm *= getExtFactor(V1); - return DAG.getNode(ARM64ISD::EXT, dl, V1.getValueType(), V1, V2, - DAG.getConstant(Imm, MVT::i32)); - } else if (V2->getOpcode() == ISD::UNDEF && - isSingletonEXTMask(ShuffleMask, VT, Imm)) { - Imm *= getExtFactor(V1); - return DAG.getNode(ARM64ISD::EXT, dl, V1.getValueType(), V1, V1, - DAG.getConstant(Imm, MVT::i32)); - } - - unsigned WhichResult; - if (isZIPMask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::ZIP1 : ARM64ISD::ZIP2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); - } - if (isUZPMask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::UZP1 : ARM64ISD::UZP2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); - } - if (isTRNMask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::TRN1 : ARM64ISD::TRN2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); - } - - if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::ZIP1 : ARM64ISD::ZIP2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); - } - if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::UZP1 : ARM64ISD::UZP2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); - } - if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::TRN1 : ARM64ISD::TRN2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); - } - - // If the shuffle is not directly supported and it has 4 elements, use - // the PerfectShuffle-generated table to synthesize it from other shuffles. - unsigned NumElts = VT.getVectorNumElements(); - if (NumElts == 4) { - unsigned PFIndexes[4]; - for (unsigned i = 0; i != 4; ++i) { - if (ShuffleMask[i] < 0) - PFIndexes[i] = 8; - else - PFIndexes[i] = ShuffleMask[i]; - } - - // Compute the index in the perfect shuffle table. 
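The computation in the next hunk folds the four mask entries into a base-9 index (an undef lane is encoded as 8, hence radix 9) for PerfectShuffleTable, and GeneratePerfectShuffle earlier in this diff unpacks each entry into a cost, an opcode and two operand ids. A standalone sketch of the indexing and decoding (illustrative only; the real table is defined elsewhere in this backend):

#include <cstdio>

// Fold a 4-lane mask (undef encoded as 8) into the base-9 table index.
static unsigned pfIndex(const unsigned M[4]) {
  return M[0] * 9 * 9 * 9 + M[1] * 9 * 9 + M[2] * 9 + M[3];
}

// Pull a table entry apart the way GeneratePerfectShuffle does.
static void decodeEntry(unsigned PFEntry) {
  unsigned Cost = PFEntry >> 30;
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1u << 13) - 1);
  unsigned RHSID = PFEntry & ((1u << 13) - 1);
  printf("cost=%u op=%u lhs=%u rhs=%u\n", Cost, OpNum, LHSID, RHSID);
}

int main() {
  unsigned Mask[4] = {0, 8, 2, 3};        // lane 1 is undef
  printf("index=%u\n", pfIndex(Mask));    // 0*729 + 8*81 + 2*9 + 3 = 669
  decodeEntry((1u << 30) | (9u << 26) | (123u << 13) | 456u); // made-up entry
}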
- unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + - PFIndexes[2] * 9 + PFIndexes[3]; - unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; - unsigned Cost = (PFEntry >> 30); - - if (Cost <= 4) - return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); - } - - return GenerateTBL(Op, ShuffleMask, DAG); -} - -static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, - APInt &UndefBits) { - EVT VT = BVN->getValueType(0); - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { - unsigned NumSplats = VT.getSizeInBits() / SplatBitSize; - - for (unsigned i = 0; i < NumSplats; ++i) { - CnstBits <<= SplatBitSize; - UndefBits <<= SplatBitSize; - CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits()); - UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits()); - } - - return true; - } - - return false; -} - -SDValue ARM64TargetLowering::LowerVectorAND(SDValue Op, - SelectionDAG &DAG) const { - BuildVectorSDNode *BVN = - dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode()); - SDValue LHS = Op.getOperand(0); - SDLoc dl(Op); - EVT VT = Op.getValueType(); - - if (!BVN) - return Op; - - APInt CnstBits(VT.getSizeInBits(), 0); - APInt UndefBits(VT.getSizeInBits(), 0); - if (resolveBuildVector(BVN, CnstBits, UndefBits)) { - // We only have BIC vector immediate instruction, which is and-not. - CnstBits = ~CnstBits; - - // We make use of a little bit of goto ickiness in order to avoid having to - // duplicate the immediate matching logic for the undef toggled case. - bool SecondTry = false; - AttemptModImm: - - if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) { - CnstBits = CnstBits.zextOrTrunc(64); - uint64_t CnstVal = CnstBits.getZExtValue(); - - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(16, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(24, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - } - - if (SecondTry) - goto FailedModImm; - SecondTry = true; - CnstBits = ~UndefBits; - goto AttemptModImm; - } - -// We can always fall back to a non-immediate AND. -FailedModImm: - return Op; -} - -// Specialized code to quickly find if PotentialBVec is a BuildVector that -// consists of only the same constant int value, returned in reference arg -// ConstVal -static bool isAllConstantBuildVector(const SDValue &PotentialBVec, - uint64_t &ConstVal) { - BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec); - if (!Bvec) - return false; - ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0)); - if (!FirstElt) - return false; - EVT VT = Bvec->getValueType(0); - unsigned NumElts = VT.getVectorNumElements(); - for (unsigned i = 1; i < NumElts; ++i) - if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt) - return false; - ConstVal = FirstElt->getZExtValue(); - return true; -} - -static unsigned getIntrinsicID(const SDNode *N) { - unsigned Opcode = N->getOpcode(); - switch (Opcode) { - default: - return Intrinsic::not_intrinsic; - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); - if (IID < Intrinsic::num_intrinsics) - return IID; - return Intrinsic::not_intrinsic; - } - } -} - -// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)), -// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a -// BUILD_VECTORs with constant element C1, C2 is a constant, and C1 == ~C2. -// Also, logical shift right -> sri, with the same structure. -static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { - EVT VT = N->getValueType(0); - - if (!VT.isVector()) - return SDValue(); - - SDLoc DL(N); - - // Is the first op an AND? - const SDValue And = N->getOperand(0); - if (And.getOpcode() != ISD::AND) - return SDValue(); - - // Is the second op an shl or lshr? - SDValue Shift = N->getOperand(1); - // This will have been turned into: ARM64ISD::VSHL vector, #shift - // or ARM64ISD::VLSHR vector, #shift - unsigned ShiftOpc = Shift.getOpcode(); - if ((ShiftOpc != ARM64ISD::VSHL && ShiftOpc != ARM64ISD::VLSHR)) - return SDValue(); - bool IsShiftRight = ShiftOpc == ARM64ISD::VLSHR; - - // Is the shift amount constant? - ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); - if (!C2node) - return SDValue(); - - // Is the and mask vector all constant? - uint64_t C1; - if (!isAllConstantBuildVector(And.getOperand(1), C1)) - return SDValue(); - - // Is C1 == ~C2, taking into account how much one can shift elements of a - // particular size? 
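tryLowerToSLI above is matching an OR of "keep some bits of X" with "shift Y into the remaining bits", which is what SLI and SRI do per element; the constant check continues in the next hunk. A minimal sketch of the per-lane semantics being targeted (illustrative C++ modelling one 32-bit lane, not the DAG pattern match; assumes 0 < Shift < 32):

#include <cstdint>
#include <cstdio>

// One 32-bit lane of SLI: shift Src left and insert it into Dst, keeping
// only the low Shift bits of Dst.
static uint32_t sli32(uint32_t Dst, uint32_t Src, unsigned Shift) {
  uint32_t Keep = (1u << Shift) - 1;
  return (Dst & Keep) | (Src << Shift);
}

// One 32-bit lane of SRI: shift Src right and insert it into Dst, keeping
// only the high Shift bits of Dst.
static uint32_t sri32(uint32_t Dst, uint32_t Src, unsigned Shift) {
  uint32_t Keep = ~(~0u >> Shift);
  return (Dst & Keep) | (Src >> Shift);
}

int main() {
  printf("%08x\n", sli32(0xAAAAAAAA, 0x00000BCD, 16));  // 0bcdaaaa
  printf("%08x\n", sri32(0xAAAAAAAA, 0xBCD00000, 16));  // aaaabcd0
}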
- uint64_t C2 = C2node->getZExtValue(); - unsigned ElemSizeInBits = VT.getVectorElementType().getSizeInBits(); - if (C2 > ElemSizeInBits) - return SDValue(); - unsigned ElemMask = (1 << ElemSizeInBits) - 1; - if ((C1 & ElemMask) != (~C2 & ElemMask)) - return SDValue(); - - SDValue X = And.getOperand(0); - SDValue Y = Shift.getOperand(0); - - unsigned Intrin = - IsShiftRight ? Intrinsic::arm64_neon_vsri : Intrinsic::arm64_neon_vsli; - SDValue ResultSLI = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrin, MVT::i32), X, Y, Shift.getOperand(1)); - - DEBUG(dbgs() << "arm64-lower: transformed: \n"); - DEBUG(N->dump(&DAG)); - DEBUG(dbgs() << "into: \n"); - DEBUG(ResultSLI->dump(&DAG)); - - ++NumShiftInserts; - return ResultSLI; -} - -SDValue ARM64TargetLowering::LowerVectorOR(SDValue Op, - SelectionDAG &DAG) const { - // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2)) - if (EnableARM64SlrGeneration) { - SDValue Res = tryLowerToSLI(Op.getNode(), DAG); - if (Res.getNode()) - return Res; - } - - BuildVectorSDNode *BVN = - dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode()); - SDValue LHS = Op.getOperand(1); - SDLoc dl(Op); - EVT VT = Op.getValueType(); - - // OR commutes, so try swapping the operands. - if (!BVN) { - LHS = Op.getOperand(0); - BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode()); - } - if (!BVN) - return Op; - - APInt CnstBits(VT.getSizeInBits(), 0); - APInt UndefBits(VT.getSizeInBits(), 0); - if (resolveBuildVector(BVN, CnstBits, UndefBits)) { - // We make use of a little bit of goto ickiness in order to avoid having to - // duplicate the immediate matching logic for the undef toggled case. - bool SecondTry = false; - AttemptModImm: - - if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) { - CnstBits = CnstBits.zextOrTrunc(64); - uint64_t CnstVal = CnstBits.getZExtValue(); - - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(16, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(24, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - } - - if (SecondTry) - goto FailedModImm; - SecondTry = true; - CnstBits = UndefBits; - goto AttemptModImm; - } - -// We can always fall back to a non-immediate OR. -FailedModImm: - return Op; -} - -SDValue ARM64TargetLowering::LowerBUILD_VECTOR(SDValue Op, - SelectionDAG &DAG) const { - BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); - SDLoc dl(Op); - EVT VT = Op.getValueType(); - - APInt CnstBits(VT.getSizeInBits(), 0); - APInt UndefBits(VT.getSizeInBits(), 0); - if (resolveBuildVector(BVN, CnstBits, UndefBits)) { - // We make use of a little bit of goto ickiness in order to avoid having to - // duplicate the immediate matching logic for the undef toggled case. - bool SecondTry = false; - AttemptModImm: - - if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) { - CnstBits = CnstBits.zextOrTrunc(64); - uint64_t CnstVal = CnstBits.getZExtValue(); - - // Certain magic vector constants (used to express things like NOT - // and NEG) are passed through unmodified. This allows codegen patterns - // for these operations to match. Special-purpose patterns will lower - // these immediates to MOVIs if it proves necessary. - if (VT.isInteger() && (CnstVal == 0 || CnstVal == ~0ULL)) - return Op; - - // The many faces of MOVI... - if (ARM64_AM::isAdvSIMDModImmType10(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType10(CnstVal); - if (VT.getSizeInBits() == 128) { - SDValue Mov = DAG.getNode(ARM64ISD::MOVIedit, dl, MVT::v2i64, - DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - // Support the V64 version via subregister insertion. - SDValue Mov = DAG.getNode(ARM64ISD::MOVIedit, dl, MVT::f64, - DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(16, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(24, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType7(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType7(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVImsl, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(264, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType8(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType8(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVImsl, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(272, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType9(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType9(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8; - SDValue Mov = DAG.getNode(ARM64ISD::MOVI, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - // The few faces of FMOV... - if (ARM64_AM::isAdvSIMDModImmType11(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType11(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32; - SDValue Mov = DAG.getNode(ARM64ISD::FMOV, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType12(CnstVal) && - VT.getSizeInBits() == 128) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType12(CnstVal); - SDValue Mov = DAG.getNode(ARM64ISD::FMOV, dl, MVT::v2f64, - DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - // The many faces of MVNI... - CnstVal = ~CnstVal; - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(16, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(24, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType7(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType7(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNImsl, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(264, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType8(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType8(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNImsl, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(272, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - } - - if (SecondTry) - goto FailedModImm; - SecondTry = true; - CnstBits = UndefBits; - goto AttemptModImm; - } -FailedModImm: - - // Scan through the operands to find some interesting properties we can - // exploit: - // 1) If only one value is used, we can use a DUP, or - // 2) if only the low element is not undef, we can just insert that, or - // 3) if only one constant value is used (w/ some non-constant lanes), - // we can splat the constant value into the whole vector then fill - // in the non-constant lanes. - // 4) FIXME: If different constant values are used, but we can intelligently - // select the values we'll be overwriting for the non-constant - // lanes such that we can directly materialize the vector - // some other way (MOVI, e.g.), we can be sneaky. 
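The operand scan in the next hunk gathers exactly the properties that strategy comment lists: whether only lane 0 is defined, whether a single value (or a single constant value) is used, and how many lanes are constant. A standalone sketch of the same classification over plain integers, with undef lanes modelled as empty optionals (illustrative only):

#include <cstddef>
#include <cstdio>
#include <optional>
#include <vector>

struct BuildVecInfo {
  bool OnlyLowElement = true;
  bool OnlyOneValue = true;
  unsigned ConstantLanes = 0;
};

// Classify a BUILD_VECTOR whose lanes are undef (nullopt) or a constant int;
// non-constant lanes are not modelled in this sketch.
static BuildVecInfo classify(const std::vector<std::optional<int>> &Lanes) {
  BuildVecInfo Info;
  std::optional<int> Value;
  for (std::size_t i = 0; i < Lanes.size(); ++i) {
    if (!Lanes[i])
      continue;                        // undef lane
    if (i > 0)
      Info.OnlyLowElement = false;
    ++Info.ConstantLanes;
    if (!Value)
      Value = Lanes[i];
    else if (*Value != *Lanes[i])
      Info.OnlyOneValue = false;
  }
  return Info;
}

int main() {
  auto Info = classify({7, 7, std::nullopt, 7});   // a splat with one undef
  printf("low-only=%d one-value=%d const-lanes=%u\n", Info.OnlyLowElement,
         Info.OnlyOneValue, Info.ConstantLanes);   // 0, 1, 3
}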
- unsigned NumElts = VT.getVectorNumElements(); - bool isOnlyLowElement = true; - bool usesOnlyOneValue = true; - bool usesOnlyOneConstantValue = true; - bool isConstant = true; - unsigned NumConstantLanes = 0; - SDValue Value; - SDValue ConstantValue; - for (unsigned i = 0; i < NumElts; ++i) { - SDValue V = Op.getOperand(i); - if (V.getOpcode() == ISD::UNDEF) - continue; - if (i > 0) - isOnlyLowElement = false; - if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) - isConstant = false; - - if (isa<ConstantSDNode>(V)) { - ++NumConstantLanes; - if (!ConstantValue.getNode()) - ConstantValue = V; - else if (ConstantValue != V) - usesOnlyOneConstantValue = false; - } - - if (!Value.getNode()) - Value = V; - else if (V != Value) - usesOnlyOneValue = false; - } - - if (!Value.getNode()) - return DAG.getUNDEF(VT); - - if (isOnlyLowElement) - return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); - - // Use DUP for non-constant splats. For f32 constant splats, reduce to - // i32 and try again. - if (usesOnlyOneValue) { - if (!isConstant) { - if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - Value.getValueType() != VT) - return DAG.getNode(ARM64ISD::DUP, dl, VT, Value); - - // This is actually a DUPLANExx operation, which keeps everything vectory. - - // DUPLANE works on 128-bit vectors, widen it if necessary. - SDValue Lane = Value.getOperand(1); - Value = Value.getOperand(0); - if (Value.getValueType().getSizeInBits() == 64) - Value = WidenVector(Value, DAG); - - unsigned Opcode = getDUPLANEOp(VT.getVectorElementType()); - return DAG.getNode(Opcode, dl, VT, Value, Lane); - } - - if (VT.getVectorElementType().isFloatingPoint()) { - SmallVector<SDValue, 8> Ops; - MVT NewType = - (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; - for (unsigned i = 0; i < NumElts; ++i) - Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i))); - EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts); - SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); - Val = LowerBUILD_VECTOR(Val, DAG); - if (Val.getNode()) - return DAG.getNode(ISD::BITCAST, dl, VT, Val); - } - } - - // If there was only one constant value used and for more than one lane, - // start by splatting that value, then replace the non-constant lanes. This - // is better than the default, which will perform a separate initialization - // for each lane. - if (NumConstantLanes > 0 && usesOnlyOneConstantValue) { - SDValue Val = DAG.getNode(ARM64ISD::DUP, dl, VT, ConstantValue); - // Now insert the non-constant lanes. - for (unsigned i = 0; i < NumElts; ++i) { - SDValue V = Op.getOperand(i); - SDValue LaneIdx = DAG.getConstant(i, MVT::i64); - if (!isa<ConstantSDNode>(V)) { - // Note that type legalization likely mucked about with the VT of the - // source operand, so we may have to convert it here before inserting. - Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx); - } - } - return Val; - } - - // If all elements are constants and the case above didn't get hit, fall back - // to the default expansion, which will generate a load from the constant - // pool. - if (isConstant) - return SDValue(); - - // Empirical tests suggest this is rarely worth it for vectors of length <= 2. - if (NumElts >= 4) { - SDValue shuffle = ReconstructShuffle(Op, DAG); - if (shuffle != SDValue()) - return shuffle; - } - - // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we - // know the default expansion would otherwise fall back on something even - // worse. 
For a vector with one or two non-undef values, that's - // scalar_to_vector for the elements followed by a shuffle (provided the - // shuffle is valid for the target) and materialization element by element - // on the stack followed by a load for everything else. - if (!isConstant && !usesOnlyOneValue) { - SDValue Vec = DAG.getUNDEF(VT); - SDValue Op0 = Op.getOperand(0); - unsigned ElemSize = VT.getVectorElementType().getSizeInBits(); - unsigned i = 0; - // For 32 and 64 bit types, use INSERT_SUBREG for lane zero to - // a) Avoid a RMW dependency on the full vector register, and - // b) Allow the register coalescer to fold away the copy if the - // value is already in an S or D register. - if (Op0.getOpcode() != ISD::UNDEF && (ElemSize == 32 || ElemSize == 64)) { - unsigned SubIdx = ElemSize == 32 ? ARM64::ssub : ARM64::dsub; - MachineSDNode *N = - DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0, - DAG.getTargetConstant(SubIdx, MVT::i32)); - Vec = SDValue(N, 0); - ++i; - } - for (; i < NumElts; ++i) { - SDValue V = Op.getOperand(i); - if (V.getOpcode() == ISD::UNDEF) - continue; - SDValue LaneIdx = DAG.getConstant(i, MVT::i64); - Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); - } - return Vec; - } - - // Just use the default expansion. We failed to find a better alternative. - return SDValue(); -} - -SDValue ARM64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!"); - - // Check for non-constant lane. - if (!isa<ConstantSDNode>(Op.getOperand(2))) - return SDValue(); - - EVT VT = Op.getOperand(0).getValueType(); - - // Insertion/extraction are legal for V128 types. - if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || - VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64) - return Op; - - if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 && - VT != MVT::v1i64 && VT != MVT::v2f32) - return SDValue(); - - // For V64 types, we perform insertion by expanding the value - // to a V128 type and perform the insertion on that. - SDLoc DL(Op); - SDValue WideVec = WidenVector(Op.getOperand(0), DAG); - EVT WideTy = WideVec.getValueType(); - - SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec, - Op.getOperand(1), Op.getOperand(2)); - // Re-narrow the resultant vector. - return NarrowVector(Node, DAG); -} - -SDValue ARM64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!"); - - // Check for non-constant lane. - if (!isa<ConstantSDNode>(Op.getOperand(1))) - return SDValue(); - - EVT VT = Op.getOperand(0).getValueType(); - - // Insertion/extraction are legal for V128 types. - if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || - VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64) - return Op; - - if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 && - VT != MVT::v1i64 && VT != MVT::v2f32) - return SDValue(); - - // For V64 types, we perform extraction by expanding the value - // to a V128 type and perform the extraction on that. - SDLoc DL(Op); - SDValue WideVec = WidenVector(Op.getOperand(0), DAG); - EVT WideTy = WideVec.getValueType(); - - EVT ExtrTy = WideTy.getVectorElementType(); - if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8) - ExtrTy = MVT::i32; - - // For extractions, we just return the result directly. 
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec, - Op.getOperand(1)); -} - -SDValue ARM64TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getOpcode() == ISD::SCALAR_TO_VECTOR && "Unknown opcode!"); - // Some AdvSIMD intrinsics leave their results in the scalar B/H/S/D - // registers. The default lowering will copy those to a GPR then back - // to a vector register. Instead, just recognize those cases and reference - // the vector register they're already a subreg of. - SDValue Op0 = Op->getOperand(0); - if (Op0->getOpcode() != ISD::INTRINSIC_WO_CHAIN) - return Op; - unsigned IID = getIntrinsicID(Op0.getNode()); - // The below list of intrinsics isn't exhaustive. Add cases as-needed. - // FIXME: Even better would be if there were an attribute on the node - // that we could query and set in the intrinsics definition or something. - unsigned SubIdx; - switch (IID) { - default: - // Early exit if this isn't one of the intrinsics we handle. - return Op; - case Intrinsic::arm64_neon_uaddv: - case Intrinsic::arm64_neon_saddv: - case Intrinsic::arm64_neon_uaddlv: - case Intrinsic::arm64_neon_saddlv: - switch (Op0.getValueType().getSizeInBits()) { - default: - llvm_unreachable("Illegal result size from ARM64 vector intrinsic!"); - case 8: - SubIdx = ARM64::bsub; - break; - case 16: - SubIdx = ARM64::hsub; - break; - case 32: - SubIdx = ARM64::ssub; - break; - case 64: - SubIdx = ARM64::dsub; - break; - } - } - MachineSDNode *N = - DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(Op), - Op.getValueType(), DAG.getUNDEF(Op.getValueType()), - Op0, DAG.getTargetConstant(SubIdx, MVT::i32)); - return SDValue(N, 0); -} - -SDValue ARM64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op.getOperand(0).getValueType(); - SDLoc dl(Op); - // Just in case... - if (!VT.isVector()) - return SDValue(); - - ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1)); - if (!Cst) - return SDValue(); - unsigned Val = Cst->getZExtValue(); - - unsigned Size = Op.getValueType().getSizeInBits(); - if (Val == 0) { - switch (Size) { - case 8: - return DAG.getTargetExtractSubreg(ARM64::bsub, dl, Op.getValueType(), - Op.getOperand(0)); - case 16: - return DAG.getTargetExtractSubreg(ARM64::hsub, dl, Op.getValueType(), - Op.getOperand(0)); - case 32: - return DAG.getTargetExtractSubreg(ARM64::ssub, dl, Op.getValueType(), - Op.getOperand(0)); - case 64: - return DAG.getTargetExtractSubreg(ARM64::dsub, dl, Op.getValueType(), - Op.getOperand(0)); - default: - llvm_unreachable("Unexpected vector type in extract_subvector!"); - } - } - // If this is extracting the upper 64-bits of a 128-bit vector, we match - // that directly. - if (Size == 64 && Val * VT.getVectorElementType().getSizeInBits() == 64) - return Op; - - return SDValue(); -} - -bool ARM64TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, - EVT VT) const { - if (VT.getVectorNumElements() == 4 && - (VT.is128BitVector() || VT.is64BitVector())) { - unsigned PFIndexes[4]; - for (unsigned i = 0; i != 4; ++i) { - if (M[i] < 0) - PFIndexes[i] = 8; - else - PFIndexes[i] = M[i]; - } - - // Compute the index in the perfect shuffle table. 
- unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + - PFIndexes[2] * 9 + PFIndexes[3]; - unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; - unsigned Cost = (PFEntry >> 30); - - if (Cost <= 4) - return true; - } - - bool ReverseVEXT; - unsigned Imm, WhichResult; - - return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) || - isREVMask(M, VT, 32) || isREVMask(M, VT, 16) || - isEXTMask(M, VT, ReverseVEXT, Imm) || - // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM. - isTRNMask(M, VT, WhichResult) || isUZPMask(M, VT, WhichResult) || - isZIPMask(M, VT, WhichResult) || - isTRN_v_undef_Mask(M, VT, WhichResult) || - isUZP_v_undef_Mask(M, VT, WhichResult) || - isZIP_v_undef_Mask(M, VT, WhichResult)); -} - -/// getVShiftImm - Check if this is a valid build_vector for the immediate -/// operand of a vector shift operation, where all the elements of the -/// build_vector must have the same constant integer value. -static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { - // Ignore bit_converts. - while (Op.getOpcode() == ISD::BITCAST) - Op = Op.getOperand(0); - BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, - HasAnyUndefs, ElementBits) || - SplatBitSize > ElementBits) - return false; - Cnt = SplatBits.getSExtValue(); - return true; -} - -/// isVShiftLImm - Check if this is a valid build_vector for the immediate -/// operand of a vector shift left operation. That value must be in the range: -/// 0 <= Value < ElementBits for a left shift; or -/// 0 <= Value <= ElementBits for a long left shift. -static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { - assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); - if (!getVShiftImm(Op, ElementBits, Cnt)) - return false; - return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits); -} - -/// isVShiftRImm - Check if this is a valid build_vector for the immediate -/// operand of a vector shift right operation. For a shift opcode, the value -/// is positive, but for an intrinsic the value count must be negative. The -/// absolute value must be in the range: -/// 1 <= |Value| <= ElementBits for a right shift; or -/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. -static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, - int64_t &Cnt) { - assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); - if (!getVShiftImm(Op, ElementBits, Cnt)) - return false; - if (isIntrinsic) - Cnt = -Cnt; - return (Cnt >= 1 && Cnt <= (isNarrow ? 
ElementBits / 2 : ElementBits)); -} - -SDValue ARM64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - SDLoc DL(Op); - int64_t Cnt; - - if (!Op.getOperand(1).getValueType().isVector()) - return Op; - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - - switch (Op.getOpcode()) { - default: - llvm_unreachable("unexpected shift opcode"); - - case ISD::SHL: - if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) - return DAG.getNode(ARM64ISD::VSHL, SDLoc(Op), VT, Op.getOperand(0), - DAG.getConstant(Cnt, MVT::i32)); - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::arm64_neon_ushl, MVT::i32), - Op.getOperand(0), Op.getOperand(1)); - case ISD::SRA: - case ISD::SRL: - // Right shift immediate - if (isVShiftRImm(Op.getOperand(1), VT, false, false, Cnt) && - Cnt < EltSize) { - unsigned Opc = - (Op.getOpcode() == ISD::SRA) ? ARM64ISD::VASHR : ARM64ISD::VLSHR; - return DAG.getNode(Opc, SDLoc(Op), VT, Op.getOperand(0), - DAG.getConstant(Cnt, MVT::i32)); - } - - // Right shift register. Note, there is not a shift right register - // instruction, but the shift left register instruction takes a signed - // value, where negative numbers specify a right shift. - unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::arm64_neon_sshl - : Intrinsic::arm64_neon_ushl; - // negate the shift amount - SDValue NegShift = DAG.getNode(ARM64ISD::NEG, DL, VT, Op.getOperand(1)); - SDValue NegShiftLeft = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Opc, MVT::i32), Op.getOperand(0), NegShift); - return NegShiftLeft; - } - - return SDValue(); -} - -static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, - ARM64CC::CondCode CC, bool NoNans, EVT VT, - SDLoc dl, SelectionDAG &DAG) { - EVT SrcVT = LHS.getValueType(); - - BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode()); - APInt CnstBits(VT.getSizeInBits(), 0); - APInt UndefBits(VT.getSizeInBits(), 0); - bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits); - bool IsZero = IsCnst && (CnstBits == 0); - - if (SrcVT.getVectorElementType().isFloatingPoint()) { - switch (CC) { - default: - return SDValue(); - case ARM64CC::NE: { - SDValue Fcmeq; - if (IsZero) - Fcmeq = DAG.getNode(ARM64ISD::FCMEQz, dl, VT, LHS); - else - Fcmeq = DAG.getNode(ARM64ISD::FCMEQ, dl, VT, LHS, RHS); - return DAG.getNode(ARM64ISD::NOT, dl, VT, Fcmeq); - } - case ARM64CC::EQ: - if (IsZero) - return DAG.getNode(ARM64ISD::FCMEQz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMEQ, dl, VT, LHS, RHS); - case ARM64CC::GE: - if (IsZero) - return DAG.getNode(ARM64ISD::FCMGEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGE, dl, VT, LHS, RHS); - case ARM64CC::GT: - if (IsZero) - return DAG.getNode(ARM64ISD::FCMGTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGT, dl, VT, LHS, RHS); - case ARM64CC::LS: - if (IsZero) - return DAG.getNode(ARM64ISD::FCMLEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGE, dl, VT, RHS, LHS); - case ARM64CC::LT: - if (!NoNans) - return SDValue(); - // If we ignore NaNs then we can use to the MI implementation. - // Fallthrough. 
- case ARM64CC::MI: - if (IsZero) - return DAG.getNode(ARM64ISD::FCMLTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGT, dl, VT, RHS, LHS); - } - } - - switch (CC) { - default: - return SDValue(); - case ARM64CC::NE: { - SDValue Cmeq; - if (IsZero) - Cmeq = DAG.getNode(ARM64ISD::CMEQz, dl, VT, LHS); - else - Cmeq = DAG.getNode(ARM64ISD::CMEQ, dl, VT, LHS, RHS); - return DAG.getNode(ARM64ISD::NOT, dl, VT, Cmeq); - } - case ARM64CC::EQ: - if (IsZero) - return DAG.getNode(ARM64ISD::CMEQz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMEQ, dl, VT, LHS, RHS); - case ARM64CC::GE: - if (IsZero) - return DAG.getNode(ARM64ISD::CMGEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGE, dl, VT, LHS, RHS); - case ARM64CC::GT: - if (IsZero) - return DAG.getNode(ARM64ISD::CMGTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGT, dl, VT, LHS, RHS); - case ARM64CC::LE: - if (IsZero) - return DAG.getNode(ARM64ISD::CMLEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGE, dl, VT, RHS, LHS); - case ARM64CC::LS: - return DAG.getNode(ARM64ISD::CMHS, dl, VT, RHS, LHS); - case ARM64CC::CC: - return DAG.getNode(ARM64ISD::CMHI, dl, VT, RHS, LHS); - case ARM64CC::LT: - if (IsZero) - return DAG.getNode(ARM64ISD::CMLTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGT, dl, VT, RHS, LHS); - case ARM64CC::HI: - return DAG.getNode(ARM64ISD::CMHI, dl, VT, LHS, RHS); - case ARM64CC::CS: - return DAG.getNode(ARM64ISD::CMHS, dl, VT, LHS, RHS); - } -} - -SDValue ARM64TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDLoc dl(Op); - - if (LHS.getValueType().getVectorElementType().isInteger()) { - assert(LHS.getValueType() == RHS.getValueType()); - ARM64CC::CondCode ARM64CC = changeIntCCToARM64CC(CC); - return EmitVectorComparison(LHS, RHS, ARM64CC, false, Op.getValueType(), dl, - DAG); - } - - assert(LHS.getValueType().getVectorElementType() == MVT::f32 || - LHS.getValueType().getVectorElementType() == MVT::f64); - - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally - // clean. Some of them require two branches to implement. - ARM64CC::CondCode CC1, CC2; - changeFPCCToARM64CC(CC, CC1, CC2); - - bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath; - SDValue Cmp1 = - EmitVectorComparison(LHS, RHS, CC1, NoNaNs, Op.getValueType(), dl, DAG); - if (!Cmp1.getNode()) - return SDValue(); - - if (CC2 != ARM64CC::AL) { - SDValue Cmp2 = - EmitVectorComparison(LHS, RHS, CC2, NoNaNs, Op.getValueType(), dl, DAG); - if (!Cmp2.getNode()) - return SDValue(); - - return DAG.getNode(ISD::OR, dl, Cmp1.getValueType(), Cmp1, Cmp2); - } - - return Cmp1; -} - -/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as -/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment -/// specified in the intrinsic calls. -bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, - unsigned Intrinsic) const { - switch (Intrinsic) { - case Intrinsic::arm64_neon_ld2: - case Intrinsic::arm64_neon_ld3: - case Intrinsic::arm64_neon_ld4: - case Intrinsic::arm64_neon_ld2lane: - case Intrinsic::arm64_neon_ld3lane: - case Intrinsic::arm64_neon_ld4lane: - case Intrinsic::arm64_neon_ld2r: - case Intrinsic::arm64_neon_ld3r: - case Intrinsic::arm64_neon_ld4r: { - Info.opc = ISD::INTRINSIC_W_CHAIN; - // Conservatively set memVT to the entire set of vectors loaded. 
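For the structured loads, the next hunk sizes memVT as the whole multi-vector transfer: the allocated size in bytes divided by 8, expressed as that many i64 lanes. For example, an ld3 returning three <4 x i32> vectors covers 48 bytes, so memVT ends up as a 6-lane i64 vector (a small arithmetic sketch, illustrative only):

#include <cstdio>

int main() {
  // arm64.neon.ld3 returning { <4 x i32>, <4 x i32>, <4 x i32> }:
  unsigned TotalBytes = 3 * 16;           // three 128-bit vectors
  unsigned I64Lanes = TotalBytes / 8;     // memVT becomes a 6 x i64 vector
  printf("memVT lanes of i64: %u\n", I64Lanes);  // 6
}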
- uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; - Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); - Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); - Info.offset = 0; - Info.align = 0; - Info.vol = false; // volatile loads with NEON intrinsics not supported - Info.readMem = true; - Info.writeMem = false; - return true; - } - case Intrinsic::arm64_neon_st2: - case Intrinsic::arm64_neon_st3: - case Intrinsic::arm64_neon_st4: - case Intrinsic::arm64_neon_st2lane: - case Intrinsic::arm64_neon_st3lane: - case Intrinsic::arm64_neon_st4lane: { - Info.opc = ISD::INTRINSIC_VOID; - // Conservatively set memVT to the entire set of vectors stored. - unsigned NumElts = 0; - for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { - Type *ArgTy = I.getArgOperand(ArgI)->getType(); - if (!ArgTy->isVectorTy()) - break; - NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; - } - Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); - Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); - Info.offset = 0; - Info.align = 0; - Info.vol = false; // volatile stores with NEON intrinsics not supported - Info.readMem = false; - Info.writeMem = true; - return true; - } - case Intrinsic::arm64_ldxr: { - PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(PtrTy->getElementType()); - Info.ptrVal = I.getArgOperand(0); - Info.offset = 0; - Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); - Info.vol = true; - Info.readMem = true; - Info.writeMem = false; - return true; - } - case Intrinsic::arm64_stxr: { - PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType()); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(PtrTy->getElementType()); - Info.ptrVal = I.getArgOperand(1); - Info.offset = 0; - Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); - Info.vol = true; - Info.readMem = false; - Info.writeMem = true; - return true; - } - case Intrinsic::arm64_ldxp: { - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::i128; - Info.ptrVal = I.getArgOperand(0); - Info.offset = 0; - Info.align = 16; - Info.vol = true; - Info.readMem = true; - Info.writeMem = false; - return true; - } - case Intrinsic::arm64_stxp: { - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::i128; - Info.ptrVal = I.getArgOperand(2); - Info.offset = 0; - Info.align = 16; - Info.vol = true; - Info.readMem = false; - Info.writeMem = true; - return true; - } - default: - break; - } - - return false; -} - -// Truncations from 64-bit GPR to 32-bit GPR is free. -bool ARM64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { - if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) - return false; - unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); - unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; -} -bool ARM64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { - if (!VT1.isInteger() || !VT2.isInteger()) - return false; - unsigned NumBits1 = VT1.getSizeInBits(); - unsigned NumBits2 = VT2.getSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; -} - -// All 32-bit GPR operations implicitly zero the high-half of the corresponding -// 64-bit GPR. 
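The comment above is the whole justification for the isZExtFree overrides that follow: writing a W register clears bits 63:32, so a 32-to-64-bit zero extension costs nothing. A minimal source-level illustration (standalone sketch; the "no extra instruction" note is an expectation from the comment, not something this snippet verifies):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Any 32-bit operation already zeroes the upper half of the 64-bit
      // register, so this widening is expected to need no extra instruction.
      uint32_t w = 0xdeadbeefu;
      uint64_t x = w;              // free zero extension on this target
      assert((x >> 32) == 0);
      return 0;
    }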
-bool ARM64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { - if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) - return false; - unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); - unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 == 32 && NumBits2 == 64) - return true; - return false; -} -bool ARM64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { - if (!VT1.isInteger() || !VT2.isInteger()) - return false; - unsigned NumBits1 = VT1.getSizeInBits(); - unsigned NumBits2 = VT2.getSizeInBits(); - if (NumBits1 == 32 && NumBits2 == 64) - return true; - return false; -} - -bool ARM64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { - EVT VT1 = Val.getValueType(); - if (isZExtFree(VT1, VT2)) { - return true; - } - - if (Val.getOpcode() != ISD::LOAD) - return false; - - // 8-, 16-, and 32-bit integer loads all implicitly zero-extend. - return (VT1.isSimple() && VT1.isInteger() && VT2.isSimple() && - VT2.isInteger() && VT1.getSizeInBits() <= 32); -} - -bool ARM64TargetLowering::hasPairedLoad(Type *LoadedType, - unsigned &RequiredAligment) const { - if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy()) - return false; - // Cyclone supports unaligned accesses. - RequiredAligment = 0; - unsigned NumBits = LoadedType->getPrimitiveSizeInBits(); - return NumBits == 32 || NumBits == 64; -} - -bool ARM64TargetLowering::hasPairedLoad(EVT LoadedType, - unsigned &RequiredAligment) const { - if (!LoadedType.isSimple() || - (!LoadedType.isInteger() && !LoadedType.isFloatingPoint())) - return false; - // Cyclone supports unaligned accesses. - RequiredAligment = 0; - unsigned NumBits = LoadedType.getSizeInBits(); - return NumBits == 32 || NumBits == 64; -} - -static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, - unsigned AlignCheck) { - return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) && - (DstAlign == 0 || DstAlign % AlignCheck == 0)); -} - -EVT ARM64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, - unsigned SrcAlign, bool IsMemset, - bool ZeroMemset, bool MemcpyStrSrc, - MachineFunction &MF) const { - // Don't use AdvSIMD to implement 16-byte memset. It would have taken one - // instruction to materialize the v2i64 zero and one store (with restrictive - // addressing mode). Just do two i64 store of zero-registers. - bool Fast; - const Function *F = MF.getFunction(); - if (!IsMemset && Size >= 16 && - !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::NoImplicitFloat) && - (memOpAlign(SrcAlign, DstAlign, 16) || - (allowsUnalignedMemoryAccesses(MVT::v2i64, 0, &Fast) && Fast))) - return MVT::v2i64; - - return Size >= 8 ? MVT::i64 : MVT::i32; -} - -// 12-bit optionally shifted immediates are legal for adds. -bool ARM64TargetLowering::isLegalAddImmediate(int64_t Immed) const { - if ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0)) - return true; - return false; -} - -// Integer comparisons are implemented with ADDS/SUBS, so the range of valid -// immediates is the same as for an add or a sub. -bool ARM64TargetLowering::isLegalICmpImmediate(int64_t Immed) const { - if (Immed < 0) - Immed *= -1; - return isLegalAddImmediate(Immed); -} - -/// isLegalAddressingMode - Return true if the addressing mode represented -/// by AM is legal for this target, for a load/store of the specified type. 
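For reference, the "legal add immediate" predicate above accepts exactly the values ADD/SUB can encode: a 12-bit immediate, optionally shifted left by 12. A standalone mirror of that check (assumes a non-negative immediate, which is what the ICmp caller above arranges by negating first):

    #include <cassert>
    #include <cstdint>

    // Mirrors the check above: legal if it fits in 12 bits, or is a 12-bit
    // value shifted left by 12 bits.
    static bool legalAddImmediate(int64_t imm) {
      return (imm >> 12) == 0 || ((imm & 0xfff) == 0 && (imm >> 24) == 0);
    }

    int main() {
      assert(legalAddImmediate(0xfff));      // plain 12-bit immediate
      assert(legalAddImmediate(0xabc000));   // 12-bit immediate, LSL #12
      assert(!legalAddImmediate(0x1000001)); // needs more than one add
      assert(!legalAddImmediate(0x1001000)); // too wide even when shifted
      return 0;
    }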
-bool ARM64TargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { - // ARM64 has five basic addressing modes: - // reg - // reg + 9-bit signed offset - // reg + SIZE_IN_BYTES * 12-bit unsigned offset - // reg1 + reg2 - // reg + SIZE_IN_BYTES * reg - - // No global is ever allowed as a base. - if (AM.BaseGV) - return false; - - // No reg+reg+imm addressing. - if (AM.HasBaseReg && AM.BaseOffs && AM.Scale) - return false; - - // check reg + imm case: - // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12 - uint64_t NumBytes = 0; - if (Ty->isSized()) { - uint64_t NumBits = getDataLayout()->getTypeSizeInBits(Ty); - NumBytes = NumBits / 8; - if (!isPowerOf2_64(NumBits)) - NumBytes = 0; - } - - if (!AM.Scale) { - int64_t Offset = AM.BaseOffs; - - // 9-bit signed offset - if (Offset >= -(1LL << 9) && Offset <= (1LL << 9) - 1) - return true; - - // 12-bit unsigned offset - unsigned shift = Log2_64(NumBytes); - if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 && - // Must be a multiple of NumBytes (NumBytes is a power of 2) - (Offset >> shift) << shift == Offset) - return true; - return false; - } - - // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2 - - if (!AM.Scale || AM.Scale == 1 || - (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes)) - return true; - return false; -} - -int ARM64TargetLowering::getScalingFactorCost(const AddrMode &AM, - Type *Ty) const { - // Scaling factors are not free at all. - // Operands | Rt Latency - // ------------------------------------------- - // Rt, [Xn, Xm] | 4 - // ------------------------------------------- - // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5 - // Rt, [Xn, Wm, <extend> #imm] | - if (isLegalAddressingMode(AM, Ty)) - // Scale represents reg2 * scale, thus account for 1 if - // it is not equal to 0 or 1. - return AM.Scale != 0 && AM.Scale != 1; - return -1; -} - -bool ARM64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { - VT = VT.getScalarType(); - - if (!VT.isSimple()) - return false; - - switch (VT.getSimpleVT().SimpleTy) { - case MVT::f32: - case MVT::f64: - return true; - default: - break; - } - - return false; -} - -const uint16_t * -ARM64TargetLowering::getScratchRegisters(CallingConv::ID) const { - // LR is a callee-save register, but we must treat it as clobbered by any call - // site. Hence we include LR in the scratch registers, which are in turn added - // as implicit-defs for stackmaps and patchpoints. - static const uint16_t ScratchRegs[] = { - ARM64::X16, ARM64::X17, ARM64::LR, 0 - }; - return ScratchRegs; -} - -bool ARM64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, - Type *Ty) const { - assert(Ty->isIntegerTy()); - - unsigned BitSize = Ty->getPrimitiveSizeInBits(); - if (BitSize == 0) - return false; - - int64_t Val = Imm.getSExtValue(); - if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize)) - return true; - - if ((int64_t)Val < 0) - Val = ~Val; - if (BitSize == 32) - Val &= (1LL << 32) - 1; - - unsigned LZ = countLeadingZeros((uint64_t)Val); - unsigned Shift = (63 - LZ) / 16; - // MOVZ is free so return true for one or fewer MOVK. - return (Shift < 3) ? true : false; -} - -// Generate SUBS and CSEL for integer abs. -static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { - EVT VT = N->getValueType(0); - - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - SDLoc DL(N); - - // Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1) - // and change it to SUB and CSEL. 
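The comment above describes the classic branch-free abs idiom; written out at the source level, the pattern being matched is the following (standalone sketch; it relies on arithmetic right shift of a negative signed value, which is the behaviour on AArch64 and common compilers):

    #include <cassert>
    #include <cstdint>

    // y = x >> 31 is all sign bits (0 or -1); abs(x) = (x + y) ^ y.
    // The combine below rewrites the resulting XOR/ADD/SRA triangle into
    // SUBS + CSEL instead.
    static int32_t absIdiom(int32_t x) {
      int32_t y = x >> 31;
      return (x + y) ^ y;
    }

    int main() {
      assert(absIdiom(-7) == 7);
      assert(absIdiom(12) == 12);
      assert(absIdiom(0) == 0);
      return 0;
    }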
- if (VT.isInteger() && N->getOpcode() == ISD::XOR && - N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 && - N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0)) - if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1))) - if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) { - SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), - N0.getOperand(0)); - // Generate SUBS & CSEL. - SDValue Cmp = - DAG.getNode(ARM64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), - N0.getOperand(0), DAG.getConstant(0, VT)); - return DAG.getNode(ARM64ISD::CSEL, DL, VT, N0.getOperand(0), Neg, - DAG.getConstant(ARM64CC::PL, MVT::i32), - SDValue(Cmp.getNode(), 1)); - } - return SDValue(); -} - -// performXorCombine - Attempts to handle integer ABS. -static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const ARM64Subtarget *Subtarget) { - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - return performIntegerAbsCombine(N, DAG); -} - -static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const ARM64Subtarget *Subtarget) { - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - // Multiplication of a power of two plus/minus one can be done more - // cheaply as as shift+add/sub. For now, this is true unilaterally. If - // future CPUs have a cheaper MADD instruction, this may need to be - // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and - // 64-bit is 5 cycles, so this is always a win. - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { - APInt Value = C->getAPIntValue(); - EVT VT = N->getValueType(0); - APInt VP1 = Value + 1; - if (VP1.isPowerOf2()) { - // Multiplying by one less than a power of two, replace with a shift - // and a subtract. - SDValue ShiftedVal = - DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), - DAG.getConstant(VP1.logBase2(), MVT::i64)); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal, N->getOperand(0)); - } - APInt VM1 = Value - 1; - if (VM1.isPowerOf2()) { - // Multiplying by one more than a power of two, replace with a shift - // and an add. - SDValue ShiftedVal = - DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), - DAG.getConstant(VM1.logBase2(), MVT::i64)); - return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0)); - } - } - return SDValue(); -} - -static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) { - EVT VT = N->getValueType(0); - if (VT != MVT::f32 && VT != MVT::f64) - return SDValue(); - // Only optimize when the source and destination types have the same width. - if (VT.getSizeInBits() != N->getOperand(0).getValueType().getSizeInBits()) - return SDValue(); - - // If the result of an integer load is only used by an integer-to-float - // conversion, use a fp load instead and a AdvSIMD scalar {S|U}CVTF instead. - // This eliminates an "integer-to-vector-move UOP and improve throughput. - SDValue N0 = N->getOperand(0); - if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && - // Do not change the width of a volatile load. - !cast<LoadSDNode>(N0)->isVolatile()) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->isVolatile(), - LN0->isNonTemporal(), LN0->isInvariant(), - LN0->getAlignment()); - - // Make sure successors of the original load stay after it by updating them - // to use the new Chain. 
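The multiply combine above is easiest to see at the source level: multiplying by a power of two plus or minus one becomes a shift plus an add or subtract. A small standalone check of the two rewrites (the latency numbers quoted in the comment come from the source text, not from this snippet):

    #include <cassert>
    #include <cstdint>

    static uint64_t mulBy7(uint64_t x) { return (x << 3) - x; } // x * (2^3 - 1)
    static uint64_t mulBy9(uint64_t x) { return (x << 3) + x; } // x * (2^3 + 1)

    int main() {
      const uint64_t samples[] = {0, 1, 5, 123456789, 0xffffffffffffffffULL};
      for (uint64_t x : samples) {
        assert(mulBy7(x) == x * 7);
        assert(mulBy9(x) == x * 9);
      }
      return 0;
    }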
- DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1)); - - unsigned Opcode = - (N->getOpcode() == ISD::SINT_TO_FP) ? ARM64ISD::SITOF : ARM64ISD::UITOF; - return DAG.getNode(Opcode, SDLoc(N), VT, Load); - } - - return SDValue(); -} - -/// An EXTR instruction is made up of two shifts, ORed together. This helper -/// searches for and classifies those shifts. -static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, - bool &FromHi) { - if (N.getOpcode() == ISD::SHL) - FromHi = false; - else if (N.getOpcode() == ISD::SRL) - FromHi = true; - else - return false; - - if (!isa<ConstantSDNode>(N.getOperand(1))) - return false; - - ShiftAmount = N->getConstantOperandVal(1); - Src = N->getOperand(0); - return true; -} - -/// EXTR instruction extracts a contiguous chunk of bits from two existing -/// registers viewed as a high/low pair. This function looks for the pattern: -/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an -/// EXTR. Can't quite be done in TableGen because the two immediates aren't -/// independent. -static SDValue tryCombineToEXTR(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - - assert(N->getOpcode() == ISD::OR && "Unexpected root"); - - if (VT != MVT::i32 && VT != MVT::i64) - return SDValue(); - - SDValue LHS; - uint32_t ShiftLHS = 0; - bool LHSFromHi = 0; - if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi)) - return SDValue(); - - SDValue RHS; - uint32_t ShiftRHS = 0; - bool RHSFromHi = 0; - if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi)) - return SDValue(); - - // If they're both trying to come from the high part of the register, they're - // not really an EXTR. - if (LHSFromHi == RHSFromHi) - return SDValue(); - - if (ShiftLHS + ShiftRHS != VT.getSizeInBits()) - return SDValue(); - - if (LHSFromHi) { - std::swap(LHS, RHS); - std::swap(ShiftLHS, ShiftRHS); - } - - return DAG.getNode(ARM64ISD::EXTR, DL, VT, LHS, RHS, - DAG.getConstant(ShiftRHS, MVT::i64)); -} - -static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, - const ARM64Subtarget *Subtarget) { - // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) - if (!EnableARM64ExtrGeneration) - return SDValue(); - SelectionDAG &DAG = DCI.DAG; - EVT VT = N->getValueType(0); - - if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) - return SDValue(); - - SDValue Res = tryCombineToEXTR(N, DCI); - if (Res.getNode()) - return Res; - - return SDValue(); -} - -static SDValue performBitcastCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - // Wait 'til after everything is legalized to try this. That way we have - // legal vector types and such. - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - // Remove extraneous bitcasts around an extract_subvector. - // For example, - // (v4i16 (bitconvert - // (extract_subvector (v2i64 (bitconvert (v8i16 ...)), (i64 1))))) - // becomes - // (extract_subvector ((v8i16 ...), (i64 4))) - - // Only interested in 64-bit vectors as the ultimate result. - EVT VT = N->getValueType(0); - if (!VT.isVector()) - return SDValue(); - if (VT.getSimpleVT().getSizeInBits() != 64) - return SDValue(); - // Is the operand an extract_subvector starting at the beginning or halfway - // point of the vector? A low half may also come through as an - // EXTRACT_SUBREG, so look for that, too. 
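tryCombineToEXTR above looks for an OR of two opposite shifts whose amounts sum to the register width; in source form that is the usual funnel-shift idiom. A standalone example of the exact shape being matched (illustrative only):

    #include <cassert>
    #include <cstdint>

    // (or (shl hi, n), (srl lo, 32 - n)) with 0 < n < 32: the pattern that
    // becomes a single EXTR of the hi:lo register pair.
    static uint32_t extrLike(uint32_t hi, uint32_t lo, unsigned n) {
      return (hi << n) | (lo >> (32 - n));
    }

    int main() {
      assert(extrLike(0x0000deadu, 0xbeef0000u, 16) == 0xdeadbeefu);
      assert(extrLike(0x12345678u, 0x9abcdef0u, 8) == 0x3456789au);
      return 0;
    }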
- SDValue Op0 = N->getOperand(0); - if (Op0->getOpcode() != ISD::EXTRACT_SUBVECTOR && - !(Op0->isMachineOpcode() && - Op0->getMachineOpcode() == ARM64::EXTRACT_SUBREG)) - return SDValue(); - uint64_t idx = cast<ConstantSDNode>(Op0->getOperand(1))->getZExtValue(); - if (Op0->getOpcode() == ISD::EXTRACT_SUBVECTOR) { - if (Op0->getValueType(0).getVectorNumElements() != idx && idx != 0) - return SDValue(); - } else if (Op0->getMachineOpcode() == ARM64::EXTRACT_SUBREG) { - if (idx != ARM64::dsub) - return SDValue(); - // The dsub reference is equivalent to a lane zero subvector reference. - idx = 0; - } - // Look through the bitcast of the input to the extract. - if (Op0->getOperand(0)->getOpcode() != ISD::BITCAST) - return SDValue(); - SDValue Source = Op0->getOperand(0)->getOperand(0); - // If the source type has twice the number of elements as our destination - // type, we know this is an extract of the high or low half of the vector. - EVT SVT = Source->getValueType(0); - if (SVT.getVectorNumElements() != VT.getVectorNumElements() * 2) - return SDValue(); - - DEBUG(dbgs() << "arm64-lower: bitcast extract_subvector simplification\n"); - - // Create the simplified form to just extract the low or high half of the - // vector directly rather than bothering with the bitcasts. - SDLoc dl(N); - unsigned NumElements = VT.getVectorNumElements(); - if (idx) { - SDValue HalfIdx = DAG.getConstant(NumElements, MVT::i64); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx); - } else { - SDValue SubReg = DAG.getTargetConstant(ARM64::dsub, MVT::i32); - return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT, - Source, SubReg), - 0); - } -} - -static SDValue performConcatVectorsCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - // Wait 'til after everything is legalized to try this. That way we have - // legal vector types and such. - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - SDLoc dl(N); - EVT VT = N->getValueType(0); - - // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector - // splat. The indexed instructions are going to be expecting a DUPLANE64, so - // canonicalise to that. - if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) { - assert(VT.getVectorElementType().getSizeInBits() == 64); - return DAG.getNode(ARM64ISD::DUPLANE64, dl, VT, - WidenVector(N->getOperand(0), DAG), - DAG.getConstant(0, MVT::i64)); - } - - // Canonicalise concat_vectors so that the right-hand vector has as few - // bit-casts as possible before its real operation. The primary matching - // destination for these operations will be the narrowing "2" instructions, - // which depend on the operation being performed on this right-hand vector. - // For example, - // (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS)))) - // becomes - // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS)) - - SDValue Op1 = N->getOperand(1); - if (Op1->getOpcode() != ISD::BITCAST) - return SDValue(); - SDValue RHS = Op1->getOperand(0); - MVT RHSTy = RHS.getValueType().getSimpleVT(); - // If the RHS is not a vector, this is not the pattern we're looking for. 
- if (!RHSTy.isVector()) - return SDValue(); - - DEBUG(dbgs() << "arm64-lower: concat_vectors bitcast simplification\n"); - - MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(), - RHSTy.getVectorNumElements() * 2); - return DAG.getNode( - ISD::BITCAST, dl, VT, - DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy, - DAG.getNode(ISD::BITCAST, dl, RHSTy, N->getOperand(0)), RHS)); -} - -static SDValue tryCombineFixedPointConvert(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - // Wait 'til after everything is legalized to try this. That way we have - // legal vector types and such. - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - // Transform a scalar conversion of a value from a lane extract into a - // lane extract of a vector conversion. E.g., from foo1 to foo2: - // double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); } - // double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; } - // - // The second form interacts better with instruction selection and the - // register allocator to avoid cross-class register copies that aren't - // coalescable due to a lane reference. - - // Check the operand and see if it originates from a lane extract. - SDValue Op1 = N->getOperand(1); - if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { - // Yep, no additional predication needed. Perform the transform. - SDValue IID = N->getOperand(0); - SDValue Shift = N->getOperand(2); - SDValue Vec = Op1.getOperand(0); - SDValue Lane = Op1.getOperand(1); - EVT ResTy = N->getValueType(0); - EVT VecResTy; - SDLoc DL(N); - - // The vector width should be 128 bits by the time we get here, even - // if it started as 64 bits (the extract_vector handling will have - // done so). - assert(Vec.getValueType().getSizeInBits() == 128 && - "unexpected vector size on extract_vector_elt!"); - if (Vec.getValueType() == MVT::v4i32) - VecResTy = MVT::v4f32; - else if (Vec.getValueType() == MVT::v2i64) - VecResTy = MVT::v2f64; - else - assert(0 && "unexpected vector type!"); - - SDValue Convert = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane); - } - return SDValue(); -} - -// AArch64 high-vector "long" operations are formed by performing the non-high -// version on an extract_subvector of each operand which gets the high half: -// -// (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS)) -// -// However, there are cases which don't have an extract_high explicitly, but -// have another operation that can be made compatible with one for free. For -// example: -// -// (dupv64 scalar) --> (extract_high (dup128 scalar)) -// -// This routine does the actual conversion of such DUPs, once outer routines -// have determined that everything else is in order. -static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) { - // We can handle most types of duplicate, but the lane ones have an extra - // operand saying *which* lane, so we need to know. 
- bool IsDUPLANE; - switch (N.getOpcode()) { - case ARM64ISD::DUP: - IsDUPLANE = false; - break; - case ARM64ISD::DUPLANE8: - case ARM64ISD::DUPLANE16: - case ARM64ISD::DUPLANE32: - case ARM64ISD::DUPLANE64: - IsDUPLANE = true; - break; - default: - return SDValue(); - } - - MVT NarrowTy = N.getSimpleValueType(); - if (!NarrowTy.is64BitVector()) - return SDValue(); - - MVT ElementTy = NarrowTy.getVectorElementType(); - unsigned NumElems = NarrowTy.getVectorNumElements(); - MVT NewDUPVT = MVT::getVectorVT(ElementTy, NumElems * 2); - - SDValue NewDUP; - if (IsDUPLANE) - NewDUP = DAG.getNode(N.getOpcode(), SDLoc(N), NewDUPVT, N.getOperand(0), - N.getOperand(1)); - else - NewDUP = DAG.getNode(ARM64ISD::DUP, SDLoc(N), NewDUPVT, N.getOperand(0)); - - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N.getNode()), NarrowTy, - NewDUP, DAG.getConstant(NumElems, MVT::i64)); -} - -static bool isEssentiallyExtractSubvector(SDValue N) { - if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR) - return true; - - return N.getOpcode() == ISD::BITCAST && - N.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR; -} - -/// \brief Helper structure to keep track of ISD::SET_CC operands. -struct GenericSetCCInfo { - const SDValue *Opnd0; - const SDValue *Opnd1; - ISD::CondCode CC; -}; - -/// \brief Helper structure to keep track of a SET_CC lowered into ARM64 code. -struct ARM64SetCCInfo { - const SDValue *Cmp; - ARM64CC::CondCode CC; -}; - -/// \brief Helper structure to keep track of SetCC information. -union SetCCInfo { - GenericSetCCInfo Generic; - ARM64SetCCInfo ARM64; -}; - -/// \brief Helper structure to be able to read SetCC information. -/// If set to true, IsARM64 field, Info is a ARM64SetCCInfo, otherwise Info is -/// a GenericSetCCInfo. -struct SetCCInfoAndKind { - SetCCInfo Info; - bool IsARM64; -}; - -/// \brief Check whether or not \p Op is a SET_CC operation, either a generic or -/// an -/// ARM64 lowered one. -/// \p SetCCInfo is filled accordingly. -/// \post SetCCInfo is meanginfull only when this function returns true. -/// \return True when Op is a kind of SET_CC operation. -static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) { - // If this is a setcc, this is straight forward. - if (Op.getOpcode() == ISD::SETCC) { - SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0); - SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1); - SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - SetCCInfo.IsARM64 = false; - return true; - } - // Otherwise, check if this is a matching csel instruction. - // In other words: - // - csel 1, 0, cc - // - csel 0, 1, !cc - if (Op.getOpcode() != ARM64ISD::CSEL) - return false; - // Set the information about the operands. - // TODO: we want the operands of the Cmp not the csel - SetCCInfo.Info.ARM64.Cmp = &Op.getOperand(3); - SetCCInfo.IsARM64 = true; - SetCCInfo.Info.ARM64.CC = static_cast<ARM64CC::CondCode>( - cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue()); - - // Check that the operands matches the constraints: - // (1) Both operands must be constants. - // (2) One must be 1 and the other must be 0. - ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0)); - ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1)); - - // Check (1). - if (!TValue || !FValue) - return false; - - // Check (2). - if (!TValue->isOne()) { - // Update the comparison when we are interested in !cc. 
- std::swap(TValue, FValue); - SetCCInfo.Info.ARM64.CC = - ARM64CC::getInvertedCondCode(SetCCInfo.Info.ARM64.CC); - } - return TValue->isOne() && FValue->isNullValue(); -} - -// The folding we want to perform is: -// (add x, (setcc cc ...) ) -// --> -// (csel x, (add x, 1), !cc ...) -// -// The latter will get matched to a CSINC instruction. -static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) { - assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!"); - SDValue LHS = Op->getOperand(0); - SDValue RHS = Op->getOperand(1); - SetCCInfoAndKind InfoAndKind; - - // If neither operand is a SET_CC, give up. - if (!isSetCC(LHS, InfoAndKind)) { - std::swap(LHS, RHS); - if (!isSetCC(LHS, InfoAndKind)) - return SDValue(); - } - - // FIXME: This could be generatized to work for FP comparisons. - EVT CmpVT = InfoAndKind.IsARM64 - ? InfoAndKind.Info.ARM64.Cmp->getOperand(0).getValueType() - : InfoAndKind.Info.Generic.Opnd0->getValueType(); - if (CmpVT != MVT::i32 && CmpVT != MVT::i64) - return SDValue(); - - SDValue CCVal; - SDValue Cmp; - SDLoc dl(Op); - if (InfoAndKind.IsARM64) { - CCVal = DAG.getConstant( - ARM64CC::getInvertedCondCode(InfoAndKind.Info.ARM64.CC), MVT::i32); - Cmp = *InfoAndKind.Info.ARM64.Cmp; - } else - Cmp = getARM64Cmp(*InfoAndKind.Info.Generic.Opnd0, - *InfoAndKind.Info.Generic.Opnd1, - ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, true), - CCVal, DAG, dl); - - EVT VT = Op->getValueType(0); - LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, VT)); - return DAG.getNode(ARM64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp); -} - -// The basic add/sub long vector instructions have variants with "2" on the end -// which act on the high-half of their inputs. They are normally matched by -// patterns like: -// -// (add (zeroext (extract_high LHS)), -// (zeroext (extract_high RHS))) -// -> uaddl2 vD, vN, vM -// -// However, if one of the extracts is something like a duplicate, this -// instruction can still be used profitably. This function puts the DAG into a -// more appropriate form for those patterns to trigger. -static SDValue performAddSubLongCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - MVT VT = N->getSimpleValueType(0); - if (!VT.is128BitVector()) { - if (N->getOpcode() == ISD::ADD) - return performSetccAddFolding(N, DAG); - return SDValue(); - } - - // Make sure both branches are extended in the same way. - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - if ((LHS.getOpcode() != ISD::ZERO_EXTEND && - LHS.getOpcode() != ISD::SIGN_EXTEND) || - LHS.getOpcode() != RHS.getOpcode()) - return SDValue(); - - unsigned ExtType = LHS.getOpcode(); - - // It's not worth doing if at least one of the inputs isn't already an - // extract, but we don't know which it'll be so we have to try both. - if (isEssentiallyExtractSubvector(LHS.getOperand(0))) { - RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG); - if (!RHS.getNode()) - return SDValue(); - - RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS); - } else if (isEssentiallyExtractSubvector(RHS.getOperand(0))) { - LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG); - if (!LHS.getNode()) - return SDValue(); - - LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS); - } - - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS); -} - -// Massage DAGs which we can use the high-half "long" operations on into -// something isel will recognize better. E.g. 
-// -// (arm64_neon_umull (extract_high vec) (dupv64 scalar)) --> -// (arm64_neon_umull (extract_high (v2i64 vec))) -// (extract_high (v2i64 (dup128 scalar))))) -// -static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - SDValue LHS = N->getOperand(1); - SDValue RHS = N->getOperand(2); - assert(LHS.getValueType().is64BitVector() && - RHS.getValueType().is64BitVector() && - "unexpected shape for long operation"); - - // Either node could be a DUP, but it's not worth doing both of them (you'd - // just as well use the non-high version) so look for a corresponding extract - // operation on the other "wing". - if (isEssentiallyExtractSubvector(LHS)) { - RHS = tryExtendDUPToExtractHigh(RHS, DAG); - if (!RHS.getNode()) - return SDValue(); - } else if (isEssentiallyExtractSubvector(RHS)) { - LHS = tryExtendDUPToExtractHigh(LHS, DAG); - if (!LHS.getNode()) - return SDValue(); - } - - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0), - N->getOperand(0), LHS, RHS); -} - -static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) { - MVT ElemTy = N->getSimpleValueType(0).getScalarType(); - unsigned ElemBits = ElemTy.getSizeInBits(); - - int64_t ShiftAmount; - if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) { - APInt SplatValue, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, - HasAnyUndefs, ElemBits) || - SplatBitSize != ElemBits) - return SDValue(); - - ShiftAmount = SplatValue.getSExtValue(); - } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) { - ShiftAmount = CVN->getSExtValue(); - } else - return SDValue(); - - unsigned Opcode; - bool IsRightShift; - switch (IID) { - default: - llvm_unreachable("Unknown shift intrinsic"); - case Intrinsic::arm64_neon_sqshl: - Opcode = ARM64ISD::SQSHL_I; - IsRightShift = false; - break; - case Intrinsic::arm64_neon_uqshl: - Opcode = ARM64ISD::UQSHL_I; - IsRightShift = false; - break; - case Intrinsic::arm64_neon_srshl: - Opcode = ARM64ISD::SRSHR_I; - IsRightShift = true; - break; - case Intrinsic::arm64_neon_urshl: - Opcode = ARM64ISD::URSHR_I; - IsRightShift = true; - break; - case Intrinsic::arm64_neon_sqshlu: - Opcode = ARM64ISD::SQSHLU_I; - IsRightShift = false; - break; - } - - if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) - return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1), - DAG.getConstant(-ShiftAmount, MVT::i32)); - else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount <= ElemBits) - return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1), - DAG.getConstant(ShiftAmount, MVT::i32)); - - return SDValue(); -} - -// The CRC32[BH] instructions ignore the high bits of their data operand. Since -// the intrinsics must be legal and take an i32, this means there's almost -// certainly going to be a zext in the DAG which we can eliminate. 
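The comment above explains why the AND feeding the CRC32[BH] intrinsics can be folded away: only the low byte (or halfword) of the data operand participates. A standalone data-flow sketch using a plain software byte-CRC step with the reflected CRC-32 polynomial; it is meant to show the masking argument, not to be bit-exact with the hardware instruction:

    #include <cassert>
    #include <cstdint>

    // Only the low byte of 'data' is consumed, so masking it first (the
    // zext-of-AND seen in the DAG) changes nothing and can be eliminated.
    static uint32_t crc32ByteStep(uint32_t crc, uint32_t data) {
      crc ^= data & 0xff;
      for (int i = 0; i < 8; ++i)
        crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1u)));
      return crc;
    }

    int main() {
      assert(crc32ByteStep(0x12345678u, 0x000000ABu) ==
             crc32ByteStep(0x12345678u, 0xFFFFFFABu));
      return 0;
    }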
-static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) { - SDValue AndN = N->getOperand(2); - if (AndN.getOpcode() != ISD::AND) - return SDValue(); - - ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1)); - if (!CMask || CMask->getZExtValue() != Mask) - return SDValue(); - - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32, - N->getOperand(0), N->getOperand(1), AndN.getOperand(0)); -} - -static SDValue performIntrinsicCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const ARM64Subtarget *Subtarget) { - SelectionDAG &DAG = DCI.DAG; - unsigned IID = getIntrinsicID(N); - switch (IID) { - default: - break; - case Intrinsic::arm64_neon_vcvtfxs2fp: - case Intrinsic::arm64_neon_vcvtfxu2fp: - return tryCombineFixedPointConvert(N, DCI, DAG); - break; - case Intrinsic::arm64_neon_fmax: - return DAG.getNode(ARM64ISD::FMAX, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2)); - case Intrinsic::arm64_neon_fmin: - return DAG.getNode(ARM64ISD::FMIN, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2)); - case Intrinsic::arm64_neon_smull: - case Intrinsic::arm64_neon_umull: - case Intrinsic::arm64_neon_pmull: - case Intrinsic::arm64_neon_sqdmull: - return tryCombineLongOpWithDup(IID, N, DCI, DAG); - case Intrinsic::arm64_neon_sqshl: - case Intrinsic::arm64_neon_uqshl: - case Intrinsic::arm64_neon_sqshlu: - case Intrinsic::arm64_neon_srshl: - case Intrinsic::arm64_neon_urshl: - return tryCombineShiftImm(IID, N, DAG); - case Intrinsic::arm64_crc32b: - case Intrinsic::arm64_crc32cb: - return tryCombineCRC32(0xff, N, DAG); - case Intrinsic::arm64_crc32h: - case Intrinsic::arm64_crc32ch: - return tryCombineCRC32(0xffff, N, DAG); - } - return SDValue(); -} - -static SDValue performExtendCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then - // we can convert that DUP into another extract_high (of a bigger DUP), which - // helps the backend to decide that an sabdl2 would be useful, saving a real - // extract_high operation. - if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND && - N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) { - SDNode *ABDNode = N->getOperand(0).getNode(); - unsigned IID = getIntrinsicID(ABDNode); - if (IID == Intrinsic::arm64_neon_sabd || - IID == Intrinsic::arm64_neon_uabd) { - SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG); - if (!NewABD.getNode()) - return SDValue(); - - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), - NewABD); - } - } - - // This is effectively a custom type legalization for ARM64. - // - // Type legalization will split an extend of a small, legal, type to a larger - // illegal type by first splitting the destination type, often creating - // illegal source types, which then get legalized in isel-confusing ways, - // leading to really terrible codegen. E.g., - // %result = v8i32 sext v8i8 %value - // becomes - // %losrc = extract_subreg %value, ... - // %hisrc = extract_subreg %value, ... - // %lo = v4i32 sext v4i8 %losrc - // %hi = v4i32 sext v4i8 %hisrc - // Things go rapidly downhill from there. - // - // For ARM64, the [sz]ext vector instructions can only go up one element - // size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32 - // take two instructions. 
- // - // This implies that the most efficient way to do the extend from v8i8 - // to two v4i32 values is to first extend the v8i8 to v8i16, then do - // the normal splitting to happen for the v8i16->v8i32. - - // This is pre-legalization to catch some cases where the default - // type legalization will create ill-tempered code. - if (!DCI.isBeforeLegalizeOps()) - return SDValue(); - - // We're only interested in cleaning things up for non-legal vector types - // here. If both the source and destination are legal, things will just - // work naturally without any fiddling. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT ResVT = N->getValueType(0); - if (!ResVT.isVector() || TLI.isTypeLegal(ResVT)) - return SDValue(); - // If the vector type isn't a simple VT, it's beyond the scope of what - // we're worried about here. Let legalization do its thing and hope for - // the best. - if (!ResVT.isSimple()) - return SDValue(); - - SDValue Src = N->getOperand(0); - MVT SrcVT = Src->getValueType(0).getSimpleVT(); - // If the source VT is a 64-bit vector, we can play games and get the - // better results we want. - if (SrcVT.getSizeInBits() != 64) - return SDValue(); - - unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits(); - unsigned ElementCount = SrcVT.getVectorNumElements(); - SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), ElementCount); - SDLoc DL(N); - Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src); - - // Now split the rest of the operation into two halves, each with a 64 - // bit source. - EVT LoVT, HiVT; - SDValue Lo, Hi; - unsigned NumElements = ResVT.getVectorNumElements(); - assert(!(NumElements & 1) && "Splitting vector, but not in half!"); - LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), - ResVT.getVectorElementType(), NumElements / 2); - - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(), - LoVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo); - Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi); - - // Now combine the parts back together so we still have a single result - // like the combiner expects. - return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); -} - -/// Replace a splat of a scalar to a vector store by scalar stores of the scalar -/// value. The load store optimizer pass will merge them to store pair stores. -/// This has better performance than a splat of the scalar followed by a split -/// vector store. Even if the stores are not merged it is four stores vs a dup, -/// followed by an ext.b and two stores. -static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) { - SDValue StVal = St->getValue(); - EVT VT = StVal.getValueType(); - - // Don't replace floating point stores, they possibly won't be transformed to - // stp because of the store pair suppress pass. - if (VT.isFloatingPoint()) - return SDValue(); - - // Check for insert vector elements. - if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT) - return SDValue(); - - // We can express a splat as store pair(s) for 2 or 4 elements. - unsigned NumVecElts = VT.getVectorNumElements(); - if (NumVecElts != 4 && NumVecElts != 2) - return SDValue(); - SDValue SplatVal = StVal.getOperand(1); - unsigned RemainInsertElts = NumVecElts - 1; - - // Check that this is a splat. 
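The splitting strategy described in the comments above is: widen one element size first (v8i8 to v8i16), then let the normal v8i16 to two v4i32 split happen. A scalar, standalone check that the staged widening preserves values (the vector/instruction names in the comments are from the surrounding text; nothing here inspects generated code):

    #include <array>
    #include <cassert>
    #include <cstdint>

    int main() {
      std::array<int8_t, 8> src{-128, -1, 0, 1, 2, 63, 64, 127};
      std::array<int16_t, 8> mid{};       // one-step widening (v8i8 -> v8i16)
      for (unsigned i = 0; i < 8; ++i)
        mid[i] = src[i];
      std::array<int32_t, 4> lo{}, hi{};  // then the natural v8i16 -> v8i32 split
      for (unsigned i = 0; i < 4; ++i) {
        lo[i] = mid[i];
        hi[i] = mid[i + 4];
      }
      for (unsigned i = 0; i < 8; ++i)
        assert((i < 4 ? lo[i] : hi[i - 4]) == static_cast<int32_t>(src[i]));
      return 0;
    }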
- while (--RemainInsertElts) { - SDValue NextInsertElt = StVal.getOperand(0); - if (NextInsertElt.getOpcode() != ISD::INSERT_VECTOR_ELT) - return SDValue(); - if (NextInsertElt.getOperand(1) != SplatVal) - return SDValue(); - StVal = NextInsertElt; - } - unsigned OrigAlignment = St->getAlignment(); - unsigned EltOffset = NumVecElts == 4 ? 4 : 8; - unsigned Alignment = std::min(OrigAlignment, EltOffset); - - // Create scalar stores. This is at least as good as the code sequence for a - // split unaligned store wich is a dup.s, ext.b, and two stores. - // Most of the time the three stores should be replaced by store pair - // instructions (stp). - SDLoc DL(St); - SDValue BasePtr = St->getBasePtr(); - SDValue NewST1 = - DAG.getStore(St->getChain(), DL, SplatVal, BasePtr, St->getPointerInfo(), - St->isVolatile(), St->isNonTemporal(), St->getAlignment()); - - unsigned Offset = EltOffset; - while (--NumVecElts) { - SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, - DAG.getConstant(Offset, MVT::i64)); - NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr, - St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), Alignment); - Offset += EltOffset; - } - return NewST1; -} - -static SDValue performSTORECombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG, - const ARM64Subtarget *Subtarget) { - if (!DCI.isBeforeLegalize()) - return SDValue(); - - StoreSDNode *S = cast<StoreSDNode>(N); - if (S->isVolatile()) - return SDValue(); - - // Cyclone has bad performance on unaligned 16B stores when crossing line and - // page boundries. We want to split such stores. - if (!Subtarget->isCyclone()) - return SDValue(); - - // Don't split at Oz. - MachineFunction &MF = DAG.getMachineFunction(); - bool IsMinSize = MF.getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::MinSize); - if (IsMinSize) - return SDValue(); - - SDValue StVal = S->getValue(); - EVT VT = StVal.getValueType(); - - // Don't split v2i64 vectors. Memcpy lowering produces those and splitting - // those up regresses performance on micro-benchmarks and olden/bh. - if (!VT.isVector() || VT.getVectorNumElements() < 2 || VT == MVT::v2i64) - return SDValue(); - - // Split unaligned 16B stores. They are terrible for performance. - // Don't split stores with alignment of 1 or 2. Code that uses clang vector - // extensions can use this to mark that it does not want splitting to happen - // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of - // eliminating alignment hazards is only 1 in 8 for alignment of 2. - if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 || - S->getAlignment() <= 2) - return SDValue(); - - // If we get a splat of a scalar convert this vector store to a store of - // scalars. They will be merged into store pairs thereby removing two - // instructions. - SDValue ReplacedSplat = replaceSplatVectorStore(DAG, S); - if (ReplacedSplat != SDValue()) - return ReplacedSplat; - - SDLoc DL(S); - unsigned NumElts = VT.getVectorNumElements() / 2; - // Split VT into two. 
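replaceSplatVectorStore above trades a dup-plus-vector-store for scalar stores that the load/store optimizer can pair. At the source level the rewrite amounts to the following (standalone sketch; whether the stores are actually merged into stp is up to the later pass):

    #include <cassert>
    #include <cstdint>

    // Store a 4-lane splat as four scalar stores; adjacent pairs are
    // candidates for stp in the load/store optimizer.
    static void splatStore4(uint32_t *p, uint32_t v) {
      p[0] = v;
      p[1] = v;
      p[2] = v;
      p[3] = v;
    }

    int main() {
      uint32_t buf[4] = {0, 0, 0, 0};
      splatStore4(buf, 0x5a5a5a5au);
      for (uint32_t x : buf)
        assert(x == 0x5a5a5a5au);
      return 0;
    }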
- EVT HalfVT = - EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts); - SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal, - DAG.getIntPtrConstant(0)); - SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal, - DAG.getIntPtrConstant(NumElts)); - SDValue BasePtr = S->getBasePtr(); - SDValue NewST1 = - DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(), - S->isVolatile(), S->isNonTemporal(), S->getAlignment()); - SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, - DAG.getConstant(8, MVT::i64)); - return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr, - S->getPointerInfo(), S->isVolatile(), S->isNonTemporal(), - S->getAlignment()); -} - -// Optimize compare with zero and branch. -static SDValue performBRCONDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - SDValue Chain = N->getOperand(0); - SDValue Dest = N->getOperand(1); - SDValue CCVal = N->getOperand(2); - SDValue Cmp = N->getOperand(3); - - assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!"); - unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue(); - if (CC != ARM64CC::EQ && CC != ARM64CC::NE) - return SDValue(); - - unsigned CmpOpc = Cmp.getOpcode(); - if (CmpOpc != ARM64ISD::ADDS && CmpOpc != ARM64ISD::SUBS) - return SDValue(); - - // Only attempt folding if there is only one use of the flag and no use of the - // value. - if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1)) - return SDValue(); - - SDValue LHS = Cmp.getOperand(0); - SDValue RHS = Cmp.getOperand(1); - - assert(LHS.getValueType() == RHS.getValueType() && - "Expected the value type to be the same for both operands!"); - if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64) - return SDValue(); - - if (isa<ConstantSDNode>(LHS) && cast<ConstantSDNode>(LHS)->isNullValue()) - std::swap(LHS, RHS); - - if (!isa<ConstantSDNode>(RHS) || !cast<ConstantSDNode>(RHS)->isNullValue()) - return SDValue(); - - if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA || - LHS.getOpcode() == ISD::SRL) - return SDValue(); - - // Fold the compare into the branch instruction. - SDValue BR; - if (CC == ARM64CC::EQ) - BR = DAG.getNode(ARM64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest); - else - BR = DAG.getNode(ARM64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest); - - // Do not add new nodes to DAG combiner worklist. 
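performBRCONDCombine above folds a compare against zero whose flags feed only an EQ/NE branch into cbz/cbnz. The source-level shape it targets is simply this (standalone; the "cbz candidate" note is an expectation, not asserted by the snippet):

    #include <cassert>
    #include <cstdint>

    static int sign(int64_t x) {
      if (x == 0)          // compare with zero + EQ branch: cbz candidate
        return 0;
      return x < 0 ? -1 : 1;
    }

    int main() {
      assert(sign(0) == 0);
      assert(sign(-5) == -1);
      assert(sign(42) == 1);
      return 0;
    }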
- DCI.CombineTo(N, BR, false); - - return SDValue(); -} - -SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const { - SelectionDAG &DAG = DCI.DAG; - switch (N->getOpcode()) { - default: - break; - case ISD::ADD: - case ISD::SUB: - return performAddSubLongCombine(N, DCI, DAG); - case ISD::XOR: - return performXorCombine(N, DAG, DCI, Subtarget); - case ISD::MUL: - return performMulCombine(N, DAG, DCI, Subtarget); - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - return performIntToFpCombine(N, DAG); - case ISD::OR: - return performORCombine(N, DCI, Subtarget); - case ISD::INTRINSIC_WO_CHAIN: - return performIntrinsicCombine(N, DCI, Subtarget); - case ISD::ANY_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: - return performExtendCombine(N, DCI, DAG); - case ISD::BITCAST: - return performBitcastCombine(N, DCI, DAG); - case ISD::CONCAT_VECTORS: - return performConcatVectorsCombine(N, DCI, DAG); - case ISD::STORE: - return performSTORECombine(N, DCI, DAG, Subtarget); - case ARM64ISD::BRCOND: - return performBRCONDCombine(N, DCI, DAG); - } - return SDValue(); -} - -// Check if the return value is used as only a return value, as otherwise -// we can't perform a tail-call. In particular, we need to check for -// target ISD nodes that are returns and any other "odd" constructs -// that the generic analysis code won't necessarily catch. -bool ARM64TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { - if (N->getNumValues() != 1) - return false; - if (!N->hasNUsesOfValue(1, 0)) - return false; - - SDValue TCChain = Chain; - SDNode *Copy = *N->use_begin(); - if (Copy->getOpcode() == ISD::CopyToReg) { - // If the copy has a glue operand, we conservatively assume it isn't safe to - // perform a tail call. - if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == - MVT::Glue) - return false; - TCChain = Copy->getOperand(0); - } else if (Copy->getOpcode() != ISD::FP_EXTEND) - return false; - - bool HasRet = false; - for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); - UI != UE; ++UI) { - if (UI->getOpcode() != ARM64ISD::RET_FLAG) - return false; - HasRet = true; - } - - if (!HasRet) - return false; - - Chain = TCChain; - return true; -} - -// Return whether the an instruction can potentially be optimized to a tail -// call. This will cause the optimizers to attempt to move, or duplicate, -// return instructions to help enable tail call optimizations for this -// instruction. -bool ARM64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { - if (!EnableARM64TailCalls) - return false; - - if (!CI->isTailCall()) - return false; - - return true; -} - -bool ARM64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - bool &IsInc, - SelectionDAG &DAG) const { - if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) - return false; - - Base = Op->getOperand(0); - // All of the indexed addressing mode instructions take a signed - // 9 bit immediate offset. 
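getIndexedAddressParts above only forms pre/post-indexed loads and stores when the increment fits the signed 9-bit immediate field; note the code rejects offsets of 256 or more in magnitude. A standalone mirror of that acceptance check:

    #include <cassert>
    #include <cstdint>

    // Accepted exactly when -256 < offset < 256, matching the check above.
    static bool indexedOffsetAccepted(int64_t offset) {
      return offset > -256 && offset < 256;
    }

    int main() {
      assert(indexedOffsetAccepted(255) && indexedOffsetAccepted(-255));
      assert(!indexedOffsetAccepted(256) && !indexedOffsetAccepted(-256));
      return 0;
    }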
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) { - int64_t RHSC = (int64_t)RHS->getZExtValue(); - if (RHSC >= 256 || RHSC <= -256) - return false; - IsInc = (Op->getOpcode() == ISD::ADD); - Offset = Op->getOperand(1); - return true; - } - return false; -} - -bool ARM64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const { - EVT VT; - SDValue Ptr; - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { - VT = LD->getMemoryVT(); - Ptr = LD->getBasePtr(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { - VT = ST->getMemoryVT(); - Ptr = ST->getBasePtr(); - } else - return false; - - bool IsInc; - if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG)) - return false; - AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC; - return true; -} - -bool ARM64TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, - SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const { - EVT VT; - SDValue Ptr; - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { - VT = LD->getMemoryVT(); - Ptr = LD->getBasePtr(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { - VT = ST->getMemoryVT(); - Ptr = ST->getBasePtr(); - } else - return false; - - bool IsInc; - if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG)) - return false; - // Post-indexing updates the base, so it's not a valid transform - // if that's not the same as the load's pointer. - if (Ptr != Base) - return false; - AM = IsInc ? ISD::POST_INC : ISD::POST_DEC; - return true; -} - -/// The only 128-bit atomic operation is an stxp that succeeds. In particular -/// neither ldp nor ldxp are atomic. So the canonical sequence for an atomic -/// load is: -/// loop: -/// ldxp x0, x1, [x8] -/// stxp w2, x0, x1, [x8] -/// cbnz w2, loop -/// If the stxp succeeds then the ldxp managed to get both halves without an -/// intervening stxp from a different thread and the read was atomic. -static void ReplaceATOMIC_LOAD_128(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) { - SDLoc DL(N); - AtomicSDNode *AN = cast<AtomicSDNode>(N); - EVT VT = AN->getMemoryVT(); - SDValue Zero = DAG.getConstant(0, VT); - - // FIXME: Really want ATOMIC_LOAD_NOP but that doesn't fit into the existing - // scheme very well. Given the complexity of what we're already generating, an - // extra couple of ORRs probably won't make much difference. 
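The ldxp/stxp/cbnz loop described above is what a lock-free 16-byte atomic load has to turn into, since ldxp alone is not single-copy atomic. A standalone C++ sketch of the kind of access being expanded (whether it is lock-free, and whether libatomic is needed, depends on the build target):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    struct Pair128 {
      uint64_t lo, hi;
    };

    int main() {
      std::atomic<Pair128> p{Pair128{1, 2}};
      Pair128 v = p.load();   // the 128-bit atomic load the expansion handles
      assert(v.lo == 1 && v.hi == 2);
      return 0;
    }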
- SDValue Result = DAG.getAtomic(ISD::ATOMIC_LOAD_OR, DL, AN->getMemoryVT(), - N->getOperand(0), N->getOperand(1), Zero, - AN->getMemOperand(), AN->getOrdering(), - AN->getSynchScope()); - - Results.push_back(Result.getValue(0)); // Value - Results.push_back(Result.getValue(1)); // Chain -} - -static void ReplaceATOMIC_OP_128(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG, unsigned NewOp) { - SDLoc DL(N); - AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - assert(N->getValueType(0) == MVT::i128 && - "Only know how to expand i128 atomics"); - - SmallVector<SDValue, 6> Ops; - Ops.push_back(N->getOperand(1)); // Ptr - // Low part of Val1 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, - N->getOperand(2), DAG.getIntPtrConstant(0))); - // High part of Val1 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, - N->getOperand(2), DAG.getIntPtrConstant(1))); - if (NewOp == ARM64::ATOMIC_CMP_SWAP_I128) { - // Low part of Val2 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, - N->getOperand(3), DAG.getIntPtrConstant(0))); - // High part of Val2 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, - N->getOperand(3), DAG.getIntPtrConstant(1))); - } - - Ops.push_back(DAG.getTargetConstant(Ordering, MVT::i32)); - Ops.push_back(N->getOperand(0)); // Chain - - SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other); - SDNode *Result = DAG.getMachineNode(NewOp, DL, Tys, Ops); - SDValue OpsF[] = { SDValue(Result, 0), SDValue(Result, 1) }; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, OpsF, 2)); - Results.push_back(SDValue(Result, 2)); -} - -void ARM64TargetLowering::ReplaceNodeResults(SDNode *N, - SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) const { - switch (N->getOpcode()) { - default: - llvm_unreachable("Don't know how to custom expand this"); - case ISD::ATOMIC_LOAD: - ReplaceATOMIC_LOAD_128(N, Results, DAG); - return; - case ISD::ATOMIC_LOAD_ADD: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_ADD_I128); - return; - case ISD::ATOMIC_LOAD_SUB: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_SUB_I128); - return; - case ISD::ATOMIC_LOAD_AND: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_AND_I128); - return; - case ISD::ATOMIC_LOAD_OR: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_OR_I128); - return; - case ISD::ATOMIC_LOAD_XOR: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_XOR_I128); - return; - case ISD::ATOMIC_LOAD_NAND: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_NAND_I128); - return; - case ISD::ATOMIC_SWAP: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_SWAP_I128); - return; - case ISD::ATOMIC_LOAD_MIN: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_MIN_I128); - return; - case ISD::ATOMIC_LOAD_MAX: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_MAX_I128); - return; - case ISD::ATOMIC_LOAD_UMIN: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_UMIN_I128); - return; - case ISD::ATOMIC_LOAD_UMAX: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_UMAX_I128); - return; - case ISD::ATOMIC_CMP_SWAP: - ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_CMP_SWAP_I128); - return; - case ISD::FP_TO_UINT: - case ISD::FP_TO_SINT: - assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion"); - // Let normal code take care of it by not adding anything to Results. 
- return; - } -} diff --git a/lib/Target/ARM64/ARM64ISelLowering.h b/lib/Target/ARM64/ARM64ISelLowering.h deleted file mode 100644 index a4664ac..0000000 --- a/lib/Target/ARM64/ARM64ISelLowering.h +++ /dev/null @@ -1,422 +0,0 @@ -//==-- ARM64ISelLowering.h - ARM64 DAG Lowering Interface --------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interfaces that ARM64 uses to lower LLVM code into a -// selection DAG. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_ARM64_ISELLOWERING_H -#define LLVM_TARGET_ARM64_ISELLOWERING_H - -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/Target/TargetLowering.h" - -namespace llvm { - -namespace ARM64ISD { - -enum { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses. - CALL, // Function call. - - // Almost the same as a normal call node, except that a TLSDesc relocation is - // needed so the linker can relax it correctly if possible. - TLSDESC_CALL, - ADRP, // Page address of a TargetGlobalAddress operand. - ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand. - LOADgot, // Load from automatically generated descriptor (e.g. Global - // Offset Table, TLS record). - RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand. - BRCOND, // Conditional branch instruction; "b.cond". - CSEL, - FCSEL, // Conditional move instruction. - CSINV, // Conditional select invert. - CSNEG, // Conditional select negate. - CSINC, // Conditional select increment. - - // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on - // ELF. - THREAD_POINTER, - ADC, - SBC, // adc, sbc instructions - - // Arithmetic instructions which write flags. - ADDS, - SUBS, - ADCS, - SBCS, - ANDS, - - // Floating point comparison - FCMP, - - // Floating point max and min instructions. - FMAX, - FMIN, - - // Scalar extract - EXTR, - - // Scalar-to-vector duplication - DUP, - DUPLANE8, - DUPLANE16, - DUPLANE32, - DUPLANE64, - - // Vector immedate moves - MOVI, - MOVIshift, - MOVIedit, - MOVImsl, - FMOV, - MVNIshift, - MVNImsl, - - // Vector immediate ops - BICi, - ORRi, - - // Vector arithmetic negation - NEG, - - // Vector shuffles - ZIP1, - ZIP2, - UZP1, - UZP2, - TRN1, - TRN2, - REV16, - REV32, - REV64, - EXT, - - // Vector shift by scalar - VSHL, - VLSHR, - VASHR, - - // Vector shift by scalar (again) - SQSHL_I, - UQSHL_I, - SQSHLU_I, - SRSHR_I, - URSHR_I, - - // Vector comparisons - CMEQ, - CMGE, - CMGT, - CMHI, - CMHS, - FCMEQ, - FCMGE, - FCMGT, - - // Vector zero comparisons - CMEQz, - CMGEz, - CMGTz, - CMLEz, - CMLTz, - FCMEQz, - FCMGEz, - FCMGTz, - FCMLEz, - FCMLTz, - - // Vector bitwise negation - NOT, - - // Vector bitwise selection - BIT, - - // Compare-and-branch - CBZ, - CBNZ, - TBZ, - TBNZ, - - // Tail calls - TC_RETURN, - - // Custom prefetch handling - PREFETCH, - - // {s|u}int to FP within a FP register. 
- SITOF, - UITOF -}; - -} // end namespace ARM64ISD - -class ARM64Subtarget; -class ARM64TargetMachine; - -class ARM64TargetLowering : public TargetLowering { - bool RequireStrictAlign; - -public: - explicit ARM64TargetLowering(ARM64TargetMachine &TM); - - /// Selects the correct CCAssignFn for a the given CallingConvention - /// value. - CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; - - /// computeMaskedBitsForTargetNode - Determine which of the bits specified in - /// Mask are known to be either zero or one and return them in the - /// KnownZero/KnownOne bitsets. - void computeMaskedBitsForTargetNode(const SDValue Op, APInt &KnownZero, - APInt &KnownOne, const SelectionDAG &DAG, - unsigned Depth = 0) const; - - MVT getScalarShiftAmountTy(EVT LHSTy) const override; - - /// allowsUnalignedMemoryAccesses - Returns true if the target allows - /// unaligned memory accesses. of the specified type. - bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0, - bool *Fast = 0) const override { - if (RequireStrictAlign) - return false; - // FIXME: True for Cyclone, but not necessary others. - if (Fast) - *Fast = true; - return true; - } - - /// LowerOperation - Provide custom lowering hooks for some operations. - SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - - const char *getTargetNodeName(unsigned Opcode) const override; - - SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - - /// getFunctionAlignment - Return the Log2 alignment of this function. - unsigned getFunctionAlignment(const Function *F) const; - - /// getMaximalGlobalOffset - Returns the maximal possible offset which can - /// be used for loads / stores from the global. - unsigned getMaximalGlobalOffset() const override; - - /// Returns true if a cast between SrcAS and DestAS is a noop. - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { - // Addrspacecasts are always noops. - return true; - } - - /// createFastISel - This method returns a target specific FastISel object, - /// or null if the target does not support "fast" ISel. - FastISel *createFastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) const override; - - bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; - - bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; - - /// isShuffleMaskLegal - Return true if the given shuffle mask can be - /// codegen'd directly, or if it should be stack expanded. 
- bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override; - - /// getSetCCResultType - Return the ISD::SETCC ValueType - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; - - SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; - - MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size, unsigned BinOpcode) const; - MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size) const; - MachineBasicBlock *EmitAtomicBinary128(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned BinOpcodeLo, - unsigned BinOpcodeHi) const; - MachineBasicBlock *EmitAtomicCmpSwap128(MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *EmitAtomicMinMax128(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned CondCode) const; - MachineBasicBlock *EmitF128CSEL(MachineInstr *MI, - MachineBasicBlock *BB) const; - - MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const override; - - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, - unsigned Intrinsic) const override; - - bool isTruncateFree(Type *Ty1, Type *Ty2) const override; - bool isTruncateFree(EVT VT1, EVT VT2) const override; - - bool isZExtFree(Type *Ty1, Type *Ty2) const override; - bool isZExtFree(EVT VT1, EVT VT2) const override; - bool isZExtFree(SDValue Val, EVT VT2) const override; - - bool hasPairedLoad(Type *LoadedType, - unsigned &RequiredAligment) const override; - bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const override; - - bool isLegalAddImmediate(int64_t) const override; - bool isLegalICmpImmediate(int64_t) const override; - - EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, - MachineFunction &MF) const override; - - /// isLegalAddressingMode - Return true if the addressing mode represented - /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; - - /// \brief Return the cost of the scaling factor used in the addressing - /// mode represented by AM for this target, for a load/store - /// of the specified type. - /// If the AM is supported, the return value must be >= 0. - /// If the AM is not supported, it returns a negative value. - int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override; - - /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster - /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be - /// expanded to FMAs when this method returns true, otherwise fmuladd is - /// expanded to fmul + fadd. - bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; - - const uint16_t *getScratchRegisters(CallingConv::ID CC) const override; - - bool shouldConvertConstantLoadToIntImm(const APInt &Imm, - Type *Ty) const override; - -private: - /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can - /// make the right decision when generating code for different targets. 
- const ARM64Subtarget *Subtarget; - - void addTypeForNEON(EVT VT, EVT PromotedBitwiseVT); - void addDRTypeForNEON(MVT VT); - void addQRTypeForNEON(MVT VT); - - SDValue - LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, - SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const override; - - SDValue LowerCall(CallLoweringInfo & /*CLI*/, - SmallVectorImpl<SDValue> &InVals) const override; - - SDValue LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, - SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, - bool isThisReturn, SDValue ThisVal) const; - - bool isEligibleForTailCallOptimization( - SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, - bool isCalleeStructRet, bool isCallerStructRet, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const; - - void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL, - SDValue &Chain) const; - - bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, - bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - LLVMContext &Context) const override; - - SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, SDLoc DL, - SelectionDAG &DAG) const override; - - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL, - SelectionDAG &DAG) const; - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const; 
- SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG, - RTLIB::Libcall Call) const; - SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVectorAND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; - - ConstraintType getConstraintType(const std::string &Constraint) const; - - /// Examine constraint string and operand type and determine a weight value. - /// The operand object must already have been set up with the operand type. - ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, - const char *constraint) const; - - std::pair<unsigned, const TargetRegisterClass *> - getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const; - void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, - std::vector<SDValue> &Ops, - SelectionDAG &DAG) const; - - bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const; - bool mayBeEmittedAsTailCall(CallInst *CI) const; - bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, - ISD::MemIndexedMode &AM, bool &IsInc, - SelectionDAG &DAG) const; - bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const; - bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, - SDValue &Offset, ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const; - - void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) const; -}; - -namespace ARM64 { -FastISel *createFastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo); -} // end namespace ARM64 - -} // end namespace llvm - -#endif // LLVM_TARGET_ARM64_ISELLOWERING_H diff --git a/lib/Target/ARM64/ARM64InstrAtomics.td b/lib/Target/ARM64/ARM64InstrAtomics.td deleted file mode 100644 index 0d36e06..0000000 --- a/lib/Target/ARM64/ARM64InstrAtomics.td +++ /dev/null @@ -1,293 +0,0 @@ -//===- ARM64InstrAtomics.td - ARM64 Atomic codegen support -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// ARM64 Atomic operand code-gen constructs. -// -//===----------------------------------------------------------------------===// - -//===---------------------------------- -// Atomic fences -//===---------------------------------- -def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>; -def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>; - -//===---------------------------------- -// Atomic loads -//===---------------------------------- - -// When they're actually atomic, only one addressing mode (GPR64sp) is -// supported, but when they're relaxed and anything can be used, all the -// standard modes would be valid and may give efficiency gains. - -// A atomic load operation that actually needs acquire semantics. 
-class acquiring_load<PatFrag base> - : PatFrag<(ops node:$ptr), (base node:$ptr), [{ - AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - assert(Ordering != AcquireRelease && "unexpected load ordering"); - return Ordering == Acquire || Ordering == SequentiallyConsistent; -}]>; - -// An atomic load operation that does not need either acquire or release -// semantics. -class relaxed_load<PatFrag base> - : PatFrag<(ops node:$ptr), (base node:$ptr), [{ - AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - return Ordering == Monotonic || Ordering == Unordered; -}]>; - -// 8-bit loads -def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; -def : Pat<(relaxed_load<atomic_load_8> ro_indexed8:$addr), - (LDRBBro ro_indexed8:$addr)>; -def : Pat<(relaxed_load<atomic_load_8> am_indexed8:$addr), - (LDRBBui am_indexed8:$addr)>; -def : Pat<(relaxed_load<atomic_load_8> am_unscaled8:$addr), - (LDURBBi am_unscaled8:$addr)>; - -// 16-bit loads -def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>; -def : Pat<(relaxed_load<atomic_load_16> ro_indexed16:$addr), - (LDRHHro ro_indexed16:$addr)>; -def : Pat<(relaxed_load<atomic_load_16> am_indexed16:$addr), - (LDRHHui am_indexed16:$addr)>; -def : Pat<(relaxed_load<atomic_load_16> am_unscaled16:$addr), - (LDURHHi am_unscaled16:$addr)>; - -// 32-bit loads -def : Pat<(acquiring_load<atomic_load_32> GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>; -def : Pat<(relaxed_load<atomic_load_32> ro_indexed32:$addr), - (LDRWro ro_indexed32:$addr)>; -def : Pat<(relaxed_load<atomic_load_32> am_indexed32:$addr), - (LDRWui am_indexed32:$addr)>; -def : Pat<(relaxed_load<atomic_load_32> am_unscaled32:$addr), - (LDURWi am_unscaled32:$addr)>; - -// 64-bit loads -def : Pat<(acquiring_load<atomic_load_64> GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>; -def : Pat<(relaxed_load<atomic_load_64> ro_indexed64:$addr), - (LDRXro ro_indexed64:$addr)>; -def : Pat<(relaxed_load<atomic_load_64> am_indexed64:$addr), - (LDRXui am_indexed64:$addr)>; -def : Pat<(relaxed_load<atomic_load_64> am_unscaled64:$addr), - (LDURXi am_unscaled64:$addr)>; - -//===---------------------------------- -// Atomic stores -//===---------------------------------- - -// When they're actually atomic, only one addressing mode (GPR64sp) is -// supported, but when they're relaxed and anything can be used, all the -// standard modes would be valid and may give efficiency gains. - -// A store operation that actually needs release semantics. -class releasing_store<PatFrag base> - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ - AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - assert(Ordering != AcquireRelease && "unexpected store ordering"); - return Ordering == Release || Ordering == SequentiallyConsistent; -}]>; - -// An atomic store operation that doesn't actually need to be atomic on ARM64. 
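As a rough source-level illustration of the distinction these fragments draw (a sketch of the expected selection, assuming the usual C++11 memory-order mapping on AArch64, not a guarantee): acquire and seq_cst loads go through acquiring_load and should end up as load-acquire instructions, while monotonic/unordered loads go through relaxed_load and can use any of the ordinary addressing modes; the store fragments around here behave symmetrically.

#include <atomic>
#include <cstdint>

uint64_t atomic_load_examples(std::atomic<uint64_t> &A) {
  // Acquire (and seq_cst) loads match acquiring_load and are expected to
  // select a load-acquire (LDAR), which only supports the base-register form.
  uint64_t X = A.load(std::memory_order_acquire);
  // Relaxed loads map to Monotonic ordering and match relaxed_load; a plain
  // LDR/LDUR with any of the normal addressing modes is fine.
  uint64_t Y = A.load(std::memory_order_relaxed);
  return X + Y;
}

void atomic_store_examples(std::atomic<uint64_t> &A, uint64_t V) {
  A.store(V, std::memory_order_release);  // releasing_store -> STLR
  A.store(V, std::memory_order_relaxed);  // relaxed_store   -> plain STR/STUR
}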
-class relaxed_store<PatFrag base> - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ - AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - return Ordering == Monotonic || Ordering == Unordered; -}]>; - -// 8-bit stores -def : Pat<(releasing_store<atomic_store_8> GPR64sp:$ptr, GPR32:$val), - (STLRB GPR32:$val, GPR64sp:$ptr)>; -def : Pat<(relaxed_store<atomic_store_8> ro_indexed8:$ptr, GPR32:$val), - (STRBBro GPR32:$val, ro_indexed8:$ptr)>; -def : Pat<(relaxed_store<atomic_store_8> am_indexed8:$ptr, GPR32:$val), - (STRBBui GPR32:$val, am_indexed8:$ptr)>; -def : Pat<(relaxed_store<atomic_store_8> am_unscaled8:$ptr, GPR32:$val), - (STURBBi GPR32:$val, am_unscaled8:$ptr)>; - -// 16-bit stores -def : Pat<(releasing_store<atomic_store_16> GPR64sp:$ptr, GPR32:$val), - (STLRH GPR32:$val, GPR64sp:$ptr)>; -def : Pat<(relaxed_store<atomic_store_16> ro_indexed16:$ptr, GPR32:$val), - (STRHHro GPR32:$val, ro_indexed16:$ptr)>; -def : Pat<(relaxed_store<atomic_store_16> am_indexed16:$ptr, GPR32:$val), - (STRHHui GPR32:$val, am_indexed16:$ptr)>; -def : Pat<(relaxed_store<atomic_store_16> am_unscaled16:$ptr, GPR32:$val), - (STURHHi GPR32:$val, am_unscaled16:$ptr)>; - -// 32-bit stores -def : Pat<(releasing_store<atomic_store_32> GPR64sp:$ptr, GPR32:$val), - (STLRW GPR32:$val, GPR64sp:$ptr)>; -def : Pat<(relaxed_store<atomic_store_32> ro_indexed32:$ptr, GPR32:$val), - (STRWro GPR32:$val, ro_indexed32:$ptr)>; -def : Pat<(relaxed_store<atomic_store_32> am_indexed32:$ptr, GPR32:$val), - (STRWui GPR32:$val, am_indexed32:$ptr)>; -def : Pat<(relaxed_store<atomic_store_32> am_unscaled32:$ptr, GPR32:$val), - (STURWi GPR32:$val, am_unscaled32:$ptr)>; - -// 64-bit stores -def : Pat<(releasing_store<atomic_store_64> GPR64sp:$ptr, GPR64:$val), - (STLRX GPR64:$val, GPR64sp:$ptr)>; -def : Pat<(relaxed_store<atomic_store_64> ro_indexed64:$ptr, GPR64:$val), - (STRXro GPR64:$val, ro_indexed64:$ptr)>; -def : Pat<(relaxed_store<atomic_store_64> am_indexed64:$ptr, GPR64:$val), - (STRXui GPR64:$val, am_indexed64:$ptr)>; -def : Pat<(relaxed_store<atomic_store_64> am_unscaled64:$ptr, GPR64:$val), - (STURXi GPR64:$val, am_unscaled64:$ptr)>; - -//===---------------------------------- -// Atomic read-modify-write operations -//===---------------------------------- - -// More complicated operations need lots of C++ support, so we just create -// skeletons here for the C++ code to refer to. 
- -let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in { -multiclass AtomicSizes { - def _I8 : Pseudo<(outs GPR32:$dst), - (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>; - def _I16 : Pseudo<(outs GPR32:$dst), - (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>; - def _I32 : Pseudo<(outs GPR32:$dst), - (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>; - def _I64 : Pseudo<(outs GPR64:$dst), - (ins GPR64sp:$ptr, GPR64:$incr, i32imm:$ordering), []>; - def _I128 : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi), - (ins GPR64sp:$ptr, GPR64:$incrlo, GPR64:$incrhi, - i32imm:$ordering), []>; -} -} - -defm ATOMIC_LOAD_ADD : AtomicSizes; -defm ATOMIC_LOAD_SUB : AtomicSizes; -defm ATOMIC_LOAD_AND : AtomicSizes; -defm ATOMIC_LOAD_OR : AtomicSizes; -defm ATOMIC_LOAD_XOR : AtomicSizes; -defm ATOMIC_LOAD_NAND : AtomicSizes; -defm ATOMIC_SWAP : AtomicSizes; -let Defs = [CPSR] in { - // These operations need a CMP to calculate the correct value - defm ATOMIC_LOAD_MIN : AtomicSizes; - defm ATOMIC_LOAD_MAX : AtomicSizes; - defm ATOMIC_LOAD_UMIN : AtomicSizes; - defm ATOMIC_LOAD_UMAX : AtomicSizes; -} - -class AtomicCmpSwap<RegisterClass GPRData> - : Pseudo<(outs GPRData:$dst), - (ins GPR64sp:$ptr, GPRData:$old, GPRData:$new, - i32imm:$ordering), []> { - let usesCustomInserter = 1; - let hasCtrlDep = 1; - let mayLoad = 1; - let mayStore = 1; - let Defs = [CPSR]; -} - -def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>; -def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>; -def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>; -def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>; - -def ATOMIC_CMP_SWAP_I128 - : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi), - (ins GPR64sp:$ptr, GPR64:$oldlo, GPR64:$oldhi, - GPR64:$newlo, GPR64:$newhi, i32imm:$ordering), []> { - let usesCustomInserter = 1; - let hasCtrlDep = 1; - let mayLoad = 1; - let mayStore = 1; - let Defs = [CPSR]; -} - -//===---------------------------------- -// Low-level exclusive operations -//===---------------------------------- - -// Load-exclusives. - -def ldxr_1 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ - return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8; -}]>; - -def ldxr_2 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ - return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; -}]>; - -def ldxr_4 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ - return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; -}]>; - -def ldxr_8 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{ - return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; -}]>; - -def : Pat<(ldxr_1 am_noindex:$addr), - (SUBREG_TO_REG (i64 0), (LDXRB am_noindex:$addr), sub_32)>; -def : Pat<(ldxr_2 am_noindex:$addr), - (SUBREG_TO_REG (i64 0), (LDXRH am_noindex:$addr), sub_32)>; -def : Pat<(ldxr_4 am_noindex:$addr), - (SUBREG_TO_REG (i64 0), (LDXRW am_noindex:$addr), sub_32)>; -def : Pat<(ldxr_8 am_noindex:$addr), (LDXRX am_noindex:$addr)>; - -def : Pat<(and (ldxr_1 am_noindex:$addr), 0xff), - (SUBREG_TO_REG (i64 0), (LDXRB am_noindex:$addr), sub_32)>; -def : Pat<(and (ldxr_2 am_noindex:$addr), 0xffff), - (SUBREG_TO_REG (i64 0), (LDXRH am_noindex:$addr), sub_32)>; -def : Pat<(and (ldxr_4 am_noindex:$addr), 0xffffffff), - (SUBREG_TO_REG (i64 0), (LDXRW am_noindex:$addr), sub_32)>; - -// Store-exclusives. 
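To make the role of these skeletons concrete: ARMv8.0 has no single-instruction atomic read-modify-write or compare-and-swap, so source-level operations like the ones below are expected to be matched to the ATOMIC_*_I64 pseudos above and then expanded by the custom inserter into a load-exclusive/store-exclusive retry loop built from the LDXR/STXR-style instructions this file also describes. A minimal sketch of the source constructs involved, not a statement of the exact expansion:

#include <atomic>
#include <cstdint>

uint64_t rmw_examples(std::atomic<uint64_t> &A, uint64_t V) {
  // Expected to go through ATOMIC_LOAD_ADD_I64 and become a
  // load-exclusive / ADD / store-exclusive loop that retries while the
  // exclusive store reports failure.
  uint64_t Old = A.fetch_add(V, std::memory_order_seq_cst);

  // Expected to go through ATOMIC_CMP_SWAP_I64: load-exclusive, compare
  // against the expected value, store-exclusive on match, branch back on
  // failure.
  uint64_t Expected = 0;
  A.compare_exchange_strong(Expected, V);
  return Old + Expected;
}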
- -def stxr_1 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stxr node:$val, node:$ptr), [{ - return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8; -}]>; - -def stxr_2 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stxr node:$val, node:$ptr), [{ - return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; -}]>; - -def stxr_4 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stxr node:$val, node:$ptr), [{ - return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; -}]>; - -def stxr_8 : PatFrag<(ops node:$val, node:$ptr), - (int_arm64_stxr node:$val, node:$ptr), [{ - return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; -}]>; - -def : Pat<(stxr_1 GPR64:$val, am_noindex:$addr), - (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stxr_2 GPR64:$val, am_noindex:$addr), - (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stxr_4 GPR64:$val, am_noindex:$addr), - (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stxr_8 GPR64:$val, am_noindex:$addr), - (STXRX GPR64:$val, am_noindex:$addr)>; - -def : Pat<(stxr_1 (zext (and GPR32:$val, 0xff)), am_noindex:$addr), - (STXRB GPR32:$val, am_noindex:$addr)>; -def : Pat<(stxr_2 (zext (and GPR32:$val, 0xffff)), am_noindex:$addr), - (STXRH GPR32:$val, am_noindex:$addr)>; -def : Pat<(stxr_4 (zext GPR32:$val), am_noindex:$addr), - (STXRW GPR32:$val, am_noindex:$addr)>; - -def : Pat<(stxr_1 (and GPR64:$val, 0xff), am_noindex:$addr), - (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stxr_2 (and GPR64:$val, 0xffff), am_noindex:$addr), - (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; -def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), am_noindex:$addr), - (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; - - -// And clear exclusive. - -def : Pat<(int_arm64_clrex), (CLREX 0xf)>; diff --git a/lib/Target/ARM64/ARM64InstrFormats.td b/lib/Target/ARM64/ARM64InstrFormats.td deleted file mode 100644 index 38406f8..0000000 --- a/lib/Target/ARM64/ARM64InstrFormats.td +++ /dev/null @@ -1,8193 +0,0 @@ -//===- ARM64InstrFormats.td - ARM64 Instruction Formats ------*- tblgen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Describe ARM64 instructions format here -// - -// Format specifies the encoding used by the instruction. This is part of the -// ad-hoc solution used to emit machine instruction encodings by our machine -// code emitter. -class Format<bits<2> val> { - bits<2> Value = val; -} - -def PseudoFrm : Format<0>; -def NormalFrm : Format<1>; // Do we need any others? - -// ARM64 Instruction Format -class ARM64Inst<Format f, string cstr> : Instruction { - field bits<32> Inst; // Instruction encoding. - // Mask of bits that cause an encoding to be UNPREDICTABLE. - // If a bit is set, then if the corresponding bit in the - // target encoding differs from its value in the "Inst" field, - // the instruction is UNPREDICTABLE (SoftFail in abstract parlance). - field bits<32> Unpredictable = 0; - // SoftFail is the generic name for this field, but we alias it so - // as to make it more obvious what it means in ARM-land. 
- field bits<32> SoftFail = Unpredictable; - let Namespace = "ARM64"; - Format F = f; - bits<2> Form = F.Value; - let Pattern = []; - let Constraints = cstr; -} - -// Pseudo instructions (don't have encoding information) -class Pseudo<dag oops, dag iops, list<dag> pattern, string cstr = ""> - : ARM64Inst<PseudoFrm, cstr> { - dag OutOperandList = oops; - dag InOperandList = iops; - let Pattern = pattern; - let isCodeGenOnly = 1; -} - -// Real instructions (have encoding information) -class EncodedI<string cstr, list<dag> pattern> : ARM64Inst<NormalFrm, cstr> { - let Pattern = pattern; - let Size = 4; -} - -// Normal instructions -class I<dag oops, dag iops, string asm, string operands, string cstr, - list<dag> pattern> - : EncodedI<cstr, pattern> { - dag OutOperandList = oops; - dag InOperandList = iops; - let AsmString = !strconcat(asm, operands); -} - -class TriOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>; -class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; -class UnOpFrag<dag res> : PatFrag<(ops node:$LHS), res>; - -// Helper fragment for an extract of the high portion of a 128-bit vector. -def extract_high_v16i8 : - UnOpFrag<(extract_subvector (v16i8 node:$LHS), (i64 8))>; -def extract_high_v8i16 : - UnOpFrag<(extract_subvector (v8i16 node:$LHS), (i64 4))>; -def extract_high_v4i32 : - UnOpFrag<(extract_subvector (v4i32 node:$LHS), (i64 2))>; -def extract_high_v2i64 : - UnOpFrag<(extract_subvector (v2i64 node:$LHS), (i64 1))>; - -//===----------------------------------------------------------------------===// -// Asm Operand Classes. -// - -// Shifter operand for arithmetic shifted encodings. -def ShifterOperand : AsmOperandClass { - let Name = "Shifter"; -} - -// Shifter operand for mov immediate encodings. -def MovImm32ShifterOperand : AsmOperandClass { - let SuperClasses = [ShifterOperand]; - let Name = "MovImm32Shifter"; -} -def MovImm64ShifterOperand : AsmOperandClass { - let SuperClasses = [ShifterOperand]; - let Name = "MovImm64Shifter"; -} - -// Shifter operand for arithmetic register shifted encodings. -def ArithmeticShifterOperand : AsmOperandClass { - let SuperClasses = [ShifterOperand]; - let Name = "ArithmeticShifter"; -} - -// Shifter operand for arithmetic shifted encodings for ADD/SUB instructions. -def AddSubShifterOperand : AsmOperandClass { - let SuperClasses = [ArithmeticShifterOperand]; - let Name = "AddSubShifter"; -} - -// Shifter operand for logical vector 128/64-bit shifted encodings. -def LogicalVecShifterOperand : AsmOperandClass { - let SuperClasses = [ShifterOperand]; - let Name = "LogicalVecShifter"; -} -def LogicalVecHalfWordShifterOperand : AsmOperandClass { - let SuperClasses = [LogicalVecShifterOperand]; - let Name = "LogicalVecHalfWordShifter"; -} - -// The "MSL" shifter on the vector MOVI instruction. -def MoveVecShifterOperand : AsmOperandClass { - let SuperClasses = [ShifterOperand]; - let Name = "MoveVecShifter"; -} - -// Extend operand for arithmetic encodings. -def ExtendOperand : AsmOperandClass { let Name = "Extend"; } -def ExtendOperand64 : AsmOperandClass { - let SuperClasses = [ExtendOperand]; - let Name = "Extend64"; -} -// 'extend' that's a lsl of a 64-bit register. -def ExtendOperandLSL64 : AsmOperandClass { - let SuperClasses = [ExtendOperand]; - let Name = "ExtendLSL64"; -} - -// 8-bit floating-point immediate encodings. 
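This 8-bit encoding only reaches a small set of floating-point constants: values of the form +/-(16 + frac)/16 * 2^exp with frac in [0, 15] and exp in [-3, 4] (1.0, 0.5, 2.0, 31.0, ... but notably not 0.0, which is why a separate fpimm0 leaf exists further down). A hedged sketch of the expansion, assuming the standard AArch64 FMOV-immediate format:

#include <cmath>
#include <cstdint>

// Decode the 8 bits 'abcdefgh': a = sign, bcd = exponent, efgh = fraction.
static float decodeFPImm8(uint8_t Imm) {
  int Sign = (Imm >> 7) & 1;
  int BCD  = (Imm >> 4) & 0x7;
  int Frac = Imm & 0xF;
  int Exp  = (BCD & 4) ? (BCD & 3) - 3   // b == 1: exponents -3..0
                       : (BCD & 3) + 1;  // b == 0: exponents  1..4
  float Mant = 1.0f + Frac / 16.0f;      // 1.efgh in binary
  return (Sign ? -1.0f : 1.0f) * std::ldexp(Mant, Exp);
}
// e.g. decodeFPImm8(0x70) == 1.0f and decodeFPImm8(0x00) == 2.0f;
// getFP32Imm/getFP64Imm return -1 for anything outside this set.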
-def FPImmOperand : AsmOperandClass { - let Name = "FPImm"; - let ParserMethod = "tryParseFPImm"; -} - -// 8-bit immediate for AdvSIMD where 64-bit values of the form: -// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh -// are encoded as the eight bit value 'abcdefgh'. -def SIMDImmType10Operand : AsmOperandClass { let Name = "SIMDImmType10"; } - - -//===----------------------------------------------------------------------===// -// Operand Definitions. -// - -// ADR[P] instruction labels. -def AdrpOperand : AsmOperandClass { - let Name = "AdrpLabel"; - let ParserMethod = "tryParseAdrpLabel"; -} -def adrplabel : Operand<i64> { - let EncoderMethod = "getAdrLabelOpValue"; - let PrintMethod = "printAdrpLabel"; - let ParserMatchClass = AdrpOperand; -} - -def AdrOperand : AsmOperandClass { - let Name = "AdrLabel"; - let ParserMethod = "tryParseAdrLabel"; -} -def adrlabel : Operand<i64> { - let EncoderMethod = "getAdrLabelOpValue"; - let ParserMatchClass = AdrOperand; -} - -// simm9 predicate - True if the immediate is in the range [-256, 255]. -def SImm9Operand : AsmOperandClass { - let Name = "SImm9"; - let DiagnosticType = "InvalidMemoryIndexedSImm9"; -} -def simm9 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -256 && Imm < 256; }]> { - let ParserMatchClass = SImm9Operand; -} - -// simm7s4 predicate - True if the immediate is a multiple of 4 in the range -// [-256, 252]. -def SImm7s4Operand : AsmOperandClass { - let Name = "SImm7s4"; - let DiagnosticType = "InvalidMemoryIndexed32SImm7"; -} -def simm7s4 : Operand<i32> { - let ParserMatchClass = SImm7s4Operand; - let PrintMethod = "printImmScale4"; -} - -// simm7s8 predicate - True if the immediate is a multiple of 8 in the range -// [-512, 504]. -def SImm7s8Operand : AsmOperandClass { - let Name = "SImm7s8"; - let DiagnosticType = "InvalidMemoryIndexed64SImm7"; -} -def simm7s8 : Operand<i32> { - let ParserMatchClass = SImm7s8Operand; - let PrintMethod = "printImmScale8"; -} - -// simm7s16 predicate - True if the immediate is a multiple of 16 in the range -// [-1024, 1008]. -def SImm7s16Operand : AsmOperandClass { - let Name = "SImm7s16"; - let DiagnosticType = "InvalidMemoryIndexed64SImm7"; -} -def simm7s16 : Operand<i32> { - let ParserMatchClass = SImm7s16Operand; - let PrintMethod = "printImmScale16"; -} - -// imm0_65535 predicate - True if the immediate is in the range [0,65535]. 
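The simm7s4/simm7s8/simm7s16 operands above all follow the same shape: the underlying field is a signed 7-bit value scaled by the access size, which is where the quoted ranges come from ([-64*S, 63*S]). A small sketch of the shared predicate:

#include <cstdint>

static bool isSImm7Scaled(int64_t Imm, int64_t Scale) {
  return Imm % Scale == 0 && Imm / Scale >= -64 && Imm / Scale <= 63;
}
// isSImm7Scaled(Imm, 4)  -> multiples of 4  in [-256, 252]   (simm7s4)
// isSImm7Scaled(Imm, 8)  -> multiples of 8  in [-512, 504]   (simm7s8)
// isSImm7Scaled(Imm, 16) -> multiples of 16 in [-1024, 1008] (simm7s16)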
-def Imm0_65535Operand : AsmOperandClass { let Name = "Imm0_65535"; } -def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{ - return ((uint32_t)Imm) < 65536; -}]> { - let ParserMatchClass = Imm0_65535Operand; -} - -def Imm1_8Operand : AsmOperandClass { - let Name = "Imm1_8"; - let DiagnosticType = "InvalidImm1_8"; -} -def Imm1_16Operand : AsmOperandClass { - let Name = "Imm1_16"; - let DiagnosticType = "InvalidImm1_16"; -} -def Imm1_32Operand : AsmOperandClass { - let Name = "Imm1_32"; - let DiagnosticType = "InvalidImm1_32"; -} -def Imm1_64Operand : AsmOperandClass { - let Name = "Imm1_64"; - let DiagnosticType = "InvalidImm1_64"; -} - -def MovZSymbolG3AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG3"; - let RenderMethod = "addImmOperands"; -} - -def movz_symbol_g3 : Operand<i32> { - let ParserMatchClass = MovZSymbolG3AsmOperand; -} - -def MovZSymbolG2AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG2"; - let RenderMethod = "addImmOperands"; -} - -def movz_symbol_g2 : Operand<i32> { - let ParserMatchClass = MovZSymbolG2AsmOperand; -} - -def MovZSymbolG1AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG1"; - let RenderMethod = "addImmOperands"; -} - -def movz_symbol_g1 : Operand<i32> { - let ParserMatchClass = MovZSymbolG1AsmOperand; -} - -def MovZSymbolG0AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG0"; - let RenderMethod = "addImmOperands"; -} - -def movz_symbol_g0 : Operand<i32> { - let ParserMatchClass = MovZSymbolG0AsmOperand; -} - -def MovKSymbolG2AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG2"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g2 : Operand<i32> { - let ParserMatchClass = MovKSymbolG2AsmOperand; -} - -def MovKSymbolG1AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG1"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g1 : Operand<i32> { - let ParserMatchClass = MovKSymbolG1AsmOperand; -} - -def MovKSymbolG0AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG0"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g0 : Operand<i32> { - let ParserMatchClass = MovKSymbolG0AsmOperand; -} - -def fixedpoint32 : Operand<i32> { - let EncoderMethod = "getFixedPointScaleOpValue"; - let DecoderMethod = "DecodeFixedPointScaleImm"; - let ParserMatchClass = Imm1_32Operand; -} -def fixedpoint64 : Operand<i64> { - let EncoderMethod = "getFixedPointScaleOpValue"; - let DecoderMethod = "DecodeFixedPointScaleImm"; - let ParserMatchClass = Imm1_64Operand; -} - -def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9); -}]> { - let EncoderMethod = "getVecShiftR8OpValue"; - let DecoderMethod = "DecodeVecShiftR8Imm"; - let ParserMatchClass = Imm1_8Operand; -} -def vecshiftR16 : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17); -}]> { - let EncoderMethod = "getVecShiftR16OpValue"; - let DecoderMethod = "DecodeVecShiftR16Imm"; - let ParserMatchClass = Imm1_16Operand; -} -def vecshiftR16Narrow : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9); -}]> { - let EncoderMethod = "getVecShiftR16OpValue"; - let DecoderMethod = "DecodeVecShiftR16ImmNarrow"; - let ParserMatchClass = Imm1_8Operand; -} -def vecshiftR32 : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33); -}]> { - let EncoderMethod = "getVecShiftR32OpValue"; - let DecoderMethod = "DecodeVecShiftR32Imm"; - let ParserMatchClass = Imm1_32Operand; -} -def vecshiftR32Narrow : Operand<i32>, ImmLeaf<i32, [{ - 
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17); -}]> { - let EncoderMethod = "getVecShiftR32OpValue"; - let DecoderMethod = "DecodeVecShiftR32ImmNarrow"; - let ParserMatchClass = Imm1_16Operand; -} -def vecshiftR64 : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65); -}]> { - let EncoderMethod = "getVecShiftR64OpValue"; - let DecoderMethod = "DecodeVecShiftR64Imm"; - let ParserMatchClass = Imm1_64Operand; -} -def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33); -}]> { - let EncoderMethod = "getVecShiftR64OpValue"; - let DecoderMethod = "DecodeVecShiftR64ImmNarrow"; - let ParserMatchClass = Imm1_32Operand; -} - -def Imm0_7Operand : AsmOperandClass { let Name = "Imm0_7"; } -def Imm0_15Operand : AsmOperandClass { let Name = "Imm0_15"; } -def Imm0_31Operand : AsmOperandClass { let Name = "Imm0_31"; } -def Imm0_63Operand : AsmOperandClass { let Name = "Imm0_63"; } - -def vecshiftL8 : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) < 8); -}]> { - let EncoderMethod = "getVecShiftL8OpValue"; - let DecoderMethod = "DecodeVecShiftL8Imm"; - let ParserMatchClass = Imm0_7Operand; -} -def vecshiftL16 : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) < 16); -}]> { - let EncoderMethod = "getVecShiftL16OpValue"; - let DecoderMethod = "DecodeVecShiftL16Imm"; - let ParserMatchClass = Imm0_15Operand; -} -def vecshiftL32 : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) < 32); -}]> { - let EncoderMethod = "getVecShiftL32OpValue"; - let DecoderMethod = "DecodeVecShiftL32Imm"; - let ParserMatchClass = Imm0_31Operand; -} -def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{ - return (((uint32_t)Imm) < 64); -}]> { - let EncoderMethod = "getVecShiftL64OpValue"; - let DecoderMethod = "DecodeVecShiftL64Imm"; - let ParserMatchClass = Imm0_63Operand; -} - - -// Crazy immediate formats used by 32-bit and 64-bit logical immediate -// instructions for splatting repeating bit patterns across the immediate. -def logical_imm32_XFORM : SDNodeXForm<imm, [{ - uint64_t enc = ARM64_AM::encodeLogicalImmediate(N->getZExtValue(), 32); - return CurDAG->getTargetConstant(enc, MVT::i32); -}]>; -def logical_imm64_XFORM : SDNodeXForm<imm, [{ - uint64_t enc = ARM64_AM::encodeLogicalImmediate(N->getZExtValue(), 64); - return CurDAG->getTargetConstant(enc, MVT::i32); -}]>; - -def LogicalImm32Operand : AsmOperandClass { let Name = "LogicalImm32"; } -def LogicalImm64Operand : AsmOperandClass { let Name = "LogicalImm64"; } -def logical_imm32 : Operand<i32>, PatLeaf<(imm), [{ - return ARM64_AM::isLogicalImmediate(N->getZExtValue(), 32); -}], logical_imm32_XFORM> { - let PrintMethod = "printLogicalImm32"; - let ParserMatchClass = LogicalImm32Operand; -} -def logical_imm64 : Operand<i64>, PatLeaf<(imm), [{ - return ARM64_AM::isLogicalImmediate(N->getZExtValue(), 64); -}], logical_imm64_XFORM> { - let PrintMethod = "printLogicalImm64"; - let ParserMatchClass = LogicalImm64Operand; -} - -// imm0_255 predicate - True if the immediate is in the range [0,255]. 
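The logical_imm32/logical_imm64 leaves above accept only AArch64 bitmask immediates. Setting aside the actual N:immr:imms packing that encodeLogicalImmediate produces, the accepted set can be described as: an element of 2, 4, 8, 16, 32 or 64 bits that is a contiguous run of ones rotated by some amount (neither all-zeros nor all-ones), replicated across the register. A rough stand-alone acceptance test in that spirit (not the ARM64_AM implementation):

#include <cstdint>

// Is Elt (in its low Size bits) a contiguous run of ones, possibly rotated,
// and neither all-zeros nor all-ones?
static bool isRotatedOnesRun(uint64_t Elt, unsigned Size) {
  uint64_t Mask = (Size == 64) ? ~0ULL : ((1ULL << Size) - 1);
  Elt &= Mask;
  if (Elt == 0 || Elt == Mask)
    return false;
  for (unsigned R = 0; R != Size; ++R) {
    uint64_t Rot = R ? (((Elt >> R) | (Elt << (Size - R))) & Mask) : Elt;
    if ((Rot & (Rot + 1)) == 0)     // ones are contiguous from bit 0 upwards
      return true;
  }
  return false;
}

// Rough equivalent of what isLogicalImmediate(Imm, 64) is expected to accept.
static bool looksLikeLogicalImm64(uint64_t Imm) {
  for (unsigned Size = 2; Size <= 64; Size *= 2) {
    uint64_t Mask = (Size == 64) ? ~0ULL : ((1ULL << Size) - 1);
    uint64_t Elt = Imm & Mask;
    bool Splat = true;
    for (unsigned Off = Size; Off != 64; Off += Size)
      Splat = Splat && (((Imm >> Off) & Mask) == Elt);
    if (Splat)                      // smallest repeating element found; test it
      return isRotatedOnesRun(Elt, Size);
  }
  return false;                     // not reached: Size == 64 always "splats"
}
// e.g. 0x5555555555555555 and 0x00FF00FF00FF00FF are accepted;
// 0, ~0ULL and 0x0123456789ABCDEF are rejected.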
-def Imm0_255Operand : AsmOperandClass { let Name = "Imm0_255"; } -def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ - return ((uint32_t)Imm) < 256; -}]> { - let ParserMatchClass = Imm0_255Operand; -} - -// imm0_127 predicate - True if the immediate is in the range [0,127] -def Imm0_127Operand : AsmOperandClass { let Name = "Imm0_127"; } -def imm0_127 : Operand<i32>, ImmLeaf<i32, [{ - return ((uint32_t)Imm) < 128; -}]> { - let ParserMatchClass = Imm0_127Operand; -} - -// NOTE: These imm0_N operands have to be of type i64 because i64 is the size -// for all shift-amounts. - -// imm0_63 predicate - True if the immediate is in the range [0,63] -def imm0_63 : Operand<i64>, ImmLeaf<i64, [{ - return ((uint64_t)Imm) < 64; -}]> { - let ParserMatchClass = Imm0_63Operand; -} - -// imm0_31 predicate - True if the immediate is in the range [0,31] -def imm0_31 : Operand<i64>, ImmLeaf<i64, [{ - return ((uint64_t)Imm) < 32; -}]> { - let ParserMatchClass = Imm0_31Operand; -} - -// imm0_15 predicate - True if the immediate is in the range [0,15] -def imm0_15 : Operand<i64>, ImmLeaf<i64, [{ - return ((uint64_t)Imm) < 16; -}]> { - let ParserMatchClass = Imm0_15Operand; -} - -// imm0_7 predicate - True if the immediate is in the range [0,7] -def imm0_7 : Operand<i64>, ImmLeaf<i64, [{ - return ((uint64_t)Imm) < 8; -}]> { - let ParserMatchClass = Imm0_7Operand; -} - -// An arithmetic shifter operand: -// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr -// {5-0} - imm6 -def arith_shift : Operand<i32> { - let PrintMethod = "printShifter"; - let ParserMatchClass = ArithmeticShifterOperand; -} - -class arith_shifted_reg<ValueType Ty, RegisterClass regclass> - : Operand<Ty>, - ComplexPattern<Ty, 2, "SelectArithShiftedRegister", []> { - let PrintMethod = "printShiftedRegister"; - let MIOperandInfo = (ops regclass, arith_shift); -} - -def arith_shifted_reg32 : arith_shifted_reg<i32, GPR32>; -def arith_shifted_reg64 : arith_shifted_reg<i64, GPR64>; - -// An arithmetic shifter operand: -// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr, 11 = ror -// {5-0} - imm6 -def logical_shift : Operand<i32> { - let PrintMethod = "printShifter"; - let ParserMatchClass = ShifterOperand; -} - -class logical_shifted_reg<ValueType Ty, RegisterClass regclass> - : Operand<Ty>, - ComplexPattern<Ty, 2, "SelectLogicalShiftedRegister", []> { - let PrintMethod = "printShiftedRegister"; - let MIOperandInfo = (ops regclass, logical_shift); -} - -def logical_shifted_reg32 : logical_shifted_reg<i32, GPR32>; -def logical_shifted_reg64 : logical_shifted_reg<i64, GPR64>; - -// A logical vector shifter operand: -// {7-6} - shift type: 00 = lsl -// {5-0} - imm6: #0, #8, #16, or #24 -def logical_vec_shift : Operand<i32> { - let PrintMethod = "printShifter"; - let EncoderMethod = "getVecShifterOpValue"; - let ParserMatchClass = LogicalVecShifterOperand; -} - -// A logical vector half-word shifter operand: -// {7-6} - shift type: 00 = lsl -// {5-0} - imm6: #0 or #8 -def logical_vec_hw_shift : Operand<i32> { - let PrintMethod = "printShifter"; - let EncoderMethod = "getVecShifterOpValue"; - let ParserMatchClass = LogicalVecHalfWordShifterOperand; -} - -// A vector move shifter operand: -// {0} - imm1: #8 or #16 -def move_vec_shift : Operand<i32> { - let PrintMethod = "printShifter"; - let EncoderMethod = "getMoveVecShifterOpValue"; - let ParserMatchClass = MoveVecShifterOperand; -} - -// An ADD/SUB immediate shifter operand: -// {7-6} - shift type: 00 = lsl -// {5-0} - imm6: #0 or #12 -def addsub_shift : Operand<i32> { - let ParserMatchClass = 
AddSubShifterOperand; -} - -class addsub_shifted_imm<ValueType Ty> - : Operand<Ty>, ComplexPattern<Ty, 2, "SelectArithImmed", [imm]> { - let PrintMethod = "printAddSubImm"; - let EncoderMethod = "getAddSubImmOpValue"; - let MIOperandInfo = (ops i32imm, addsub_shift); -} - -def addsub_shifted_imm32 : addsub_shifted_imm<i32>; -def addsub_shifted_imm64 : addsub_shifted_imm<i64>; - -class neg_addsub_shifted_imm<ValueType Ty> - : Operand<Ty>, ComplexPattern<Ty, 2, "SelectNegArithImmed", [imm]> { - let PrintMethod = "printAddSubImm"; - let EncoderMethod = "getAddSubImmOpValue"; - let MIOperandInfo = (ops i32imm, addsub_shift); -} - -def neg_addsub_shifted_imm32 : neg_addsub_shifted_imm<i32>; -def neg_addsub_shifted_imm64 : neg_addsub_shifted_imm<i64>; - -// An extend operand: -// {5-3} - extend type -// {2-0} - imm3 -def arith_extend : Operand<i32> { - let PrintMethod = "printExtend"; - let ParserMatchClass = ExtendOperand; -} -def arith_extend64 : Operand<i32> { - let PrintMethod = "printExtend"; - let ParserMatchClass = ExtendOperand64; -} - -// 'extend' that's a lsl of a 64-bit register. -def arith_extendlsl64 : Operand<i32> { - let PrintMethod = "printExtend"; - let ParserMatchClass = ExtendOperandLSL64; -} - -class arith_extended_reg32<ValueType Ty> : Operand<Ty>, - ComplexPattern<Ty, 2, "SelectArithExtendedRegister", []> { - let PrintMethod = "printExtendedRegister"; - let MIOperandInfo = (ops GPR32, arith_extend); -} - -class arith_extended_reg32to64<ValueType Ty> : Operand<Ty>, - ComplexPattern<Ty, 2, "SelectArithExtendedRegister", []> { - let PrintMethod = "printExtendedRegister"; - let MIOperandInfo = (ops GPR32, arith_extend64); -} - -// Floating-point immediate. -def fpimm32 : Operand<f32>, - PatLeaf<(f32 fpimm), [{ - return ARM64_AM::getFP32Imm(N->getValueAPF()) != -1; - }], SDNodeXForm<fpimm, [{ - APFloat InVal = N->getValueAPF(); - uint32_t enc = ARM64_AM::getFP32Imm(InVal); - return CurDAG->getTargetConstant(enc, MVT::i32); - }]>> { - let ParserMatchClass = FPImmOperand; - let PrintMethod = "printFPImmOperand"; -} -def fpimm64 : Operand<f64>, - PatLeaf<(f64 fpimm), [{ - return ARM64_AM::getFP64Imm(N->getValueAPF()) != -1; - }], SDNodeXForm<fpimm, [{ - APFloat InVal = N->getValueAPF(); - uint32_t enc = ARM64_AM::getFP64Imm(InVal); - return CurDAG->getTargetConstant(enc, MVT::i32); - }]>> { - let ParserMatchClass = FPImmOperand; - let PrintMethod = "printFPImmOperand"; -} - -def fpimm8 : Operand<i32> { - let ParserMatchClass = FPImmOperand; - let PrintMethod = "printFPImmOperand"; -} - -def fpimm0 : PatLeaf<(fpimm), [{ - return N->isExactlyValue(+0.0); -}]>; - -// 8-bit immediate for AdvSIMD where 64-bit values of the form: -// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh -// are encoded as the eight bit value 'abcdefgh'. -def simdimmtype10 : Operand<i32>, - PatLeaf<(f64 fpimm), [{ - return ARM64_AM::isAdvSIMDModImmType10(N->getValueAPF() - .bitcastToAPInt() - .getZExtValue()); - }], SDNodeXForm<fpimm, [{ - APFloat InVal = N->getValueAPF(); - uint32_t enc = ARM64_AM::encodeAdvSIMDModImmType10(N->getValueAPF() - .bitcastToAPInt() - .getZExtValue()); - return CurDAG->getTargetConstant(enc, MVT::i32); - }]>> { - let ParserMatchClass = SIMDImmType10Operand; - let PrintMethod = "printSIMDType10Operand"; -} - - -//--- -// Sytem management -//--- - -// Base encoding for system instruction operands. 
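Before the system-instruction formats, one note on simdimmtype10 above: the "type 10" AdvSIMD immediate replicates each of the eight bits abcdefgh into a full byte of the 64-bit result. A small sketch of that expansion, assumed to match the view of the format that isAdvSIMDModImmType10/encodeAdvSIMDModImmType10 implement:

#include <cstdint>

// Bit 7 ('a') controls the most significant byte, bit 0 ('h') the least.
static uint64_t expandSIMDImmType10(uint8_t Imm) {
  uint64_t Result = 0;
  for (int Byte = 0; Byte != 8; ++Byte)
    if (Imm & (1u << Byte))
      Result |= 0xFFULL << (8 * Byte);
  return Result;
}
// e.g. expandSIMDImmType10(0x81) == 0xFF000000000000FF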
-let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class BaseSystemI<bit L, dag oops, dag iops, string asm, string operands> - : I<oops, iops, asm, operands, "", []> { - let Inst{31-22} = 0b1101010100; - let Inst{21} = L; -} - -// System instructions which do not have an Rt register. -class SimpleSystemI<bit L, dag iops, string asm, string operands> - : BaseSystemI<L, (outs), iops, asm, operands> { - let Inst{4-0} = 0b11111; -} - -// System instructions which have an Rt register. -class RtSystemI<bit L, dag oops, dag iops, string asm, string operands> - : BaseSystemI<L, oops, iops, asm, operands>, - Sched<[WriteSys]> { - bits<5> Rt; - let Inst{4-0} = Rt; -} - -// Hint instructions that take both a CRm and a 3-bit immediate. -class HintI<string mnemonic> - : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#" $imm", "">, - Sched<[WriteHint]> { - bits <7> imm; - let Inst{20-12} = 0b000110010; - let Inst{11-5} = imm; -} - -// System instructions taking a single literal operand which encodes into -// CRm. op2 differentiates the opcodes. -def BarrierAsmOperand : AsmOperandClass { - let Name = "Barrier"; - let ParserMethod = "tryParseBarrierOperand"; -} -def barrier_op : Operand<i32> { - let PrintMethod = "printBarrierOption"; - let ParserMatchClass = BarrierAsmOperand; -} -class CRmSystemI<Operand crmtype, bits<3> opc, string asm> - : SimpleSystemI<0, (ins crmtype:$CRm), asm, "\t$CRm">, - Sched<[WriteBarrier]> { - bits<4> CRm; - let Inst{20-12} = 0b000110011; - let Inst{11-8} = CRm; - let Inst{7-5} = opc; -} - -// MRS/MSR system instructions. -def SystemRegisterOperand : AsmOperandClass { - let Name = "SystemRegister"; - let ParserMethod = "tryParseSystemRegister"; -} -// concatenation of 1, op0, op1, CRn, CRm, op2. 16-bit immediate. -def sysreg_op : Operand<i32> { - let ParserMatchClass = SystemRegisterOperand; - let DecoderMethod = "DecodeSystemRegister"; - let PrintMethod = "printSystemRegister"; -} - -class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins sysreg_op:$systemreg), - "mrs", "\t$Rt, $systemreg"> { - bits<15> systemreg; - let Inst{20} = 1; - let Inst{19-5} = systemreg; -} - -// FIXME: Some of these def CPSR, others don't. Best way to model that? -// Explicitly modeling each of the system register as a register class -// would do it, but feels like overkill at this point. -class MSRI : RtSystemI<0, (outs), (ins sysreg_op:$systemreg, GPR64:$Rt), - "msr", "\t$systemreg, $Rt"> { - bits<15> systemreg; - let Inst{20} = 1; - let Inst{19-5} = systemreg; -} - -def SystemCPSRFieldOperand : AsmOperandClass { - let Name = "SystemCPSRField"; - let ParserMethod = "tryParseCPSRField"; -} -def cpsrfield_op : Operand<i32> { - let ParserMatchClass = SystemCPSRFieldOperand; - let PrintMethod = "printSystemCPSRField"; -} - -let Defs = [CPSR] in -class MSRcpsrI : SimpleSystemI<0, (ins cpsrfield_op:$cpsr_field, imm0_15:$imm), - "msr", "\t$cpsr_field, $imm">, - Sched<[WriteSys]> { - bits<6> cpsrfield; - bits<4> imm; - let Inst{20-19} = 0b00; - let Inst{18-16} = cpsrfield{5-3}; - let Inst{15-12} = 0b0100; - let Inst{11-8} = imm; - let Inst{7-5} = cpsrfield{2-0}; - - let DecoderMethod = "DecodeSystemCPSRInstruction"; -} - -// SYS and SYSL generic system instructions. 
-def SysCRAsmOperand : AsmOperandClass { - let Name = "SysCR"; - let ParserMethod = "tryParseSysCROperand"; -} - -def sys_cr_op : Operand<i32> { - let PrintMethod = "printSysCROperand"; - let ParserMatchClass = SysCRAsmOperand; -} - -class SystemI<bit L, string asm> - : SimpleSystemI<L, - (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2), - asm, "\t$op1, $Cn, $Cm, $op2">, - Sched<[WriteSys]> { - bits<3> op1; - bits<4> Cn; - bits<4> Cm; - bits<3> op2; - let Inst{20-19} = 0b01; - let Inst{18-16} = op1; - let Inst{15-12} = Cn; - let Inst{11-8} = Cm; - let Inst{7-5} = op2; -} - -class SystemXtI<bit L, string asm> - : RtSystemI<L, (outs), - (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, GPR64:$Rt), - asm, "\t$op1, $Cn, $Cm, $op2, $Rt"> { - bits<3> op1; - bits<4> Cn; - bits<4> Cm; - bits<3> op2; - let Inst{20-19} = 0b01; - let Inst{18-16} = op1; - let Inst{15-12} = Cn; - let Inst{11-8} = Cm; - let Inst{7-5} = op2; -} - -class SystemLXtI<bit L, string asm> - : RtSystemI<L, (outs), - (ins GPR64:$Rt, imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2), - asm, "\t$Rt, $op1, $Cn, $Cm, $op2"> { - bits<3> op1; - bits<4> Cn; - bits<4> Cm; - bits<3> op2; - let Inst{20-19} = 0b01; - let Inst{18-16} = op1; - let Inst{15-12} = Cn; - let Inst{11-8} = Cm; - let Inst{7-5} = op2; -} - - -// Branch (register) instructions: -// -// case opc of -// 0001 blr -// 0000 br -// 0101 dret -// 0100 eret -// 0010 ret -// otherwise UNDEFINED -class BaseBranchReg<bits<4> opc, dag oops, dag iops, string asm, - string operands, list<dag> pattern> - : I<oops, iops, asm, operands, "", pattern>, Sched<[WriteBrReg]> { - let Inst{31-25} = 0b1101011; - let Inst{24-21} = opc; - let Inst{20-16} = 0b11111; - let Inst{15-10} = 0b000000; - let Inst{4-0} = 0b00000; -} - -class BranchReg<bits<4> opc, string asm, list<dag> pattern> - : BaseBranchReg<opc, (outs), (ins GPR64:$Rn), asm, "\t$Rn", pattern> { - bits<5> Rn; - let Inst{9-5} = Rn; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1, isReturn = 1 in -class SpecialReturn<bits<4> opc, string asm> - : BaseBranchReg<opc, (outs), (ins), asm, "", []> { - let Inst{9-5} = 0b11111; -} - -//--- -// Conditional branch instruction. -//--- -// Branch condition code. -// 4-bit immediate. Pretty-printed as .<cc> -def dotCcode : Operand<i32> { - let PrintMethod = "printDotCondCode"; -} - -// Conditional branch target. 19-bit immediate. The low two bits of the target -// offset are implied zero and so are not part of the immediate. -def BranchTarget19Operand : AsmOperandClass { - let Name = "BranchTarget19"; -} -def am_brcond : Operand<OtherVT> { - let EncoderMethod = "getCondBranchTargetOpValue"; - let DecoderMethod = "DecodeCondBranchTarget"; - let PrintMethod = "printAlignedBranchTarget"; - let ParserMatchClass = BranchTarget19Operand; -} - -class BranchCond : I<(outs), (ins dotCcode:$cond, am_brcond:$target), - "b", "$cond\t$target", "", - [(ARM64brcond bb:$target, imm:$cond, CPSR)]>, - Sched<[WriteBr]> { - let isBranch = 1; - let isTerminator = 1; - let Uses = [CPSR]; - - bits<4> cond; - bits<19> target; - let Inst{31-24} = 0b01010100; - let Inst{23-5} = target; - let Inst{4} = 0; - let Inst{3-0} = cond; -} - -//--- -// Compare-and-branch instructions. 
-//--- -class BaseCmpBranch<RegisterClass regtype, bit op, string asm, SDNode node> - : I<(outs), (ins regtype:$Rt, am_brcond:$target), - asm, "\t$Rt, $target", "", - [(node regtype:$Rt, bb:$target)]>, - Sched<[WriteBr]> { - let isBranch = 1; - let isTerminator = 1; - - bits<5> Rt; - bits<19> target; - let Inst{30-25} = 0b011010; - let Inst{24} = op; - let Inst{23-5} = target; - let Inst{4-0} = Rt; -} - -multiclass CmpBranch<bit op, string asm, SDNode node> { - def W : BaseCmpBranch<GPR32, op, asm, node> { - let Inst{31} = 0; - } - def X : BaseCmpBranch<GPR64, op, asm, node> { - let Inst{31} = 1; - } -} - -//--- -// Test-bit-and-branch instructions. -//--- -// Test-and-branch target. 14-bit sign-extended immediate. The low two bits of -// the target offset are implied zero and so are not part of the immediate. -def BranchTarget14Operand : AsmOperandClass { - let Name = "BranchTarget14"; -} -def am_tbrcond : Operand<OtherVT> { - let EncoderMethod = "getTestBranchTargetOpValue"; - let PrintMethod = "printAlignedBranchTarget"; - let ParserMatchClass = BranchTarget14Operand; -} - -class TestBranch<bit op, string asm, SDNode node> - : I<(outs), (ins GPR64:$Rt, imm0_63:$bit_off, am_tbrcond:$target), - asm, "\t$Rt, $bit_off, $target", "", - [(node GPR64:$Rt, imm0_63:$bit_off, bb:$target)]>, - Sched<[WriteBr]> { - let isBranch = 1; - let isTerminator = 1; - - bits<5> Rt; - bits<6> bit_off; - bits<14> target; - - let Inst{31} = bit_off{5}; - let Inst{30-25} = 0b011011; - let Inst{24} = op; - let Inst{23-19} = bit_off{4-0}; - let Inst{18-5} = target; - let Inst{4-0} = Rt; - - let DecoderMethod = "DecodeTestAndBranch"; -} - -//--- -// Unconditional branch (immediate) instructions. -//--- -def BranchTarget26Operand : AsmOperandClass { - let Name = "BranchTarget26"; -} -def am_b_target : Operand<OtherVT> { - let EncoderMethod = "getBranchTargetOpValue"; - let PrintMethod = "printAlignedBranchTarget"; - let ParserMatchClass = BranchTarget26Operand; -} -def am_bl_target : Operand<i64> { - let EncoderMethod = "getBranchTargetOpValue"; - let PrintMethod = "printAlignedBranchTarget"; - let ParserMatchClass = BranchTarget26Operand; -} - -class BImm<bit op, dag iops, string asm, list<dag> pattern> - : I<(outs), iops, asm, "\t$addr", "", pattern>, Sched<[WriteBr]> { - bits<26> addr; - let Inst{31} = op; - let Inst{30-26} = 0b00101; - let Inst{25-0} = addr; - - let DecoderMethod = "DecodeUnconditionalBranch"; -} - -class BranchImm<bit op, string asm, list<dag> pattern> - : BImm<op, (ins am_b_target:$addr), asm, pattern>; -class CallImm<bit op, string asm, list<dag> pattern> - : BImm<op, (ins am_bl_target:$addr), asm, pattern>; - -//--- -// Basic one-operand data processing instructions. 
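A quick sanity check on the reach implied by these branch-target operands: the 19-bit conditional and compare-and-branch targets, the 14-bit test-and-branch targets and the 26-bit unconditional B/BL targets are all signed, word-aligned offsets, giving roughly +/-1 MiB, +/-32 KiB and +/-128 MiB respectively. A small compile-time check of that arithmetic, assuming the usual signed, times-4 interpretation:

#include <cstdint>

constexpr int64_t branchRangeBytes(unsigned ImmBits) {
  // Signed immediate, low two bits implied zero => scale by 4.
  return (int64_t(1) << (ImmBits - 1)) * 4;
}
static_assert(branchRangeBytes(19) == 1 << 20, "b.cond/cbz/cbnz: +/-1 MiB");
static_assert(branchRangeBytes(14) == 1 << 15, "tbz/tbnz: +/-32 KiB");
static_assert(branchRangeBytes(26) == 1 << 27, "b/bl: +/-128 MiB");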
-//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseOneOperandData<bits<3> opc, RegisterClass regtype, string asm, - SDPatternOperator node> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "", - [(set regtype:$Rd, (node regtype:$Rn))]>, - Sched<[WriteI]> { - bits<5> Rd; - bits<5> Rn; - - let Inst{30-13} = 0b101101011000000000; - let Inst{12-10} = opc; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass OneOperandData<bits<3> opc, string asm, - SDPatternOperator node = null_frag> { - def Wr : BaseOneOperandData<opc, GPR32, asm, node> { - let Inst{31} = 0; - } - - def Xr : BaseOneOperandData<opc, GPR64, asm, node> { - let Inst{31} = 1; - } -} - -class OneWRegData<bits<3> opc, string asm, SDPatternOperator node> - : BaseOneOperandData<opc, GPR32, asm, node> { - let Inst{31} = 0; -} - -class OneXRegData<bits<3> opc, string asm, SDPatternOperator node> - : BaseOneOperandData<opc, GPR64, asm, node> { - let Inst{31} = 1; -} - -//--- -// Basic two-operand data processing instructions. -//--- -class BaseBaseAddSubCarry<bit isSub, RegisterClass regtype, string asm, - list<dag> pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", pattern>, - Sched<[WriteI]> { - let Uses = [CPSR]; - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{30} = isSub; - let Inst{28-21} = 0b11010000; - let Inst{20-16} = Rm; - let Inst{15-10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseAddSubCarry<bit isSub, RegisterClass regtype, string asm, - SDNode OpNode> - : BaseBaseAddSubCarry<isSub, regtype, asm, - [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm, CPSR))]>; - -class BaseAddSubCarrySetFlags<bit isSub, RegisterClass regtype, string asm, - SDNode OpNode> - : BaseBaseAddSubCarry<isSub, regtype, asm, - [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm, CPSR)), - (implicit CPSR)]> { - let Defs = [CPSR]; -} - -multiclass AddSubCarry<bit isSub, string asm, string asm_setflags, - SDNode OpNode, SDNode OpNode_setflags> { - def Wr : BaseAddSubCarry<isSub, GPR32, asm, OpNode> { - let Inst{31} = 0; - let Inst{29} = 0; - } - def Xr : BaseAddSubCarry<isSub, GPR64, asm, OpNode> { - let Inst{31} = 1; - let Inst{29} = 0; - } - - // Sets flags. 
- def SWr : BaseAddSubCarrySetFlags<isSub, GPR32, asm_setflags, - OpNode_setflags> { - let Inst{31} = 0; - let Inst{29} = 1; - } - def SXr : BaseAddSubCarrySetFlags<isSub, GPR64, asm_setflags, - OpNode_setflags> { - let Inst{31} = 1; - let Inst{29} = 1; - } -} - -class BaseTwoOperand<bits<4> opc, RegisterClass regtype, string asm, - SDPatternOperator OpNode> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{30-21} = 0b0011010110; - let Inst{20-16} = Rm; - let Inst{15-14} = 0b00; - let Inst{13-10} = opc; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseDiv<bit isSigned, RegisterClass regtype, string asm, - SDPatternOperator OpNode> - : BaseTwoOperand<{0,0,1,?}, regtype, asm, OpNode> { - let Inst{10} = isSigned; -} - -multiclass Div<bit isSigned, string asm, SDPatternOperator OpNode> { - def Wr : BaseDiv<isSigned, GPR32, asm, OpNode>, - Sched<[WriteID32]> { - let Inst{31} = 0; - } - def Xr : BaseDiv<isSigned, GPR64, asm, OpNode>, - Sched<[WriteID64]> { - let Inst{31} = 1; - } -} - -class BaseShift<bits<2> shift_type, RegisterClass regtype, string asm, - SDPatternOperator OpNode = null_frag> - : BaseTwoOperand<{1,0,?,?}, regtype, asm, OpNode>, - Sched<[WriteIS]> { - let Inst{11-10} = shift_type; -} - -multiclass Shift<bits<2> shift_type, string asm, SDNode OpNode> { - def Wr : BaseShift<shift_type, GPR32, asm> { - let Inst{31} = 0; - } - - def Xr : BaseShift<shift_type, GPR64, asm, OpNode> { - let Inst{31} = 1; - } - - def : Pat<(i32 (OpNode GPR32:$Rn, i64:$Rm)), - (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, - (EXTRACT_SUBREG i64:$Rm, sub_32))>; - - def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (zext GPR32:$Rm)))), - (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; - - def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (anyext GPR32:$Rm)))), - (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; - - def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (sext GPR32:$Rm)))), - (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; -} - -class ShiftAlias<string asm, Instruction inst, RegisterClass regtype> - : InstAlias<asm#" $dst, $src1, $src2", - (inst regtype:$dst, regtype:$src1, regtype:$src2)>; - -class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype, - RegisterClass addtype, string asm, - list<dag> pattern> - : I<(outs addtype:$Rd), (ins multype:$Rn, multype:$Rm, addtype:$Ra), - asm, "\t$Rd, $Rn, $Rm, $Ra", "", pattern> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<5> Ra; - let Inst{30-24} = 0b0011011; - let Inst{23-21} = opc; - let Inst{20-16} = Rm; - let Inst{15} = isSub; - let Inst{14-10} = Ra; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass MulAccum<bit isSub, string asm, SDNode AccNode> { - def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm, - [(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))]>, - Sched<[WriteIM32]> { - let Inst{31} = 0; - } - - def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm, - [(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))]>, - Sched<[WriteIM64]> { - let Inst{31} = 1; - } -} - -class WideMulAccum<bit isSub, bits<3> opc, string asm, - SDNode AccNode, SDNode ExtNode> - : BaseMulAccum<isSub, opc, GPR32, GPR64, asm, - [(set GPR64:$Rd, (AccNode GPR64:$Ra, - (mul (ExtNode GPR32:$Rn), (ExtNode GPR32:$Rm))))]>, - Sched<[WriteIM32]> { - let Inst{31} = 1; -} - -class MulHi<bits<3> opc, string asm, SDNode OpNode> - : I<(outs GPR64:$Rd), (ins GPR64:$Rn, 
GPR64:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set GPR64:$Rd, (OpNode GPR64:$Rn, GPR64:$Rm))]>, - Sched<[WriteIM64]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-24} = 0b10011011; - let Inst{23-21} = opc; - let Inst{20-16} = Rm; - let Inst{15-10} = 0b011111; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class MulAccumWAlias<string asm, Instruction inst> - : InstAlias<asm#" $dst, $src1, $src2", - (inst GPR32:$dst, GPR32:$src1, GPR32:$src2, WZR)>; -class MulAccumXAlias<string asm, Instruction inst> - : InstAlias<asm#" $dst, $src1, $src2", - (inst GPR64:$dst, GPR64:$src1, GPR64:$src2, XZR)>; -class WideMulAccumAlias<string asm, Instruction inst> - : InstAlias<asm#" $dst, $src1, $src2", - (inst GPR64:$dst, GPR32:$src1, GPR32:$src2, XZR)>; - -class BaseCRC32<bit sf, bits<2> sz, bit C, RegisterClass StreamReg, - SDPatternOperator OpNode, string asm> - : I<(outs GPR32:$Rd), (ins GPR32:$Rn, StreamReg:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set GPR32:$Rd, (OpNode GPR32:$Rn, StreamReg:$Rm))]>, - Sched<[WriteISReg]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - - let Inst{31} = sf; - let Inst{30-21} = 0b0011010110; - let Inst{20-16} = Rm; - let Inst{15-13} = 0b010; - let Inst{12} = C; - let Inst{11-10} = sz; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -//--- -// Address generation. -//--- - -class ADRI<bit page, string asm, Operand adr, list<dag> pattern> - : I<(outs GPR64:$Xd), (ins adr:$label), asm, "\t$Xd, $label", "", - pattern>, - Sched<[WriteI]> { - bits<5> Xd; - bits<21> label; - let Inst{31} = page; - let Inst{30-29} = label{1-0}; - let Inst{28-24} = 0b10000; - let Inst{23-5} = label{20-2}; - let Inst{4-0} = Xd; - - let DecoderMethod = "DecodeAdrInstruction"; -} - -//--- -// Move immediate. -//--- - -def movimm32_imm : Operand<i32> { - let ParserMatchClass = Imm0_65535Operand; - let EncoderMethod = "getMoveWideImmOpValue"; -} -def movimm32_shift : Operand<i32> { - let PrintMethod = "printShifter"; - let ParserMatchClass = MovImm32ShifterOperand; -} -def movimm64_shift : Operand<i32> { - let PrintMethod = "printShifter"; - let ParserMatchClass = MovImm64ShifterOperand; -} -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseMoveImmediate<bits<2> opc, RegisterClass regtype, Operand shifter, - string asm> - : I<(outs regtype:$Rd), (ins movimm32_imm:$imm, shifter:$shift), - asm, "\t$Rd, $imm$shift", "", []>, - Sched<[WriteImm]> { - bits<5> Rd; - bits<16> imm; - bits<6> shift; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100101; - let Inst{22-21} = shift{5-4}; - let Inst{20-5} = imm; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeMoveImmInstruction"; -} - -multiclass MoveImmediate<bits<2> opc, string asm> { - def Wi : BaseMoveImmediate<opc, GPR32, movimm32_shift, asm> { - let Inst{31} = 0; - } - - def Xi : BaseMoveImmediate<opc, GPR64, movimm64_shift, asm> { - let Inst{31} = 1; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseInsertImmediate<bits<2> opc, RegisterClass regtype, Operand shifter, - string asm> - : I<(outs regtype:$Rd), - (ins regtype:$src, movimm32_imm:$imm, shifter:$shift), - asm, "\t$Rd, $imm$shift", "$src = $Rd", []>, - Sched<[WriteI]> { - bits<5> Rd; - bits<16> imm; - bits<6> shift; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100101; - let Inst{22-21} = shift{5-4}; - let Inst{20-5} = imm; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeMoveImmInstruction"; -} - -multiclass InsertImmediate<bits<2> opc, string asm> { - def Wi : BaseInsertImmediate<opc, GPR32, movimm32_shift, asm> { - let Inst{31} = 0; - } - - 
def Xi : BaseInsertImmediate<opc, GPR64, movimm64_shift, asm> { - let Inst{31} = 1; - } -} - -//--- -// Add/Subtract -//--- - -class BaseAddSubImm<bit isSub, bit setFlags, RegisterClass dstRegtype, - RegisterClass srcRegtype, addsub_shifted_imm immtype, - string asm, SDPatternOperator OpNode> - : I<(outs dstRegtype:$Rd), (ins srcRegtype:$Rn, immtype:$imm), - asm, "\t$Rd, $Rn, $imm", "", - [(set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))]>, - Sched<[WriteI]> { - bits<5> Rd; - bits<5> Rn; - bits<14> imm; - let Inst{30} = isSub; - let Inst{29} = setFlags; - let Inst{28-24} = 0b10001; - let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12 - let Inst{21-10} = imm{11-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - let DecoderMethod = "DecodeBaseAddSubImm"; -} - -class BaseAddSubRegPseudo<RegisterClass regtype, - SDPatternOperator OpNode> - : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>, - Sched<[WriteI]>; - -class BaseAddSubSReg<bit isSub, bit setFlags, RegisterClass regtype, - arith_shifted_reg shifted_regtype, string asm, - SDPatternOperator OpNode> - : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set regtype:$Rd, (OpNode regtype:$Rn, shifted_regtype:$Rm))]>, - Sched<[WriteISReg]> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> src1; - bits<5> src2; - bits<8> shift; - let Inst{30} = isSub; - let Inst{29} = setFlags; - let Inst{28-24} = 0b01011; - let Inst{23-22} = shift{7-6}; - let Inst{21} = 0; - let Inst{20-16} = src2; - let Inst{15-10} = shift{5-0}; - let Inst{9-5} = src1; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeThreeAddrSRegInstruction"; -} - -class BaseAddSubEReg<bit isSub, bit setFlags, RegisterClass dstRegtype, - RegisterClass src1Regtype, Operand src2Regtype, - string asm, SDPatternOperator OpNode> - : I<(outs dstRegtype:$R1), - (ins src1Regtype:$R2, src2Regtype:$R3), - asm, "\t$R1, $R2, $R3", "", - [(set dstRegtype:$R1, (OpNode src1Regtype:$R2, src2Regtype:$R3))]>, - Sched<[WriteIEReg]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<6> ext; - let Inst{30} = isSub; - let Inst{29} = setFlags; - let Inst{28-24} = 0b01011; - let Inst{23-21} = 0b001; - let Inst{20-16} = Rm; - let Inst{15-13} = ext{5-3}; - let Inst{12-10} = ext{2-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeAddSubERegInstruction"; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseAddSubEReg64<bit isSub, bit setFlags, RegisterClass dstRegtype, - RegisterClass src1Regtype, RegisterClass src2Regtype, - Operand ext_op, string asm> - : I<(outs dstRegtype:$Rd), - (ins src1Regtype:$Rn, src2Regtype:$Rm, ext_op:$ext), - asm, "\t$Rd, $Rn, $Rm$ext", "", []>, - Sched<[WriteIEReg]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<6> ext; - let Inst{30} = isSub; - let Inst{29} = setFlags; - let Inst{28-24} = 0b01011; - let Inst{23-21} = 0b001; - let Inst{20-16} = Rm; - let Inst{15} = ext{5}; - let Inst{12-10} = ext{2-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeAddSubERegInstruction"; -} - -// Aliases for register+register add/subtract. 
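A note on the immediate form defined in BaseAddSubImm above: the 2-bit field in Inst{23-22} selects the optional left shift ('00' => lsl #0, '01' => lsl #12) and the 12-bit payload lands in Inst{21-10}. The following is a minimal standalone sketch of that field packing, written against the Inst{...} assignments shown above; it is not LLVM's MC encoder, and the helper name is made up for illustration.

// Illustrative sketch only: packs the ADD/SUB (immediate) fields laid out in
// BaseAddSubImm above into a 32-bit instruction word.
#include <cstdint>
#include <cstdio>

uint32_t encodeAddSubImm(bool is64, bool isSub, bool setFlags, unsigned rd,
                         unsigned rn, unsigned imm12, bool lsl12) {
  uint32_t inst = 0;
  inst |= (is64 ? 1u : 0u) << 31;        // Inst{31}: sf (register width)
  inst |= (isSub ? 1u : 0u) << 30;       // Inst{30} = isSub
  inst |= (setFlags ? 1u : 0u) << 29;    // Inst{29} = setFlags
  inst |= 0b10001u << 24;                // Inst{28-24}
  inst |= (lsl12 ? 0b01u : 0b00u) << 22; // Inst{23-22}: '00' lsl #0, '01' lsl #12
  inst |= (imm12 & 0xFFFu) << 10;        // Inst{21-10}: 12-bit immediate
  inst |= (rn & 0x1Fu) << 5;             // Inst{9-5}
  inst |= (rd & 0x1Fu);                  // Inst{4-0}
  return inst;
}

int main() {
  // add x0, x0, #1          -> expected 0x91000400
  std::printf("%08x\n", encodeAddSubImm(true, false, false, 0, 0, 1, false));
  // add x0, x1, #1, lsl #12 -> expected 0x91400420
  std::printf("%08x\n", encodeAddSubImm(true, false, false, 0, 1, 1, true));
}

The register+register aliases continue below.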
-class AddSubRegAlias<string asm, Instruction inst, RegisterClass dstRegtype, - RegisterClass src1Regtype, RegisterClass src2Regtype, - int shiftExt> - : InstAlias<asm#" $dst, $src1, $src2", - (inst dstRegtype:$dst, src1Regtype:$src1, src2Regtype:$src2, - shiftExt)>; - -multiclass AddSub<bit isSub, string mnemonic, - SDPatternOperator OpNode = null_frag> { - let hasSideEffects = 0 in { - // Add/Subtract immediate - def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32, - mnemonic, OpNode> { - let Inst{31} = 0; - } - def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64, - mnemonic, OpNode> { - let Inst{31} = 1; - } - - // Add/Subtract register - Only used for CodeGen - def Wrr : BaseAddSubRegPseudo<GPR32, OpNode>; - def Xrr : BaseAddSubRegPseudo<GPR64, OpNode>; - - // Add/Subtract shifted register - def Wrs : BaseAddSubSReg<isSub, 0, GPR32, arith_shifted_reg32, mnemonic, - OpNode> { - let Inst{31} = 0; - } - def Xrs : BaseAddSubSReg<isSub, 0, GPR64, arith_shifted_reg64, mnemonic, - OpNode> { - let Inst{31} = 1; - } - } - - // Add/Subtract extended register - let AddedComplexity = 1, hasSideEffects = 0 in { - def Wrx : BaseAddSubEReg<isSub, 0, GPR32sp, GPR32sp, - arith_extended_reg32<i32>, mnemonic, OpNode> { - let Inst{31} = 0; - } - def Xrx : BaseAddSubEReg<isSub, 0, GPR64sp, GPR64sp, - arith_extended_reg32to64<i64>, mnemonic, OpNode> { - let Inst{31} = 1; - } - } - - def Xrx64 : BaseAddSubEReg64<isSub, 0, GPR64sp, GPR64sp, GPR64, - arith_extendlsl64, mnemonic> { - // UXTX and SXTX only. - let Inst{14-13} = 0b11; - let Inst{31} = 1; - } - - // Register/register aliases with no shift when SP is not used. - def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"), - GPR32, GPR32, GPR32, 0>; - def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Xrs"), - GPR64, GPR64, GPR64, 0>; - - // Register/register aliases with no shift when either the destination or - // first source register is SP. This relies on the shifted register aliases - // above matching first in the case when SP is not used. - def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrx"), - GPR32sp, GPR32sp, GPR32, 16>; // UXTW #0 - def : AddSubRegAlias<mnemonic, - !cast<Instruction>(NAME#"Xrx64"), - GPR64sp, GPR64sp, GPR64, 24>; // UXTX #0 -} - -multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode> { - let isCompare = 1, Defs = [CPSR] in { - // Add/Subtract immediate - def Wri : BaseAddSubImm<isSub, 1, GPR32, GPR32sp, addsub_shifted_imm32, - mnemonic, OpNode> { - let Inst{31} = 0; - } - def Xri : BaseAddSubImm<isSub, 1, GPR64, GPR64sp, addsub_shifted_imm64, - mnemonic, OpNode> { - let Inst{31} = 1; - } - - // Add/Subtract register - def Wrr : BaseAddSubRegPseudo<GPR32, OpNode>; - def Xrr : BaseAddSubRegPseudo<GPR64, OpNode>; - - // Add/Subtract shifted register - def Wrs : BaseAddSubSReg<isSub, 1, GPR32, arith_shifted_reg32, mnemonic, - OpNode> { - let Inst{31} = 0; - } - def Xrs : BaseAddSubSReg<isSub, 1, GPR64, arith_shifted_reg64, mnemonic, - OpNode> { - let Inst{31} = 1; - } - - // Add/Subtract extended register - let AddedComplexity = 1 in { - def Wrx : BaseAddSubEReg<isSub, 1, GPR32, GPR32sp, - arith_extended_reg32<i32>, mnemonic, OpNode> { - let Inst{31} = 0; - } - def Xrx : BaseAddSubEReg<isSub, 1, GPR64, GPR64sp, - arith_extended_reg32<i64>, mnemonic, OpNode> { - let Inst{31} = 1; - } - } - - def Xrx64 : BaseAddSubEReg64<isSub, 1, GPR64, GPR64sp, GPR64, - arith_extendlsl64, mnemonic> { - // UXTX and SXTX only. 
- let Inst{14-13} = 0b11; - let Inst{31} = 1; - } - } // Defs = [CPSR] - - // Register/register aliases with no shift when SP is not used. - def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"), - GPR32, GPR32, GPR32, 0>; - def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Xrs"), - GPR64, GPR64, GPR64, 0>; - - // Register/register aliases with no shift when the first source register - // is SP. This relies on the shifted register aliases above matching first - // in the case when SP is not used. - def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrx"), - GPR32, GPR32sp, GPR32, 16>; // UXTW #0 - def : AddSubRegAlias<mnemonic, - !cast<Instruction>(NAME#"Xrx64"), - GPR64, GPR64sp, GPR64, 24>; // UXTX #0 -} - -//--- -// Extract -//--- -def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisPtrTy<3>]>; -def ARM64Extr : SDNode<"ARM64ISD::EXTR", SDTA64EXTR>; - -class BaseExtractImm<RegisterClass regtype, Operand imm_type, string asm, - list<dag> patterns> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, imm_type:$imm), - asm, "\t$Rd, $Rn, $Rm, $imm", "", patterns>, - Sched<[WriteExtr, ReadExtrHi]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<6> imm; - - let Inst{30-23} = 0b00100111; - let Inst{21} = 0; - let Inst{20-16} = Rm; - let Inst{15-10} = imm; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass ExtractImm<string asm> { - def Wrri : BaseExtractImm<GPR32, imm0_31, asm, - [(set GPR32:$Rd, - (ARM64Extr GPR32:$Rn, GPR32:$Rm, imm0_31:$imm))]> { - let Inst{31} = 0; - let Inst{22} = 0; - } - def Xrri : BaseExtractImm<GPR64, imm0_63, asm, - [(set GPR64:$Rd, - (ARM64Extr GPR64:$Rn, GPR64:$Rm, imm0_63:$imm))]> { - - let Inst{31} = 1; - let Inst{22} = 1; - } -} - -//--- -// Bitfield -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseBitfieldImm<bits<2> opc, - RegisterClass regtype, Operand imm_type, string asm> - : I<(outs regtype:$Rd), (ins regtype:$Rn, imm_type:$immr, imm_type:$imms), - asm, "\t$Rd, $Rn, $immr, $imms", "", []>, - Sched<[WriteIS]> { - bits<5> Rd; - bits<5> Rn; - bits<6> immr; - bits<6> imms; - - let Inst{30-29} = opc; - let Inst{28-23} = 0b100110; - let Inst{21-16} = immr; - let Inst{15-10} = imms; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass BitfieldImm<bits<2> opc, string asm> { - def Wri : BaseBitfieldImm<opc, GPR32, imm0_31, asm> { - let Inst{31} = 0; - let Inst{22} = 0; - } - def Xri : BaseBitfieldImm<opc, GPR64, imm0_63, asm> { - let Inst{31} = 1; - let Inst{22} = 1; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseBitfieldImmWith2RegArgs<bits<2> opc, - RegisterClass regtype, Operand imm_type, string asm> - : I<(outs regtype:$Rd), (ins regtype:$src, regtype:$Rn, imm_type:$immr, - imm_type:$imms), - asm, "\t$Rd, $Rn, $immr, $imms", "$src = $Rd", []>, - Sched<[WriteIS]> { - bits<5> Rd; - bits<5> Rn; - bits<6> immr; - bits<6> imms; - - let Inst{30-29} = opc; - let Inst{28-23} = 0b100110; - let Inst{21-16} = immr; - let Inst{15-10} = imms; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass BitfieldImmWith2RegArgs<bits<2> opc, string asm> { - def Wri : BaseBitfieldImmWith2RegArgs<opc, GPR32, imm0_31, asm> { - let Inst{31} = 0; - let Inst{22} = 0; - } - def Xri : BaseBitfieldImmWith2RegArgs<opc, GPR64, imm0_63, asm> { - let Inst{31} = 1; - let Inst{22} = 1; - } -} - -//--- -// Logical -//--- - -// Logical (immediate) -class BaseLogicalImm<bits<2> opc, RegisterClass dregtype, - RegisterClass sregtype, Operand imm_type, string asm, - 
list<dag> pattern> - : I<(outs dregtype:$Rd), (ins sregtype:$Rn, imm_type:$imm), - asm, "\t$Rd, $Rn, $imm", "", pattern>, - Sched<[WriteI]> { - bits<5> Rd; - bits<5> Rn; - bits<13> imm; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100100; - let Inst{22} = imm{12}; - let Inst{21-16} = imm{11-6}; - let Inst{15-10} = imm{5-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeLogicalImmInstruction"; -} - -// Logical (shifted register) -class BaseLogicalSReg<bits<2> opc, bit N, RegisterClass regtype, - logical_shifted_reg shifted_regtype, string asm, - list<dag> pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", pattern>, - Sched<[WriteISReg]> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> src1; - bits<5> src2; - bits<8> shift; - let Inst{30-29} = opc; - let Inst{28-24} = 0b01010; - let Inst{23-22} = shift{7-6}; - let Inst{21} = N; - let Inst{20-16} = src2; - let Inst{15-10} = shift{5-0}; - let Inst{9-5} = src1; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeThreeAddrSRegInstruction"; -} - -// Aliases for register+register logical instructions. -class LogicalRegAlias<string asm, Instruction inst, RegisterClass regtype> - : InstAlias<asm#" $dst, $src1, $src2", - (inst regtype:$dst, regtype:$src1, regtype:$src2, 0)>; - -let AddedComplexity = 6 in -multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode> { - def Wri : BaseLogicalImm<opc, GPR32sp, GPR32, logical_imm32, mnemonic, - [(set GPR32sp:$Rd, (OpNode GPR32:$Rn, - logical_imm32:$imm))]> { - let Inst{31} = 0; - let Inst{22} = 0; // 64-bit version has an additional bit of immediate. - } - def Xri : BaseLogicalImm<opc, GPR64sp, GPR64, logical_imm64, mnemonic, - [(set GPR64sp:$Rd, (OpNode GPR64:$Rn, - logical_imm64:$imm))]> { - let Inst{31} = 1; - } -} - -multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode> { - let isCompare = 1, Defs = [CPSR] in { - def Wri : BaseLogicalImm<opc, GPR32, GPR32, logical_imm32, mnemonic, - [(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_imm32:$imm))]> { - let Inst{31} = 0; - let Inst{22} = 0; // 64-bit version has an additional bit of immediate. - } - def Xri : BaseLogicalImm<opc, GPR64, GPR64, logical_imm64, mnemonic, - [(set GPR64:$Rd, (OpNode GPR64:$Rn, logical_imm64:$imm))]> { - let Inst{31} = 1; - } - } // end Defs = [CPSR] -} - -class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode> - : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>, - Sched<[WriteI]>; - -// Split from LogicalImm as not all instructions have both. 
-multiclass LogicalReg<bits<2> opc, bit N, string mnemonic, - SDPatternOperator OpNode> { - def Wrr : BaseLogicalRegPseudo<GPR32, OpNode>; - def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>; - - def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic, - [(set GPR32:$Rd, (OpNode GPR32:$Rn, - logical_shifted_reg32:$Rm))]> { - let Inst{31} = 0; - } - def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic, - [(set GPR64:$Rd, (OpNode GPR64:$Rn, - logical_shifted_reg64:$Rm))]> { - let Inst{31} = 1; - } - - def : LogicalRegAlias<mnemonic, - !cast<Instruction>(NAME#"Wrs"), GPR32>; - def : LogicalRegAlias<mnemonic, - !cast<Instruction>(NAME#"Xrs"), GPR64>; -} - -// Split from LogicalReg to allow setting CPSR Defs -multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic> { - let Defs = [CPSR], mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic, []>{ - let Inst{31} = 0; - } - def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic, []>{ - let Inst{31} = 1; - } - } // Defs = [CPSR] - - def : LogicalRegAlias<mnemonic, - !cast<Instruction>(NAME#"Wrs"), GPR32>; - def : LogicalRegAlias<mnemonic, - !cast<Instruction>(NAME#"Xrs"), GPR64>; -} - -//--- -// Conditionally set flags -//--- - -// Condition code. -// 4-bit immediate. Pretty-printed as <cc> -def ccode : Operand<i32> { - let PrintMethod = "printCondCode"; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm> - : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $imm, $nzcv, $cond", "", []>, - Sched<[WriteI]> { - let Uses = [CPSR]; - let Defs = [CPSR]; - - bits<5> Rn; - bits<5> imm; - bits<4> nzcv; - bits<4> cond; - - let Inst{30} = op; - let Inst{29-21} = 0b111010010; - let Inst{20-16} = imm; - let Inst{15-12} = cond; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4} = 0b0; - let Inst{3-0} = nzcv; -} - -multiclass CondSetFlagsImm<bit op, string asm> { - def Wi : BaseCondSetFlagsImm<op, GPR32, asm> { - let Inst{31} = 0; - } - def Xi : BaseCondSetFlagsImm<op, GPR64, asm> { - let Inst{31} = 1; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm> - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, - Sched<[WriteI]> { - let Uses = [CPSR]; - let Defs = [CPSR]; - - bits<5> Rn; - bits<5> Rm; - bits<4> nzcv; - bits<4> cond; - - let Inst{30} = op; - let Inst{29-21} = 0b111010010; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4} = 0b0; - let Inst{3-0} = nzcv; -} - -multiclass CondSetFlagsReg<bit op, string asm> { - def Wr : BaseCondSetFlagsReg<op, GPR32, asm> { - let Inst{31} = 0; - } - def Xr : BaseCondSetFlagsReg<op, GPR64, asm> { - let Inst{31} = 1; - } -} - -//--- -// Conditional select -//--- - -class BaseCondSelect<bit op, bits<2> op2, RegisterClass regtype, string asm> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), - asm, "\t$Rd, $Rn, $Rm, $cond", "", - [(set regtype:$Rd, - (ARM64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), CPSR))]>, - Sched<[WriteI]> { - let Uses = [CPSR]; - - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<4> cond; - - let Inst{30} = op; - let Inst{29-21} = 0b011010100; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = op2; - let Inst{9-5} 
= Rn; - let Inst{4-0} = Rd; -} - -multiclass CondSelect<bit op, bits<2> op2, string asm> { - def Wr : BaseCondSelect<op, op2, GPR32, asm> { - let Inst{31} = 0; - } - def Xr : BaseCondSelect<op, op2, GPR64, asm> { - let Inst{31} = 1; - } -} - -class BaseCondSelectOp<bit op, bits<2> op2, RegisterClass regtype, string asm, - PatFrag frag> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), - asm, "\t$Rd, $Rn, $Rm, $cond", "", - [(set regtype:$Rd, - (ARM64csel regtype:$Rn, (frag regtype:$Rm), - (i32 imm:$cond), CPSR))]>, - Sched<[WriteI]> { - let Uses = [CPSR]; - - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<4> cond; - - let Inst{30} = op; - let Inst{29-21} = 0b011010100; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = op2; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass CondSelectOp<bit op, bits<2> op2, string asm, PatFrag frag> { - def Wr : BaseCondSelectOp<op, op2, GPR32, asm, frag> { - let Inst{31} = 0; - } - def Xr : BaseCondSelectOp<op, op2, GPR64, asm, frag> { - let Inst{31} = 1; - } -} - -//--- -// Special Mask Value -//--- -def maski8_or_more : Operand<i32>, - ImmLeaf<i32, [{ return (Imm & 0xff) == 0xff; }]> { -} -def maski16_or_more : Operand<i32>, - ImmLeaf<i32, [{ return (Imm & 0xffff) == 0xffff; }]> { -} - - -//--- -// Load/store -//--- - -// (unsigned immediate) -// Indexed for 8-bit registers. offset is in range [0,4095]. -def MemoryIndexed8Operand : AsmOperandClass { - let Name = "MemoryIndexed8"; - let DiagnosticType = "InvalidMemoryIndexed8"; -} -def am_indexed8 : Operand<i64>, - ComplexPattern<i64, 2, "SelectAddrModeIndexed8", []> { - let PrintMethod = "printAMIndexed8"; - let EncoderMethod - = "getAMIndexed8OpValue<ARM64::fixup_arm64_ldst_imm12_scale1>"; - let ParserMatchClass = MemoryIndexed8Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// Indexed for 16-bit registers. offset is multiple of 2 in range [0,8190], -// stored as immval/2 (the 12-bit literal that encodes directly into the insn). -def MemoryIndexed16Operand : AsmOperandClass { - let Name = "MemoryIndexed16"; - let DiagnosticType = "InvalidMemoryIndexed16"; -} -def am_indexed16 : Operand<i64>, - ComplexPattern<i64, 2, "SelectAddrModeIndexed16", []> { - let PrintMethod = "printAMIndexed16"; - let EncoderMethod - = "getAMIndexed8OpValue<ARM64::fixup_arm64_ldst_imm12_scale2>"; - let ParserMatchClass = MemoryIndexed16Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// Indexed for 32-bit registers. offset is multiple of 4 in range [0,16380], -// stored as immval/4 (the 12-bit literal that encodes directly into the insn). -def MemoryIndexed32Operand : AsmOperandClass { - let Name = "MemoryIndexed32"; - let DiagnosticType = "InvalidMemoryIndexed32"; -} -def am_indexed32 : Operand<i64>, - ComplexPattern<i64, 2, "SelectAddrModeIndexed32", []> { - let PrintMethod = "printAMIndexed32"; - let EncoderMethod - = "getAMIndexed8OpValue<ARM64::fixup_arm64_ldst_imm12_scale4>"; - let ParserMatchClass = MemoryIndexed32Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// Indexed for 64-bit registers. offset is multiple of 8 in range [0,32760], -// stored as immval/8 (the 12-bit literal that encodes directly into the insn). 
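The am_indexed* comments above and below all describe the same scaled unsigned-offset rule: the byte offset must be a non-negative multiple of the access size, and what is stored in the 12-bit field is offset divided by that size. A short standalone sketch of that constraint follows; the helper name is hypothetical and this is not LLVM's addressing-mode selection code.

// Illustrative sketch only: models the "stored as immval/size" rule from the
// am_indexed* comments. Returns the 12-bit scaled immediate, or -1 when the
// offset instead needs the unscaled (LDUR/STUR-style) form defined later.
#include <cstdint>
#include <cstdio>

int encodeScaledOffset(int64_t byteOffset, unsigned accessSize) {
  if (byteOffset < 0 || byteOffset % accessSize != 0)
    return -1;                       // not a multiple of the access size
  int64_t scaled = byteOffset / accessSize;
  if (scaled > 4095)
    return -1;                       // outside the 12-bit range
  return static_cast<int>(scaled);
}

int main() {
  std::printf("%d\n", encodeScaledOffset(32, 8));    // ldr x0, [x1, #32] -> 4
  std::printf("%d\n", encodeScaledOffset(32760, 8)); // top of the range -> 4095
  std::printf("%d\n", encodeScaledOffset(4, 8));     // -1: needs unscaled form
}

The 64-bit and 128-bit operand definitions continue below.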
-def MemoryIndexed64Operand : AsmOperandClass { - let Name = "MemoryIndexed64"; - let DiagnosticType = "InvalidMemoryIndexed64"; -} -def am_indexed64 : Operand<i64>, - ComplexPattern<i64, 2, "SelectAddrModeIndexed64", []> { - let PrintMethod = "printAMIndexed64"; - let EncoderMethod - = "getAMIndexed8OpValue<ARM64::fixup_arm64_ldst_imm12_scale8>"; - let ParserMatchClass = MemoryIndexed64Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// Indexed for 128-bit registers. offset is multiple of 16 in range [0,65520], -// stored as immval/16 (the 12-bit literal that encodes directly into the insn). -def MemoryIndexed128Operand : AsmOperandClass { - let Name = "MemoryIndexed128"; - let DiagnosticType = "InvalidMemoryIndexed128"; -} -def am_indexed128 : Operand<i64>, - ComplexPattern<i64, 2, "SelectAddrModeIndexed128", []> { - let PrintMethod = "printAMIndexed128"; - let EncoderMethod - = "getAMIndexed8OpValue<ARM64::fixup_arm64_ldst_imm12_scale16>"; - let ParserMatchClass = MemoryIndexed128Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// No offset. -def MemoryNoIndexOperand : AsmOperandClass { let Name = "MemoryNoIndex"; } -def am_noindex : Operand<i64>, - ComplexPattern<i64, 1, "SelectAddrModeNoIndex", []> { - let PrintMethod = "printAMNoIndex"; - let ParserMatchClass = MemoryNoIndexOperand; - let MIOperandInfo = (ops GPR64sp:$base); -} - -class BaseLoadStoreUI<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, list<dag> pattern> - : I<oops, iops, asm, "\t$Rt, $addr", "", pattern> { - bits<5> dst; - - bits<17> addr; - bits<5> base = addr{4-0}; - bits<12> offset = addr{16-5}; - - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b01; - let Inst{23-22} = opc; - let Inst{21-10} = offset; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeUnsignedLdStInstruction"; -} - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class LoadUI<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - Operand indextype, string asm, list<dag> pattern> - : BaseLoadStoreUI<sz, V, opc, - (outs regtype:$Rt), (ins indextype:$addr), asm, pattern>, - Sched<[WriteLD]>; - -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -class StoreUI<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - Operand indextype, string asm, list<dag> pattern> - : BaseLoadStoreUI<sz, V, opc, - (outs), (ins regtype:$Rt, indextype:$addr), asm, pattern>, - Sched<[WriteST]>; - -def PrefetchOperand : AsmOperandClass { - let Name = "Prefetch"; - let ParserMethod = "tryParsePrefetch"; -} -def prfop : Operand<i32> { - let PrintMethod = "printPrefetchOp"; - let ParserMatchClass = PrefetchOperand; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class PrefetchUI<bits<2> sz, bit V, bits<2> opc, string asm, list<dag> pat> - : BaseLoadStoreUI<sz, V, opc, - (outs), (ins prfop:$Rt, am_indexed64:$addr), asm, pat>, - Sched<[WriteLD]>; - -//--- -// Load literal -//--- - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class LoadLiteral<bits<2> opc, bit V, RegisterClass regtype, string asm> - : I<(outs regtype:$Rt), (ins am_brcond:$label), - asm, "\t$Rt, $label", "", []>, - Sched<[WriteLD]> { - bits<5> Rt; - bits<19> label; - let Inst{31-30} = opc; - let Inst{29-27} = 0b011; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-5} = label; - let Inst{4-0} = Rt; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class PrefetchLiteral<bits<2> opc, bit V, string asm, list<dag> pat> - : 
I<(outs), (ins prfop:$Rt, am_brcond:$label), - asm, "\t$Rt, $label", "", pat>, - Sched<[WriteLD]> { - bits<5> Rt; - bits<19> label; - let Inst{31-30} = opc; - let Inst{29-27} = 0b011; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-5} = label; - let Inst{4-0} = Rt; -} - -//--- -// Load/store register offset -//--- - -class MemROAsmOperand<int sz> : AsmOperandClass { - let Name = "MemoryRegisterOffset"#sz; -} - -def MemROAsmOperand8 : MemROAsmOperand<8>; -def MemROAsmOperand16 : MemROAsmOperand<16>; -def MemROAsmOperand32 : MemROAsmOperand<32>; -def MemROAsmOperand64 : MemROAsmOperand<64>; -def MemROAsmOperand128 : MemROAsmOperand<128>; - -class ro_indexed<int sz> : Operand<i64> { // ComplexPattern<...> - let PrintMethod = "printMemoryRegOffset"#sz; - let MIOperandInfo = (ops GPR64sp:$base, GPR64:$offset, i32imm:$extend); -} - -def ro_indexed8 : ro_indexed<8>, ComplexPattern<i64, 3, "SelectAddrModeRO8", []> { - let ParserMatchClass = MemROAsmOperand8; -} - -def ro_indexed16 : ro_indexed<16>, ComplexPattern<i64, 3, "SelectAddrModeRO16", []> { - let ParserMatchClass = MemROAsmOperand16; -} - -def ro_indexed32 : ro_indexed<32>, ComplexPattern<i64, 3, "SelectAddrModeRO32", []> { - let ParserMatchClass = MemROAsmOperand32; -} - -def ro_indexed64 : ro_indexed<64>, ComplexPattern<i64, 3, "SelectAddrModeRO64", []> { - let ParserMatchClass = MemROAsmOperand64; -} - -def ro_indexed128 : ro_indexed<128>, ComplexPattern<i64, 3, "SelectAddrModeRO128", []> { - let ParserMatchClass = MemROAsmOperand128; -} - -class LoadStore8RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, dag ins, dag outs, list<dag> pat> - : I<ins, outs, asm, "\t$Rt, $addr", "", pat> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -class Load8RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore8RO<sz, V, opc, regtype, asm, - (outs regtype:$Rt), (ins ro_indexed8:$addr), pat>, - Sched<[WriteLDIdx, ReadAdrBase]>; - -class Store8RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore8RO<sz, V, opc, regtype, asm, - (outs), (ins regtype:$Rt, ro_indexed8:$addr), pat>, - Sched<[WriteSTIdx, ReadAdrBase]>; - -class LoadStore16RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, dag ins, dag outs, list<dag> pat> - : I<ins, outs, asm, "\t$Rt, $addr", "", pat> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -class Load16RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore16RO<sz, V, opc, regtype, asm, - (outs regtype:$Rt), (ins ro_indexed16:$addr), pat>, - Sched<[WriteLDIdx, ReadAdrBase]>; - -class Store16RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore16RO<sz, V, opc, regtype, asm, - (outs), (ins regtype:$Rt, ro_indexed16:$addr), pat>, - Sched<[WriteSTIdx, ReadAdrBase]>; - -class LoadStore32RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, dag ins, dag outs, list<dag> pat> - : I<ins, outs, asm, "\t$Rt, $addr", "", pat> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -class Load32RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore32RO<sz, V, opc, regtype, asm, - (outs regtype:$Rt), (ins ro_indexed32:$addr), pat>, - Sched<[WriteLDIdx, ReadAdrBase]>; - -class Store32RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore32RO<sz, V, opc, regtype, asm, - (outs), (ins regtype:$Rt, ro_indexed32:$addr), pat>, - Sched<[WriteSTIdx, ReadAdrBase]>; - -class LoadStore64RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, dag ins, dag outs, list<dag> pat> - : I<ins, outs, asm, "\t$Rt, $addr", "", pat> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class Load64RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore64RO<sz, V, opc, regtype, asm, - (outs regtype:$Rt), (ins ro_indexed64:$addr), pat>, - Sched<[WriteLDIdx, ReadAdrBase]>; - -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -class Store64RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore64RO<sz, V, opc, regtype, asm, - (outs), (ins regtype:$Rt, ro_indexed64:$addr), pat>, - Sched<[WriteSTIdx, ReadAdrBase]>; - - -class LoadStore128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, dag ins, dag outs, list<dag> pat> - : I<ins, outs, asm, "\t$Rt, $addr", "", pat> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class Load128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore128RO<sz, V, opc, regtype, asm, - (outs regtype:$Rt), (ins ro_indexed128:$addr), pat>, - Sched<[WriteLDIdx, ReadAdrBase]>; - -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -class Store128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list<dag> pat> - : LoadStore128RO<sz, V, opc, regtype, asm, - (outs), (ins regtype:$Rt, ro_indexed128:$addr), pat>, - Sched<[WriteSTIdx, ReadAdrBase]>; - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class PrefetchRO<bits<2> sz, bit V, bits<2> opc, string asm, list<dag> pat> - : I<(outs), (ins prfop:$Rt, ro_indexed64:$addr), asm, - "\t$Rt, $addr", "", pat>, - Sched<[WriteLD]> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -//--- -// Load/store unscaled immediate -//--- - -def MemoryUnscaledOperand : AsmOperandClass { - let Name = "MemoryUnscaled"; - let DiagnosticType = "InvalidMemoryIndexedSImm9"; -} -class am_unscaled_operand : Operand<i64> { - let PrintMethod = "printAMUnscaled"; - let ParserMatchClass = MemoryUnscaledOperand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} -def am_unscaled : am_unscaled_operand; -def am_unscaled8 : am_unscaled_operand, - ComplexPattern<i64, 2, "SelectAddrModeUnscaled8", []>; -def am_unscaled16 : am_unscaled_operand, - ComplexPattern<i64, 2, "SelectAddrModeUnscaled16", []>; -def am_unscaled32 : am_unscaled_operand, - ComplexPattern<i64, 2, "SelectAddrModeUnscaled32", []>; -def am_unscaled64 : am_unscaled_operand, - ComplexPattern<i64, 2, "SelectAddrModeUnscaled64", []>; -def am_unscaled128 : am_unscaled_operand, - ComplexPattern<i64, 2, "SelectAddrModeUnscaled128", []>; - -class BaseLoadStoreUnscale<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, list<dag> pattern> - : I<oops, iops, asm, "\t$Rt, $addr", "", pattern> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<9> offset; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0; - let Inst{20-12} = offset; - let Inst{11-10} = 0b00; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeSignedLdStInstruction"; -} - -let AddedComplexity = 1 in // try this before LoadUI -class LoadUnscaled<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - Operand amtype, string asm, list<dag> pattern> - : BaseLoadStoreUnscale<sz, V, opc, (outs regtype:$Rt), - (ins amtype:$addr), asm, pattern>, - Sched<[WriteLD]>; - -let AddedComplexity = 1 in // try this before StoreUI -class StoreUnscaled<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - Operand amtype, string asm, list<dag> pattern> - : BaseLoadStoreUnscale<sz, V, opc, (outs), - (ins regtype:$Rt, amtype:$addr), asm, pattern>, - Sched<[WriteST]>; - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class PrefetchUnscaled<bits<2> sz, bit V, bits<2> opc, string asm, list<dag> pat> - : BaseLoadStoreUnscale<sz, V, opc, (outs), - (ins prfop:$Rt, am_unscaled:$addr), asm, pat>, - Sched<[WriteLD]>; - -//--- -// Load/store unscaled immediate, unprivileged -//--- - -class BaseLoadStoreUnprivileged<bits<2> sz, bit V, bits<2> opc, - dag oops, dag iops, string asm> - : I<oops, iops, asm, "\t$Rt, $addr", "", []> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> base; - bits<9> offset; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0; - let Inst{20-12} = offset; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeSignedLdStInstruction"; -} - -let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in { -class LoadUnprivileged<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStoreUnprivileged<sz, V, opc, - (outs regtype:$Rt), (ins am_unscaled:$addr), asm>, - Sched<[WriteLD]>; -} - -let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { -class StoreUnprivileged<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStoreUnprivileged<sz, V, opc, - (outs), (ins regtype:$Rt, am_unscaled:$addr), asm>, - Sched<[WriteST]>; -} - -//--- -// Load/store pre-indexed -//--- - -class BaseLoadStorePreIdx<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, string cstr> - : I<oops, iops, asm, "\t$Rt, $addr!", cstr, []> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. - bits<5> dst; - bits<5> base; - bits<9> offset; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0; - let Inst{23-22} = opc; - let Inst{21} = 0; - let Inst{20-12} = offset; - let Inst{11-10} = 0b11; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeSignedLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -// FIXME: Modeling the write-back of these instructions for isel is tricky. -// we need the complex addressing mode for the memory reference, but -// we also need the write-back specified as a tied operand to the -// base register. That combination does not play nicely with -// the asm matcher and friends. -class LoadPreIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStorePreIdx<sz, V, opc, - (outs regtype:$Rt/*, GPR64sp:$wback*/), - (ins am_unscaled:$addr), asm, ""/*"$addr.base = $wback"*/>, - Sched<[WriteLD, WriteAdr]>; - -let mayStore = 1, mayLoad = 0 in -class StorePreIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStorePreIdx<sz, V, opc, - (outs/* GPR64sp:$wback*/), - (ins regtype:$Rt, am_unscaled:$addr), - asm, ""/*"$addr.base = $wback"*/>, - Sched<[WriteAdr, WriteST]>; -} // hasSideEffects = 0 - -// ISel pseudo-instructions which have the tied operands. When the MC lowering -// logic finally gets smart enough to strip off tied operands that are just -// for isel convenience, we can get rid of these pseudos and just reference -// the real instructions directly. -// -// Ironically, also because of the writeback operands, we can't put the -// matcher pattern directly on the instruction, but need to define it -// separately. -// -// Loads aren't matched with patterns here at all, but rather in C++ -// custom lowering. 
-let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in { -class LoadPreIdxPseudo<RegisterClass regtype> - : Pseudo<(outs regtype:$Rt, GPR64sp:$wback), - (ins am_noindex:$addr, simm9:$offset), [], - "$addr.base = $wback,@earlyclobber $wback">, - Sched<[WriteLD, WriteAdr]>; -class LoadPostIdxPseudo<RegisterClass regtype> - : Pseudo<(outs regtype:$Rt, GPR64sp:$wback), - (ins am_noindex:$addr, simm9:$offset), [], - "$addr.base = $wback,@earlyclobber $wback">, - Sched<[WriteLD, WriteI]>; -} -multiclass StorePreIdxPseudo<RegisterClass regtype, ValueType Ty, - SDPatternOperator OpNode> { - let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in - def _isel: Pseudo<(outs GPR64sp:$wback), - (ins regtype:$Rt, am_noindex:$addr, simm9:$offset), [], - "$addr.base = $wback,@earlyclobber $wback">, - Sched<[WriteAdr, WriteST]>; - - def : Pat<(OpNode (Ty regtype:$Rt), am_noindex:$addr, simm9:$offset), - (!cast<Instruction>(NAME#_isel) regtype:$Rt, am_noindex:$addr, - simm9:$offset)>; -} - -//--- -// Load/store post-indexed -//--- - -// (pre-index) load/stores. -class BaseLoadStorePostIdx<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, string cstr> - : I<oops, iops, asm, "\t$Rt, $addr, $idx", cstr, []> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. - bits<5> dst; - bits<5> base; - bits<9> offset; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0b0; - let Inst{20-12} = offset; - let Inst{11-10} = 0b01; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeSignedLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -// FIXME: Modeling the write-back of these instructions for isel is tricky. -// we need the complex addressing mode for the memory reference, but -// we also need the write-back specified as a tied operand to the -// base register. That combination does not play nicely with -// the asm matcher and friends. -class LoadPostIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStorePostIdx<sz, V, opc, - (outs regtype:$Rt/*, GPR64sp:$wback*/), - (ins am_noindex:$addr, simm9:$idx), - asm, ""/*"$addr.base = $wback"*/>, - Sched<[WriteLD, WriteI]>; - -let mayStore = 1, mayLoad = 0 in -class StorePostIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStorePostIdx<sz, V, opc, - (outs/* GPR64sp:$wback*/), - (ins regtype:$Rt, am_noindex:$addr, simm9:$idx), - asm, ""/*"$addr.base = $wback"*/>, - Sched<[WriteAdr, WriteST, ReadAdrBase]>; -} // hasSideEffects = 0 - -// ISel pseudo-instructions which have the tied operands. When the MC lowering -// logic finally gets smart enough to strip off tied operands that are just -// for isel convenience, we can get rid of these pseudos and just reference -// the real instructions directly. -// -// Ironically, also because of the writeback operands, we can't put the -// matcher pattern directly on the instruction, but need to define it -// separately. 
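The post-indexed forms above (and the StorePostIdxPseudo multiclass below) differ from the pre-indexed ones only in when the offset is applied: the access uses the original base address, and the offset is added to the base afterwards. A matching sketch, under the same assumptions as the pre-indexed example earlier:

// Illustrative sketch only: post-indexed semantics, e.g. "str w1, [x0], #8".
#include <cstdint>
#include <cstring>
#include <cstdio>

// Hypothetical helper: store 'val' at the original base, then return base+offset.
uint64_t storePostIndexed(uint8_t *mem, uint64_t base, int64_t offset,
                          uint32_t val) {
  std::memcpy(mem + base, &val, sizeof(val)); // the store uses the old address
  return base + offset;                       // write-back happens afterwards
}

int main() {
  uint8_t mem[64] = {};
  uint64_t x0 = 8;
  x0 = storePostIndexed(mem, x0, 8, 0x12345678u); // like: str w1, [x0], #8
  std::printf("x0 = %llu\n", (unsigned long long)x0); // 16, but data is at 8
}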
-multiclass StorePostIdxPseudo<RegisterClass regtype, ValueType Ty, - SDPatternOperator OpNode, Instruction Insn> { - let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in - def _isel: Pseudo<(outs GPR64sp:$wback), - (ins regtype:$Rt, am_noindex:$addr, simm9:$idx), [], - "$addr.base = $wback,@earlyclobber $wback">, - PseudoInstExpansion<(Insn regtype:$Rt, am_noindex:$addr, simm9:$idx)>, - Sched<[WriteAdr, WriteST, ReadAdrBase]>; - - def : Pat<(OpNode (Ty regtype:$Rt), am_noindex:$addr, simm9:$idx), - (!cast<Instruction>(NAME#_isel) regtype:$Rt, am_noindex:$addr, - simm9:$idx)>; -} - -//--- -// Load/store pair -//--- - -// (indexed, offset) - -class BaseLoadStorePairOffset<bits<2> opc, bit V, bit L, dag oops, dag iops, - string asm> - : I<oops, iops, asm, "\t$Rt, $Rt2, $addr", "", []> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> dst2; - bits<5> base; - bits<7> offset; - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = V; - let Inst{25-23} = 0b010; - let Inst{22} = L; - let Inst{21-15} = offset; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodePairLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPairOffset<bits<2> opc, bit V, RegisterClass regtype, - Operand indextype, string asm> - : BaseLoadStorePairOffset<opc, V, 1, - (outs regtype:$Rt, regtype:$Rt2), - (ins indextype:$addr), asm>, - Sched<[WriteLD, WriteLDHi]>; - -let mayLoad = 0, mayStore = 1 in -class StorePairOffset<bits<2> opc, bit V, RegisterClass regtype, - Operand indextype, string asm> - : BaseLoadStorePairOffset<opc, V, 0, (outs), - (ins regtype:$Rt, regtype:$Rt2, indextype:$addr), - asm>, - Sched<[WriteSTP]>; -} // hasSideEffects = 0 - -// (pre-indexed) - -def MemoryIndexed32SImm7 : AsmOperandClass { - let Name = "MemoryIndexed32SImm7"; - let DiagnosticType = "InvalidMemoryIndexed32SImm7"; -} -def am_indexed32simm7 : Operand<i32> { // ComplexPattern<...> - let PrintMethod = "printAMIndexed32"; - let ParserMatchClass = MemoryIndexed32SImm7; - let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset); -} - -def MemoryIndexed64SImm7 : AsmOperandClass { - let Name = "MemoryIndexed64SImm7"; - let DiagnosticType = "InvalidMemoryIndexed64SImm7"; -} -def am_indexed64simm7 : Operand<i32> { // ComplexPattern<...> - let PrintMethod = "printAMIndexed64"; - let ParserMatchClass = MemoryIndexed64SImm7; - let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset); -} - -def MemoryIndexed128SImm7 : AsmOperandClass { - let Name = "MemoryIndexed128SImm7"; - let DiagnosticType = "InvalidMemoryIndexed128SImm7"; -} -def am_indexed128simm7 : Operand<i32> { // ComplexPattern<...> - let PrintMethod = "printAMIndexed128"; - let ParserMatchClass = MemoryIndexed128SImm7; - let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset); -} - -class BaseLoadStorePairPreIdx<bits<2> opc, bit V, bit L, dag oops, dag iops, - string asm> - : I<oops, iops, asm, "\t$Rt, $Rt2, $addr!", "", []> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> dst2; - bits<5> base; - bits<7> offset; - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = V; - let Inst{25-23} = 0b011; - let Inst{22} = L; - let Inst{21-15} = offset; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodePairLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPairPreIdx<bits<2> opc, bit V, RegisterClass regtype, - Operand addrmode, string asm> - : BaseLoadStorePairPreIdx<opc, V, 1, - (outs regtype:$Rt, regtype:$Rt2), - (ins addrmode:$addr), asm>, - Sched<[WriteLD, WriteLDHi, WriteAdr]>; - -let mayStore = 1, mayLoad = 0 in -class StorePairPreIdx<bits<2> opc, bit V, RegisterClass regtype, - Operand addrmode, string asm> - : BaseLoadStorePairPreIdx<opc, V, 0, (outs), - (ins regtype:$Rt, regtype:$Rt2, addrmode:$addr), - asm>, - Sched<[WriteAdr, WriteSTP]>; -} // hasSideEffects = 0 - -// (post-indexed) - -class BaseLoadStorePairPostIdx<bits<2> opc, bit V, bit L, dag oops, dag iops, - string asm> - : I<oops, iops, asm, "\t$Rt, $Rt2, $addr, $idx", "", []> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> dst2; - bits<5> base; - bits<7> offset; - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = V; - let Inst{25-23} = 0b001; - let Inst{22} = L; - let Inst{21-15} = offset; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodePairLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPairPostIdx<bits<2> opc, bit V, RegisterClass regtype, - Operand idxtype, string asm> - : BaseLoadStorePairPostIdx<opc, V, 1, - (outs regtype:$Rt, regtype:$Rt2), - (ins am_noindex:$addr, idxtype:$idx), asm>, - Sched<[WriteLD, WriteLDHi, WriteAdr]>; - -let mayStore = 1, mayLoad = 0 in -class StorePairPostIdx<bits<2> opc, bit V, RegisterClass regtype, - Operand idxtype, string asm> - : BaseLoadStorePairPostIdx<opc, V, 0, (outs), - (ins regtype:$Rt, regtype:$Rt2, - am_noindex:$addr, idxtype:$idx), - asm>, - Sched<[WriteAdr, WriteSTP]>; -} // hasSideEffects = 0 - -// (no-allocate) - -class BaseLoadStorePairNoAlloc<bits<2> opc, bit V, bit L, dag oops, dag iops, - string asm> - : I<oops, iops, asm, "\t$Rt, $Rt2, $addr", "", []> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> dst2; - bits<5> base; - bits<7> offset; - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = V; - let Inst{25-23} = 0b000; - let Inst{22} = L; - let Inst{21-15} = offset; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodePairLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPairNoAlloc<bits<2> opc, bit V, RegisterClass regtype, - Operand indextype, string asm> - : BaseLoadStorePairNoAlloc<opc, V, 1, - (outs regtype:$Rt, regtype:$Rt2), - (ins indextype:$addr), asm>, - Sched<[WriteLD, WriteLDHi]>; - -let mayStore = 1, mayLoad = 0 in -class StorePairNoAlloc<bits<2> opc, bit V, RegisterClass regtype, - Operand indextype, string asm> - : BaseLoadStorePairNoAlloc<opc, V, 0, (outs), - (ins regtype:$Rt, regtype:$Rt2, indextype:$addr), - asm>, - Sched<[WriteSTP]>; -} // hasSideEffects = 0 - -//--- -// Load/store exclusive -//--- - -// True exclusive operations write to and/or read from the system's exclusive -// monitors, which as far as a compiler is concerned can be modelled as a -// random shared memory address. Hence LoadExclusive mayStore. -let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in -class BaseLoadStoreExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0, - dag oops, dag iops, string asm, string operands> - : I<oops, iops, asm, operands, "", []> { - let Inst{31-30} = sz; - let Inst{29-24} = 0b001000; - let Inst{23} = o2; - let Inst{22} = L; - let Inst{21} = o1; - let Inst{15} = o0; - - let DecoderMethod = "DecodeExclusiveLdStInstruction"; -} - -// Neither Rs nor Rt2 operands. -class LoadStoreExclusiveSimple<bits<2> sz, bit o2, bit L, bit o1, bit o0, - dag oops, dag iops, string asm, string operands> - : BaseLoadStoreExclusive<sz, o2, L, o1, o0, oops, iops, asm, operands> { - bits<5> reg; - bits<5> base; - let Inst{20-16} = 0b11111; - let Inst{14-10} = 0b11111; - let Inst{9-5} = base; - let Inst{4-0} = reg; -} - -// Simple load acquires don't set the exclusive monitor -let mayLoad = 1, mayStore = 0 in -class LoadAcquire<bits<2> sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs regtype:$Rt), - (ins am_noindex:$addr), asm, "\t$Rt, $addr">, - Sched<[WriteLD]>; - -class LoadExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs regtype:$Rt), - (ins am_noindex:$addr), asm, "\t$Rt, $addr">, - Sched<[WriteLD]>; - -class LoadExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : BaseLoadStoreExclusive<sz, o2, L, o1, o0, - (outs regtype:$Rt, regtype:$Rt2), - (ins am_noindex:$addr), asm, - "\t$Rt, $Rt2, $addr">, - Sched<[WriteLD, WriteLDHi]> { - bits<5> dst1; - bits<5> dst2; - bits<5> base; - let Inst{20-16} = 0b11111; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst1; -} - -// Simple store release operations do not check the exclusive monitor. 
-let mayLoad = 0, mayStore = 1 in -class StoreRelease<bits<2> sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs), - (ins regtype:$Rt, am_noindex:$addr), - asm, "\t$Rt, $addr">, - Sched<[WriteST]>; - -let mayLoad = 1, mayStore = 1 in -class StoreExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : BaseLoadStoreExclusive<sz, o2, L, o1, o0, (outs GPR32:$Ws), - (ins regtype:$Rt, am_noindex:$addr), - asm, "\t$Ws, $Rt, $addr">, - Sched<[WriteSTX]> { - bits<5> status; - bits<5> reg; - bits<5> base; - let Inst{20-16} = status; - let Inst{14-10} = 0b11111; - let Inst{9-5} = base; - let Inst{4-0} = reg; - - let Constraints = "@earlyclobber $Ws"; -} - -class StoreExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : BaseLoadStoreExclusive<sz, o2, L, o1, o0, - (outs GPR32:$Ws), - (ins regtype:$Rt, regtype:$Rt2, am_noindex:$addr), - asm, "\t$Ws, $Rt, $Rt2, $addr">, - Sched<[WriteSTX]> { - bits<5> status; - bits<5> dst1; - bits<5> dst2; - bits<5> base; - let Inst{20-16} = status; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst1; - - let Constraints = "@earlyclobber $Ws"; -} - -//--- -// Exception generation -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class ExceptionGeneration<bits<3> op1, bits<2> ll, string asm> - : I<(outs), (ins imm0_65535:$imm), asm, "\t$imm", "", []>, - Sched<[WriteSys]> { - bits<16> imm; - let Inst{31-24} = 0b11010100; - let Inst{23-21} = op1; - let Inst{20-5} = imm; - let Inst{4-2} = 0b000; - let Inst{1-0} = ll; -} - -//--- -// Floating point to integer conversion -//--- - -class BaseFPToIntegerUnscaled<bits<2> type, bits<2> rmode, bits<3> opcode, - RegisterClass srcType, RegisterClass dstType, - string asm, list<dag> pattern> - : I<(outs dstType:$Rd), (ins srcType:$Rn), - asm, "\t$Rd, $Rn", "", pattern>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - let Inst{30} = 0; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseFPToInteger<bits<2> type, bits<2> rmode, bits<3> opcode, - RegisterClass srcType, RegisterClass dstType, - Operand immType, string asm> - : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale), - asm, "\t$Rd, $Rn, $scale", "", []>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - bits<6> scale; - let Inst{30} = 0; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = scale; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass FPToInteger<bits<2> rmode, bits<3> opcode, string asm, SDPatternOperator OpN> { - // Unscaled single-precision to 32-bit - def UWSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR32, asm, - [(set GPR32:$Rd, (OpN FPR32:$Rn))]> { - let Inst{31} = 0; // 32-bit GPR flag - } - - // Unscaled single-precision to 64-bit - def UXSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR64, asm, - [(set GPR64:$Rd, (OpN FPR32:$Rn))]> { - let Inst{31} = 1; // 64-bit GPR flag - } - - // Unscaled double-precision to 32-bit - def UWDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR32, asm, - [(set GPR32:$Rd, (OpN (f64 FPR64:$Rn)))]> { - let Inst{31} = 0; // 32-bit GPR flag - } - - // Unscaled double-precision 
to 64-bit - def UXDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR64, asm, - [(set GPR64:$Rd, (OpN (f64 FPR64:$Rn)))]> { - let Inst{31} = 1; // 64-bit GPR flag - } - - // Scaled single-precision to 32-bit - def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32, - fixedpoint32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - } - - // Scaled single-precision to 64-bit - def SXSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR64, - fixedpoint64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - } - - // Scaled double-precision to 32-bit - def SWDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR32, - fixedpoint32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - } - - // Scaled double-precision to 64-bit - def SXDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR64, - fixedpoint64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - } -} - -//--- -// Integer to floating point conversion -//--- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseIntegerToFP<bit isUnsigned, - RegisterClass srcType, RegisterClass dstType, - Operand immType, string asm> - : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale), - asm, "\t$Rd, $Rn, $scale", "", []>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - bits<6> scale; - let Inst{30-23} = 0b00111100; - let Inst{21-17} = 0b00001; - let Inst{16} = isUnsigned; - let Inst{15-10} = scale; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseIntegerToFPUnscaled<bit isUnsigned, - RegisterClass srcType, RegisterClass dstType, - ValueType dvt, string asm, SDNode node> - : I<(outs dstType:$Rd), (ins srcType:$Rn), - asm, "\t$Rd, $Rn", "", [(set (dvt dstType:$Rd), (node srcType:$Rn))]>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - bits<6> scale; - let Inst{30-23} = 0b00111100; - let Inst{21-17} = 0b10001; - let Inst{16} = isUnsigned; - let Inst{15-10} = 0b000000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass IntegerToFP<bit isUnsigned, string asm, SDNode node> { - // Unscaled - def UWSri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR32, f32, asm, node> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def UWDri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR64, f64, asm, node> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } - - def UXSri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR32, f32, asm, node> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def UXDri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR64, f64, asm, node> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } - - // Scaled - def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } - - def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } -} - -//--- -// Unscaled integer <-> floating point conversion (i.e. 
FMOV) -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseUnscaledConversion<bits<2> rmode, bits<3> opcode, - RegisterClass srcType, RegisterClass dstType, - string asm> - : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", - // We use COPY_TO_REGCLASS for these bitconvert operations. - // copyPhysReg() expands the resultant COPY instructions after - // regalloc is done. This gives greater freedom for the allocator - // and related passes (coalescing, copy propagation, et. al.) to - // be more effective. - [/*(set (dvt dstType:$Rd), (bitconvert (svt srcType:$Rn)))*/]>, - Sched<[WriteFCopy]> { - bits<5> Rd; - bits<5> Rn; - let Inst{30-23} = 0b00111100; - let Inst{21} = 1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0b000000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseUnscaledConversionToHigh<bits<2> rmode, bits<3> opcode, - RegisterClass srcType, RegisterOperand dstType, string asm> - : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd[1], $Rn", "", []>, - Sched<[WriteFCopy]> { - bits<5> Rd; - bits<5> Rn; - let Inst{30-23} = 0b00111101; - let Inst{21} = 1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0b000000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseUnscaledConversionFromHigh<bits<2> rmode, bits<3> opcode, - RegisterOperand srcType, RegisterClass dstType, string asm> - : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn[1]", "", []>, - Sched<[WriteFCopy]> { - bits<5> Rd; - bits<5> Rn; - let Inst{30-23} = 0b00111101; - let Inst{21} = 1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0b000000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - - - -multiclass UnscaledConversion<string asm> { - def WSr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def XDr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } - - def SWr : BaseUnscaledConversion<0b00, 0b110, FPR32, GPR32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def DXr : BaseUnscaledConversion<0b00, 0b110, FPR64, GPR64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } - - def XDHighr : BaseUnscaledConversionToHigh<0b01, 0b111, GPR64, V128, - asm#".d"> { - let Inst{31} = 1; - let Inst{22} = 0; - } - - def DXHighr : BaseUnscaledConversionFromHigh<0b01, 0b110, V128, GPR64, - asm#".d"> { - let Inst{31} = 1; - let Inst{22} = 0; - } - - def : InstAlias<asm#"$Vd.d[1], $Rn", - (!cast<Instruction>(NAME#XDHighr) V128:$Vd, GPR64:$Rn), 0>; - def : InstAlias<asm#"$Rd, $Vn.d[1]", - (!cast<Instruction>(NAME#DXHighr) GPR64:$Rd, V128:$Vn), 0>; -} - -//--- -// Floating point conversion -//--- - -class BaseFPConversion<bits<2> type, bits<2> opcode, RegisterClass dstType, - RegisterClass srcType, string asm, list<dag> pattern> - : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", pattern>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-24} = 0b00011110; - let Inst{23-22} = type; - let Inst{21-17} = 0b10001; - let Inst{16-15} = opcode; - let Inst{14-10} = 0b10000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass FPConversion<string asm> { - // Double-precision to 
Half-precision - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in - def HDr : BaseFPConversion<0b01, 0b11, FPR16, FPR64, asm, []>; - - // Double-precision to Single-precision - def SDr : BaseFPConversion<0b01, 0b00, FPR32, FPR64, asm, - [(set FPR32:$Rd, (fround FPR64:$Rn))]>; - - // Half-precision to Double-precision - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in - def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm, []>; - - // Half-precision to Single-precision - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in - def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm, []>; - - // Single-precision to Double-precision - def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm, - [(set FPR64:$Rd, (fextend FPR32:$Rn))]>; - - // Single-precision to Half-precision - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in - def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm, []>; -} - -//--- -// Single operand floating point data processing -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype, - ValueType vt, string asm, SDPatternOperator node> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "", - [(set (vt regtype:$Rd), (node (vt regtype:$Rn)))]>, - Sched<[WriteF]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-23} = 0b000111100; - let Inst{21-19} = 0b100; - let Inst{18-15} = opcode; - let Inst{14-10} = 0b10000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SingleOperandFPData<bits<4> opcode, string asm, - SDPatternOperator node = null_frag> { - def Sr : BaseSingleOperandFPData<opcode, FPR32, f32, asm, node> { - let Inst{22} = 0; // 32-bit size flag - } - - def Dr : BaseSingleOperandFPData<opcode, FPR64, f64, asm, node> { - let Inst{22} = 1; // 64-bit size flag - } -} - -//--- -// Two operand floating point data processing -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseTwoOperandFPData<bits<4> opcode, RegisterClass regtype, - string asm, list<dag> pat> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", pat>, - Sched<[WriteF]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass TwoOperandFPData<bits<4> opcode, string asm, - SDPatternOperator node = null_frag> { - def Srr : BaseTwoOperandFPData<opcode, FPR32, asm, - [(set (f32 FPR32:$Rd), - (node (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]> { - let Inst{22} = 0; // 32-bit size flag - } - - def Drr : BaseTwoOperandFPData<opcode, FPR64, asm, - [(set (f64 FPR64:$Rd), - (node (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]> { - let Inst{22} = 1; // 64-bit size flag - } -} - -multiclass TwoOperandFPDataNeg<bits<4> opcode, string asm, SDNode node> { - def Srr : BaseTwoOperandFPData<opcode, FPR32, asm, - [(set FPR32:$Rd, (fneg (node FPR32:$Rn, (f32 FPR32:$Rm))))]> { - let Inst{22} = 0; // 32-bit size flag - } - - def Drr : BaseTwoOperandFPData<opcode, FPR64, asm, - [(set FPR64:$Rd, (fneg (node FPR64:$Rn, (f64 FPR64:$Rm))))]> { - let Inst{22} = 1; // 64-bit size flag - } -} - - -//--- -// Three operand floating point data processing -//--- - -class BaseThreeOperandFPData<bit isNegated, bit isSub, - RegisterClass regtype, string asm, list<dag> pat> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, regtype: $Ra), - asm, "\t$Rd, $Rn, $Rm, $Ra", "", pat>, - Sched<[WriteFMul]> { - bits<5> Rd; - 
bits<5> Rn; - bits<5> Rm; - bits<5> Ra; - let Inst{31-23} = 0b000111110; - let Inst{21} = isNegated; - let Inst{20-16} = Rm; - let Inst{15} = isSub; - let Inst{14-10} = Ra; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass ThreeOperandFPData<bit isNegated, bit isSub,string asm, - SDPatternOperator node> { - def Srrr : BaseThreeOperandFPData<isNegated, isSub, FPR32, asm, - [(set FPR32:$Rd, - (node (f32 FPR32:$Rn), (f32 FPR32:$Rm), (f32 FPR32:$Ra)))]> { - let Inst{22} = 0; // 32-bit size flag - } - - def Drrr : BaseThreeOperandFPData<isNegated, isSub, FPR64, asm, - [(set FPR64:$Rd, - (node (f64 FPR64:$Rn), (f64 FPR64:$Rm), (f64 FPR64:$Ra)))]> { - let Inst{22} = 1; // 64-bit size flag - } -} - -//--- -// Floating point data comparisons -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseOneOperandFPComparison<bit signalAllNans, - RegisterClass regtype, string asm, - list<dag> pat> - : I<(outs), (ins regtype:$Rn), asm, "\t$Rn, #0.0", "", pat>, - Sched<[WriteFCmp]> { - bits<5> Rn; - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - - let Inst{20-16} = 0b00000; - let Inst{15-10} = 0b001000; - let Inst{9-5} = Rn; - let Inst{4} = signalAllNans; - let Inst{3-0} = 0b1000; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseTwoOperandFPComparison<bit signalAllNans, RegisterClass regtype, - string asm, list<dag> pat> - : I<(outs), (ins regtype:$Rn, regtype:$Rm), asm, "\t$Rn, $Rm", "", pat>, - Sched<[WriteFCmp]> { - bits<5> Rm; - bits<5> Rn; - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-10} = 0b001000; - let Inst{9-5} = Rn; - let Inst{4} = signalAllNans; - let Inst{3-0} = 0b0000; -} - -multiclass FPComparison<bit signalAllNans, string asm, - SDPatternOperator OpNode = null_frag> { - let Defs = [CPSR] in { - def Srr : BaseTwoOperandFPComparison<signalAllNans, FPR32, asm, - [(OpNode FPR32:$Rn, (f32 FPR32:$Rm)), (implicit CPSR)]> { - let Inst{22} = 0; - } - - def Sri : BaseOneOperandFPComparison<signalAllNans, FPR32, asm, - [(OpNode (f32 FPR32:$Rn), fpimm0), (implicit CPSR)]> { - let Inst{22} = 0; - } - - def Drr : BaseTwoOperandFPComparison<signalAllNans, FPR64, asm, - [(OpNode FPR64:$Rn, (f64 FPR64:$Rm)), (implicit CPSR)]> { - let Inst{22} = 1; - } - - def Dri : BaseOneOperandFPComparison<signalAllNans, FPR64, asm, - [(OpNode (f64 FPR64:$Rn), fpimm0), (implicit CPSR)]> { - let Inst{22} = 1; - } - } // Defs = [CPSR] -} - -//--- -// Floating point conditional comparisons -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseFPCondComparison<bit signalAllNans, - RegisterClass regtype, string asm> - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, - Sched<[WriteFCmp]> { - bits<5> Rn; - bits<5> Rm; - bits<4> nzcv; - bits<4> cond; - - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = 0b01; - let Inst{9-5} = Rn; - let Inst{4} = signalAllNans; - let Inst{3-0} = nzcv; -} - -multiclass FPCondComparison<bit signalAllNans, string asm> { - let Defs = [CPSR], Uses = [CPSR] in { - def Srr : BaseFPCondComparison<signalAllNans, FPR32, asm> { - let Inst{22} = 0; - } - - def Drr : BaseFPCondComparison<signalAllNans, FPR64, asm> { - let Inst{22} = 1; - } - } // Defs = [CPSR], Uses = [CPSR] -} - -//--- -// Floating point conditional select -//--- - -class BaseFPCondSelect<RegisterClass regtype, ValueType vt, string asm> - : I<(outs regtype:$Rd), (ins regtype:$Rn, 
regtype:$Rm, ccode:$cond), - asm, "\t$Rd, $Rn, $Rm, $cond", "", - [(set regtype:$Rd, - (ARM64csel (vt regtype:$Rn), regtype:$Rm, - (i32 imm:$cond), CPSR))]>, - Sched<[WriteF]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<4> cond; - - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = 0b11; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass FPCondSelect<string asm> { - let Uses = [CPSR] in { - def Srrr : BaseFPCondSelect<FPR32, f32, asm> { - let Inst{22} = 0; - } - - def Drrr : BaseFPCondSelect<FPR64, f64, asm> { - let Inst{22} = 1; - } - } // Uses = [CPSR] -} - -//--- -// Floating move immediate -//--- - -class BaseFPMoveImmediate<RegisterClass regtype, Operand fpimmtype, string asm> - : I<(outs regtype:$Rd), (ins fpimmtype:$imm), asm, "\t$Rd, $imm", "", - [(set regtype:$Rd, fpimmtype:$imm)]>, - Sched<[WriteFImm]> { - bits<5> Rd; - bits<8> imm; - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - let Inst{20-13} = imm; - let Inst{12-5} = 0b10000000; - let Inst{4-0} = Rd; -} - -multiclass FPMoveImmediate<string asm> { - def Si : BaseFPMoveImmediate<FPR32, fpimm32, asm> { - let Inst{22} = 0; - } - - def Di : BaseFPMoveImmediate<FPR64, fpimm64, asm> { - let Inst{22} = 1; - } -} - -//---------------------------------------------------------------------------- -// AdvSIMD -//---------------------------------------------------------------------------- - -def VectorIndexBOperand : AsmOperandClass { let Name = "VectorIndexB"; } -def VectorIndexHOperand : AsmOperandClass { let Name = "VectorIndexH"; } -def VectorIndexSOperand : AsmOperandClass { let Name = "VectorIndexS"; } -def VectorIndexDOperand : AsmOperandClass { let Name = "VectorIndexD"; } -def VectorIndexB : Operand<i64>, ImmLeaf<i64, [{ - return ((uint64_t)Imm) < 16; -}]> { - let ParserMatchClass = VectorIndexBOperand; - let PrintMethod = "printVectorIndex"; - let MIOperandInfo = (ops i64imm); -} -def VectorIndexH : Operand<i64>, ImmLeaf<i64, [{ - return ((uint64_t)Imm) < 8; -}]> { - let ParserMatchClass = VectorIndexHOperand; - let PrintMethod = "printVectorIndex"; - let MIOperandInfo = (ops i64imm); -} -def VectorIndexS : Operand<i64>, ImmLeaf<i64, [{ - return ((uint64_t)Imm) < 4; -}]> { - let ParserMatchClass = VectorIndexSOperand; - let PrintMethod = "printVectorIndex"; - let MIOperandInfo = (ops i64imm); -} -def VectorIndexD : Operand<i64>, ImmLeaf<i64, [{ - return ((uint64_t)Imm) < 2; -}]> { - let ParserMatchClass = VectorIndexDOperand; - let PrintMethod = "printVectorIndex"; - let MIOperandInfo = (ops i64imm); -} - -//---------------------------------------------------------------------------- -// AdvSIMD three register vector instructions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVector<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand regtype, string asm, string kind, - list<dag> pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # - "|" # kind # "\t$Rd, $Rn, $Rm|}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in 
-class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand regtype, string asm, string kind, - list<dag> pattern> - : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # - "|" # kind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -// All operand sizes distinguished in the encoding. -multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, - asm, ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, - asm, ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, - asm, ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, - asm, ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, - asm, ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, - asm, ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; - def v2i64 : BaseSIMDThreeSameVector<1, U, 0b11, opc, V128, - asm, ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>; -} - -// As above, but D sized elements unsupported. 
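The SIMDThreeSameVector multiclass above is only the shared encoding and assembly scaffolding; the per-mnemonic instruction records live in ARM64InstrInfo.td (also deleted by this change), which instantiates the multiclass once per opcode. A minimal sketch of that usage, with opcode bit values shown for illustration rather than taken verbatim from the backend:

// Illustrative instantiations of the multiclass defined above; the
// opcode fields are placeholders, not authoritative encodings.
defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>;
defm SUB : SIMDThreeSameVector<1, 0b10000, "sub", sub>;

Each defm expands into .8b/.16b/.4h/.8h/.2s/.4s/.2d instruction definitions, with the Q, U and size fields filled in by the multiclass body; the size-restricted variants below (BHS, B-only, FP) are consumed the same way.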
-multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, - asm, ".8b", - [(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, - asm, ".16b", - [(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>; - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, - asm, ".4h", - [(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, - asm, ".8h", - [(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, - asm, ".2s", - [(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, - asm, ".4s", - [(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>; -} - -multiclass SIMDThreeSameVectorBHSTied<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b00, opc, V64, - asm, ".8b", - [(set (v8i8 V64:$dst), - (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b00, opc, V128, - asm, ".16b", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; - def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b01, opc, V64, - asm, ".4h", - [(set (v4i16 V64:$dst), - (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b01, opc, V128, - asm, ".8h", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b10, opc, V64, - asm, ".2s", - [(set (v2i32 V64:$dst), - (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b10, opc, V128, - asm, ".4s", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; -} - -// As above, but only B sized elements supported. -multiclass SIMDThreeSameVectorB<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, - asm, ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, - asm, ".16b", - [(set (v16i8 V128:$Rd), - (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; -} - -// As above, but only S and D sized floating point elements supported. 
-multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<5> opc, - string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64, - asm, ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128, - asm, ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128, - asm, ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; -} - -multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<5> opc, - string asm, - SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64, - asm, ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128, - asm, ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128, - asm, ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; -} - -multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<5> opc, - string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0}, opc, V64, - asm, ".2s", - [(set (v2f32 V64:$dst), - (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0}, opc, V128, - asm, ".4s", - [(set (v4f32 V128:$dst), - (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,1}, opc, V128, - asm, ".2d", - [(set (v2f64 V128:$dst), - (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; -} - -// As above, but D and B sized elements unsupported. -multiclass SIMDThreeSameVectorHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, - asm, ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, - asm, ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, - asm, ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, - asm, ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; -} - -// Logical three vector ops share opcode bits, and only use B sized elements. 
-multiclass SIMDLogicalThreeVector<bit U, bits<2> size, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8 : BaseSIMDThreeSameVector<0, U, size, 0b00011, V64, - asm, ".8b", - [(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, size, 0b00011, V128, - asm, ".16b", - [(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>; - - def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)), - (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>; - def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)), - (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>; - def : Pat<(v1i64 (OpNode V64:$LHS, V64:$RHS)), - (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>; - - def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)), - (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>; - def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)), - (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>; - def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)), - (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>; -} - -multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size, - string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorTied<0, U, size, 0b00011, V64, - asm, ".8b", - [(set (v8i8 V64:$dst), - (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVectorTied<1, U, size, 0b00011, V128, - asm, ".16b", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), - (v16i8 V128:$Rm)))]>; - - def : Pat<(v4i16 (OpNode (v4i16 V64:$LHS), (v4i16 V64:$MHS), - (v4i16 V64:$RHS))), - (!cast<Instruction>(NAME#"v8i8") - V64:$LHS, V64:$MHS, V64:$RHS)>; - def : Pat<(v2i32 (OpNode (v2i32 V64:$LHS), (v2i32 V64:$MHS), - (v2i32 V64:$RHS))), - (!cast<Instruction>(NAME#"v8i8") - V64:$LHS, V64:$MHS, V64:$RHS)>; - def : Pat<(v1i64 (OpNode (v1i64 V64:$LHS), (v1i64 V64:$MHS), - (v1i64 V64:$RHS))), - (!cast<Instruction>(NAME#"v8i8") - V64:$LHS, V64:$MHS, V64:$RHS)>; - - def : Pat<(v8i16 (OpNode (v8i16 V128:$LHS), (v8i16 V128:$MHS), - (v8i16 V128:$RHS))), - (!cast<Instruction>(NAME#"v16i8") - V128:$LHS, V128:$MHS, V128:$RHS)>; - def : Pat<(v4i32 (OpNode (v4i32 V128:$LHS), (v4i32 V128:$MHS), - (v4i32 V128:$RHS))), - (!cast<Instruction>(NAME#"v16i8") - V128:$LHS, V128:$MHS, V128:$RHS)>; - def : Pat<(v2i64 (OpNode (v2i64 V128:$LHS), (v2i64 V128:$MHS), - (v2i64 V128:$RHS))), - (!cast<Instruction>(NAME#"v16i8") - V128:$LHS, V128:$MHS, V128:$RHS)>; -} - - -//---------------------------------------------------------------------------- -// AdvSIMD two register vector instructions. 
-//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand regtype, string asm, string dstkind, - string srckind, list<dag> pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, - "{\t$Rd" # dstkind # ", $Rn" # srckind # - "|" # dstkind # "\t$Rd, $Rn}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand regtype, string asm, string dstkind, - string srckind, list<dag> pattern> - : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm, - "{\t$Rd" # dstkind # ", $Rn" # srckind # - "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -// Supports B, H, and S element sizes. -multiclass SIMDTwoVectorBHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; -} - -class BaseSIMDVectorLShiftLongBySize<bit Q, bits<2> size, - RegisterOperand regtype, string asm, string dstkind, - string srckind, string amount> - : I<(outs V128:$Rd), (ins regtype:$Rn), asm, - "{\t$Rd" # dstkind # ", $Rn" # srckind # ", #" # amount # - "|" # dstkind # "\t$Rd, $Rn, #" # amount # "}", "", []>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-24} = 0b101110; - let Inst{23-22} = size; - let Inst{21-10} = 0b100001001110; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDVectorLShiftLongBySizeBHS { - let neverHasSideEffects = 1 in { - def v8i8 : BaseSIMDVectorLShiftLongBySize<0, 0b00, V64, - "shll", ".8h", ".8b", "8">; - def v16i8 : BaseSIMDVectorLShiftLongBySize<1, 0b00, V128, - "shll2", ".8h", ".16b", "8">; - def v4i16 : BaseSIMDVectorLShiftLongBySize<0, 0b01, V64, - "shll", ".4s", ".4h", "16">; - def v8i16 : BaseSIMDVectorLShiftLongBySize<1, 0b01, V128, - "shll2", ".4s", ".8h", "16">; - def v2i32 : BaseSIMDVectorLShiftLongBySize<0, 0b10, V64, - "shll", ".2d", ".2s", "32">; - def v4i32 : BaseSIMDVectorLShiftLongBySize<1, 0b10, V128, - "shll2", ".2d", ".4s", "32">; - } -} - -// Supports all 
element sizes. -multiclass SIMDLongTwoVector<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, - asm, ".4h", ".8b", - [(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, - asm, ".8h", ".16b", - [(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, - asm, ".2s", ".4h", - [(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, - asm, ".4s", ".8h", - [(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, - asm, ".1d", ".2s", - [(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, - asm, ".2d", ".4s", - [(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; -} - -multiclass SIMDLongTwoVectorTied<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64, - asm, ".4h", ".8b", - [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), - (v8i8 V64:$Rn)))]>; - def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128, - asm, ".8h", ".16b", - [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), - (v16i8 V128:$Rn)))]>; - def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64, - asm, ".2s", ".4h", - [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), - (v4i16 V64:$Rn)))]>; - def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128, - asm, ".4s", ".8h", - [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), - (v8i16 V128:$Rn)))]>; - def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64, - asm, ".1d", ".2s", - [(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd), - (v2i32 V64:$Rn)))]>; - def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128, - asm, ".2d", ".4s", - [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), - (v4i32 V128:$Rn)))]>; -} - -// Supports all element sizes, except 1xD. 
-multiclass SIMDTwoVectorBHSDTied<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64, - asm, ".8b", ".8b", - [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128, - asm, ".16b", ".16b", - [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64, - asm, ".4h", ".4h", - [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128, - asm, ".8h", ".8h", - [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>; - def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, V128, - asm, ".2d", ".2d", - [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>; -} - -multiclass SIMDTwoVectorBHSD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, V128, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; -} - - -// Supports only B element sizes. -multiclass SIMDTwoVectorB<bit U, bits<2> size, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, V64, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, V128, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - -} - -// Supports only B and H element sizes. -multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>; -} - -// Supports only S and D element sizes, uses high bit of the size field -// as an extra opcode bit. 
-multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, - asm, ".2s", ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, - asm, ".4s", ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, - asm, ".2d", ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; -} - -// Supports only S element size. -multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; -} - - -multiclass SIMDTwoVectorFPToInt<bit U, bit S, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; -} - -multiclass SIMDTwoVectorIntToFP<bit U, bit S, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, - asm, ".2s", ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, - asm, ".4s", ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, - asm, ".2d", ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; -} - - -class BaseSIMDMixedTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand inreg, RegisterOperand outreg, - string asm, string outkind, string inkind, - list<dag> pattern> - : I<(outs outreg:$Rd), (ins inreg:$Rn), asm, - "{\t$Rd" # outkind # ", $Rn" # inkind # - "|" # outkind # "\t$Rd, $Rn}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseSIMDMixedTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand inreg, RegisterOperand outreg, - string asm, string outkind, string inkind, - list<dag> pattern> - : I<(outs outreg:$dst), (ins outreg:$Rd, inreg:$Rn), asm, - "{\t$Rd" # outkind # ", $Rn" # inkind # - "|" # outkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDMixedTwoVector<0, U, 0b00, opc, V128, V64, - asm, ".8b", ".8h", - [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v16i8 : BaseSIMDMixedTwoVectorTied<1, U, 0b00, opc, V128, V128, - asm#"2", ".16b", ".8h", []>; - 
def v4i16 : BaseSIMDMixedTwoVector<0, U, 0b01, opc, V128, V64, - asm, ".4h", ".4s", - [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v8i16 : BaseSIMDMixedTwoVectorTied<1, U, 0b01, opc, V128, V128, - asm#"2", ".8h", ".4s", []>; - def v2i32 : BaseSIMDMixedTwoVector<0, U, 0b10, opc, V128, V64, - asm, ".2s", ".2d", - [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn)))]>; - def v4i32 : BaseSIMDMixedTwoVectorTied<1, U, 0b10, opc, V128, V128, - asm#"2", ".4s", ".2d", []>; - - def : Pat<(concat_vectors (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn))), - (!cast<Instruction>(NAME # "v16i8") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; - def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn))), - (!cast<Instruction>(NAME # "v8i16") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; - def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn))), - (!cast<Instruction>(NAME # "v4i32") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -} - -class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand regtype, string asm, string kind, - ValueType dty, ValueType sty, SDNode OpNode> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", #0" # - "|" # kind # "\t$Rd, $Rn, #0}", "", - [(set (dty regtype:$Rd), (OpNode (sty regtype:$Rn)))]>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -// Comparisons support all element sizes, except 1xD. -multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm, - SDNode OpNode> { - def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, opc, V64, - asm, ".8b", - v8i8, v8i8, OpNode>; - def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, opc, V128, - asm, ".16b", - v16i8, v16i8, OpNode>; - def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, opc, V64, - asm, ".4h", - v4i16, v4i16, OpNode>; - def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, opc, V128, - asm, ".8h", - v8i16, v8i16, OpNode>; - def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, opc, V64, - asm, ".2s", - v2i32, v2i32, OpNode>; - def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, opc, V128, - asm, ".4s", - v4i32, v4i32, OpNode>; - def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, opc, V128, - asm, ".2d", - v2i64, v2i64, OpNode>; -} - -// FP Comparisons support only S and D element sizes. 
-multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc, - string asm, SDNode OpNode> { - def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, opc, V64, - asm, ".2s", - v2i32, v2f32, OpNode>; - def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, opc, V128, - asm, ".4s", - v4i32, v4f32, OpNode>; - def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, opc, V128, - asm, ".2d", - v2i64, v2f64, OpNode>; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDFPCvtTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand outtype, RegisterOperand intype, - string asm, string VdTy, string VnTy, - list<dag> pattern> - : I<(outs outtype:$Rd), (ins intype:$Rn), asm, - !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseSIMDFPCvtTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterOperand outtype, RegisterOperand intype, - string asm, string VdTy, string VnTy, - list<dag> pattern> - : I<(outs outtype:$dst), (ins outtype:$Rd, intype:$Rn), asm, - !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDFPWidenTwoVector<bit U, bit S, bits<5> opc, string asm> { - def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V128, V64, - asm, ".4s", ".4h", []>; - def v8i16 : BaseSIMDFPCvtTwoVector<1, U, {S,0}, opc, V128, V128, - asm#"2", ".4s", ".8h", []>; - def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V128, V64, - asm, ".2d", ".2s", []>; - def v4i32 : BaseSIMDFPCvtTwoVector<1, U, {S,1}, opc, V128, V128, - asm#"2", ".2d", ".4s", []>; -} - -multiclass SIMDFPNarrowTwoVector<bit U, bit S, bits<5> opc, string asm> { - def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V64, V128, - asm, ".4h", ".4s", []>; - def v8i16 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,0}, opc, V128, V128, - asm#"2", ".8h", ".4s", []>; - def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128, - asm, ".2s", ".2d", []>; - def v4i32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128, - asm#"2", ".4s", ".2d", []>; -} - -multiclass SIMDFPInexactCvtTwoVector<bit U, bit S, bits<5> opc, string asm, - Intrinsic OpNode> { - def v2f32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128, - asm, ".2s", ".2d", - [(set (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn)))]>; - def v4f32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128, - asm#"2", ".4s", ".2d", []>; - - def : Pat<(concat_vectors (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn))), - (!cast<Instruction>(NAME # "v4f32") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD three register different-size vector instructions. 
-//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDDifferentThreeVector<bit U, bits<3> size, bits<4> opcode, - RegisterOperand outtype, RegisterOperand intype1, - RegisterOperand intype2, string asm, - string outkind, string inkind1, string inkind2, - list<dag> pattern> - : I<(outs outtype:$Rd), (ins intype1:$Rn, intype2:$Rm), asm, - "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 # - "|" # outkind # "\t$Rd, $Rn, $Rm}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = size{0}; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size{2-1}; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = opcode; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDDifferentThreeVectorTied<bit U, bits<3> size, bits<4> opcode, - RegisterOperand outtype, RegisterOperand intype1, - RegisterOperand intype2, string asm, - string outkind, string inkind1, string inkind2, - list<dag> pattern> - : I<(outs outtype:$dst), (ins outtype:$Rd, intype1:$Rn, intype2:$Rm), asm, - "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 # - "|" # outkind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = size{0}; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size{2-1}; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = opcode; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -// FIXME: TableGen doesn't know how to deal with expanded types that also -// change the element count (in this case, placing the results in -// the high elements of the result register rather than the low -// elements). Until that's fixed, we can't code-gen those. -multiclass SIMDNarrowThreeVectorBHS<bit U, bits<4> opc, string asm, - Intrinsic IntOp> { - def v8i16_v8i8 : BaseSIMDDifferentThreeVector<U, 0b000, opc, - V64, V128, V128, - asm, ".8b", ".8h", ".8h", - [(set (v8i8 V64:$Rd), (IntOp (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v8i16_v16i8 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc, - V128, V128, V128, - asm#"2", ".16b", ".8h", ".8h", - []>; - def v4i32_v4i16 : BaseSIMDDifferentThreeVector<U, 0b010, opc, - V64, V128, V128, - asm, ".4h", ".4s", ".4s", - [(set (v4i16 V64:$Rd), (IntOp (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; - def v4i32_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc, - V128, V128, V128, - asm#"2", ".8h", ".4s", ".4s", - []>; - def v2i64_v2i32 : BaseSIMDDifferentThreeVector<U, 0b100, opc, - V64, V128, V128, - asm, ".2s", ".2d", ".2d", - [(set (v2i32 V64:$Rd), (IntOp (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>; - def v2i64_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc, - V128, V128, V128, - asm#"2", ".4s", ".2d", ".2d", - []>; - - - // Patterns for the '2' variants involve INSERT_SUBREG, which you can't put in - // a version attached to an instruction. 
- def : Pat<(concat_vectors (v8i8 V64:$Rd), (IntOp (v8i16 V128:$Rn), - (v8i16 V128:$Rm))), - (!cast<Instruction>(NAME # "v8i16_v16i8") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; - def : Pat<(concat_vectors (v4i16 V64:$Rd), (IntOp (v4i32 V128:$Rn), - (v4i32 V128:$Rm))), - (!cast<Instruction>(NAME # "v4i32_v8i16") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; - def : Pat<(concat_vectors (v2i32 V64:$Rd), (IntOp (v2i64 V128:$Rn), - (v2i64 V128:$Rm))), - (!cast<Instruction>(NAME # "v2i64_v4i32") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -} - -multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm, - Intrinsic IntOp> { - def v8i8 : BaseSIMDDifferentThreeVector<U, 0b000, opc, - V128, V64, V64, - asm, ".8h", ".8b", ".8b", - [(set (v8i16 V128:$Rd), (IntOp (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDDifferentThreeVector<U, 0b001, opc, - V128, V128, V128, - asm#"2", ".8h", ".16b", ".16b", []>; - def v1i64 : BaseSIMDDifferentThreeVector<U, 0b110, opc, - V128, V64, V64, - asm, ".1q", ".1d", ".1d", []>; - def v2i64 : BaseSIMDDifferentThreeVector<U, 0b111, opc, - V128, V128, V128, - asm#"2", ".1q", ".2d", ".2d", []>; - - def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 V128:$Rn)), - (v8i8 (extract_high_v16i8 V128:$Rm)))), - (!cast<Instruction>(NAME#"v16i8") V128:$Rn, V128:$Rm)>; -} - -multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc, - V128, V64, V64, - asm, ".4s", ".4h", ".4h", - [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc, - V128, V128, V128, - asm#"2", ".4s", ".8h", ".8h", - [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm)))]>; - def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc, - V128, V64, V64, - asm, ".2d", ".2s", ".2s", - [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc, - V128, V128, V128, - asm#"2", ".2d", ".4s", ".4s", - [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm)))]>; -} - -multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc, - V128, V64, V64, - asm, ".8h", ".8b", ".8b", - [(set (v8i16 V128:$Rd), - (zext (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))))]>; - def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc, - V128, V128, V128, - asm#"2", ".8h", ".16b", ".16b", - [(set (v8i16 V128:$Rd), - (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn), - (extract_high_v16i8 V128:$Rm)))))]>; - def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc, - V128, V64, V64, - asm, ".4s", ".4h", ".4h", - [(set (v4i32 V128:$Rd), - (zext (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))))]>; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc, - V128, V128, V128, - asm#"2", ".4s", ".8h", ".8h", - [(set (v4i32 V128:$Rd), - (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm)))))]>; - def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc, - V128, V64, V64, - asm, ".2d", ".2s", ".2s", - [(set (v2i64 V128:$Rd), - (zext (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))))]>; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc, - 
V128, V128, V128, - asm#"2", ".2d", ".4s", ".4s", - [(set (v2i64 V128:$Rd), - (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm)))))]>; -} - -multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc, - string asm, - SDPatternOperator OpNode> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b000, opc, - V128, V64, V64, - asm, ".8h", ".8b", ".8b", - [(set (v8i16 V128:$dst), - (add (v8i16 V128:$Rd), - (zext (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))))]>; - def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc, - V128, V128, V128, - asm#"2", ".8h", ".16b", ".16b", - [(set (v8i16 V128:$dst), - (add (v8i16 V128:$Rd), - (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn), - (extract_high_v16i8 V128:$Rm))))))]>; - def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc, - V128, V64, V64, - asm, ".4s", ".4h", ".4h", - [(set (v4i32 V128:$dst), - (add (v4i32 V128:$Rd), - (zext (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))))]>; - def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc, - V128, V128, V128, - asm#"2", ".4s", ".8h", ".8h", - [(set (v4i32 V128:$dst), - (add (v4i32 V128:$Rd), - (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm))))))]>; - def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc, - V128, V64, V64, - asm, ".2d", ".2s", ".2s", - [(set (v2i64 V128:$dst), - (add (v2i64 V128:$Rd), - (zext (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))))]>; - def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc, - V128, V128, V128, - asm#"2", ".2d", ".4s", ".4s", - [(set (v2i64 V128:$dst), - (add (v2i64 V128:$Rd), - (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm))))))]>; -} - -multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc, - V128, V64, V64, - asm, ".8h", ".8b", ".8b", - [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc, - V128, V128, V128, - asm#"2", ".8h", ".16b", ".16b", - [(set (v8i16 V128:$Rd), (OpNode (extract_high_v16i8 V128:$Rn), - (extract_high_v16i8 V128:$Rm)))]>; - def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc, - V128, V64, V64, - asm, ".4s", ".4h", ".4h", - [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc, - V128, V128, V128, - asm#"2", ".4s", ".8h", ".8h", - [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm)))]>; - def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc, - V128, V64, V64, - asm, ".2d", ".2s", ".2s", - [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc, - V128, V128, V128, - asm#"2", ".2d", ".4s", ".4s", - [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm)))]>; -} - -multiclass SIMDLongThreeVectorTiedBHS<bit U, bits<4> opc, - string asm, - SDPatternOperator OpNode> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b000, opc, - V128, V64, V64, - asm, ".8h", ".8b", ".8b", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc, - V128, V128, V128, - asm#"2", ".8h", ".16b", ".16b", - [(set (v8i16 V128:$dst), - (OpNode 
(v8i16 V128:$Rd), - (extract_high_v16i8 V128:$Rn), - (extract_high_v16i8 V128:$Rm)))]>; - def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc, - V128, V64, V64, - asm, ".4s", ".4h", ".4h", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc, - V128, V128, V128, - asm#"2", ".4s", ".8h", ".8h", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), - (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm)))]>; - def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc, - V128, V64, V64, - asm, ".2d", ".2s", ".2s", - [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc, - V128, V128, V128, - asm#"2", ".2d", ".4s", ".4s", - [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), - (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm)))]>; -} - -multiclass SIMDLongThreeVectorSQDMLXTiedHS<bit U, bits<4> opc, string asm, - SDPatternOperator Accum> { - def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc, - V128, V64, V64, - asm, ".4s", ".4h", ".4h", - [(set (v4i32 V128:$dst), - (Accum (v4i32 V128:$Rd), - (v4i32 (int_arm64_neon_sqdmull (v4i16 V64:$Rn), - (v4i16 V64:$Rm)))))]>; - def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc, - V128, V128, V128, - asm#"2", ".4s", ".8h", ".8h", - [(set (v4i32 V128:$dst), - (Accum (v4i32 V128:$Rd), - (v4i32 (int_arm64_neon_sqdmull (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 V128:$Rm)))))]>; - def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc, - V128, V64, V64, - asm, ".2d", ".2s", ".2s", - [(set (v2i64 V128:$dst), - (Accum (v2i64 V128:$Rd), - (v2i64 (int_arm64_neon_sqdmull (v2i32 V64:$Rn), - (v2i32 V64:$Rm)))))]>; - def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc, - V128, V128, V128, - asm#"2", ".2d", ".4s", ".4s", - [(set (v2i64 V128:$dst), - (Accum (v2i64 V128:$Rd), - (v2i64 (int_arm64_neon_sqdmull (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 V128:$Rm)))))]>; -} - -multiclass SIMDWideThreeVectorBHS<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc, - V128, V128, V64, - asm, ".8h", ".8h", ".8b", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc, - V128, V128, V128, - asm#"2", ".8h", ".8h", ".16b", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), - (extract_high_v16i8 V128:$Rm)))]>; - def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc, - V128, V128, V64, - asm, ".4s", ".4s", ".4h", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc, - V128, V128, V128, - asm#"2", ".4s", ".4s", ".8h", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), - (extract_high_v8i16 V128:$Rm)))]>; - def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc, - V128, V128, V64, - asm, ".2d", ".2d", ".2s", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc, - V128, V128, V128, - asm#"2", ".2d", ".2d", ".4s", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), - (extract_high_v4i32 V128:$Rm)))]>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD bitwise extract from vector 
-//---------------------------------------------------------------------------- - -class BaseSIMDBitwiseExtract<bit size, RegisterOperand regtype, ValueType vty, - string asm, string kind> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, i32imm:$imm), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $imm" # - "|" # kind # "\t$Rd, $Rn, $Rm, $imm}", "", - [(set (vty regtype:$Rd), - (ARM64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<4> imm; - let Inst{31} = 0; - let Inst{30} = size; - let Inst{29-21} = 0b101110000; - let Inst{20-16} = Rm; - let Inst{15} = 0; - let Inst{14-11} = imm; - let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - - -multiclass SIMDBitwiseExtract<string asm> { - def v8i8 : BaseSIMDBitwiseExtract<0, V64, v8i8, asm, ".8b">; - def v16i8 : BaseSIMDBitwiseExtract<1, V128, v16i8, asm, ".16b">; -} - -//---------------------------------------------------------------------------- -// AdvSIMD zip vector -//---------------------------------------------------------------------------- - -class BaseSIMDZipVector<bits<3> size, bits<3> opc, RegisterOperand regtype, - string asm, string kind, SDNode OpNode, ValueType valty> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # - "|" # kind # "\t$Rd, $Rn, $Rm}", "", - [(set (valty regtype:$Rd), (OpNode regtype:$Rn, regtype:$Rm))]>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = size{0}; - let Inst{29-24} = 0b001110; - let Inst{23-22} = size{2-1}; - let Inst{21} = 0; - let Inst{20-16} = Rm; - let Inst{15} = 0; - let Inst{14-12} = opc; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDZipVector<bits<3>opc, string asm, - SDNode OpNode> { - def v8i8 : BaseSIMDZipVector<0b000, opc, V64, - asm, ".8b", OpNode, v8i8>; - def v16i8 : BaseSIMDZipVector<0b001, opc, V128, - asm, ".16b", OpNode, v16i8>; - def v4i16 : BaseSIMDZipVector<0b010, opc, V64, - asm, ".4h", OpNode, v4i16>; - def v8i16 : BaseSIMDZipVector<0b011, opc, V128, - asm, ".8h", OpNode, v8i16>; - def v2i32 : BaseSIMDZipVector<0b100, opc, V64, - asm, ".2s", OpNode, v2i32>; - def v4i32 : BaseSIMDZipVector<0b101, opc, V128, - asm, ".4s", OpNode, v4i32>; - def v2i64 : BaseSIMDZipVector<0b111, opc, V128, - asm, ".2d", OpNode, v2i64>; - - def : Pat<(v2f32 (OpNode V64:$Rn, V64:$Rm)), - (!cast<Instruction>(NAME#"v2i32") V64:$Rn, V64:$Rm)>; - def : Pat<(v4f32 (OpNode V128:$Rn, V128:$Rm)), - (!cast<Instruction>(NAME#"v4i32") V128:$Rn, V128:$Rm)>; - def : Pat<(v2f64 (OpNode V128:$Rn, V128:$Rm)), - (!cast<Instruction>(NAME#"v2i64") V128:$Rn, V128:$Rm)>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD three register scalar instructions -//---------------------------------------------------------------------------- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode, - RegisterClass regtype, string asm, - list<dag> pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, - "\t$Rd, $Rn, $Rm", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - 
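The scalar three-register base class above is only ever used through the wrapper multiclasses that follow. As a rough sketch of how the instruction definitions elsewhere in the backend would instantiate one of them (the U bit, opcode value and DAG node here are illustrative assumptions, not taken from this commit):

    // Illustrative only: a defm like this would sit in ARM64InstrInfo.td and
    // expand SIMDThreeScalarD (defined just below) into the concrete scalar
    // "add d0, d1, d2" form, with the ISel pattern mapping a v1i64 add onto it.
    defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>;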
-multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm, - [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>; -} - -multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm, - [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>; - def v1i32 : BaseSIMDThreeScalar<U, 0b10, opc, FPR32, asm, []>; - def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>; - def v1i8 : BaseSIMDThreeScalar<U, 0b00, opc, FPR8 , asm, []>; - - def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>; - def : Pat<(i32 (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))), - (!cast<Instruction>(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>; -} - -multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v1i32 : BaseSIMDThreeScalar<U, 0b10, opc, FPR32, asm, - [(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>; - def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>; -} - -multiclass SIMDThreeScalarSD<bit U, bit S, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def #NAME#64 : BaseSIMDThreeScalar<U, {S,1}, opc, FPR64, asm, - [(set (f64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>; - def #NAME#32 : BaseSIMDThreeScalar<U, {S,0}, opc, FPR32, asm, - [(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>; - } - - def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; -} - -multiclass SIMDThreeScalarFPCmp<bit U, bit S, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def #NAME#64 : BaseSIMDThreeScalar<U, {S,1}, opc, FPR64, asm, - [(set (i64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>; - def #NAME#32 : BaseSIMDThreeScalar<U, {S,0}, opc, FPR32, asm, - [(set (i32 FPR32:$Rd), (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]>; - } - - def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; -} - -class BaseSIMDThreeScalarMixed<bit U, bits<2> size, bits<5> opcode, - dag oops, dag iops, string asm, string cstr, list<dag> pat> - : I<oops, iops, asm, - "\t$Rd, $Rn, $Rm", cstr, pat>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDThreeScalarMixedHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def i16 : BaseSIMDThreeScalarMixed<U, 0b01, opc, - (outs FPR32:$Rd), - (ins FPR16:$Rn, FPR16:$Rm), asm, "", []>; - def i32 : BaseSIMDThreeScalarMixed<U, 0b10, opc, - (outs FPR64:$Rd), - (ins FPR32:$Rn, FPR32:$Rm), asm, "", - [(set (i64 FPR64:$Rd), (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm)))]>; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDThreeScalarMixedTiedHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def i16 : BaseSIMDThreeScalarMixed<U, 0b01, opc, - (outs FPR32:$dst), - (ins FPR32:$Rd, FPR16:$Rn, FPR16:$Rm), - 
asm, "$Rd = $dst", []>; - def i32 : BaseSIMDThreeScalarMixed<U, 0b10, opc, - (outs FPR64:$dst), - (ins FPR64:$Rd, FPR32:$Rn, FPR32:$Rm), - asm, "$Rd = $dst", - [(set (i64 FPR64:$dst), - (OpNode (i64 FPR64:$Rd), (i32 FPR32:$Rn), (i32 FPR32:$Rm)))]>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD two register scalar instructions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoScalar<bit U, bits<2> size, bits<5> opcode, - RegisterClass regtype, RegisterClass regtype2, - string asm, list<dag> pat> - : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm, - "\t$Rd, $Rn", "", pat>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoScalarTied<bit U, bits<2> size, bits<5> opcode, - RegisterClass regtype, RegisterClass regtype2, - string asm, list<dag> pat> - : I<(outs regtype:$dst), (ins regtype:$Rd, regtype2:$Rn), asm, - "\t$Rd, $Rn", "$Rd = $dst", pat>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<5> opcode, - RegisterClass regtype, string asm> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, - "\t$Rd, $Rn, #0", "", []>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SIMDInexactCvtTwoScalar<bits<5> opcode, string asm> - : I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "", - [(set (f32 FPR32:$Rd), (int_arm64_sisd_fcvtxn (f64 FPR64:$Rn)))]>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-17} = 0b011111100110000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDCmpTwoScalarD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v1i64rz : BaseSIMDCmpTwoScalar<U, 0b11, opc, FPR64, asm>; - - def : Pat<(v1i64 (OpNode FPR64:$Rn)), - (!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>; -} - -multiclass SIMDCmpTwoScalarSD<bit U, bit S, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v1i64rz : BaseSIMDCmpTwoScalar<U, {S,1}, opc, FPR64, asm>; - def v1i32rz : BaseSIMDCmpTwoScalar<U, {S,0}, opc, FPR32, asm>; - - def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))), - (!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>; -} - -multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v1i64 : BaseSIMDTwoScalar<U, 0b11, opc, FPR64, FPR64, asm, - [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn)))]>; - - def : Pat<(i64 (OpNode (i64 FPR64:$Rn))), - (!cast<Instruction>(NAME # "v1i64") FPR64:$Rn)>; -} - -multiclass SIMDTwoScalarSD<bit U, bit S, bits<5> opc, string asm> { - def v1i64 : BaseSIMDTwoScalar<U, {S,1}, opc, FPR64, FPR64, asm,[]>; - def 
v1i32 : BaseSIMDTwoScalar<U, {S,0}, opc, FPR32, FPR32, asm,[]>; -} - -multiclass SIMDTwoScalarCVTSD<bit U, bit S, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v1i64 : BaseSIMDTwoScalar<U, {S,1}, opc, FPR64, FPR64, asm, - [(set FPR64:$Rd, (OpNode (f64 FPR64:$Rn)))]>; - def v1i32 : BaseSIMDTwoScalar<U, {S,0}, opc, FPR32, FPR32, asm, - [(set FPR32:$Rd, (OpNode (f32 FPR32:$Rn)))]>; -} - -multiclass SIMDTwoScalarBHSD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v1i64 : BaseSIMDTwoScalar<U, 0b11, opc, FPR64, FPR64, asm, - [(set (i64 FPR64:$Rd), (OpNode (i64 FPR64:$Rn)))]>; - def v1i32 : BaseSIMDTwoScalar<U, 0b10, opc, FPR32, FPR32, asm, - [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>; - def v1i16 : BaseSIMDTwoScalar<U, 0b01, opc, FPR16, FPR16, asm, []>; - def v1i8 : BaseSIMDTwoScalar<U, 0b00, opc, FPR8 , FPR8 , asm, []>; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))), - (!cast<Instruction>(NAME # v1i64) FPR64:$Rn)>; -} - -multiclass SIMDTwoScalarBHSDTied<bit U, bits<5> opc, string asm, - Intrinsic OpNode> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v1i64 : BaseSIMDTwoScalarTied<U, 0b11, opc, FPR64, FPR64, asm, - [(set (i64 FPR64:$dst), (OpNode (i64 FPR64:$Rd), (i64 FPR64:$Rn)))]>; - def v1i32 : BaseSIMDTwoScalarTied<U, 0b10, opc, FPR32, FPR32, asm, - [(set (i32 FPR32:$dst), (OpNode (i32 FPR32:$Rd), (i32 FPR32:$Rn)))]>; - def v1i16 : BaseSIMDTwoScalarTied<U, 0b01, opc, FPR16, FPR16, asm, []>; - def v1i8 : BaseSIMDTwoScalarTied<U, 0b00, opc, FPR8 , FPR8 , asm, []>; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn))), - (!cast<Instruction>(NAME # v1i64) FPR64:$Rd, FPR64:$Rn)>; -} - - - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDTwoScalarMixedBHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v1i32 : BaseSIMDTwoScalar<U, 0b10, opc, FPR32, FPR64, asm, - [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn)))]>; - def v1i16 : BaseSIMDTwoScalar<U, 0b01, opc, FPR16, FPR32, asm, []>; - def v1i8 : BaseSIMDTwoScalar<U, 0b00, opc, FPR8 , FPR16, asm, []>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD scalar pairwise instructions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDPairwiseScalar<bit U, bits<2> size, bits<5> opcode, - RegisterOperand regtype, RegisterOperand vectype, - string asm, string kind> - : I<(outs regtype:$Rd), (ins vectype:$Rn), asm, - "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", []>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b11000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDPairwiseScalarD<bit U, bits<5> opc, string asm> { - def v2i64p : BaseSIMDPairwiseScalar<U, 0b11, opc, FPR64Op, V128, - asm, ".2d">; -} - -multiclass SIMDPairwiseScalarSD<bit U, bit S, bits<5> opc, string asm> { - def v2i32p : BaseSIMDPairwiseScalar<U, {S,0}, opc, FPR32Op, V64, - asm, ".2s">; - def v2i64p : BaseSIMDPairwiseScalar<U, {S,1}, opc, FPR64Op, V128, - asm, ".2d">; -} - -//---------------------------------------------------------------------------- -// AdvSIMD across lanes instructions 
-//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDAcrossLanes<bit Q, bit U, bits<2> size, bits<5> opcode, - RegisterClass regtype, RegisterOperand vectype, - string asm, string kind, list<dag> pattern> - : I<(outs regtype:$Rd), (ins vectype:$Rn), asm, - "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b11000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDAcrossLanesBHS<bit U, bits<5> opcode, - string asm> { - def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR8, V64, - asm, ".8b", []>; - def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR8, V128, - asm, ".16b", []>; - def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR16, V64, - asm, ".4h", []>; - def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR16, V128, - asm, ".8h", []>; - def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR32, V128, - asm, ".4s", []>; -} - -multiclass SIMDAcrossLanesHSD<bit U, bits<5> opcode, string asm> { - def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR16, V64, - asm, ".8b", []>; - def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR16, V128, - asm, ".16b", []>; - def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR32, V64, - asm, ".4h", []>; - def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR32, V128, - asm, ".8h", []>; - def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR64, V128, - asm, ".4s", []>; -} - -multiclass SIMDAcrossLanesS<bits<5> opcode, bit sz1, string asm, - Intrinsic intOp> { - def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128, - asm, ".4s", - [(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD INS/DUP instructions -//---------------------------------------------------------------------------- - -// FIXME: There has got to be a better way to factor these. ugh. 
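For the DUP/INS family below, the lane index and the element size share the imm5 field (Inst{20-16}): the position of the lowest set bit encodes the element size (xxxx1 = .b, xxx10 = .h, xx100 = .s, x1000 = .d) and the bits above it carry the index, which is why each per-size class assigns a different slice of Inst{20-16}. A minimal sketch of how the per-size DUP-by-element classes would be instantiated (instruction names and arrangements are assumptions for illustration):

    // Illustrative only: one DUP-by-element definition per destination
    // arrangement, as they might appear in ARM64InstrInfo.td.
    def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
    def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
    def DUPv2i64lane : SIMDDup64FromElement;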
- -class BaseSIMDInsDup<bit Q, bit op, dag outs, dag ins, string asm, - string operands, string constraints, list<dag> pattern> - : I<outs, ins, asm, operands, constraints, pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = op; - let Inst{28-21} = 0b01110000; - let Inst{15} = 0; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SIMDDupFromMain<bit Q, bits<5> imm5, string size, ValueType vectype, - RegisterOperand vecreg, RegisterClass regtype> - : BaseSIMDInsDup<Q, 0, (outs vecreg:$Rd), (ins regtype:$Rn), "dup", - "{\t$Rd" # size # ", $Rn" # - "|" # size # "\t$Rd, $Rn}", "", - [(set (vectype vecreg:$Rd), (ARM64dup regtype:$Rn))]> { - let Inst{20-16} = imm5; - let Inst{14-11} = 0b0001; -} - -class SIMDDupFromElement<bit Q, string dstkind, string srckind, - ValueType vectype, ValueType insreg, - RegisterOperand vecreg, Operand idxtype, - ValueType elttype, SDNode OpNode> - : BaseSIMDInsDup<Q, 0, (outs vecreg:$Rd), (ins V128:$Rn, idxtype:$idx), "dup", - "{\t$Rd" # dstkind # ", $Rn" # srckind # "$idx" # - "|" # dstkind # "\t$Rd, $Rn$idx}", "", - [(set (vectype vecreg:$Rd), - (OpNode (insreg V128:$Rn), idxtype:$idx))]> { - let Inst{14-11} = 0b0000; -} - -class SIMDDup64FromElement - : SIMDDupFromElement<1, ".2d", ".d", v2i64, v2i64, V128, - VectorIndexD, i64, ARM64duplane64> { - bits<1> idx; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; -} - -class SIMDDup32FromElement<bit Q, string size, ValueType vectype, - RegisterOperand vecreg> - : SIMDDupFromElement<Q, size, ".s", vectype, v4i32, vecreg, - VectorIndexS, i64, ARM64duplane32> { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; -} - -class SIMDDup16FromElement<bit Q, string size, ValueType vectype, - RegisterOperand vecreg> - : SIMDDupFromElement<Q, size, ".h", vectype, v8i16, vecreg, - VectorIndexH, i64, ARM64duplane16> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; -} - -class SIMDDup8FromElement<bit Q, string size, ValueType vectype, - RegisterOperand vecreg> - : SIMDDupFromElement<Q, size, ".b", vectype, v16i8, vecreg, - VectorIndexB, i64, ARM64duplane8> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; -} - -class BaseSIMDMov<bit Q, string size, bits<4> imm4, RegisterClass regtype, - Operand idxtype, string asm, list<dag> pattern> - : BaseSIMDInsDup<Q, 0, (outs regtype:$Rd), (ins V128:$Rn, idxtype:$idx), asm, - "{\t$Rd, $Rn" # size # "$idx" # - "|" # size # "\t$Rd, $Rn$idx}", "", pattern> { - let Inst{14-11} = imm4; -} - -class SIMDSMov<bit Q, string size, RegisterClass regtype, - Operand idxtype> - : BaseSIMDMov<Q, size, 0b0101, regtype, idxtype, "smov", []>; -class SIMDUMov<bit Q, string size, ValueType vectype, RegisterClass regtype, - Operand idxtype> - : BaseSIMDMov<Q, size, 0b0111, regtype, idxtype, "umov", - [(set regtype:$Rd, (vector_extract (vectype V128:$Rn), idxtype:$idx))]>; - -class SIMDMovAlias<string asm, string size, Instruction inst, - RegisterClass regtype, Operand idxtype> - : InstAlias<asm#"{\t$dst, $src"#size#"$idx" # - "|" # size # "\t$dst, $src$idx}", - (inst regtype:$dst, V128:$src, idxtype:$idx)>; - -multiclass SMov { - def vi8to32 : SIMDSMov<0, ".b", GPR32, VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def vi8to64 : SIMDSMov<1, ".b", GPR64, VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def vi16to32 : SIMDSMov<0, ".h", GPR32, VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 
0b10; - } - def vi16to64 : SIMDSMov<1, ".h", GPR64, VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def vi32to64 : SIMDSMov<1, ".s", GPR64, VectorIndexS> { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - } -} - -multiclass UMov { - def vi8 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def vi16 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def vi32 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndexS> { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - } - def vi64 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndexD> { - bits<1> idx; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; - } - def : SIMDMovAlias<"mov", ".s", - !cast<Instruction>(NAME#"vi32"), - GPR32, VectorIndexS>; - def : SIMDMovAlias<"mov", ".d", - !cast<Instruction>(NAME#"vi64"), - GPR64, VectorIndexD>; -} - -class SIMDInsFromMain<string size, ValueType vectype, - RegisterClass regtype, Operand idxtype> - : BaseSIMDInsDup<1, 0, (outs V128:$dst), - (ins V128:$Rd, idxtype:$idx, regtype:$Rn), "ins", - "{\t$Rd" # size # "$idx, $Rn" # - "|" # size # "\t$Rd$idx, $Rn}", - "$Rd = $dst", - [(set V128:$dst, - (vector_insert (vectype V128:$Rd), regtype:$Rn, idxtype:$idx))]> { - let Inst{14-11} = 0b0011; -} - -class SIMDInsFromElement<string size, ValueType vectype, - ValueType elttype, Operand idxtype> - : BaseSIMDInsDup<1, 1, (outs V128:$dst), - (ins V128:$Rd, idxtype:$idx, V128:$Rn, idxtype:$idx2), "ins", - "{\t$Rd" # size # "$idx, $Rn" # size # "$idx2" # - "|" # size # "\t$Rd$idx, $Rn$idx2}", - "$Rd = $dst", - [(set V128:$dst, - (vector_insert - (vectype V128:$Rd), - (elttype (vector_extract (vectype V128:$Rn), idxtype:$idx2)), - idxtype:$idx))]>; - -class SIMDInsMainMovAlias<string size, Instruction inst, - RegisterClass regtype, Operand idxtype> - : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # - "|" # size #"\t$dst$idx, $src}", - (inst V128:$dst, idxtype:$idx, regtype:$src)>; -class SIMDInsElementMovAlias<string size, Instruction inst, - Operand idxtype> - : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2" # - # "|" # size #" $dst$idx, $src$idx2}", - (inst V128:$dst, idxtype:$idx, V128:$src, idxtype:$idx2)>; - - -multiclass SIMDIns { - def vi8gpr : SIMDInsFromMain<".b", v16i8, GPR32, VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def vi16gpr : SIMDInsFromMain<".h", v8i16, GPR32, VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def vi32gpr : SIMDInsFromMain<".s", v4i32, GPR32, VectorIndexS> { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - } - def vi64gpr : SIMDInsFromMain<".d", v2i64, GPR64, VectorIndexD> { - bits<1> idx; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; - } - - def vi8lane : SIMDInsFromElement<".b", v16i8, i32, VectorIndexB> { - bits<4> idx; - bits<4> idx2; - let Inst{20-17} = idx; - let Inst{16} = 1; - let Inst{14-11} = idx2; - } - def vi16lane : SIMDInsFromElement<".h", v8i16, i32, VectorIndexH> { - bits<3> idx; - bits<3> idx2; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - let Inst{14-12} = idx2; - let Inst{11} = 0; - } - def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> { - bits<2> idx; - bits<2> idx2; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - let Inst{14-13} = idx2; - let Inst{12-11} = 0; - } - def vi64lane : SIMDInsFromElement<".d", v2i64, i64, 
VectorIndexD> { - bits<1> idx; - bits<1> idx2; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; - let Inst{14} = idx2; - let Inst{13-11} = 0; - } - - // For all forms of the INS instruction, the "mov" mnemonic is the - // preferred alias. Why they didn't just call the instruction "mov" in - // the first place is a very good question indeed... - def : SIMDInsMainMovAlias<".b", !cast<Instruction>(NAME#"vi8gpr"), - GPR32, VectorIndexB>; - def : SIMDInsMainMovAlias<".h", !cast<Instruction>(NAME#"vi16gpr"), - GPR32, VectorIndexH>; - def : SIMDInsMainMovAlias<".s", !cast<Instruction>(NAME#"vi32gpr"), - GPR32, VectorIndexS>; - def : SIMDInsMainMovAlias<".d", !cast<Instruction>(NAME#"vi64gpr"), - GPR64, VectorIndexD>; - - def : SIMDInsElementMovAlias<".b", !cast<Instruction>(NAME#"vi8lane"), - VectorIndexB>; - def : SIMDInsElementMovAlias<".h", !cast<Instruction>(NAME#"vi16lane"), - VectorIndexH>; - def : SIMDInsElementMovAlias<".s", !cast<Instruction>(NAME#"vi32lane"), - VectorIndexS>; - def : SIMDInsElementMovAlias<".d", !cast<Instruction>(NAME#"vi64lane"), - VectorIndexD>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD TBL/TBX -//---------------------------------------------------------------------------- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDTableLookup<bit Q, bits<2> len, bit op, RegisterOperand vectype, - RegisterOperand listtype, string asm, string kind> - : I<(outs vectype:$Vd), (ins listtype:$Vn, vectype:$Vm), asm, - "\t$Vd" # kind # ", $Vn, $Vm" # kind, "", []>, - Sched<[WriteV]> { - bits<5> Vd; - bits<5> Vn; - bits<5> Vm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-21} = 0b001110000; - let Inst{20-16} = Vm; - let Inst{15} = 0; - let Inst{14-13} = len; - let Inst{12} = op; - let Inst{11-10} = 0b00; - let Inst{9-5} = Vn; - let Inst{4-0} = Vd; -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDTableLookupTied<bit Q, bits<2> len, bit op, RegisterOperand vectype, - RegisterOperand listtype, string asm, string kind> - : I<(outs vectype:$dst), (ins vectype:$Vd, listtype:$Vn, vectype:$Vm), asm, - "\t$Vd" # kind # ", $Vn, $Vm" # kind, "$Vd = $dst", []>, - Sched<[WriteV]> { - bits<5> Vd; - bits<5> Vn; - bits<5> Vm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-21} = 0b001110000; - let Inst{20-16} = Vm; - let Inst{15} = 0; - let Inst{14-13} = len; - let Inst{12} = op; - let Inst{11-10} = 0b00; - let Inst{9-5} = Vn; - let Inst{4-0} = Vd; -} - -class SIMDTableLookupAlias<string asm, Instruction inst, - RegisterOperand vectype, RegisterOperand listtype> - : InstAlias<!strconcat(asm, "\t$dst, $lst, $index"), - (inst vectype:$dst, listtype:$lst, vectype:$index), 0>; - -multiclass SIMDTableLookup<bit op, string asm> { - def v8i8One : BaseSIMDTableLookup<0, 0b00, op, V64, VecListOne16b, - asm, ".8b">; - def v8i8Two : BaseSIMDTableLookup<0, 0b01, op, V64, VecListTwo16b, - asm, ".8b">; - def v8i8Three : BaseSIMDTableLookup<0, 0b10, op, V64, VecListThree16b, - asm, ".8b">; - def v8i8Four : BaseSIMDTableLookup<0, 0b11, op, V64, VecListFour16b, - asm, ".8b">; - def v16i8One : BaseSIMDTableLookup<1, 0b00, op, V128, VecListOne16b, - asm, ".16b">; - def v16i8Two : BaseSIMDTableLookup<1, 0b01, op, V128, VecListTwo16b, - asm, ".16b">; - def v16i8Three: BaseSIMDTableLookup<1, 0b10, op, V128, VecListThree16b, - asm, ".16b">; - def v16i8Four : BaseSIMDTableLookup<1, 0b11, op, V128, VecListFour16b, - asm, ".16b">; - - def : SIMDTableLookupAlias<asm # ".8b", - !cast<Instruction>(NAME#"v8i8One"), 
- V64, VecListOne128>; - def : SIMDTableLookupAlias<asm # ".8b", - !cast<Instruction>(NAME#"v8i8Two"), - V64, VecListTwo128>; - def : SIMDTableLookupAlias<asm # ".8b", - !cast<Instruction>(NAME#"v8i8Three"), - V64, VecListThree128>; - def : SIMDTableLookupAlias<asm # ".8b", - !cast<Instruction>(NAME#"v8i8Four"), - V64, VecListFour128>; - def : SIMDTableLookupAlias<asm # ".16b", - !cast<Instruction>(NAME#"v16i8One"), - V128, VecListOne128>; - def : SIMDTableLookupAlias<asm # ".16b", - !cast<Instruction>(NAME#"v16i8Two"), - V128, VecListTwo128>; - def : SIMDTableLookupAlias<asm # ".16b", - !cast<Instruction>(NAME#"v16i8Three"), - V128, VecListThree128>; - def : SIMDTableLookupAlias<asm # ".16b", - !cast<Instruction>(NAME#"v16i8Four"), - V128, VecListFour128>; -} - -multiclass SIMDTableLookupTied<bit op, string asm> { - def v8i8One : BaseSIMDTableLookupTied<0, 0b00, op, V64, VecListOne16b, - asm, ".8b">; - def v8i8Two : BaseSIMDTableLookupTied<0, 0b01, op, V64, VecListTwo16b, - asm, ".8b">; - def v8i8Three : BaseSIMDTableLookupTied<0, 0b10, op, V64, VecListThree16b, - asm, ".8b">; - def v8i8Four : BaseSIMDTableLookupTied<0, 0b11, op, V64, VecListFour16b, - asm, ".8b">; - def v16i8One : BaseSIMDTableLookupTied<1, 0b00, op, V128, VecListOne16b, - asm, ".16b">; - def v16i8Two : BaseSIMDTableLookupTied<1, 0b01, op, V128, VecListTwo16b, - asm, ".16b">; - def v16i8Three: BaseSIMDTableLookupTied<1, 0b10, op, V128, VecListThree16b, - asm, ".16b">; - def v16i8Four : BaseSIMDTableLookupTied<1, 0b11, op, V128, VecListFour16b, - asm, ".16b">; - - def : SIMDTableLookupAlias<asm # ".8b", - !cast<Instruction>(NAME#"v8i8One"), - V64, VecListOne128>; - def : SIMDTableLookupAlias<asm # ".8b", - !cast<Instruction>(NAME#"v8i8Two"), - V64, VecListTwo128>; - def : SIMDTableLookupAlias<asm # ".8b", - !cast<Instruction>(NAME#"v8i8Three"), - V64, VecListThree128>; - def : SIMDTableLookupAlias<asm # ".8b", - !cast<Instruction>(NAME#"v8i8Four"), - V64, VecListFour128>; - def : SIMDTableLookupAlias<asm # ".16b", - !cast<Instruction>(NAME#"v16i8One"), - V128, VecListOne128>; - def : SIMDTableLookupAlias<asm # ".16b", - !cast<Instruction>(NAME#"v16i8Two"), - V128, VecListTwo128>; - def : SIMDTableLookupAlias<asm # ".16b", - !cast<Instruction>(NAME#"v16i8Three"), - V128, VecListThree128>; - def : SIMDTableLookupAlias<asm # ".16b", - !cast<Instruction>(NAME#"v16i8Four"), - V128, VecListFour128>; -} - - -//---------------------------------------------------------------------------- -// AdvSIMD scalar CPY -//---------------------------------------------------------------------------- -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDScalarCPY<RegisterClass regtype, RegisterOperand vectype, - string kind, Operand idxtype> - : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), "mov", - "{\t$dst, $src" # kind # "$idx" # - "|\t$dst, $src$idx}", "", []>, - Sched<[WriteV]> { - bits<5> dst; - bits<5> src; - let Inst{31-21} = 0b01011110000; - let Inst{15-10} = 0b000001; - let Inst{9-5} = src; - let Inst{4-0} = dst; -} - -class SIMDScalarCPYAlias<string asm, string size, Instruction inst, - RegisterClass regtype, RegisterOperand vectype, Operand idxtype> - : InstAlias<asm # "{\t$dst, $src" # size # "$index" # - # "|\t$dst, $src$index}", - (inst regtype:$dst, vectype:$src, idxtype:$index)>; - - -multiclass SIMDScalarCPY<string asm> { - def i8 : BaseSIMDScalarCPY<FPR8, V128, ".b", VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def i16 : BaseSIMDScalarCPY<FPR16, V128, ".h", 
VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def i32 : BaseSIMDScalarCPY<FPR32, V128, ".s", VectorIndexS> { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - } - def i64 : BaseSIMDScalarCPY<FPR64, V128, ".d", VectorIndexD> { - bits<1> idx; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; - } - - // 'DUP' mnemonic aliases. - def : SIMDScalarCPYAlias<"dup", ".b", - !cast<Instruction>(NAME#"i8"), - FPR8, V128, VectorIndexB>; - def : SIMDScalarCPYAlias<"dup", ".h", - !cast<Instruction>(NAME#"i16"), - FPR16, V128, VectorIndexH>; - def : SIMDScalarCPYAlias<"dup", ".s", - !cast<Instruction>(NAME#"i32"), - FPR32, V128, VectorIndexS>; - def : SIMDScalarCPYAlias<"dup", ".d", - !cast<Instruction>(NAME#"i64"), - FPR64, V128, VectorIndexD>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD modified immediate instructions -//---------------------------------------------------------------------------- - -class BaseSIMDModifiedImm<bit Q, bit op, dag oops, dag iops, - string asm, string op_string, - string cstr, list<dag> pattern> - : I<oops, iops, asm, op_string, cstr, pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<8> imm8; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = op; - let Inst{28-19} = 0b0111100000; - let Inst{18-16} = imm8{7-5}; - let Inst{11-10} = 0b01; - let Inst{9-5} = imm8{4-0}; - let Inst{4-0} = Rd; -} - -class BaseSIMDModifiedImmVector<bit Q, bit op, RegisterOperand vectype, - Operand immtype, dag opt_shift_iop, - string opt_shift, string asm, string kind, - list<dag> pattern> - : BaseSIMDModifiedImm<Q, op, (outs vectype:$Rd), - !con((ins immtype:$imm8), opt_shift_iop), asm, - "{\t$Rd" # kind # ", $imm8" # opt_shift # - "|" # kind # "\t$Rd, $imm8" # opt_shift # "}", - "", pattern> { - let DecoderMethod = "DecodeModImmInstruction"; -} - -class BaseSIMDModifiedImmVectorTied<bit Q, bit op, RegisterOperand vectype, - Operand immtype, dag opt_shift_iop, - string opt_shift, string asm, string kind, - list<dag> pattern> - : BaseSIMDModifiedImm<Q, op, (outs vectype:$dst), - !con((ins vectype:$Rd, immtype:$imm8), opt_shift_iop), - asm, "{\t$Rd" # kind # ", $imm8" # opt_shift # - "|" # kind # "\t$Rd, $imm8" # opt_shift # "}", - "$Rd = $dst", pattern> { - let DecoderMethod = "DecodeModImmTiedInstruction"; -} - -class BaseSIMDModifiedImmVectorShift<bit Q, bit op, bits<2> b15_b12, - RegisterOperand vectype, string asm, - string kind, list<dag> pattern> - : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255, - (ins logical_vec_shift:$shift), - "$shift", asm, kind, pattern> { - bits<2> shift; - let Inst{15} = b15_b12{1}; - let Inst{14-13} = shift; - let Inst{12} = b15_b12{0}; -} - -class BaseSIMDModifiedImmVectorShiftTied<bit Q, bit op, bits<2> b15_b12, - RegisterOperand vectype, string asm, - string kind, list<dag> pattern> - : BaseSIMDModifiedImmVectorTied<Q, op, vectype, imm0_255, - (ins logical_vec_shift:$shift), - "$shift", asm, kind, pattern> { - bits<2> shift; - let Inst{15} = b15_b12{1}; - let Inst{14-13} = shift; - let Inst{12} = b15_b12{0}; -} - - -class BaseSIMDModifiedImmVectorShiftHalf<bit Q, bit op, bits<2> b15_b12, - RegisterOperand vectype, string asm, - string kind, list<dag> pattern> - : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255, - (ins logical_vec_hw_shift:$shift), - "$shift", asm, kind, pattern> { - bits<2> shift; - let Inst{15} = b15_b12{1}; - let Inst{14} = 0; - let Inst{13} = shift{0}; - let Inst{12} = b15_b12{0}; -} - -class 
BaseSIMDModifiedImmVectorShiftHalfTied<bit Q, bit op, bits<2> b15_b12, - RegisterOperand vectype, string asm, - string kind, list<dag> pattern> - : BaseSIMDModifiedImmVectorTied<Q, op, vectype, imm0_255, - (ins logical_vec_hw_shift:$shift), - "$shift", asm, kind, pattern> { - bits<2> shift; - let Inst{15} = b15_b12{1}; - let Inst{14} = 0; - let Inst{13} = shift{0}; - let Inst{12} = b15_b12{0}; -} - -multiclass SIMDModifiedImmVectorShift<bit op, bits<2> hw_cmode, bits<2> w_cmode, - string asm> { - def v4i16 : BaseSIMDModifiedImmVectorShiftHalf<0, op, hw_cmode, V64, - asm, ".4h", []>; - def v8i16 : BaseSIMDModifiedImmVectorShiftHalf<1, op, hw_cmode, V128, - asm, ".8h", []>; - - def v2i32 : BaseSIMDModifiedImmVectorShift<0, op, w_cmode, V64, - asm, ".2s", []>; - def v4i32 : BaseSIMDModifiedImmVectorShift<1, op, w_cmode, V128, - asm, ".4s", []>; -} - -multiclass SIMDModifiedImmVectorShiftTied<bit op, bits<2> hw_cmode, - bits<2> w_cmode, string asm, - SDNode OpNode> { - def v4i16 : BaseSIMDModifiedImmVectorShiftHalfTied<0, op, hw_cmode, V64, - asm, ".4h", - [(set (v4i16 V64:$dst), (OpNode V64:$Rd, - imm0_255:$imm8, - (i32 imm:$shift)))]>; - def v8i16 : BaseSIMDModifiedImmVectorShiftHalfTied<1, op, hw_cmode, V128, - asm, ".8h", - [(set (v8i16 V128:$dst), (OpNode V128:$Rd, - imm0_255:$imm8, - (i32 imm:$shift)))]>; - - def v2i32 : BaseSIMDModifiedImmVectorShiftTied<0, op, w_cmode, V64, - asm, ".2s", - [(set (v2i32 V64:$dst), (OpNode V64:$Rd, - imm0_255:$imm8, - (i32 imm:$shift)))]>; - def v4i32 : BaseSIMDModifiedImmVectorShiftTied<1, op, w_cmode, V128, - asm, ".4s", - [(set (v4i32 V128:$dst), (OpNode V128:$Rd, - imm0_255:$imm8, - (i32 imm:$shift)))]>; -} - -class SIMDModifiedImmMoveMSL<bit Q, bit op, bits<4> cmode, - RegisterOperand vectype, string asm, - string kind, list<dag> pattern> - : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255, - (ins move_vec_shift:$shift), - "$shift", asm, kind, pattern> { - bits<1> shift; - let Inst{15-13} = cmode{3-1}; - let Inst{12} = shift; -} - -class SIMDModifiedImmVectorNoShift<bit Q, bit op, bits<4> cmode, - RegisterOperand vectype, - Operand imm_type, string asm, - string kind, list<dag> pattern> - : BaseSIMDModifiedImmVector<Q, op, vectype, imm_type, (ins), "", - asm, kind, pattern> { - let Inst{15-12} = cmode; -} - -class SIMDModifiedImmScalarNoShift<bit Q, bit op, bits<4> cmode, string asm, - list<dag> pattern> - : BaseSIMDModifiedImm<Q, op, (outs FPR64:$Rd), (ins simdimmtype10:$imm8), asm, - "\t$Rd, $imm8", "", pattern> { - let Inst{15-12} = cmode; - let DecoderMethod = "DecodeModImmInstruction"; -} - -//---------------------------------------------------------------------------- -// AdvSIMD indexed element -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDIndexed<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc, - RegisterOperand dst_reg, RegisterOperand lhs_reg, - RegisterOperand rhs_reg, Operand vec_idx, string asm, - string apple_kind, string dst_kind, string lhs_kind, - string rhs_kind, list<dag> pattern> - : I<(outs dst_reg:$Rd), (ins lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), - asm, - "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" # - "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28} = Scalar; - let Inst{27-24} = 0b1111; - let Inst{23-22} = size; - // Bit 21 must be set by the 
derived class. - let Inst{20-16} = Rm; - let Inst{15-12} = opc; - // Bit 11 must be set by the derived class. - let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc, - RegisterOperand dst_reg, RegisterOperand lhs_reg, - RegisterOperand rhs_reg, Operand vec_idx, string asm, - string apple_kind, string dst_kind, string lhs_kind, - string rhs_kind, list<dag> pattern> - : I<(outs dst_reg:$dst), - (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), asm, - "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" # - "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28} = Scalar; - let Inst{27-24} = 0b1111; - let Inst{23-22} = size; - // Bit 21 must be set by the derived class. - let Inst{20-16} = Rm; - let Inst{15-12} = opc; - // Bit 11 must be set by the derived class. - let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDFPIndexedSD<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2f32 V64:$Rd), - (OpNode (v2f32 V64:$Rn), - (v2f32 (ARM64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4f32 V128:$Rd), - (OpNode (v4f32 V128:$Rn), - (v4f32 (ARM64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v2i64_indexed : BaseSIMDIndexed<1, U, 0, 0b11, opc, - V128, V128, - V128, VectorIndexD, - asm, ".2d", ".2d", ".2d", ".d", - [(set (v2f64 V128:$Rd), - (OpNode (v2f64 V128:$Rn), - (v2f64 (ARM64duplane64 (v2f64 V128:$Rm), VectorIndexD:$idx))))]> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } - - def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, - FPR32Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", - [(set (f32 FPR32Op:$Rd), - (OpNode (f32 FPR32Op:$Rn), - (f32 (vector_extract (v4f32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b11, opc, - FPR64Op, FPR64Op, V128, VectorIndexD, - asm, ".d", "", "", ".d", - [(set (f64 FPR64Op:$Rd), - (OpNode (f64 FPR64Op:$Rn), - (f64 (vector_extract (v2f64 V128:$Rm), - VectorIndexD:$idx))))]> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } -} - -multiclass SIMDFPIndexedSDTiedPatterns<string INST, SDPatternOperator OpNode> { - // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar. - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64duplane32 (v4f32 V128:$Rm), - VectorIndexS:$idx))), - (!cast<Instruction>(INST # v2i32_indexed) - V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64dup (f32 FPR32Op:$Rm)))), - (!cast<Instruction>(INST # "v2i32_indexed") V64:$Rd, V64:$Rn, - (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; - - - // 2 variants for the .4s version: DUPLANE from 128-bit and DUP scalar. 
- def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64duplane32 (v4f32 V128:$Rm), - VectorIndexS:$idx))), - (!cast<Instruction>(INST # "v4i32_indexed") - V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64dup (f32 FPR32Op:$Rm)))), - (!cast<Instruction>(INST # "v4i32_indexed") V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; - - // 2 variants for the .2d version: DUPLANE from 128-bit and DUP scalar. - def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64duplane64 (v2f64 V128:$Rm), - VectorIndexD:$idx))), - (!cast<Instruction>(INST # "v2i64_indexed") - V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64dup (f64 FPR64Op:$Rm)))), - (!cast<Instruction>(INST # "v2i64_indexed") V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; - - // 2 variants for 32-bit scalar version: extract from .2s or from .4s - def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), - (vector_extract (v4f32 V128:$Rm), VectorIndexS:$idx))), - (!cast<Instruction>(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn, - V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), - (vector_extract (v2f32 V64:$Rm), VectorIndexS:$idx))), - (!cast<Instruction>(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn, - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; - - // 1 variant for 64-bit scalar version: extract from .1d or from .2d - def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), - (vector_extract (v2f64 V128:$Rm), VectorIndexD:$idx))), - (!cast<Instruction>(INST # "v1i64_indexed") FPR64:$Rd, FPR64:$Rn, - V128:$Rm, VectorIndexD:$idx)>; -} - -multiclass SIMDFPIndexedSDTied<bit U, bits<4> opc, string asm> { - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v2i64_indexed : BaseSIMDIndexedTied<1, U, 0, 0b11, opc, - V128, V128, - V128, VectorIndexD, - asm, ".2d", ".2d", ".2d", ".d", []> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } - - - def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, - FPR32Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b11, opc, - FPR64Op, FPR64Op, V128, VectorIndexD, - asm, ".d", "", "", ".d", []> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } -} - -multiclass SIMDIndexedHS<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, V64, V64, - V128_lo, VectorIndexH, - asm, ".4h", ".4h", ".4h", ".h", - [(set (v4i16 V64:$Rd), - (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm, ".8h", ".8h", ".8h", ".h", - [(set (v8i16 V128:$Rd), - (OpNode (v8i16 V128:$Rn), - (v8i16 (ARM64duplane16 (v8i16 
V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2i32 V64:$Rd), - (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4i32 V128:$Rd), - (OpNode (v4i32 V128:$Rn), - (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc, - FPR16Op, FPR16Op, V128_lo, VectorIndexH, - asm, ".h", "", "", ".h", []> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, - FPR32Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", - [(set (i32 FPR32Op:$Rd), - (OpNode FPR32Op:$Rn, - (i32 (vector_extract (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDVectorIndexedHS<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, - V64, V64, - V128_lo, VectorIndexH, - asm, ".4h", ".4h", ".4h", ".h", - [(set (v4i16 V64:$Rd), - (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm, ".8h", ".8h", ".8h", ".h", - [(set (v8i16 V128:$Rd), - (OpNode (v8i16 V128:$Rn), - (v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2i32 V64:$Rd), - (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4i32 V128:$Rd), - (OpNode (v4i32 V128:$Rn), - (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDVectorIndexedHSTied<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, V64, V64, - V128_lo, VectorIndexH, - asm, ".4h", ".4h", ".4h", ".h", - [(set (v4i16 V64:$dst), - (OpNode (v4i16 V64:$Rd),(v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm, ".8h", ".8h", ".8h", ".h", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), - (v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let 
Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, - V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2i32 V64:$dst), - (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), - (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, - V128, V64, - V128_lo, VectorIndexH, - asm, ".4s", ".4s", ".4h", ".h", - [(set (v4i32 V128:$Rd), - (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm#"2", ".4s", ".4s", ".8h", ".h", - [(set (v4i32 V128:$Rd), - (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))]> { - - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V128, V64, - V128, VectorIndexS, - asm, ".2d", ".2d", ".2s", ".s", - [(set (v2i64 V128:$Rd), - (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm#"2", ".2d", ".2d", ".4s", ".s", - [(set (v2i64 V128:$Rd), - (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc, - FPR32Op, FPR16Op, V128_lo, VectorIndexH, - asm, ".h", "", "", ".h", []> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, - FPR64Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm, - SDPatternOperator Accum> { - def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, - V128, V64, - V128_lo, VectorIndexH, - asm, ".4s", ".4s", ".4h", ".h", - [(set (v4i32 V128:$dst), - (Accum (v4i32 V128:$Rd), - (v4i32 (int_arm64_neon_sqdmull - (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - // FIXME: it would be nice to use the scalar (v1i32) instruction here, but an - // intermediate EXTRACT_SUBREG would be untyped. 
- def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), - (i32 (vector_extract (v4i32 - (int_arm64_neon_sqdmull (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx)))), - (i64 0))))), - (EXTRACT_SUBREG - (!cast<Instruction>(NAME # v4i16_indexed) - (SUBREG_TO_REG (i32 0), FPR32Op:$Rd, ssub), V64:$Rn, - V128_lo:$Rm, VectorIndexH:$idx), - ssub)>; - - def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm#"2", ".4s", ".4s", ".8h", ".h", - [(set (v4i32 V128:$dst), - (Accum (v4i32 V128:$Rd), - (v4i32 (int_arm64_neon_sqdmull - (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 - (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, - V128, V64, - V128, VectorIndexS, - asm, ".2d", ".2d", ".2s", ".s", - [(set (v2i64 V128:$dst), - (Accum (v2i64 V128:$Rd), - (v2i64 (int_arm64_neon_sqdmull - (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm#"2", ".2d", ".2d", ".4s", ".s", - [(set (v2i64 V128:$dst), - (Accum (v2i64 V128:$Rd), - (v2i64 (int_arm64_neon_sqdmull - (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 - (ARM64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc, - FPR32Op, FPR16Op, V128_lo, VectorIndexH, - asm, ".h", "", "", ".h", []> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - - def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, - FPR64Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", - [(set (i64 FPR64Op:$dst), - (Accum (i64 FPR64Op:$Rd), - (i64 (int_arm64_neon_sqdmulls_scalar - (i32 FPR32Op:$Rn), - (i32 (vector_extract (v4i32 V128:$Rm), - VectorIndexS:$idx))))))]> { - - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDVectorIndexedLongSD<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, - V128, V64, - V128_lo, VectorIndexH, - asm, ".4s", ".4s", ".4h", ".h", - [(set (v4i32 V128:$Rd), - (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm#"2", ".4s", ".4s", ".8h", ".h", - [(set (v4i32 V128:$Rd), - (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))]> { - - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V128, V64, - V128, VectorIndexS, - asm, ".2d", ".2d", ".2s", ".s", - [(set (v2i64 V128:$Rd), - (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm#"2", 
".2d", ".2d", ".4s", ".s", - [(set (v2i64 V128:$Rd), - (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - } -} - -multiclass SIMDVectorIndexedLongSDTied<bit U, bits<4> opc, string asm, - SDPatternOperator OpNode> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, - V128, V64, - V128_lo, VectorIndexH, - asm, ".4s", ".4s", ".4h", ".h", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm#"2", ".4s", ".4s", ".8h", ".h", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), - (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, - V128, V64, - V128, VectorIndexS, - asm, ".2d", ".2d", ".2s", ".s", - [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm#"2", ".2d", ".2d", ".4s", ".s", - [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), - (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - } -} - -//---------------------------------------------------------------------------- -// AdvSIMD scalar shift by immediate -//---------------------------------------------------------------------------- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDScalarShift<bit U, bits<5> opc, bits<7> fixed_imm, - RegisterClass regtype1, RegisterClass regtype2, - Operand immtype, string asm, list<dag> pattern> - : I<(outs regtype1:$Rd), (ins regtype2:$Rn, immtype:$imm), - asm, "\t$Rd, $Rn, $imm", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<7> imm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-23} = 0b111110; - let Inst{22-16} = fixed_imm; - let Inst{15-11} = opc; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm, - RegisterClass regtype1, RegisterClass regtype2, - Operand immtype, string asm, list<dag> pattern> - : I<(outs regtype1:$dst), (ins regtype1:$Rd, regtype2:$Rn, immtype:$imm), - asm, "\t$Rd, $Rn, $imm", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<7> imm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-23} = 0b111110; - let Inst{22-16} = fixed_imm; - let Inst{15-11} = opc; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - - -multiclass SIMDScalarRShiftSD<bit U, bits<5> opc, string asm> { - def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?}, - FPR32, FPR32, vecshiftR32, asm, []> { - let Inst{20-16} = imm{4-0}; - } - - def d : 
BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?}, - FPR64, FPR64, vecshiftR64, asm, []> { - let Inst{21-16} = imm{5-0}; - } -} - -multiclass SIMDScalarRShiftD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?}, - FPR64, FPR64, vecshiftR64, asm, - [(set (i64 FPR64:$Rd), - (OpNode (i64 FPR64:$Rn), (i32 vecshiftR64:$imm)))]> { - let Inst{21-16} = imm{5-0}; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftR64:$imm))), - (!cast<Instruction>(NAME # "d") FPR64:$Rn, vecshiftR64:$imm)>; -} - -multiclass SIMDScalarRShiftDTied<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def d : BaseSIMDScalarShiftTied<U, opc, {1,?,?,?,?,?,?}, - FPR64, FPR64, vecshiftR64, asm, - [(set (i64 FPR64:$dst), (OpNode (i64 FPR64:$Rd), (i64 FPR64:$Rn), - (i32 vecshiftR64:$imm)))]> { - let Inst{21-16} = imm{5-0}; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), - (i32 vecshiftR64:$imm))), - (!cast<Instruction>(NAME # "d") FPR64:$Rd, FPR64:$Rn, - vecshiftR64:$imm)>; -} - -multiclass SIMDScalarLShiftD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?}, - FPR64, FPR64, vecshiftL64, asm, - [(set (v1i64 FPR64:$Rd), - (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm)))]> { - let Inst{21-16} = imm{5-0}; - } -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -multiclass SIMDScalarLShiftDTied<bit U, bits<5> opc, string asm> { - def d : BaseSIMDScalarShiftTied<U, opc, {1,?,?,?,?,?,?}, - FPR64, FPR64, vecshiftL64, asm, []> { - let Inst{21-16} = imm{5-0}; - } -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?}, - FPR8, FPR16, vecshiftR8, asm, []> { - let Inst{18-16} = imm{2-0}; - } - - def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?}, - FPR16, FPR32, vecshiftR16, asm, []> { - let Inst{19-16} = imm{3-0}; - } - - def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?}, - FPR32, FPR64, vecshiftR32, asm, - [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn), vecshiftR32:$imm))]> { - let Inst{20-16} = imm{4-0}; - } -} - -multiclass SIMDScalarLShiftBHSD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?}, - FPR8, FPR8, vecshiftL8, asm, []> { - let Inst{18-16} = imm{2-0}; - } - - def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?}, - FPR16, FPR16, vecshiftL16, asm, []> { - let Inst{19-16} = imm{3-0}; - } - - def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?}, - FPR32, FPR32, vecshiftL32, asm, - [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn), (i32 vecshiftL32:$imm)))]> { - let Inst{20-16} = imm{4-0}; - } - - def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?}, - FPR64, FPR64, vecshiftL64, asm, - [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), - (i32 vecshiftL64:$imm)))]> { - let Inst{21-16} = imm{5-0}; - } -} - -multiclass SIMDScalarRShiftBHSD<bit U, bits<5> opc, string asm> { - def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?}, - FPR8, FPR8, vecshiftR8, asm, []> { - let Inst{18-16} = imm{2-0}; - } - - def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?}, - FPR16, FPR16, vecshiftR16, asm, []> { - let Inst{19-16} = imm{3-0}; - } - - def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?}, - FPR32, FPR32, vecshiftR32, asm, []> { - let Inst{20-16} = imm{4-0}; - } - - def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?}, - FPR64, 
FPR64, vecshiftR64, asm, []> { - let Inst{21-16} = imm{5-0}; - } -} - -//---------------------------------------------------------------------------- -// AdvSIMD vector x indexed element -//---------------------------------------------------------------------------- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDVectorShift<bit Q, bit U, bits<5> opc, bits<7> fixed_imm, - RegisterOperand dst_reg, RegisterOperand src_reg, - Operand immtype, - string asm, string dst_kind, string src_kind, - list<dag> pattern> - : I<(outs dst_reg:$Rd), (ins src_reg:$Rn, immtype:$imm), - asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" # - "|" # dst_kind # "\t$Rd, $Rn, $imm}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-23} = 0b011110; - let Inst{22-16} = fixed_imm; - let Inst{15-11} = opc; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDVectorShiftTied<bit Q, bit U, bits<5> opc, bits<7> fixed_imm, - RegisterOperand vectype1, RegisterOperand vectype2, - Operand immtype, - string asm, string dst_kind, string src_kind, - list<dag> pattern> - : I<(outs vectype1:$dst), (ins vectype1:$Rd, vectype2:$Rn, immtype:$imm), - asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" # - "|" # dst_kind # "\t$Rd, $Rn, $imm}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-23} = 0b011110; - let Inst{22-16} = fixed_imm; - let Inst{15-11} = opc; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm, - Intrinsic OpNode> { - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftR32, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR32, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftR64, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorRShiftSDToFP<bit U, bits<5> opc, string asm, - Intrinsic OpNode> { - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftR32, - asm, ".2s", ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR32, - asm, ".4s", ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 imm:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftR64, - asm, ".2d", ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorRShiftNarrowBHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, - V64, V128, vecshiftR16Narrow, - asm, ".8b", ".8h", - [(set (v8i8 V64:$Rd), (OpNode (v8i16 
V128:$Rn), vecshiftR16Narrow:$imm))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftR16Narrow, - asm#"2", ".16b", ".8h", []> { - bits<3> imm; - let Inst{18-16} = imm; - let hasSideEffects = 0; - } - - def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, - V64, V128, vecshiftR32Narrow, - asm, ".4h", ".4s", - [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftR32Narrow, - asm#"2", ".8h", ".4s", []> { - bits<4> imm; - let Inst{19-16} = imm; - let hasSideEffects = 0; - } - - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V64, V128, vecshiftR64Narrow, - asm, ".2s", ".2d", - [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR64Narrow, - asm#"2", ".4s", ".2d", []> { - bits<5> imm; - let Inst{20-16} = imm; - let hasSideEffects = 0; - } - - // TableGen doesn't like patters w/ INSERT_SUBREG on the instructions - // themselves, so put them here instead. - - // Patterns involving what's effectively an insert high and a normal - // intrinsic, represented by CONCAT_VECTORS. - def : Pat<(concat_vectors (v8i8 V64:$Rd),(OpNode (v8i16 V128:$Rn), - vecshiftR16Narrow:$imm)), - (!cast<Instruction>(NAME # "v16i8_shift") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR16Narrow:$imm)>; - def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), - vecshiftR32Narrow:$imm)), - (!cast<Instruction>(NAME # "v8i16_shift") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR32Narrow:$imm)>; - def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), - vecshiftR64Narrow:$imm)), - (!cast<Instruction>(NAME # "v4i32_shift") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR64Narrow:$imm)>; -} - -multiclass SIMDVectorLShiftBHSD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, - V64, V64, vecshiftL8, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), - (i32 vecshiftL8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftL8, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), - (i32 vecshiftL8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftL16, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), - (i32 vecshiftL16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftL16, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), - (i32 vecshiftL16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftL32, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), - (i32 vecshiftL32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftL32, - asm, ".4s", ".4s", - 
[(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), - (i32 vecshiftL32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftL64, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), - (i32 vecshiftL64:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorRShiftBHSD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, - V64, V64, vecshiftR8, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), - (i32 vecshiftR8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftR8, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), - (i32 vecshiftR8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftR16, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), - (i32 vecshiftR16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftR16, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), - (i32 vecshiftR16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftR32, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), - (i32 vecshiftR32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR32, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), - (i32 vecshiftR32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftR64, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), - (i32 vecshiftR64:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDVectorRShiftBHSDTied<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?}, - V64, V64, vecshiftR8, asm, ".8b", ".8b", - [(set (v8i8 V64:$dst), - (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), - (i32 vecshiftR8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftR8, asm, ".16b", ".16b", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), - (i32 vecshiftR8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftR16, asm, ".4h", ".4h", - [(set (v4i16 V64:$dst), - (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), - (i32 vecshiftR16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftR16, asm, ".8h", ".8h", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), - (i32 vecshiftR16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftR32, asm, ".2s", ".2s", - [(set (v2i32 V64:$dst), - (OpNode (v2i32 
V64:$Rd), (v2i32 V64:$Rn), - (i32 vecshiftR32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR32, asm, ".4s", ".4s", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), - (i32 vecshiftR32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftR64, - asm, ".2d", ".2d", [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn), - (i32 vecshiftR64:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorLShiftBHSDTied<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?}, - V64, V64, vecshiftL8, - asm, ".8b", ".8b", - [(set (v8i8 V64:$dst), - (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), - (i32 vecshiftL8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftL8, - asm, ".16b", ".16b", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), - (i32 vecshiftL8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftL16, - asm, ".4h", ".4h", - [(set (v4i16 V64:$dst), - (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), - (i32 vecshiftL16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftL16, - asm, ".8h", ".8h", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), - (i32 vecshiftL16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftL32, - asm, ".2s", ".2s", - [(set (v2i32 V64:$dst), - (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), - (i32 vecshiftL32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftL32, - asm, ".4s", ".4s", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), - (i32 vecshiftL32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftL64, - asm, ".2d", ".2d", - [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn), - (i32 vecshiftL64:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorLShiftLongBHSD<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode> { - def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, - V128, V64, vecshiftL8, asm, ".8h", ".8b", - [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), vecshiftL8:$imm))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftL8, - asm#"2", ".8h", ".16b", - [(set (v8i16 V128:$Rd), - (OpNode (extract_high_v16i8 V128:$Rn), vecshiftL8:$imm))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, - V128, V64, vecshiftL16, asm, ".4s", ".4h", - [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), vecshiftL16:$imm))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftL16, - asm#"2", ".4s", 
".8h", - [(set (v4i32 V128:$Rd), - (OpNode (extract_high_v8i16 V128:$Rn), vecshiftL16:$imm))]> { - - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V128, V64, vecshiftL32, asm, ".2d", ".2s", - [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), vecshiftL32:$imm))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftL32, - asm#"2", ".2d", ".4s", - [(set (v2i64 V128:$Rd), - (OpNode (extract_high_v4i32 V128:$Rn), vecshiftL32:$imm))]> { - bits<5> imm; - let Inst{20-16} = imm; - } -} - - -//--- -// Vector load/store -//--- -// SIMD ldX/stX no-index memory references don't allow the optional -// ", #0" constant and handle post-indexing explicitly, so we use -// a more specialized parse method for them. Otherwise, it's the same as -// the general am_noindex handling. -def MemorySIMDNoIndexOperand : AsmOperandClass { - let Name = "MemorySIMDNoIndex"; - let ParserMethod = "tryParseNoIndexMemory"; -} -def am_simdnoindex : Operand<i64>, - ComplexPattern<i64, 1, "SelectAddrModeNoIndex", []> { - let PrintMethod = "printAMNoIndex"; - let ParserMatchClass = MemorySIMDNoIndexOperand; - let MIOperandInfo = (ops GPR64sp:$base); - let DecoderMethod = "DecodeGPR64spRegisterClass"; -} - -class BaseSIMDLdSt<bit Q, bit L, bits<4> opcode, bits<2> size, - string asm, dag oops, dag iops, list<dag> pattern> - : I<oops, iops, asm, "\t$Vt, $vaddr", "", pattern> { - bits<5> Vt; - bits<5> vaddr; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-23} = 0b0011000; - let Inst{22} = L; - let Inst{21-16} = 0b000000; - let Inst{15-12} = opcode; - let Inst{11-10} = size; - let Inst{9-5} = vaddr; - let Inst{4-0} = Vt; -} - -class BaseSIMDLdStPost<bit Q, bit L, bits<4> opcode, bits<2> size, - string asm, dag oops, dag iops> - : I<oops, iops, asm, "\t$Vt, $vaddr, $Xm", "", []> { - bits<5> Vt; - bits<5> vaddr; - bits<5> Xm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-23} = 0b0011001; - let Inst{22} = L; - let Inst{21} = 0; - let Inst{20-16} = Xm; - let Inst{15-12} = opcode; - let Inst{11-10} = size; - let Inst{9-5} = vaddr; - let Inst{4-0} = Vt; - let DecoderMethod = "DecodeSIMDLdStPost"; -} - -// The immediate form of AdvSIMD post-indexed addressing is encoded with -// register post-index addressing from the zero register. -multiclass SIMDLdStAliases<string asm, string layout, string Count, - int Offset, int Size> { - // E.g. "ld1 { v0.8b, v1.8b }, [x1], #16" - // "ld1\t$Vt, $vaddr, #16" - // may get mapped to - // (LD1Twov8b_POST VecListTwo8b:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias<asm # "\t$Vt, $vaddr, #" # Offset, - (!cast<Instruction>(NAME # Count # "v" # layout # "_POST") - !cast<RegisterOperand>("VecList" # Count # layout):$Vt, - am_simdnoindex:$vaddr, XZR), 1>; - - // E.g. "ld1.8b { v0, v1 }, [x1], #16" - // "ld1.8b\t$Vt, $vaddr, #16" - // may get mapped to - // (LD1Twov8b_POST VecListTwo64:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias<asm # "." # layout # "\t$Vt, $vaddr, #" # Offset, - (!cast<Instruction>(NAME # Count # "v" # layout # "_POST") - !cast<RegisterOperand>("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr, XZR), 0>; - - // E.g. "ld1.8b { v0, v1 }, [x1]" - // "ld1\t$Vt, $vaddr" - // may get mapped to - // (LD1Twov8b VecListTwo64:$Vt, am_simdnoindex:$vaddr) - def : InstAlias<asm # "." 
# layout # "\t$Vt, $vaddr", - (!cast<Instruction>(NAME # Count # "v" # layout) - !cast<RegisterOperand>("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr), 0>; - - // E.g. "ld1.8b { v0, v1 }, [x1], x2" - // "ld1\t$Vt, $vaddr, $Xm" - // may get mapped to - // (LD1Twov8b_POST VecListTwo64:$Vt, am_simdnoindex:$vaddr, GPR64pi8:$Xm) - def : InstAlias<asm # "." # layout # "\t$Vt, $vaddr, $Xm", - (!cast<Instruction>(NAME # Count # "v" # layout # "_POST") - !cast<RegisterOperand>("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>; -} - -multiclass BaseSIMDLdN<string Count, string asm, string veclist, int Offset128, - int Offset64, bits<4> opcode> { - let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { - def v16b: BaseSIMDLdSt<1, 1, opcode, 0b00, asm, - (outs !cast<RegisterOperand>(veclist # "16b"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v8h : BaseSIMDLdSt<1, 1, opcode, 0b01, asm, - (outs !cast<RegisterOperand>(veclist # "8h"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v4s : BaseSIMDLdSt<1, 1, opcode, 0b10, asm, - (outs !cast<RegisterOperand>(veclist # "4s"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v2d : BaseSIMDLdSt<1, 1, opcode, 0b11, asm, - (outs !cast<RegisterOperand>(veclist # "2d"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v8b : BaseSIMDLdSt<0, 1, opcode, 0b00, asm, - (outs !cast<RegisterOperand>(veclist # "8b"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v4h : BaseSIMDLdSt<0, 1, opcode, 0b01, asm, - (outs !cast<RegisterOperand>(veclist # "4h"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v2s : BaseSIMDLdSt<0, 1, opcode, 0b10, asm, - (outs !cast<RegisterOperand>(veclist # "2s"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - - - def v16b_POST: BaseSIMDLdStPost<1, 1, opcode, 0b00, asm, - (outs !cast<RegisterOperand>(veclist # "16b"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; - def v8h_POST : BaseSIMDLdStPost<1, 1, opcode, 0b01, asm, - (outs !cast<RegisterOperand>(veclist # "8h"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; - def v4s_POST : BaseSIMDLdStPost<1, 1, opcode, 0b10, asm, - (outs !cast<RegisterOperand>(veclist # "4s"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; - def v2d_POST : BaseSIMDLdStPost<1, 1, opcode, 0b11, asm, - (outs !cast<RegisterOperand>(veclist # "2d"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; - def v8b_POST : BaseSIMDLdStPost<0, 1, opcode, 0b00, asm, - (outs !cast<RegisterOperand>(veclist # "8b"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; - def v4h_POST : BaseSIMDLdStPost<0, 1, opcode, 0b01, asm, - (outs !cast<RegisterOperand>(veclist # "4h"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; - def v2s_POST : BaseSIMDLdStPost<0, 1, opcode, 0b10, asm, - (outs !cast<RegisterOperand>(veclist # "2s"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; - } - - defm : SIMDLdStAliases<asm, "16b", Count, Offset128, 128>; - defm : SIMDLdStAliases<asm, "8h", Count, Offset128, 128>; - defm : SIMDLdStAliases<asm, "4s", Count, Offset128, 128>; - defm : SIMDLdStAliases<asm, "2d", Count, Offset128, 128>; - defm : SIMDLdStAliases<asm, "8b", Count, Offset64, 64>; - defm : SIMDLdStAliases<asm, "4h", Count, Offset64, 64>; - defm : SIMDLdStAliases<asm, "2s", Count, Offset64, 64>; -} - -// 
Only ld1/st1 has a v1d version. -multiclass BaseSIMDStN<string Count, string asm, string veclist, int Offset128, - int Offset64, bits<4> opcode> { - let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in { - def v16b : BaseSIMDLdSt<1, 0, opcode, 0b00, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "16b"):$Vt, - am_simdnoindex:$vaddr), []>; - def v8h : BaseSIMDLdSt<1, 0, opcode, 0b01, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "8h"):$Vt, - am_simdnoindex:$vaddr), []>; - def v4s : BaseSIMDLdSt<1, 0, opcode, 0b10, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "4s"):$Vt, - am_simdnoindex:$vaddr), []>; - def v2d : BaseSIMDLdSt<1, 0, opcode, 0b11, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "2d"):$Vt, - am_simdnoindex:$vaddr), []>; - def v8b : BaseSIMDLdSt<0, 0, opcode, 0b00, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "8b"):$Vt, - am_simdnoindex:$vaddr), []>; - def v4h : BaseSIMDLdSt<0, 0, opcode, 0b01, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "4h"):$Vt, - am_simdnoindex:$vaddr), []>; - def v2s : BaseSIMDLdSt<0, 0, opcode, 0b10, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "2s"):$Vt, - am_simdnoindex:$vaddr), []>; - - def v16b_POST : BaseSIMDLdStPost<1, 0, opcode, 0b00, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "16b"):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; - def v8h_POST : BaseSIMDLdStPost<1, 0, opcode, 0b01, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "8h"):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; - def v4s_POST : BaseSIMDLdStPost<1, 0, opcode, 0b10, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "4s"):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; - def v2d_POST : BaseSIMDLdStPost<1, 0, opcode, 0b11, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "2d"):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; - def v8b_POST : BaseSIMDLdStPost<0, 0, opcode, 0b00, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "8b"):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; - def v4h_POST : BaseSIMDLdStPost<0, 0, opcode, 0b01, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "4h"):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; - def v2s_POST : BaseSIMDLdStPost<0, 0, opcode, 0b10, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "2s"):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; - } - - defm : SIMDLdStAliases<asm, "16b", Count, Offset128, 128>; - defm : SIMDLdStAliases<asm, "8h", Count, Offset128, 128>; - defm : SIMDLdStAliases<asm, "4s", Count, Offset128, 128>; - defm : SIMDLdStAliases<asm, "2d", Count, Offset128, 128>; - defm : SIMDLdStAliases<asm, "8b", Count, Offset64, 64>; - defm : SIMDLdStAliases<asm, "4h", Count, Offset64, 64>; - defm : SIMDLdStAliases<asm, "2s", Count, Offset64, 64>; -} - -multiclass BaseSIMDLd1<string Count, string asm, string veclist, - int Offset128, int Offset64, bits<4> opcode> - : BaseSIMDLdN<Count, asm, veclist, Offset128, Offset64, opcode> { - - // LD1 instructions have extra "1d" variants. 
- let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { - def v1d : BaseSIMDLdSt<0, 1, opcode, 0b11, asm, - (outs !cast<RegisterOperand>(veclist # "1d"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - - def v1d_POST : BaseSIMDLdStPost<0, 1, opcode, 0b11, asm, - (outs !cast<RegisterOperand>(veclist # "1d"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; - } - - defm : SIMDLdStAliases<asm, "1d", Count, Offset64, 64>; -} - -multiclass BaseSIMDSt1<string Count, string asm, string veclist, - int Offset128, int Offset64, bits<4> opcode> - : BaseSIMDStN<Count, asm, veclist, Offset128, Offset64, opcode> { - - // ST1 instructions have extra "1d" variants. - let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in { - def v1d : BaseSIMDLdSt<0, 0, opcode, 0b11, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "1d"):$Vt, - am_simdnoindex:$vaddr), []>; - - def v1d_POST : BaseSIMDLdStPost<0, 0, opcode, 0b11, asm, (outs), - (ins !cast<RegisterOperand>(veclist # "1d"):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; - } - - defm : SIMDLdStAliases<asm, "1d", Count, Offset64, 64>; -} - -multiclass SIMDLd1Multiple<string asm> { - defm One : BaseSIMDLd1<"One", asm, "VecListOne", 16, 8, 0b0111>; - defm Two : BaseSIMDLd1<"Two", asm, "VecListTwo", 32, 16, 0b1010>; - defm Three : BaseSIMDLd1<"Three", asm, "VecListThree", 48, 24, 0b0110>; - defm Four : BaseSIMDLd1<"Four", asm, "VecListFour", 64, 32, 0b0010>; -} - -multiclass SIMDSt1Multiple<string asm> { - defm One : BaseSIMDSt1<"One", asm, "VecListOne", 16, 8, 0b0111>; - defm Two : BaseSIMDSt1<"Two", asm, "VecListTwo", 32, 16, 0b1010>; - defm Three : BaseSIMDSt1<"Three", asm, "VecListThree", 48, 24, 0b0110>; - defm Four : BaseSIMDSt1<"Four", asm, "VecListFour", 64, 32, 0b0010>; -} - -multiclass SIMDLd2Multiple<string asm> { - defm Two : BaseSIMDLdN<"Two", asm, "VecListTwo", 32, 16, 0b1000>; -} - -multiclass SIMDSt2Multiple<string asm> { - defm Two : BaseSIMDStN<"Two", asm, "VecListTwo", 32, 16, 0b1000>; -} - -multiclass SIMDLd3Multiple<string asm> { - defm Three : BaseSIMDLdN<"Three", asm, "VecListThree", 48, 24, 0b0100>; -} - -multiclass SIMDSt3Multiple<string asm> { - defm Three : BaseSIMDStN<"Three", asm, "VecListThree", 48, 24, 0b0100>; -} - -multiclass SIMDLd4Multiple<string asm> { - defm Four : BaseSIMDLdN<"Four", asm, "VecListFour", 64, 32, 0b0000>; -} - -multiclass SIMDSt4Multiple<string asm> { - defm Four : BaseSIMDStN<"Four", asm, "VecListFour", 64, 32, 0b0000>; -} - -//--- -// AdvSIMD Load/store single-element -//--- - -class BaseSIMDLdStSingle<bit L, bit R, bits<3> opcode, - string asm, string operands, dag oops, dag iops, - list<dag> pattern> - : I<oops, iops, asm, operands, "", pattern> { - bits<5> Vt; - bits<5> vaddr; - let Inst{31} = 0; - let Inst{29-24} = 0b001101; - let Inst{22} = L; - let Inst{21} = R; - let Inst{15-13} = opcode; - let Inst{9-5} = vaddr; - let Inst{4-0} = Vt; - let DecoderMethod = "DecodeSIMDLdStSingle"; -} - -class BaseSIMDLdStSingleTied<bit L, bit R, bits<3> opcode, - string asm, string operands, dag oops, dag iops, - list<dag> pattern> - : I<oops, iops, asm, operands, "$Vt = $dst", pattern> { - bits<5> Vt; - bits<5> vaddr; - let Inst{31} = 0; - let Inst{29-24} = 0b001101; - let Inst{22} = L; - let Inst{21} = R; - let Inst{15-13} = opcode; - let Inst{9-5} = vaddr; - let Inst{4-0} = Vt; - let DecoderMethod = "DecodeSIMDLdStSingleTied"; -} - - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDLdR<bit Q, bit R, bits<3> opcode, bit S, 
bits<2> size, string asm, - Operand listtype> - : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, $vaddr", - (outs listtype:$Vt), (ins am_simdnoindex:$vaddr), []> { - let Inst{30} = Q; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = S; - let Inst{11-10} = size; -} -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDLdRPost<bit Q, bit R, bits<3> opcode, bit S, bits<2> size, - string asm, Operand listtype, Operand GPR64pi> - : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, $vaddr, $Xm", - (outs listtype:$Vt), - (ins am_simdnoindex:$vaddr, GPR64pi:$Xm), []> { - bits<5> Xm; - let Inst{30} = Q; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = S; - let Inst{11-10} = size; -} - -multiclass SIMDLdrAliases<string asm, string layout, string Count, - int Offset, int Size> { - // E.g. "ld1r { v0.8b }, [x1], #1" - // "ld1r.8b\t$Vt, $vaddr, #1" - // may get mapped to - // (LD1Rv8b_POST VecListOne8b:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias<asm # "\t$Vt, $vaddr, #" # Offset, - (!cast<Instruction>(NAME # "v" # layout # "_POST") - !cast<RegisterOperand>("VecList" # Count # layout):$Vt, - am_simdnoindex:$vaddr, XZR), 1>; - - // E.g. "ld1r.8b { v0 }, [x1], #1" - // "ld1r.8b\t$Vt, $vaddr, #1" - // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias<asm # "." # layout # "\t$Vt, $vaddr, #" # Offset, - (!cast<Instruction>(NAME # "v" # layout # "_POST") - !cast<RegisterOperand>("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr, XZR), 0>; - - // E.g. "ld1r.8b { v0 }, [x1]" - // "ld1r.8b\t$Vt, $vaddr" - // may get mapped to - // (LD1Rv8b VecListOne64:$Vt, am_simdnoindex:$vaddr) - def : InstAlias<asm # "." # layout # "\t$Vt, $vaddr", - (!cast<Instruction>(NAME # "v" # layout) - !cast<RegisterOperand>("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr), 0>; - - // E.g. "ld1r.8b { v0 }, [x1], x2" - // "ld1r.8b\t$Vt, $vaddr, $Xm" - // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, GPR64pi1:$Xm) - def : InstAlias<asm # "." 
# layout # "\t$Vt, $vaddr, $Xm", - (!cast<Instruction>(NAME # "v" # layout # "_POST") - !cast<RegisterOperand>("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>; -} - -multiclass SIMDLdR<bit R, bits<3> opcode, bit S, string asm, string Count, - int Offset1, int Offset2, int Offset4, int Offset8> { - def v8b : BaseSIMDLdR<0, R, opcode, S, 0b00, asm, - !cast<Operand>("VecList" # Count # "8b")>; - def v16b: BaseSIMDLdR<1, R, opcode, S, 0b00, asm, - !cast<Operand>("VecList" # Count #"16b")>; - def v4h : BaseSIMDLdR<0, R, opcode, S, 0b01, asm, - !cast<Operand>("VecList" # Count #"4h")>; - def v8h : BaseSIMDLdR<1, R, opcode, S, 0b01, asm, - !cast<Operand>("VecList" # Count #"8h")>; - def v2s : BaseSIMDLdR<0, R, opcode, S, 0b10, asm, - !cast<Operand>("VecList" # Count #"2s")>; - def v4s : BaseSIMDLdR<1, R, opcode, S, 0b10, asm, - !cast<Operand>("VecList" # Count #"4s")>; - def v1d : BaseSIMDLdR<0, R, opcode, S, 0b11, asm, - !cast<Operand>("VecList" # Count #"1d")>; - def v2d : BaseSIMDLdR<1, R, opcode, S, 0b11, asm, - !cast<Operand>("VecList" # Count #"2d")>; - - def v8b_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b00, asm, - !cast<Operand>("VecList" # Count # "8b"), - !cast<Operand>("GPR64pi" # Offset1)>; - def v16b_POST: BaseSIMDLdRPost<1, R, opcode, S, 0b00, asm, - !cast<Operand>("VecList" # Count # "16b"), - !cast<Operand>("GPR64pi" # Offset1)>; - def v4h_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b01, asm, - !cast<Operand>("VecList" # Count # "4h"), - !cast<Operand>("GPR64pi" # Offset2)>; - def v8h_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b01, asm, - !cast<Operand>("VecList" # Count # "8h"), - !cast<Operand>("GPR64pi" # Offset2)>; - def v2s_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b10, asm, - !cast<Operand>("VecList" # Count # "2s"), - !cast<Operand>("GPR64pi" # Offset4)>; - def v4s_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b10, asm, - !cast<Operand>("VecList" # Count # "4s"), - !cast<Operand>("GPR64pi" # Offset4)>; - def v1d_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b11, asm, - !cast<Operand>("VecList" # Count # "1d"), - !cast<Operand>("GPR64pi" # Offset8)>; - def v2d_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b11, asm, - !cast<Operand>("VecList" # Count # "2d"), - !cast<Operand>("GPR64pi" # Offset8)>; - - defm : SIMDLdrAliases<asm, "8b", Count, Offset1, 64>; - defm : SIMDLdrAliases<asm, "16b", Count, Offset1, 128>; - defm : SIMDLdrAliases<asm, "4h", Count, Offset2, 64>; - defm : SIMDLdrAliases<asm, "8h", Count, Offset2, 128>; - defm : SIMDLdrAliases<asm, "2s", Count, Offset4, 64>; - defm : SIMDLdrAliases<asm, "4s", Count, Offset4, 128>; - defm : SIMDLdrAliases<asm, "1d", Count, Offset8, 64>; - defm : SIMDLdrAliases<asm, "2d", Count, Offset8, 128>; -} - -class SIMDLdStSingleB<bit L, bit R, bits<3> opcode, string asm, - dag oops, dag iops, list<dag> pattern> - : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops, - pattern> { - // idx encoded in Q:S:size fields. - bits<4> idx; - let Inst{30} = idx{3}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{2}; - let Inst{11-10} = idx{1-0}; -} -class SIMDLdStSingleBTied<bit L, bit R, bits<3> opcode, string asm, - dag oops, dag iops, list<dag> pattern> - : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops, - pattern> { - // idx encoded in Q:S:size fields. 
- bits<4> idx; - let Inst{30} = idx{3}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{2}; - let Inst{11-10} = idx{1-0}; -} -class SIMDLdStSingleBPost<bit L, bit R, bits<3> opcode, string asm, - dag oops, dag iops> - : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm", - oops, iops, []> { - // idx encoded in Q:S:size fields. - bits<4> idx; - bits<5> Xm; - let Inst{30} = idx{3}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{2}; - let Inst{11-10} = idx{1-0}; -} -class SIMDLdStSingleBTiedPost<bit L, bit R, bits<3> opcode, string asm, - dag oops, dag iops> - : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm", - oops, iops, []> { - // idx encoded in Q:S:size fields. - bits<4> idx; - bits<5> Xm; - let Inst{30} = idx{3}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{2}; - let Inst{11-10} = idx{1-0}; -} - -class SIMDLdStSingleH<bit L, bit R, bits<3> opcode, bit size, string asm, - dag oops, dag iops, list<dag> pattern> - : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops, - pattern> { - // idx encoded in Q:S:size<1> fields. - bits<3> idx; - let Inst{30} = idx{2}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{1}; - let Inst{11} = idx{0}; - let Inst{10} = size; -} -class SIMDLdStSingleHTied<bit L, bit R, bits<3> opcode, bit size, string asm, - dag oops, dag iops, list<dag> pattern> - : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops, - pattern> { - // idx encoded in Q:S:size<1> fields. - bits<3> idx; - let Inst{30} = idx{2}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{1}; - let Inst{11} = idx{0}; - let Inst{10} = size; -} - -class SIMDLdStSingleHPost<bit L, bit R, bits<3> opcode, bit size, string asm, - dag oops, dag iops> - : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm", - oops, iops, []> { - // idx encoded in Q:S:size<1> fields. - bits<3> idx; - bits<5> Xm; - let Inst{30} = idx{2}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{1}; - let Inst{11} = idx{0}; - let Inst{10} = size; -} -class SIMDLdStSingleHTiedPost<bit L, bit R, bits<3> opcode, bit size, string asm, - dag oops, dag iops> - : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm", - oops, iops, []> { - // idx encoded in Q:S:size<1> fields. - bits<3> idx; - bits<5> Xm; - let Inst{30} = idx{2}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{1}; - let Inst{11} = idx{0}; - let Inst{10} = size; -} -class SIMDLdStSingleS<bit L, bit R, bits<3> opcode, bits<2> size, string asm, - dag oops, dag iops, list<dag> pattern> - : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops, - pattern> { - // idx encoded in Q:S fields. - bits<2> idx; - let Inst{30} = idx{1}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{0}; - let Inst{11-10} = size; -} -class SIMDLdStSingleSTied<bit L, bit R, bits<3> opcode, bits<2> size, string asm, - dag oops, dag iops, list<dag> pattern> - : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops, - pattern> { - // idx encoded in Q:S fields. - bits<2> idx; - let Inst{30} = idx{1}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{0}; - let Inst{11-10} = size; -} -class SIMDLdStSingleSPost<bit L, bit R, bits<3> opcode, bits<2> size, - string asm, dag oops, dag iops> - : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm", - oops, iops, []> { - // idx encoded in Q:S fields. 
- bits<2> idx; - bits<5> Xm; - let Inst{30} = idx{1}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{0}; - let Inst{11-10} = size; -} -class SIMDLdStSingleSTiedPost<bit L, bit R, bits<3> opcode, bits<2> size, - string asm, dag oops, dag iops> - : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm", - oops, iops, []> { - // idx encoded in Q:S fields. - bits<2> idx; - bits<5> Xm; - let Inst{30} = idx{1}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{0}; - let Inst{11-10} = size; -} -class SIMDLdStSingleD<bit L, bit R, bits<3> opcode, bits<2> size, string asm, - dag oops, dag iops, list<dag> pattern> - : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops, - pattern> { - // idx encoded in Q field. - bits<1> idx; - let Inst{30} = idx; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = 0; - let Inst{11-10} = size; -} -class SIMDLdStSingleDTied<bit L, bit R, bits<3> opcode, bits<2> size, string asm, - dag oops, dag iops, list<dag> pattern> - : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops, - pattern> { - // idx encoded in Q field. - bits<1> idx; - let Inst{30} = idx; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = 0; - let Inst{11-10} = size; -} -class SIMDLdStSingleDPost<bit L, bit R, bits<3> opcode, bits<2> size, - string asm, dag oops, dag iops> - : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm", - oops, iops, []> { - // idx encoded in Q field. - bits<1> idx; - bits<5> Xm; - let Inst{30} = idx; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = 0; - let Inst{11-10} = size; -} -class SIMDLdStSingleDTiedPost<bit L, bit R, bits<3> opcode, bits<2> size, - string asm, dag oops, dag iops> - : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm", - oops, iops, []> { - // idx encoded in Q field. 
- bits<1> idx; - bits<5> Xm; - let Inst{30} = idx; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = 0; - let Inst{11-10} = size; -} - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDLdSingleBTied<bit R, bits<3> opcode, string asm, - RegisterOperand listtype, - RegisterOperand GPR64pi> { - def i8 : SIMDLdStSingleBTied<1, R, opcode, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr), []>; - - def i8_POST : SIMDLdStSingleBTiedPost<1, R, opcode, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDLdSingleHTied<bit R, bits<3> opcode, bit size, string asm, - RegisterOperand listtype, - RegisterOperand GPR64pi> { - def i16 : SIMDLdStSingleHTied<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr), []>; - - def i16_POST : SIMDLdStSingleHTiedPost<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDLdSingleSTied<bit R, bits<3> opcode, bits<2> size,string asm, - RegisterOperand listtype, - RegisterOperand GPR64pi> { - def i32 : SIMDLdStSingleSTied<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr), []>; - - def i32_POST : SIMDLdStSingleSTiedPost<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDLdSingleDTied<bit R, bits<3> opcode, bits<2> size, string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i64 : SIMDLdStSingleDTied<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr), []>; - - def i64_POST : SIMDLdStSingleDTiedPost<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -multiclass SIMDStSingleB<bit R, bits<3> opcode, string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i8 : SIMDLdStSingleB<0, R, opcode, asm, - (outs), (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr), []>; - - def i8_POST : SIMDLdStSingleBPost<0, R, opcode, asm, - (outs), (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -multiclass SIMDStSingleH<bit R, bits<3> opcode, bit size, string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i16 : SIMDLdStSingleH<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr), []>; - - def i16_POST : SIMDLdStSingleHPost<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -multiclass SIMDStSingleS<bit R, bits<3> opcode, bits<2> size,string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i32 : SIMDLdStSingleS<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr), []>; - - def i32_POST : SIMDLdStSingleSPost<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr, 
GPR64pi:$Xm)>; -} -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -multiclass SIMDStSingleD<bit R, bits<3> opcode, bits<2> size, string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i64 : SIMDLdStSingleD<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr), []>; - - def i64_POST : SIMDLdStSingleDPost<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} - -multiclass SIMDLdStSingleAliases<string asm, string layout, string Type, - string Count, int Offset, Operand idxtype> { - // E.g. "ld1 { v0.8b }[0], [x1], #1" - // "ld1\t$Vt, $vaddr, #1" - // may get mapped to - // (LD1Rv8b_POST VecListOne8b:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias<asm # "\t$Vt$idx, $vaddr, #" # Offset, - (!cast<Instruction>(NAME # Type # "_POST") - !cast<RegisterOperand>("VecList" # Count # layout):$Vt, - idxtype:$idx, am_simdnoindex:$vaddr, XZR), 1>; - - // E.g. "ld1.8b { v0 }[0], [x1], #1" - // "ld1.8b\t$Vt, $vaddr, #1" - // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias<asm # "." # layout # "\t$Vt$idx, $vaddr, #" # Offset, - (!cast<Instruction>(NAME # Type # "_POST") - !cast<RegisterOperand>("VecList" # Count # "128"):$Vt, - idxtype:$idx, am_simdnoindex:$vaddr, XZR), 0>; - - // E.g. "ld1.8b { v0 }[0], [x1]" - // "ld1.8b\t$Vt, $vaddr" - // may get mapped to - // (LD1Rv8b VecListOne64:$Vt, am_simdnoindex:$vaddr) - def : InstAlias<asm # "." # layout # "\t$Vt$idx, $vaddr", - (!cast<Instruction>(NAME # Type) - !cast<RegisterOperand>("VecList" # Count # "128"):$Vt, - idxtype:$idx, am_simdnoindex:$vaddr), 0>; - - // E.g. "ld1.8b { v0 }[0], [x1], x2" - // "ld1.8b\t$Vt, $vaddr, $Xm" - // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, GPR64pi1:$Xm) - def : InstAlias<asm # "." 
# layout # "\t$Vt$idx, $vaddr, $Xm", - (!cast<Instruction>(NAME # Type # "_POST") - !cast<RegisterOperand>("VecList" # Count # "128"):$Vt, - idxtype:$idx, am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>; -} - -multiclass SIMDLdSt1SingleAliases<string asm> { - defm : SIMDLdStSingleAliases<asm, "b", "i8", "One", 1, VectorIndexB>; - defm : SIMDLdStSingleAliases<asm, "h", "i16", "One", 2, VectorIndexH>; - defm : SIMDLdStSingleAliases<asm, "s", "i32", "One", 4, VectorIndexS>; - defm : SIMDLdStSingleAliases<asm, "d", "i64", "One", 8, VectorIndexD>; -} - -multiclass SIMDLdSt2SingleAliases<string asm> { - defm : SIMDLdStSingleAliases<asm, "b", "i8", "Two", 2, VectorIndexB>; - defm : SIMDLdStSingleAliases<asm, "h", "i16", "Two", 4, VectorIndexH>; - defm : SIMDLdStSingleAliases<asm, "s", "i32", "Two", 8, VectorIndexS>; - defm : SIMDLdStSingleAliases<asm, "d", "i64", "Two", 16, VectorIndexD>; -} - -multiclass SIMDLdSt3SingleAliases<string asm> { - defm : SIMDLdStSingleAliases<asm, "b", "i8", "Three", 3, VectorIndexB>; - defm : SIMDLdStSingleAliases<asm, "h", "i16", "Three", 6, VectorIndexH>; - defm : SIMDLdStSingleAliases<asm, "s", "i32", "Three", 12, VectorIndexS>; - defm : SIMDLdStSingleAliases<asm, "d", "i64", "Three", 24, VectorIndexD>; -} - -multiclass SIMDLdSt4SingleAliases<string asm> { - defm : SIMDLdStSingleAliases<asm, "b", "i8", "Four", 4, VectorIndexB>; - defm : SIMDLdStSingleAliases<asm, "h", "i16", "Four", 8, VectorIndexH>; - defm : SIMDLdStSingleAliases<asm, "s", "i32", "Four", 16, VectorIndexS>; - defm : SIMDLdStSingleAliases<asm, "d", "i64", "Four", 32, VectorIndexD>; -} - -//---------------------------------------------------------------------------- -// Crypto extensions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class AESBase<bits<4> opc, string asm, dag outs, dag ins, string cstr, - list<dag> pat> - : I<outs, ins, asm, "{\t$Rd.16b, $Rn.16b|.16b\t$Rd, $Rn}", cstr, pat>, - Sched<[WriteV]>{ - bits<5> Rd; - bits<5> Rn; - let Inst{31-16} = 0b0100111000101000; - let Inst{15-12} = opc; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class AESInst<bits<4> opc, string asm, Intrinsic OpNode> - : AESBase<opc, asm, (outs V128:$Rd), (ins V128:$Rn), "", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - -class AESTiedInst<bits<4> opc, string asm, Intrinsic OpNode> - : AESBase<opc, asm, (outs V128:$dst), (ins V128:$Rd, V128:$Rn), - "$Rd = $dst", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>; - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class SHA3OpTiedInst<bits<3> opc, string asm, string dst_lhs_kind, - dag oops, dag iops, list<dag> pat> - : I<oops, iops, asm, - "{\t$Rd" # dst_lhs_kind # ", $Rn" # dst_lhs_kind # ", $Rm.4s" # - "|.4s\t$Rd, $Rn, $Rm}", "$Rd = $dst", pat>, - Sched<[WriteV]>{ - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-21} = 0b01011110000; - let Inst{20-16} = Rm; - let Inst{15} = 0; - let Inst{14-12} = opc; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SHATiedInstQSV<bits<3> opc, string asm, Intrinsic OpNode> - : SHA3OpTiedInst<opc, asm, "", (outs FPR128:$dst), - (ins FPR128:$Rd, FPR32:$Rn, V128:$Rm), - [(set (v4i32 FPR128:$dst), - (OpNode (v4i32 FPR128:$Rd), (i32 FPR32:$Rn), - (v4i32 V128:$Rm)))]>; - -class SHATiedInstVVV<bits<3> opc, string asm, Intrinsic OpNode> - : SHA3OpTiedInst<opc, asm, ".4s", (outs V128:$dst), - (ins V128:$Rd, 
V128:$Rn, V128:$Rm), - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), - (v4i32 V128:$Rm)))]>; - -class SHATiedInstQQV<bits<3> opc, string asm, Intrinsic OpNode> - : SHA3OpTiedInst<opc, asm, "", (outs FPR128:$dst), - (ins FPR128:$Rd, FPR128:$Rn, V128:$Rm), - [(set (v4i32 FPR128:$dst), - (OpNode (v4i32 FPR128:$Rd), (v4i32 FPR128:$Rn), - (v4i32 V128:$Rm)))]>; - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class SHA2OpInst<bits<4> opc, string asm, string kind, - string cstr, dag oops, dag iops, - list<dag> pat> - : I<oops, iops, asm, "{\t$Rd" # kind # ", $Rn" # kind # - "|" # kind # "\t$Rd, $Rn}", cstr, pat>, - Sched<[WriteV]>{ - bits<5> Rd; - bits<5> Rn; - let Inst{31-16} = 0b0101111000101000; - let Inst{15-12} = opc; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SHATiedInstVV<bits<4> opc, string asm, Intrinsic OpNode> - : SHA2OpInst<opc, asm, ".4s", "$Rd = $dst", (outs V128:$dst), - (ins V128:$Rd, V128:$Rn), - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>; - -class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode> - : SHA2OpInst<opc, asm, "", "", (outs FPR32:$Rd), (ins FPR32:$Rn), - [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>; - -// Allow the size specifier tokens to be upper case, not just lower. -def : TokenAlias<".8B", ".8b">; -def : TokenAlias<".4H", ".4h">; -def : TokenAlias<".2S", ".2s">; -def : TokenAlias<".1D", ".1d">; -def : TokenAlias<".16B", ".16b">; -def : TokenAlias<".8H", ".8h">; -def : TokenAlias<".4S", ".4s">; -def : TokenAlias<".2D", ".2d">; -def : TokenAlias<".B", ".b">; -def : TokenAlias<".H", ".h">; -def : TokenAlias<".S", ".s">; -def : TokenAlias<".D", ".d">; diff --git a/lib/Target/ARM64/ARM64InstrInfo.cpp b/lib/Target/ARM64/ARM64InstrInfo.cpp deleted file mode 100644 index 8f11757..0000000 --- a/lib/Target/ARM64/ARM64InstrInfo.cpp +++ /dev/null @@ -1,1864 +0,0 @@ -//===- ARM64InstrInfo.cpp - ARM64 Instruction Information -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM64 implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#include "ARM64InstrInfo.h" -#include "ARM64Subtarget.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/MC/MCInst.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_INSTRINFO_CTOR_DTOR -#include "ARM64GenInstrInfo.inc" - -using namespace llvm; - -ARM64InstrInfo::ARM64InstrInfo(const ARM64Subtarget &STI) - : ARM64GenInstrInfo(ARM64::ADJCALLSTACKDOWN, ARM64::ADJCALLSTACKUP), - RI(this, &STI), Subtarget(STI) {} - -/// GetInstSize - Return the number of bytes of code the specified -/// instruction may be. This returns the maximum number of bytes. -unsigned ARM64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - const MCInstrDesc &Desc = MI->getDesc(); - - switch (Desc.getOpcode()) { - default: - // Anything not explicitly designated otherwise is a nomal 4-byte insn. 
- return 4; - case TargetOpcode::DBG_VALUE: - case TargetOpcode::EH_LABEL: - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - return 0; - } - - llvm_unreachable("GetInstSizeInBytes()- Unable to determin insn size"); -} - -static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, - SmallVectorImpl<MachineOperand> &Cond) { - // Block ends with fall-through condbranch. - switch (LastInst->getOpcode()) { - default: - llvm_unreachable("Unknown branch instruction?"); - case ARM64::Bcc: - Target = LastInst->getOperand(1).getMBB(); - Cond.push_back(LastInst->getOperand(0)); - break; - case ARM64::CBZW: - case ARM64::CBZX: - case ARM64::CBNZW: - case ARM64::CBNZX: - Target = LastInst->getOperand(1).getMBB(); - Cond.push_back(MachineOperand::CreateImm(-1)); - Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); - Cond.push_back(LastInst->getOperand(0)); - break; - case ARM64::TBZ: - case ARM64::TBNZ: - Target = LastInst->getOperand(2).getMBB(); - Cond.push_back(MachineOperand::CreateImm(-1)); - Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); - Cond.push_back(LastInst->getOperand(0)); - Cond.push_back(LastInst->getOperand(1)); - } -} - -// Branch analysis. -bool ARM64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, - MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) - return false; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } - if (!isUnpredicatedTerminator(I)) - return false; - - // Get the last instruction in the block. - MachineInstr *LastInst = I; - - // If there is only one terminator instruction, process it. - unsigned LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (isUncondBranchOpcode(LastOpc)) { - TBB = LastInst->getOperand(0).getMBB(); - return false; - } - if (isCondBranchOpcode(LastOpc)) { - // Block ends with fall-through condbranch. - parseCondBranch(LastInst, TBB, Cond); - return false; - } - return true; // Can't handle indirect branch. - } - - // Get the instruction before it if it is a terminator. - MachineInstr *SecondLastInst = I; - unsigned SecondLastOpc = SecondLastInst->getOpcode(); - - // If AllowModify is true and the block ends with two or more unconditional - // branches, delete all but the first unconditional branch. - if (AllowModify && isUncondBranchOpcode(LastOpc)) { - while (isUncondBranchOpcode(SecondLastOpc)) { - LastInst->eraseFromParent(); - LastInst = SecondLastInst; - LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - // Return now the only terminator is an unconditional branch. - TBB = LastInst->getOperand(0).getMBB(); - return false; - } else { - SecondLastInst = I; - SecondLastOpc = SecondLastInst->getOpcode(); - } - } - } - - // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) - return true; - - // If the block ends with a B and a Bcc, handle it. - if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - parseCondBranch(SecondLastInst, TBB, Cond); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } - - // If the block ends with two unconditional branches, handle it. The second - // one is not executed, so remove it. 
- if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return false; - } - - // ...likewise if it ends with an indirect branch followed by an unconditional - // branch. - if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return true; - } - - // Otherwise, can't handle this. - return true; -} - -bool ARM64InstrInfo::ReverseBranchCondition( - SmallVectorImpl<MachineOperand> &Cond) const { - if (Cond[0].getImm() != -1) { - // Regular Bcc - ARM64CC::CondCode CC = (ARM64CC::CondCode)(int)Cond[0].getImm(); - Cond[0].setImm(ARM64CC::getInvertedCondCode(CC)); - } else { - // Folded compare-and-branch - switch (Cond[1].getImm()) { - default: - llvm_unreachable("Unknown conditional branch!"); - case ARM64::CBZW: - Cond[1].setImm(ARM64::CBNZW); - break; - case ARM64::CBNZW: - Cond[1].setImm(ARM64::CBZW); - break; - case ARM64::CBZX: - Cond[1].setImm(ARM64::CBNZX); - break; - case ARM64::CBNZX: - Cond[1].setImm(ARM64::CBZX); - break; - case ARM64::TBZ: - Cond[1].setImm(ARM64::TBNZ); - break; - case ARM64::TBNZ: - Cond[1].setImm(ARM64::TBZ); - break; - } - } - - return false; -} - -unsigned ARM64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) - return 0; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return 0; - --I; - } - if (!isUncondBranchOpcode(I->getOpcode()) && - !isCondBranchOpcode(I->getOpcode())) - return 0; - - // Remove the branch. - I->eraseFromParent(); - - I = MBB.end(); - - if (I == MBB.begin()) - return 1; - --I; - if (!isCondBranchOpcode(I->getOpcode())) - return 1; - - // Remove the branch. - I->eraseFromParent(); - return 2; -} - -void ARM64InstrInfo::instantiateCondBranch( - MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - if (Cond[0].getImm() != -1) { - // Regular Bcc - BuildMI(&MBB, DL, get(ARM64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB); - } else { - // Folded compare-and-branch - const MachineInstrBuilder MIB = - BuildMI(&MBB, DL, get(Cond[1].getImm())).addReg(Cond[2].getReg()); - if (Cond.size() > 3) - MIB.addImm(Cond[3].getImm()); - MIB.addMBB(TBB); - } -} - -unsigned ARM64InstrInfo::InsertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const { - // Shouldn't be a fall through. - assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - - if (FBB == 0) { - if (Cond.empty()) // Unconditional branch? - BuildMI(&MBB, DL, get(ARM64::B)).addMBB(TBB); - else - instantiateCondBranch(MBB, DL, TBB, Cond); - return 1; - } - - // Two-way conditional branch. - instantiateCondBranch(MBB, DL, TBB, Cond); - BuildMI(&MBB, DL, get(ARM64::B)).addMBB(FBB); - return 2; -} - -// Find the original register that VReg is copied from. -static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { - while (TargetRegisterInfo::isVirtualRegister(VReg)) { - const MachineInstr *DefMI = MRI.getVRegDef(VReg); - if (!DefMI->isFullCopy()) - return VReg; - VReg = DefMI->getOperand(1).getReg(); - } - return VReg; -} - -// Determine if VReg is defined by an instruction that can be folded into a -// csel instruction. If so, return the folded opcode, and the replacement -// register. 
-static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, - unsigned *NewVReg = 0) { - VReg = removeCopies(MRI, VReg); - if (!TargetRegisterInfo::isVirtualRegister(VReg)) - return 0; - - bool Is64Bit = ARM64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); - const MachineInstr *DefMI = MRI.getVRegDef(VReg); - unsigned Opc = 0; - unsigned SrcOpNum = 0; - switch (DefMI->getOpcode()) { - case ARM64::ADDSXri: - case ARM64::ADDSWri: - // if CPSR is used, do not fold. - if (DefMI->findRegisterDefOperandIdx(ARM64::CPSR, true) == -1) - return 0; - // fall-through to ADDXri and ADDWri. - case ARM64::ADDXri: - case ARM64::ADDWri: - // add x, 1 -> csinc. - if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 || - DefMI->getOperand(3).getImm() != 0) - return 0; - SrcOpNum = 1; - Opc = Is64Bit ? ARM64::CSINCXr : ARM64::CSINCWr; - break; - - case ARM64::ORNXrr: - case ARM64::ORNWrr: { - // not x -> csinv, represented as orn dst, xzr, src. - unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); - if (ZReg != ARM64::XZR && ZReg != ARM64::WZR) - return 0; - SrcOpNum = 2; - Opc = Is64Bit ? ARM64::CSINVXr : ARM64::CSINVWr; - break; - } - - case ARM64::SUBSXrr: - case ARM64::SUBSWrr: - // if CPSR is used, do not fold. - if (DefMI->findRegisterDefOperandIdx(ARM64::CPSR, true) == -1) - return 0; - // fall-through to SUBXrr and SUBWrr. - case ARM64::SUBXrr: - case ARM64::SUBWrr: { - // neg x -> csneg, represented as sub dst, xzr, src. - unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); - if (ZReg != ARM64::XZR && ZReg != ARM64::WZR) - return 0; - SrcOpNum = 2; - Opc = Is64Bit ? ARM64::CSNEGXr : ARM64::CSNEGWr; - break; - } - default: - return 0; - } - assert(Opc && SrcOpNum && "Missing parameters"); - - if (NewVReg) - *NewVReg = DefMI->getOperand(SrcOpNum).getReg(); - return Opc; -} - -bool ARM64InstrInfo::canInsertSelect( - const MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand> &Cond, - unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, - int &FalseCycles) const { - // Check register classes. - const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - const TargetRegisterClass *RC = - RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); - if (!RC) - return false; - - // Expanding cbz/tbz requires an extra cycle of latency on the condition. - unsigned ExtraCondLat = Cond.size() != 1; - - // GPRs are handled by csel. - // FIXME: Fold in x+1, -x, and ~x when applicable. - if (ARM64::GPR64allRegClass.hasSubClassEq(RC) || - ARM64::GPR32allRegClass.hasSubClassEq(RC)) { - // Single-cycle csel, csinc, csinv, and csneg. - CondCycles = 1 + ExtraCondLat; - TrueCycles = FalseCycles = 1; - if (canFoldIntoCSel(MRI, TrueReg)) - TrueCycles = 0; - else if (canFoldIntoCSel(MRI, FalseReg)) - FalseCycles = 0; - return true; - } - - // Scalar floating point is handled by fcsel. - // FIXME: Form fabs, fmin, and fmax when applicable. - if (ARM64::FPR64RegClass.hasSubClassEq(RC) || - ARM64::FPR32RegClass.hasSubClassEq(RC)) { - CondCycles = 5 + ExtraCondLat; - TrueCycles = FalseCycles = 2; - return true; - } - - // Can't do vectors. - return false; -} - -void ARM64InstrInfo::insertSelect(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DstReg, - const SmallVectorImpl<MachineOperand> &Cond, - unsigned TrueReg, unsigned FalseReg) const { - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - - // Parse the condition code, see parseCondBranch() above. 
- ARM64CC::CondCode CC;
- switch (Cond.size()) {
- default:
- llvm_unreachable("Unknown condition opcode in Cond");
- case 1: // b.cc
- CC = ARM64CC::CondCode(Cond[0].getImm());
- break;
- case 3: { // cbz/cbnz
- // We must insert a compare against 0.
- bool Is64Bit;
- switch (Cond[1].getImm()) {
- default:
- llvm_unreachable("Unknown branch opcode in Cond");
- case ARM64::CBZW:
- Is64Bit = 0;
- CC = ARM64CC::EQ;
- break;
- case ARM64::CBZX:
- Is64Bit = 1;
- CC = ARM64CC::EQ;
- break;
- case ARM64::CBNZW:
- Is64Bit = 0;
- CC = ARM64CC::NE;
- break;
- case ARM64::CBNZX:
- Is64Bit = 1;
- CC = ARM64CC::NE;
- break;
- }
- unsigned SrcReg = Cond[2].getReg();
- if (Is64Bit) {
- // cmp reg, #0 is actually subs xzr, reg, #0.
- MRI.constrainRegClass(SrcReg, &ARM64::GPR64spRegClass);
- BuildMI(MBB, I, DL, get(ARM64::SUBSXri), ARM64::XZR)
- .addReg(SrcReg)
- .addImm(0)
- .addImm(0);
- } else {
- MRI.constrainRegClass(SrcReg, &ARM64::GPR32spRegClass);
- BuildMI(MBB, I, DL, get(ARM64::SUBSWri), ARM64::WZR)
- .addReg(SrcReg)
- .addImm(0)
- .addImm(0);
- }
- break;
- }
- case 4: { // tbz/tbnz
- // We must insert a tst instruction.
- switch (Cond[1].getImm()) {
- default:
- llvm_unreachable("Unknown branch opcode in Cond");
- case ARM64::TBZ:
- CC = ARM64CC::EQ;
- break;
- case ARM64::TBNZ:
- CC = ARM64CC::NE;
- break;
- }
- // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
- BuildMI(MBB, I, DL, get(ARM64::ANDSXri), ARM64::XZR)
- .addReg(Cond[2].getReg())
- .addImm(ARM64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
- break;
- }
- }
-
- unsigned Opc = 0;
- const TargetRegisterClass *RC = 0;
- bool TryFold = false;
- if (MRI.constrainRegClass(DstReg, &ARM64::GPR64RegClass)) {
- RC = &ARM64::GPR64RegClass;
- Opc = ARM64::CSELXr;
- TryFold = true;
- } else if (MRI.constrainRegClass(DstReg, &ARM64::GPR32RegClass)) {
- RC = &ARM64::GPR32RegClass;
- Opc = ARM64::CSELWr;
- TryFold = true;
- } else if (MRI.constrainRegClass(DstReg, &ARM64::FPR64RegClass)) {
- RC = &ARM64::FPR64RegClass;
- Opc = ARM64::FCSELDrrr;
- } else if (MRI.constrainRegClass(DstReg, &ARM64::FPR32RegClass)) {
- RC = &ARM64::FPR32RegClass;
- Opc = ARM64::FCSELSrrr;
- }
- assert(RC && "Unsupported regclass");
-
- // Try folding simple instructions into the csel.
- if (TryFold) {
- unsigned NewVReg = 0;
- unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
- if (FoldedOpc) {
- // The folded opcodes csinc, csinv and csneg apply the operation to
- // FalseReg, so we need to invert the condition.
- CC = ARM64CC::getInvertedCondCode(CC);
- TrueReg = FalseReg;
- } else
- FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
-
- // Fold the operation. Leave any dead instructions for DCE to clean up.
- if (FoldedOpc) {
- FalseReg = NewVReg;
- Opc = FoldedOpc;
- // This extends the live range of NewVReg.
- MRI.clearKillFlags(NewVReg);
- }
- }
-
- // Pull all virtual registers into the appropriate class.
- MRI.constrainRegClass(TrueReg, RC);
- MRI.constrainRegClass(FalseReg, RC);
-
- // Insert the csel.
- BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
- CC);
-}
-
-bool ARM64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SubIdx) const {
- switch (MI.getOpcode()) {
- default:
- return false;
- case ARM64::SBFMXri: // aka sxtw
- case ARM64::UBFMXri: // aka uxtw
- // Check for the 32 -> 64 bit extension case, these instructions can do
- // much more.
- if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31) - return false; - // This is a signed or unsigned 32 -> 64 bit extension. - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - SubIdx = ARM64::sub_32; - return true; - } -} - -/// analyzeCompare - For a comparison instruction, return the source registers -/// in SrcReg and SrcReg2, and the value it compares against in CmpValue. -/// Return true if the comparison instruction can be analyzed. -bool ARM64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, - unsigned &SrcReg2, int &CmpMask, - int &CmpValue) const { - switch (MI->getOpcode()) { - default: - break; - case ARM64::SUBSWrr: - case ARM64::SUBSWrs: - case ARM64::SUBSWrx: - case ARM64::SUBSXrr: - case ARM64::SUBSXrs: - case ARM64::SUBSXrx: - case ARM64::ADDSWrr: - case ARM64::ADDSWrs: - case ARM64::ADDSWrx: - case ARM64::ADDSXrr: - case ARM64::ADDSXrs: - case ARM64::ADDSXrx: - // Replace SUBSWrr with SUBWrr if CPSR is not used. - SrcReg = MI->getOperand(1).getReg(); - SrcReg2 = MI->getOperand(2).getReg(); - CmpMask = ~0; - CmpValue = 0; - return true; - case ARM64::SUBSWri: - case ARM64::ADDSWri: - case ARM64::ANDSWri: - case ARM64::SUBSXri: - case ARM64::ADDSXri: - case ARM64::ANDSXri: - SrcReg = MI->getOperand(1).getReg(); - SrcReg2 = 0; - CmpMask = ~0; - CmpValue = MI->getOperand(2).getImm(); - return true; - } - - return false; -} - -static bool UpdateOperandRegClass(MachineInstr *Instr) { - MachineBasicBlock *MBB = Instr->getParent(); - assert(MBB && "Can't get MachineBasicBlock here"); - MachineFunction *MF = MBB->getParent(); - assert(MF && "Can't get MachineFunction here"); - const TargetMachine *TM = &MF->getTarget(); - const TargetInstrInfo *TII = TM->getInstrInfo(); - const TargetRegisterInfo *TRI = TM->getRegisterInfo(); - MachineRegisterInfo *MRI = &MF->getRegInfo(); - - for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx; - ++OpIdx) { - MachineOperand &MO = Instr->getOperand(OpIdx); - const TargetRegisterClass *OpRegCstraints = - Instr->getRegClassConstraint(OpIdx, TII, TRI); - - // If there's no constraint, there's nothing to do. - if (!OpRegCstraints) - continue; - // If the operand is a frame index, there's nothing to do here. - // A frame index operand will resolve correctly during PEI. - if (MO.isFI()) - continue; - - assert(MO.isReg() && - "Operand has register constraints without being a register!"); - - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (!OpRegCstraints->contains(Reg)) - return false; - } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) && - !MRI->constrainRegClass(Reg, OpRegCstraints)) - return false; - } - - return true; -} - -/// optimizeCompareInstr - Convert the instruction supplying the argument to the -/// comparison into one that sets the zero bit in the flags register. -bool ARM64InstrInfo::optimizeCompareInstr( - MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, - int CmpValue, const MachineRegisterInfo *MRI) const { - - // Replace SUBSWrr with SUBWrr if CPSR is not used. 
- int Cmp_CPSR = CmpInstr->findRegisterDefOperandIdx(ARM64::CPSR, true); - if (Cmp_CPSR != -1) { - unsigned NewOpc; - switch (CmpInstr->getOpcode()) { - default: - return false; - case ARM64::ADDSWrr: NewOpc = ARM64::ADDWrr; break; - case ARM64::ADDSWri: NewOpc = ARM64::ADDWri; break; - case ARM64::ADDSWrs: NewOpc = ARM64::ADDWrs; break; - case ARM64::ADDSWrx: NewOpc = ARM64::ADDWrx; break; - case ARM64::ADDSXrr: NewOpc = ARM64::ADDXrr; break; - case ARM64::ADDSXri: NewOpc = ARM64::ADDXri; break; - case ARM64::ADDSXrs: NewOpc = ARM64::ADDXrs; break; - case ARM64::ADDSXrx: NewOpc = ARM64::ADDXrx; break; - case ARM64::SUBSWrr: NewOpc = ARM64::SUBWrr; break; - case ARM64::SUBSWri: NewOpc = ARM64::SUBWri; break; - case ARM64::SUBSWrs: NewOpc = ARM64::SUBWrs; break; - case ARM64::SUBSWrx: NewOpc = ARM64::SUBWrx; break; - case ARM64::SUBSXrr: NewOpc = ARM64::SUBXrr; break; - case ARM64::SUBSXri: NewOpc = ARM64::SUBXri; break; - case ARM64::SUBSXrs: NewOpc = ARM64::SUBXrs; break; - case ARM64::SUBSXrx: NewOpc = ARM64::SUBXrx; break; - } - - const MCInstrDesc &MCID = get(NewOpc); - CmpInstr->setDesc(MCID); - CmpInstr->RemoveOperand(Cmp_CPSR); - bool succeeded = UpdateOperandRegClass(CmpInstr); - (void)succeeded; - assert(succeeded && "Some operands reg class are incompatible!"); - return true; - } - - // Continue only if we have a "ri" where immediate is zero. - if (CmpValue != 0 || SrcReg2 != 0) - return false; - - // CmpInstr is a Compare instruction if destination register is not used. - if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg())) - return false; - - // Get the unique definition of SrcReg. - MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); - if (!MI) - return false; - - // We iterate backward, starting from the instruction before CmpInstr and - // stop when reaching the definition of the source register or done with the - // basic block, to check whether CPSR is used or modified in between. - MachineBasicBlock::iterator I = CmpInstr, E = MI, - B = CmpInstr->getParent()->begin(); - - // Early exit if CmpInstr is at the beginning of the BB. - if (I == B) - return false; - - // Check whether the definition of SrcReg is in the same basic block as - // Compare. If not, we can't optimize away the Compare. - if (MI->getParent() != CmpInstr->getParent()) - return false; - - // Check that CPSR isn't set between the comparison instruction and the one we - // want to change. - const TargetRegisterInfo *TRI = &getRegisterInfo(); - for (--I; I != E; --I) { - const MachineInstr &Instr = *I; - - if (Instr.modifiesRegister(ARM64::CPSR, TRI) || - Instr.readsRegister(ARM64::CPSR, TRI)) - // This instruction modifies or uses CPSR after the one we want to - // change. We can't do this transformation. - return false; - if (I == B) - // The 'and' is below the comparison instruction. 
- return false; - } - - unsigned NewOpc = MI->getOpcode(); - switch (MI->getOpcode()) { - default: - return false; - case ARM64::ADDSWrr: - case ARM64::ADDSWri: - case ARM64::ADDSXrr: - case ARM64::ADDSXri: - case ARM64::SUBSWrr: - case ARM64::SUBSWri: - case ARM64::SUBSXrr: - case ARM64::SUBSXri: - break; - case ARM64::ADDWrr: NewOpc = ARM64::ADDSWrr; break; - case ARM64::ADDWri: NewOpc = ARM64::ADDSWri; break; - case ARM64::ADDXrr: NewOpc = ARM64::ADDSXrr; break; - case ARM64::ADDXri: NewOpc = ARM64::ADDSXri; break; - case ARM64::ADCWr: NewOpc = ARM64::ADCSWr; break; - case ARM64::ADCXr: NewOpc = ARM64::ADCSXr; break; - case ARM64::SUBWrr: NewOpc = ARM64::SUBSWrr; break; - case ARM64::SUBWri: NewOpc = ARM64::SUBSWri; break; - case ARM64::SUBXrr: NewOpc = ARM64::SUBSXrr; break; - case ARM64::SUBXri: NewOpc = ARM64::SUBSXri; break; - case ARM64::SBCWr: NewOpc = ARM64::SBCSWr; break; - case ARM64::SBCXr: NewOpc = ARM64::SBCSXr; break; - case ARM64::ANDWri: NewOpc = ARM64::ANDSWri; break; - case ARM64::ANDXri: NewOpc = ARM64::ANDSXri; break; - } - - // Scan forward for the use of CPSR. - // When checking against MI: if it's a conditional code requires - // checking of V bit, then this is not safe to do. - // It is safe to remove CmpInstr if CPSR is redefined or killed. - // If we are done with the basic block, we need to check whether CPSR is - // live-out. - bool IsSafe = false; - for (MachineBasicBlock::iterator I = CmpInstr, - E = CmpInstr->getParent()->end(); - !IsSafe && ++I != E;) { - const MachineInstr &Instr = *I; - for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO; - ++IO) { - const MachineOperand &MO = Instr.getOperand(IO); - if (MO.isRegMask() && MO.clobbersPhysReg(ARM64::CPSR)) { - IsSafe = true; - break; - } - if (!MO.isReg() || MO.getReg() != ARM64::CPSR) - continue; - if (MO.isDef()) { - IsSafe = true; - break; - } - - // Decode the condition code. - unsigned Opc = Instr.getOpcode(); - ARM64CC::CondCode CC; - switch (Opc) { - default: - return false; - case ARM64::Bcc: - CC = (ARM64CC::CondCode)Instr.getOperand(IO - 2).getImm(); - break; - case ARM64::CSINVWr: - case ARM64::CSINVXr: - case ARM64::CSINCWr: - case ARM64::CSINCXr: - case ARM64::CSELWr: - case ARM64::CSELXr: - case ARM64::CSNEGWr: - case ARM64::CSNEGXr: - CC = (ARM64CC::CondCode)Instr.getOperand(IO - 1).getImm(); - break; - } - - // It is not safe to remove Compare instruction if Overflow(V) is used. - switch (CC) { - default: - // CPSR can be used multiple times, we should continue. - break; - case ARM64CC::VS: - case ARM64CC::VC: - case ARM64CC::GE: - case ARM64CC::LT: - case ARM64CC::GT: - case ARM64CC::LE: - return false; - } - } - } - - // If CPSR is not killed nor re-defined, we should check whether it is - // live-out. If it is live-out, do not optimize. - if (!IsSafe) { - MachineBasicBlock *MBB = CmpInstr->getParent(); - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); - SI != SE; ++SI) - if ((*SI)->isLiveIn(ARM64::CPSR)) - return false; - } - - // Update the instruction to set CPSR. - MI->setDesc(get(NewOpc)); - CmpInstr->eraseFromParent(); - bool succeeded = UpdateOperandRegClass(MI); - (void)succeeded; - assert(succeeded && "Some operands reg class are incompatible!"); - MI->addRegisterDefined(ARM64::CPSR, TRI); - return true; -} - -// Return true if this instruction simply sets its single destination register -// to zero. This is equivalent to a register rename of the zero-register. 
-bool ARM64InstrInfo::isGPRZero(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: - break; - case ARM64::MOVZWi: - case ARM64::MOVZXi: // movz Rd, #0 (LSL #0) - if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) { - assert(MI->getDesc().getNumOperands() == 3 && - MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands"); - return true; - } - break; - case ARM64::ANDWri: // and Rd, Rzr, #imm - return MI->getOperand(1).getReg() == ARM64::WZR; - case ARM64::ANDXri: - return MI->getOperand(1).getReg() == ARM64::XZR; - case TargetOpcode::COPY: - return MI->getOperand(1).getReg() == ARM64::WZR; - } - return false; -} - -// Return true if this instruction simply renames a general register without -// modifying bits. -bool ARM64InstrInfo::isGPRCopy(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: - break; - case TargetOpcode::COPY: { - // GPR32 copies will by lowered to ORRXrs - unsigned DstReg = MI->getOperand(0).getReg(); - return (ARM64::GPR32RegClass.contains(DstReg) || - ARM64::GPR64RegClass.contains(DstReg)); - } - case ARM64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0) - if (MI->getOperand(1).getReg() == ARM64::XZR) { - assert(MI->getDesc().getNumOperands() == 4 && - MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands"); - return true; - } - case ARM64::ADDXri: // add Xd, Xn, #0 (LSL #0) - if (MI->getOperand(2).getImm() == 0) { - assert(MI->getDesc().getNumOperands() == 4 && - MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands"); - return true; - } - } - return false; -} - -// Return true if this instruction simply renames a general register without -// modifying bits. -bool ARM64InstrInfo::isFPRCopy(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: - break; - case TargetOpcode::COPY: { - // FPR64 copies will by lowered to ORR.16b - unsigned DstReg = MI->getOperand(0).getReg(); - return (ARM64::FPR64RegClass.contains(DstReg) || - ARM64::FPR128RegClass.contains(DstReg)); - } - case ARM64::ORRv16i8: - if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { - assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() && - "invalid ORRv16i8 operands"); - return true; - } - } - return false; -} - -unsigned ARM64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: - break; - case ARM64::LDRWui: - case ARM64::LDRXui: - case ARM64::LDRBui: - case ARM64::LDRHui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - - return 0; -} - -unsigned ARM64InstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: - break; - case ARM64::STRWui: - case ARM64::STRXui: - case ARM64::STRBui: - case ARM64::STRHui: - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - return 0; -} - -/// Return true if this is load/store scales or extends its register offset. -/// This refers to scaling a dynamic index as opposed to scaled immediates. 
-/// MI should be a memory op that allows scaled addressing. -bool ARM64InstrInfo::isScaledAddr(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: - break; - case ARM64::LDRBBro: - case ARM64::LDRBro: - case ARM64::LDRDro: - case ARM64::LDRHHro: - case ARM64::LDRHro: - case ARM64::LDRQro: - case ARM64::LDRSBWro: - case ARM64::LDRSBXro: - case ARM64::LDRSHWro: - case ARM64::LDRSHXro: - case ARM64::LDRSWro: - case ARM64::LDRSro: - case ARM64::LDRWro: - case ARM64::LDRXro: - case ARM64::STRBBro: - case ARM64::STRBro: - case ARM64::STRDro: - case ARM64::STRHHro: - case ARM64::STRHro: - case ARM64::STRQro: - case ARM64::STRSro: - case ARM64::STRWro: - case ARM64::STRXro: - unsigned Val = MI->getOperand(3).getImm(); - ARM64_AM::ExtendType ExtType = ARM64_AM::getMemExtendType(Val); - return (ExtType != ARM64_AM::UXTX) || ARM64_AM::getMemDoShift(Val); - } - return false; -} - -/// Check all MachineMemOperands for a hint to suppress pairing. -bool ARM64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const { - assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) && - "Too many target MO flags"); - for (MachineInstr::mmo_iterator MM = MI->memoperands_begin(), - E = MI->memoperands_end(); - MM != E; ++MM) { - - if ((*MM)->getFlags() & - (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) { - return true; - } - } - return false; -} - -/// Set a flag on the first MachineMemOperand to suppress pairing. -void ARM64InstrInfo::suppressLdStPair(MachineInstr *MI) const { - if (MI->memoperands_empty()) - return; - - assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) && - "Too many target MO flags"); - (*MI->memoperands_begin()) - ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit); -} - -bool ARM64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, - unsigned &Offset, - const TargetRegisterInfo *TRI) const { - switch (LdSt->getOpcode()) { - default: - return false; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - case ARM64::STRXui: - case ARM64::STRWui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - case ARM64::LDRXui: - case ARM64::LDRWui: - if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm()) - return false; - BaseReg = LdSt->getOperand(1).getReg(); - MachineFunction &MF = *LdSt->getParent()->getParent(); - unsigned Width = getRegClass(LdSt->getDesc(), 0, TRI, MF)->getSize(); - Offset = LdSt->getOperand(2).getImm() * Width; - return true; - }; -} - -/// Detect opportunities for ldp/stp formation. -/// -/// Only called for LdSt for which getLdStBaseRegImmOfs returns true. -bool ARM64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, - MachineInstr *SecondLdSt, - unsigned NumLoads) const { - // Only cluster up to a single pair. - if (NumLoads > 1) - return false; - if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode()) - return false; - // getLdStBaseRegImmOfs guarantees that oper 2 isImm. - unsigned Ofs1 = FirstLdSt->getOperand(2).getImm(); - // Allow 6 bits of positive range. - if (Ofs1 > 64) - return false; - // The caller should already have ordered First/SecondLdSt by offset. - unsigned Ofs2 = SecondLdSt->getOperand(2).getImm(); - return Ofs1 + 1 == Ofs2; -} - -bool ARM64InstrInfo::shouldScheduleAdjacent(MachineInstr *First, - MachineInstr *Second) const { - // Cyclone can fuse CMN, CMP followed by Bcc. - - // FIXME: B0 can also fuse: - // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ. 
- if (Second->getOpcode() != ARM64::Bcc) - return false; - switch (First->getOpcode()) { - default: - return false; - case ARM64::SUBSWri: - case ARM64::ADDSWri: - case ARM64::ANDSWri: - case ARM64::SUBSXri: - case ARM64::ADDSXri: - case ARM64::ANDSXri: - return true; - } -} - -MachineInstr *ARM64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const { - MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM64::DBG_VALUE)) - .addFrameIndex(FrameIx) - .addImm(0) - .addImm(Offset) - .addMetadata(MDPtr); - return &*MIB; -} - -static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, - unsigned Reg, unsigned SubIdx, - unsigned State, - const TargetRegisterInfo *TRI) { - if (!SubIdx) - return MIB.addReg(Reg, State); - - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); - return MIB.addReg(Reg, State, SubIdx); -} - -static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, - unsigned NumRegs) { - // We really want the positive remainder mod 32 here, that happens to be - // easily obtainable with a mask. - return ((DestReg - SrcReg) & 0x1f) < NumRegs; -} - -void ARM64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - DebugLoc DL, unsigned DestReg, - unsigned SrcReg, bool KillSrc, - unsigned Opcode, - llvm::ArrayRef<unsigned> Indices) const { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - uint16_t DestEncoding = TRI->getEncodingValue(DestReg); - uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); - unsigned NumRegs = Indices.size(); - - int SubReg = 0, End = NumRegs, Incr = 1; - if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) { - SubReg = NumRegs - 1; - End = -1; - Incr = -1; - } - - for (; SubReg != End; SubReg += Incr) { - const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode)); - AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); - AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); - AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); - } -} - -void ARM64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { - if (ARM64::GPR32spRegClass.contains(DestReg) && - (ARM64::GPR32spRegClass.contains(SrcReg) || SrcReg == ARM64::WZR)) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - - if (DestReg == ARM64::WSP || SrcReg == ARM64::WSP) { - // If either operand is WSP, expand to ADD #0. - if (Subtarget.hasZeroCycleRegMove()) { - // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. - unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); - unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); - // This instruction is reading and writing X registers. This may upset - // the register scavenger and machine verifier, so we need to indicate - // that we are reading an undefined value from SrcRegX, but a proper - // value from SrcReg. 
- BuildMI(MBB, I, DL, get(ARM64::ADDXri), DestRegX) - .addReg(SrcRegX, RegState::Undef) - .addImm(0) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)) - .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); - } else { - BuildMI(MBB, I, DL, get(ARM64::ADDWri), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addImm(0) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - } - } else if (SrcReg == ARM64::WZR && Subtarget.hasZeroCycleZeroing()) { - BuildMI(MBB, I, DL, get(ARM64::MOVZWi), DestReg).addImm(0).addImm( - ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - } else { - if (Subtarget.hasZeroCycleRegMove()) { - // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. - unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); - unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); - // This instruction is reading and writing X registers. This may upset - // the register scavenger and machine verifier, so we need to indicate - // that we are reading an undefined value from SrcRegX, but a proper - // value from SrcReg. - BuildMI(MBB, I, DL, get(ARM64::ORRXrr), DestRegX) - .addReg(ARM64::XZR) - .addReg(SrcRegX, RegState::Undef) - .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); - } else { - // Otherwise, expand to ORR WZR. - BuildMI(MBB, I, DL, get(ARM64::ORRWrr), DestReg) - .addReg(ARM64::WZR) - .addReg(SrcReg, getKillRegState(KillSrc)); - } - } - return; - } - - if (ARM64::GPR64spRegClass.contains(DestReg) && - (ARM64::GPR64spRegClass.contains(SrcReg) || SrcReg == ARM64::XZR)) { - if (DestReg == ARM64::SP || SrcReg == ARM64::SP) { - // If either operand is SP, expand to ADD #0. - BuildMI(MBB, I, DL, get(ARM64::ADDXri), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addImm(0) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - } else if (SrcReg == ARM64::XZR && Subtarget.hasZeroCycleZeroing()) { - BuildMI(MBB, I, DL, get(ARM64::MOVZXi), DestReg).addImm(0).addImm( - ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - } else { - // Otherwise, expand to ORR XZR. - BuildMI(MBB, I, DL, get(ARM64::ORRXrr), DestReg) - .addReg(ARM64::XZR) - .addReg(SrcReg, getKillRegState(KillSrc)); - } - return; - } - - // Copy a DDDD register quad by copying the individual sub-registers. - if (ARM64::DDDDRegClass.contains(DestReg) && - ARM64::DDDDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1, - ARM64::dsub2, ARM64::dsub3 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8, - Indices); - return; - } - - // Copy a DDD register triple by copying the individual sub-registers. - if (ARM64::DDDRegClass.contains(DestReg) && - ARM64::DDDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1, - ARM64::dsub2 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8, - Indices); - return; - } - - // Copy a DD register pair by copying the individual sub-registers. - if (ARM64::DDRegClass.contains(DestReg) && - ARM64::DDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8, - Indices); - return; - } - - // Copy a QQQQ register quad by copying the individual sub-registers. 
- if (ARM64::QQQQRegClass.contains(DestReg) && - ARM64::QQQQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1, - ARM64::qsub2, ARM64::qsub3 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8, - Indices); - return; - } - - // Copy a QQQ register triple by copying the individual sub-registers. - if (ARM64::QQQRegClass.contains(DestReg) && - ARM64::QQQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1, - ARM64::qsub2 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8, - Indices); - return; - } - - // Copy a QQ register pair by copying the individual sub-registers. - if (ARM64::QQRegClass.contains(DestReg) && - ARM64::QQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8, - Indices); - return; - } - - if (ARM64::FPR128RegClass.contains(DestReg) && - ARM64::FPR128RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); - return; - } - - if (ARM64::FPR64RegClass.contains(DestReg) && - ARM64::FPR64RegClass.contains(SrcReg)) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::dsub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::dsub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); - return; - } - - if (ARM64::FPR32RegClass.contains(DestReg) && - ARM64::FPR32RegClass.contains(SrcReg)) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::ssub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::ssub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); - return; - } - - if (ARM64::FPR16RegClass.contains(DestReg) && - ARM64::FPR16RegClass.contains(SrcReg)) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::hsub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::hsub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); - return; - } - - if (ARM64::FPR8RegClass.contains(DestReg) && - ARM64::FPR8RegClass.contains(SrcReg)) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::bsub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::bsub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); - return; - } - - // Copies between GPR64 and FPR64. - if (ARM64::FPR64RegClass.contains(DestReg) && - ARM64::GPR64RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVXDr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - if (ARM64::GPR64RegClass.contains(DestReg) && - ARM64::FPR64RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVDXr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - // Copies between GPR32 and FPR32. 
- if (ARM64::FPR32RegClass.contains(DestReg) && - ARM64::GPR32RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVWSr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - if (ARM64::GPR32RegClass.contains(DestReg) && - ARM64::FPR32RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVSWr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - - assert(0 && "unimplemented reg-to-reg copy"); -} - -void ARM64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL; - if (MBBI != MBB.end()) - DL = MBBI->getDebugLoc(); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - unsigned Align = MFI.getObjectAlignment(FI); - - MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); - MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); - unsigned Opc = 0; - bool Offset = true; - switch (RC->getSize()) { - case 1: - if (ARM64::FPR8RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRBui; - break; - case 2: - if (ARM64::FPR16RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRHui; - break; - case 4: - if (ARM64::GPR32allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::STRWui; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) - MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR32RegClass); - else - assert(SrcReg != ARM64::WSP); - } else if (ARM64::FPR32RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRSui; - break; - case 8: - if (ARM64::GPR64allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::STRXui; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) - MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR64RegClass); - else - assert(SrcReg != ARM64::SP); - } else if (ARM64::FPR64RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRDui; - break; - case 16: - if (ARM64::FPR128RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRQui; - else if (ARM64::DDRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Twov1d, Offset = false; - break; - case 24: - if (ARM64::DDDRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Threev1d, Offset = false; - break; - case 32: - if (ARM64::DDDDRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Fourv1d, Offset = false; - else if (ARM64::QQRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Twov2d, Offset = false; - break; - case 48: - if (ARM64::QQQRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Threev2d, Offset = false; - break; - case 64: - if (ARM64::QQQQRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Fourv2d, Offset = false; - break; - } - assert(Opc && "Unknown register class"); - - const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI); - - if (Offset) - MI.addImm(0); - MI.addMemOperand(MMO); -} - -void ARM64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL; - if (MBBI != MBB.end()) - DL = MBBI->getDebugLoc(); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - unsigned Align = MFI.getObjectAlignment(FI); - MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); - MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); - - unsigned Opc = 0; - bool Offset = 
true; - switch (RC->getSize()) { - case 1: - if (ARM64::FPR8RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRBui; - break; - case 2: - if (ARM64::FPR16RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRHui; - break; - case 4: - if (ARM64::GPR32allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::LDRWui; - if (TargetRegisterInfo::isVirtualRegister(DestReg)) - MF.getRegInfo().constrainRegClass(DestReg, &ARM64::GPR32RegClass); - else - assert(DestReg != ARM64::WSP); - } else if (ARM64::FPR32RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRSui; - break; - case 8: - if (ARM64::GPR64allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::LDRXui; - if (TargetRegisterInfo::isVirtualRegister(DestReg)) - MF.getRegInfo().constrainRegClass(DestReg, &ARM64::GPR64RegClass); - else - assert(DestReg != ARM64::SP); - } else if (ARM64::FPR64RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRDui; - break; - case 16: - if (ARM64::FPR128RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRQui; - else if (ARM64::DDRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Twov1d, Offset = false; - break; - case 24: - if (ARM64::DDDRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Threev1d, Offset = false; - break; - case 32: - if (ARM64::DDDDRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Fourv1d, Offset = false; - else if (ARM64::QQRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Twov2d, Offset = false; - break; - case 48: - if (ARM64::QQQRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Threev2d, Offset = false; - break; - case 64: - if (ARM64::QQQQRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Fourv2d, Offset = false; - break; - } - assert(Opc && "Unknown register class"); - - const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) - .addReg(DestReg, getDefRegState(true)) - .addFrameIndex(FI); - if (Offset) - MI.addImm(0); - MI.addMemOperand(MMO); -} - -void llvm::emitFrameOffset(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, int Offset, - const ARM64InstrInfo *TII, MachineInstr::MIFlag Flag, - bool SetCPSR) { - if (DestReg == SrcReg && Offset == 0) - return; - - bool isSub = Offset < 0; - if (isSub) - Offset = -Offset; - - // FIXME: If the offset won't fit in 24-bits, compute the offset into a - // scratch register. If DestReg is a virtual register, use it as the - // scratch register; otherwise, create a new virtual register (to be - // replaced by the scavenger at the end of PEI). That case can be optimized - // slightly if DestReg is SP which is always 16-byte aligned, so the scratch - // register can be loaded with offset%8 and the add/sub can use an extending - // instruction with LSL#3. - // Currently the function handles any offsets but generates a poor sequence - // of code. - // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); - - unsigned Opc; - if (SetCPSR) - Opc = isSub ? ARM64::SUBSXri : ARM64::ADDSXri; - else - Opc = isSub ? 
ARM64::SUBXri : ARM64::ADDXri; - const unsigned MaxEncoding = 0xfff; - const unsigned ShiftSize = 12; - const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; - while (((unsigned)Offset) >= (1 << ShiftSize)) { - unsigned ThisVal; - if (((unsigned)Offset) > MaxEncodableValue) { - ThisVal = MaxEncodableValue; - } else { - ThisVal = Offset & MaxEncodableValue; - } - assert((ThisVal >> ShiftSize) <= MaxEncoding && - "Encoding cannot handle value that big"); - BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) - .addReg(SrcReg) - .addImm(ThisVal >> ShiftSize) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftSize)) - .setMIFlag(Flag); - - SrcReg = DestReg; - Offset -= ThisVal; - if (Offset == 0) - return; - } - BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) - .addReg(SrcReg) - .addImm(Offset) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)) - .setMIFlag(Flag); -} - -MachineInstr * -ARM64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const { - // This is a bit of a hack. Consider this instruction: - // - // %vreg0<def> = COPY %SP; GPR64all:%vreg0 - // - // We explicitly chose GPR64all for the virtual register so such a copy might - // be eliminated by RegisterCoalescer. However, that may not be possible, and - // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all - // register class, TargetInstrInfo::foldMemoryOperand() is going to try. - // - // To prevent that, we are going to constrain the %vreg0 register class here. - // - // <rdar://problem/11522048> - // - if (MI->isCopy()) { - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); - if (SrcReg == ARM64::SP && TargetRegisterInfo::isVirtualRegister(DstReg)) { - MF.getRegInfo().constrainRegClass(DstReg, &ARM64::GPR64RegClass); - return 0; - } - if (DstReg == ARM64::SP && TargetRegisterInfo::isVirtualRegister(SrcReg)) { - MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR64RegClass); - return 0; - } - } - - // Cannot fold. - return 0; -} - -int llvm::isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset, - bool *OutUseUnscaledOp, - unsigned *OutUnscaledOp, - int *EmittableOffset) { - int Scale = 1; - bool IsSigned = false; - // The ImmIdx should be changed case by case if it is not 2. - unsigned ImmIdx = 2; - unsigned UnscaledOp = 0; - // Set output values in case of early exit. - if (EmittableOffset) - *EmittableOffset = 0; - if (OutUseUnscaledOp) - *OutUseUnscaledOp = false; - if (OutUnscaledOp) - *OutUnscaledOp = 0; - switch (MI.getOpcode()) { - default: - assert(0 && "unhandled opcode in rewriteARM64FrameIndex"); - // Vector spills/fills can't take an immediate offset. 
- case ARM64::LD1Twov2d: - case ARM64::LD1Threev2d: - case ARM64::LD1Fourv2d: - case ARM64::LD1Twov1d: - case ARM64::LD1Threev1d: - case ARM64::LD1Fourv1d: - case ARM64::ST1Twov2d: - case ARM64::ST1Threev2d: - case ARM64::ST1Fourv2d: - case ARM64::ST1Twov1d: - case ARM64::ST1Threev1d: - case ARM64::ST1Fourv1d: - return ARM64FrameOffsetCannotUpdate; - case ARM64::PRFMui: - Scale = 8; - UnscaledOp = ARM64::PRFUMi; - break; - case ARM64::LDRXui: - Scale = 8; - UnscaledOp = ARM64::LDURXi; - break; - case ARM64::LDRWui: - Scale = 4; - UnscaledOp = ARM64::LDURWi; - break; - case ARM64::LDRBui: - Scale = 1; - UnscaledOp = ARM64::LDURBi; - break; - case ARM64::LDRHui: - Scale = 2; - UnscaledOp = ARM64::LDURHi; - break; - case ARM64::LDRSui: - Scale = 4; - UnscaledOp = ARM64::LDURSi; - break; - case ARM64::LDRDui: - Scale = 8; - UnscaledOp = ARM64::LDURDi; - break; - case ARM64::LDRQui: - Scale = 16; - UnscaledOp = ARM64::LDURQi; - break; - case ARM64::LDRBBui: - Scale = 1; - UnscaledOp = ARM64::LDURBBi; - break; - case ARM64::LDRHHui: - Scale = 2; - UnscaledOp = ARM64::LDURHHi; - break; - case ARM64::LDRSBXui: - Scale = 1; - UnscaledOp = ARM64::LDURSBXi; - break; - case ARM64::LDRSBWui: - Scale = 1; - UnscaledOp = ARM64::LDURSBWi; - break; - case ARM64::LDRSHXui: - Scale = 2; - UnscaledOp = ARM64::LDURSHXi; - break; - case ARM64::LDRSHWui: - Scale = 2; - UnscaledOp = ARM64::LDURSHWi; - break; - case ARM64::LDRSWui: - Scale = 4; - UnscaledOp = ARM64::LDURSWi; - break; - - case ARM64::STRXui: - Scale = 8; - UnscaledOp = ARM64::STURXi; - break; - case ARM64::STRWui: - Scale = 4; - UnscaledOp = ARM64::STURWi; - break; - case ARM64::STRBui: - Scale = 1; - UnscaledOp = ARM64::STURBi; - break; - case ARM64::STRHui: - Scale = 2; - UnscaledOp = ARM64::STURHi; - break; - case ARM64::STRSui: - Scale = 4; - UnscaledOp = ARM64::STURSi; - break; - case ARM64::STRDui: - Scale = 8; - UnscaledOp = ARM64::STURDi; - break; - case ARM64::STRQui: - Scale = 16; - UnscaledOp = ARM64::STURQi; - break; - case ARM64::STRBBui: - Scale = 1; - UnscaledOp = ARM64::STURBBi; - break; - case ARM64::STRHHui: - Scale = 2; - UnscaledOp = ARM64::STURHHi; - break; - - case ARM64::LDPXi: - case ARM64::LDPDi: - case ARM64::STPXi: - case ARM64::STPDi: - IsSigned = true; - Scale = 8; - break; - case ARM64::LDPQi: - case ARM64::STPQi: - IsSigned = true; - Scale = 16; - break; - case ARM64::LDPWi: - case ARM64::LDPSi: - case ARM64::STPWi: - case ARM64::STPSi: - IsSigned = true; - Scale = 4; - break; - - case ARM64::LDURXi: - case ARM64::LDURWi: - case ARM64::LDURBi: - case ARM64::LDURHi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURHHi: - case ARM64::LDURBBi: - case ARM64::LDURSBXi: - case ARM64::LDURSBWi: - case ARM64::LDURSHXi: - case ARM64::LDURSHWi: - case ARM64::LDURSWi: - case ARM64::STURXi: - case ARM64::STURWi: - case ARM64::STURBi: - case ARM64::STURHi: - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURBBi: - case ARM64::STURHHi: - Scale = 1; - break; - } - - Offset += MI.getOperand(ImmIdx).getImm() * Scale; - - bool useUnscaledOp = false; - // If the offset doesn't match the scale, we rewrite the instruction to - // use the unscaled instruction instead. Likewise, if we have a negative - // offset (and have an unscaled op to use). 
- if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0)) - useUnscaledOp = true; - - // Use an unscaled addressing mode if the instruction has a negative offset - // (or if the instruction is already using an unscaled addressing mode). - unsigned MaskBits; - if (IsSigned) { - // ldp/stp instructions. - MaskBits = 7; - Offset /= Scale; - } else if (UnscaledOp == 0 || useUnscaledOp) { - MaskBits = 9; - IsSigned = true; - Scale = 1; - } else { - MaskBits = 12; - IsSigned = false; - Offset /= Scale; - } - - // Attempt to fold address computation. - int MaxOff = (1 << (MaskBits - IsSigned)) - 1; - int MinOff = (IsSigned ? (-MaxOff - 1) : 0); - if (Offset >= MinOff && Offset <= MaxOff) { - if (EmittableOffset) - *EmittableOffset = Offset; - Offset = 0; - } else { - int NewOff = Offset < 0 ? MinOff : MaxOff; - if (EmittableOffset) - *EmittableOffset = NewOff; - Offset = (Offset - NewOff) * Scale; - } - if (OutUseUnscaledOp) - *OutUseUnscaledOp = useUnscaledOp; - if (OutUnscaledOp) - *OutUnscaledOp = UnscaledOp; - return ARM64FrameOffsetCanUpdate | - (Offset == 0 ? ARM64FrameOffsetIsLegal : 0); -} - -bool llvm::rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int &Offset, - const ARM64InstrInfo *TII) { - unsigned Opcode = MI.getOpcode(); - unsigned ImmIdx = FrameRegIdx + 1; - - if (Opcode == ARM64::ADDSXri || Opcode == ARM64::ADDXri) { - Offset += MI.getOperand(ImmIdx).getImm(); - emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), - MI.getOperand(0).getReg(), FrameReg, Offset, TII, - MachineInstr::NoFlags, (Opcode == ARM64::ADDSXri)); - MI.eraseFromParent(); - Offset = 0; - return true; - } - - int NewOffset; - unsigned UnscaledOp; - bool UseUnscaledOp; - int Status = isARM64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, &UnscaledOp, - &NewOffset); - if (Status & ARM64FrameOffsetCanUpdate) { - if (Status & ARM64FrameOffsetIsLegal) - // Replace the FrameIndex with FrameReg. - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - if (UseUnscaledOp) - MI.setDesc(TII->get(UnscaledOp)); - - MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); - return Offset == 0; - } - - return false; -} - -void ARM64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { - NopInst.setOpcode(ARM64::HINT); - NopInst.addOperand(MCOperand::CreateImm(0)); -} diff --git a/lib/Target/ARM64/ARM64InstrInfo.h b/lib/Target/ARM64/ARM64InstrInfo.h deleted file mode 100644 index 2591ca0..0000000 --- a/lib/Target/ARM64/ARM64InstrInfo.h +++ /dev/null @@ -1,219 +0,0 @@ -//===- ARM64InstrInfo.h - ARM64 Instruction Information ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM64 implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_ARM64INSTRINFO_H -#define LLVM_TARGET_ARM64INSTRINFO_H - -#include "ARM64.h" -#include "ARM64RegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" - -#define GET_INSTRINFO_HEADER -#include "ARM64GenInstrInfo.inc" - -namespace llvm { - -class ARM64Subtarget; -class ARM64TargetMachine; - -class ARM64InstrInfo : public ARM64GenInstrInfo { - // Reserve bits in the MachineMemOperand target hint flags, starting at 1. - // They will be shifted into MOTargetHintStart when accessed. 
- enum TargetMemOperandFlags { - MOSuppressPair = 1 - }; - - const ARM64RegisterInfo RI; - const ARM64Subtarget &Subtarget; - -public: - explicit ARM64InstrInfo(const ARM64Subtarget &STI); - - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). - const ARM64RegisterInfo &getRegisterInfo() const { return RI; } - - unsigned GetInstSizeInBytes(const MachineInstr *MI) const; - - bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, - unsigned &DstReg, unsigned &SubIdx) const override; - - unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const override; - unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const override; - - /// \brief Does this instruction set its full destination register to zero? - bool isGPRZero(const MachineInstr *MI) const; - - /// \brief Does this instruction rename a GPR without modifying bits? - bool isGPRCopy(const MachineInstr *MI) const; - - /// \brief Does this instruction rename an FPR without modifying bits? - bool isFPRCopy(const MachineInstr *MI) const; - - /// Return true if this is load/store scales or extends its register offset. - /// This refers to scaling a dynamic index as opposed to scaled immediates. - /// MI should be a memory op that allows scaled addressing. - bool isScaledAddr(const MachineInstr *MI) const; - - /// Return true if pairing the given load or store is hinted to be - /// unprofitable. - bool isLdStPairSuppressed(const MachineInstr *MI) const; - - /// Hint that pairing the given load or store is unprofitable. - void suppressLdStPair(MachineInstr *MI) const; - - bool getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, - unsigned &Offset, - const TargetRegisterInfo *TRI) const override; - - bool enableClusterLoads() const override { return true; } - - bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt, - unsigned NumLoads) const override; - - bool shouldScheduleAdjacent(MachineInstr *First, - MachineInstr *Second) const override; - - MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, - uint64_t Offset, const MDNode *MDPtr, - DebugLoc DL) const; - void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - DebugLoc DL, unsigned DestReg, unsigned SrcReg, - bool KillSrc, unsigned Opcode, - llvm::ArrayRef<unsigned> Indices) const; - void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - DebugLoc DL, unsigned DestReg, unsigned SrcReg, - bool KillSrc) const override; - - void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, unsigned SrcReg, - bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; - - void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, unsigned DestReg, - int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const override; - - MachineInstr * - foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const override; - - bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify = false) const override; - unsigned RemoveBranch(MachineBasicBlock &MBB) const override; - unsigned InsertBranch(MachineBasicBlock &MBB, 
MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const override;
- bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
- bool canInsertSelect(const MachineBasicBlock &,
- const SmallVectorImpl<MachineOperand> &Cond, unsigned,
- unsigned, int &, int &, int &) const override;
- void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- DebugLoc DL, unsigned DstReg,
- const SmallVectorImpl<MachineOperand> &Cond,
- unsigned TrueReg, unsigned FalseReg) const override;
- void getNoopForMachoTarget(MCInst &NopInst) const override;
-
- /// analyzeCompare - For a comparison instruction, return the source registers
- /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
- /// Return true if the comparison instruction can be analyzed.
- bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &CmpMask,
- int &CmpValue) const override;
- /// optimizeCompareInstr - Convert the instruction supplying the argument to
- /// the comparison into one that sets the zero bit in the flags register.
- bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
- unsigned SrcReg2, int CmpMask, int CmpValue,
- const MachineRegisterInfo *MRI) const override;
-
-private:
- void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
- MachineBasicBlock *TBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
-};
-
-/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
-/// plus Offset. This is intended to be used from within the prolog/epilog
-/// insertion (PEI) pass, where a virtual scratch register may be allocated
-/// if necessary, to be replaced by the scavenger at the end of PEI.
-void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset,
- const ARM64InstrInfo *TII,
- MachineInstr::MIFlag = MachineInstr::NoFlags,
- bool SetCPSR = false);
-
-/// rewriteARM64FrameIndex - Rewrite MI to access 'Offset' bytes from the
-/// FP. Return false if the offset could not be handled directly in MI, and
-/// return the left-over portion by reference.
-bool rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
- const ARM64InstrInfo *TII);
-
-/// \brief Used to report the frame offset status in isARM64FrameOffsetLegal.
-enum ARM64FrameOffsetStatus {
- ARM64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply.
- ARM64FrameOffsetIsLegal = 0x1, ///< Offset is legal.
- ARM64FrameOffsetCanUpdate = 0x2 ///< Offset can apply, at least partly.
-};
-
-/// \brief Check if the @p Offset is a valid frame offset for @p MI.
-/// The returned value reports the validity of the frame offset for @p MI.
-/// It uses the values defined by ARM64FrameOffsetStatus for that.
-/// If result == ARM64FrameOffsetCannotUpdate, @p MI cannot be updated to
-/// use an offset.
-/// If result & ARM64FrameOffsetIsLegal, @p Offset can completely be
-/// rewritten in @p MI.
-/// If result & ARM64FrameOffsetCanUpdate, @p Offset contains the
-/// amount that is off the limit of the legal offset.
-/// If set, @p OutUseUnscaledOp will contain whether @p MI should be
-/// turned into an unscaled operator, which opcode is in @p OutUnscaledOp.
-/// If set, @p EmittableOffset contains the amount that can be set in @p MI
-/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
-/// is a legal offset.
-int isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset, - bool *OutUseUnscaledOp = NULL, - unsigned *OutUnscaledOp = NULL, - int *EmittableOffset = NULL); - -static inline bool isUncondBranchOpcode(int Opc) { return Opc == ARM64::B; } - -static inline bool isCondBranchOpcode(int Opc) { - switch (Opc) { - case ARM64::Bcc: - case ARM64::CBZW: - case ARM64::CBZX: - case ARM64::CBNZW: - case ARM64::CBNZX: - case ARM64::TBZ: - case ARM64::TBNZ: - return true; - default: - return false; - } -} - -static inline bool isIndirectBranchOpcode(int Opc) { return Opc == ARM64::BR; } - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td deleted file mode 100644 index 2fe1720..0000000 --- a/lib/Target/ARM64/ARM64InstrInfo.td +++ /dev/null @@ -1,4458 +0,0 @@ -//===- ARM64InstrInfo.td - Describe the ARM64 Instructions -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// ARM64 Instruction definitions. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// ARM64-specific DAG Nodes. -// - -// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS -def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2, - [SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>, - SDTCisInt<0>, SDTCisVT<1, i32>]>; - -// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS -def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3, - [SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisInt<0>, - SDTCisVT<3, i32>]>; - -// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS -def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, - [SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>, - SDTCisInt<0>, - SDTCisVT<1, i32>, - SDTCisVT<4, i32>]>; - -def SDT_ARM64Brcond : SDTypeProfile<0, 3, - [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, - SDTCisVT<2, i32>]>; -def SDT_ARM64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; -def SDT_ARM64tbz : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisVT<1, i64>, - SDTCisVT<2, OtherVT>]>; - - -def SDT_ARM64CSel : SDTypeProfile<1, 4, - [SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisInt<3>, - SDTCisVT<4, i32>]>; -def SDT_ARM64FCmp : SDTypeProfile<0, 2, - [SDTCisFP<0>, - SDTCisSameAs<0, 1>]>; -def SDT_ARM64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; -def SDT_ARM64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; -def SDT_ARM64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, - SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>]>; -def SDT_ARM64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>; -def SDT_ARM64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; -def SDT_ARM64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisInt<2>, SDTCisInt<3>]>; -def SDT_ARM64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; -def SDT_ARM64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisInt<3>]>; -def SDT_ARM64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; - -def SDT_ARM64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; -def SDT_ARM64fcmpz : SDTypeProfile<1, 1, []>; -def SDT_ARM64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>; -def SDT_ARM64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, - 
SDTCisSameAs<0,2>]>; -def SDT_ARM64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, - SDTCisSameAs<0,3>]>; -def SDT_ARM64TCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; -def SDT_ARM64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; - -def SDT_ARM64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; - -def SDT_ARM64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, - SDTCisPtrTy<1>]>; -def SDT_ARM64WrapperLarge : SDTypeProfile<1, 4, - [SDTCisVT<0, i64>, SDTCisVT<1, i32>, - SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, - SDTCisSameAs<1, 4>]>; - - -// Node definitions. -def ARM64adrp : SDNode<"ARM64ISD::ADRP", SDTIntUnaryOp, []>; -def ARM64addlow : SDNode<"ARM64ISD::ADDlow", SDTIntBinOp, []>; -def ARM64LOADgot : SDNode<"ARM64ISD::LOADgot", SDTIntUnaryOp>; -def ARM64callseq_start : SDNode<"ISD::CALLSEQ_START", - SDCallSeqStart<[ SDTCisVT<0, i32> ]>, - [SDNPHasChain, SDNPOutGlue]>; -def ARM64callseq_end : SDNode<"ISD::CALLSEQ_END", - SDCallSeqEnd<[ SDTCisVT<0, i32>, - SDTCisVT<1, i32> ]>, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def ARM64call : SDNode<"ARM64ISD::CALL", - SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -def ARM64brcond : SDNode<"ARM64ISD::BRCOND", SDT_ARM64Brcond, - [SDNPHasChain]>; -def ARM64cbz : SDNode<"ARM64ISD::CBZ", SDT_ARM64cbz, - [SDNPHasChain]>; -def ARM64cbnz : SDNode<"ARM64ISD::CBNZ", SDT_ARM64cbz, - [SDNPHasChain]>; -def ARM64tbz : SDNode<"ARM64ISD::TBZ", SDT_ARM64tbz, - [SDNPHasChain]>; -def ARM64tbnz : SDNode<"ARM64ISD::TBNZ", SDT_ARM64tbz, - [SDNPHasChain]>; - - -def ARM64csel : SDNode<"ARM64ISD::CSEL", SDT_ARM64CSel>; -def ARM64csinv : SDNode<"ARM64ISD::CSINV", SDT_ARM64CSel>; -def ARM64csneg : SDNode<"ARM64ISD::CSNEG", SDT_ARM64CSel>; -def ARM64csinc : SDNode<"ARM64ISD::CSINC", SDT_ARM64CSel>; -def ARM64retflag : SDNode<"ARM64ISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def ARM64adc : SDNode<"ARM64ISD::ADC", SDTBinaryArithWithFlagsIn >; -def ARM64sbc : SDNode<"ARM64ISD::SBC", SDTBinaryArithWithFlagsIn>; -def ARM64add_flag : SDNode<"ARM64ISD::ADDS", SDTBinaryArithWithFlagsOut, - [SDNPCommutative]>; -def ARM64sub_flag : SDNode<"ARM64ISD::SUBS", SDTBinaryArithWithFlagsOut>; -def ARM64and_flag : SDNode<"ARM64ISD::ANDS", SDTBinaryArithWithFlagsOut>; -def ARM64adc_flag : SDNode<"ARM64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; -def ARM64sbc_flag : SDNode<"ARM64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; - -def ARM64threadpointer : SDNode<"ARM64ISD::THREAD_POINTER", SDTPtrLeaf>; - -def ARM64fcmp : SDNode<"ARM64ISD::FCMP", SDT_ARM64FCmp>; - -def ARM64fmax : SDNode<"ARM64ISD::FMAX", SDTFPBinOp>; -def ARM64fmin : SDNode<"ARM64ISD::FMIN", SDTFPBinOp>; - -def ARM64dup : SDNode<"ARM64ISD::DUP", SDT_ARM64Dup>; -def ARM64duplane8 : SDNode<"ARM64ISD::DUPLANE8", SDT_ARM64DupLane>; -def ARM64duplane16 : SDNode<"ARM64ISD::DUPLANE16", SDT_ARM64DupLane>; -def ARM64duplane32 : SDNode<"ARM64ISD::DUPLANE32", SDT_ARM64DupLane>; -def ARM64duplane64 : SDNode<"ARM64ISD::DUPLANE64", SDT_ARM64DupLane>; - -def ARM64zip1 : SDNode<"ARM64ISD::ZIP1", SDT_ARM64Zip>; -def ARM64zip2 : SDNode<"ARM64ISD::ZIP2", SDT_ARM64Zip>; -def ARM64uzp1 : SDNode<"ARM64ISD::UZP1", SDT_ARM64Zip>; -def ARM64uzp2 : SDNode<"ARM64ISD::UZP2", SDT_ARM64Zip>; -def ARM64trn1 : SDNode<"ARM64ISD::TRN1", SDT_ARM64Zip>; -def ARM64trn2 : SDNode<"ARM64ISD::TRN2", SDT_ARM64Zip>; - -def ARM64movi_edit : SDNode<"ARM64ISD::MOVIedit", SDT_ARM64MOVIedit>; -def ARM64movi_shift : 
SDNode<"ARM64ISD::MOVIshift", SDT_ARM64MOVIshift>; -def ARM64movi_msl : SDNode<"ARM64ISD::MOVImsl", SDT_ARM64MOVIshift>; -def ARM64mvni_shift : SDNode<"ARM64ISD::MVNIshift", SDT_ARM64MOVIshift>; -def ARM64mvni_msl : SDNode<"ARM64ISD::MVNImsl", SDT_ARM64MOVIshift>; -def ARM64movi : SDNode<"ARM64ISD::MOVI", SDT_ARM64MOVIedit>; -def ARM64fmov : SDNode<"ARM64ISD::FMOV", SDT_ARM64MOVIedit>; - -def ARM64rev16 : SDNode<"ARM64ISD::REV16", SDT_ARM64UnaryVec>; -def ARM64rev32 : SDNode<"ARM64ISD::REV32", SDT_ARM64UnaryVec>; -def ARM64rev64 : SDNode<"ARM64ISD::REV64", SDT_ARM64UnaryVec>; -def ARM64ext : SDNode<"ARM64ISD::EXT", SDT_ARM64ExtVec>; - -def ARM64vashr : SDNode<"ARM64ISD::VASHR", SDT_ARM64vshift>; -def ARM64vlshr : SDNode<"ARM64ISD::VLSHR", SDT_ARM64vshift>; -def ARM64vshl : SDNode<"ARM64ISD::VSHL", SDT_ARM64vshift>; -def ARM64sqshli : SDNode<"ARM64ISD::SQSHL_I", SDT_ARM64vshift>; -def ARM64uqshli : SDNode<"ARM64ISD::UQSHL_I", SDT_ARM64vshift>; -def ARM64sqshlui : SDNode<"ARM64ISD::SQSHLU_I", SDT_ARM64vshift>; -def ARM64srshri : SDNode<"ARM64ISD::SRSHR_I", SDT_ARM64vshift>; -def ARM64urshri : SDNode<"ARM64ISD::URSHR_I", SDT_ARM64vshift>; - -def ARM64not: SDNode<"ARM64ISD::NOT", SDT_ARM64unvec>; -def ARM64bit: SDNode<"ARM64ISD::BIT", SDT_ARM64trivec>; - -def ARM64cmeq: SDNode<"ARM64ISD::CMEQ", SDT_ARM64binvec>; -def ARM64cmge: SDNode<"ARM64ISD::CMGE", SDT_ARM64binvec>; -def ARM64cmgt: SDNode<"ARM64ISD::CMGT", SDT_ARM64binvec>; -def ARM64cmhi: SDNode<"ARM64ISD::CMHI", SDT_ARM64binvec>; -def ARM64cmhs: SDNode<"ARM64ISD::CMHS", SDT_ARM64binvec>; - -def ARM64fcmeq: SDNode<"ARM64ISD::FCMEQ", SDT_ARM64fcmp>; -def ARM64fcmge: SDNode<"ARM64ISD::FCMGE", SDT_ARM64fcmp>; -def ARM64fcmgt: SDNode<"ARM64ISD::FCMGT", SDT_ARM64fcmp>; - -def ARM64cmeqz: SDNode<"ARM64ISD::CMEQz", SDT_ARM64unvec>; -def ARM64cmgez: SDNode<"ARM64ISD::CMGEz", SDT_ARM64unvec>; -def ARM64cmgtz: SDNode<"ARM64ISD::CMGTz", SDT_ARM64unvec>; -def ARM64cmlez: SDNode<"ARM64ISD::CMLEz", SDT_ARM64unvec>; -def ARM64cmltz: SDNode<"ARM64ISD::CMLTz", SDT_ARM64unvec>; -def ARM64cmtst : PatFrag<(ops node:$LHS, node:$RHS), - (ARM64not (ARM64cmeqz (and node:$LHS, node:$RHS)))>; - -def ARM64fcmeqz: SDNode<"ARM64ISD::FCMEQz", SDT_ARM64fcmpz>; -def ARM64fcmgez: SDNode<"ARM64ISD::FCMGEz", SDT_ARM64fcmpz>; -def ARM64fcmgtz: SDNode<"ARM64ISD::FCMGTz", SDT_ARM64fcmpz>; -def ARM64fcmlez: SDNode<"ARM64ISD::FCMLEz", SDT_ARM64fcmpz>; -def ARM64fcmltz: SDNode<"ARM64ISD::FCMLTz", SDT_ARM64fcmpz>; - -def ARM64bici: SDNode<"ARM64ISD::BICi", SDT_ARM64vecimm>; -def ARM64orri: SDNode<"ARM64ISD::ORRi", SDT_ARM64vecimm>; - -def ARM64neg : SDNode<"ARM64ISD::NEG", SDT_ARM64unvec>; - -def ARM64tcret: SDNode<"ARM64ISD::TC_RETURN", SDT_ARM64TCRET, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; - -def ARM64Prefetch : SDNode<"ARM64ISD::PREFETCH", SDT_ARM64PREFETCH, - [SDNPHasChain, SDNPSideEffect]>; - -def ARM64sitof: SDNode<"ARM64ISD::SITOF", SDT_ARM64ITOF>; -def ARM64uitof: SDNode<"ARM64ISD::UITOF", SDT_ARM64ITOF>; - -def ARM64tlsdesc_call : SDNode<"ARM64ISD::TLSDESC_CALL", SDT_ARM64TLSDescCall, - [SDNPInGlue, SDNPOutGlue, SDNPHasChain, - SDNPVariadic]>; - -def ARM64WrapperLarge : SDNode<"ARM64ISD::WrapperLarge", SDT_ARM64WrapperLarge>; - - -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// - -// ARM64 Instruction Predicate Definitions. 
-// -def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">; -def NoZCZ : Predicate<"!Subtarget->hasZeroCycleZeroing()">; -def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; -def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">; -def ForCodeSize : Predicate<"ForCodeSize">; -def NotForCodeSize : Predicate<"!ForCodeSize">; - -include "ARM64InstrFormats.td" - -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Miscellaneous instructions. -//===----------------------------------------------------------------------===// - -let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in { -def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), - [(ARM64callseq_start timm:$amt)]>; -def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), - [(ARM64callseq_end timm:$amt1, timm:$amt2)]>; -} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 - -let isReMaterializable = 1, isCodeGenOnly = 1 in { -// FIXME: The following pseudo instructions are only needed because remat -// cannot handle multiple instructions. When that changes, they can be -// removed, along with the ARM64Wrapper node. - -let AddedComplexity = 10 in -def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr), - [(set GPR64:$dst, (ARM64LOADgot tglobaladdr:$addr))]>, - Sched<[WriteLDAdr]>; - -// The MOVaddr instruction should match only when the add is not folded -// into a load or store address. -def MOVaddr - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tglobaladdr:$hi), - tglobaladdr:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrJT - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tjumptable:$hi), - tjumptable:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrCP - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tconstpool:$hi), - tconstpool:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrBA - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tblockaddress:$hi), - tblockaddress:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrTLS - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tglobaltlsaddr:$hi), - tglobaltlsaddr:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrEXT - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp texternalsym:$hi), - texternalsym:$low))]>, - Sched<[WriteAdrAdr]>; - -} // isReMaterializable, isCodeGenOnly - -def : Pat<(ARM64LOADgot tglobaltlsaddr:$addr), - (LOADgot tglobaltlsaddr:$addr)>; - -def : Pat<(ARM64LOADgot texternalsym:$addr), - (LOADgot texternalsym:$addr)>; - -def : Pat<(ARM64LOADgot tconstpool:$addr), - (LOADgot tconstpool:$addr)>; - -//===----------------------------------------------------------------------===// -// System instructions. -//===----------------------------------------------------------------------===// - -def HINT : HintI<"hint">; -def : InstAlias<"nop", (HINT 0b000)>; -def : InstAlias<"yield",(HINT 0b001)>; -def : InstAlias<"wfe", (HINT 0b010)>; -def : InstAlias<"wfi", (HINT 0b011)>; -def : InstAlias<"sev", (HINT 0b100)>; -def : InstAlias<"sevl", (HINT 0b101)>; - - // As far as LLVM is concerned this writes to the system's exclusive monitors. 
-let mayLoad = 1, mayStore = 1 in -def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">; - -def DMB : CRmSystemI<barrier_op, 0b101, "dmb">; -def DSB : CRmSystemI<barrier_op, 0b100, "dsb">; -def ISB : CRmSystemI<barrier_op, 0b110, "isb">; -def : InstAlias<"clrex", (CLREX 0xf)>; -def : InstAlias<"isb", (ISB 0xf)>; - -def MRS : MRSI; -def MSR : MSRI; -def MSRcpsr: MSRcpsrI; - -// The thread pointer (on Linux, at least, where this has been implemented) is -// TPIDR_EL0. -def : Pat<(ARM64threadpointer), (MRS 0xde82)>; - -// Generic system instructions -def SYS : SystemI<0, "sys">; -def SYSxt : SystemXtI<0, "sys">; -def SYSLxt : SystemLXtI<1, "sysl">; - -//===----------------------------------------------------------------------===// -// Move immediate instructions. -//===----------------------------------------------------------------------===// - -defm MOVK : InsertImmediate<0b11, "movk">; -defm MOVN : MoveImmediate<0b00, "movn">; - -let PostEncoderMethod = "fixMOVZ" in -defm MOVZ : MoveImmediate<0b10, "movz">; - -def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>; - -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; - -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; - -def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g3:$sym, 48)>; -def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g2:$sym, 32)>; -def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>; -def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>; - -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32)>; -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16)>; -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0)>; - -def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g2:$sym, 32)>; -def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16)>; -def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0)>; - -let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1, - isAsCheapAsAMove = 1 in { -// FIXME: The following pseudo instructions are only needed because remat -// cannot handle multiple instructions. When that changes, we can select -// directly to the real instructions and get rid of these pseudos. 
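The MOVZ/MOVK symbol aliases above, the MOVi64imm pseudo defined next, and the ARM64WrapperLarge patterns a little further down all rely on the same scheme: a 64-bit value is built out of four 16-bit halfwords, with one MOVZ seeding the register and MOVKs patching in the remaining pieces at shifts 48, 32, 16 and 0. A rough standalone illustration of that chunking; the printed mnemonics are only illustrative and this is not the backend's actual expansion code (which, among other things, skips halfwords that are already zero):

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint64_t Imm = 0x1122334455667788ULL;
    bool First = true;
    // Walk the halfwords top-down, matching the order used by the WrapperLarge patterns.
    for (int Shift = 48; Shift >= 0; Shift -= 16) {
      unsigned Half = (Imm >> Shift) & 0xffff;
      std::printf("%s x0, #0x%x, lsl #%d\n", First ? "movz" : "movk", Half, Shift);
      First = false;   // MOVZ zeroes the rest of the register; MOVK keeps it.
    }
    return 0;
  }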
- -def MOVi32imm - : Pseudo<(outs GPR32:$dst), (ins i32imm:$src), - [(set GPR32:$dst, imm:$src)]>, - Sched<[WriteImm]>; -def MOVi64imm - : Pseudo<(outs GPR64:$dst), (ins i64imm:$src), - [(set GPR64:$dst, imm:$src)]>, - Sched<[WriteImm]>; -} // isReMaterializable, isCodeGenOnly - -def : Pat<(ARM64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, - tglobaladdr:$g1, tglobaladdr:$g0), - (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48), - tglobaladdr:$g2, 32), - tglobaladdr:$g1, 16), - tglobaladdr:$g0, 0)>; - -def : Pat<(ARM64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, - tblockaddress:$g1, tblockaddress:$g0), - (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g3, 48), - tblockaddress:$g2, 32), - tblockaddress:$g1, 16), - tblockaddress:$g0, 0)>; - -def : Pat<(ARM64WrapperLarge tconstpool:$g3, tconstpool:$g2, - tconstpool:$g1, tconstpool:$g0), - (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g3, 48), - tconstpool:$g2, 32), - tconstpool:$g1, 16), - tconstpool:$g0, 0)>; - - -//===----------------------------------------------------------------------===// -// Arithmetic instructions. -//===----------------------------------------------------------------------===// - -// Add/subtract with carry. -defm ADC : AddSubCarry<0, "adc", "adcs", ARM64adc, ARM64adc_flag>; -defm SBC : AddSubCarry<1, "sbc", "sbcs", ARM64sbc, ARM64sbc_flag>; - -def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; -def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; -def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; -def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; - -// Add/subtract -defm ADD : AddSub<0, "add", add>; -defm SUB : AddSub<1, "sub">; - -defm ADDS : AddSubS<0, "adds", ARM64add_flag>; -defm SUBS : AddSubS<1, "subs", ARM64sub_flag>; - -// Use SUBS instead of SUB to enable CSE between SUBS and SUB. -def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), - (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; -def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), - (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; -def : Pat<(sub GPR32:$Rn, GPR32:$Rm), - (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; -def : Pat<(sub GPR64:$Rn, GPR64:$Rm), - (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; -def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), - (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; -def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), - (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; -def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3), - (SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>; -def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3), - (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>; - -// Because of the immediate format for add/sub-imm instructions, the -// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). -// These patterns capture that transformation. 
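The comment just above (and its twin further down for the flag-setting forms) describes the reason for the patterns that follow: the add/sub immediate field is an unsigned 12-bit value, optionally shifted left by 12, so a negative addend can only be encoded by negating it and flipping ADD to SUB (or vice versa). A small standalone sketch of that check and rewrite, under exactly that encoding assumption; the helper name is made up:

  #include <cstdint>
  #include <cstdio>

  // An add/sub immediate is a 12-bit unsigned value, optionally shifted left by 12.
  static bool isLegalAddSubImm(uint64_t V) {
    return V < (1u << 12) || ((V & 0xfffu) == 0 && (V >> 12) < (1u << 12));
  }

  int main() {
    int64_t Imm = -1;                                   // e.g. "add x0, x1, #-1"
    if (Imm < 0 && isLegalAddSubImm((uint64_t)-Imm))    // 1 fits the SUB immediate,
      std::printf("select SUBXri with #%lld\n", (long long)-Imm);  // so emit sub x0, x1, #1
    return 0;
  }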
-let AddedComplexity = 1 in { -def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), - (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), - (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), - (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), - (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -} - -def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0)>; -def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0)>; -def : InstAlias<"neg $dst, $src, $shift", - (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift:$shift)>; -def : InstAlias<"neg $dst, $src, $shift", - (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift:$shift)>; - -// Because of the immediate format for add/sub-imm instructions, the -// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). -// These patterns capture that transformation. -let AddedComplexity = 1 in { -def : Pat<(ARM64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), - (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(ARM64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), - (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -def : Pat<(ARM64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), - (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(ARM64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), - (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -} - -def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0)>; -def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0)>; -def : InstAlias<"negs $dst, $src, $shift", - (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift:$shift)>; -def : InstAlias<"negs $dst, $src, $shift", - (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift:$shift)>; - -// Unsigned/Signed divide -defm UDIV : Div<0, "udiv", udiv>; -defm SDIV : Div<1, "sdiv", sdiv>; -let isCodeGenOnly = 1 in { -defm UDIV_Int : Div<0, "udiv", int_arm64_udiv>; -defm SDIV_Int : Div<1, "sdiv", int_arm64_sdiv>; -} - -// Variable shift -defm ASRV : Shift<0b10, "asrv", sra>; -defm LSLV : Shift<0b00, "lslv", shl>; -defm LSRV : Shift<0b01, "lsrv", srl>; -defm RORV : Shift<0b11, "rorv", rotr>; - -def : ShiftAlias<"asr", ASRVWr, GPR32>; -def : ShiftAlias<"asr", ASRVXr, GPR64>; -def : ShiftAlias<"lsl", LSLVWr, GPR32>; -def : ShiftAlias<"lsl", LSLVXr, GPR64>; -def : ShiftAlias<"lsr", LSRVWr, GPR32>; -def : ShiftAlias<"lsr", LSRVXr, GPR64>; -def : ShiftAlias<"ror", RORVWr, GPR32>; -def : ShiftAlias<"ror", RORVXr, GPR64>; - -// Multiply-add -let AddedComplexity = 7 in { -defm MADD : MulAccum<0, "madd", add>; -defm MSUB : MulAccum<1, "msub", sub>; - -def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)), - (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; -def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)), - (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; - -def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))), - (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; -def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))), - (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; -} // AddedComplexity = 7 - -let AddedComplexity = 5 in { -def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; -def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; -def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; -def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; - -def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))), - (SMADDLrrr 
GPR32:$Rn, GPR32:$Rm, XZR)>; -def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), - (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; - -def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))), - (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; -def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))), - (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; -} // AddedComplexity = 5 - -def : MulAccumWAlias<"mul", MADDWrrr>; -def : MulAccumXAlias<"mul", MADDXrrr>; -def : MulAccumWAlias<"mneg", MSUBWrrr>; -def : MulAccumXAlias<"mneg", MSUBXrrr>; -def : WideMulAccumAlias<"smull", SMADDLrrr>; -def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; -def : WideMulAccumAlias<"umull", UMADDLrrr>; -def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; - -// Multiply-high -def SMULHrr : MulHi<0b010, "smulh", mulhs>; -def UMULHrr : MulHi<0b110, "umulh", mulhu>; - -// CRC32 -def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_arm64_crc32b, "crc32b">; -def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_arm64_crc32h, "crc32h">; -def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_arm64_crc32w, "crc32w">; -def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_arm64_crc32x, "crc32x">; - -def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_arm64_crc32cb, "crc32cb">; -def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_arm64_crc32ch, "crc32ch">; -def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_arm64_crc32cw, "crc32cw">; -def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_arm64_crc32cx, "crc32cx">; - - -//===----------------------------------------------------------------------===// -// Logical instructions. -//===----------------------------------------------------------------------===// - -// (immediate) -defm ANDS : LogicalImmS<0b11, "ands", ARM64and_flag>; -defm AND : LogicalImm<0b00, "and", and>; -defm EOR : LogicalImm<0b10, "eor", xor>; -defm ORR : LogicalImm<0b01, "orr", or>; - -def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR, - logical_imm32:$imm)>; -def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, - logical_imm64:$imm)>; - - -// (register) -defm ANDS : LogicalRegS<0b11, 0, "ands">; -defm BICS : LogicalRegS<0b11, 1, "bics">; -defm AND : LogicalReg<0b00, 0, "and", and>; -defm BIC : LogicalReg<0b00, 1, "bic", - BinOpFrag<(and node:$LHS, (not node:$RHS))>>; -defm EON : LogicalReg<0b10, 1, "eon", - BinOpFrag<(xor node:$LHS, (not node:$RHS))>>; -defm EOR : LogicalReg<0b10, 0, "eor", xor>; -defm ORN : LogicalReg<0b01, 1, "orn", - BinOpFrag<(or node:$LHS, (not node:$RHS))>>; -defm ORR : LogicalReg<0b01, 0, "orr", or>; - -def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0)>; -def : InstAlias<"mov $dst, $src", - (ADDWri GPR32sp:$dst, GPR32sp:$src, 0, 0)>; -def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0)>; -def : InstAlias<"mov $dst, $src", - (ADDXri GPR64sp:$dst, GPR64sp:$src, 0, 0)>; - -def : InstAlias<"tst $src1, $src2", - (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2)>; -def : InstAlias<"tst $src1, $src2", - (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2)>; - -def : InstAlias<"tst $src1, $src2", - (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0)>; -def : InstAlias<"tst $src1, $src2", - (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0)>; - -def : InstAlias<"tst $src1, $src2, $sh", - (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift:$sh)>; -def : InstAlias<"tst $src1, $src2, $sh", - (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift:$sh)>; - -def : InstAlias<"mvn $Wd, $Wm", - (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0)>; -def : InstAlias<"mvn $Xd, $Xm", - (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0)>; - 
-def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>; -def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; - - -//===----------------------------------------------------------------------===// -// One operand data processing instructions. -//===----------------------------------------------------------------------===// - -defm CLS : OneOperandData<0b101, "cls">; -defm CLZ : OneOperandData<0b100, "clz", ctlz>; -defm RBIT : OneOperandData<0b000, "rbit">; -def REV16Wr : OneWRegData<0b001, "rev16", - UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; -def REV16Xr : OneXRegData<0b001, "rev16", - UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; - -def : Pat<(cttz GPR32:$Rn), - (CLZWr (RBITWr GPR32:$Rn))>; -def : Pat<(cttz GPR64:$Rn), - (CLZXr (RBITXr GPR64:$Rn))>; - -// Unlike the other one operand instructions, the instructions with the "rev" -// mnemonic do *not* just different in the size bit, but actually use different -// opcode bits for the different sizes. -def REVWr : OneWRegData<0b010, "rev", bswap>; -def REVXr : OneXRegData<0b011, "rev", bswap>; -def REV32Xr : OneXRegData<0b010, "rev32", - UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>; - -//===----------------------------------------------------------------------===// -// Bitfield immediate extraction instruction. -//===----------------------------------------------------------------------===// -let neverHasSideEffects = 1 in -defm EXTR : ExtractImm<"extr">; -def : InstAlias<"ror $dst, $src, $shift", - (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>; -def : InstAlias<"ror $dst, $src, $shift", - (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>; - -def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)), - (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>; -def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)), - (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>; - -//===----------------------------------------------------------------------===// -// Other bitfield immediate instructions. -//===----------------------------------------------------------------------===// -let neverHasSideEffects = 1 in { -defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">; -defm SBFM : BitfieldImm<0b00, "sbfm">; -defm UBFM : BitfieldImm<0b10, "ubfm">; -} - -def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = (32 - N->getZExtValue()) & 0x1f; - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = 31 - N->getZExtValue(); - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -// min(7, 31 - shift_amt) -def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = 31 - N->getZExtValue(); - enc = enc > 7 ? 7 : enc; - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -// min(15, 31 - shift_amt) -def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = 31 - N->getZExtValue(); - enc = enc > 15 ? 15 : enc; - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = (64 - N->getZExtValue()) & 0x3f; - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = 63 - N->getZExtValue(); - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -// min(7, 63 - shift_amt) -def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = 63 - N->getZExtValue(); - enc = enc > 7 ? 
7 : enc; - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -// min(15, 63 - shift_amt) -def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = 63 - N->getZExtValue(); - enc = enc > 15 ? 15 : enc; - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -// min(31, 63 - shift_amt) -def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{ - uint64_t enc = 63 - N->getZExtValue(); - enc = enc > 31 ? 31 : enc; - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)), - (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), - (i64 (i32shift_b imm0_31:$imm)))>; -def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)), - (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_b imm0_63:$imm)))>; - -let AddedComplexity = 10 in { -def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)), - (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; -def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)), - (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; -} - -def : InstAlias<"asr $dst, $src, $shift", - (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; -def : InstAlias<"asr $dst, $src, $shift", - (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; -def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; -def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; -def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; -def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; -def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; - -def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)), - (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; -def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)), - (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; - -def : InstAlias<"lsr $dst, $src, $shift", - (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; -def : InstAlias<"lsr $dst, $src, $shift", - (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; -def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; -def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; -def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; -def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; -def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; - -//===----------------------------------------------------------------------===// -// Conditionally set flags instructions. -//===----------------------------------------------------------------------===// -defm CCMN : CondSetFlagsImm<0, "ccmn">; -defm CCMP : CondSetFlagsImm<1, "ccmp">; - -defm CCMN : CondSetFlagsReg<0, "ccmn">; -defm CCMP : CondSetFlagsReg<1, "ccmp">; - -//===----------------------------------------------------------------------===// -// Conditional select instructions. 
-//===----------------------------------------------------------------------===// -defm CSEL : CondSelect<0, 0b00, "csel">; - -def inc : PatFrag<(ops node:$in), (add node:$in, 1)>; -defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>; -defm CSINV : CondSelectOp<1, 0b00, "csinv", not>; -defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>; - -def : Pat<(ARM64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), CPSR), - (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), CPSR), - (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), CPSR), - (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), CPSR), - (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), CPSR), - (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; -def : Pat<(ARM64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), CPSR), - (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; - -def : Pat<(ARM64csel (i32 0), (i32 1), (i32 imm:$cc), CPSR), - (CSINCWr WZR, WZR, (i32 imm:$cc))>; -def : Pat<(ARM64csel (i64 0), (i64 1), (i32 imm:$cc), CPSR), - (CSINCXr XZR, XZR, (i32 imm:$cc))>; -def : Pat<(ARM64csel (i32 0), (i32 -1), (i32 imm:$cc), CPSR), - (CSINVWr WZR, WZR, (i32 imm:$cc))>; -def : Pat<(ARM64csel (i64 0), (i64 -1), (i32 imm:$cc), CPSR), - (CSINVXr XZR, XZR, (i32 imm:$cc))>; - -// The inverse of the condition code from the alias instruction is what is used -// in the aliased instruction. The parser all ready inverts the condition code -// for these aliases. -// FIXME: Is this the correct way to handle these aliases? -def : InstAlias<"cset $dst, $cc", (CSINCWr GPR32:$dst, WZR, WZR, ccode:$cc)>; -def : InstAlias<"cset $dst, $cc", (CSINCXr GPR64:$dst, XZR, XZR, ccode:$cc)>; - -def : InstAlias<"csetm $dst, $cc", (CSINVWr GPR32:$dst, WZR, WZR, ccode:$cc)>; -def : InstAlias<"csetm $dst, $cc", (CSINVXr GPR64:$dst, XZR, XZR, ccode:$cc)>; - -def : InstAlias<"cinc $dst, $src, $cc", - (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, ccode:$cc)>; -def : InstAlias<"cinc $dst, $src, $cc", - (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, ccode:$cc)>; - -def : InstAlias<"cinv $dst, $src, $cc", - (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, ccode:$cc)>; -def : InstAlias<"cinv $dst, $src, $cc", - (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, ccode:$cc)>; - -def : InstAlias<"cneg $dst, $src, $cc", - (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, ccode:$cc)>; -def : InstAlias<"cneg $dst, $src, $cc", - (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, ccode:$cc)>; - -//===----------------------------------------------------------------------===// -// PC-relative instructions. -//===----------------------------------------------------------------------===// -let isReMaterializable = 1 in { -let neverHasSideEffects = 1, mayStore = 0, mayLoad = 0 in { -def ADR : ADRI<0, "adr", adrlabel, []>; -} // neverHasSideEffects = 1 - -def ADRP : ADRI<1, "adrp", adrplabel, - [(set GPR64:$Xd, (ARM64adrp tglobaladdr:$label))]>; -} // isReMaterializable = 1 - -// page address of a constant pool entry, block address -def : Pat<(ARM64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>; -def : Pat<(ARM64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>; - -//===----------------------------------------------------------------------===// -// Unconditional branch (register) instructions. 
-//===----------------------------------------------------------------------===// - -let isReturn = 1, isTerminator = 1, isBarrier = 1 in { -def RET : BranchReg<0b0010, "ret", []>; -def DRPS : SpecialReturn<0b0101, "drps">; -def ERET : SpecialReturn<0b0100, "eret">; -} // isReturn = 1, isTerminator = 1, isBarrier = 1 - -// Default to the LR register. -def : InstAlias<"ret", (RET LR)>; - -let isCall = 1, Defs = [LR], Uses = [SP] in { -def BLR : BranchReg<0b0001, "blr", [(ARM64call GPR64:$Rn)]>; -} // isCall - -let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { -def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; -} // isBranch, isTerminator, isBarrier, isIndirectBranch - -// Create a separate pseudo-instruction for codegen to use so that we don't -// flag lr as used in every function. It'll be restored before the RET by the -// epilogue if it's legitimately used. -def RET_ReallyLR : Pseudo<(outs), (ins), [(ARM64retflag)]> { - let isTerminator = 1; - let isBarrier = 1; - let isReturn = 1; -} - -// This is a directive-like pseudo-instruction. The purpose is to insert an -// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction -// (which in the usual case is a BLR). -let hasSideEffects = 1 in -def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> { - let AsmString = ".tlsdesccall $sym"; -} - -// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It -// gets expanded to two MCInsts during lowering. -let isCall = 1, Defs = [LR] in -def TLSDESC_BLR - : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym), - [(ARM64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>; - -def : Pat<(ARM64tlsdesc_call GPR64:$dest, texternalsym:$sym), - (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>; -//===----------------------------------------------------------------------===// -// Conditional branch (immediate) instruction. -//===----------------------------------------------------------------------===// -def Bcc : BranchCond; - -//===----------------------------------------------------------------------===// -// Compare-and-branch instructions. -//===----------------------------------------------------------------------===// -defm CBZ : CmpBranch<0, "cbz", ARM64cbz>; -defm CBNZ : CmpBranch<1, "cbnz", ARM64cbnz>; - -//===----------------------------------------------------------------------===// -// Test-bit-and-branch instructions. -//===----------------------------------------------------------------------===// -def TBZ : TestBranch<0, "tbz", ARM64tbz>; -def TBNZ : TestBranch<1, "tbnz", ARM64tbnz>; - -//===----------------------------------------------------------------------===// -// Unconditional branch (immediate) instructions. -//===----------------------------------------------------------------------===// -let isBranch = 1, isTerminator = 1, isBarrier = 1 in { -def B : BranchImm<0, "b", [(br bb:$addr)]>; -} // isBranch, isTerminator, isBarrier - -let isCall = 1, Defs = [LR], Uses = [SP] in { -def BL : CallImm<1, "bl", [(ARM64call tglobaladdr:$addr)]>; -} // isCall -def : Pat<(ARM64call texternalsym:$func), (BL texternalsym:$func)>; - -//===----------------------------------------------------------------------===// -// Exception generation instructions. 
-//===----------------------------------------------------------------------===// -def BRK : ExceptionGeneration<0b001, 0b00, "brk">; -def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">; -def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">; -def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">; -def HLT : ExceptionGeneration<0b010, 0b00, "hlt">; -def HVC : ExceptionGeneration<0b000, 0b10, "hvc">; -def SMC : ExceptionGeneration<0b000, 0b11, "smc">; -def SVC : ExceptionGeneration<0b000, 0b01, "svc">; - -// DCPSn defaults to an immediate operand of zero if unspecified. -def : InstAlias<"dcps1", (DCPS1 0)>; -def : InstAlias<"dcps2", (DCPS2 0)>; -def : InstAlias<"dcps3", (DCPS3 0)>; - -//===----------------------------------------------------------------------===// -// Load instructions. -//===----------------------------------------------------------------------===// - -// Pair (indexed, offset) -def LDPWi : LoadPairOffset<0b00, 0, GPR32, am_indexed32simm7, "ldp">; -def LDPXi : LoadPairOffset<0b10, 0, GPR64, am_indexed64simm7, "ldp">; -def LDPSi : LoadPairOffset<0b00, 1, FPR32, am_indexed32simm7, "ldp">; -def LDPDi : LoadPairOffset<0b01, 1, FPR64, am_indexed64simm7, "ldp">; -def LDPQi : LoadPairOffset<0b10, 1, FPR128, am_indexed128simm7, "ldp">; - -def LDPSWi : LoadPairOffset<0b01, 0, GPR64, am_indexed32simm7, "ldpsw">; - -// Pair (pre-indexed) -def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32, am_indexed32simm7, "ldp">; -def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64, am_indexed64simm7, "ldp">; -def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32, am_indexed32simm7, "ldp">; -def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64, am_indexed64simm7, "ldp">; -def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128, am_indexed128simm7, "ldp">; - -def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64, am_indexed32simm7, "ldpsw">; - -// Pair (post-indexed) -def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32, simm7s4, "ldp">; -def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64, simm7s8, "ldp">; -def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32, simm7s4, "ldp">; -def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64, simm7s8, "ldp">; -def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128, simm7s16, "ldp">; - -def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64, simm7s4, "ldpsw">; - - -// Pair (no allocate) -def LDNPWi : LoadPairNoAlloc<0b00, 0, GPR32, am_indexed32simm7, "ldnp">; -def LDNPXi : LoadPairNoAlloc<0b10, 0, GPR64, am_indexed64simm7, "ldnp">; -def LDNPSi : LoadPairNoAlloc<0b00, 1, FPR32, am_indexed32simm7, "ldnp">; -def LDNPDi : LoadPairNoAlloc<0b01, 1, FPR64, am_indexed64simm7, "ldnp">; -def LDNPQi : LoadPairNoAlloc<0b10, 1, FPR128, am_indexed128simm7, "ldnp">; - -//--- -// (register offset) -//--- - -let AddedComplexity = 10 in { -// Integer -def LDRBBro : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", - [(set GPR32:$Rt, (zextloadi8 ro_indexed8:$addr))]>; -def LDRHHro : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", - [(set GPR32:$Rt, (zextloadi16 ro_indexed16:$addr))]>; -def LDRWro : Load32RO<0b10, 0, 0b01, GPR32, "ldr", - [(set GPR32:$Rt, (load ro_indexed32:$addr))]>; -def LDRXro : Load64RO<0b11, 0, 0b01, GPR64, "ldr", - [(set GPR64:$Rt, (load ro_indexed64:$addr))]>; - -// Floating-point -def LDRBro : Load8RO<0b00, 1, 0b01, FPR8, "ldr", - [(set FPR8:$Rt, (load ro_indexed8:$addr))]>; -def LDRHro : Load16RO<0b01, 1, 0b01, FPR16, "ldr", - [(set FPR16:$Rt, (load ro_indexed16:$addr))]>; -def LDRSro : Load32RO<0b10, 1, 0b01, FPR32, "ldr", - [(set (f32 FPR32:$Rt), (load ro_indexed32:$addr))]>; -def LDRDro : Load64RO<0b11, 1, 0b01, FPR64, "ldr", - [(set (f64 FPR64:$Rt), (load 
ro_indexed64:$addr))]>; -def LDRQro : Load128RO<0b00, 1, 0b11, FPR128, "ldr", []> { - let mayLoad = 1; -} - -// For regular load, we do not have any alignment requirement. -// Thus, it is safe to directly map the vector loads with interesting -// addressing modes. -// FIXME: We could do the same for bitconvert to floating point vectors. -def : Pat <(v8i8 (scalar_to_vector (i32 (extloadi8 ro_indexed8:$addr)))), - (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), bsub)>; -def : Pat <(v16i8 (scalar_to_vector (i32 (extloadi8 ro_indexed8:$addr)))), - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), bsub)>; -def : Pat <(v4i16 (scalar_to_vector (i32 (extloadi16 ro_indexed16:$addr)))), - (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), hsub)>; -def : Pat <(v8i16 (scalar_to_vector (i32 (extloadi16 ro_indexed16:$addr)))), - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), hsub)>; -def : Pat <(v2i32 (scalar_to_vector (i32 (load ro_indexed32:$addr)))), - (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), - (LDRSro ro_indexed32:$addr), ssub)>; -def : Pat <(v4i32 (scalar_to_vector (i32 (load ro_indexed32:$addr)))), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), - (LDRSro ro_indexed32:$addr), ssub)>; -def : Pat <(v1i64 (scalar_to_vector (i64 (load ro_indexed64:$addr)))), - (LDRDro ro_indexed64:$addr)>; -def : Pat <(v2i64 (scalar_to_vector (i64 (load ro_indexed64:$addr)))), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), - (LDRDro ro_indexed64:$addr), dsub)>; - -// Match all load 64 bits width whose type is compatible with FPR64 -def : Pat<(v2f32 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; -def : Pat<(v1f64 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; -def : Pat<(v8i8 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; -def : Pat<(v4i16 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; -def : Pat<(v2i32 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; -def : Pat<(v1i64 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; - -// Match all load 128 bits width whose type is compatible with FPR128 -def : Pat<(v4f32 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(v2f64 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(v16i8 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(v8i16 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(v4i32 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(v2i64 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(f128 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; - -// Load sign-extended half-word -def LDRSHWro : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", - [(set GPR32:$Rt, (sextloadi16 ro_indexed16:$addr))]>; -def LDRSHXro : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", - [(set GPR64:$Rt, (sextloadi16 ro_indexed16:$addr))]>; - -// Load sign-extended byte -def LDRSBWro : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", - [(set GPR32:$Rt, (sextloadi8 ro_indexed8:$addr))]>; -def LDRSBXro : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", - [(set GPR64:$Rt, (sextloadi8 ro_indexed8:$addr))]>; - -// Load sign-extended word -def LDRSWro : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", - [(set GPR64:$Rt, (sextloadi32 ro_indexed32:$addr))]>; - -// Pre-fetch. 
-def PRFMro : PrefetchRO<0b11, 0, 0b10, "prfm", - [(ARM64Prefetch imm:$Rt, ro_indexed64:$addr)]>; - -// zextload -> i64 -def : Pat<(i64 (zextloadi8 ro_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>; -def : Pat<(i64 (zextloadi16 ro_indexed16:$addr)), - (SUBREG_TO_REG (i64 0), (LDRHHro ro_indexed16:$addr), sub_32)>; - -// zextloadi1 -> zextloadi8 -def : Pat<(i32 (zextloadi1 ro_indexed8:$addr)), (LDRBBro ro_indexed8:$addr)>; -def : Pat<(i64 (zextloadi1 ro_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>; - -// extload -> zextload -def : Pat<(i32 (extloadi16 ro_indexed16:$addr)), (LDRHHro ro_indexed16:$addr)>; -def : Pat<(i32 (extloadi8 ro_indexed8:$addr)), (LDRBBro ro_indexed8:$addr)>; -def : Pat<(i32 (extloadi1 ro_indexed8:$addr)), (LDRBBro ro_indexed8:$addr)>; -def : Pat<(i64 (extloadi32 ro_indexed32:$addr)), - (SUBREG_TO_REG (i64 0), (LDRWro ro_indexed32:$addr), sub_32)>; -def : Pat<(i64 (extloadi16 ro_indexed16:$addr)), - (SUBREG_TO_REG (i64 0), (LDRHHro ro_indexed16:$addr), sub_32)>; -def : Pat<(i64 (extloadi8 ro_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>; -def : Pat<(i64 (extloadi1 ro_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>; - -} // AddedComplexity = 10 - -//--- -// (unsigned immediate) -//--- -def LDRXui : LoadUI<0b11, 0, 0b01, GPR64, am_indexed64, "ldr", - [(set GPR64:$Rt, (load am_indexed64:$addr))]>; -def LDRWui : LoadUI<0b10, 0, 0b01, GPR32, am_indexed32, "ldr", - [(set GPR32:$Rt, (load am_indexed32:$addr))]>; -def LDRBui : LoadUI<0b00, 1, 0b01, FPR8, am_indexed8, "ldr", - [(set FPR8:$Rt, (load am_indexed8:$addr))]>; -def LDRHui : LoadUI<0b01, 1, 0b01, FPR16, am_indexed16, "ldr", - [(set FPR16:$Rt, (load am_indexed16:$addr))]>; -def LDRSui : LoadUI<0b10, 1, 0b01, FPR32, am_indexed32, "ldr", - [(set (f32 FPR32:$Rt), (load am_indexed32:$addr))]>; -def LDRDui : LoadUI<0b11, 1, 0b01, FPR64, am_indexed64, "ldr", - [(set (f64 FPR64:$Rt), (load am_indexed64:$addr))]>; -def LDRQui : LoadUI<0b00, 1, 0b11, FPR128, am_indexed128, "ldr", - [(set (f128 FPR128:$Rt), (load am_indexed128:$addr))]>; - -// For regular load, we do not have any alignment requirement. -// Thus, it is safe to directly map the vector loads with interesting -// addressing modes. -// FIXME: We could do the same for bitconvert to floating point vectors. 
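The "extload -> zextload" and "zextloadi1 -> zextloadi8" groups above (and their unsigned-immediate counterparts just below) rest on two simple facts: an any-extending load leaves the high bits unspecified, so a zero-extending load is always a valid way to implement it, and an in-memory i1 occupies a whole byte holding 0 or 1, so a plain byte load already yields the right value. A tiny standalone illustration of the first fact:

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint8_t Mem = 0xAB;            // the byte in memory
    uint32_t AnyExt = Mem;         // implemented as a zero-extending load; high bits are 0
    // A consumer of an "anyext" load may only inspect the low 8 bits, so the
    // zero-extended result is indistinguishable from any other legal extension.
    std::printf("low byte = 0x%02x\n", AnyExt & 0xff);
    return 0;
  }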
-def : Pat <(v8i8 (scalar_to_vector (i32 (extloadi8 am_indexed8:$addr)))), - (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), bsub)>; -def : Pat <(v16i8 (scalar_to_vector (i32 (extloadi8 am_indexed8:$addr)))), - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), bsub)>; -def : Pat <(v4i16 (scalar_to_vector (i32 (extloadi16 am_indexed16:$addr)))), - (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), hsub)>; -def : Pat <(v8i16 (scalar_to_vector (i32 (extloadi16 am_indexed16:$addr)))), - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), hsub)>; -def : Pat <(v2i32 (scalar_to_vector (i32 (load am_indexed32:$addr)))), - (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), - (LDRSui am_indexed32:$addr), ssub)>; -def : Pat <(v4i32 (scalar_to_vector (i32 (load am_indexed32:$addr)))), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), - (LDRSui am_indexed32:$addr), ssub)>; -def : Pat <(v1i64 (scalar_to_vector (i64 (load am_indexed64:$addr)))), - (LDRDui am_indexed64:$addr)>; -def : Pat <(v2i64 (scalar_to_vector (i64 (load am_indexed64:$addr)))), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), - (LDRDui am_indexed64:$addr), dsub)>; - -// Match all load 64 bits width whose type is compatible with FPR64 -def : Pat<(v2f32 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; -def : Pat<(v1f64 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; -def : Pat<(v8i8 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; -def : Pat<(v4i16 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; -def : Pat<(v2i32 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; -def : Pat<(v1i64 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>; - -// Match all load 128 bits width whose type is compatible with FPR128 -def : Pat<(v4f32 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; -def : Pat<(v2f64 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; -def : Pat<(v16i8 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; -def : Pat<(v8i16 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; -def : Pat<(v4i32 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; -def : Pat<(v2i64 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; -def : Pat<(f128 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>; - -def LDRHHui : LoadUI<0b01, 0, 0b01, GPR32, am_indexed16, "ldrh", - [(set GPR32:$Rt, (zextloadi16 am_indexed16:$addr))]>; -def LDRBBui : LoadUI<0b00, 0, 0b01, GPR32, am_indexed8, "ldrb", - [(set GPR32:$Rt, (zextloadi8 am_indexed8:$addr))]>; -// zextload -> i64 -def : Pat<(i64 (zextloadi8 am_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>; -def : Pat<(i64 (zextloadi16 am_indexed16:$addr)), - (SUBREG_TO_REG (i64 0), (LDRHHui am_indexed16:$addr), sub_32)>; - -// zextloadi1 -> zextloadi8 -def : Pat<(i32 (zextloadi1 am_indexed8:$addr)), (LDRBBui am_indexed8:$addr)>; -def : Pat<(i64 (zextloadi1 am_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>; - -// extload -> zextload -def : Pat<(i32 (extloadi16 am_indexed16:$addr)), (LDRHHui am_indexed16:$addr)>; -def : Pat<(i32 (extloadi8 am_indexed8:$addr)), (LDRBBui am_indexed8:$addr)>; -def : Pat<(i32 (extloadi1 am_indexed8:$addr)), (LDRBBui am_indexed8:$addr)>; -def : Pat<(i64 (extloadi32 am_indexed32:$addr)), - (SUBREG_TO_REG (i64 0), (LDRWui am_indexed32:$addr), sub_32)>; -def : Pat<(i64 (extloadi16 am_indexed16:$addr)), - (SUBREG_TO_REG (i64 0), (LDRHHui am_indexed16:$addr), sub_32)>; -def : 
Pat<(i64 (extloadi8 am_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>; -def : Pat<(i64 (extloadi1 am_indexed8:$addr)), - (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>; - -// load sign-extended half-word -def LDRSHWui : LoadUI<0b01, 0, 0b11, GPR32, am_indexed16, "ldrsh", - [(set GPR32:$Rt, (sextloadi16 am_indexed16:$addr))]>; -def LDRSHXui : LoadUI<0b01, 0, 0b10, GPR64, am_indexed16, "ldrsh", - [(set GPR64:$Rt, (sextloadi16 am_indexed16:$addr))]>; - -// load sign-extended byte -def LDRSBWui : LoadUI<0b00, 0, 0b11, GPR32, am_indexed8, "ldrsb", - [(set GPR32:$Rt, (sextloadi8 am_indexed8:$addr))]>; -def LDRSBXui : LoadUI<0b00, 0, 0b10, GPR64, am_indexed8, "ldrsb", - [(set GPR64:$Rt, (sextloadi8 am_indexed8:$addr))]>; - -// load sign-extended word -def LDRSWui : LoadUI<0b10, 0, 0b10, GPR64, am_indexed32, "ldrsw", - [(set GPR64:$Rt, (sextloadi32 am_indexed32:$addr))]>; - -// load zero-extended word -def : Pat<(i64 (zextloadi32 am_indexed32:$addr)), - (SUBREG_TO_REG (i64 0), (LDRWui am_indexed32:$addr), sub_32)>; - -// Pre-fetch. -def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", - [(ARM64Prefetch imm:$Rt, am_indexed64:$addr)]>; - -//--- -// (literal) -def LDRWl : LoadLiteral<0b00, 0, GPR32, "ldr">; -def LDRXl : LoadLiteral<0b01, 0, GPR64, "ldr">; -def LDRSl : LoadLiteral<0b00, 1, FPR32, "ldr">; -def LDRDl : LoadLiteral<0b01, 1, FPR64, "ldr">; -def LDRQl : LoadLiteral<0b10, 1, FPR128, "ldr">; - -// load sign-extended word -def LDRSWl : LoadLiteral<0b10, 0, GPR64, "ldrsw">; - -// prefetch -def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>; -// [(ARM64Prefetch imm:$Rt, tglobaladdr:$label)]>; - -//--- -// (unscaled immediate) -def LDURXi : LoadUnscaled<0b11, 0, 0b01, GPR64, am_unscaled64, "ldur", - [(set GPR64:$Rt, (load am_unscaled64:$addr))]>; -def LDURWi : LoadUnscaled<0b10, 0, 0b01, GPR32, am_unscaled32, "ldur", - [(set GPR32:$Rt, (load am_unscaled32:$addr))]>; -def LDURBi : LoadUnscaled<0b00, 1, 0b01, FPR8, am_unscaled8, "ldur", - [(set FPR8:$Rt, (load am_unscaled8:$addr))]>; -def LDURHi : LoadUnscaled<0b01, 1, 0b01, FPR16, am_unscaled16, "ldur", - [(set FPR16:$Rt, (load am_unscaled16:$addr))]>; -def LDURSi : LoadUnscaled<0b10, 1, 0b01, FPR32, am_unscaled32, "ldur", - [(set (f32 FPR32:$Rt), (load am_unscaled32:$addr))]>; -def LDURDi : LoadUnscaled<0b11, 1, 0b01, FPR64, am_unscaled64, "ldur", - [(set (f64 FPR64:$Rt), (load am_unscaled64:$addr))]>; -def LDURQi : LoadUnscaled<0b00, 1, 0b11, FPR128, am_unscaled128, "ldur", - [(set (v2f64 FPR128:$Rt), (load am_unscaled128:$addr))]>; - -def LDURHHi - : LoadUnscaled<0b01, 0, 0b01, GPR32, am_unscaled16, "ldurh", - [(set GPR32:$Rt, (zextloadi16 am_unscaled16:$addr))]>; -def LDURBBi - : LoadUnscaled<0b00, 0, 0b01, GPR32, am_unscaled8, "ldurb", - [(set GPR32:$Rt, (zextloadi8 am_unscaled8:$addr))]>; - -// Match all load 64 bits width whose type is compatible with FPR64 -def : Pat<(v2f32 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; -def : Pat<(v1f64 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; -def : Pat<(v8i8 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; -def : Pat<(v4i16 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; -def : Pat<(v2i32 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; -def : Pat<(v1i64 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>; - -// Match all load 128 bits width whose type is compatible with FPR128 -def : Pat<(v4f32 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; -def : Pat<(v2f64 (load 
am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; -def : Pat<(v16i8 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; -def : Pat<(v8i16 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; -def : Pat<(v4i32 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; -def : Pat<(v2i64 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; -def : Pat<(f128 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>; - -// anyext -> zext -def : Pat<(i32 (extloadi16 am_unscaled16:$addr)), (LDURHHi am_unscaled16:$addr)>; -def : Pat<(i32 (extloadi8 am_unscaled8:$addr)), (LDURBBi am_unscaled8:$addr)>; -def : Pat<(i32 (extloadi1 am_unscaled8:$addr)), (LDURBBi am_unscaled8:$addr)>; -def : Pat<(i64 (extloadi32 am_unscaled32:$addr)), - (SUBREG_TO_REG (i64 0), (LDURWi am_unscaled32:$addr), sub_32)>; -def : Pat<(i64 (extloadi16 am_unscaled16:$addr)), - (SUBREG_TO_REG (i64 0), (LDURHHi am_unscaled16:$addr), sub_32)>; -def : Pat<(i64 (extloadi8 am_unscaled8:$addr)), - (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>; -def : Pat<(i64 (extloadi1 am_unscaled8:$addr)), - (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>; -// unscaled zext -def : Pat<(i32 (zextloadi16 am_unscaled16:$addr)), - (LDURHHi am_unscaled16:$addr)>; -def : Pat<(i32 (zextloadi8 am_unscaled8:$addr)), - (LDURBBi am_unscaled8:$addr)>; -def : Pat<(i32 (zextloadi1 am_unscaled8:$addr)), - (LDURBBi am_unscaled8:$addr)>; -def : Pat<(i64 (zextloadi32 am_unscaled32:$addr)), - (SUBREG_TO_REG (i64 0), (LDURWi am_unscaled32:$addr), sub_32)>; -def : Pat<(i64 (zextloadi16 am_unscaled16:$addr)), - (SUBREG_TO_REG (i64 0), (LDURHHi am_unscaled16:$addr), sub_32)>; -def : Pat<(i64 (zextloadi8 am_unscaled8:$addr)), - (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>; -def : Pat<(i64 (zextloadi1 am_unscaled8:$addr)), - (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>; - - -//--- -// LDR mnemonics fall back to LDUR for negative or unaligned offsets. - -// Define new assembler match classes as we want to only match these when -// the don't otherwise match the scaled addressing mode for LDR/STR. Don't -// associate a DiagnosticType either, as we want the diagnostic for the -// canonical form (the scaled operand) to take precedence. 
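The fallback described above exists because the two encodings accept different offsets: the scaled, unsigned-offset LDR/STR forms take a non-negative immediate that is a multiple of the access size (up to 4095 times that size), while LDUR/STUR take any signed 9-bit byte offset. A standalone sketch of the check an assembler could apply before falling back, using those ranges; the helper names are made up:

  #include <cstdio>

  // Scaled unsigned-offset form: offset >= 0, a multiple of the access size,
  // and at most 4095 * size.  Unscaled (LDUR) form: any offset in [-256, 255].
  static bool fitsScaledLDR(long Off, long Size) {
    return Off >= 0 && Off % Size == 0 && Off / Size <= 4095;
  }
  static bool fitsLDUR(long Off) { return Off >= -256 && Off <= 255; }

  int main() {
    long Off = -8;                                   // e.g. "ldr x0, [x1, #-8]"
    if (!fitsScaledLDR(Off, 8) && fitsLDUR(Off))
      std::puts("no scaled encoding; match the unscaled-fallback operand -> ldur");
    return 0;
  }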
-def MemoryUnscaledFB8Operand : AsmOperandClass { - let Name = "MemoryUnscaledFB8"; - let RenderMethod = "addMemoryUnscaledOperands"; -} -def MemoryUnscaledFB16Operand : AsmOperandClass { - let Name = "MemoryUnscaledFB16"; - let RenderMethod = "addMemoryUnscaledOperands"; -} -def MemoryUnscaledFB32Operand : AsmOperandClass { - let Name = "MemoryUnscaledFB32"; - let RenderMethod = "addMemoryUnscaledOperands"; -} -def MemoryUnscaledFB64Operand : AsmOperandClass { - let Name = "MemoryUnscaledFB64"; - let RenderMethod = "addMemoryUnscaledOperands"; -} -def MemoryUnscaledFB128Operand : AsmOperandClass { - let Name = "MemoryUnscaledFB128"; - let RenderMethod = "addMemoryUnscaledOperands"; -} -def am_unscaled_fb8 : Operand<i64> { - let ParserMatchClass = MemoryUnscaledFB8Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} -def am_unscaled_fb16 : Operand<i64> { - let ParserMatchClass = MemoryUnscaledFB16Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} -def am_unscaled_fb32 : Operand<i64> { - let ParserMatchClass = MemoryUnscaledFB32Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} -def am_unscaled_fb64 : Operand<i64> { - let ParserMatchClass = MemoryUnscaledFB64Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} -def am_unscaled_fb128 : Operand<i64> { - let ParserMatchClass = MemoryUnscaledFB128Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} -def : InstAlias<"ldr $Rt, $addr", (LDURXi GPR64:$Rt, am_unscaled_fb64:$addr)>; -def : InstAlias<"ldr $Rt, $addr", (LDURWi GPR32:$Rt, am_unscaled_fb32:$addr)>; -def : InstAlias<"ldr $Rt, $addr", (LDURBi FPR8:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"ldr $Rt, $addr", (LDURHi FPR16:$Rt, am_unscaled_fb16:$addr)>; -def : InstAlias<"ldr $Rt, $addr", (LDURSi FPR32:$Rt, am_unscaled_fb32:$addr)>; -def : InstAlias<"ldr $Rt, $addr", (LDURDi FPR64:$Rt, am_unscaled_fb64:$addr)>; -def : InstAlias<"ldr $Rt, $addr", (LDURQi FPR128:$Rt, am_unscaled_fb128:$addr)>; - -// zextload -> i64 -def : Pat<(i64 (zextloadi8 am_unscaled8:$addr)), - (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>; -def : Pat<(i64 (zextloadi16 am_unscaled16:$addr)), - (SUBREG_TO_REG (i64 0), (LDURHHi am_unscaled16:$addr), sub_32)>; - -// load sign-extended half-word -def LDURSHWi - : LoadUnscaled<0b01, 0, 0b11, GPR32, am_unscaled16, "ldursh", - [(set GPR32:$Rt, (sextloadi16 am_unscaled16:$addr))]>; -def LDURSHXi - : LoadUnscaled<0b01, 0, 0b10, GPR64, am_unscaled16, "ldursh", - [(set GPR64:$Rt, (sextloadi16 am_unscaled16:$addr))]>; - -// load sign-extended byte -def LDURSBWi - : LoadUnscaled<0b00, 0, 0b11, GPR32, am_unscaled8, "ldursb", - [(set GPR32:$Rt, (sextloadi8 am_unscaled8:$addr))]>; -def LDURSBXi - : LoadUnscaled<0b00, 0, 0b10, GPR64, am_unscaled8, "ldursb", - [(set GPR64:$Rt, (sextloadi8 am_unscaled8:$addr))]>; - -// load sign-extended word -def LDURSWi - : LoadUnscaled<0b10, 0, 0b10, GPR64, am_unscaled32, "ldursw", - [(set GPR64:$Rt, (sextloadi32 am_unscaled32:$addr))]>; - -// zero and sign extending aliases from generic LDR* mnemonics to LDUR*. 
-def : InstAlias<"ldrb $Rt, $addr", (LDURBBi GPR32:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"ldrh $Rt, $addr", (LDURHHi GPR32:$Rt, am_unscaled_fb16:$addr)>; -def : InstAlias<"ldrsb $Rt, $addr", (LDURSBWi GPR32:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"ldrsb $Rt, $addr", (LDURSBXi GPR64:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"ldrsh $Rt, $addr", (LDURSHWi GPR32:$Rt, am_unscaled_fb16:$addr)>; -def : InstAlias<"ldrsh $Rt, $addr", (LDURSHXi GPR64:$Rt, am_unscaled_fb16:$addr)>; -def : InstAlias<"ldrsw $Rt, $addr", (LDURSWi GPR64:$Rt, am_unscaled_fb32:$addr)>; - -// Pre-fetch. -def PRFUMi : PrefetchUnscaled<0b11, 0, 0b10, "prfum", - [(ARM64Prefetch imm:$Rt, am_unscaled64:$addr)]>; - -//--- -// (unscaled immediate, unprivileged) -def LDTRXi : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">; -def LDTRWi : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">; - -def LDTRHi : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">; -def LDTRBi : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">; - -// load sign-extended half-word -def LDTRSHWi : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">; -def LDTRSHXi : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">; - -// load sign-extended byte -def LDTRSBWi : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">; -def LDTRSBXi : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">; - -// load sign-extended word -def LDTRSWi : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">; - -//--- -// (immediate pre-indexed) -def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32, "ldr">; -def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64, "ldr">; -def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8, "ldr">; -def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16, "ldr">; -def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32, "ldr">; -def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64, "ldr">; -def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128, "ldr">; - -// load sign-extended half-word -def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32, "ldrsh">; -def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64, "ldrsh">; - -// load sign-extended byte -def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32, "ldrsb">; -def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64, "ldrsb">; - -// load zero-extended byte -def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32, "ldrb">; -def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32, "ldrh">; - -// load sign-extended word -def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64, "ldrsw">; - -// ISel pseudos and patterns. See expanded comment on LoadPreIdxPseudo. 
-def LDRDpre_isel : LoadPreIdxPseudo<FPR64>; -def LDRSpre_isel : LoadPreIdxPseudo<FPR32>; -def LDRXpre_isel : LoadPreIdxPseudo<GPR64>; -def LDRWpre_isel : LoadPreIdxPseudo<GPR32>; -def LDRHHpre_isel : LoadPreIdxPseudo<GPR32>; -def LDRBBpre_isel : LoadPreIdxPseudo<GPR32>; - -def LDRSWpre_isel : LoadPreIdxPseudo<GPR64>; -def LDRSHWpre_isel : LoadPreIdxPseudo<GPR32>; -def LDRSHXpre_isel : LoadPreIdxPseudo<GPR64>; -def LDRSBWpre_isel : LoadPreIdxPseudo<GPR32>; -def LDRSBXpre_isel : LoadPreIdxPseudo<GPR64>; - -//--- -// (immediate post-indexed) -def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32, "ldr">; -def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64, "ldr">; -def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8, "ldr">; -def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16, "ldr">; -def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32, "ldr">; -def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64, "ldr">; -def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128, "ldr">; - -// load sign-extended half-word -def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32, "ldrsh">; -def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64, "ldrsh">; - -// load sign-extended byte -def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32, "ldrsb">; -def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64, "ldrsb">; - -// load zero-extended byte -def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32, "ldrb">; -def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32, "ldrh">; - -// load sign-extended word -def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64, "ldrsw">; - -// ISel pseudos and patterns. See expanded comment on LoadPostIdxPseudo. -def LDRDpost_isel : LoadPostIdxPseudo<FPR64>; -def LDRSpost_isel : LoadPostIdxPseudo<FPR32>; -def LDRXpost_isel : LoadPostIdxPseudo<GPR64>; -def LDRWpost_isel : LoadPostIdxPseudo<GPR32>; -def LDRHHpost_isel : LoadPostIdxPseudo<GPR32>; -def LDRBBpost_isel : LoadPostIdxPseudo<GPR32>; - -def LDRSWpost_isel : LoadPostIdxPseudo<GPR64>; -def LDRSHWpost_isel : LoadPostIdxPseudo<GPR32>; -def LDRSHXpost_isel : LoadPostIdxPseudo<GPR64>; -def LDRSBWpost_isel : LoadPostIdxPseudo<GPR32>; -def LDRSBXpost_isel : LoadPostIdxPseudo<GPR64>; - -//===----------------------------------------------------------------------===// -// Store instructions. -//===----------------------------------------------------------------------===// - -// Pair (indexed, offset) -// FIXME: Use dedicated range-checked addressing mode operand here. 
-def STPWi : StorePairOffset<0b00, 0, GPR32, am_indexed32simm7, "stp">; -def STPXi : StorePairOffset<0b10, 0, GPR64, am_indexed64simm7, "stp">; -def STPSi : StorePairOffset<0b00, 1, FPR32, am_indexed32simm7, "stp">; -def STPDi : StorePairOffset<0b01, 1, FPR64, am_indexed64simm7, "stp">; -def STPQi : StorePairOffset<0b10, 1, FPR128, am_indexed128simm7, "stp">; - -// Pair (pre-indexed) -def STPWpre : StorePairPreIdx<0b00, 0, GPR32, am_indexed32simm7, "stp">; -def STPXpre : StorePairPreIdx<0b10, 0, GPR64, am_indexed64simm7, "stp">; -def STPSpre : StorePairPreIdx<0b00, 1, FPR32, am_indexed32simm7, "stp">; -def STPDpre : StorePairPreIdx<0b01, 1, FPR64, am_indexed64simm7, "stp">; -def STPQpre : StorePairPreIdx<0b10, 1, FPR128, am_indexed128simm7, "stp">; - -// Pair (post-indexed) -def STPWpost : StorePairPostIdx<0b00, 0, GPR32, simm7s4, "stp">; -def STPXpost : StorePairPostIdx<0b10, 0, GPR64, simm7s8, "stp">; -def STPSpost : StorePairPostIdx<0b00, 1, FPR32, simm7s4, "stp">; -def STPDpost : StorePairPostIdx<0b01, 1, FPR64, simm7s8, "stp">; -def STPQpost : StorePairPostIdx<0b10, 1, FPR128, simm7s16, "stp">; - -// Pair (no allocate) -def STNPWi : StorePairNoAlloc<0b00, 0, GPR32, am_indexed32simm7, "stnp">; -def STNPXi : StorePairNoAlloc<0b10, 0, GPR64, am_indexed64simm7, "stnp">; -def STNPSi : StorePairNoAlloc<0b00, 1, FPR32, am_indexed32simm7, "stnp">; -def STNPDi : StorePairNoAlloc<0b01, 1, FPR64, am_indexed64simm7, "stnp">; -def STNPQi : StorePairNoAlloc<0b10, 1, FPR128, am_indexed128simm7, "stnp">; - -//--- -// (Register offset) - -let AddedComplexity = 10 in { - -// Integer -def STRHHro : Store16RO<0b01, 0, 0b00, GPR32, "strh", - [(truncstorei16 GPR32:$Rt, ro_indexed16:$addr)]>; -def STRBBro : Store8RO<0b00, 0, 0b00, GPR32, "strb", - [(truncstorei8 GPR32:$Rt, ro_indexed8:$addr)]>; -def STRWro : Store32RO<0b10, 0, 0b00, GPR32, "str", - [(store GPR32:$Rt, ro_indexed32:$addr)]>; -def STRXro : Store64RO<0b11, 0, 0b00, GPR64, "str", - [(store GPR64:$Rt, ro_indexed64:$addr)]>; - -// truncstore i64 -def : Pat<(truncstorei8 GPR64:$Rt, ro_indexed8:$addr), - (STRBBro (EXTRACT_SUBREG GPR64:$Rt, sub_32), ro_indexed8:$addr)>; -def : Pat<(truncstorei16 GPR64:$Rt, ro_indexed16:$addr), - (STRHHro (EXTRACT_SUBREG GPR64:$Rt, sub_32), ro_indexed16:$addr)>; -def : Pat<(truncstorei32 GPR64:$Rt, ro_indexed32:$addr), - (STRWro (EXTRACT_SUBREG GPR64:$Rt, sub_32), ro_indexed32:$addr)>; - - -// Floating-point -def STRBro : Store8RO<0b00, 1, 0b00, FPR8, "str", - [(store FPR8:$Rt, ro_indexed8:$addr)]>; -def STRHro : Store16RO<0b01, 1, 0b00, FPR16, "str", - [(store FPR16:$Rt, ro_indexed16:$addr)]>; -def STRSro : Store32RO<0b10, 1, 0b00, FPR32, "str", - [(store (f32 FPR32:$Rt), ro_indexed32:$addr)]>; -def STRDro : Store64RO<0b11, 1, 0b00, FPR64, "str", - [(store (f64 FPR64:$Rt), ro_indexed64:$addr)]>; -def STRQro : Store128RO<0b00, 1, 0b10, FPR128, "str", []> { - let mayStore = 1; -} - -// Match all store 64 bits width whose type is compatible with FPR64 -def : Pat<(store (v2f32 FPR64:$Rn), ro_indexed64:$addr), - (STRDro FPR64:$Rn, ro_indexed64:$addr)>; -def : Pat<(store (v1f64 FPR64:$Rn), ro_indexed64:$addr), - (STRDro FPR64:$Rn, ro_indexed64:$addr)>; -def : Pat<(store (v8i8 FPR64:$Rn), ro_indexed64:$addr), - (STRDro FPR64:$Rn, ro_indexed64:$addr)>; -def : Pat<(store (v4i16 FPR64:$Rn), ro_indexed64:$addr), - (STRDro FPR64:$Rn, ro_indexed64:$addr)>; -def : Pat<(store (v2i32 FPR64:$Rn), ro_indexed64:$addr), - (STRDro FPR64:$Rn, ro_indexed64:$addr)>; -def : Pat<(store (v1i64 FPR64:$Rn), ro_indexed64:$addr), - (STRDro 
FPR64:$Rn, ro_indexed64:$addr)>; - -// Match all store 128 bits width whose type is compatible with FPR128 -def : Pat<(store (v4f32 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; -def : Pat<(store (v2f64 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; -def : Pat<(store (v16i8 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; -def : Pat<(store (v8i16 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; -def : Pat<(store (v4i32 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; -def : Pat<(store (v2i64 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; -def : Pat<(store (f128 FPR128:$Rn), ro_indexed128:$addr), - (STRQro FPR128:$Rn, ro_indexed128:$addr)>; - -//--- -// (unsigned immediate) -def STRXui : StoreUI<0b11, 0, 0b00, GPR64, am_indexed64, "str", - [(store GPR64:$Rt, am_indexed64:$addr)]>; -def STRWui : StoreUI<0b10, 0, 0b00, GPR32, am_indexed32, "str", - [(store GPR32:$Rt, am_indexed32:$addr)]>; -def STRBui : StoreUI<0b00, 1, 0b00, FPR8, am_indexed8, "str", - [(store FPR8:$Rt, am_indexed8:$addr)]>; -def STRHui : StoreUI<0b01, 1, 0b00, FPR16, am_indexed16, "str", - [(store FPR16:$Rt, am_indexed16:$addr)]>; -def STRSui : StoreUI<0b10, 1, 0b00, FPR32, am_indexed32, "str", - [(store (f32 FPR32:$Rt), am_indexed32:$addr)]>; -def STRDui : StoreUI<0b11, 1, 0b00, FPR64, am_indexed64, "str", - [(store (f64 FPR64:$Rt), am_indexed64:$addr)]>; -def STRQui : StoreUI<0b00, 1, 0b10, FPR128, am_indexed128, "str", []> { - let mayStore = 1; -} - -// Match all store 64 bits width whose type is compatible with FPR64 -def : Pat<(store (v2f32 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; -def : Pat<(store (v1f64 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; -def : Pat<(store (v8i8 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; -def : Pat<(store (v4i16 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; -def : Pat<(store (v2i32 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; -def : Pat<(store (v1i64 FPR64:$Rn), am_indexed64:$addr), - (STRDui FPR64:$Rn, am_indexed64:$addr)>; - -// Match all store 128 bits width whose type is compatible with FPR128 -def : Pat<(store (v4f32 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; -def : Pat<(store (v2f64 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; -def : Pat<(store (v16i8 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; -def : Pat<(store (v8i16 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; -def : Pat<(store (v4i32 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; -def : Pat<(store (v2i64 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; -def : Pat<(store (f128 FPR128:$Rn), am_indexed128:$addr), - (STRQui FPR128:$Rn, am_indexed128:$addr)>; - -def STRHHui : StoreUI<0b01, 0, 0b00, GPR32, am_indexed16, "strh", - [(truncstorei16 GPR32:$Rt, am_indexed16:$addr)]>; -def STRBBui : StoreUI<0b00, 0, 0b00, GPR32, am_indexed8, "strb", - [(truncstorei8 GPR32:$Rt, am_indexed8:$addr)]>; - -// truncstore i64 -def : Pat<(truncstorei32 GPR64:$Rt, am_indexed32:$addr), - (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_indexed32:$addr)>; -def : Pat<(truncstorei16 GPR64:$Rt, 
am_indexed16:$addr), - (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_indexed16:$addr)>; -def : Pat<(truncstorei8 GPR64:$Rt, am_indexed8:$addr), - (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_indexed8:$addr)>; - -} // AddedComplexity = 10 - -//--- -// (unscaled immediate) -def STURXi : StoreUnscaled<0b11, 0, 0b00, GPR64, am_unscaled64, "stur", - [(store GPR64:$Rt, am_unscaled64:$addr)]>; -def STURWi : StoreUnscaled<0b10, 0, 0b00, GPR32, am_unscaled32, "stur", - [(store GPR32:$Rt, am_unscaled32:$addr)]>; -def STURBi : StoreUnscaled<0b00, 1, 0b00, FPR8, am_unscaled8, "stur", - [(store FPR8:$Rt, am_unscaled8:$addr)]>; -def STURHi : StoreUnscaled<0b01, 1, 0b00, FPR16, am_unscaled16, "stur", - [(store FPR16:$Rt, am_unscaled16:$addr)]>; -def STURSi : StoreUnscaled<0b10, 1, 0b00, FPR32, am_unscaled32, "stur", - [(store (f32 FPR32:$Rt), am_unscaled32:$addr)]>; -def STURDi : StoreUnscaled<0b11, 1, 0b00, FPR64, am_unscaled64, "stur", - [(store (f64 FPR64:$Rt), am_unscaled64:$addr)]>; -def STURQi : StoreUnscaled<0b00, 1, 0b10, FPR128, am_unscaled128, "stur", - [(store (v2f64 FPR128:$Rt), am_unscaled128:$addr)]>; -def STURHHi : StoreUnscaled<0b01, 0, 0b00, GPR32, am_unscaled16, "sturh", - [(truncstorei16 GPR32:$Rt, am_unscaled16:$addr)]>; -def STURBBi : StoreUnscaled<0b00, 0, 0b00, GPR32, am_unscaled8, "sturb", - [(truncstorei8 GPR32:$Rt, am_unscaled8:$addr)]>; - -// Match all store 64 bits width whose type is compatible with FPR64 -def : Pat<(store (v2f32 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; -def : Pat<(store (v1f64 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; -def : Pat<(store (v8i8 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; -def : Pat<(store (v4i16 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; -def : Pat<(store (v2i32 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; -def : Pat<(store (v1i64 FPR64:$Rn), am_unscaled64:$addr), - (STURDi FPR64:$Rn, am_unscaled64:$addr)>; - -// Match all store 128 bits width whose type is compatible with FPR128 -def : Pat<(store (v4f32 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; -def : Pat<(store (v2f64 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; -def : Pat<(store (v16i8 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; -def : Pat<(store (v8i16 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; -def : Pat<(store (v4i32 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; -def : Pat<(store (v2i64 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; -def : Pat<(store (f128 FPR128:$Rn), am_unscaled128:$addr), - (STURQi FPR128:$Rn, am_unscaled128:$addr)>; - -// unscaled i64 truncating stores -def : Pat<(truncstorei32 GPR64:$Rt, am_unscaled32:$addr), - (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_unscaled32:$addr)>; -def : Pat<(truncstorei16 GPR64:$Rt, am_unscaled16:$addr), - (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_unscaled16:$addr)>; -def : Pat<(truncstorei8 GPR64:$Rt, am_unscaled8:$addr), - (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_unscaled8:$addr)>; - -//--- -// STR mnemonics fall back to STUR for negative or unaligned offsets. 
-def : InstAlias<"str $Rt, $addr", (STURXi GPR64:$Rt, am_unscaled_fb64:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURWi GPR32:$Rt, am_unscaled_fb32:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURBi FPR8:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURHi FPR16:$Rt, am_unscaled_fb16:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURSi FPR32:$Rt, am_unscaled_fb32:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURDi FPR64:$Rt, am_unscaled_fb64:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURQi FPR128:$Rt, am_unscaled_fb128:$addr)>; - -def : InstAlias<"strb $Rt, $addr", (STURBBi GPR32:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"strh $Rt, $addr", (STURHHi GPR32:$Rt, am_unscaled_fb16:$addr)>; - -//--- -// (unscaled immediate, unprivileged) -def STTRWi : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">; -def STTRXi : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">; - -def STTRHi : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">; -def STTRBi : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; - -//--- -// (immediate pre-indexed) -def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32, "str">; -def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64, "str">; -def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8, "str">; -def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16, "str">; -def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32, "str">; -def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64, "str">; -def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128, "str">; - -def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32, "strb">; -def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32, "strh">; - -// ISel pseudos and patterns. See expanded comment on StorePreIdxPseudo. -defm STRDpre : StorePreIdxPseudo<FPR64, f64, pre_store>; -defm STRSpre : StorePreIdxPseudo<FPR32, f32, pre_store>; -defm STRXpre : StorePreIdxPseudo<GPR64, i64, pre_store>; -defm STRWpre : StorePreIdxPseudo<GPR32, i32, pre_store>; -defm STRHHpre : StorePreIdxPseudo<GPR32, i32, pre_truncsti16>; -defm STRBBpre : StorePreIdxPseudo<GPR32, i32, pre_truncsti8>; -// truncstore i64 -def : Pat<(pre_truncsti32 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRWpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, - simm9:$off)>; -def : Pat<(pre_truncsti16 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRHHpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, - simm9:$off)>; -def : Pat<(pre_truncsti8 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRBBpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, - simm9:$off)>; - -//--- -// (immediate post-indexed) -def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32, "str">; -def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64, "str">; -def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8, "str">; -def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16, "str">; -def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32, "str">; -def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64, "str">; -def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128, "str">; - -def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32, "strb">; -def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32, "strh">; - -// ISel pseudos and patterns. See expanded comment on StorePostIdxPseudo. 
-defm STRDpost : StorePostIdxPseudo<FPR64, f64, post_store, STRDpost>; -defm STRSpost : StorePostIdxPseudo<FPR32, f32, post_store, STRSpost>; -defm STRXpost : StorePostIdxPseudo<GPR64, i64, post_store, STRXpost>; -defm STRWpost : StorePostIdxPseudo<GPR32, i32, post_store, STRWpost>; -defm STRHHpost : StorePostIdxPseudo<GPR32, i32, post_truncsti16, STRHHpost>; -defm STRBBpost : StorePostIdxPseudo<GPR32, i32, post_truncsti8, STRBBpost>; -// truncstore i64 -def : Pat<(post_truncsti32 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRWpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, - simm9:$off)>; -def : Pat<(post_truncsti16 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRHHpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, - simm9:$off)>; -def : Pat<(post_truncsti8 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRBBpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, - simm9:$off)>; - - -//===----------------------------------------------------------------------===// -// Load/store exclusive instructions. -//===----------------------------------------------------------------------===// - -def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">; -def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">; -def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">; -def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">; - -def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">; -def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">; -def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">; -def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">; - -def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">; -def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">; -def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">; -def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">; - -def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">; -def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">; -def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">; -def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">; - -def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">; -def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">; -def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">; -def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">; - -def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">; -def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">; -def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">; -def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">; - -def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">; -def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">; - -def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">; -def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">; - -def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">; -def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">; - -def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; -def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; - -//===----------------------------------------------------------------------===// -// Scaled floating point to integer conversion instructions. 
-//===----------------------------------------------------------------------===// - -defm FCVTAS : FPToInteger<0b00, 0b100, "fcvtas", int_arm64_neon_fcvtas>; -defm FCVTAU : FPToInteger<0b00, 0b101, "fcvtau", int_arm64_neon_fcvtau>; -defm FCVTMS : FPToInteger<0b10, 0b000, "fcvtms", int_arm64_neon_fcvtms>; -defm FCVTMU : FPToInteger<0b10, 0b001, "fcvtmu", int_arm64_neon_fcvtmu>; -defm FCVTNS : FPToInteger<0b00, 0b000, "fcvtns", int_arm64_neon_fcvtns>; -defm FCVTNU : FPToInteger<0b00, 0b001, "fcvtnu", int_arm64_neon_fcvtnu>; -defm FCVTPS : FPToInteger<0b01, 0b000, "fcvtps", int_arm64_neon_fcvtps>; -defm FCVTPU : FPToInteger<0b01, 0b001, "fcvtpu", int_arm64_neon_fcvtpu>; -defm FCVTZS : FPToInteger<0b11, 0b000, "fcvtzs", fp_to_sint>; -defm FCVTZU : FPToInteger<0b11, 0b001, "fcvtzu", fp_to_uint>; -let isCodeGenOnly = 1 in { -defm FCVTZS_Int : FPToInteger<0b11, 0b000, "fcvtzs", int_arm64_neon_fcvtzs>; -defm FCVTZU_Int : FPToInteger<0b11, 0b001, "fcvtzu", int_arm64_neon_fcvtzu>; -} - -//===----------------------------------------------------------------------===// -// Scaled integer to floating point conversion instructions. -//===----------------------------------------------------------------------===// - -defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>; -defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>; - -//===----------------------------------------------------------------------===// -// Unscaled integer to floating point conversion instruction. -//===----------------------------------------------------------------------===// - -defm FMOV : UnscaledConversion<"fmov">; - -def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>; -def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>; - -def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>; -def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>; -def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>; -def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>; -def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>; -def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>; -def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>; -def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>; -def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>; - -def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), (FMOVDXr V64:$Vn)>; -def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), (FMOVDXr V64:$Vn)>; -def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), (FMOVDXr V64:$Vn)>; -def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), (FMOVDXr V64:$Vn)>; -def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), (FMOVDXr V64:$Vn)>; -def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), (FMOVDXr V64:$Vn)>; - -def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))), (COPY_TO_REGCLASS GPR32:$Xn, - FPR32)>; -def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))), (COPY_TO_REGCLASS FPR32:$Xn, - GPR32)>; -def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))), (COPY_TO_REGCLASS GPR64:$Xn, - FPR64)>; -def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), (COPY_TO_REGCLASS FPR64:$Xn, - GPR64)>; - -//===----------------------------------------------------------------------===// -// Floating point conversion instruction. 
-//===----------------------------------------------------------------------===// - -defm FCVT : FPConversion<"fcvt">; - -def : Pat<(f32_to_f16 FPR32:$Rn), - (i32 (COPY_TO_REGCLASS - (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)), - GPR32))>; - -def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn), - [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>; - -//===----------------------------------------------------------------------===// -// Floating point single operand instructions. -//===----------------------------------------------------------------------===// - -defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>; -defm FMOV : SingleOperandFPData<0b0000, "fmov">; -defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>; -defm FRINTA : SingleOperandFPData<0b1100, "frinta", frnd>; -defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>; -defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>; -defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_arm64_neon_frintn>; -defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>; - -def : Pat<(v1f64 (int_arm64_neon_frintn (v1f64 FPR64:$Rn))), - (FRINTNDr FPR64:$Rn)>; - -// FRINTX is inserted to set the flags as required by FENV_ACCESS ON behavior -// in the C spec. Setting hasSideEffects ensures it is not DCE'd. -// <rdar://problem/13715968> -// TODO: We should really model the FPSR flags correctly. This is really ugly. -let hasSideEffects = 1 in { -defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>; -} - -defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>; - -let SchedRW = [WriteFDiv] in { -defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>; -} - -//===----------------------------------------------------------------------===// -// Floating point two operand instructions. -//===----------------------------------------------------------------------===// - -defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>; -let SchedRW = [WriteFDiv] in { -defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>; -} -defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_arm64_neon_fmaxnm>; -defm FMAX : TwoOperandFPData<0b0100, "fmax", ARM64fmax>; -defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_arm64_neon_fminnm>; -defm FMIN : TwoOperandFPData<0b0101, "fmin", ARM64fmin>; -let SchedRW = [WriteFMul] in { -defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>; -defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>; -} -defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>; - -def : Pat<(v1f64 (ARM64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (ARM64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FMINDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (int_arm64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (int_arm64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>; - -//===----------------------------------------------------------------------===// -// Floating point three operand instructions. 
-//===----------------------------------------------------------------------===// - -defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>; -defm FMSUB : ThreeOperandFPData<0, 1, "fmsub", - TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; -defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd", - TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >; -defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub", - TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; - -// The following def pats catch the case where the LHS of an FMA is negated. -// The TriOpFrag above catches the case where the middle operand is negated. -def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Rd)), - (FMSUBSrrr FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; - -def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Rd)), - (FMSUBDrrr FPR64:$Rd, FPR64:$Rn, FPR64:$Rm)>; - -//===----------------------------------------------------------------------===// -// Floating point comparison instructions. -//===----------------------------------------------------------------------===// - -defm FCMPE : FPComparison<1, "fcmpe">; -defm FCMP : FPComparison<0, "fcmp", ARM64fcmp>; - -//===----------------------------------------------------------------------===// -// Floating point conditional comparison instructions. -//===----------------------------------------------------------------------===// - -defm FCCMPE : FPCondComparison<1, "fccmpe">; -defm FCCMP : FPCondComparison<0, "fccmp">; - -//===----------------------------------------------------------------------===// -// Floating point conditional select instruction. -//===----------------------------------------------------------------------===// - -defm FCSEL : FPCondSelect<"fcsel">; - -// CSEL instructions providing f128 types need to be handled by a -// pseudo-instruction since the eventual code will need to introduce basic -// blocks and control flow. -def F128CSEL : Pseudo<(outs FPR128:$Rd), - (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond), - [(set (f128 FPR128:$Rd), - (ARM64csel FPR128:$Rn, FPR128:$Rm, - (i32 imm:$cond), CPSR))]> { - let Uses = [CPSR]; - let usesCustomInserter = 1; -} - - -//===----------------------------------------------------------------------===// -// Floating point immediate move. -//===----------------------------------------------------------------------===// - -let isReMaterializable = 1 in { -defm FMOV : FPMoveImmediate<"fmov">; -} - -//===----------------------------------------------------------------------===// -// Advanced SIMD two vector instructions. 
-//===----------------------------------------------------------------------===// - -defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_arm64_neon_abs>; -defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_arm64_neon_cls>; -defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>; -defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", ARM64cmeqz>; -defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", ARM64cmgez>; -defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", ARM64cmgtz>; -defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", ARM64cmlez>; -defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", ARM64cmltz>; -defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>; -defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>; - -defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", ARM64fcmeqz>; -defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", ARM64fcmgez>; -defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", ARM64fcmgtz>; -defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", ARM64fcmlez>; -defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", ARM64fcmltz>; -defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_arm64_neon_fcvtas>; -defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_arm64_neon_fcvtau>; -defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">; -def : Pat<(v4f32 (int_arm64_neon_vcvthf2fp (v4i16 V64:$Rn))), - (FCVTLv4i16 V64:$Rn)>; -def : Pat<(v4f32 (int_arm64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), - (i64 4)))), - (FCVTLv8i16 V128:$Rn)>; -def : Pat<(v2f64 (fextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; -def : Pat<(v2f64 (fextend (v2f32 (extract_subvector (v4f32 V128:$Rn), - (i64 2))))), - (FCVTLv4i32 V128:$Rn)>; - -defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_arm64_neon_fcvtms>; -defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_arm64_neon_fcvtmu>; -defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_arm64_neon_fcvtns>; -defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_arm64_neon_fcvtnu>; -defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">; -def : Pat<(v4i16 (int_arm64_neon_vcvtfp2hf (v4f32 V128:$Rn))), - (FCVTNv4i16 V128:$Rn)>; -def : Pat<(concat_vectors V64:$Rd, - (v4i16 (int_arm64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), - (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -def : Pat<(v2f32 (fround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; -def : Pat<(concat_vectors V64:$Rd, (v2f32 (fround (v2f64 V128:$Rn)))), - (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_arm64_neon_fcvtps>; -defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_arm64_neon_fcvtpu>; -defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", - int_arm64_neon_fcvtxn>; -defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>; -defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>; -let isCodeGenOnly = 1 in { -defm FCVTZS_Int : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", - int_arm64_neon_fcvtzs>; -defm FCVTZU_Int : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", - int_arm64_neon_fcvtzu>; -} -defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>; -defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_arm64_neon_frecpe>; -defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", frnd>; -defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>; -defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>; -defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", 
int_arm64_neon_frintn>; -defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>; -defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>; -defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>; -defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_arm64_neon_frsqrte>; -defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>; -defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", - UnOpFrag<(sub immAllZerosV, node:$LHS)> >; -defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>; -// Aliases for MVN -> NOT. -def : InstAlias<"mvn.8b $Vd, $Vn", (NOTv8i8 V64:$Vd, V64:$Vn)>; -def : InstAlias<"mvn.16b $Vd, $Vn", (NOTv16i8 V128:$Vd, V128:$Vn)>; -def : InstAlias<"mvn $Vd.8b, $Vn.8b", (NOTv8i8 V64:$Vd, V64:$Vn)>; -def : InstAlias<"mvn $Vd.16b, $Vn.16b", (NOTv16i8 V128:$Vd, V128:$Vn)>; - -def : Pat<(ARM64neg (v8i8 V64:$Rn)), (NEGv8i8 V64:$Rn)>; -def : Pat<(ARM64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>; -def : Pat<(ARM64neg (v4i16 V64:$Rn)), (NEGv4i16 V64:$Rn)>; -def : Pat<(ARM64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>; -def : Pat<(ARM64neg (v2i32 V64:$Rn)), (NEGv2i32 V64:$Rn)>; -def : Pat<(ARM64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>; -def : Pat<(ARM64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>; - -def : Pat<(ARM64not (v8i8 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(ARM64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(ARM64not (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(ARM64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(ARM64not (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(ARM64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(ARM64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; - -def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; -def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; - -defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_arm64_neon_rbit>; -defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", ARM64rev16>; -defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", ARM64rev32>; -defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", ARM64rev64>; -defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp", - BinOpFrag<(add node:$LHS, (int_arm64_neon_saddlp node:$RHS))> >; -defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_arm64_neon_saddlp>; -defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>; -defm SHLL : SIMDVectorLShiftLongBySizeBHS; -defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_arm64_neon_sqabs>; -defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_arm64_neon_sqneg>; -defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_arm64_neon_sqxtn>; -defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_arm64_neon_sqxtun>; -defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_arm64_neon_suqadd>; -defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp", - BinOpFrag<(add node:$LHS, (int_arm64_neon_uaddlp node:$RHS))> >; -defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", - int_arm64_neon_uaddlp>; -defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>; -defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_arm64_neon_uqxtn>; -defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_arm64_neon_urecpe>; -defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_arm64_neon_ursqrte>; -defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_arm64_neon_usqadd>; -defm XTN : 
SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>; - -def : Pat<(v2f32 (ARM64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>; -def : Pat<(v4f32 (ARM64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>; - -// Patterns for vector long shift (by element width). These need to match all -// three of zext, sext and anyext so it's easier to pull the patterns out of the -// definition. -multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> { - def : Pat<(ARM64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)), - (SHLLv8i8 V64:$Rn)>; - def : Pat<(ARM64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)), - (SHLLv16i8 V128:$Rn)>; - def : Pat<(ARM64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)), - (SHLLv4i16 V64:$Rn)>; - def : Pat<(ARM64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)), - (SHLLv8i16 V128:$Rn)>; - def : Pat<(ARM64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)), - (SHLLv2i32 V64:$Rn)>; - def : Pat<(ARM64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)), - (SHLLv4i32 V128:$Rn)>; -} - -defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>; -defm : SIMDVectorLShiftLongBySizeBHSPats<zext>; -defm : SIMDVectorLShiftLongBySizeBHSPats<sext>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD three vector instructions. -//===----------------------------------------------------------------------===// - -defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>; -defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_arm64_neon_addp>; -defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", ARM64cmeq>; -defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", ARM64cmge>; -defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", ARM64cmgt>; -defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", ARM64cmhi>; -defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", ARM64cmhs>; -defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", ARM64cmtst>; -defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_arm64_neon_fabd>; -defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_arm64_neon_facge>; -defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_arm64_neon_facgt>; -defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_arm64_neon_addp>; -defm FADD : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>; -defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", ARM64fcmeq>; -defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", ARM64fcmge>; -defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", ARM64fcmgt>; -defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>; -defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_arm64_neon_fmaxnmp>; -defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_arm64_neon_fmaxnm>; -defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_arm64_neon_fmaxp>; -defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", ARM64fmax>; -defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_arm64_neon_fminnmp>; -defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_arm64_neon_fminnm>; -defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_arm64_neon_fminp>; -defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", ARM64fmin>; - -// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the -// instruction expects the addend first, while the fma intrinsic puts it last. 
-defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b11001, "fmla", - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; -defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b11001, "fmls", - TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; - -// The following def pats catch the case where the LHS of an FMA is negated. -// The TriOpFrag above catches the case where the middle operand is negated. -def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)), - (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>; - -def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), - (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>; - -def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), - (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>; - -defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_arm64_neon_fmulx>; -defm FMUL : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>; -defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_arm64_neon_frecps>; -defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_arm64_neon_frsqrts>; -defm FSUB : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>; -defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", - TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >; -defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", - TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >; -defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>; -defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_arm64_neon_pmul>; -defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", - TriOpFrag<(add node:$LHS, (int_arm64_neon_sabd node:$MHS, node:$RHS))> >; -defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_arm64_neon_sabd>; -defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_arm64_neon_shadd>; -defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_arm64_neon_shsub>; -defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_arm64_neon_smaxp>; -defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_arm64_neon_smax>; -defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_arm64_neon_sminp>; -defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_arm64_neon_smin>; -defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_arm64_neon_sqadd>; -defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_arm64_neon_sqdmulh>; -defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_arm64_neon_sqrdmulh>; -defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_arm64_neon_sqrshl>; -defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_arm64_neon_sqshl>; -defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_arm64_neon_sqsub>; -defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_arm64_neon_srhadd>; -defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_arm64_neon_srshl>; -defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_arm64_neon_sshl>; -defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>; -defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba", - TriOpFrag<(add node:$LHS, (int_arm64_neon_uabd node:$MHS, node:$RHS))> >; -defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_arm64_neon_uabd>; -defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_arm64_neon_uhadd>; -defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_arm64_neon_uhsub>; -defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_arm64_neon_umaxp>; -defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_arm64_neon_umax>; -defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_arm64_neon_uminp>; -defm UMIN : 
SIMDThreeSameVectorBHS<1,0b01101,"umin", int_arm64_neon_umin>; -defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_arm64_neon_uqadd>; -defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_arm64_neon_uqrshl>; -defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_arm64_neon_uqshl>; -defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_arm64_neon_uqsub>; -defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_arm64_neon_urhadd>; -defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_arm64_neon_urshl>; -defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_arm64_neon_ushl>; - -defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; -defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", - BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >; -defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">; -defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", ARM64bit>; -defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl", - TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>; -defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>; -defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn", - BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >; -defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>; - -// FIXME: the .16b and .8b variants should be emitted by the -// AsmWriter. TableGen's AsmWriter-generator doesn't deal with variant syntaxes -// in aliases yet though. -def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}", - (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; -def : InstAlias<"{mov\t$dst.8h, $src.8h|mov.8h\t$dst, $src}", - (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; -def : InstAlias<"{mov\t$dst.4s, $src.4s|mov.4s\t$dst, $src}", - (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; -def : InstAlias<"{mov\t$dst.2d, $src.2d|mov.2d\t$dst, $src}", - (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; - -def : InstAlias<"{mov\t$dst.8b, $src.8b|mov.8b\t$dst, $src}", - (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; -def : InstAlias<"{mov\t$dst.4h, $src.4h|mov.4h\t$dst, $src}", - (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; -def : InstAlias<"{mov\t$dst.2s, $src.2s|mov.2s\t$dst, $src}", - (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; -def : InstAlias<"{mov\t$dst.1d, $src.1d|mov.1d\t$dst, $src}", - (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; - -def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" # - "|cmls.8b\t$dst, $src1, $src2}", - (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" # - "|cmls.16b\t$dst, $src1, $src2}", - (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" # - "|cmls.4h\t$dst, $src1, $src2}", - (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" # - "|cmls.8h\t$dst, $src1, $src2}", - (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" # - "|cmls.2s\t$dst, $src1, $src2}", - (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" # - "|cmls.4s\t$dst, $src1, $src2}", - (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" # - "|cmls.2d\t$dst, $src1, $src2}", - (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" # - "|cmlo.8b\t$dst, $src1, $src2}", - (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" # - "|cmlo.16b\t$dst, $src1, $src2}", - (CMHIv16i8 
V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" # - "|cmlo.4h\t$dst, $src1, $src2}", - (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" # - "|cmlo.8h\t$dst, $src1, $src2}", - (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" # - "|cmlo.2s\t$dst, $src1, $src2}", - (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" # - "|cmlo.4s\t$dst, $src1, $src2}", - (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" # - "|cmlo.2d\t$dst, $src1, $src2}", - (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" # - "|cmle.8b\t$dst, $src1, $src2}", - (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" # - "|cmle.16b\t$dst, $src1, $src2}", - (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" # - "|cmle.4h\t$dst, $src1, $src2}", - (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" # - "|cmle.8h\t$dst, $src1, $src2}", - (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" # - "|cmle.2s\t$dst, $src1, $src2}", - (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" # - "|cmle.4s\t$dst, $src1, $src2}", - (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" # - "|cmle.2d\t$dst, $src1, $src2}", - (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" # - "|cmlt.8b\t$dst, $src1, $src2}", - (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" # - "|cmlt.16b\t$dst, $src1, $src2}", - (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" # - "|cmlt.4h\t$dst, $src1, $src2}", - (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" # - "|cmlt.8h\t$dst, $src1, $src2}", - (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" # - "|cmlt.2s\t$dst, $src1, $src2}", - (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" # - "|cmlt.4s\t$dst, $src1, $src2}", - (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" # - "|cmlt.2d\t$dst, $src1, $src2}", - (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" # - "|fcmle.2s\t$dst, $src1, $src2}", - (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" # - "|fcmle.4s\t$dst, $src1, $src2}", - (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" # - "|fcmle.2d\t$dst, $src1, $src2}", - (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" # - "|fcmlt.2s\t$dst, $src1, $src2}", - (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" # - "|fcmlt.4s\t$dst, $src1, $src2}", - (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" # - 
"|fcmlt.2d\t$dst, $src1, $src2}", - (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" # - "|facle.2s\t$dst, $src1, $src2}", - (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" # - "|facle.4s\t$dst, $src1, $src2}", - (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" # - "|facle.2d\t$dst, $src1, $src2}", - (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" # - "|faclt.2s\t$dst, $src1, $src2}", - (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" # - "|faclt.4s\t$dst, $src1, $src2}", - (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" # - "|faclt.2d\t$dst, $src1, $src2}", - (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD three scalar instructions. -//===----------------------------------------------------------------------===// - -defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>; -defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", ARM64cmeq>; -defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", ARM64cmge>; -defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", ARM64cmgt>; -defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", ARM64cmhi>; -defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", ARM64cmhs>; -defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", ARM64cmtst>; -defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_arm64_sisd_fabd>; -def : Pat<(v1f64 (int_arm64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FABD64 FPR64:$Rn, FPR64:$Rm)>; -defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge", - int_arm64_neon_facge>; -defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt", - int_arm64_neon_facgt>; -defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", ARM64fcmeq>; -defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", ARM64fcmge>; -defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", ARM64fcmgt>; -defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_arm64_neon_fmulx>; -defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_arm64_neon_frecps>; -defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_arm64_neon_frsqrts>; -defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_arm64_neon_sqadd>; -defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_arm64_neon_sqdmulh>; -defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_arm64_neon_sqrdmulh>; -defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_arm64_neon_sqrshl>; -defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_arm64_neon_sqshl>; -defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_arm64_neon_sqsub>; -defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_arm64_neon_srshl>; -defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_arm64_neon_sshl>; -defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; -defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_arm64_neon_uqadd>; -defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_arm64_neon_uqrshl>; -defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_arm64_neon_uqshl>; -defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_arm64_neon_uqsub>; -defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_arm64_neon_urshl>; -defm USHL : SIMDThreeScalarD< 1, 0b01000, 
"ushl", int_arm64_neon_ushl>; - -def : InstAlias<"cmls $dst, $src1, $src2", - (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"cmle $dst, $src1, $src2", - (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"cmlo $dst, $src1, $src2", - (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"cmlt $dst, $src1, $src2", - (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"fcmle $dst, $src1, $src2", - (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1)>; -def : InstAlias<"fcmle $dst, $src1, $src2", - (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"fcmlt $dst, $src1, $src2", - (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1)>; -def : InstAlias<"fcmlt $dst, $src1, $src2", - (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"facle $dst, $src1, $src2", - (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1)>; -def : InstAlias<"facle $dst, $src1, $src2", - (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"faclt $dst, $src1, $src2", - (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1)>; -def : InstAlias<"faclt $dst, $src1, $src2", - (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD three scalar instructions (mixed operands). -//===----------------------------------------------------------------------===// -defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull", - int_arm64_neon_sqdmulls_scalar>; -defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">; -defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">; - -def : Pat<(i64 (int_arm64_neon_sqadd (i64 FPR64:$Rd), - (i64 (int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn), - (i32 FPR32:$Rm))))), - (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; -def : Pat<(i64 (int_arm64_neon_sqsub (i64 FPR64:$Rd), - (i64 (int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn), - (i32 FPR32:$Rm))))), - (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD two scalar instructions. 
-//===----------------------------------------------------------------------===// - -defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", int_arm64_neon_abs>; -defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", ARM64cmeqz>; -defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", ARM64cmgez>; -defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", ARM64cmgtz>; -defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", ARM64cmlez>; -defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", ARM64cmltz>; -defm FCMEQ : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", ARM64fcmeqz>; -defm FCMGE : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", ARM64fcmgez>; -defm FCMGT : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", ARM64fcmgtz>; -defm FCMLE : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", ARM64fcmlez>; -defm FCMLT : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", ARM64fcmltz>; -defm FCVTAS : SIMDTwoScalarSD< 0, 0, 0b11100, "fcvtas">; -defm FCVTAU : SIMDTwoScalarSD< 1, 0, 0b11100, "fcvtau">; -defm FCVTMS : SIMDTwoScalarSD< 0, 0, 0b11011, "fcvtms">; -defm FCVTMU : SIMDTwoScalarSD< 1, 0, 0b11011, "fcvtmu">; -defm FCVTNS : SIMDTwoScalarSD< 0, 0, 0b11010, "fcvtns">; -defm FCVTNU : SIMDTwoScalarSD< 1, 0, 0b11010, "fcvtnu">; -defm FCVTPS : SIMDTwoScalarSD< 0, 1, 0b11010, "fcvtps">; -defm FCVTPU : SIMDTwoScalarSD< 1, 1, 0b11010, "fcvtpu">; -def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; -defm FCVTZS : SIMDTwoScalarSD< 0, 1, 0b11011, "fcvtzs">; -defm FCVTZU : SIMDTwoScalarSD< 1, 1, 0b11011, "fcvtzu">; -defm FRECPE : SIMDTwoScalarSD< 0, 1, 0b11101, "frecpe">; -defm FRECPX : SIMDTwoScalarSD< 0, 1, 0b11111, "frecpx">; -defm FRSQRTE : SIMDTwoScalarSD< 1, 1, 0b11101, "frsqrte">; -defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", - UnOpFrag<(sub immAllZerosV, node:$LHS)> >; -defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", ARM64sitof>; -defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_arm64_neon_sqabs>; -defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_arm64_neon_sqneg>; -defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_arm64_neon_scalar_sqxtn>; -defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_arm64_neon_scalar_sqxtun>; -defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", - int_arm64_neon_suqadd>; -defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", ARM64uitof>; -defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_arm64_neon_scalar_uqxtn>; -defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", - int_arm64_neon_usqadd>; - -def : Pat<(v1i64 (int_arm64_neon_fcvtas (v1f64 FPR64:$Rn))), - (FCVTASv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtau (v1f64 FPR64:$Rn))), - (FCVTAUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtms (v1f64 FPR64:$Rn))), - (FCVTMSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtmu (v1f64 FPR64:$Rn))), - (FCVTMUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtns (v1f64 FPR64:$Rn))), - (FCVTNSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtnu (v1f64 FPR64:$Rn))), - (FCVTNUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtps (v1f64 FPR64:$Rn))), - (FCVTPSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtpu (v1f64 FPR64:$Rn))), - (FCVTPUv1i64 FPR64:$Rn)>; - -def : Pat<(f32 (int_arm64_neon_frecpe (f32 FPR32:$Rn))), - (FRECPEv1i32 FPR32:$Rn)>; -def : Pat<(f64 (int_arm64_neon_frecpe (f64 FPR64:$Rn))), - (FRECPEv1i64 FPR64:$Rn)>; -def : Pat<(v1f64 (int_arm64_neon_frecpe (v1f64 FPR64:$Rn))), - (FRECPEv1i64 FPR64:$Rn)>; - -def : Pat<(f32 (int_arm64_neon_frecpx (f32 FPR32:$Rn))), - (FRECPXv1i32 FPR32:$Rn)>; 
-def : Pat<(f64 (int_arm64_neon_frecpx (f64 FPR64:$Rn))), - (FRECPXv1i64 FPR64:$Rn)>; - -def : Pat<(f32 (int_arm64_neon_frsqrte (f32 FPR32:$Rn))), - (FRSQRTEv1i32 FPR32:$Rn)>; -def : Pat<(f64 (int_arm64_neon_frsqrte (f64 FPR64:$Rn))), - (FRSQRTEv1i64 FPR64:$Rn)>; -def : Pat<(v1f64 (int_arm64_neon_frsqrte (v1f64 FPR64:$Rn))), - (FRSQRTEv1i64 FPR64:$Rn)>; - -// If an integer is about to be converted to a floating point value, -// just load it on the floating point unit. -// Here are the patterns for 8 and 16-bits to float. -// 8-bits -> float. -def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 ro_indexed8:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), bsub))>; -def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 am_indexed8:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), bsub))>; -def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 am_unscaled8:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDURBi am_unscaled8:$addr), bsub))>; -// 16-bits -> float. -def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 ro_indexed16:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), hsub))>; -def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 am_indexed16:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), hsub))>; -def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 am_unscaled16:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDURHi am_unscaled16:$addr), hsub))>; -// 32-bits are handled in target specific dag combine: -// performIntToFpCombine. -// 64-bits integer to 32-bits floating point, not possible with -// UCVTF on floating point registers (both source and destination -// must have the same size). - -// Here are the patterns for 8, 16, 32, and 64-bits to double. -// 8-bits -> double. -def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 ro_indexed8:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), bsub))>; -def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 am_indexed8:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), bsub))>; -def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 am_unscaled8:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURBi am_unscaled8:$addr), bsub))>; -// 16-bits -> double. -def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 ro_indexed16:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), hsub))>; -def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 am_indexed16:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), hsub))>; -def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 am_unscaled16:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURHi am_unscaled16:$addr), hsub))>; -// 32-bits -> double. -def : Pat <(f64 (uint_to_fp (i32 (load ro_indexed32:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRSro ro_indexed32:$addr), ssub))>; -def : Pat <(f64 (uint_to_fp (i32 (load am_indexed32:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRSui am_indexed32:$addr), ssub))>; -def : Pat <(f64 (uint_to_fp (i32 (load am_unscaled32:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURSi am_unscaled32:$addr), ssub))>; -// 64-bits -> double are handled in target specific dag combine: -// performIntToFpCombine. 
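The uint_to_fp patterns above target ordinary C-level conversions whose integer source sits in memory. A minimal sketch in C (function names are illustrative, not from this patch) of the shapes involved; per these patterns, the zero-extending load lands directly in a b/h/s sub-register of a SIMD register and UCVTF runs on the FP unit, with no GPR-to-FPR transfer:

#include <stdint.h>

/* Illustrative sketch only: conversions whose integer source comes from
 * memory. Per the patterns above, the first case lowers to roughly
 * "ldr b0, [x0]; ucvtf s0, s0", and similarly for the others. */
float  u8_to_f32 (const uint8_t  *p) { return (float)*p;  }
float  u16_to_f32(const uint16_t *p) { return (float)*p;  }
double u8_to_f64 (const uint8_t  *p) { return (double)*p; }
double u32_to_f64(const uint32_t *p) { return (double)*p; }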
- -//===----------------------------------------------------------------------===// -// Advanced SIMD three different-sized vector instructions. -//===----------------------------------------------------------------------===// - -defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_arm64_neon_addhn>; -defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_arm64_neon_subhn>; -defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_arm64_neon_raddhn>; -defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_arm64_neon_rsubhn>; -defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_arm64_neon_pmull>; -defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", - int_arm64_neon_sabd>; -defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", - int_arm64_neon_sabd>; -defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", - BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>; -defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", - BinOpFrag<(add node:$LHS, (sext node:$RHS))>>; -defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; -defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; -defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_arm64_neon_smull>; -defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", - int_arm64_neon_sqadd>; -defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", - int_arm64_neon_sqsub>; -defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull", - int_arm64_neon_sqdmull>; -defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", - BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>; -defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", - BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; -defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", - int_arm64_neon_uabd>; -defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", - int_arm64_neon_uabd>; -defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", - BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>; -defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", - BinOpFrag<(add node:$LHS, (zext node:$RHS))>>; -defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; -defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; -defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_arm64_neon_umull>; -defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", - BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>; -defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", - BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>; - -// Patterns for 64-bit pmull -def : Pat<(int_arm64_neon_pmull64 V64:$Rn, V64:$Rm), - (PMULLv1i64 V64:$Rn, V64:$Rm)>; -def : Pat<(int_arm64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)), - (vector_extract (v2i64 V128:$Rm), (i64 1))), - (PMULLv2i64 V128:$Rn, V128:$Rm)>; - -// CodeGen patterns for addhn and subhn instructions, which can actually be -// written in LLVM IR without too much difficulty. 
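Before the ADDHN/SUBHN patterns, a hedged sketch of the IR shape they match: a widening add (or sub), a logical shift right by half the element width, and a truncate. Expressed with C NEON intrinsics (the helper name is illustrative):

#include <arm_neon.h>

/* High-half narrowing add built from generic operations: (a + b) >> 8,
 * truncated to 8 bits per lane. The ADDHN patterns below match this
 * trunc(lshr(add)) shape and select a single addhn instruction. */
static inline uint8x8_t addhn_u16_generic(uint16x8_t a, uint16x8_t b) {
  return vmovn_u16(vshrq_n_u16(vaddq_u16(a, b), 8));
}

The SUBHN patterns are the same shape with sub in place of add.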
- -// ADDHN -def : Pat<(v8i8 (trunc (v8i16 (ARM64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), - (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i16 (trunc (v4i32 (ARM64vlshr (add V128:$Rn, V128:$Rm), - (i32 16))))), - (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v2i32 (trunc (v2i64 (ARM64vlshr (add V128:$Rn, V128:$Rm), - (i32 32))))), - (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v8i8 V64:$Rd), - (trunc (v8i16 (ARM64vlshr (add V128:$Rn, V128:$Rm), - (i32 8))))), - (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v4i16 V64:$Rd), - (trunc (v4i32 (ARM64vlshr (add V128:$Rn, V128:$Rm), - (i32 16))))), - (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v2i32 V64:$Rd), - (trunc (v2i64 (ARM64vlshr (add V128:$Rn, V128:$Rm), - (i32 32))))), - (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; - -// SUBHN -def : Pat<(v8i8 (trunc (v8i16 (ARM64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), - (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i16 (trunc (v4i32 (ARM64vlshr (sub V128:$Rn, V128:$Rm), - (i32 16))))), - (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v2i32 (trunc (v2i64 (ARM64vlshr (sub V128:$Rn, V128:$Rm), - (i32 32))))), - (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v8i8 V64:$Rd), - (trunc (v8i16 (ARM64vlshr (sub V128:$Rn, V128:$Rm), - (i32 8))))), - (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v4i16 V64:$Rd), - (trunc (v4i32 (ARM64vlshr (sub V128:$Rn, V128:$Rm), - (i32 16))))), - (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v2i32 V64:$Rd), - (trunc (v2i64 (ARM64vlshr (sub V128:$Rn, V128:$Rm), - (i32 32))))), - (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; - -//---------------------------------------------------------------------------- -// AdvSIMD bitwise extract from vector instruction. -//---------------------------------------------------------------------------- - -defm EXT : SIMDBitwiseExtract<"ext">; - -def : Pat<(v4i16 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), - (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; -def : Pat<(v8i16 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), - (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v2i32 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), - (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; -def : Pat<(v2f32 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), - (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; -def : Pat<(v4i32 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), - (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v4f32 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), - (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v2i64 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), - (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v2f64 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), - (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; - -// We use EXT to handle extract_subvector to copy the upper 64-bits of a -// 128-bit vector. 
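As a small illustration of the comment above (the helper name is mine, not from the patch): taking the upper 64-bit half of a 128-bit vector is an extract_subvector at the halfway index, which the patterns below lower to an EXT of the register with itself by 8 bytes plus a d-sub-register read.

#include <arm_neon.h>

/* Upper 64-bit half of a 128-bit vector; per the patterns below this is
 * lowered via EXT, roughly "ext v0.16b, v0.16b, v0.16b, #8". */
static inline int16x4_t upper_half_s16(int16x8_t v) {
  return vget_high_s16(v);
}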
-def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 8))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; -def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 4))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; -def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; -def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; -def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; -def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; - - -//---------------------------------------------------------------------------- -// AdvSIMD zip vector -//---------------------------------------------------------------------------- - -defm TRN1 : SIMDZipVector<0b010, "trn1", ARM64trn1>; -defm TRN2 : SIMDZipVector<0b110, "trn2", ARM64trn2>; -defm UZP1 : SIMDZipVector<0b001, "uzp1", ARM64uzp1>; -defm UZP2 : SIMDZipVector<0b101, "uzp2", ARM64uzp2>; -defm ZIP1 : SIMDZipVector<0b011, "zip1", ARM64zip1>; -defm ZIP2 : SIMDZipVector<0b111, "zip2", ARM64zip2>; - -//---------------------------------------------------------------------------- -// AdvSIMD TBL/TBX instructions -//---------------------------------------------------------------------------- - -defm TBL : SIMDTableLookup< 0, "tbl">; -defm TBX : SIMDTableLookupTied<1, "tbx">; - -def : Pat<(v8i8 (int_arm64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), - (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>; -def : Pat<(v16i8 (int_arm64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), - (TBLv16i8One V128:$Ri, V128:$Rn)>; - -def : Pat<(v8i8 (int_arm64_neon_tbx1 (v8i8 V64:$Rd), - (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), - (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>; -def : Pat<(v16i8 (int_arm64_neon_tbx1 (v16i8 V128:$Rd), - (v16i8 V128:$Ri), (v16i8 V128:$Rn))), - (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>; - - -//---------------------------------------------------------------------------- -// AdvSIMD scalar CPY instruction -//---------------------------------------------------------------------------- - -defm CPY : SIMDScalarCPY<"cpy">; - -//---------------------------------------------------------------------------- -// AdvSIMD scalar pairwise instructions -//---------------------------------------------------------------------------- - -defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; -defm FADDP : SIMDPairwiseScalarSD<1, 0, 0b01101, "faddp">; -defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">; -defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">; -defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">; -defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">; -def : Pat<(i64 (int_arm64_neon_saddv (v2i64 V128:$Rn))), - (ADDPv2i64p V128:$Rn)>; -def : Pat<(i64 (int_arm64_neon_uaddv (v2i64 V128:$Rn))), - (ADDPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_faddv (v2f32 V64:$Rn))), - (FADDPv2i32p V64:$Rn)>; -def : Pat<(f32 (int_arm64_neon_faddv (v4f32 V128:$Rn))), - (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>; -def : Pat<(f64 (int_arm64_neon_faddv (v2f64 V128:$Rn))), - (FADDPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fmaxnmv (v2f32 V64:$Rn))), - (FMAXNMPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fmaxnmv (v2f64 V128:$Rn))), - (FMAXNMPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fmaxv (v2f32 V64:$Rn))), - 
(FMAXPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fmaxv (v2f64 V128:$Rn))), - (FMAXPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fminnmv (v2f32 V64:$Rn))), - (FMINNMPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fminnmv (v2f64 V128:$Rn))), - (FMINNMPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fminv (v2f32 V64:$Rn))), - (FMINPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fminv (v2f64 V128:$Rn))), - (FMINPv2i64p V128:$Rn)>; - -//---------------------------------------------------------------------------- -// AdvSIMD INS/DUP instructions -//---------------------------------------------------------------------------- - -def DUPv8i8gpr : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>; -def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>; -def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>; -def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>; -def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>; -def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>; -def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>; - -def DUPv2i64lane : SIMDDup64FromElement; -def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>; -def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>; -def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>; -def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>; -def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>; -def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>; - -def : Pat<(v2f32 (ARM64dup (f32 FPR32:$Rn))), - (v2f32 (DUPv2i32lane - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), - (i64 0)))>; -def : Pat<(v4f32 (ARM64dup (f32 FPR32:$Rn))), - (v4f32 (DUPv4i32lane - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), - (i64 0)))>; -def : Pat<(v2f64 (ARM64dup (f64 FPR64:$Rn))), - (v2f64 (DUPv2i64lane - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub), - (i64 0)))>; - -def : Pat<(v2f32 (ARM64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), - (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>; -def : Pat<(v4f32 (ARM64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), - (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>; -def : Pat<(v2f64 (ARM64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)), - (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>; - -defm SMOV : SMov; -defm UMOV : UMov; - -def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), - (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>; -def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), - (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>; -def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), - (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; -def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), - (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; -def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), - (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; -def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))), - (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>; - -// Extracting i8 or i16 elements will have the zero-extend transformed to -// an 'and' mask by type legalization since neither i8 nor i16 are legal types -// for ARM64. Match these patterns here since UMOV already zeroes out the high -// bits of the destination register. 
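A minimal C sketch of the case just described (function name is illustrative): extracting an i8 or i16 lane yields an i32 whose zero-extension is legalized into an 'and' with 0xff or 0xffff, and the patterns below fold that mask into the zero-extending umov itself.

#include <arm_neon.h>
#include <stdint.h>

/* Extract byte lane 3 as a 32-bit value. The implicit zero-extension becomes
 * an 'and' with 0xff during type legalization; the pattern below folds it
 * away, leaving roughly a single "umov w0, v0.b[3]". */
static inline uint32_t byte_lane3(uint8x16_t v) {
  return vgetq_lane_u8(v, 3);
}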
-def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), - (i32 0xff)), - (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>; -def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), - (i32 0xffff)), - (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>; - -defm INS : SIMDIns; - -def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)), - (INSvi8gpr (v16i8 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn)>; -def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)), - (EXTRACT_SUBREG - (INSvi8gpr (v16i8 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn), dsub)>; - -def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)), - (INSvi16gpr (v8i16 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn)>; -def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)), - (EXTRACT_SUBREG - (INSvi16gpr (v8i16 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn), dsub)>; - -def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))), - (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), - (i32 FPR32:$Rn), ssub))>; -def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))), - (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), - (i32 FPR32:$Rn), ssub))>; -def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))), - (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), - (i64 FPR64:$Rn), dsub))>; - -def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; -def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; -def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>; - -def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn), - (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), - (EXTRACT_SUBREG - (INSvi32lane - (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), - VectorIndexS:$imm, - (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), - (i64 0)), - dsub)>; -def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn), - (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), - (INSvi32lane - V128:$Rn, VectorIndexS:$imm, - (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), - (i64 0))>; -def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn), - (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))), - (INSvi64lane - V128:$Rn, VectorIndexD:$imm, - (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)), - (i64 0))>; - -// Copy an element at a constant index in one vector into a constant indexed -// element of another. -// FIXME refactor to a shared class/dev parameterized on vector type, vector -// index type and INS extension -def : Pat<(v16i8 (int_arm64_neon_vcopy_lane - (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs), - VectorIndexB:$idx2)), - (v16i8 (INSvi8lane - V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2) - )>; -def : Pat<(v8i16 (int_arm64_neon_vcopy_lane - (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs), - VectorIndexH:$idx2)), - (v8i16 (INSvi16lane - V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2) - )>; -def : Pat<(v4i32 (int_arm64_neon_vcopy_lane - (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs), - VectorIndexS:$idx2)), - (v4i32 (INSvi32lane - V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2) - )>; -def : Pat<(v2i64 (int_arm64_neon_vcopy_lane - (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs), - VectorIndexD:$idx2)), - (v2i64 (INSvi64lane - V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2) - )>; - -// Floating point vector extractions are codegen'd as either a sequence of -// subregister extractions, possibly fed by an INS if the lane number is -// anything other than zero. 
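A short sketch of the two cases described above (names are illustrative): lane 0 of a floating-point vector is a plain sub-register read, while any other lane is first moved into lane 0 of a scratch register via INS and then read as a sub-register.

#include <arm_neon.h>

/* Lane 0: pure sub-register extraction (s0 is already the low word of v0). */
static inline float f32_lane0(float32x4_t v) { return vgetq_lane_f32(v, 0); }

/* Non-zero lane: per the INSvi32lane-based pattern below, the element is
 * first inserted into lane 0 of an undefined register, then read via ssub. */
static inline float f32_lane2(float32x4_t v) { return vgetq_lane_f32(v, 2); }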
-def : Pat<(vector_extract (v2f64 V128:$Rn), 0), - (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>; -def : Pat<(vector_extract (v4f32 V128:$Rn), 0), - (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>; -def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx), - (f64 (EXTRACT_SUBREG - (INSvi64lane (v2f64 (IMPLICIT_DEF)), 0, - V128:$Rn, VectorIndexD:$idx), - dsub))>; -def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx), - (f32 (EXTRACT_SUBREG - (INSvi32lane (v4f32 (IMPLICIT_DEF)), 0, - V128:$Rn, VectorIndexS:$idx), - ssub))>; - -// All concat_vectors operations are canonicalised to act on i64 vectors for -// ARM64. In the general case we need an instruction, which had just as well be -// INS. -class ConcatPat<ValueType DstTy, ValueType SrcTy> - : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)), - (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1, - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>; - -def : ConcatPat<v2i64, v1i64>; -def : ConcatPat<v2f64, v1f64>; -def : ConcatPat<v4i32, v2i32>; -def : ConcatPat<v4f32, v2f32>; -def : ConcatPat<v8i16, v4i16>; -def : ConcatPat<v16i8, v8i8>; - -// If the high lanes are undef, though, we can just ignore them: -class ConcatUndefPat<ValueType DstTy, ValueType SrcTy> - : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)), - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>; - -def : ConcatUndefPat<v2i64, v1i64>; -def : ConcatUndefPat<v2f64, v1f64>; -def : ConcatUndefPat<v4i32, v2i32>; -def : ConcatUndefPat<v4f32, v2f32>; -def : ConcatUndefPat<v8i16, v4i16>; -def : ConcatUndefPat<v16i8, v8i8>; - -//---------------------------------------------------------------------------- -// AdvSIMD across lanes instructions -//---------------------------------------------------------------------------- - -defm ADDV : SIMDAcrossLanesBHS<0, 0b11011, "addv">; -defm SMAXV : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">; -defm SMINV : SIMDAcrossLanesBHS<0, 0b11010, "sminv">; -defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; -defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; -defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; -defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; -defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_arm64_neon_fmaxnmv>; -defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_arm64_neon_fmaxv>; -defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_arm64_neon_fminnmv>; -defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_arm64_neon_fminv>; - -multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> { -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it - def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - (i64 0)))>; - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - (i64 0)))>; -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it -def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), - (i32 (SMOVvi8to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - (i64 0)))>; -// 
If there is a sign extension after this intrinsic, consume it as smov already -// performed it -def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - (i64 0)))>; -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it -def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - (i64 0)))>; - -def : Pat<(i32 (intOp (v4i32 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), - ssub))>; -} - -multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> { -// If there is a masking operation keeping only what has been actually -// generated, consume it. - def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - ssub))>; - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - ssub))>; -// If there is a masking operation keeping only what has been actually -// generated, consume it. -def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - ssub))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - ssub))>; - -// If there is a masking operation keeping only what has been actually -// generated, consume it. -def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - ssub))>; -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - ssub))>; -// If there is a masking operation keeping only what has been actually -// generated, consume it. 
-def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - ssub))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - ssub))>; - -def : Pat<(i32 (intOp (v4i32 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), - ssub))>; - -} - -multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> { - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), - (i64 0)))>; - -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), - ssub))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), - ssub))>; - -def : Pat<(i64 (intOp (v4i32 V128:$Rn))), - (i64 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), - dsub))>; -} - -multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc, - Intrinsic intOp> { - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), - ssub))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), - ssub))>; - -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), - ssub))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), - ssub))>; - -def : Pat<(i64 (intOp (v4i32 V128:$Rn))), - (i64 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), - dsub))>; -} - -defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_arm64_neon_saddv>; -// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(i32 (int_arm64_neon_saddv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_arm64_neon_uaddv>; -// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(i32 (int_arm64_neon_uaddv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_arm64_neon_smaxv>; -def : Pat<(i32 (int_arm64_neon_smaxv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_arm64_neon_sminv>; -def : Pat<(i32 (int_arm64_neon_sminv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, 
V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_arm64_neon_umaxv>; -def : Pat<(i32 (int_arm64_neon_umaxv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_arm64_neon_uminv>; -def : Pat<(i32 (int_arm64_neon_uminv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_arm64_neon_saddlv>; -defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_arm64_neon_uaddlv>; - -// The vaddlv_s32 intrinsic gets mapped to SADDLP. -def : Pat<(i64 (int_arm64_neon_saddlv (v2i32 V64:$Rn))), - (i64 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (SADDLPv2i32_v1i64 V64:$Rn), dsub), - dsub))>; -// The vaddlv_u32 intrinsic gets mapped to UADDLP. -def : Pat<(i64 (int_arm64_neon_uaddlv (v2i32 V64:$Rn))), - (i64 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (UADDLPv2i32_v1i64 V64:$Rn), dsub), - dsub))>; - -//------------------------------------------------------------------------------ -// AdvSIMD modified immediate instructions -//------------------------------------------------------------------------------ - -// AdvSIMD BIC -defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", ARM64bici>; -// AdvSIMD ORR -defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", ARM64orri>; - - -// AdvSIMD FMOV -def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8, - "fmov", ".2d", - [(set (v2f64 V128:$Rd), (ARM64fmov imm0_255:$imm8))]>; -def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64, fpimm8, - "fmov", ".2s", - [(set (v2f32 V64:$Rd), (ARM64fmov imm0_255:$imm8))]>; -def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8, - "fmov", ".4s", - [(set (v4f32 V128:$Rd), (ARM64fmov imm0_255:$imm8))]>; - -// AdvSIMD MOVI - -// EDIT byte mask: scalar -let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi", - [(set FPR64:$Rd, simdimmtype10:$imm8)]>; -// The movi_edit node has the immediate value already encoded, so we use -// a plain imm0_255 here. -def : Pat<(f64 (ARM64movi_edit imm0_255:$shift)), - (MOVID imm0_255:$shift)>; - -def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>; -def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>; -def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>; -def : Pat<(v8i8 immAllZerosV), (MOVID (i32 0))>; - -def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>; -def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>; -def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>; -def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>; - -// EDIT byte mask: 2d - -// The movi_edit node has the immediate value already encoded, so we use -// a plain imm0_255 in the pattern -let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128, - simdimmtype10, - "movi", ".2d", - [(set (v2i64 V128:$Rd), (ARM64movi_edit imm0_255:$imm8))]>; - - -// Use movi.2d to materialize 0.0 if the HW does zero-cycle zeroing. -// Complexity is added to break a tie with a plain MOVI. 
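For illustration (a sketch; the selection depends on the HasZCZ subtarget feature): on cores with zero-cycle zeroing, the patterns below materialize the floating-point zeros in the following C functions with "movi v0.2d, #0" and a sub-register read rather than an fmov from wzr/xzr.

/* Illustrative only: FP zero constants; with HasZCZ these select MOVIv2d_ns
 * per the AddedComplexity patterns below. */
float  f32_zero(void) { return 0.0f; }
double f64_zero(void) { return 0.0;  }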
-let AddedComplexity = 1 in { -def : Pat<(f32 fpimm0), - (f32 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), ssub))>, - Requires<[HasZCZ]>; -def : Pat<(f64 fpimm0), - (f64 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), dsub))>, - Requires<[HasZCZ]>; -} - -def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>; -def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>; -def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>; -def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>; - -def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>; -def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; -def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; -def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; - -// EDIT per word & halfword: 2s, 4h, 4s, & 8h -defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; -def : Pat<(v2i32 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), - (MOVIv2i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i32 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), - (MOVIv4i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i16 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), - (MOVIv4i16 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v8i16 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), - (MOVIv8i16 imm0_255:$imm8, imm:$shift)>; - -// EDIT per word: 2s & 4s with MSL shifter -def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", - [(set (v2i32 V64:$Rd), - (ARM64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; -def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", - [(set (v4i32 V128:$Rd), - (ARM64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; - -// Per byte: 8b & 16b -def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64, imm0_255, - "movi", ".8b", - [(set (v8i8 V64:$Rd), (ARM64movi imm0_255:$imm8))]>; -def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255, - "movi", ".16b", - [(set (v16i8 V128:$Rd), (ARM64movi imm0_255:$imm8))]>; - -// AdvSIMD MVNI - -// EDIT per word & halfword: 2s, 4h, 4s, & 8h -defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">; -def : Pat<(v2i32 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), - (MVNIv2i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i32 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), - (MVNIv4i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i16 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), - (MVNIv4i16 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v8i16 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), - (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; - -// EDIT per word: 2s & 4s with MSL shifter -def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", - [(set (v2i32 V64:$Rd), - (ARM64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; -def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", - [(set (v4i32 V128:$Rd), - (ARM64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; - -//---------------------------------------------------------------------------- -// AdvSIMD indexed element -//---------------------------------------------------------------------------- - -let neverHasSideEffects = 1 in { - defm FMLA : SIMDFPIndexedSDTied<0, 0b0001, "fmla">; - defm FMLS : SIMDFPIndexedSDTied<0, 0b0101, "fmls">; -} - -// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the -// instruction expects the addend first, while the intrinsic expects it last. 
- -// On the other hand, there are quite a few valid combinatorial options due to -// the commutativity of multiplication and the fact that (-x) * y = x * (-y). -defm : SIMDFPIndexedSDTiedPatterns<"FMLA", - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>; -defm : SIMDFPIndexedSDTiedPatterns<"FMLA", - TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>; - -defm : SIMDFPIndexedSDTiedPatterns<"FMLS", - TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; -defm : SIMDFPIndexedSDTiedPatterns<"FMLS", - TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; -defm : SIMDFPIndexedSDTiedPatterns<"FMLS", - TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; -defm : SIMDFPIndexedSDTiedPatterns<"FMLS", - TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; - -multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> { - // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit - // and DUP scalar. - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64duplane32 (v4f32 (fneg V128:$Rm)), - VectorIndexS:$idx))), - (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (v2f32 (ARM64duplane32 - (v4f32 (insert_subvector undef, - (v2f32 (fneg V64:$Rm)), - (i32 0))), - VectorIndexS:$idx)))), - (FMLSv2i32_indexed V64:$Rd, V64:$Rn, - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), - VectorIndexS:$idx)>; - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64dup (f32 (fneg FPR32Op:$Rm))))), - (FMLSv2i32_indexed V64:$Rd, V64:$Rn, - (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; - - // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit - // and DUP scalar. - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64duplane32 (v4f32 (fneg V128:$Rm)), - VectorIndexS:$idx))), - (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm, - VectorIndexS:$idx)>; - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (v4f32 (ARM64duplane32 - (v4f32 (insert_subvector undef, - (v2f32 (fneg V64:$Rm)), - (i32 0))), - VectorIndexS:$idx)))), - (FMLSv4i32_indexed V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), - VectorIndexS:$idx)>; - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64dup (f32 (fneg FPR32Op:$Rm))))), - (FMLSv4i32_indexed V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; - - // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar - // (DUPLANE from 64-bit would be trivial). 
- def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64duplane64 (v2f64 (fneg V128:$Rm)), - VectorIndexD:$idx))), - (FMLSv2i64_indexed - V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64dup (f64 (fneg FPR64Op:$Rm))))), - (FMLSv2i64_indexed V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; - - // 2 variants for 32-bit scalar version: extract from .2s or from .4s - def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), - (vector_extract (v4f32 (fneg V128:$Rm)), - VectorIndexS:$idx))), - (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, - V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), - (vector_extract (v2f32 (fneg V64:$Rm)), - VectorIndexS:$idx))), - (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; - - // 1 variant for 64-bit scalar version: extract from .1d or from .2d - def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), - (vector_extract (v2f64 (fneg V128:$Rm)), - VectorIndexS:$idx))), - (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn, - V128:$Rm, VectorIndexS:$idx)>; -} - -defm : FMLSIndexedAfterNegPatterns< - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; -defm : FMLSIndexedAfterNegPatterns< - TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >; - -defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_arm64_neon_fmulx>; -defm FMUL : SIMDFPIndexedSD<0, 0b1001, "fmul", fmul>; - -def : Pat<(v2f32 (fmul V64:$Rn, (ARM64dup (f32 FPR32:$Rm)))), - (FMULv2i32_indexed V64:$Rn, - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), - (i64 0))>; -def : Pat<(v4f32 (fmul V128:$Rn, (ARM64dup (f32 FPR32:$Rm)))), - (FMULv4i32_indexed V128:$Rn, - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), - (i64 0))>; -def : Pat<(v2f64 (fmul V128:$Rn, (ARM64dup (f64 FPR64:$Rm)))), - (FMULv2i64_indexed V128:$Rn, - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), - (i64 0))>; - -defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_arm64_neon_sqdmulh>; -defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_arm64_neon_sqrdmulh>; -defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", - TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>; -defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", - TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>; -defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; -defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; -defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; -defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", - int_arm64_neon_smull>; -defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", - int_arm64_neon_sqadd>; -defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", - int_arm64_neon_sqsub>; -defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_arm64_neon_sqdmull>; -defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; -defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; -defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", - int_arm64_neon_umull>; - -// A scalar sqdmull with the second operand being a vector lane can be -// handled directly with the indexed instruction 
encoding. -def : Pat<(int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn), - (vector_extract (v4i32 V128:$Vm), - VectorIndexS:$idx)), - (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; - -//---------------------------------------------------------------------------- -// AdvSIMD scalar shift instructions -//---------------------------------------------------------------------------- -defm FCVTZS : SIMDScalarRShiftSD<0, 0b11111, "fcvtzs">; -defm FCVTZU : SIMDScalarRShiftSD<1, 0b11111, "fcvtzu">; -defm SCVTF : SIMDScalarRShiftSD<0, 0b11100, "scvtf">; -defm UCVTF : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">; -// Codegen patterns for the above. We don't put these directly on the -// instructions because TableGen's type inference can't handle the truth. -// Having the same base pattern for fp <--> int totally freaks it out. -def : Pat<(int_arm64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm), - (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(int_arm64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm), - (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(i64 (int_arm64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)), - (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(i64 (int_arm64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)), - (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1i64 (int_arm64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn), - vecshiftR64:$imm)), - (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1i64 (int_arm64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn), - vecshiftR64:$imm)), - (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(int_arm64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm), - (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(int_arm64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm), - (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(f64 (int_arm64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), - (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(f64 (int_arm64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), - (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1f64 (int_arm64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn), - vecshiftR64:$imm)), - (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1f64 (int_arm64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), - vecshiftR64:$imm)), - (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; - -defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", ARM64vshl>; -defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">; -defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", - int_arm64_neon_sqrshrn>; -defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun", - int_arm64_neon_sqrshrun>; -defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", ARM64sqshlui>; -defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", ARM64sqshli>; -defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn", - int_arm64_neon_sqshrn>; -defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", - int_arm64_neon_sqshrun>; -defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">; -defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", ARM64srshri>; -defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", - TriOpFrag<(add node:$LHS, - (ARM64srshri node:$MHS, node:$RHS))>>; -defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", ARM64vashr>; -defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", - TriOpFrag<(add node:$LHS, - (ARM64vashr node:$MHS, node:$RHS))>>; -defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", - int_arm64_neon_uqrshrn>; -defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", ARM64uqshli>; -defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", - 
int_arm64_neon_uqshrn>; -defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", ARM64urshri>; -defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", - TriOpFrag<(add node:$LHS, - (ARM64urshri node:$MHS, node:$RHS))>>; -defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", ARM64vlshr>; -defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", - TriOpFrag<(add node:$LHS, - (ARM64vlshr node:$MHS, node:$RHS))>>; - -//---------------------------------------------------------------------------- -// AdvSIMD vector shift instructions -//---------------------------------------------------------------------------- -defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_arm64_neon_vcvtfp2fxs>; -defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_arm64_neon_vcvtfp2fxu>; -defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf", - int_arm64_neon_vcvtfxs2fp>; -defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", - int_arm64_neon_rshrn>; -defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", ARM64vshl>; -defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn", - BinOpFrag<(trunc (ARM64vashr node:$LHS, node:$RHS))>>; -defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_arm64_neon_vsli>; -def : Pat<(v1i64 (int_arm64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), - (i32 vecshiftL64:$imm))), - (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>; -defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn", - int_arm64_neon_sqrshrn>; -defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun", - int_arm64_neon_sqrshrun>; -defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", ARM64sqshlui>; -defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", ARM64sqshli>; -defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn", - int_arm64_neon_sqshrn>; -defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun", - int_arm64_neon_sqshrun>; -defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_arm64_neon_vsri>; -def : Pat<(v1i64 (int_arm64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), - (i32 vecshiftR64:$imm))), - (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>; -defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", ARM64srshri>; -defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra", - TriOpFrag<(add node:$LHS, - (ARM64srshri node:$MHS, node:$RHS))> >; -defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", - BinOpFrag<(ARM64vshl (sext node:$LHS), node:$RHS)>>; - -defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", ARM64vashr>; -defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", - TriOpFrag<(add node:$LHS, (ARM64vashr node:$MHS, node:$RHS))>>; -defm UCVTF : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf", - int_arm64_neon_vcvtfxu2fp>; -defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", - int_arm64_neon_uqrshrn>; -defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", ARM64uqshli>; -defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn", - int_arm64_neon_uqshrn>; -defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", ARM64urshri>; -defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra", - TriOpFrag<(add node:$LHS, - (ARM64urshri node:$MHS, node:$RHS))> >; -defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll", - BinOpFrag<(ARM64vshl (zext node:$LHS), node:$RHS)>>; -defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", ARM64vlshr>; -defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", - TriOpFrag<(add node:$LHS, (ARM64vlshr node:$MHS, node:$RHS))> >; - -// SHRN patterns for when a logical right shift was used instead of arithmetic -// (the 
immediate guarantees no sign bits actually end up in the result so it -// doesn't matter). -def : Pat<(v8i8 (trunc (ARM64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))), - (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>; -def : Pat<(v4i16 (trunc (ARM64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))), - (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>; -def : Pat<(v2i32 (trunc (ARM64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))), - (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>; - -def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd), - (trunc (ARM64vlshr (v8i16 V128:$Rn), - vecshiftR16Narrow:$imm)))), - (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR16Narrow:$imm)>; -def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd), - (trunc (ARM64vlshr (v4i32 V128:$Rn), - vecshiftR32Narrow:$imm)))), - (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR32Narrow:$imm)>; -def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd), - (trunc (ARM64vlshr (v2i64 V128:$Rn), - vecshiftR64Narrow:$imm)))), - (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR32Narrow:$imm)>; - -// Vector sign and zero extensions are implemented with SSHLL and USSHLL. -// Anyexts are implemented as zexts. -def : Pat<(v8i16 (sext (v8i8 V64:$Rn))), (SSHLLv8i8_shift V64:$Rn, (i32 0))>; -def : Pat<(v8i16 (zext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; -def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; -def : Pat<(v4i32 (sext (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>; -def : Pat<(v4i32 (zext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; -def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; -def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>; -def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; -def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; -// Also match an extend from the upper half of a 128 bit source register. 
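A brief sketch of the extends being matched here (helper names are illustrative): widening the low half of a vector maps to ushll/sshll #0 (aliased as uxtl/sxtl below), and widening the upper half of a 128-bit register maps to the ushll2/sshll2 forms with no separate extract.

#include <arm_neon.h>

/* Widen the low 8 lanes: per the patterns above, roughly
 * "ushll v0.8h, v0.8b, #0" (alias: uxtl). */
static inline uint16x8_t widen_lo_u8(uint8x8_t v)  { return vmovl_u8(v); }

/* Widen the upper half of a 128-bit register: per the patterns below, roughly
 * "ushll2 v0.8h, v0.16b, #0" (alias: uxtl2), no separate extract needed. */
static inline uint16x8_t widen_hi_u8(uint8x16_t v) { return vmovl_u8(vget_high_u8(v)); }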
-def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), - (USHLLv16i8_shift V128:$Rn, (i32 0))>; -def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), - (USHLLv16i8_shift V128:$Rn, (i32 0))>; -def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), - (SSHLLv16i8_shift V128:$Rn, (i32 0))>; -def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), - (USHLLv8i16_shift V128:$Rn, (i32 0))>; -def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), - (USHLLv8i16_shift V128:$Rn, (i32 0))>; -def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), - (SSHLLv8i16_shift V128:$Rn, (i32 0))>; -def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), - (USHLLv4i32_shift V128:$Rn, (i32 0))>; -def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), - (USHLLv4i32_shift V128:$Rn, (i32 0))>; -def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), - (SSHLLv4i32_shift V128:$Rn, (i32 0))>; - -// Vector shift sxtl aliases -def : InstAlias<"sxtl.8h $dst, $src1", - (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl $dst.8h, $src1.8b", - (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl.4s $dst, $src1", - (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl $dst.4s, $src1.4h", - (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl.2d $dst, $src1", - (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl $dst.2d, $src1.2s", - (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; - -// Vector shift sxtl2 aliases -def : InstAlias<"sxtl2.8h $dst, $src1", - (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2 $dst.8h, $src1.16b", - (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2.4s $dst, $src1", - (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2 $dst.4s, $src1.8h", - (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2.2d $dst, $src1", - (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2 $dst.2d, $src1.4s", - (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; - -// Vector shift uxtl aliases -def : InstAlias<"uxtl.8h $dst, $src1", - (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl $dst.8h, $src1.8b", - (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl.4s $dst, $src1", - (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl $dst.4s, $src1.4h", - (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl.2d $dst, $src1", - (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl $dst.2d, $src1.2s", - (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; - -// Vector shift uxtl2 aliases -def : InstAlias<"uxtl2.8h $dst, $src1", - (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2 $dst.8h, $src1.16b", - (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2.4s $dst, $src1", - (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2 $dst.4s, $src1.8h", - (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2.2d $dst, $src1", - (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2 $dst.2d, $src1.4s", - (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; - -// If an integer is about to be converted to a floating point value, -// just load it on the floating point unit. -// These patterns are more complex because floating point loads do not -// support sign extension. 
-// The sign extension has to be explicitly added and is only supported for -// one step: byte-to-half, half-to-word, word-to-doubleword. -// SCVTF GPR -> FPR is 9 cycles. -// SCVTF FPR -> FPR is 4 cyclces. -// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles. -// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR -// and still being faster. -// However, this is not good for code size. -// 8-bits -> float. 2 sizes step-up. -def : Pat <(f32 (sint_to_fp (i32 (sextloadi8 ro_indexed8:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv8i8_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), - bsub), - 0), - dsub)), - 0), - ssub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f32 (sint_to_fp (i32 (sextloadi8 am_indexed8:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv8i8_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), - bsub), - 0), - dsub)), - 0), - ssub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f32 (sint_to_fp (i32 (sextloadi8 am_unscaled8:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv8i8_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURBi am_unscaled8:$addr), - bsub), - 0), - dsub)), - 0), - ssub)))>, Requires<[NotForCodeSize]>; -// 16-bits -> float. 1 size step-up. -def : Pat <(f32 (sint_to_fp (i32 (sextloadi16 ro_indexed16:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), - hsub), - 0), - ssub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f32 (sint_to_fp (i32 (sextloadi16 am_indexed16:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), - hsub), - 0), - ssub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f32 (sint_to_fp (i32 (sextloadi16 am_unscaled16:$addr)))), - (SCVTFv1i32 (f32 (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURHi am_unscaled16:$addr), - hsub), - 0), - ssub)))>, Requires<[NotForCodeSize]>; -// 32-bits to 32-bits are handled in target specific dag combine: -// performIntToFpCombine. -// 64-bits integer to 32-bits floating point, not possible with -// SCVTF on floating point registers (both source and destination -// must have the same size). - -// Here are the patterns for 8, 16, 32, and 64-bits to double. -// 8-bits -> double. 3 size step-up: give up. -// 16-bits -> double. 2 size step. -def : Pat <(f64 (sint_to_fp (i32 (sextloadi16 ro_indexed16:$addr)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), - hsub), - 0), - dsub)), - 0), - dsub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f64 (sint_to_fp (i32 (sextloadi16 am_indexed16:$addr)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), - hsub), - 0), - dsub)), - 0), - dsub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f64 (sint_to_fp (i32 (sextloadi16 am_unscaled16:$addr)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (f64 - (EXTRACT_SUBREG - (SSHLLv4i16_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURHi am_unscaled16:$addr), - hsub), - 0), - dsub)), - 0), - dsub)))>, Requires<[NotForCodeSize]>; -// 32-bits -> double. 1 size step-up. 
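// As a rough illustration of the trade-off described above, a function like
//   float f(const signed char *p) { return *p; }
// can be lowered (when not optimizing for size) to
//   ldr   b0, [x0]          ; 8-bit load straight into an FP register
//   sshll.8h v0, v0, #0     ; lengthen 8 -> 16 bits
//   sshll.4s v0, v0, #0     ; lengthen 16 -> 32 bits
//   scvtf s0, s0            ; FPR -> FPR convert
// instead of an ldrsb into a GPR followed by a GPR -> FPR scvtf.
// (The 32-bit -> double patterns below follow the same structure with a
// single lengthening step.)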
-def : Pat <(f64 (sint_to_fp (i32 (load ro_indexed32:$addr)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRSro ro_indexed32:$addr), - ssub), - 0), - dsub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f64 (sint_to_fp (i32 (load am_indexed32:$addr)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRSui am_indexed32:$addr), - ssub), - 0), - dsub)))>, Requires<[NotForCodeSize]>; -def : Pat <(f64 (sint_to_fp (i32 (load am_unscaled32:$addr)))), - (SCVTFv1i64 (f64 (EXTRACT_SUBREG - (SSHLLv2i32_shift - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURSi am_unscaled32:$addr), - ssub), - 0), - dsub)))>, Requires<[NotForCodeSize]>; -// 64-bits -> double are handled in target specific dag combine: -// performIntToFpCombine. - - -//---------------------------------------------------------------------------- -// AdvSIMD Load-Store Structure -//---------------------------------------------------------------------------- -defm LD1 : SIMDLd1Multiple<"ld1">; -defm LD2 : SIMDLd2Multiple<"ld2">; -defm LD3 : SIMDLd3Multiple<"ld3">; -defm LD4 : SIMDLd4Multiple<"ld4">; - -defm ST1 : SIMDSt1Multiple<"st1">; -defm ST2 : SIMDSt2Multiple<"st2">; -defm ST3 : SIMDSt3Multiple<"st3">; -defm ST4 : SIMDSt4Multiple<"st4">; - -class Ld1Pat<ValueType ty, Instruction INST> - : Pat<(ty (load am_simdnoindex:$vaddr)), (INST am_simdnoindex:$vaddr)>; - -def : Ld1Pat<v16i8, LD1Onev16b>; -def : Ld1Pat<v8i16, LD1Onev8h>; -def : Ld1Pat<v4i32, LD1Onev4s>; -def : Ld1Pat<v2i64, LD1Onev2d>; -def : Ld1Pat<v8i8, LD1Onev8b>; -def : Ld1Pat<v4i16, LD1Onev4h>; -def : Ld1Pat<v2i32, LD1Onev2s>; -def : Ld1Pat<v1i64, LD1Onev1d>; - -class St1Pat<ValueType ty, Instruction INST> - : Pat<(store ty:$Vt, am_simdnoindex:$vaddr), - (INST ty:$Vt, am_simdnoindex:$vaddr)>; - -def : St1Pat<v16i8, ST1Onev16b>; -def : St1Pat<v8i16, ST1Onev8h>; -def : St1Pat<v4i32, ST1Onev4s>; -def : St1Pat<v2i64, ST1Onev2d>; -def : St1Pat<v8i8, ST1Onev8b>; -def : St1Pat<v4i16, ST1Onev4h>; -def : St1Pat<v2i32, ST1Onev2s>; -def : St1Pat<v1i64, ST1Onev1d>; - -//--- -// Single-element -//--- - -defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>; -defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>; -defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>; -defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>; -let mayLoad = 1, neverHasSideEffects = 1 in { -defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>; -defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>; -defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>; -defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>; -defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>; -defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>; -defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>; -defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>; -defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>; -defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>; -defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>; -defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>; -defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>; -defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>; -defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", 
VecListFours, GPR64pi16>; -defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; -} - -def : Pat<(v8i8 (ARM64dup (i32 (extloadi8 am_simdnoindex:$vaddr)))), - (LD1Rv8b am_simdnoindex:$vaddr)>; -def : Pat<(v16i8 (ARM64dup (i32 (extloadi8 am_simdnoindex:$vaddr)))), - (LD1Rv16b am_simdnoindex:$vaddr)>; -def : Pat<(v4i16 (ARM64dup (i32 (extloadi16 am_simdnoindex:$vaddr)))), - (LD1Rv4h am_simdnoindex:$vaddr)>; -def : Pat<(v8i16 (ARM64dup (i32 (extloadi16 am_simdnoindex:$vaddr)))), - (LD1Rv8h am_simdnoindex:$vaddr)>; -def : Pat<(v2i32 (ARM64dup (i32 (load am_simdnoindex:$vaddr)))), - (LD1Rv2s am_simdnoindex:$vaddr)>; -def : Pat<(v4i32 (ARM64dup (i32 (load am_simdnoindex:$vaddr)))), - (LD1Rv4s am_simdnoindex:$vaddr)>; -def : Pat<(v2i64 (ARM64dup (i64 (load am_simdnoindex:$vaddr)))), - (LD1Rv2d am_simdnoindex:$vaddr)>; -def : Pat<(v1i64 (ARM64dup (i64 (load am_simdnoindex:$vaddr)))), - (LD1Rv1d am_simdnoindex:$vaddr)>; -// Grab the floating point version too -def : Pat<(v2f32 (ARM64dup (f32 (load am_simdnoindex:$vaddr)))), - (LD1Rv2s am_simdnoindex:$vaddr)>; -def : Pat<(v4f32 (ARM64dup (f32 (load am_simdnoindex:$vaddr)))), - (LD1Rv4s am_simdnoindex:$vaddr)>; -def : Pat<(v2f64 (ARM64dup (f64 (load am_simdnoindex:$vaddr)))), - (LD1Rv2d am_simdnoindex:$vaddr)>; -def : Pat<(v1f64 (ARM64dup (f64 (load am_simdnoindex:$vaddr)))), - (LD1Rv1d am_simdnoindex:$vaddr)>; - -class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex, - ValueType VTy, ValueType STy, Instruction LD1> - : Pat<(vector_insert (VTy VecListOne128:$Rd), - (STy (scalar_load am_simdnoindex:$vaddr)), VecIndex:$idx), - (LD1 VecListOne128:$Rd, VecIndex:$idx, am_simdnoindex:$vaddr)>; - -def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>; -def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>; -def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>; -def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>; -def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>; -def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>; - -class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex, - ValueType VTy, ValueType STy, Instruction LD1> - : Pat<(vector_insert (VTy VecListOne64:$Rd), - (STy (scalar_load am_simdnoindex:$vaddr)), VecIndex:$idx), - (EXTRACT_SUBREG - (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), - VecIndex:$idx, am_simdnoindex:$vaddr), - dsub)>; - -def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>; -def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>; -def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>; -def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>; - - -defm LD1 : SIMDLdSt1SingleAliases<"ld1">; -defm LD2 : SIMDLdSt2SingleAliases<"ld2">; -defm LD3 : SIMDLdSt3SingleAliases<"ld3">; -defm LD4 : SIMDLdSt4SingleAliases<"ld4">; - -// Stores -defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>; -defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>; -defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>; -defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>; - -let AddedComplexity = 8 in -class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex, - ValueType VTy, ValueType STy, Instruction ST1> - : Pat<(scalar_store - (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), - am_simdnoindex:$vaddr), - (ST1 VecListOne128:$Vt, VecIndex:$idx, am_simdnoindex:$vaddr)>; - -def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, 
ST1i8>; -def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>; -def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>; -def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>; -def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>; -def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>; - -let AddedComplexity = 8 in -class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex, - ValueType VTy, ValueType STy, Instruction ST1> - : Pat<(scalar_store - (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), - am_simdnoindex:$vaddr), - (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), - VecIndex:$idx, am_simdnoindex:$vaddr)>; - -def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>; -def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>; -def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>; -def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>; - -let mayStore = 1, neverHasSideEffects = 1 in { -defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>; -defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>; -defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>; -defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>; -defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>; -defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>; -defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>; -defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>; -defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>; -defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>; -defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>; -defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>; -} - -defm ST1 : SIMDLdSt1SingleAliases<"st1">; -defm ST2 : SIMDLdSt2SingleAliases<"st2">; -defm ST3 : SIMDLdSt3SingleAliases<"st3">; -defm ST4 : SIMDLdSt4SingleAliases<"st4">; - -//---------------------------------------------------------------------------- -// Crypto extensions -//---------------------------------------------------------------------------- - -def AESErr : AESTiedInst<0b0100, "aese", int_arm64_crypto_aese>; -def AESDrr : AESTiedInst<0b0101, "aesd", int_arm64_crypto_aesd>; -def AESMCrr : AESInst< 0b0110, "aesmc", int_arm64_crypto_aesmc>; -def AESIMCrr : AESInst< 0b0111, "aesimc", int_arm64_crypto_aesimc>; - -def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_arm64_crypto_sha1c>; -def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_arm64_crypto_sha1p>; -def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_arm64_crypto_sha1m>; -def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_arm64_crypto_sha1su0>; -def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_arm64_crypto_sha256h>; -def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_arm64_crypto_sha256h2>; -def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_arm64_crypto_sha256su1>; - -def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_arm64_crypto_sha1h>; -def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_arm64_crypto_sha1su1>; -def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_arm64_crypto_sha256su0>; - -//---------------------------------------------------------------------------- -// Compiler-pseudos -//---------------------------------------------------------------------------- -// FIXME: Like for X86, these should go in their own separate .td file. 
- -// Any instruction that defines a 32-bit result leaves the high half of the -// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may -// be copying from a truncate. But any other 32-bit operation will zero-extend -// up to 64 bits. -// FIXME: X86 also checks for CMOV here. Do we need something similar? -def def32 : PatLeaf<(i32 GPR32:$src), [{ - return N->getOpcode() != ISD::TRUNCATE && - N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && - N->getOpcode() != ISD::CopyFromReg; -}]>; - -// In the case of a 32-bit def that is known to implicitly zero-extend, -// we can use a SUBREG_TO_REG. -def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>; - -// For an anyext, we don't care what the high bits are, so we can perform an -// INSERT_SUBREF into an IMPLICIT_DEF. -def : Pat<(i64 (anyext GPR32:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>; - -// When we need to explicitly zero-extend, we use an unsigned bitfield move -// instruction (UBFM) on the enclosing super-reg. -def : Pat<(i64 (zext GPR32:$src)), - (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; - -// To sign extend, we use a signed bitfield move instruction (SBFM) on the -// containing super-reg. -def : Pat<(i64 (sext GPR32:$src)), - (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; -def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>; -def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>; -def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>; -def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>; -def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>; -def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>; -def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>; - -def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)), - (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), - (i64 (i32shift_sext_i8 imm0_31:$imm)))>; -def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)), - (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_sext_i8 imm0_63:$imm)))>; - -def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)), - (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), - (i64 (i32shift_sext_i16 imm0_31:$imm)))>; -def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)), - (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_sext_i16 imm0_63:$imm)))>; - -def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)), - (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), - (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_sext_i32 imm0_63:$imm)))>; - -// sra patterns have an AddedComplexity of 10, so make sure we have a higher -// AddedComplexity for the following patterns since we want to match sext + sra -// patterns before we attempt to match a single sra node. -let AddedComplexity = 20 in { -// We support all sext + sra combinations which preserve at least one bit of the -// original value which is to be sign extended. E.g. we support shifts up to -// bitwidth-1 bits. 
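// For instance, (sra (sext_inreg GPR32:$Rn, i8), 3) becomes a single
//   sbfm w0, w0, #3, #7     (equivalently: sbfx w0, w0, #3, #5)
// i.e. the in-register sign extension and the arithmetic shift fold into one
// signed bitfield extract, matching the SBFM patterns below. Register choice
// is illustrative.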
-def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)), - (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>; -def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)), - (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>; - -def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)), - (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>; -def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)), - (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>; - -def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), - (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), - (i64 imm0_31:$imm), 31)>; -} // AddedComplexity = 20 - -// To truncate, we can simply extract from a subregister. -def : Pat<(i32 (trunc GPR64sp:$src)), - (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>; - -// __builtin_trap() uses the BRK instruction on ARM64. -def : Pat<(trap), (BRK 1)>; - -// Conversions within AdvSIMD types in the same register size are free. - -def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>; - -def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>; - -def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>; - -def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>; - -def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; - -def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>; - -def : Pat<(v2f32 
(bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>; - - -def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>; - -def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; - -def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; - -def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; - -def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; - -def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; - -def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; - -def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 
1))), - (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; -def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))), - (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; -def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))), - (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; -def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))), - (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; - -// A 64-bit subvector insert to the first 128-bit vector position -// is a subregister copy that needs no instruction. -def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>; - -// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64 -// or v2f32. -def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)), - (vector_extract (v2i64 FPR128:$Rn), (i64 1)))), - (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>; -def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), - (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), - (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>; - // vector_extract on 64-bit vectors gets promoted to a 128 bit vector, - // so we match on v4f32 here, not v2f32. This will also catch adding - // the low two lanes of a true v4f32 vector. -def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), - (vector_extract (v4f32 FPR128:$Rn), (i64 1))), - (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; - -// Scalar 64-bit shifts in FPR64 registers. -def : Pat<(i64 (int_arm64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_arm64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_arm64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_arm64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; - -// Tail call return handling. These are all compiler pseudo-instructions, -// so no encoding information or anything like that. -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { - def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst), []>; - def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst), []>; -} - -def : Pat<(ARM64tcret tcGPR64:$dst), (TCRETURNri tcGPR64:$dst)>; -def : Pat<(ARM64tcret (i64 tglobaladdr:$dst)), (TCRETURNdi texternalsym:$dst)>; -def : Pat<(ARM64tcret (i64 texternalsym:$dst)), (TCRETURNdi texternalsym:$dst)>; - -include "ARM64InstrAtomics.td" diff --git a/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp b/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp deleted file mode 100644 index c0031a4..0000000 --- a/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp +++ /dev/null @@ -1,947 +0,0 @@ -//===-- ARM64LoadStoreOptimizer.cpp - ARM64 load/store opt. 
pass --*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass that performs load / store related peephole -// optimizations. This pass should be run after register allocation. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-ldst-opt" -#include "ARM64InstrInfo.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -/// ARM64AllocLoadStoreOpt - Post-register allocation pass to combine -/// load / store instructions to form ldp / stp instructions. - -STATISTIC(NumPairCreated, "Number of load/store pair instructions generated"); -STATISTIC(NumPostFolded, "Number of post-index updates folded"); -STATISTIC(NumPreFolded, "Number of pre-index updates folded"); -STATISTIC(NumUnscaledPairCreated, - "Number of load/store from unscaled generated"); - -static cl::opt<bool> DoLoadStoreOpt("arm64-load-store-opt", cl::init(true), - cl::Hidden); -static cl::opt<unsigned> ScanLimit("arm64-load-store-scan-limit", cl::init(20), - cl::Hidden); - -// Place holder while testing unscaled load/store combining -static cl::opt<bool> -EnableARM64UnscaledMemOp("arm64-unscaled-mem-op", cl::Hidden, - cl::desc("Allow ARM64 unscaled load/store combining"), - cl::init(true)); - -namespace { -struct ARM64LoadStoreOpt : public MachineFunctionPass { - static char ID; - ARM64LoadStoreOpt() : MachineFunctionPass(ID) {} - - const ARM64InstrInfo *TII; - const TargetRegisterInfo *TRI; - - // Scan the instructions looking for a load/store that can be combined - // with the current instruction into a load/store pair. - // Return the matching instruction if one is found, else MBB->end(). - // If a matching instruction is found, mergeForward is set to true if the - // merge is to remove the first instruction and replace the second with - // a pair-wise insn, and false if the reverse is true. - MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, - bool &mergeForward, - unsigned Limit); - // Merge the two instructions indicated into a single pair-wise instruction. - // If mergeForward is true, erase the first instruction and fold its - // operation into the second. If false, the reverse. Return the instruction - // following the first instruction (which may change during proecessing). - MachineBasicBlock::iterator - mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, bool mergeForward); - - // Scan the instruction list to find a base register update that can - // be combined with the current instruction (a load or store) using - // pre or post indexed addressing with writeback. Scan forwards. 
- MachineBasicBlock::iterator - findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit, - int Value); - - // Scan the instruction list to find a base register update that can - // be combined with the current instruction (a load or store) using - // pre or post indexed addressing with writeback. Scan backwards. - MachineBasicBlock::iterator - findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit); - - // Merge a pre-index base register update into a ld/st instruction. - MachineBasicBlock::iterator - mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update); - - // Merge a post-index base register update into a ld/st instruction. - MachineBasicBlock::iterator - mergePostIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update); - - bool optimizeBlock(MachineBasicBlock &MBB); - - virtual bool runOnMachineFunction(MachineFunction &Fn); - - virtual const char *getPassName() const { - return "ARM64 load / store optimization pass"; - } - -private: - int getMemSize(MachineInstr *MemMI); -}; -char ARM64LoadStoreOpt::ID = 0; -} - -static bool isUnscaledLdst(unsigned Opc) { - switch (Opc) { - default: - return false; - case ARM64::STURSi: - return true; - case ARM64::STURDi: - return true; - case ARM64::STURQi: - return true; - case ARM64::STURWi: - return true; - case ARM64::STURXi: - return true; - case ARM64::LDURSi: - return true; - case ARM64::LDURDi: - return true; - case ARM64::LDURQi: - return true; - case ARM64::LDURWi: - return true; - case ARM64::LDURXi: - return true; - } -} - -// Size in bytes of the data moved by an unscaled load or store -int ARM64LoadStoreOpt::getMemSize(MachineInstr *MemMI) { - switch (MemMI->getOpcode()) { - default: - llvm_unreachable("Opcode has has unknown size!"); - case ARM64::STRSui: - case ARM64::STURSi: - return 4; - case ARM64::STRDui: - case ARM64::STURDi: - return 8; - case ARM64::STRQui: - case ARM64::STURQi: - return 16; - case ARM64::STRWui: - case ARM64::STURWi: - return 4; - case ARM64::STRXui: - case ARM64::STURXi: - return 8; - case ARM64::LDRSui: - case ARM64::LDURSi: - return 4; - case ARM64::LDRDui: - case ARM64::LDURDi: - return 8; - case ARM64::LDRQui: - case ARM64::LDURQi: - return 16; - case ARM64::LDRWui: - case ARM64::LDURWi: - return 4; - case ARM64::LDRXui: - case ARM64::LDURXi: - return 8; - } -} - -static unsigned getMatchingPairOpcode(unsigned Opc) { - switch (Opc) { - default: - llvm_unreachable("Opcode has no pairwise equivalent!"); - case ARM64::STRSui: - case ARM64::STURSi: - return ARM64::STPSi; - case ARM64::STRDui: - case ARM64::STURDi: - return ARM64::STPDi; - case ARM64::STRQui: - case ARM64::STURQi: - return ARM64::STPQi; - case ARM64::STRWui: - case ARM64::STURWi: - return ARM64::STPWi; - case ARM64::STRXui: - case ARM64::STURXi: - return ARM64::STPXi; - case ARM64::LDRSui: - case ARM64::LDURSi: - return ARM64::LDPSi; - case ARM64::LDRDui: - case ARM64::LDURDi: - return ARM64::LDPDi; - case ARM64::LDRQui: - case ARM64::LDURQi: - return ARM64::LDPQi; - case ARM64::LDRWui: - case ARM64::LDURWi: - return ARM64::LDPWi; - case ARM64::LDRXui: - case ARM64::LDURXi: - return ARM64::LDPXi; - } -} - -static unsigned getPreIndexedOpcode(unsigned Opc) { - switch (Opc) { - default: - llvm_unreachable("Opcode has no pre-indexed equivalent!"); - case ARM64::STRSui: return ARM64::STRSpre; - case ARM64::STRDui: return ARM64::STRDpre; - case ARM64::STRQui: return ARM64::STRQpre; - case ARM64::STRWui: return ARM64::STRWpre; - case 
ARM64::STRXui: return ARM64::STRXpre; - case ARM64::LDRSui: return ARM64::LDRSpre; - case ARM64::LDRDui: return ARM64::LDRDpre; - case ARM64::LDRQui: return ARM64::LDRQpre; - case ARM64::LDRWui: return ARM64::LDRWpre; - case ARM64::LDRXui: return ARM64::LDRXpre; - } -} - -static unsigned getPostIndexedOpcode(unsigned Opc) { - switch (Opc) { - default: - llvm_unreachable("Opcode has no post-indexed wise equivalent!"); - case ARM64::STRSui: - return ARM64::STRSpost; - case ARM64::STRDui: - return ARM64::STRDpost; - case ARM64::STRQui: - return ARM64::STRQpost; - case ARM64::STRWui: - return ARM64::STRWpost; - case ARM64::STRXui: - return ARM64::STRXpost; - case ARM64::LDRSui: - return ARM64::LDRSpost; - case ARM64::LDRDui: - return ARM64::LDRDpost; - case ARM64::LDRQui: - return ARM64::LDRQpost; - case ARM64::LDRWui: - return ARM64::LDRWpost; - case ARM64::LDRXui: - return ARM64::LDRXpost; - } -} - -MachineBasicBlock::iterator -ARM64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, - bool mergeForward) { - MachineBasicBlock::iterator NextI = I; - ++NextI; - // If NextI is the second of the two instructions to be merged, we need - // to skip one further. Either way we merge will invalidate the iterator, - // and we don't need to scan the new instruction, as it's a pairwise - // instruction, which we're not considering for further action anyway. - if (NextI == Paired) - ++NextI; - - bool IsUnscaled = isUnscaledLdst(I->getOpcode()); - int OffsetStride = IsUnscaled && EnableARM64UnscaledMemOp ? getMemSize(I) : 1; - - unsigned NewOpc = getMatchingPairOpcode(I->getOpcode()); - // Insert our new paired instruction after whichever of the paired - // instructions mergeForward indicates. - MachineBasicBlock::iterator InsertionPoint = mergeForward ? Paired : I; - // Also based on mergeForward is from where we copy the base register operand - // so we get the flags compatible with the input code. - MachineOperand &BaseRegOp = - mergeForward ? Paired->getOperand(1) : I->getOperand(1); - - // Which register is Rt and which is Rt2 depends on the offset order. - MachineInstr *RtMI, *Rt2MI; - if (I->getOperand(2).getImm() == - Paired->getOperand(2).getImm() + OffsetStride) { - RtMI = Paired; - Rt2MI = I; - } else { - RtMI = I; - Rt2MI = Paired; - } - // Handle Unscaled - int OffsetImm = RtMI->getOperand(2).getImm(); - if (IsUnscaled && EnableARM64UnscaledMemOp) - OffsetImm /= OffsetStride; - - // Construct the new instruction. - MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint, - I->getDebugLoc(), TII->get(NewOpc)) - .addOperand(RtMI->getOperand(0)) - .addOperand(Rt2MI->getOperand(0)) - .addOperand(BaseRegOp) - .addImm(OffsetImm); - (void)MIB; - - // FIXME: Do we need/want to copy the mem operands from the source - // instructions? Probably. What uses them after this? - - DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n "); - DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(Paired->print(dbgs())); - DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); - - // Erase the old instructions. - I->eraseFromParent(); - Paired->eraseFromParent(); - - return NextI; -} - -/// trackRegDefsUses - Remember what registers the specified instruction uses -/// and modifies. 
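/// A register-mask operand (e.g. on a call) marks every register not
/// preserved by the callee as modified, so a base register clobbered across
/// such an instruction will stop the update-folding scans below.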
-static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs, - BitVector &UsedRegs, - const TargetRegisterInfo *TRI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isRegMask()) - ModifiedRegs.setBitsNotInMask(MO.getRegMask()); - - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (MO.isDef()) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - ModifiedRegs.set(*AI); - } else { - assert(MO.isUse() && "Reg operand not a def and not a use?!?"); - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - UsedRegs.set(*AI); - } - } -} - -static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { - if (!IsUnscaled && (Offset > 63 || Offset < -64)) - return false; - if (IsUnscaled) { - // Convert the byte-offset used by unscaled into an "element" offset used - // by the scaled pair load/store instructions. - int elemOffset = Offset / OffsetStride; - if (elemOffset > 63 || elemOffset < -64) - return false; - } - return true; -} - -// Do alignment, specialized to power of 2 and for signed ints, -// avoiding having to do a C-style cast from uint_64t to int when -// using RoundUpToAlignment from include/llvm/Support/MathExtras.h. -// FIXME: Move this function to include/MathExtras.h? -static int alignTo(int Num, int PowOf2) { - return (Num + PowOf2 - 1) & ~(PowOf2 - 1); -} - -/// findMatchingInsn - Scan the instructions looking for a load/store that can -/// be combined with the current instruction into a load/store pair. -MachineBasicBlock::iterator -ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, - bool &mergeForward, unsigned Limit) { - MachineBasicBlock::iterator E = I->getParent()->end(); - MachineBasicBlock::iterator MBBI = I; - MachineInstr *FirstMI = I; - ++MBBI; - - int Opc = FirstMI->getOpcode(); - bool mayLoad = FirstMI->mayLoad(); - bool IsUnscaled = isUnscaledLdst(Opc); - unsigned Reg = FirstMI->getOperand(0).getReg(); - unsigned BaseReg = FirstMI->getOperand(1).getReg(); - int Offset = FirstMI->getOperand(2).getImm(); - - // Early exit if the first instruction modifies the base register. - // e.g., ldr x0, [x0] - // Early exit if the offset if not possible to match. (6 bits of positive - // range, plus allow an extra one in case we find a later insn that matches - // with Offset-1 - if (FirstMI->modifiesRegister(BaseReg, TRI)) - return E; - int OffsetStride = - IsUnscaled && EnableARM64UnscaledMemOp ? getMemSize(FirstMI) : 1; - if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) - return E; - - // Track which registers have been modified and used between the first insn - // (inclusive) and the second insn. - BitVector ModifiedRegs, UsedRegs; - ModifiedRegs.resize(TRI->getNumRegs()); - UsedRegs.resize(TRI->getNumRegs()); - for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { - MachineInstr *MI = MBBI; - // Skip DBG_VALUE instructions. Otherwise debug info can affect the - // optimization by changing how far we scan. - if (MI->isDebugValue()) - continue; - - // Now that we know this is a real instruction, count it. - ++Count; - - if (Opc == MI->getOpcode() && MI->getOperand(2).isImm()) { - // If we've found another instruction with the same opcode, check to see - // if the base and offset are compatible with our starting instruction. - // These instructions all have scaled immediate operands, so we just - // check for +1/-1. 
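// For example, "ldr x0, [x2, #16]" and "ldr x1, [x2, #24]" carry scaled
// immediates 2 and 3 (units of 8 bytes), so they differ by exactly 1 and
// can be rewritten as "ldp x0, x1, [x2, #16]".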
Make sure to check the new instruction offset is - // actually an immediate and not a symbolic reference destined for - // a relocation. - // - // Pairwise instructions have a 7-bit signed offset field. Single insns - // have a 12-bit unsigned offset field. To be a valid combine, the - // final offset must be in range. - unsigned MIBaseReg = MI->getOperand(1).getReg(); - int MIOffset = MI->getOperand(2).getImm(); - if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || - (Offset + OffsetStride == MIOffset))) { - int MinOffset = Offset < MIOffset ? Offset : MIOffset; - // If this is a volatile load/store that otherwise matched, stop looking - // as something is going on that we don't have enough information to - // safely transform. Similarly, stop if we see a hint to avoid pairs. - if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI)) - return E; - // If the resultant immediate offset of merging these instructions - // is out of range for a pairwise instruction, bail and keep looking. - bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode()); - if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - continue; - } - // If the alignment requirements of the paired (scaled) instruction - // can't express the offset of the unscaled input, bail and keep - // looking. - if (IsUnscaled && EnableARM64UnscaledMemOp && - (alignTo(MinOffset, OffsetStride) != MinOffset)) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - continue; - } - // If the destination register of the loads is the same register, bail - // and keep looking. A load-pair instruction with both destination - // registers the same is UNPREDICTABLE and will result in an exception. - if (mayLoad && Reg == MI->getOperand(0).getReg()) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - continue; - } - - // If the Rt of the second instruction was not modified or used between - // the two instructions, we can combine the second into the first. - if (!ModifiedRegs[MI->getOperand(0).getReg()] && - !UsedRegs[MI->getOperand(0).getReg()]) { - mergeForward = false; - return MBBI; - } - - // Likewise, if the Rt of the first instruction is not modified or used - // between the two instructions, we can combine the first into the - // second. - if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] && - !UsedRegs[FirstMI->getOperand(0).getReg()]) { - mergeForward = true; - return MBBI; - } - // Unable to combine these instructions due to interference in between. - // Keep looking. - } - } - - // If the instruction wasn't a matching load or store, but does (or can) - // modify memory, stop searching, as we don't have alias analysis or - // anything like that to tell us whether the access is tromping on the - // locations we care about. The big one we want to catch is calls. - // - // FIXME: Theoretically, we can do better than that for SP and FP based - // references since we can effectively know where those are touching. It's - // unclear if it's worth the extra code, though. Most paired instructions - // will be sequential, perhaps with a few intervening non-memory related - // instructions. - if (MI->mayStore() || MI->isCall()) - return E; - // Likewise, if we're matching a store instruction, we don't want to - // move across a load, as it may be reading the same location. - if (FirstMI->mayStore() && MI->mayLoad()) - return E; - - // Update modified / uses register lists. 
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - - // Otherwise, if the base register is modified, we have no match, so - // return early. - if (ModifiedRegs[BaseReg]) - return E; - } - return E; -} - -MachineBasicBlock::iterator -ARM64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update) { - assert((Update->getOpcode() == ARM64::ADDXri || - Update->getOpcode() == ARM64::SUBXri) && - "Unexpected base register update instruction to merge!"); - MachineBasicBlock::iterator NextI = I; - // Return the instruction following the merged instruction, which is - // the instruction following our unmerged load. Unless that's the add/sub - // instruction we're merging, in which case it's the one after that. - if (++NextI == Update) - ++NextI; - - int Value = Update->getOperand(2).getImm(); - assert(ARM64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && - "Can't merge 1 << 12 offset into pre-indexed load / store"); - if (Update->getOpcode() == ARM64::SUBXri) - Value = -Value; - - unsigned NewOpc = getPreIndexedOpcode(I->getOpcode()); - MachineInstrBuilder MIB = - BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) - .addOperand(I->getOperand(0)) - .addOperand(I->getOperand(1)) - .addImm(Value); - (void)MIB; - - DEBUG(dbgs() << "Creating pre-indexed load/store."); - DEBUG(dbgs() << " Replacing instructions:\n "); - DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(Update->print(dbgs())); - DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); - - // Erase the old instructions for the block. - I->eraseFromParent(); - Update->eraseFromParent(); - - return NextI; -} - -MachineBasicBlock::iterator -ARM64LoadStoreOpt::mergePostIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update) { - assert((Update->getOpcode() == ARM64::ADDXri || - Update->getOpcode() == ARM64::SUBXri) && - "Unexpected base register update instruction to merge!"); - MachineBasicBlock::iterator NextI = I; - // Return the instruction following the merged instruction, which is - // the instruction following our unmerged load. Unless that's the add/sub - // instruction we're merging, in which case it's the one after that. - if (++NextI == Update) - ++NextI; - - int Value = Update->getOperand(2).getImm(); - assert(ARM64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && - "Can't merge 1 << 12 offset into post-indexed load / store"); - if (Update->getOpcode() == ARM64::SUBXri) - Value = -Value; - - unsigned NewOpc = getPostIndexedOpcode(I->getOpcode()); - MachineInstrBuilder MIB = - BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) - .addOperand(I->getOperand(0)) - .addOperand(I->getOperand(1)) - .addImm(Value); - (void)MIB; - - DEBUG(dbgs() << "Creating post-indexed load/store."); - DEBUG(dbgs() << " Replacing instructions:\n "); - DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(Update->print(dbgs())); - DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); - - // Erase the old instructions for the block. - I->eraseFromParent(); - Update->eraseFromParent(); - - return NextI; -} - -static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg, - int Offset) { - switch (MI->getOpcode()) { - default: - break; - case ARM64::SUBXri: - // Negate the offset for a SUB instruction. 
- Offset *= -1; - // FALLTHROUGH - case ARM64::ADDXri: - // Make sure it's a vanilla immediate operand, not a relocation or - // anything else we can't handle. - if (!MI->getOperand(2).isImm()) - break; - // Watch out for 1 << 12 shifted value. - if (ARM64_AM::getShiftValue(MI->getOperand(3).getImm())) - break; - // If the instruction has the base register as source and dest and the - // immediate will fit in a signed 9-bit integer, then we have a match. - if (MI->getOperand(0).getReg() == BaseReg && - MI->getOperand(1).getReg() == BaseReg && - MI->getOperand(2).getImm() <= 255 && - MI->getOperand(2).getImm() >= -256) { - // If we have a non-zero Offset, we check that it matches the amount - // we're adding to the register. - if (!Offset || Offset == MI->getOperand(2).getImm()) - return true; - } - break; - } - return false; -} - -MachineBasicBlock::iterator -ARM64LoadStoreOpt::findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, - unsigned Limit, int Value) { - MachineBasicBlock::iterator E = I->getParent()->end(); - MachineInstr *MemMI = I; - MachineBasicBlock::iterator MBBI = I; - const MachineFunction &MF = *MemMI->getParent()->getParent(); - - unsigned DestReg = MemMI->getOperand(0).getReg(); - unsigned BaseReg = MemMI->getOperand(1).getReg(); - int Offset = MemMI->getOperand(2).getImm() * - TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize(); - - // If the base register overlaps the destination register, we can't - // merge the update. - if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) - return E; - - // Scan forward looking for post-index opportunities. - // Updating instructions can't be formed if the memory insn already - // has an offset other than the value we're looking for. - if (Offset != Value) - return E; - - // Track which registers have been modified and used between the first insn - // (inclusive) and the second insn. - BitVector ModifiedRegs, UsedRegs; - ModifiedRegs.resize(TRI->getNumRegs()); - UsedRegs.resize(TRI->getNumRegs()); - ++MBBI; - for (unsigned Count = 0; MBBI != E; ++MBBI) { - MachineInstr *MI = MBBI; - // Skip DBG_VALUE instructions. Otherwise debug info can affect the - // optimization by changing how far we scan. - if (MI->isDebugValue()) - continue; - - // Now that we know this is a real instruction, count it. - ++Count; - - // If we found a match, return it. - if (isMatchingUpdateInsn(MI, BaseReg, Value)) - return MBBI; - - // Update the status of what the instruction clobbered and used. - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - - // Otherwise, if the base register is used or modified, we have no match, so - // return early. - if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) - return E; - } - return E; -} - -MachineBasicBlock::iterator -ARM64LoadStoreOpt::findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, - unsigned Limit) { - MachineBasicBlock::iterator B = I->getParent()->begin(); - MachineBasicBlock::iterator E = I->getParent()->end(); - MachineInstr *MemMI = I; - MachineBasicBlock::iterator MBBI = I; - const MachineFunction &MF = *MemMI->getParent()->getParent(); - - unsigned DestReg = MemMI->getOperand(0).getReg(); - unsigned BaseReg = MemMI->getOperand(1).getReg(); - int Offset = MemMI->getOperand(2).getImm(); - unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize(); - - // If the load/store is the first instruction in the block, there's obviously - // not any matching update. Ditto if the memory offset isn't zero. 
- if (MBBI == B || Offset != 0) - return E; - // If the base register overlaps the destination register, we can't - // merge the update. - if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) - return E; - - // Track which registers have been modified and used between the first insn - // (inclusive) and the second insn. - BitVector ModifiedRegs, UsedRegs; - ModifiedRegs.resize(TRI->getNumRegs()); - UsedRegs.resize(TRI->getNumRegs()); - --MBBI; - for (unsigned Count = 0; MBBI != B; --MBBI) { - MachineInstr *MI = MBBI; - // Skip DBG_VALUE instructions. Otherwise debug info can affect the - // optimization by changing how far we scan. - if (MI->isDebugValue()) - continue; - - // Now that we know this is a real instruction, count it. - ++Count; - - // If we found a match, return it. - if (isMatchingUpdateInsn(MI, BaseReg, RegSize)) - return MBBI; - - // Update the status of what the instruction clobbered and used. - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - - // Otherwise, if the base register is used or modified, we have no match, so - // return early. - if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) - return E; - } - return E; -} - -bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { - bool Modified = false; - // Two tranformations to do here: - // 1) Find loads and stores that can be merged into a single load or store - // pair instruction. - // e.g., - // ldr x0, [x2] - // ldr x1, [x2, #8] - // ; becomes - // ldp x0, x1, [x2] - // 2) Find base register updates that can be merged into the load or store - // as a base-reg writeback. - // e.g., - // ldr x0, [x2] - // add x2, x2, #4 - // ; becomes - // ldr x0, [x2], #4 - - for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - MBBI != E;) { - MachineInstr *MI = MBBI; - switch (MI->getOpcode()) { - default: - // Just move on to the next instruction. - ++MBBI; - break; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - case ARM64::STRXui: - case ARM64::STRWui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - case ARM64::LDRXui: - case ARM64::LDRWui: - // do the unscaled versions as well - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURWi: - case ARM64::STURXi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURWi: - case ARM64::LDURXi: { - // If this is a volatile load/store, don't mess with it. - if (MI->hasOrderedMemoryRef()) { - ++MBBI; - break; - } - // Make sure this is a reg+imm (as opposed to an address reloc). - if (!MI->getOperand(2).isImm()) { - ++MBBI; - break; - } - // Check if this load/store has a hint to avoid pair formation. - // MachineMemOperands hints are set by the ARM64StorePairSuppress pass. - if (TII->isLdStPairSuppressed(MI)) { - ++MBBI; - break; - } - // Look ahead up to ScanLimit instructions for a pairable instruction. - bool mergeForward = false; - MachineBasicBlock::iterator Paired = - findMatchingInsn(MBBI, mergeForward, ScanLimit); - if (Paired != E) { - // Merge the loads into a pair. Keeping the iterator straight is a - // pain, so we let the merge routine tell us what the next instruction - // is after it's done mucking about. - MBBI = mergePairedInsns(MBBI, Paired, mergeForward); - - Modified = true; - ++NumPairCreated; - if (isUnscaledLdst(MI->getOpcode())) - ++NumUnscaledPairCreated; - break; - } - ++MBBI; - break; - } - // FIXME: Do the other instructions. 
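// For the unscaled forms handled above, byte offsets are first converted to
// element offsets, so e.g. "ldur x0, [x2, #-8]" and "ldur x1, [x2]" can
// still be paired into "ldp x0, x1, [x2, #-8]".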
- } - } - - for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - MBBI != E;) { - MachineInstr *MI = MBBI; - // Do update merging. It's simpler to keep this separate from the above - // switch, though not strictly necessary. - int Opc = MI->getOpcode(); - switch (Opc) { - default: - // Just move on to the next instruction. - ++MBBI; - break; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - case ARM64::STRXui: - case ARM64::STRWui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - case ARM64::LDRXui: - case ARM64::LDRWui: - // do the unscaled versions as well - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURWi: - case ARM64::STURXi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURWi: - case ARM64::LDURXi: { - // Make sure this is a reg+imm (as opposed to an address reloc). - if (!MI->getOperand(2).isImm()) { - ++MBBI; - break; - } - // Look ahead up to ScanLimit instructions for a mergable instruction. - MachineBasicBlock::iterator Update = - findMatchingUpdateInsnForward(MBBI, ScanLimit, 0); - if (Update != E) { - // Merge the update into the ld/st. - MBBI = mergePostIdxUpdateInsn(MBBI, Update); - Modified = true; - ++NumPostFolded; - break; - } - // Don't know how to handle pre/post-index versions, so move to the next - // instruction. - if (isUnscaledLdst(Opc)) { - ++MBBI; - break; - } - - // Look back to try to find a pre-index instruction. For example, - // add x0, x0, #8 - // ldr x1, [x0] - // merged into: - // ldr x1, [x0, #8]! - Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit); - if (Update != E) { - // Merge the update into the ld/st. - MBBI = mergePreIdxUpdateInsn(MBBI, Update); - Modified = true; - ++NumPreFolded; - break; - } - - // Look forward to try to find a post-index instruction. For example, - // ldr x1, [x0, #64] - // add x0, x0, #64 - // merged into: - // ldr x1, [x0], #64 - - // The immediate in the load/store is scaled by the size of the register - // being loaded. The immediate in the add we're looking for, - // however, is not, so adjust here. - int Value = MI->getOperand(2).getImm() * - TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent())) - ->getSize(); - Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value); - if (Update != E) { - // Merge the update into the ld/st. - MBBI = mergePreIdxUpdateInsn(MBBI, Update); - Modified = true; - ++NumPreFolded; - break; - } - - // Nothing found. Just move to the next instruction. - ++MBBI; - break; - } - // FIXME: Do the other instructions. - } - } - - return Modified; -} - -bool ARM64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - // Early exit if pass disabled. - if (!DoLoadStoreOpt) - return false; - - const TargetMachine &TM = Fn.getTarget(); - TII = static_cast<const ARM64InstrInfo *>(TM.getInstrInfo()); - TRI = TM.getRegisterInfo(); - - bool Modified = false; - for (auto &MBB : Fn) - Modified |= optimizeBlock(MBB); - - return Modified; -} - -// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep -// loads and stores near one another? - -/// createARMLoadStoreOptimizationPass - returns an instance of the load / store -/// optimization pass. 
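
To make transformation (1) above concrete, here is a minimal stand-alone sketch (the helper name is invented; the real findMatchingInsn()/mergePairedInsns() logic also checks that both accesses use the same base register, that no intervening instruction clobbers or uses the registers involved, and that the access is neither volatile nor pair-suppressed): two register-width accesses can become an LDP/STP only if their scaled immediates are adjacent and the lower one fits the signed 7-bit scaled offset of the paired instructions.

#include <algorithm>
#include <cstdint>

// Hypothetical helper: offsets are in units of the access size, as in the
// scaled LDR/STR forms handled by the pass.
bool canFormLoadStorePair(int64_t OffsetA, int64_t OffsetB) {
  int64_t Lo = std::min(OffsetA, OffsetB);
  int64_t Hi = std::max(OffsetA, OffsetB);
  if (Hi - Lo != 1)
    return false;                    // the two accesses must be adjacent
  return Lo >= -64 && Lo <= 63;      // LDP/STP encode a signed 7-bit scaled immediate
}
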
-FunctionPass *llvm::createARM64LoadStoreOptimizationPass() { - return new ARM64LoadStoreOpt(); -} diff --git a/lib/Target/ARM64/ARM64MCInstLower.cpp b/lib/Target/ARM64/ARM64MCInstLower.cpp deleted file mode 100644 index 01dc229..0000000 --- a/lib/Target/ARM64/ARM64MCInstLower.cpp +++ /dev/null @@ -1,201 +0,0 @@ -//===-- ARM64MCInstLower.cpp - Convert ARM64 MachineInstr to an MCInst---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains code to lower ARM64 MachineInstrs to their corresponding -// MCInst records. -// -//===----------------------------------------------------------------------===// - -#include "ARM64MCInstLower.h" -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/IR/Mangler.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/Support/CodeGen.h" -#include "llvm/Target/TargetMachine.h" -using namespace llvm; - -ARM64MCInstLower::ARM64MCInstLower(MCContext &ctx, Mangler &mang, - AsmPrinter &printer) - : Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {} - -MCSymbol * -ARM64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { - return Printer.getSymbol(MO.getGlobal()); -} - -MCSymbol * -ARM64MCInstLower::GetExternalSymbolSymbol(const MachineOperand &MO) const { - return Printer.GetExternalSymbolSymbol(MO.getSymbolName()); -} - -MCOperand ARM64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, - MCSymbol *Sym) const { - // FIXME: We would like an efficient form for this, so we don't have to do a - // lot of extra uniquing. 
- MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; - if ((MO.getTargetFlags() & ARM64II::MO_GOT) != 0) { - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) - RefKind = MCSymbolRefExpr::VK_GOTPAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == - ARM64II::MO_PAGEOFF) - RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF; - else - assert(0 && "Unexpected target flags with MO_GOT on GV operand"); - } else if ((MO.getTargetFlags() & ARM64II::MO_TLS) != 0) { - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) - RefKind = MCSymbolRefExpr::VK_TLVPPAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == - ARM64II::MO_PAGEOFF) - RefKind = MCSymbolRefExpr::VK_TLVPPAGEOFF; - else - llvm_unreachable("Unexpected target flags with MO_TLS on GV operand"); - } else { - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) - RefKind = MCSymbolRefExpr::VK_PAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == - ARM64II::MO_PAGEOFF) - RefKind = MCSymbolRefExpr::VK_PAGEOFF; - } - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); - if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd( - Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); - return MCOperand::CreateExpr(Expr); -} - -MCOperand ARM64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, - MCSymbol *Sym) const { - uint32_t RefFlags = 0; - - if (MO.getTargetFlags() & ARM64II::MO_GOT) - RefFlags |= ARM64MCExpr::VK_GOT; - else if (MO.getTargetFlags() & ARM64II::MO_TLS) { - TLSModel::Model Model; - if (MO.isGlobal()) { - const GlobalValue *GV = MO.getGlobal(); - Model = Printer.TM.getTLSModel(GV); - } else { - assert(MO.isSymbol() && - StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" && - "unexpected external TLS symbol"); - Model = TLSModel::GeneralDynamic; - } - switch (Model) { - case TLSModel::InitialExec: - RefFlags |= ARM64MCExpr::VK_GOTTPREL; - break; - case TLSModel::LocalExec: - RefFlags |= ARM64MCExpr::VK_TPREL; - break; - case TLSModel::LocalDynamic: - RefFlags |= ARM64MCExpr::VK_DTPREL; - break; - case TLSModel::GeneralDynamic: - RefFlags |= ARM64MCExpr::VK_TLSDESC; - break; - } - } else { - // No modifier means this is a generic reference, classified as absolute for - // the cases where it matters (:abs_g0: etc). 
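
The page and pageoff modifiers used above describe the usual two-instruction address materialization on AArch64: an ADRP that produces the 4 KiB-aligned page of the symbol, followed by an ADD or load/store immediate that supplies the low 12 bits. A back-of-the-envelope illustration of the split (plain arithmetic only, not code from this backend):

#include <cstdint>

uint64_t pageOf(uint64_t Addr)    { return Addr & ~UINT64_C(0xFFF); } // what the page half refers to
uint64_t pageOffOf(uint64_t Addr) { return Addr & UINT64_C(0xFFF); }  // the pageoff low 12 bits

// For any address A, pageOf(A) + pageOffOf(A) == A, which is why the two
// relocation halves together can rebuild a full symbol address.
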
- RefFlags |= ARM64MCExpr::VK_ABS; - } - - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) - RefFlags |= ARM64MCExpr::VK_PAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGEOFF) - RefFlags |= ARM64MCExpr::VK_PAGEOFF; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G3) - RefFlags |= ARM64MCExpr::VK_G3; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G2) - RefFlags |= ARM64MCExpr::VK_G2; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G1) - RefFlags |= ARM64MCExpr::VK_G1; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G0) - RefFlags |= ARM64MCExpr::VK_G0; - - if (MO.getTargetFlags() & ARM64II::MO_NC) - RefFlags |= ARM64MCExpr::VK_NC; - - const MCExpr *Expr = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx); - if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd( - Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); - - ARM64MCExpr::VariantKind RefKind; - RefKind = static_cast<ARM64MCExpr::VariantKind>(RefFlags); - Expr = ARM64MCExpr::Create(Expr, RefKind, Ctx); - - return MCOperand::CreateExpr(Expr); -} - -MCOperand ARM64MCInstLower::LowerSymbolOperand(const MachineOperand &MO, - MCSymbol *Sym) const { - if (TargetTriple.isOSDarwin()) - return lowerSymbolOperandDarwin(MO, Sym); - - assert(TargetTriple.isOSBinFormatELF() && "Expect Darwin or ELF target"); - return lowerSymbolOperandELF(MO, Sym); -} - -bool ARM64MCInstLower::lowerOperand(const MachineOperand &MO, - MCOperand &MCOp) const { - switch (MO.getType()) { - default: - assert(0 && "unknown operand type"); - case MachineOperand::MO_Register: - // Ignore all implicit register operands. - if (MO.isImplicit()) - return false; - MCOp = MCOperand::CreateReg(MO.getReg()); - break; - case MachineOperand::MO_RegisterMask: - // Regmasks are like implicit defs. - return false; - case MachineOperand::MO_Immediate: - MCOp = MCOperand::CreateImm(MO.getImm()); - break; - case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::CreateExpr( - MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx)); - break; - case MachineOperand::MO_GlobalAddress: - MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); - break; - case MachineOperand::MO_ExternalSymbol: - MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); - break; - case MachineOperand::MO_JumpTableIndex: - MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex())); - break; - case MachineOperand::MO_ConstantPoolIndex: - MCOp = LowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex())); - break; - case MachineOperand::MO_BlockAddress: - MCOp = LowerSymbolOperand( - MO, Printer.GetBlockAddressSymbol(MO.getBlockAddress())); - break; - } - return true; -} - -void ARM64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { - OutMI.setOpcode(MI->getOpcode()); - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MCOperand MCOp; - if (lowerOperand(MI->getOperand(i), MCOp)) - OutMI.addOperand(MCOp); - } -} diff --git a/lib/Target/ARM64/ARM64MCInstLower.h b/lib/Target/ARM64/ARM64MCInstLower.h deleted file mode 100644 index 7e3a2c8..0000000 --- a/lib/Target/ARM64/ARM64MCInstLower.h +++ /dev/null @@ -1,52 +0,0 @@ -//===-- ARM64MCInstLower.h - Lower MachineInstr to MCInst ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
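
As a small self-contained model of the filtering that lowerOperand() and Lower() above implement (the types below are invented stand-ins for MachineOperand and MCOperand, and only registers and immediates are modeled): implicit register operands and register masks produce no MC operand at all, while everything else is translated and appended.

#include <vector>

struct Operand { enum Kind { Reg, RegMask, Imm } K; bool IsImplicit; long long Val; };
struct LoweredOp { bool IsReg; long long Val; };

// Mirrors the shape of lowerOperand(): return false when the operand should
// simply be dropped from the lowered instruction.
bool lowerOne(const Operand &Op, LoweredOp &Out) {
  switch (Op.K) {
  case Operand::Reg:
    if (Op.IsImplicit)
      return false;                  // implicit register operands are ignored
    Out = {true, Op.Val};
    return true;
  case Operand::RegMask:
    return false;                    // regmasks behave like implicit defs
  case Operand::Imm:
    Out = {false, Op.Val};
    return true;
  }
  return false;
}

// Mirrors Lower(): keep only the operands that lowered successfully.
std::vector<LoweredOp> lowerAll(const std::vector<Operand> &Ops) {
  std::vector<LoweredOp> Result;
  for (const Operand &Op : Ops) {
    LoweredOp L;
    if (lowerOne(Op, L))
      Result.push_back(L);
  }
  return Result;
}
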
-// -//===----------------------------------------------------------------------===// - -#ifndef ARM64_MCINSTLOWER_H -#define ARM64_MCINSTLOWER_H - -#include "llvm/ADT/Triple.h" -#include "llvm/Support/Compiler.h" - -namespace llvm { -class AsmPrinter; -class MCAsmInfo; -class MCContext; -class MCInst; -class MCOperand; -class MCSymbol; -class MachineInstr; -class MachineModuleInfoMachO; -class MachineOperand; -class Mangler; - -/// ARM64MCInstLower - This class is used to lower an MachineInstr -/// into an MCInst. -class LLVM_LIBRARY_VISIBILITY ARM64MCInstLower { - MCContext &Ctx; - AsmPrinter &Printer; - Triple TargetTriple; - -public: - ARM64MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer); - - bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const; - void Lower(const MachineInstr *MI, MCInst &OutMI) const; - - MCOperand lowerSymbolOperandDarwin(const MachineOperand &MO, - MCSymbol *Sym) const; - MCOperand lowerSymbolOperandELF(const MachineOperand &MO, - MCSymbol *Sym) const; - MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; - - MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; - MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; -}; -} - -#endif diff --git a/lib/Target/ARM64/ARM64MachineFunctionInfo.h b/lib/Target/ARM64/ARM64MachineFunctionInfo.h deleted file mode 100644 index 02bf7cf..0000000 --- a/lib/Target/ARM64/ARM64MachineFunctionInfo.h +++ /dev/null @@ -1,139 +0,0 @@ -//===- ARM64MachineFuctionInfo.h - ARM64 machine function info --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares ARM64-specific per-machine-function information. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64MACHINEFUNCTIONINFO_H -#define ARM64MACHINEFUNCTIONINFO_H - -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/MC/MCLinkerOptimizationHint.h" - -namespace llvm { - -/// ARM64FunctionInfo - This class is derived from MachineFunctionInfo and -/// contains private ARM64-specific information for each MachineFunction. -class ARM64FunctionInfo : public MachineFunctionInfo { - - /// HasStackFrame - True if this function has a stack frame. Set by - /// processFunctionBeforeCalleeSavedScan(). - bool HasStackFrame; - - /// \brief Amount of stack frame size, not including callee-saved registers. - unsigned LocalStackSize; - - /// \brief Number of TLS accesses using the special (combinable) - /// _TLS_MODULE_BASE_ symbol. - unsigned NumLocalDynamicTLSAccesses; - - /// \brief FrameIndex for start of varargs area for arguments passed on the - /// stack. - int VarArgsStackIndex; - - /// \brief FrameIndex for start of varargs area for arguments passed in - /// general purpose registers. - int VarArgsGPRIndex; - - /// \brief Size of the varargs area for arguments passed in general purpose - /// registers. - unsigned VarArgsGPRSize; - - /// \brief FrameIndex for start of varargs area for arguments passed in - /// floating-point registers. - int VarArgsFPRIndex; - - /// \brief Size of the varargs area for arguments passed in floating-point - /// registers. 
- unsigned VarArgsFPRSize; - -public: - ARM64FunctionInfo() - : HasStackFrame(false), NumLocalDynamicTLSAccesses(0), - VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0), - VarArgsFPRIndex(0), VarArgsFPRSize(0) {} - - explicit ARM64FunctionInfo(MachineFunction &MF) - : HasStackFrame(false), NumLocalDynamicTLSAccesses(0), - VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0), - VarArgsFPRIndex(0), VarArgsFPRSize(0) { - (void)MF; - } - - bool hasStackFrame() const { return HasStackFrame; } - void setHasStackFrame(bool s) { HasStackFrame = s; } - - void setLocalStackSize(unsigned Size) { LocalStackSize = Size; } - unsigned getLocalStackSize() const { return LocalStackSize; } - - void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; } - unsigned getNumLocalDynamicTLSAccesses() const { - return NumLocalDynamicTLSAccesses; - } - - int getVarArgsStackIndex() const { return VarArgsStackIndex; } - void setVarArgsStackIndex(int Index) { VarArgsStackIndex = Index; } - - int getVarArgsGPRIndex() const { return VarArgsGPRIndex; } - void setVarArgsGPRIndex(int Index) { VarArgsGPRIndex = Index; } - - unsigned getVarArgsGPRSize() const { return VarArgsGPRSize; } - void setVarArgsGPRSize(unsigned Size) { VarArgsGPRSize = Size; } - - int getVarArgsFPRIndex() const { return VarArgsFPRIndex; } - void setVarArgsFPRIndex(int Index) { VarArgsFPRIndex = Index; } - - unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; } - void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; } - - typedef SmallPtrSet<const MachineInstr *, 16> SetOfInstructions; - - const SetOfInstructions &getLOHRelated() const { return LOHRelated; } - - // Shortcuts for LOH related types. - class MILOHDirective { - MCLOHType Kind; - - /// Arguments of this directive. Order matters. - SmallVector<const MachineInstr *, 3> Args; - - public: - typedef SmallVectorImpl<const MachineInstr *> LOHArgs; - - MILOHDirective(MCLOHType Kind, const LOHArgs &Args) - : Kind(Kind), Args(Args.begin(), Args.end()) { - assert(isValidMCLOHType(Kind) && "Invalid LOH directive type!"); - } - - MCLOHType getKind() const { return Kind; } - const LOHArgs &getArgs() const { return Args; } - }; - - typedef MILOHDirective::LOHArgs MILOHArgs; - typedef SmallVector<MILOHDirective, 32> MILOHContainer; - - const MILOHContainer &getLOHContainer() const { return LOHContainerSet; } - - /// Add a LOH directive of this @p Kind and this @p Args. - void addLOHDirective(MCLOHType Kind, const MILOHArgs &Args) { - LOHContainerSet.push_back(MILOHDirective(Kind, Args)); - LOHRelated.insert(Args.begin(), Args.end()); - } - -private: - // Hold the lists of LOHs. - MILOHContainer LOHContainerSet; - SetOfInstructions LOHRelated; -}; -} // End llvm namespace - -#endif // ARM64MACHINEFUNCTIONINFO_H diff --git a/lib/Target/ARM64/ARM64PerfectShuffle.h b/lib/Target/ARM64/ARM64PerfectShuffle.h deleted file mode 100644 index 6759236..0000000 --- a/lib/Target/ARM64/ARM64PerfectShuffle.h +++ /dev/null @@ -1,6586 +0,0 @@ -//===-- ARM64PerfectShuffle.h - AdvSIMD Perfect Shuffle Table -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file, which was autogenerated by llvm-PerfectShuffle, contains data -// for the optimal way to build a perfect shuffle using AdvSIMD instructions. 
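
A brief note on how a table of this shape is indexed, inferred from the entry annotations below rather than taken from this file: each of the four result lanes picks one of the eight input lanes (the two 4-element sources concatenated) or 'u' for undef, so a shuffle mask maps to a four-digit base-9 number and the table needs 9^4 = 6561 entries. A sketch of that mapping (the function name is invented, and the packing of cost and operands inside each 32-bit entry is left to the llvm-PerfectShuffle generator and is not reproduced here):

// Hypothetical index computation: Mask[i] in [0,7] selects an input lane,
// a negative value stands for undef and becomes digit 8 ('u').
unsigned perfectShuffleIndex(const int Mask[4]) {
  unsigned Index = 0;
  for (int i = 0; i < 4; ++i) {
    unsigned Digit = Mask[i] < 0 ? 8u : static_cast<unsigned>(Mask[i]);
    Index = Index * 9 + Digit;       // the first mask element is the most significant digit
  }
  return Index;
}
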
-// -//===----------------------------------------------------------------------===// - -// 31 entries have cost 0 -// 242 entries have cost 1 -// 1447 entries have cost 2 -// 3602 entries have cost 3 -// 1237 entries have cost 4 -// 2 entries have cost 5 - -// This table is 6561*4 = 26244 bytes in size. -static const unsigned PerfectShuffleTable[6561+1] = { - 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS - 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS - 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0> - 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0> - 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS - 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3> - 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3> - 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0> - 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS - 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0> - 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS - 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS - 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0> - 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5> - 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7> - 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1> - 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1> - 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS - 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0> - 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1> - 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS - 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0> - 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6> - 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6> - 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7> - 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2> - 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS - 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0> - 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0> - 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0> - 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3> - 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6> - 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6> - 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7> - 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0> - 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0> - 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1> - 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS - 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS - 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4> - 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6> - 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS - 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS - 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5> - 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS - 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7> - 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3> - 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7> - 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5> - 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6> - 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5> - 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7> - 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7> - 2720039396U, 
// <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7> - 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7> - 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS - 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3> - 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7> - 3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS - 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0> - 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6> - 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0> - 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0> - 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0> - 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0> - 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7> - 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0> - 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6> - 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0> - 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7> - 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7> - 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0> - 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS - 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS - 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS - 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u> - 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS - 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS - 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS - 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u> - 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS - 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1> - 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS - 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1> - 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0> - 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5> - 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7> - 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1> - 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0> - 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS - 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1> - 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1> - 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0> - 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3> - 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS - 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7> - 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3> - 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1> - 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS - 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS - 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1> - 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2> - 835584U, // <0,1,2,3>: Cost 0 copy LHS - 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS - 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7> - 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7> - 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2> - 835584U, // <0,1,2,u>: Cost 0 copy LHS - 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0> - 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3> - 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0> - 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0> - 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS - 
2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7> - 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0> - 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1> - 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3> - 2568159334U, // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS - 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1> - 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1> - 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4> - 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS - 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS - 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS - 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4> - 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS - 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1> - 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1> - 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0> - 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7> - 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6> - 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1> - 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1> - 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1> - 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7> - 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS - 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7> - 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1> - 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7> - 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS - 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7> - 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1> - 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1> - 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1> - 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0> - 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1> - 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0> - 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1> - 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6> - 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0> - 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0> - 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7> - 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2> - 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS - 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS - 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS - 835584U, // <0,1,u,3>: Cost 0 copy LHS - 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS - 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS - 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS - 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u> - 835584U, // <0,1,u,u>: Cost 0 copy LHS - 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0> - 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS - 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS - 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0> - 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6> - 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7> - 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7> - 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0> - 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS - 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2> - 
2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1> - 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2> - 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2> - 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS - 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7> - 2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7> - 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7> - 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2> - 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2> - 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2> - 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2> - 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3> - 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS - 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3> - 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7> - 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2> - 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS - 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2> - 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2> - 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3> - 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3> - 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6> - 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6> - 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3> - 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0> - 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2> - 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS - 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3> - 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4> - 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4> - 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS - 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS - 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS - 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4> - 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS - 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7> - 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3> - 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7> - 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6> - 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6> - 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5> - 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0> - 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS - 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS - 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1> - 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2> - 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3> - 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7> - 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5> - 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6> - 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6> - 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2> - 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7> - 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2> - 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2> - 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2> - 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0> - 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6> - 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2> - 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, 
<7,6,0,2> - 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7> - 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2> - 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u> - 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS - 1678563118U, // <0,2,u,2>: Cost 2 vuzpl LHS, LHS - 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3> - 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS - 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS - 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS - 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS - 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS - 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0> - 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2> - 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0> - 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3> - 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS - 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6> - 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7> - 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0> - 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS - 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2> - 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1> - 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3> - 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3> - 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6> - 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6> - 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1> - 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3> - 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2> - 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS - 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2> - 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2> - 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3> - 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS - 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6> - 2598154746U, // <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3> - 2598155258U, // <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2> - 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS - 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2> - 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3> - 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3> - 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3> - 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6> - 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6> - 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7> - 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7> - 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3> - 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2> - 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4> - 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4> - 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3> - 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6> - 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6> - 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS - 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4> - 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6> - 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS - 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2> - 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2> - 
3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7> - 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5> - 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7> - 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7> - 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0> - 2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5> - 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7> - 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3> - 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7> - 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7> - 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7> - 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6> - 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6> - 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0> - 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0> - 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1> - 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3> - 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7> - 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7> - 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5> - 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7> - 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7> - 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0> - 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3> - 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS - 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u> - 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2> - 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3> - 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS - 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6> - 2598203898U, // <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3> - 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0> - 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS - 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4> - 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS - 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4> - 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0> - 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6> - 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1> - 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS - 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0> - 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS - 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS - 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1> - 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0> - 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1> - 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS - 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS - 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS - 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1> - 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS - 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS - 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2> - 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2> - 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4> - 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS - 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS - 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS - 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2> - 
1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS - 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2> - 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2> - 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4> - 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3> - 3692349954U, // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6> - 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6> - 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS - 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4> - 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2> - 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4> - 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0> - 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3> - 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4> - 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4> - 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS - 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS - 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4> - 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS - 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS - 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0> - 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5> - 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5> - 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS - 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6> - 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS - 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5> - 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS - 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6> - 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2> - 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6> - 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0> - 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6> - 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6> - 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0> - 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4> - 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2> - 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS - 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1> - 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4> - 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4> - 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS - 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0> - 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0> - 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2> - 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0> - 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS - 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS - 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS - 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u> - 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS - 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS - 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS - 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u> - 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS - 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0> - 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS - 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2> - 3693019384U, // <0,5,0,3>: Cost 4 
vext2 <0,3,0,5>, <0,3,0,5> - 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS - 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1> - 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1> - 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS - 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS - 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS - 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3> - 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2> - 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2> - 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1> - 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5> - 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0> - 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3> - 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS - 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS - 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2> - 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7> - 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2> - 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS - 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5> - 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6> - 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS - 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS - 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2> - 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3> - 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1> - 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3> - 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5> - 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0> - 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7> - 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0> - 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0> - 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1> - 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4> - 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4> - 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5> - 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6> - 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS - 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5> - 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6> - 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS - 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0> - 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0> - 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5> - 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0> - 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5> - 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5> - 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0> - 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7> - 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7> - 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS - 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0> - 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3> - 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4> - 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS - 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0> - 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7> - 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0> - 
1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0> - 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS - 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0> - 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7> - 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2> - 2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS - 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7> - 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0> - 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0> - 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS - 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS - 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS - 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0> - 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u> - 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u> - 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS - 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u> - 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0> - 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0> - 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS - 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS - 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2> - 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4> - 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6> - 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0> - 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0> - 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS - 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS - 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS - 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1> - 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3> - 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3> - 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS - 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1> - 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6> - 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS - 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS - 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS - 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2> - 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2> - 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1> - 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2> - 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3> - 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6> - 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS - 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS - 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2> - 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7> - 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0> - 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3> - 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6> - 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7> - 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0> - 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0> - 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0> - 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS - 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2> - 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4> - 3636299926U, // <0,6,4,3>: Cost 4 
vext1 <2,0,6,4>, <3,0,1,2> - 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS - 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS - 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0> - 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS - 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS - 2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7> - 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0> - 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7> - 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0> - 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5> - 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0> - 3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7> - 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS - 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7> - 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0> - 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3> - 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6> - 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0> - 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4> - 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6> - 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6> - 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7> - 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7> - 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1> - 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0> - 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7> - 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0> - 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5> - 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6> - 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2> - 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0> - 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1> - 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS - 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS - 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u> - 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0> - 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u> - 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS - 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0> - 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS - 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS - 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0> - 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS - 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0> - 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0> - 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5> - 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6> - 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7> - 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7> - 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS - 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1> - 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1> - 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0> - 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5> - 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1> - 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3> - 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7> - 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7> 
- 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1> - 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS - 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2> - 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2> - 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0> - 2586504502U, // <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS - 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7> - 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2> - 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7> - 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2> - 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2> - 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3> - 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3> - 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3> - 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6> - 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7> - 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7> - 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0> - 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7> - 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS - 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4> - 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4> - 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7> - 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6> - 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS - 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7> - 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5> - 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS - 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0> - 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7> - 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5> - 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7> - 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5> - 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7> - 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7> - 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0> - 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7> - 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0> - 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6> - 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7> - 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7> - 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS - 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7> - 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6> - 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0> - 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7> - 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1> - 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0> - 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7> - 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0> - 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS - 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7> - 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7> - 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7> - 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7> - 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u> - 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u> - 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u> - 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0> - 2256761883U, // <0,7,u,4>: 
Cost 3 vrev <7,0,4,u> - 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS - 2669066421U, // <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7> - 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0> - 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u> - 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS - 1544896614U, // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS - 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS - 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2> - 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS - 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6> - 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS - 2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0> - 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS - 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1> - 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS - 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS - 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2> - 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS - 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS - 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7> - 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS - 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS - 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS - 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2> - 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS - 835584U, // <0,u,2,3>: Cost 0 copy LHS - 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS - 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6> - 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS - 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2> - 835584U, // <0,u,2,u>: Cost 0 copy LHS - 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2> - 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2> - 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u> - 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3> - 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6> - 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6> - 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7> - 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u> - 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2> - 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS - 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS - 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS - 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4> - 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS - 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS - 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS - 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6> - 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS - 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS - 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0> - 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7> - 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7> - 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS - 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u> - 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS - 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS - 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS - 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS - 2262496983U, // <0,u,6,1>: Cost 3 vrev <u,0,1,6> - 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u> - 2691881168U, // <0,u,6,3>: Cost 3 vext3 
<1,2,3,0>, <u,6,3,7> - 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS - 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u> - 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u> - 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u> - 1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u> - 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS - 2562728854U, // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0> - 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7> - 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u> - 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS - 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6> - 2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7> - 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7> - 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS - 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS - 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS - 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS - 835584U, // <0,u,u,3>: Cost 0 copy LHS - 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS - 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS - 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS - 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u> - 835584U, // <0,u,u,u>: Cost 0 copy LHS - 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0> - 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1> - 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2> - 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0> - 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1> - 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0> - 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7> - 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0> - 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1> - 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS - 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1> - 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS - 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3> - 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS - 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1> - 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7> - 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2> - 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS - 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1> - 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1> - 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0> - 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1> - 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6> - 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7> - 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0> - 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2> - 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1> - 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0> - 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1> - 67944550U, // <1,0,3,2>: Cost 1 vrev LHS - 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3> - 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS - 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7> - 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7> - 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3> - 68386972U, // <1,0,3,u>: Cost 1 vrev LHS - 2620640146U, // 
<1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1> - 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5> - 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6> - 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1> - 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1> - 2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS - 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1> - 3666383940U, // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4> - 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS - 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0> - 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS - 3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS - 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5> - 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5> - 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0> - 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0> - 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS - 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS - 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1> - 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7> - 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7> - 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6> - 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1> - 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0> - 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0> - 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0> - 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0> - 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0> - 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1> - 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7> - 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0> - 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6> - 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0> - 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0> - 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7> - 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0> - 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0> - 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1> - 67985515U, // <1,0,u,2>: Cost 1 vrev LHS - 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1> - 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6> - 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS - 2669082807U, // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0> - 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u> - 68427937U, // <1,0,u,u>: Cost 1 vrev LHS - 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1> - 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS - 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1> - 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2> - 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5> - 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1> - 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7> - 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0> - 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1> - 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS - 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS - 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0> - 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3> - 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS - 
2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7> - 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7> - 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1> - 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS - 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2> - 2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1> - 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2> - 2622637734U, // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1> - 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS - 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7> - 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7> - 3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0> - 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1> - 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2> - 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1> - 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2> - 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS - 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6> - 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7> - 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7> - 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3> - 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS - 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS - 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4> - 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0> - 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5> - 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS - 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS - 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS - 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4> - 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS - 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1> - 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3> - 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2> - 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7> - 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5> - 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5> - 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0> - 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS - 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7> - 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2> - 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7> - 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3> - 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7> - 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6> - 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5> - 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6> - 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0> - 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0> - 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1> - 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1> - 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3> - 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS - 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6> - 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7> - 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0> - 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7> - 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1> - 1483112550U, // <1,1,u,0>: Cost 2 
vext1 <1,1,1,1>, LHS - 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS - 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3> - 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS - 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS - 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS - 2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7> - 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS - 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS - 2635251712U, // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0> - 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS - 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2> - 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1> - 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5> - 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7> - 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2> - 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1> - 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS - 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2> - 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1> - 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0> - 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS - 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS - 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7> - 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7> - 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0> - 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS - 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2> - 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2> - 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2> - 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3> - 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5> - 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7> - 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7> - 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1> - 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3> - 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS - 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2> - 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2> - 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3> - 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS - 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3> - 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3> - 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2> - 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS - 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2> - 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6> - 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4> - 2598759198U, // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2> - 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4> - 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS - 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6> - 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0> - 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS - 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS - 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7> - 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2> - 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS - 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS - 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5> - 2659143778U, // <1,2,5,6>: 
Cost 3 vext2 <7,0,1,2>, <5,6,7,0> - 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7> - 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS - 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1> - 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2> - 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3> - 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7> - 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5> - 3763488716U, // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7> - 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6> - 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2> - 2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7> - 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2> - 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2> - 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3> - 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1> - 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6> - 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0> - 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1> - 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1> - 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2> - 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS - 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2> - 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2> - 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2> - 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS - 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS - 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3> - 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2> - 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS - 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0> - 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS - 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2> - 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3> - 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5> - 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6> - 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7> - 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1> - 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS - 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2> - 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1> - 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3> - 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS - 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS - 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7> - 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7> - 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1> - 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS - 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0> - 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3> - 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2> - 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3> - 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4> - 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5> - 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6> - 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3> - 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u> - 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS - 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3> - 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, 
<2,2,2,2> - 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3> - 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS - 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5> - 2598826490U, // <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3> - 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7> - 1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS - 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS - 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3> - 3832761290U, // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3> - 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4> - 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS - 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS - 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6> - 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4> - 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS - 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS - 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7> - 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5> - 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5> - 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS - 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5> - 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4> - 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS - 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS - 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0> - 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1> - 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3> - 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7> - 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3> - 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5> - 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3> - 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7> - 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u> - 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1> - 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3> - 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7> - 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3> - 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6> - 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5> - 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1> - 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7> - 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1> - 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS - 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u> - 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2> - 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS - 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS - 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS - 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6> - 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS - 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS - 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4> - 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS - 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4> - 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4> - 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5> - 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1> - 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2> - 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1> - 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, 
<4,0,u,1> - 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2> - 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4> - 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0> - 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6> - 3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1> - 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS - 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS - 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0> - 2892402217U, // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS - 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2> - 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3> - 3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2> - 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1> - 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4> - 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS - 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7> - 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2> - 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS - 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS - 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4> - 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3> - 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3> - 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6> - 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5> - 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6> - 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3> - 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u> - 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1> - 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4> - 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4> - 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4> - 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4> - 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS - 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6> - 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1> - 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS - 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS - 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5> - 2598913774U, // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1> - 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2> - 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS - 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS - 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS - 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS - 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS - 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS - 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1> - 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2> - 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2> - 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS - 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7> - 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7> - 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1> - 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS - 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4> - 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1> - 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4> - 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4> - 3660729654U, // 
<1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS - 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0> - 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1> - 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2> - 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4> - 2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS - 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS - 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u> - 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1> - 2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6> - 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1> - 1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS - 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1> - 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS - 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0> - 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS - 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5> - 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4> - 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5> - 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5> - 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1> - 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1> - 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS - 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2> - 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1> - 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0> - 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7> - 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5> - 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1> - 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7> - 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS - 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3> - 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1> - 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5> - 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2> - 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1> - 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5> - 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7> - 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7> - 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS - 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1> - 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2> - 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7> - 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2> - 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3> - 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6> - 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5> - 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6> - 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS - 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS - 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1> - 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0> - 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3> - 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4> - 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4> - 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS - 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS - 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6> - 1546276393U, // 
<1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS - 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS - 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5> - 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5> - 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3> - 2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS - 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5> - 2667794530U, // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0> - 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7> - 2557187886U, // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS - 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1> - 2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5> - 2667794938U, // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3> - 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4> - 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6> - 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6> - 2667795256U, // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6> - 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0> - 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0> - 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS - 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1> - 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1> - 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7> - 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS - 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7> - 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0> - 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1> - 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS - 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2> - 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS - 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3> - 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1> - 1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5> - 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS - 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7> - 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS - 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS - 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0> - 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS - 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6> - 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1> - 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5> - 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6> - 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6> - 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS - 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS - 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2> - 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1> - 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0> - 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3> - 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6> - 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7> - 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1> - 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS - 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS - 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1> - 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0> - 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2> - 3692439206U, // <1,6,2,3>: Cost 4 
vext2 <0,2,1,6>, <2,3,0,1> - 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS - 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7> - 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7> - 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS - 2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS - 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS - 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1> - 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3> - 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3> - 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS - 4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5> - 2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3> - 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7> - 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u> - 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1> - 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0> - 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4> - 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4> - 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5> - 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS - 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS - 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS - 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS - 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2> - 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5> - 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5> - 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5> - 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6> - 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5> - 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0> - 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS - 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS - 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1> - 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6> - 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7> - 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0> - 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS - 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7> - 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6> - 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7> - 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7> - 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1> - 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1> - 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3> - 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7> - 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5> - 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1> - 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7> - 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS - 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1> - 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1> - 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS - 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2> - 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1> - 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5> - 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS - 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7> - 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7> 
- 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1> - 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1> - 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS - 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1> - 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1> - 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1> - 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0> - 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7> - 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7> - 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS - 2593128550U, // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS - 2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1> - 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0> - 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1> - 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS - 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7> - 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7> - 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1> - 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7> - 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS - 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7> - 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2> - 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1> - 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS - 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7> - 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2> - 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2> - 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS - 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS - 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7> - 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2> - 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3> - 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS - 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3> - 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3> - 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2> - 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS - 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1> - 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1> - 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4> - 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4> - 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4> - 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS - 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0> - 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6> - 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS - 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS - 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7> - 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2> - 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5> - 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS - 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6> - 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7> - 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS - 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS - 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1> - 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0> - 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7> - 3643025697U, // <1,7,6,3>: 
Cost 4 vext1 <3,1,7,6>, <3,1,7,6> - 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS - 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6> - 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6> - 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0> - 2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0> - 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1> - 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1> - 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1> - 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0> - 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS - 2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7> - 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7> - 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7> - 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1> - 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS - 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS - 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2> - 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u> - 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS - 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u> - 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3> - 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2> - 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS - 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u> - 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS - 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2> - 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u> - 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u> - 1658771190U, // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1> - 2736789248U, // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2> - 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1> - 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS - 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS - 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS - 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS - 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS - 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS - 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7> - 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS - 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS - 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS - 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0> - 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS - 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2> - 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3> - 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4> - 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS - 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6> - 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS - 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u> - 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS - 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2> - 115726126U, // <1,u,3,2>: Cost 1 vrev LHS - 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS - 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS - 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3> - 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3> - 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS - 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS - 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1> - 2620042237U, // 
<1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0> - 2263217967U, // <1,u,4,2>: Cost 3 vrev <u,1,2,4> - 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4> - 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS - 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS - 2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6> - 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6> - 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS - 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS - 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7> - 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS - 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS - 2551434550U, // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS - 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS - 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS - 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS - 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS - 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u> - 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0> - 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3> - 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7> - 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4> - 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7> - 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u> - 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7> - 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u> - 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u> - 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1> - 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1> - 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7> - 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS - 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u> - 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7> - 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7> - 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u> - 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS - 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS - 115767091U, // <1,u,u,2>: Cost 1 vrev LHS - 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS - 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS - 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS - 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS - 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS - 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS - 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0> - 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1> - 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2> - 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0> - 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS - 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5> - 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0> - 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7> - 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2> - 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1> - 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0> - 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS - 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS - 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS - 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7> - 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1> - 3661050874U, // <2,0,1,7>: Cost 4 vext1 
<6,2,0,1>, <7,0,1,2> - 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS - 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS - 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2> - 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0> - 2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2> - 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS - 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3> - 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2> - 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2> - 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS - 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0> - 2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1> - 2953625764U, // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2> - 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3> - 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS - 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5> - 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6> - 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7> - 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u> - 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS - 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5> - 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6> - 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2> - 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS - 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS - 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS - 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5> - 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6> - 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS - 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5> - 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7> - 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5> - 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5> - 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5> - 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0> - 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS - 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5> - 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0> - 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS - 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7> - 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6> - 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6> - 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0> - 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6> - 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7> - 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6> - 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0> - 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3> - 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1> - 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0> - 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6> - 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2> - 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0> - 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7> - 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0> - 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS - 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1> - 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS - 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2> - 1477791030U, 
// <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS - 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS - 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS - 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS - 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS - 2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS - 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS - 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0> - 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2> - 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS - 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2> - 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0> - 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1> - 1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2> - 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS - 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1> - 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0> - 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3> - 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS - 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7> - 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1> - 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2> - 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3> - 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS - 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1> - 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2> - 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0> - 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS - 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3> - 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7> - 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0> - 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0> - 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS - 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1> - 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2> - 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3> - 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS - 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5> - 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3> - 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7> - 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u> - 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5> - 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6> - 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5> - 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4> - 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS - 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS - 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS - 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6> - 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4> - 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS - 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7> - 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1> - 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7> - 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS - 3740913668U, // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5> - 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5> - 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS - 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7> - 2551586918U, // <2,1,6,0>: 
Cost 3 vext1 <0,2,1,6>, LHS - 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2> - 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7> - 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS - 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS - 4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5> - 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6> - 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1> - 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS - 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2> - 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1> - 3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0> - 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7> - 3740915046U, // <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6> - 3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7> - 3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1> - 3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u> - 2669827714U, // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2> - 2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS - 2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1> - 2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2> - 1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u> - 2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS - 2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5> - 2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3> - 2669828370U, // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1> - 1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u> - 1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2> - 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS - 2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0> - 2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2> - 2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS - 3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7> - 3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6> - 3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2> - 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2> - 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2> - 2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1> - 2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0> - 2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS - 3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS - 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7> - 3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3> - 3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1> - 2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS - 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS - 2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2> - 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS - 2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3> - 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS - 2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7> - 2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7> - 2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2> - 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS - 2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1> - 2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0> - 2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2> - 1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS - 2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5> - 3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5> - 
2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6> - 2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3> - 1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS - 2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS - 3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2> - 2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5> - 3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5> - 2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS - 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS - 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS - 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4> - 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS - 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2> - 2617413328U, // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3> - 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7> - 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS - 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5> - 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5> - 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0> - 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS - 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS - 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6> - 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3> - 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3> - 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7> - 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS - 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7> - 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6> - 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2> - 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7> - 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1> - 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5> - 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2> - 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS - 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6> - 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7> - 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7> - 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7> - 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1> - 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS - 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS - 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS - 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS - 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS - 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS - 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6> - 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS - 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS - 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0> - 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS - 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2> - 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3> - 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5> - 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7> - 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7> - 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0> - 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS - 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2> - 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1> - 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0> - 1548985304U, // <2,3,1,3>: Cost 2 
vext2 LHS, <1,3,1,3> - 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS - 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7> - 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7> - 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0> - 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3> - 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2> - 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1> - 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2> - 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1> - 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5> - 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7> - 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7> - 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1> - 1548986427U, // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1> - 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2> - 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3> - 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3> - 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3> - 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6> - 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7> - 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3> - 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7> - 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2> - 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS - 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4> - 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0> - 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2> - 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS - 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS - 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6> - 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4> - 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS - 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS - 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3> - 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4> - 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6> - 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6> - 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5> - 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0> - 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7> - 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7> - 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2> - 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6> - 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3> - 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5> - 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6> - 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7> - 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6> - 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1> - 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1> - 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2> - 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3> - 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3> - 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1> - 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6> - 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2> - 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1> - 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7> - 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2> - 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2> - 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS - 1548990341U, // <2,3,u,2>: Cost 2 
vext2 LHS, <u,2,3,0> - 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1> - 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6> - 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS - 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7> - 1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, <u,7,0,1> - 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS - 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4> - 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS - 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4> - 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4> - 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2> - 2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1> - 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2> - 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1> - 1634962332U, // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2> - 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2> - 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1> - 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4> - 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS - 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4> - 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7> - 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3> - 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1> - 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS - 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4> - 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3> - 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4> - 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5> - 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4> - 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS - 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS - 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0> - 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS - 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4> - 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1> - 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4> - 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3> - 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4> - 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5> - 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6> - 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7> - 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u> - 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2> - 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1> - 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2> - 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3> - 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4> - 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS - 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS - 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7> - 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS - 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5> - 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2> - 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5> - 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5> - 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS - 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5> - 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS - 2826128694U, // 
<2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS - 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS - 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS - 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2> - 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4> - 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2> - 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS - 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS - 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6> - 2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1> - 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS - 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4> - 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2> - 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4> - 3734967544U, // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4> - 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6> - 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0> - 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1> - 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7> - 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4> - 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS - 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS - 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2> - 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2> - 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS - 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5> - 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS - 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS - 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS - 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0> - 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS - 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2> - 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5> - 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5> - 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5> - 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7> - 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS - 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS - 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2> - 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1> - 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0> - 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7> - 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5> - 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7> - 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2> - 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3> - 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5> - 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2> - 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1> - 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2> - 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1> - 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5> - 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1> - 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7> - 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS - 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS - 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS - 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3> - 2575746766U, // <2,5,3,2>: 
Cost 3 vext1 <4,2,5,3>, <2,3,4,5> - 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3> - 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6> - 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3> - 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6> - 2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7> - 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u> - 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS - 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4> - 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5> - 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4> - 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS - 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS - 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6> - 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS - 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS - 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS - 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7> - 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5> - 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5> - 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS - 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5> - 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6> - 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7> - 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7> - 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS - 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6> - 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3> - 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6> - 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5> - 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7> - 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6> - 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS - 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS - 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS - 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2> - 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7> - 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2> - 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS - 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7> - 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2> - 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS - 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS - 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS - 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS - 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u> - 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u> - 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6> - 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS - 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6> - 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7> - 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS - 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0> - 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS - 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2> - 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0> - 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6> - 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6> - 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1> - 
2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2> - 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS - 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2> - 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1> - 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0> - 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS - 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6> - 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7> - 3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7> - 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2> - 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS - 2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1> - 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3> - 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2> - 2620098214U, // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1> - 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6> - 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7> - 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6> - 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS - 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1> - 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2> - 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1> - 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7> - 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3> - 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6> - 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5> - 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6> - 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS - 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS - 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2> - 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3> - 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0> - 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4> - 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4> - 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS - 2667875700U, // <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6> - 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS - 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS - 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2> - 2667876048U, // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3> - 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7> - 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5> - 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5> - 2667876356U, // <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5> - 2667876450U, // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0> - 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS - 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS - 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS - 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1> - 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6> - 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3> - 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS - 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5> - 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6> - 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS - 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS - 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1> - 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2> - 2725000033U, // 
<2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2> - 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0> - 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5> - 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7> - 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2> - 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0> - 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1> - 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2> - 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS - 2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3> - 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS - 1594136612U, // <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6> - 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS - 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7> - 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS - 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS - 2726179825U, // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2> - 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2> - 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2> - 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0> - 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2> - 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7> - 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7> - 2599760953U, // <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2> - 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2> - 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2> - 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1> - 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0> - 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7> - 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5> - 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7> - 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7> - 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0> - 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2> - 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2> - 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7> - 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2> - 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1> - 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6> - 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7> - 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7> - 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1> - 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7> - 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS - 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7> - 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2> - 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3> - 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS - 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3> - 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3> - 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2> - 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS - 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6> - 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4> - 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7> - 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4> - 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4> - 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS - 2712835441U, // 
<2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u> - 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0> - 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS - 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2> - 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7> - 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7> - 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5> - 2599800118U, // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS - 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5> - 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7> - 2599802214U, // <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6> - 2599802670U, // <2,7,5,u>: Cost 3 vext1 <u,2,7,5>, LHS - 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS - 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7> - 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3> - 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6> - 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS - 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6> - 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6> - 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0> - 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS - 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1> - 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2> - 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7> - 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7> - 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS - 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5> - 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7> - 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7> - 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1> - 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS - 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2> - 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2> - 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u> - 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS - 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS - 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u> - 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2> - 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS - 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0> - 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS - 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2> - 1658631909U, // <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2> - 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5> - 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7> - 1658853120U, // <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2> - 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS - 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS - 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2> - 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1> - 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0> - 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3> - 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5> - 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7> - 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7> - 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3> - 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3> - 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS - 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1> - 269271142U, // <2,u,2,2>: Cost 1 
vdup2 LHS - 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1> - 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS - 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6> - 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7> - 3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS - 269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS - 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2> - 2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1> - 1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3> - 1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS - 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6> - 2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5> - 1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3> - 1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS - 1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS - 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS - 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4> - 2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5> - 1190213513U, // <2,u,4,3>: Cost 2 vrev <u,2,3,4> - 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS - 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS - 1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6> - 3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS - 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS - 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5> - 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3> - 2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5> - 2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, <u,5,3,7> - 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6> - 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5> - 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS - 1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7> - 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS - 1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS - 2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2> - 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3> - 2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, <u,6,3,7> - 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS - 2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS - 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6> - 1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1> - 1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS - 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2> - 2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u> - 2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7> - 2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1> - 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6> - 2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2> - 2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7> - 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7> - 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2> - 1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS - 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS - 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS - 1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS - 1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS - 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS - 1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS - 1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS - 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS - 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0> - 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1> - 1611890708U, // <3,0,0,2>: Cost 
2 vext3 LHS, <0,0,2,2> - 3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1> - 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1> - 3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7> - 3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1> - 3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0> - 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2> - 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS - 2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0> - 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS - 2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3> - 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS - 2624767120U, // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7> - 2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7> - 2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1> - 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS - 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2> - 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1> - 2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0> - 2685632702U, // <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1> - 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6> - 2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7> - 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7> - 2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7> - 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2> - 2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2> - 2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0> - 2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1> - 2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3> - 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6> - 3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7> - 2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7> - 3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1> - 2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3> - 2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4> - 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5> - 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6> - 3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1> - 2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6> - 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS - 2666573172U, // <3,0,4,6>: Cost 3 vext2 <u,2,3,0>, <4,6,4,6> - 3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4> - 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6> - 2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7> - 2666573520U, // <3,0,5,1>: Cost 3 vext2 <u,2,3,0>, <5,1,7,3> - 3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS - 3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6> - 2666573766U, // <3,0,5,4>: Cost 3 vext2 <u,2,3,0>, <5,4,7,6> - 2666573828U, // <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5> - 2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7> - 2666573992U, // <3,0,5,7>: Cost 3 vext2 <u,2,3,0>, <5,7,5,7> - 3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS - 2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7> - 2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7> - 2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7> - 3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2> - 3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1> - 3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0> - 2666574648U, // <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6> - 2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0> - 2689835573U, // <3,0,6,u>: Cost 3 vext3 
LHS, <0,6,u,7> - 2666574842U, // <3,0,7,0>: Cost 3 vext2 <u,2,3,0>, <7,0,1,2> - 2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7> - 2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0> - 3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7> - 2666575206U, // <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6> - 3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7> - 3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3> - 2666575468U, // <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7> - 2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0> - 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2> - 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1> - 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS - 2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1> - 1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6> - 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS - 2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, <u,6,3,7> - 2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u> - 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS - 2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS - 2618802278U, // <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS - 2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1> - 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2> - 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS - 2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1> - 3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6> - 3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0> - 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2> - 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1> - 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1> - 2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1> - 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3> - 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5> - 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5> - 3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5> - 3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3> - 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3> - 2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1> - 2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3> - 2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2> - 1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0> - 2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS - 2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3> - 3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7> - 2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0> - 1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0> - 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS - 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3> - 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0> - 2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1> - 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS - 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7> - 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7> - 2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3> - 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3> - 2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS - 2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5> - 2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5> - 2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5> - 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS - 2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS - 2769472822U, // 
<3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS - 3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4> - 2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5> - 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1> - 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7> - 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5> - 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7> - 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5> - 3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7> - 3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0> - 2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS - 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7> - 3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1> - 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7> - 2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7> - 2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7> - 3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5> - 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7> - 3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7> - 2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0> - 2685633806U, // <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7> - 3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS - 2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1> - 4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2> - 3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS - 3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS - 2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7> - 3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7> - 3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7> - 3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS - 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS - 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3> - 2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0> - 1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0> - 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS - 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7> - 2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7> - 2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS - 1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3> - 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0> - 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS - 2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0> - 2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0> - 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5> - 2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7> - 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4> - 2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0> - 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS - 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2> - 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1> - 2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0> - 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1> - 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS - 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7> - 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3> - 3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1> - 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1> - 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1> - 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3> - 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2> - 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3> - 2689836669U, // <3,2,2,4>: Cost 3 
vext3 LHS, <2,2,4,5> - 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7> - 2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6> - 3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3> - 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3> - 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1> - 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0> - 2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2> - 2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3> - 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5> - 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1> - 2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2> - 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0> - 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1> - 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS - 2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4> - 2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4> - 2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5> - 2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS - 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS - 2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0> - 2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4> - 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS - 2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS - 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5> - 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7> - 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6> - 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5> - 2665263108U, // <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5> - 2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7> - 2665263272U, // <3,2,5,7>: Cost 3 vext2 <u,0,3,2>, <5,7,5,7> - 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5> - 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1> - 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3> - 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6> - 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7> - 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5> - 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7> - 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7> - 2665263950U, // <3,2,6,7>: Cost 3 vext2 <u,0,3,2>, <6,7,0,1> - 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7> - 2665264122U, // <3,2,7,0>: Cost 3 vext2 <u,0,3,2>, <7,0,1,2> - 2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3> - 4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2> - 2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS - 2665264486U, // <3,2,7,4>: Cost 3 vext2 <u,0,3,2>, <7,4,5,6> - 2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7> - 2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7> - 2665264748U, // <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7> - 2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS - 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1> - 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS - 1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2> - 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3> - 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5> - 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS - 2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0> - 2665265408U, // <3,2,u,7>: Cost 3 vext2 <u,0,3,2>, <u,7,0,1> - 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1> - 2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0> - 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2> - 2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0> - 2564294806U, // <3,3,0,3>: 
Cost 3 vext1 <2,3,3,0>, <3,0,1,2> - 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1> - 2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2> - 3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2> - 4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7> - 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2> - 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3> - 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3> - 2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3> - 2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1> - 2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS - 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3> - 3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3> - 2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3> - 1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3> - 2564309094U, // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS - 2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3> - 1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3> - 2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0> - 2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS - 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4> - 2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3> - 2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3> - 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3> - 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS - 2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3> - 2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3> - 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS - 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS - 2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5> - 2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7> - 2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3> - 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS - 2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS - 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4> - 2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4> - 2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6> - 2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4> - 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6> - 2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS - 4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7> - 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6> - 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS - 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5> - 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5> - 2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5> - 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS - 3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5> - 2665934946U, // <3,3,5,6>: Cost 3 vext2 <u,1,3,3>, <5,6,7,0> - 2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS - 2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS - 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7> - 3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7> - 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7> - 2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7> - 2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7> - 3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7> - 2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6> - 2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3> - 2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3> - 2558378086U, // <3,3,7,0>: Cost 3 vext1 
<1,3,3,7>, LHS - 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7> - 2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7> - 2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3> - 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS - 2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7> - 3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3> - 3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7> - 2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS - 1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS - 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2> - 1592858504U, // <3,3,u,2>: Cost 2 vext2 <u,2,3,3>, <u,2,3,3> - 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS - 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS - 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6> - 2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3> - 2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS - 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS - 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0> - 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS - 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2> - 3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4> - 2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5> - 1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1> - 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2> - 3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0> - 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS - 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2> - 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1> - 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4> - 2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3> - 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS - 2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0> - 2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3> - 2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4> - 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4> - 3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS - 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3> - 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2> - 2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1> - 2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3> - 2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3> - 2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0> - 2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4> - 2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0> - 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2> - 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4> - 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4> - 2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3> - 2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1> - 2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS - 3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS - 3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1> - 2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2> - 2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS - 2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4> - 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4> - 2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4> - 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4> - 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS - 
1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6> - 3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4> - 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS - 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS - 2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3> - 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5> - 2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2> - 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS - 2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7> - 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS - 2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5> - 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS - 2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1> - 2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6> - 2666607098U, // <3,4,6,2>: Cost 3 vext2 <u,2,3,4>, <6,2,7,3> - 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6> - 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6> - 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7> - 2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7> - 2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4> - 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2> - 2666607610U, // <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2> - 3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5> - 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4> - 3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7> - 2666607974U, // <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6> - 2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0> - 2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0> - 2666608236U, // <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7> - 2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4> - 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS - 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS - 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u> - 2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, <u,3,0,1> - 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS - 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS - 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS - 2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u> - 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS - 3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0> - 2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS - 2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5> - 3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4> - 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1> - 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1> - 2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1> - 2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0> - 2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS - 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS - 3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1> - 2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5> - 2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5> - 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5> - 2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3> - 3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7> - 1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3> - 1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3> - 3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3> - 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5> - 3692578408U, // <3,5,2,2>: Cost 4 vext2 
<0,2,3,5>, <2,2,2,2> - 2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5> - 2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5> - 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3> - 3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7> - 2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3> - 2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5> - 3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2> - 2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5> - 3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4> - 2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3> - 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6> - 2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5> - 4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6> - 3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS - 3095072055U, // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS - 2600304742U, // <3,5,4,0>: Cost 3 vext1 <u,3,5,4>, LHS - 3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5> - 2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4> - 3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0> - 2600308022U, // <3,5,4,4>: Cost 3 vext1 <u,3,5,4>, RHS - 2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS - 2772454710U, // <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS - 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6> - 1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6> - 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS - 2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3> - 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5> - 2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5> - 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS - 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5> - 2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6> - 1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7> - 1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7> - 2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1> - 2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7> - 2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6> - 2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4> - 2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5> - 2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7> - 2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7> - 1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0> - 1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0> - 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS - 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7> - 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2> - 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2> - 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS - 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7> - 2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0> - 2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7> - 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS - 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS - 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u> - 2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2> - 2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2> - 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS - 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7> - 2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS - 1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3> - 
1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS - 2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS - 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2> - 2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4> - 3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4> - 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2> - 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7> - 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0> - 2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS - 2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2> - 3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3> - 3763581297U, // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1> - 2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6> - 3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1> - 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6> - 3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3> - 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3> - 2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3> - 2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6> - 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS - 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3> - 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6> - 3763581395U, // <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0> - 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6> - 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6> - 2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3> - 1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3> - 1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3> - 3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2> - 3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3> - 3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3> - 2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3> - 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6> - 3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6> - 3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2> - 2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS - 2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS - 3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6> - 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3> - 2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5> - 3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6> - 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6> - 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6> - 2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0> - 2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS - 2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6> - 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS - 3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2> - 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7> - 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6> - 2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5> - 3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6> - 4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6> - 2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5> - 2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS - 2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1> - 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3> - 2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3> - 3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6> - 
2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4> - 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7> - 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6> - 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7> - 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7> - 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1> - 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7> - 2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7> - 2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3> - 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5> - 2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1> - 2722059141U, // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2> - 2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS - 1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1> - 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1> - 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2> - 2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u> - 2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6> - 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5> - 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6> - 1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6> - 1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3> - 1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1> - 2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0> - 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS - 2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2> - 3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0> - 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5> - 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0> - 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0> - 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1> - 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS - 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2> - 2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1> - 2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0> - 2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7> - 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS - 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7> - 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7> - 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3> - 1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7> - 2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS - 3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3> - 2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2> - 2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1> - 2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS - 2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7> - 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7> - 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3> - 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7> - 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2> - 2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3> - 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3> - 2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3> - 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6> - 2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7> - 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7> - 2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7> - 2626816798U, // <3,7,3,u>: 
Cost 3 vext2 <1,5,3,7>, <3,u,1,2> - 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS - 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7> - 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7> - 3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7> - 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS - 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS - 2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4> - 2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6> - 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS - 2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2> - 2626817744U, // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3> - 3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3> - 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0> - 2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5> - 2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5> - 2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7> - 2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS - 2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS - 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1> - 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0> - 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3> - 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0> - 2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5> - 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4> - 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6> - 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0> - 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7> - 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS - 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7> - 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7> - 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7> - 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS - 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7> - 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7> - 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7> - 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7> - 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, <u,0,1,2> - 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS - 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, <u,2,3,0> - 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS - 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, <u,4,5,6> - 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS - 1595545808U, // <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7> - 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7> - 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS - 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0> - 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2> - 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, <u,0,2,2> - 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2> - 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1> - 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, <u,0,5,1> - 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2> - 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS - 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2> - 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u> - 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u> - 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS - 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3> - 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS - 1553081489U, // <3,u,1,5>: Cost 2 
vext2 <1,5,3,u>, <1,5,3,u> - 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u> - 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, <u,1,7,3> - 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS - 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2> - 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3> - 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u> - 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, <u,2,3,0> - 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6> - 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, <u,2,5,7> - 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u> - 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, <u,2,7,3> - 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, <u,2,u,0> - 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1> - 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3> - 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, <u,3,2,2> - 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS - 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5> - 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7> - 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, <u,3,6,7> - 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS - 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS - 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS - 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, <u,4,1,5> - 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6> - 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, <u,4,3,5> - 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4> - 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6> - 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, <u,4,6,6> - 1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, <u,4,7,6> - 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6> - 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS - 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, <u,5,1,7> - 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5> - 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7> - 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS - 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5> - 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS - 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, <u,5,7,7> - 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS - 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, <u,6,0,1> - 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6> - 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, <u,6,2,7> - 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7> - 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6> - 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, <u,6,5,7> - 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6> - 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, <u,6,7,0> - 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, <u,6,u,7> - 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS - 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7> - 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2> - 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS - 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS - 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7> - 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, <u,7,6,2> - 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7> - 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS - 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1> - 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2> - 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS - 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS - 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, 
<u,u,4,5> - 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6> - 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS - 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, <u,u,7,0> - 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS - 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0> - 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1> - 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2> - 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4> - 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4> - 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0> - 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0> - 3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0> - 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1> - 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS - 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4> - 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS - 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1> - 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS - 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4> - 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1> - 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1> - 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS - 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4> - 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4> - 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4> - 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0> - 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6> - 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7> - 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4> - 3668358453U, // <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2> - 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4> - 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS - 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4> - 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4> - 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4> - 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS - 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5> - 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7> - 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0> - 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4> - 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS - 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5> - 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6> - 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2> - 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS - 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS - 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2> - 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4> - 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS - 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS - 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS - 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2> - 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5> - 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS - 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7> - 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7> - 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5> - 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS - 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, 
LHS - 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1> - 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS - 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6> - 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS - 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6> - 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6> - 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0> - 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS - 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2> - 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS - 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS - 3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0> - 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5> - 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5> - 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0> - 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7> - 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS - 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS - 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS - 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS - 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u> - 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS - 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS - 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4> - 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u> - 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS - 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1> - 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS - 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6> - 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2> - 2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5> - 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4> - 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1> - 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4> - 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS - 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2> - 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4> - 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4> - 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3> - 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5> - 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1> - 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7> - 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1> - 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3> - 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS - 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4> - 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2> - 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4> - 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS - 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3> - 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7> - 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2> - 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4> - 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS - 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3> - 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4> - 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4> - 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS - 2691908608U, // <4,1,3,5>: Cost 3 
vext3 <1,2,3,4>, <1,3,5,7> - 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> - 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2> - 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3> - 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1> - 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0> - 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5> - 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS - 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5> - 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS - 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4> - 3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0> - 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS - 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS - 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2> - 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5> - 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2> - 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS - 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7> - 2600686074U, // <4,1,5,6>: Cost 3 vext1 <u,4,1,5>, <6,2,7,3> - 2600686586U, // <4,1,5,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2> - 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS - 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS - 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1> - 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2> - 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS - 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS - 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6> - 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7> - 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1> - 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS - 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1> - 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1> - 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1> - 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4> - 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6> - 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4> - 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7> - 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7> - 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1> - 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS - 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2> - 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2> - 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4> - 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS - 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS - 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> - 2600686586U, // <4,1,u,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2> - 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS - 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2> - 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS - 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6> - 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2> - 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6> - 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7> - 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4> - 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2> - 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS - 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2> - 
3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1> - 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0> - 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS - 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3> - 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7> - 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3> - 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3> - 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3> - 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4> - 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3> - 2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2> - 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3> - 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0> - 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7> - 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6> - 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2> - 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3> - 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1> - 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1> - 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2> - 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4> - 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5> - 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4> - 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4> - 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4> - 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1> - 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS - 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4> - 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4> - 2699208469U, // <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4> - 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS - 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS - 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4> - 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0> - 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS - 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS - 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0> - 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2> - 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS - 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS - 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7> - 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7> - 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS - 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS - 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS - 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2> - 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2> - 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS - 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS - 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6> - 2600767994U, // <4,2,6,6>: Cost 3 vext1 <u,4,2,6>, <6,2,7,3> - 2600768506U, // <4,2,6,7>: Cost 3 vext1 <u,4,2,6>, <7,0,1,2> - 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS - 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2> - 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2> - 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2> - 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4> - 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4> - 3303360298U, // 
<4,2,7,5>: Cost 4 vrev <2,4,5,7> - 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4> - 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7> - 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2> - 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS - 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2> - 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2> - 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS - 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS - 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS - 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u> - 2600784890U, // <4,2,u,7>: Cost 3 vext1 <u,4,2,u>, <7,0,1,2> - 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS - 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0> - 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2> - 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4> - 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3> - 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1> - 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0> - 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0> - 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0> - 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2> - 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1> - 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1> - 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4> - 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4> - 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0> - 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3> - 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1> - 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3> - 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4> - 3626770534U, // <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS - 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3> - 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2> - 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4> - 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS - 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4> - 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3> - 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3> - 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3> - 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1> - 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1> - 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3> - 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3> - 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4> - 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7> - 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7> - 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7> - 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3> - 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1> - 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2> - 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4> - 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0> - 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5> - 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6> - 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4> - 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2> - 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1> - 2559025254U, // <4,3,5,0>: Cost 3 vext1 
<1,4,3,5>, LHS - 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5> - 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5> - 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3> - 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS - 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5> - 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5> - 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4> - 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS - 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS - 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6> - 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6> - 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3> - 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS - 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6> - 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6> - 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4> - 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6> - 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1> - 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5> - 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7> - 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7> - 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5> - 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7> - 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7> - 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4> - 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1> - 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS - 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2> - 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u> - 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4> - 2559053110U, // <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS - 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u> - 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u> - 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4> - 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u> - 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4> - 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS - 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS - 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1> - 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0> - 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1> - 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2> - 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0> - 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS - 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2> - 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1> - 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3> - 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3> - 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3> - 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4> - 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3> - 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3> - 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3> - 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4> - 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4> - 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2> - 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4> - 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4> - 
3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7> - 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4> - 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4> - 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4> - 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2> - 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4> - 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3> - 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4> - 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4> - 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5> - 3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4> - 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4> - 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4> - 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS - 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4> - 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2> - 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4> - 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS - 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS - 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS - 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4> - 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS - 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS - 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4> - 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5> - 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5> - 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS - 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS - 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS - 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5> - 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS - 2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS - 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2> - 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5> - 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6> - 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS - 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6> - 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS - 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4> - 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS - 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2> - 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4> - 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7> - 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4> - 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4> - 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4> - 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4> - 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7> - 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4> - 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS - 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS - 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS - 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u> - 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS - 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS - 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS - 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u> - 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS - 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0> - 1557774438U, // <4,5,0,1>: Cost 2 vext2 
<2,3,4,5>, LHS - 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5> - 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0> - 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5> - 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7> - 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5> - 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0> - 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS - 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2> - 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5> - 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0> - 2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS - 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4> - 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5> - 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6> - 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3> - 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS - 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS - 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3> - 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5> - 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5> - 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5> - 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7> - 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7> - 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS - 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5> - 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2> - 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1> - 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4> - 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3> - 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0> - 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5> - 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4> - 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5> - 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2> - 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS - 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4> - 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5> - 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4> - 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS - 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS - 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5> - 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6> - 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS - 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS - 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3> - 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4> - 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2> - 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5> - 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5> - 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0> - 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS - 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS - 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS - 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6> - 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6> - 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6> - 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS - 3047608324U, // <4,5,6,5>: Cost 3 vtrnl 
RHS, <5,5,5,5> - 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6> - 27705344U, // <4,5,6,7>: Cost 0 copy RHS - 27705344U, // <4,5,6,u>: Cost 0 copy RHS - 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS - 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4> - 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7> - 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5> - 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS - 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7> - 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4> - 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4> - 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS - 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS - 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS - 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0> - 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u> - 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS - 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS - 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7> - 27705344U, // <4,5,u,7>: Cost 0 copy RHS - 27705344U, // <4,5,u,u>: Cost 0 copy RHS - 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0> - 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS - 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6> - 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0> - 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5> - 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7> - 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0> - 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS - 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS - 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2> - 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1> - 2618917782U, // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0> - 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3> - 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5> - 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7> - 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7> - 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS - 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3> - 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4> - 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3> - 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2> - 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1> - 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6> - 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7> - 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7> - 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3> - 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1> - 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2> - 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6> - 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2> - 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3> - 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6> - 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6> - 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6> - 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4> - 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2> - 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS - 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, 
<6,4,1,3> - 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4> - 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4> - 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS - 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS - 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS - 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4> - 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS - 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS - 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3> - 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3> - 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3> - 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6> - 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5> - 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6> - 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS - 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS - 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS - 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2> - 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3> - 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2> - 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6> - 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6> - 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6> - 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS - 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS - 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2> - 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2> - 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7> - 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4> - 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6> - 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6> - 3852915914U, // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3> - 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7> - 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2> - 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS - 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS - 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS - 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1> - 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u> - 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS - 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS - 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS - 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS - 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0> - 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS - 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4> - 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4> - 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5> - 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0> - 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7> - 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4> - 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS - 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1> - 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1> - 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4> - 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5> - 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS - 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7> - 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7> - 3793818754U, // <4,7,1,7>: Cost 4 vext3 
<5,u,7,4>, <7,1,7,3> - 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1> - 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS - 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3> - 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2> - 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4> - 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7> - 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7> - 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7> - 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3> - 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7> - 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2> - 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4> - 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4> - 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3> - 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6> - 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7> - 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7> - 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4> - 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7> - 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1> - 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3> - 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7> - 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5> - 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4> - 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS - 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4> - 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7> - 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS - 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2> - 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7> - 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5> - 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5> - 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6> - 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5> - 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5> - 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7> - 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2> - 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS - 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2> - 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2> - 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2> - 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS - 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6> - 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3> - 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7> - 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS - 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS - 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4> - 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7> - 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4> - 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7> - 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7> - 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7> - 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7> - 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7> - 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS - 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS - 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2> - 
2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2> - 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS - 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u> - 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3> - 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7> - 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS - 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0> - 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS - 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u> - 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2> - 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5> - 2265397305U, // <4,u,0,5>: Cost 3 vrev <u,4,5,0> - 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u> - 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0> - 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS - 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2> - 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1> - 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS - 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3> - 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3> - 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7> - 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7> - 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3> - 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS - 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS - 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u> - 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2> - 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u> - 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u> - 2733864859U, // <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4> - 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7> - 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3> - 1561118822U, // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u> - 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2> - 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2> - 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u> - 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3> - 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6> - 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7> - 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> - 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u> - 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2> - 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS - 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2> - 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4> - 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4> - 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS - 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS - 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS - 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6> - 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS - 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS - 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS - 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5> - 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS - 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS - 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS - 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS - 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS - 1618172076U, // 
<4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS - 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS - 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2> - 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS - 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6> - 1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS - 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6> - 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS - 27705344U, // <4,u,6,7>: Cost 0 copy RHS - 27705344U, // <4,u,6,u>: Cost 0 copy RHS - 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS - 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4> - 2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7> - 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u> - 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS - 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6> - 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7> - 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7> - 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS - 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS - 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS - 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS - 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u> - 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS - 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS - 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS - 27705344U, // <4,u,u,7>: Cost 0 copy RHS - 27705344U, // <4,u,u,u>: Cost 0 copy RHS - 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0> - 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1> - 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2> - 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5> - 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5> - 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0> - 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0> - 3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0> - 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2> - 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS - 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1> - 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS - 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7> - 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS - 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1> - 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7> - 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2> - 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS - 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2> - 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5> - 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4> - 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5> - 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5> - 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5> - 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4> - 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6> - 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5> - 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5> - 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4> - 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5> - 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5> - 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0> - 3776348452U, // <5,0,3,5>: 
Cost 4 vext3 <3,0,4,5>, <0,3,5,4> - 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0> - 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7> - 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5> - 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS - 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5> - 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6> - 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5> - 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS - 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS - 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5> - 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0> - 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5> - 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1> - 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS - 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS - 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0> - 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5> - 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0> - 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0> - 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS - 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS - 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0> - 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS - 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7> - 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6> - 3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5> - 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7> - 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6> - 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5> - 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS - 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS - 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0> - 2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7> - 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2> - 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS - 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0> - 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7> - 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7> - 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS - 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2> - 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5> - 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS - 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5> - 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6> - 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS - 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u> - 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6> - 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS - 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0> - 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS - 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2> - 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2> - 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5> - 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0> - 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7> - 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0> - 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS - 2223891567U, // <5,1,1,0>: Cost 3 vrev 
<1,5,0,1> - 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1> - 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0> - 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3> - 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5> - 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5> - 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5> - 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5> - 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3> - 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2> - 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3> - 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2> - 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0> - 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5> - 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3> - 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7> - 3808199610U, // <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0> - 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0> - 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS - 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3> - 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5> - 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5> - 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5> - 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7> - 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7> - 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5> - 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3> - 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1> - 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5> - 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5> - 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5> - 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4> - 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS - 2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6> - 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4> - 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1> - 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1> - 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1> - 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1> - 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7> - 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5> - 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5> - 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0> - 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7> - 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1> - 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS - 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7> - 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3> - 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7> - 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS - 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7> - 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6> - 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1> - 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1> - 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS - 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1> - 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1> - 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS - 2559561014U, // 
<5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS - 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3> - 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6> - 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7> - 2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS - 1591662326U, // <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1> - 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS - 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5> - 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS - 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5> - 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS - 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7> - 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1> - 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS - 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0> - 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS - 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2> - 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2> - 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1> - 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1> - 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4> - 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0> - 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS - 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2> - 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2> - 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5> - 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5> - 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS - 3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0> - 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3> - 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1> - 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5> - 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS - 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3> - 2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2> - 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3> - 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5> - 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7> - 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6> - 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5> - 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3> - 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1> - 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5> - 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5> - 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5> - 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5> - 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5> - 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5> - 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7> - 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5> - 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2> - 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3> - 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5> - 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5> - 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6> - 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS - 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2> - 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4> - 2628914729U, // <5,2,4,u>: Cost 3 vext2 
<1,u,5,2>, RHS - 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS - 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3> - 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7> - 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS - 2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS - 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5> - 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7> - 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1> - 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS - 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS - 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3> - 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3> - 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7> - 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5> - 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7> - 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7> - 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1> - 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7> - 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS - 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2> - 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7> - 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS - 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS - 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5> - 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6> - 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7> - 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS - 2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1> - 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS - 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u> - 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3> - 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5> - 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS - 2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5> - 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u> - 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5> - 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0> - 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2> - 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0> - 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2> - 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1> - 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2> - 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0> - 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0> - 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2> - 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3> - 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1> - 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3> - 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5> - 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3> - 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7> - 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7> - 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5> - 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3> - 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1> - 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5> - 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2> - 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, 
<3,2,3,4> - 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5> - 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4> - 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3> - 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3> - 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4> - 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1> - 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3> - 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2> - 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3> - 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5> - 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5> - 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7> - 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5> - 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5> - 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5> - 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0> - 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3> - 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5> - 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5> - 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6> - 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5> - 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4> - 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6> - 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS - 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5> - 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5> - 2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5> - 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS - 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5> - 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0> - 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5> - 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS - 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS - 3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6> - 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6> - 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6> - 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS - 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0> - 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6> - 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4> - 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS - 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS - 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7> - 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2> - 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2> - 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS - 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3> - 2601513466U, // <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3> - 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7> - 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS - 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS - 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u> - 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2> - 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2> - 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS - 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6> - 2601521658U, // <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3> - 2236410471U, // <5,3,u,7>: Cost 3 vrev 
<3,5,7,u> - 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS - 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS - 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS - 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2> - 3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5> - 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5> - 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1> - 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0> - 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0> - 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS - 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1> - 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4> - 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4> - 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7> - 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4> - 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0> - 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5> - 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1> - 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1> - 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4> - 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4> - 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4> - 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5> - 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4> - 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3> - 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3> - 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5> - 2666752099U, // <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5> - 3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS - 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4> - 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4> - 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3> - 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4> - 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0> - 2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5> - 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7> - 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4> - 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS - 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4> - 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3> - 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4> - 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4> - 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5> - 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4> - 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4> - 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5> - 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS - 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5> - 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3> - 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2> - 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS - 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5> - 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS - 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS - 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS - 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS - 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7> - 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2> - 2571717122U, // <5,4,6,3>: Cost 
3 vext1 <3,5,4,6>, <3,4,5,6> - 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS - 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5> - 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7> - 2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5> - 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS - 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS - 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4> - 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7> - 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7> - 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS - 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5> - 94817590U, // <5,4,7,6>: Cost 1 vrev RHS - 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7> - 94965064U, // <5,4,7,u>: Cost 1 vrev RHS - 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS - 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u> - 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u> - 2667419628U, // <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4> - 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS - 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5> - 94825783U, // <5,4,u,6>: Cost 1 vrev RHS - 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5> - 94973257U, // <5,4,u,u>: Cost 1 vrev RHS - 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0> - 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS - 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2> - 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2> - 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1> - 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0> - 3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7> - 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS - 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS - 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2> - 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5> - 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0> - 2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3> - 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5> - 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7> - 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7> - 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3> - 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5> - 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS - 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3> - 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2> - 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4> - 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3> - 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3> - 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7> - 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7> - 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4> - 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2> - 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5> - 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3> - 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5> - 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6> - 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5> - 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7> - 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1> - 2646854430U, // <5,5,3,u>: Cost 3 vext2 
<4,u,5,5>, <3,u,1,2> - 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1> - 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5> - 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3> - 3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4> - 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5> - 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS - 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5> - 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6> - 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5> - 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS - 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3> - 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2> - 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2> - 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS - 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS - 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0> - 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7> - 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS - 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS - 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6> - 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3> - 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6> - 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5> - 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5> - 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6> - 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1> - 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1> - 2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS - 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7> - 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5> - 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7> - 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS - 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5> - 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6> - 2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS - 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS - 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS - 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS - 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5> - 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u> - 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS - 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS - 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7> - 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS - 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS - 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0> - 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS - 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2> - 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4> - 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5> - 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6> - 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7> - 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS - 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS - 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2> - 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1> - 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0> - 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3> - 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6> - 
2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7> - 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7> - 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS - 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6> - 3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2> - 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3> - 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2> - 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1> - 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6> - 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6> - 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7> - 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3> - 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6> - 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2> - 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3> - 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6> - 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3> - 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6> - 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6> - 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7> - 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS - 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6> - 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS - 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5> - 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5> - 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5> - 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6> - 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS - 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6> - 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5> - 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS - 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS - 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3> - 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6> - 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4> - 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6> - 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5> - 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1> - 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS - 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS - 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS - 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4> - 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3> - 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6> - 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS - 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6> - 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6> - 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS - 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS - 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS - 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2> - 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2> - 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2> - 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS - 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6> - 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6> - 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7> - 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS - 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS 
- 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS - 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2> - 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2> - 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS - 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS - 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3> - 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2> - 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS - 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0> - 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS - 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2> - 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0> - 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5> - 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7> - 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7> - 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0> - 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS - 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2> - 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1> - 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0> - 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7> - 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS - 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7> - 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7> - 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7> - 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7> - 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7> - 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3> - 2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2> - 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1> - 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7> - 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7> - 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7> - 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7> - 2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1> - 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2> - 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5> - 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1> - 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3> - 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6> - 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0> - 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7> - 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7> - 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2> - 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS - 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7> - 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0> - 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4> - 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS - 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS - 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6> - 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7> - 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS - 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS - 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3> - 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3> - 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7> - 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS - 2846281732U, // <5,7,5,5>: Cost 
3 vuzpr RHS, <5,5,5,5> - 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7> - 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS - 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS - 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0> - 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5> - 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2> - 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6> - 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4> - 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u> - 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6> - 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7> - 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u> - 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS - 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1> - 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2> - 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3> - 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS - 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7> - 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3> - 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7> - 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS - 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS - 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS - 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0> - 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS - 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS - 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS - 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7> - 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS - 1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS - 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0> - 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS - 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2> - 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2> - 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1> - 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1> - 2266134675U, // <5,u,0,6>: Cost 3 vrev <u,5,6,0> - 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0> - 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS - 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2> - 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1> - 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS - 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u> - 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u> - 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0> - 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u> - 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS - 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS - 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0> - 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5> - 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2> - 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3> - 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u> - 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u> - 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7> - 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3> - 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1> - 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1> - 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u> - 
2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u> - 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3> - 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u> - 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0> - 2734610422U, // <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5> - 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u> - 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u> - 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u> - 1661163546U, // <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5> - 2734463012U, // <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6> - 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5> - 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5> - 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS - 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6> - 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u> - 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS - 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS - 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5> - 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3> - 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7> - 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS - 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS - 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS - 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS - 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS - 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS - 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS - 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6> - 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7> - 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS - 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS - 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6> - 2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS - 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7> - 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS - 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7> - 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2> - 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS - 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS - 1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3> - 118708378U, // <5,u,7,6>: Cost 1 vrev RHS - 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS - 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS - 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS - 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS - 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS - 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS - 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS - 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS - 118716571U, // <5,u,u,6>: Cost 1 vrev RHS - 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS - 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS - 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0> - 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1> - 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2> - 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5> - 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6> - 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0> - 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6> - 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7> - 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2> - 2566070374U, // <6,0,1,0>: Cost 3 
vext1 <2,6,0,1>, LHS - 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0> - 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS - 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6> - 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS - 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1> - 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1> - 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1> - 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS - 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2> - 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6> - 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6> - 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5> - 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6> - 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7> - 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6> - 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2> - 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6> - 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2> - 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4> - 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5> - 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3> - 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6> - 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6> - 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6> - 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7> - 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5> - 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6> - 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5> - 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6> - 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6> - 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6> - 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS - 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0> - 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0> - 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6> - 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS - 2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6> - 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6> - 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0> - 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6> - 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6> - 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0> - 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS - 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS - 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0> - 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS - 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS - 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5> - 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0> - 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7> - 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0> - 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1> - 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS - 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS - 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0> - 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2> - 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7> - 2572094774U, 
// <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS - 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5> - 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0> - 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7> - 2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS - 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2> - 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1> - 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS - 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5> - 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6> - 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS - 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u> - 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS - 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS - 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS - 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS - 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6> - 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2> - 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS - 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2> - 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1> - 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0> - 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2> - 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1> - 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1> - 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6> - 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3> - 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6> - 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5> - 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6> - 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1> - 2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3> - 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS - 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3> - 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2> - 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0> - 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS - 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3> - 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3> - 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0> - 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0> - 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS - 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3> - 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6> - 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1> - 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6> - 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7> - 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3> - 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2> - 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3> - 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1> - 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6> - 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4> - 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6> - 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS - 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6> - 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0> - 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1> - 
2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6> - 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1> - 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7> - 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6> - 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7> - 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6> - 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6> - 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0> - 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS - 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7> - 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS - 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7> - 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6> - 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS - 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS - 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7> - 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6> - 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1> - 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS - 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS - 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7> - 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2> - 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS - 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS - 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5> - 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0> - 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7> - 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS - 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS - 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3> - 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6> - 2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0> - 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6> - 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7> - 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u> - 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u> - 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0> - 2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0> - 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS - 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2> - 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0> - 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6> - 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3> - 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4> - 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0> - 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS - 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1> - 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1> - 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0> - 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS - 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6> - 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7> - 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3> - 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1> - 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1> - 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1> - 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3> - 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2> - 2686027378U, // 
<6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3> - 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6> - 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7> - 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6> - 3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7> - 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3> - 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1> - 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0> - 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6> - 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4> - 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5> - 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6> - 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6> - 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4> - 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1> - 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2> - 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u> - 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6> - 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6> - 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6> - 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS - 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0> - 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2> - 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2> - 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3> - 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3> - 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7> - 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6> - 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5> - 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5> - 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0> - 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS - 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6> - 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1> - 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3> - 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6> - 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7> - 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5> - 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7> - 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6> - 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1> - 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7> - 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS - 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2> - 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7> - 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS - 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS - 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7> - 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6> - 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7> - 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS - 1591744256U, // <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2> - 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS - 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6> - 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS - 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5> - 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS - 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0> - 2847051305U, // 
<6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS - 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS - 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0> - 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2> - 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4> - 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2> - 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2> - 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2> - 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0> - 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0> - 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2> - 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3> - 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1> - 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3> - 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1> - 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6> - 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3> - 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0> - 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3> - 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6> - 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4> - 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3> - 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2> - 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0> - 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6> - 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7> - 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7> - 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6> - 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3> - 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1> - 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3> - 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3> - 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3> - 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6> - 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5> - 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7> - 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7> - 2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5> - 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS - 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3> - 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3> - 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6> - 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS - 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6> - 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6> - 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4> - 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6> - 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS - 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7> - 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5> - 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5> - 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6> - 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7> - 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6> - 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0> - 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6> - 2602164326U, // <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS - 2654212512U, // <6,3,6,1>: Cost 3 vext2 
<6,1,6,3>, <6,1,6,3> - 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6> - 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1> - 2602167524U, // <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6> - 3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7> - 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6> - 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7> - 2602170158U, // <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS - 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS - 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7> - 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7> - 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2> - 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS - 2602176208U, // <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3> - 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3> - 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7> - 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS - 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS - 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2> - 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u> - 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2> - 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS - 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6> - 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0> - 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7> - 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS - 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0> - 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS - 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6> - 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1> - 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6> - 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1> - 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2> - 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0> - 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS - 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1> - 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1> - 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0> - 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS - 3628166454U, // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS - 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0> - 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3> - 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1> - 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1> - 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4> - 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3> - 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2> - 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1> - 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4> - 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS - 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0> - 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0> - 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4> - 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2> - 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3> - 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6> - 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3> - 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6> - 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6> - 2705345682U, // 
<6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5> - 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7> - 2668817222U, // <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6> - 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS - 3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4> - 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4> - 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4> - 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4> - 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS - 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6> - 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4> - 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6> - 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS - 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3> - 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5> - 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6> - 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS - 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5> - 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS - 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS - 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS - 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS - 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2> - 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2> - 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2> - 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS - 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3> - 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3> - 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2> - 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS - 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS - 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4> - 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5> - 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7> - 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS - 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5> - 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6> - 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7> - 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS - 1504608358U, // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS - 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS - 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u> - 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6> - 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6> - 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS - 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS - 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS - 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS - 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0> - 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS - 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6> - 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2> - 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1> - 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5> - 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7> - 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0> - 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0> - 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1> - 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5> - 3708724118U, // <6,5,1,2>: 
Cost 4 vext2 <2,u,6,5>, <1,2,3,0> - 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS - 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6> - 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7> - 3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4> - 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3> - 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3> - 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS - 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5> - 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2> - 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5> - 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS - 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6> - 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7> - 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS - 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5> - 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2> - 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5> - 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3> - 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3> - 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6> - 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6> - 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0> - 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3> - 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3> - 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS - 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5> - 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5> - 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5> - 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS - 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS - 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0> - 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6> - 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6> - 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS - 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2> - 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2> - 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2> - 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS - 2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5> - 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6> - 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7> - 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7> - 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1> - 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4> - 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4> - 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4> - 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5> - 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5> - 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6> - 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0> - 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1> - 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS - 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7> - 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7> - 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2> - 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS - 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5> - 2980497922U, // 
<6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6> - 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS - 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS - 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS - 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u> - 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u> - 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7> - 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS - 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS - 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6> - 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u> - 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u> - 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS - 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS - 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4> - 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1> - 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2> - 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3> - 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0> - 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS - 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS - 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2> - 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1> - 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0> - 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS - 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3> - 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7> - 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3> - 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3> - 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS - 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6> - 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3> - 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6> - 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1> - 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3> - 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7> - 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7> - 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3> - 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3> - 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2> - 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4> - 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6> - 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3> - 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5> - 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4> - 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6> - 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS - 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5> - 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS - 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2> - 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6> - 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6> - 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6> - 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS - 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0> - 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6> - 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6> - 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS - 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, 
<5,1,7,3> - 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5> - 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0> - 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6> - 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5> - 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0> - 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS - 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS - 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS - 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2> - 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3> - 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2> - 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS - 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3> - 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS - 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7> - 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS - 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS - 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7> - 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7> - 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6> - 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS - 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4> - 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6> - 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS - 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS - 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS - 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS - 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u> - 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS - 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS - 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS - 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS - 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS - 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS - 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0> - 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS - 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2> - 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0> - 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5> - 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0> - 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7> - 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2> - 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS - 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2> - 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1> - 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0> - 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3> - 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5> - 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7> - 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7> - 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2> - 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3> - 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2> - 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3> - 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2> - 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1> - 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6> - 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7> - 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7> - 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1> - 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1> - 1571211414U, // <6,7,3,0>: Cost 
2 vext2 RHS, <3,0,1,2> - 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3> - 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1> - 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3> - 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6> - 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7> - 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7> - 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1> - 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2> - 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1> - 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3> - 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3> - 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5> - 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4> - 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS - 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6> - 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7> - 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS - 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2> - 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3> - 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4> - 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0> - 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6> - 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5> - 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0> - 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7> - 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7> - 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2> - 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3> - 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3> - 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5> - 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6> - 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7> - 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6> - 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1> - 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1> - 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2> - 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7> - 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3> - 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7> - 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6> - 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7> - 1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7> - 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7> - 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2> - 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2> - 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS - 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0> - 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1> - 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6> - 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS - 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7> - 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1> - 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS - 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0> - 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS - 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2> - 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2> - 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5> - 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0> - 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7> - 1193130221U, // <6,u,0,7>: Cost 2 vrev <u,6,7,0> - 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS - 1571218166U, 
// <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2> - 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1> - 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS - 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3> - 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS - 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7> - 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7> - 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3> - 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS - 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS - 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3> - 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2> - 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1> - 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS - 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u> - 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7> - 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3> - 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1> - 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2> - 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3> - 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3> - 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3> - 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6> - 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7> - 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5> - 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1> - 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2> - 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1> - 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3> - 1661245476U, // <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6> - 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6> - 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4> - 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS - 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6> - 1661614161U, // <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6> - 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS - 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS - 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3> - 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5> - 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7> - 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6> - 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5> - 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS - 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7> - 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS - 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS - 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2> - 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3> - 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7> - 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS - 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5> - 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS - 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1> - 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS - 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS - 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7> - 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7> - 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS - 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS - 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5> - 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7> - 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, 
RHS - 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS - 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS - 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS - 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS - 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1> - 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS - 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS - 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS - 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS - 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS - 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0> - 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1> - 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2> - 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0> - 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1> - 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6> - 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0> - 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7> - 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1> - 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS - 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5> - 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS - 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7> - 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS - 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7> - 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1> - 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0> - 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS - 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2> - 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5> - 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0> - 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1> - 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6> - 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7> - 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7> - 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7> - 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2> - 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2> - 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0> - 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0> - 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3> - 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6> - 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0> - 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0> - 3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7> - 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0> - 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4> - 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5> - 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6> - 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4> - 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6> - 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS - 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6> - 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5> - 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5> - 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS - 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3> - 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7> - 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0> - 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6> - 2651607044U, // <7,0,5,5>: Cost 3 vext2 
<5,6,7,0>, <5,5,5,5> - 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0> - 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7> - 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0> - 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7> - 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7> - 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7> - 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0> - 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7> - 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7> - 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6> - 2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1> - 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7> - 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2> - 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS - 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS - 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7> - 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6> - 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7> - 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0> - 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7> - 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2> - 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2> - 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1> - 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS - 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u> - 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6> - 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS - 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u> - 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1> - 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS - 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS - 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS - 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS - 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2> - 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS - 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1> - 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0> - 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1> - 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2> - 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1> - 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1> - 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6> - 1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3> - 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5> - 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7> - 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1> - 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5> - 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3> - 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1> - 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3> - 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2> - 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0> - 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5> - 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3> - 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2> - 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0> - 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0> - 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0> - 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3> - 2712060894U, 
// <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0> - 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7> - 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5> - 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7> - 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7> - 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3> - 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7> - 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5> - 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5> - 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5> - 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5> - 3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5> - 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS - 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1> - 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0> - 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS - 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS - 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7> - 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6> - 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7> - 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS - 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7> - 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1> - 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1> - 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7> - 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7> - 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7> - 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7> - 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7> - 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7> - 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7> - 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7> - 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1> - 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7> - 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2> - 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1> - 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3> - 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS - 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS - 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7> - 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0> - 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7> - 3121938539U, // <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS - 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS - 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3> - 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0> - 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7> - 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS - 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7> - 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7> - 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS - 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7> - 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2> - 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2> - 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0> - 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0> - 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6> - 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7> - 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1> - 3785803251U, 
// <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2> - 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7> - 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3> - 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0> - 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0> - 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1> - 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3> - 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0> - 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3> - 2602718850U, // <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2> - 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1> - 2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1> - 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3> - 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2> - 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3> - 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5> - 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7> - 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6> - 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5> - 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3> - 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1> - 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5> - 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6> - 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7> - 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5> - 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7> - 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6> - 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0> - 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1> - 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6> - 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3> - 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4> - 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5> - 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6> - 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7> - 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0> - 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0> - 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7> - 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7> - 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3> - 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7> - 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7> - 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7> - 3766486907U, // <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7> - 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7> - 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0> - 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7> - 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS - 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3> - 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7> - 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7> - 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS - 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7> - 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7> - 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7> - 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7> - 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1> - 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0> - 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5> - 2986541158U, // <7,2,7,3>: Cost 3 
vzipr <5,5,7,7>, LHS - 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS - 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7> - 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6> - 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7> - 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS - 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1> - 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5> - 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2> - 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7> - 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5> - 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7> - 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7> - 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0> - 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7> - 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0> - 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2> - 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0> - 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2> - 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1> - 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2> - 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0> - 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0> - 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2> - 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1> - 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1> - 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3> - 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5> - 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6> - 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3> - 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1> - 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5> - 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5> - 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1> - 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0> - 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2> - 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0> - 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5> - 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4> - 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3> - 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3> - 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0> - 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1> - 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3> - 2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3> - 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3> - 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4> - 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7> - 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3> - 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7> - 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3> - 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1> - 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2> - 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4> - 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5> - 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5> - 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6> - 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6> - 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4> - 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6> - 2602819686U, // <7,3,5,0>: Cost 3 vext1 
<u,7,3,5>, LHS - 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3> - 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3> - 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7> - 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5> - 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7> - 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0> - 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0> - 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3> - 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7> - 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3> - 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3> - 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7> - 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7> - 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7> - 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6> - 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7> - 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3> - 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1> - 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5> - 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6> - 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7> - 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5> - 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7> - 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7> - 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7> - 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1> - 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u> - 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2> - 1593153452U, // <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3> - 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3> - 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u> - 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6> - 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3> - 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0> - 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2> - 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0> - 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS - 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2> - 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1> - 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5> - 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1> - 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2> - 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1> - 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1> - 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2> - 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1> - 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3> - 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7> - 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3> - 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0> - 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3> - 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3> - 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0> - 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1> - 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3> - 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2> - 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1> - 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7> - 2712063030U, // <7,4,2,5>: Cost 3 vext3 
RHS, <4,2,5,3> - 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0> - 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5> - 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0> - 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2> - 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5> - 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1> - 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3> - 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6> - 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4> - 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5> - 3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7> - 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4> - 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2> - 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1> - 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4> - 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3> - 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4> - 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5> - 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6> - 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7> - 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5> - 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS - 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7> - 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3> - 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5> - 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS - 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7> - 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS - 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7> - 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS - 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1> - 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5> - 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3> - 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4> - 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6> - 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7> - 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7> - 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1> - 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2> - 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2> - 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2> - 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5> - 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6> - 2651641140U, // <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1> - 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0> - 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1> - 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7> - 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7> - 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS - 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS - 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3> - 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u> - 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6> - 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1> - 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS - 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5> - 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS - 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS - 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS - 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, 
LHS - 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0> - 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1> - 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1> - 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1> - 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2> - 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS - 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1> - 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1> - 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0> - 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7> - 2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5> - 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3> - 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1> - 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3> - 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3> - 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1> - 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0> - 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7> - 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4> - 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7> - 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3> - 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3> - 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3> - 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4> - 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS - 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5> - 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5> - 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3> - 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS - 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5> - 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5> - 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0> - 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0> - 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS - 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7> - 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5> - 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4> - 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS - 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS - 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5> - 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6> - 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6> - 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1> - 2652974800U, // <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3> - 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3> - 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3> - 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4> - 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5> - 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6> - 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7> - 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7> - 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1> - 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7> - 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2> - 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4> - 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5> - 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7> - 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7> - 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0> - 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, 
<5,6,u,0> - 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS - 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3> - 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2> - 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7> - 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS - 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7> - 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0> - 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1> - 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3> - 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS - 2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS - 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0> - 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u> - 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS - 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7> - 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7> - 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0> - 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3> - 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0> - 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS - 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4> - 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0> - 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2> - 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0> - 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0> - 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2> - 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS - 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1> - 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1> - 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0> - 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3> - 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS - 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7> - 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3> - 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3> - 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1> - 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1> - 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3> - 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2> - 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1> - 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5> - 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7> - 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7> - 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3> - 1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3> - 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2> - 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3> - 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3> - 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3> - 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5> - 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7> - 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6> - 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0> - 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0> - 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1> - 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3> - 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5> - 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4> - 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6> - 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS - 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, 
<6,4,6,0> - 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5> - 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS - 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS - 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3> - 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7> - 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4> - 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6> - 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5> - 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0> - 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7> - 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5> - 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1> - 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3> - 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3> - 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4> - 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4> - 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7> - 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6> - 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7> - 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7> - 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1> - 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0> - 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2> - 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0> - 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5> - 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4> - 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6> - 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7> - 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1> - 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1> - 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS - 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2> - 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0> - 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5> - 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS - 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6> - 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3> - 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1> - 2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0> - 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2> - 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0> - 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0> - 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1> - 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0> - 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0> - 2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2> - 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2> - 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3> - 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1> - 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3> - 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5> - 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS - 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3> - 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1> - 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1> - 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3> - 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5> - 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0> - 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2> - 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0> - 3773125818U, // <7,7,2,4>: Cost 4 vext3 
<2,4,5,7>, <7,2,4,5> - 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3> - 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3> - 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3> - 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3> - 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1> - 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3> - 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6> - 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7> - 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5> - 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3> - 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7> - 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7> - 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7> - 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5> - 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7> - 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3> - 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4> - 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4> - 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6> - 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4> - 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6> - 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6> - 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS - 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7> - 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3> - 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7> - 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7> - 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7> - 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7> - 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5> - 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7> - 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS - 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0> - 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7> - 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7> - 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS - 2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7> - 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7> - 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0> - 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7> - 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS - 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2> - 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2> - 2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2> - 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS - 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7> - 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7> - 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS - 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS - 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS - 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2> - 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3> - 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0> - 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS - 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6> - 1595840756U, // <7,7,u,6>: Cost 2 vext2 <u,6,7,7>, <u,6,7,7> - 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS - 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS - 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0> - 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2> 
- 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, <u,0,2,2> - 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2> - 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1> - 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, <u,0,5,1> - 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2> - 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS - 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2> - 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS - 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1> - 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS - 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3> - 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS - 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, <u,1,5,3> - 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1> - 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3> - 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS - 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2> - 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, <u,2,1,0> - 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2> - 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, <u,2,3,0> - 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6> - 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, <u,2,5,4> - 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, <u,2,6,3> - 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3> - 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, <u,2,u,0> - 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1> - 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3> - 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, <u,3,2,6> - 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3> - 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5> - 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7> - 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, <u,3,6,7> - 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, <u,3,7,0> - 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1> - 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1> - 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, <u,4,1,5> - 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6> - 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, <u,4,3,5> - 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4> - 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6> - 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, <u,4,6,6> - 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6> - 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6> - 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS - 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u> - 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, <u,5,2,3> - 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7> - 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u> - 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u> - 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS - 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7> - 564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS - 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, <u,6,0,1> - 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, <u,6,1,7> - 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u> - 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7> - 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6> - 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, <u,6,5,7> - 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u> - 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, <u,6,7,0> - 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, <u,6,u,7> - 1638471936U, // <7,u,7,0>: Cost 2 
vext3 RHS, <u,7,0,1> - 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, <u,7,1,3> - 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, <u,7,2,6> - 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7> - 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5> - 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7> - 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, <u,7,6,2> - 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS - 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS - 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1> - 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2> - 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS - 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, <u,u,3,0> - 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5> - 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6> - 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS - 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS - 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS - 135053414U, // <u,0,0,0>: Cost 1 vdup0 LHS - 1611489290U, // <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1> - 1611489300U, // <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2> - 2568054923U, // <u,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0> - 1481706806U, // <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS - 2555449040U, // <u,0,0,5>: Cost 3 vext1 <0,u,0,0>, <5,1,7,3> - 2591282078U, // <u,0,0,6>: Cost 3 vext1 <6,u,0,0>, <6,u,0,0> - 2591945711U, // <u,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0> - 135053414U, // <u,0,0,u>: Cost 1 vdup0 LHS - 1493655654U, // <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS - 1860550758U, // <u,0,1,1>: Cost 2 vzipl LHS, LHS - 537747563U, // <u,0,1,2>: Cost 1 vext3 LHS, LHS - 2625135576U, // <u,0,1,3>: Cost 3 vext2 <1,2,u,0>, <1,3,1,3> - 1493658934U, // <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS - 2625135760U, // <u,0,1,5>: Cost 3 vext2 <1,2,u,0>, <1,5,3,7> - 1517548447U, // <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1> - 2591290362U, // <u,0,1,7>: Cost 3 vext1 <6,u,0,1>, <7,0,1,2> - 537747612U, // <u,0,1,u>: Cost 1 vext3 LHS, LHS - 1611489444U, // <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2> - 2685231276U, // <u,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1> - 1994768486U, // <u,0,2,2>: Cost 2 vtrnl LHS, LHS - 2685231294U, // <u,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1> - 1611489484U, // <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6> - 2712068310U, // <u,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7> - 2625136570U, // <u,0,2,6>: Cost 3 vext2 <1,2,u,0>, <2,6,3,7> - 2591962097U, // <u,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2> - 1611489516U, // <u,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2> - 2954067968U, // <u,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0> - 2685231356U, // <u,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0> - 72589981U, // <u,0,3,2>: Cost 1 vrev LHS - 2625137052U, // <u,0,3,3>: Cost 3 vext2 <1,2,u,0>, <3,3,3,3> - 2625137154U, // <u,0,3,4>: Cost 3 vext2 <1,2,u,0>, <3,4,5,6> - 2639071848U, // <u,0,3,5>: Cost 3 vext2 <3,5,u,0>, <3,5,u,0> - 2639735481U, // <u,0,3,6>: Cost 3 vext2 <3,6,u,0>, <3,6,u,0> - 2597279354U, // <u,0,3,7>: Cost 3 vext1 <7,u,0,3>, <7,u,0,3> - 73032403U, // <u,0,3,u>: Cost 1 vrev LHS - 2687074636U, // <u,0,4,0>: Cost 3 vext3 <0,4,0,u>, <0,4,0,u> - 1611489618U, // <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5> - 1611489628U, // <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6> - 3629222038U, // <u,0,4,3>: Cost 4 vext1 <0,u,0,4>, <3,0,1,2> - 2555481398U, // <u,0,4,4>: Cost 3 vext1 <0,u,0,4>, RHS - 1551396150U, // <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS - 2651680116U, // <u,0,4,6>: Cost 3 vext2 <5,6,u,0>, <4,6,4,6> - 2646150600U, // <u,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0> - 1611932050U, // <u,0,4,u>: Cost 2 vext3 LHS, 
<0,4,u,6> - 2561458278U, // <u,0,5,0>: Cost 3 vext1 <1,u,0,5>, LHS - 1863532646U, // <u,0,5,1>: Cost 2 vzipl RHS, LHS - 2712068526U, // <u,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7> - 2649689976U, // <u,0,5,3>: Cost 3 vext2 <5,3,u,0>, <5,3,u,0> - 2220237489U, // <u,0,5,4>: Cost 3 vrev <0,u,4,5> - 2651680772U, // <u,0,5,5>: Cost 3 vext2 <5,6,u,0>, <5,5,5,5> - 1577939051U, // <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0> - 2830077238U, // <u,0,5,7>: Cost 3 vuzpr <1,u,3,0>, RHS - 1579266317U, // <u,0,5,u>: Cost 2 vext2 <5,u,u,0>, <5,u,u,0> - 2555494502U, // <u,0,6,0>: Cost 3 vext1 <0,u,0,6>, LHS - 2712068598U, // <u,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7> - 1997750374U, // <u,0,6,2>: Cost 2 vtrnl RHS, LHS - 2655662673U, // <u,0,6,3>: Cost 3 vext2 <6,3,u,0>, <6,3,u,0> - 2555497782U, // <u,0,6,4>: Cost 3 vext1 <0,u,0,6>, RHS - 2651681459U, // <u,0,6,5>: Cost 3 vext2 <5,6,u,0>, <6,5,0,u> - 2651681592U, // <u,0,6,6>: Cost 3 vext2 <5,6,u,0>, <6,6,6,6> - 2651681614U, // <u,0,6,7>: Cost 3 vext2 <5,6,u,0>, <6,7,0,1> - 1997750428U, // <u,0,6,u>: Cost 2 vtrnl RHS, LHS - 2567446630U, // <u,0,7,0>: Cost 3 vext1 <2,u,0,7>, LHS - 2567447446U, // <u,0,7,1>: Cost 3 vext1 <2,u,0,7>, <1,2,3,0> - 2567448641U, // <u,0,7,2>: Cost 3 vext1 <2,u,0,7>, <2,u,0,7> - 2573421338U, // <u,0,7,3>: Cost 3 vext1 <3,u,0,7>, <3,u,0,7> - 2567449910U, // <u,0,7,4>: Cost 3 vext1 <2,u,0,7>, RHS - 2651682242U, // <u,0,7,5>: Cost 3 vext2 <5,6,u,0>, <7,5,6,u> - 2591339429U, // <u,0,7,6>: Cost 3 vext1 <6,u,0,7>, <6,u,0,7> - 2651682412U, // <u,0,7,7>: Cost 3 vext2 <5,6,u,0>, <7,7,7,7> - 2567452462U, // <u,0,7,u>: Cost 3 vext1 <2,u,0,7>, LHS - 135053414U, // <u,0,u,0>: Cost 1 vdup0 LHS - 1611489938U, // <u,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1> - 537748125U, // <u,0,u,2>: Cost 1 vext3 LHS, LHS - 2685674148U, // <u,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1> - 1611932338U, // <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6> - 1551399066U, // <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS - 1517605798U, // <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u> - 2830077481U, // <u,0,u,7>: Cost 3 vuzpr <1,u,3,0>, RHS - 537748179U, // <u,0,u,u>: Cost 1 vext3 LHS, LHS - 1544101961U, // <u,1,0,0>: Cost 2 vext2 <0,0,u,1>, <0,0,u,1> - 1558036582U, // <u,1,0,1>: Cost 2 vext2 <2,3,u,1>, LHS - 2619171051U, // <u,1,0,2>: Cost 3 vext2 <0,2,u,1>, <0,2,u,1> - 1611490038U, // <u,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2> - 2555522358U, // <u,1,0,4>: Cost 3 vext1 <0,u,1,0>, RHS - 2712068871U, // <u,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1> - 2591355815U, // <u,1,0,6>: Cost 3 vext1 <6,u,1,0>, <6,u,1,0> - 2597328512U, // <u,1,0,7>: Cost 3 vext1 <7,u,1,0>, <7,u,1,0> - 1611490083U, // <u,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2> - 1481785446U, // <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, LHS - 202162278U, // <u,1,1,1>: Cost 1 vdup1 LHS - 2555528808U, // <u,1,1,2>: Cost 3 vext1 <0,u,1,1>, <2,2,2,2> - 1611490120U, // <u,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3> - 1481788726U, // <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS - 2689876828U, // <u,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5> - 2591364008U, // <u,1,1,6>: Cost 3 vext1 <6,u,1,1>, <6,u,1,1> - 2592691274U, // <u,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1> - 202162278U, // <u,1,1,u>: Cost 1 vdup1 LHS - 1499709542U, // <u,1,2,0>: Cost 2 vext1 <3,u,1,2>, LHS - 2689876871U, // <u,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3> - 2631116445U, // <u,1,2,2>: Cost 3 vext2 <2,2,u,1>, <2,2,u,1> - 835584U, // <u,1,2,3>: Cost 0 copy LHS - 1499712822U, // <u,1,2,4>: Cost 2 vext1 <3,u,1,2>, RHS - 2689876907U, // <u,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3> - 2631780282U, // <u,1,2,6>: Cost 3 vext2 <2,3,u,1>, <2,6,3,7> - 
1523603074U, // <u,1,2,7>: Cost 2 vext1 <7,u,1,2>, <7,u,1,2> - 835584U, // <u,1,2,u>: Cost 0 copy LHS - 1487773798U, // <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS - 1611490264U, // <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3> - 2685232094U, // <u,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0> - 2018746470U, // <u,1,3,3>: Cost 2 vtrnr LHS, LHS - 1487777078U, // <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS - 1611490304U, // <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7> - 2685674505U, // <u,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7> - 2640407307U, // <u,1,3,7>: Cost 3 vext2 <3,7,u,1>, <3,7,u,1> - 1611490327U, // <u,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3> - 1567992749U, // <u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1> - 2693121070U, // <u,1,4,1>: Cost 3 vext3 <1,4,1,u>, <1,4,1,u> - 2693194807U, // <u,1,4,2>: Cost 3 vext3 <1,4,2,u>, <1,4,2,u> - 1152386432U, // <u,1,4,3>: Cost 2 vrev <1,u,3,4> - 2555555126U, // <u,1,4,4>: Cost 3 vext1 <0,u,1,4>, RHS - 1558039862U, // <u,1,4,5>: Cost 2 vext2 <2,3,u,1>, RHS - 2645716371U, // <u,1,4,6>: Cost 3 vext2 <4,6,u,1>, <4,6,u,1> - 2597361284U, // <u,1,4,7>: Cost 3 vext1 <7,u,1,4>, <7,u,1,4> - 1152755117U, // <u,1,4,u>: Cost 2 vrev <1,u,u,4> - 1481818214U, // <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, LHS - 2555560694U, // <u,1,5,1>: Cost 3 vext1 <0,u,1,5>, <1,0,3,2> - 2555561576U, // <u,1,5,2>: Cost 3 vext1 <0,u,1,5>, <2,2,2,2> - 1611490448U, // <u,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7> - 1481821494U, // <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS - 2651025435U, // <u,1,5,5>: Cost 3 vext2 <5,5,u,1>, <5,5,u,1> - 2651689068U, // <u,1,5,6>: Cost 3 vext2 <5,6,u,1>, <5,6,u,1> - 2823966006U, // <u,1,5,7>: Cost 3 vuzpr <0,u,1,1>, RHS - 1611932861U, // <u,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7> - 2555568230U, // <u,1,6,0>: Cost 3 vext1 <0,u,1,6>, LHS - 2689877199U, // <u,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7> - 2712069336U, // <u,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7> - 2685232353U, // <u,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7> - 2555571510U, // <u,1,6,4>: Cost 3 vext1 <0,u,1,6>, RHS - 2689877235U, // <u,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7> - 2657661765U, // <u,1,6,6>: Cost 3 vext2 <6,6,u,1>, <6,6,u,1> - 1584583574U, // <u,1,6,7>: Cost 2 vext2 <6,7,u,1>, <6,7,u,1> - 1585247207U, // <u,1,6,u>: Cost 2 vext2 <6,u,u,1>, <6,u,u,1> - 2561548390U, // <u,1,7,0>: Cost 3 vext1 <1,u,1,7>, LHS - 2561549681U, // <u,1,7,1>: Cost 3 vext1 <1,u,1,7>, <1,u,1,7> - 2573493926U, // <u,1,7,2>: Cost 3 vext1 <3,u,1,7>, <2,3,0,1> - 2042962022U, // <u,1,7,3>: Cost 2 vtrnr RHS, LHS - 2561551670U, // <u,1,7,4>: Cost 3 vext1 <1,u,1,7>, RHS - 2226300309U, // <u,1,7,5>: Cost 3 vrev <1,u,5,7> - 2658325990U, // <u,1,7,6>: Cost 3 vext2 <6,7,u,1>, <7,6,1,u> - 2658326124U, // <u,1,7,7>: Cost 3 vext2 <6,7,u,1>, <7,7,7,7> - 2042962027U, // <u,1,7,u>: Cost 2 vtrnr RHS, LHS - 1481842790U, // <u,1,u,0>: Cost 2 vext1 <0,u,1,u>, LHS - 202162278U, // <u,1,u,1>: Cost 1 vdup1 LHS - 2685674867U, // <u,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0> - 835584U, // <u,1,u,3>: Cost 0 copy LHS - 1481846070U, // <u,1,u,4>: Cost 2 vext1 <0,u,1,u>, RHS - 1611933077U, // <u,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7> - 2685674910U, // <u,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7> - 1523652232U, // <u,1,u,7>: Cost 2 vext1 <7,u,1,u>, <7,u,1,u> - 835584U, // <u,1,u,u>: Cost 0 copy LHS - 1544110154U, // <u,2,0,0>: Cost 2 vext2 <0,0,u,2>, <0,0,u,2> - 1545437286U, // <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS - 1545437420U, // <u,2,0,2>: Cost 2 vext2 <0,2,u,2>, <0,2,u,2> - 2685232589U, // <u,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0> - 2619179346U, // <u,2,0,4>: Cost 3 vext2 <0,2,u,2>, <0,4,1,5> - 2712069606U, // <u,2,0,5>: Cost 3 vext3 
RHS, <2,0,5,7> - 2689877484U, // <u,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4> - 2659656273U, // <u,2,0,7>: Cost 3 vext2 <7,0,u,2>, <0,7,2,u> - 1545437853U, // <u,2,0,u>: Cost 2 vext2 <0,2,u,2>, LHS - 1550082851U, // <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2> - 2619179828U, // <u,2,1,1>: Cost 3 vext2 <0,2,u,2>, <1,1,1,1> - 2619179926U, // <u,2,1,2>: Cost 3 vext2 <0,2,u,2>, <1,2,3,0> - 2685232671U, // <u,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1> - 2555604278U, // <u,2,1,4>: Cost 3 vext1 <0,u,2,1>, RHS - 2619180176U, // <u,2,1,5>: Cost 3 vext2 <0,2,u,2>, <1,5,3,7> - 2689877564U, // <u,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3> - 2602718850U, // <u,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2> - 1158703235U, // <u,2,1,u>: Cost 2 vrev <2,u,u,1> - 1481867366U, // <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, LHS - 2555609846U, // <u,2,2,1>: Cost 3 vext1 <0,u,2,2>, <1,0,3,2> - 269271142U, // <u,2,2,2>: Cost 1 vdup2 LHS - 1611490930U, // <u,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3> - 1481870646U, // <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS - 2689877640U, // <u,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7> - 2619180986U, // <u,2,2,6>: Cost 3 vext2 <0,2,u,2>, <2,6,3,7> - 2593436837U, // <u,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2> - 269271142U, // <u,2,2,u>: Cost 1 vdup2 LHS - 408134301U, // <u,2,3,0>: Cost 1 vext1 LHS, LHS - 1481876214U, // <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2> - 1481877096U, // <u,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2> - 1880326246U, // <u,2,3,3>: Cost 2 vzipr LHS, LHS - 408137014U, // <u,2,3,4>: Cost 1 vext1 LHS, RHS - 1529654992U, // <u,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3> - 1529655802U, // <u,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3> - 1529656314U, // <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2> - 408139566U, // <u,2,3,u>: Cost 1 vext1 LHS, LHS - 1567853468U, // <u,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2> - 2561598362U, // <u,2,4,1>: Cost 3 vext1 <1,u,2,4>, <1,2,3,4> - 2555627214U, // <u,2,4,2>: Cost 3 vext1 <0,u,2,4>, <2,3,4,5> - 2685232918U, // <u,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5> - 2555628854U, // <u,2,4,4>: Cost 3 vext1 <0,u,2,4>, RHS - 1545440566U, // <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS - 1571982740U, // <u,2,4,6>: Cost 2 vext2 <4,6,u,2>, <4,6,u,2> - 2592125957U, // <u,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4> - 1545440809U, // <u,2,4,u>: Cost 2 vext2 <0,2,u,2>, RHS - 2555633766U, // <u,2,5,0>: Cost 3 vext1 <0,u,2,5>, LHS - 2561606550U, // <u,2,5,1>: Cost 3 vext1 <1,u,2,5>, <1,2,3,0> - 2689877856U, // <u,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7> - 2685233000U, // <u,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6> - 1158441059U, // <u,2,5,4>: Cost 2 vrev <2,u,4,5> - 2645725188U, // <u,2,5,5>: Cost 3 vext2 <4,6,u,2>, <5,5,5,5> - 2689877892U, // <u,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7> - 2823900470U, // <u,2,5,7>: Cost 3 vuzpr <0,u,0,2>, RHS - 1158736007U, // <u,2,5,u>: Cost 2 vrev <2,u,u,5> - 1481900134U, // <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, LHS - 2555642614U, // <u,2,6,1>: Cost 3 vext1 <0,u,2,6>, <1,0,3,2> - 2555643496U, // <u,2,6,2>: Cost 3 vext1 <0,u,2,6>, <2,2,2,2> - 1611491258U, // <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7> - 1481903414U, // <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS - 2689877964U, // <u,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7> - 2689877973U, // <u,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7> - 2645726030U, // <u,2,6,7>: Cost 3 vext2 <4,6,u,2>, <6,7,0,1> - 1611933671U, // <u,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7> - 1585919033U, // <u,2,7,0>: Cost 2 vext2 <7,0,u,2>, <7,0,u,2> - 2573566710U, // <u,2,7,1>: Cost 3 vext1 <3,u,2,7>, <1,0,3,2> - 2567596115U, // <u,2,7,2>: Cost 3 vext1 <2,u,2,7>, <2,u,2,7> - 1906901094U, // <u,2,7,3>: Cost 2 vzipr RHS, LHS - 
2555653430U, // <u,2,7,4>: Cost 3 vext1 <0,u,2,7>, RHS - 2800080230U, // <u,2,7,5>: Cost 3 vuzpl LHS, <7,4,5,6> - 2980643164U, // <u,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6> - 2645726828U, // <u,2,7,7>: Cost 3 vext2 <4,6,u,2>, <7,7,7,7> - 1906901099U, // <u,2,7,u>: Cost 2 vzipr RHS, LHS - 408175266U, // <u,2,u,0>: Cost 1 vext1 LHS, LHS - 1545443118U, // <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS - 269271142U, // <u,2,u,2>: Cost 1 vdup2 LHS - 1611491416U, // <u,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3> - 408177974U, // <u,2,u,4>: Cost 1 vext1 LHS, RHS - 1545443482U, // <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS - 1726339226U, // <u,2,u,6>: Cost 2 vuzpl LHS, RHS - 1529697274U, // <u,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2> - 408180526U, // <u,2,u,u>: Cost 1 vext1 LHS, LHS - 1544781824U, // <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0> - 471040156U, // <u,3,0,1>: Cost 1 vext2 LHS, LHS - 1544781988U, // <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2> - 2618523900U, // <u,3,0,3>: Cost 3 vext2 LHS, <0,3,1,0> - 1544782162U, // <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5> - 2238188352U, // <u,3,0,5>: Cost 3 vrev <3,u,5,0> - 2623169023U, // <u,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7> - 2238335826U, // <u,3,0,7>: Cost 3 vrev <3,u,7,0> - 471040669U, // <u,3,0,u>: Cost 1 vext2 LHS, LHS - 1544782582U, // <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2> - 1544782644U, // <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1> - 1544782742U, // <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0> - 1544782808U, // <u,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3> - 2618524733U, // <u,3,1,4>: Cost 3 vext2 LHS, <1,4,3,5> - 1544782992U, // <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7> - 2618524897U, // <u,3,1,6>: Cost 3 vext2 LHS, <1,6,3,7> - 2703517987U, // <u,3,1,7>: Cost 3 vext3 <3,1,7,u>, <3,1,7,u> - 1544783213U, // <u,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3> - 1529716838U, // <u,3,2,0>: Cost 2 vext1 <u,u,3,2>, LHS - 1164167966U, // <u,3,2,1>: Cost 2 vrev <3,u,1,2> - 1544783464U, // <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2> - 1544783526U, // <u,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1> - 1529720118U, // <u,3,2,4>: Cost 2 vext1 <u,u,3,2>, RHS - 2618525544U, // <u,3,2,5>: Cost 3 vext2 LHS, <2,5,3,6> - 1544783802U, // <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7> - 2704181620U, // <u,3,2,7>: Cost 3 vext3 <3,2,7,u>, <3,2,7,u> - 1544783931U, // <u,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1> - 1544784022U, // <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2> - 1487922559U, // <u,3,3,1>: Cost 2 vext1 <1,u,3,3>, <1,u,3,3> - 1493895256U, // <u,3,3,2>: Cost 2 vext1 <2,u,3,3>, <2,u,3,3> - 336380006U, // <u,3,3,3>: Cost 1 vdup3 LHS - 1544784386U, // <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6> - 2824054478U, // <u,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5> - 2238286668U, // <u,3,3,6>: Cost 3 vrev <3,u,6,3> - 2954069136U, // <u,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7> - 336380006U, // <u,3,3,u>: Cost 1 vdup3 LHS - 1487929446U, // <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS - 1487930752U, // <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4> - 2623171644U, // <u,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0> - 2561673366U, // <u,3,4,3>: Cost 3 vext1 <1,u,3,4>, <3,0,1,2> - 1487932726U, // <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS - 471043382U, // <u,3,4,5>: Cost 1 vext2 LHS, RHS - 1592561012U, // <u,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6> - 2238368598U, // <u,3,4,7>: Cost 3 vrev <3,u,7,4> - 471043625U, // <u,3,4,u>: Cost 1 vext2 LHS, RHS - 2555707494U, // <u,3,5,0>: Cost 3 vext1 <0,u,3,5>, LHS - 1574645465U, // <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3> - 2567653106U, // <u,3,5,2>: Cost 3 vext1 <2,u,3,5>, <2,3,u,5> - 2555709954U, // <u,3,5,3>: Cost 3 vext1 <0,u,3,5>, <3,4,5,6> - 1592561606U, // <u,3,5,4>: 
Cost 2 vext2 LHS, <5,4,7,6> - 1592561668U, // <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5> - 1592561762U, // <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0> - 1750314294U, // <u,3,5,7>: Cost 2 vuzpr LHS, RHS - 1750314295U, // <u,3,5,u>: Cost 2 vuzpr LHS, RHS - 2623172897U, // <u,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2> - 2561688962U, // <u,3,6,1>: Cost 3 vext1 <1,u,3,6>, <1,u,3,6> - 1581281795U, // <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3> - 2706541204U, // <u,3,6,3>: Cost 3 vext3 <3,6,3,u>, <3,6,3,u> - 2623173261U, // <u,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6> - 1164495686U, // <u,3,6,5>: Cost 2 vrev <3,u,5,6> - 1592562488U, // <u,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6> - 1592562510U, // <u,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1> - 1164716897U, // <u,3,6,u>: Cost 2 vrev <3,u,u,6> - 1487954022U, // <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS - 1487955331U, // <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7> - 1493928028U, // <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7> - 2561697942U, // <u,3,7,3>: Cost 3 vext1 <1,u,3,7>, <3,0,1,2> - 1487957302U, // <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS - 2707352311U, // <u,3,7,5>: Cost 3 vext3 <3,7,5,u>, <3,7,5,u> - 2655024623U, // <u,3,7,6>: Cost 3 vext2 <6,2,u,3>, <7,6,2,u> - 1592563308U, // <u,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7> - 1487959854U, // <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS - 1544787667U, // <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2> - 471045934U, // <u,3,u,1>: Cost 1 vext2 LHS, LHS - 1549432709U, // <u,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0> - 336380006U, // <u,3,u,3>: Cost 1 vdup3 LHS - 1544788031U, // <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6> - 471046298U, // <u,3,u,5>: Cost 1 vext2 LHS, RHS - 1549433040U, // <u,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7> - 1750314537U, // <u,3,u,7>: Cost 2 vuzpr LHS, RHS - 471046501U, // <u,3,u,u>: Cost 1 vext2 LHS, LHS - 2625167360U, // <u,4,0,0>: Cost 3 vext2 <1,2,u,4>, <0,0,0,0> - 1551425638U, // <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS - 2619195630U, // <u,4,0,2>: Cost 3 vext2 <0,2,u,4>, <0,2,u,4> - 2619343104U, // <u,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4> - 2625167698U, // <u,4,0,4>: Cost 3 vext2 <1,2,u,4>, <0,4,1,5> - 1638329234U, // <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1> - 1638329244U, // <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2> - 3787803556U, // <u,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1> - 1551426205U, // <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS - 2555748454U, // <u,4,1,0>: Cost 3 vext1 <0,u,4,1>, LHS - 2625168180U, // <u,4,1,1>: Cost 3 vext2 <1,2,u,4>, <1,1,1,1> - 1551426503U, // <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4> - 2625168344U, // <u,4,1,3>: Cost 3 vext2 <1,2,u,4>, <1,3,1,3> - 2555751734U, // <u,4,1,4>: Cost 3 vext1 <0,u,4,1>, RHS - 1860554038U, // <u,4,1,5>: Cost 2 vzipl LHS, RHS - 2689879022U, // <u,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3> - 2592248852U, // <u,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1> - 1555408301U, // <u,4,1,u>: Cost 2 vext2 <1,u,u,4>, <1,u,u,4> - 2555756646U, // <u,4,2,0>: Cost 3 vext1 <0,u,4,2>, LHS - 2625168943U, // <u,4,2,1>: Cost 3 vext2 <1,2,u,4>, <2,1,4,u> - 2625169000U, // <u,4,2,2>: Cost 3 vext2 <1,2,u,4>, <2,2,2,2> - 2619197134U, // <u,4,2,3>: Cost 3 vext2 <0,2,u,4>, <2,3,4,5> - 2555759926U, // <u,4,2,4>: Cost 3 vext1 <0,u,4,2>, RHS - 2712071222U, // <u,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3> - 1994771766U, // <u,4,2,6>: Cost 2 vtrnl LHS, RHS - 2592257045U, // <u,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2> - 1994771784U, // <u,4,2,u>: Cost 2 vtrnl LHS, RHS - 2625169558U, // <u,4,3,0>: Cost 3 vext2 <1,2,u,4>, <3,0,1,2> - 2567709594U, // <u,4,3,1>: Cost 3 vext1 <2,u,4,3>, <1,2,3,4> - 2567710817U, // <u,4,3,2>: Cost 3 vext1 
<2,u,4,3>, <2,u,4,3> - 2625169820U, // <u,4,3,3>: Cost 3 vext2 <1,2,u,4>, <3,3,3,3> - 2625169922U, // <u,4,3,4>: Cost 3 vext2 <1,2,u,4>, <3,4,5,6> - 2954069710U, // <u,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5> - 2954068172U, // <u,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6> - 3903849472U, // <u,4,3,7>: Cost 4 vuzpr <1,u,3,4>, <1,3,5,7> - 2954068174U, // <u,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u> - 1505919078U, // <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS - 2567717831U, // <u,4,4,1>: Cost 3 vext1 <2,u,4,4>, <1,2,u,4> - 2567719010U, // <u,4,4,2>: Cost 3 vext1 <2,u,4,4>, <2,u,4,4> - 2570373542U, // <u,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4> - 161926454U, // <u,4,4,4>: Cost 1 vdup0 RHS - 1551428918U, // <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS - 1638329572U, // <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6> - 2594927963U, // <u,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4> - 161926454U, // <u,4,4,u>: Cost 1 vdup0 RHS - 1493983334U, // <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS - 2689879301U, // <u,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3> - 1493985379U, // <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5> - 2567727254U, // <u,4,5,3>: Cost 3 vext1 <2,u,4,5>, <3,0,1,2> - 1493986614U, // <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS - 1863535926U, // <u,4,5,5>: Cost 2 vzipl RHS, RHS - 537750838U, // <u,4,5,6>: Cost 1 vext3 LHS, RHS - 2830110006U, // <u,4,5,7>: Cost 3 vuzpr <1,u,3,4>, RHS - 537750856U, // <u,4,5,u>: Cost 1 vext3 LHS, RHS - 1482047590U, // <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, LHS - 2555790070U, // <u,4,6,1>: Cost 3 vext1 <0,u,4,6>, <1,0,3,2> - 2555790952U, // <u,4,6,2>: Cost 3 vext1 <0,u,4,6>, <2,2,2,2> - 2555791510U, // <u,4,6,3>: Cost 3 vext1 <0,u,4,6>, <3,0,1,2> - 1482050870U, // <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS - 2689879422U, // <u,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7> - 1997753654U, // <u,4,6,6>: Cost 2 vtrnl RHS, RHS - 2712071562U, // <u,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1> - 1482053422U, // <u,4,6,u>: Cost 2 vext1 <0,u,4,6>, LHS - 2567741542U, // <u,4,7,0>: Cost 3 vext1 <2,u,4,7>, LHS - 2567742362U, // <u,4,7,1>: Cost 3 vext1 <2,u,4,7>, <1,2,3,4> - 2567743589U, // <u,4,7,2>: Cost 3 vext1 <2,u,4,7>, <2,u,4,7> - 2573716286U, // <u,4,7,3>: Cost 3 vext1 <3,u,4,7>, <3,u,4,7> - 2567744822U, // <u,4,7,4>: Cost 3 vext1 <2,u,4,7>, RHS - 2712071624U, // <u,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0> - 96808489U, // <u,4,7,6>: Cost 1 vrev RHS - 2651715180U, // <u,4,7,7>: Cost 3 vext2 <5,6,u,4>, <7,7,7,7> - 96955963U, // <u,4,7,u>: Cost 1 vrev RHS - 1482063974U, // <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, LHS - 1551431470U, // <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS - 1494009958U, // <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u> - 2555807894U, // <u,4,u,3>: Cost 3 vext1 <0,u,4,u>, <3,0,1,2> - 161926454U, // <u,4,u,4>: Cost 1 vdup0 RHS - 1551431834U, // <u,4,u,5>: Cost 2 vext2 <1,2,u,4>, RHS - 537751081U, // <u,4,u,6>: Cost 1 vext3 LHS, RHS - 2830110249U, // <u,4,u,7>: Cost 3 vuzpr <1,u,3,4>, RHS - 537751099U, // <u,4,u,u>: Cost 1 vext3 LHS, RHS - 2631811072U, // <u,5,0,0>: Cost 3 vext2 <2,3,u,5>, <0,0,0,0> - 1558069350U, // <u,5,0,1>: Cost 2 vext2 <2,3,u,5>, LHS - 2619203823U, // <u,5,0,2>: Cost 3 vext2 <0,2,u,5>, <0,2,u,5> - 2619867456U, // <u,5,0,3>: Cost 3 vext2 <0,3,u,5>, <0,3,u,5> - 1546273106U, // <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5> - 2733010539U, // <u,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1> - 2597622682U, // <u,5,0,6>: Cost 3 vext1 <7,u,5,0>, <6,7,u,5> - 1176539396U, // <u,5,0,7>: Cost 2 vrev <5,u,7,0> - 1558069917U, // <u,5,0,u>: Cost 2 vext2 <2,3,u,5>, LHS - 1505968230U, // <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS - 2624512887U, // <u,5,1,1>: 
Cost 3 vext2 <1,1,u,5>, <1,1,u,5> - 2631811990U, // <u,5,1,2>: Cost 3 vext2 <2,3,u,5>, <1,2,3,0> - 2618541056U, // <u,5,1,3>: Cost 3 vext2 <0,1,u,5>, <1,3,5,7> - 1505971510U, // <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, RHS - 2627167419U, // <u,5,1,5>: Cost 3 vext2 <1,5,u,5>, <1,5,u,5> - 2579714554U, // <u,5,1,6>: Cost 3 vext1 <4,u,5,1>, <6,2,7,3> - 1638330064U, // <u,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3> - 1638477529U, // <u,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3> - 2561802342U, // <u,5,2,0>: Cost 3 vext1 <1,u,5,2>, LHS - 2561803264U, // <u,5,2,1>: Cost 3 vext1 <1,u,5,2>, <1,3,5,7> - 2631149217U, // <u,5,2,2>: Cost 3 vext2 <2,2,u,5>, <2,2,u,5> - 1558071026U, // <u,5,2,3>: Cost 2 vext2 <2,3,u,5>, <2,3,u,5> - 2561805622U, // <u,5,2,4>: Cost 3 vext1 <1,u,5,2>, RHS - 2714062607U, // <u,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3> - 2631813050U, // <u,5,2,6>: Cost 3 vext2 <2,3,u,5>, <2,6,3,7> - 3092335926U, // <u,5,2,7>: Cost 3 vtrnr <0,u,0,2>, RHS - 1561389191U, // <u,5,2,u>: Cost 2 vext2 <2,u,u,5>, <2,u,u,5> - 2561810534U, // <u,5,3,0>: Cost 3 vext1 <1,u,5,3>, LHS - 2561811857U, // <u,5,3,1>: Cost 3 vext1 <1,u,5,3>, <1,u,5,3> - 2631813474U, // <u,5,3,2>: Cost 3 vext2 <2,3,u,5>, <3,2,5,u> - 2631813532U, // <u,5,3,3>: Cost 3 vext2 <2,3,u,5>, <3,3,3,3> - 2619869698U, // <u,5,3,4>: Cost 3 vext2 <0,3,u,5>, <3,4,5,6> - 3001847002U, // <u,5,3,5>: Cost 3 vzipr LHS, <4,4,5,5> - 2954070530U, // <u,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6> - 2018749750U, // <u,5,3,7>: Cost 2 vtrnr LHS, RHS - 2018749751U, // <u,5,3,u>: Cost 2 vtrnr LHS, RHS - 2573762662U, // <u,5,4,0>: Cost 3 vext1 <3,u,5,4>, LHS - 2620017634U, // <u,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0> - 2573764338U, // <u,5,4,2>: Cost 3 vext1 <3,u,5,4>, <2,3,u,5> - 2573765444U, // <u,5,4,3>: Cost 3 vext1 <3,u,5,4>, <3,u,5,4> - 1570680053U, // <u,5,4,4>: Cost 2 vext2 <4,4,u,5>, <4,4,u,5> - 1558072630U, // <u,5,4,5>: Cost 2 vext2 <2,3,u,5>, RHS - 2645749143U, // <u,5,4,6>: Cost 3 vext2 <4,6,u,5>, <4,6,u,5> - 1638330310U, // <u,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6> - 1558072873U, // <u,5,4,u>: Cost 2 vext2 <2,3,u,5>, RHS - 1506000998U, // <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS - 2561827984U, // <u,5,5,1>: Cost 3 vext1 <1,u,5,5>, <1,5,3,7> - 2579744360U, // <u,5,5,2>: Cost 3 vext1 <4,u,5,5>, <2,2,2,2> - 2579744918U, // <u,5,5,3>: Cost 3 vext1 <4,u,5,5>, <3,0,1,2> - 1506004278U, // <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, RHS - 229035318U, // <u,5,5,5>: Cost 1 vdup1 RHS - 2712072206U, // <u,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6> - 1638330392U, // <u,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7> - 229035318U, // <u,5,5,u>: Cost 1 vdup1 RHS - 1500037222U, // <u,5,6,0>: Cost 2 vext1 <3,u,5,6>, LHS - 2561836436U, // <u,5,6,1>: Cost 3 vext1 <1,u,5,6>, <1,u,5,6> - 2567809133U, // <u,5,6,2>: Cost 3 vext1 <2,u,5,6>, <2,u,5,6> - 1500040006U, // <u,5,6,3>: Cost 2 vext1 <3,u,5,6>, <3,u,5,6> - 1500040502U, // <u,5,6,4>: Cost 2 vext1 <3,u,5,6>, RHS - 2714062935U, // <u,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7> - 2712072288U, // <u,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7> - 27705344U, // <u,5,6,7>: Cost 0 copy RHS - 27705344U, // <u,5,6,u>: Cost 0 copy RHS - 1488101478U, // <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS - 1488102805U, // <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7> - 2561844840U, // <u,5,7,2>: Cost 3 vext1 <1,u,5,7>, <2,2,2,2> - 2561845398U, // <u,5,7,3>: Cost 3 vext1 <1,u,5,7>, <3,0,1,2> - 1488104758U, // <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS - 1638330536U, // <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7> - 2712072362U, // <u,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0> - 2042965302U, // <u,5,7,7>: Cost 2 vtrnr RHS, RHS - 
1488107310U, // <u,5,7,u>: Cost 2 vext1 <1,u,5,7>, LHS - 1488109670U, // <u,5,u,0>: Cost 2 vext1 <1,u,5,u>, LHS - 1488110998U, // <u,5,u,1>: Cost 2 vext1 <1,u,5,u>, <1,u,5,u> - 2561853032U, // <u,5,u,2>: Cost 3 vext1 <1,u,5,u>, <2,2,2,2> - 1500056392U, // <u,5,u,3>: Cost 2 vext1 <3,u,5,u>, <3,u,5,u> - 1488112950U, // <u,5,u,4>: Cost 2 vext1 <1,u,5,u>, RHS - 229035318U, // <u,5,u,5>: Cost 1 vdup1 RHS - 2954111490U, // <u,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6> - 27705344U, // <u,5,u,7>: Cost 0 copy RHS - 27705344U, // <u,5,u,u>: Cost 0 copy RHS - 2619211776U, // <u,6,0,0>: Cost 3 vext2 <0,2,u,6>, <0,0,0,0> - 1545470054U, // <u,6,0,1>: Cost 2 vext2 <0,2,u,6>, LHS - 1545470192U, // <u,6,0,2>: Cost 2 vext2 <0,2,u,6>, <0,2,u,6> - 2255958969U, // <u,6,0,3>: Cost 3 vrev <6,u,3,0> - 1546797458U, // <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6> - 2720624971U, // <u,6,0,5>: Cost 3 vext3 <6,0,5,u>, <6,0,5,u> - 2256180180U, // <u,6,0,6>: Cost 3 vrev <6,u,6,0> - 2960682294U, // <u,6,0,7>: Cost 3 vzipr <1,2,u,0>, RHS - 1545470621U, // <u,6,0,u>: Cost 2 vext2 <0,2,u,6>, LHS - 1182004127U, // <u,6,1,0>: Cost 2 vrev <6,u,0,1> - 2619212596U, // <u,6,1,1>: Cost 3 vext2 <0,2,u,6>, <1,1,1,1> - 2619212694U, // <u,6,1,2>: Cost 3 vext2 <0,2,u,6>, <1,2,3,0> - 2619212760U, // <u,6,1,3>: Cost 3 vext2 <0,2,u,6>, <1,3,1,3> - 2626511979U, // <u,6,1,4>: Cost 3 vext2 <1,4,u,6>, <1,4,u,6> - 2619212944U, // <u,6,1,5>: Cost 3 vext2 <0,2,u,6>, <1,5,3,7> - 2714063264U, // <u,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3> - 2967326006U, // <u,6,1,7>: Cost 3 vzipr <2,3,u,1>, RHS - 1182594023U, // <u,6,1,u>: Cost 2 vrev <6,u,u,1> - 1506050150U, // <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS - 2579792630U, // <u,6,2,1>: Cost 3 vext1 <4,u,6,2>, <1,0,3,2> - 2619213416U, // <u,6,2,2>: Cost 3 vext2 <0,2,u,6>, <2,2,2,2> - 2619213478U, // <u,6,2,3>: Cost 3 vext2 <0,2,u,6>, <2,3,0,1> - 1506053430U, // <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, RHS - 2633148309U, // <u,6,2,5>: Cost 3 vext2 <2,5,u,6>, <2,5,u,6> - 2619213754U, // <u,6,2,6>: Cost 3 vext2 <0,2,u,6>, <2,6,3,7> - 1638330874U, // <u,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3> - 1638478339U, // <u,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3> - 2619213974U, // <u,6,3,0>: Cost 3 vext2 <0,2,u,6>, <3,0,1,2> - 2255836074U, // <u,6,3,1>: Cost 3 vrev <6,u,1,3> - 2255909811U, // <u,6,3,2>: Cost 3 vrev <6,u,2,3> - 2619214236U, // <u,6,3,3>: Cost 3 vext2 <0,2,u,6>, <3,3,3,3> - 1564715549U, // <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6> - 2639121006U, // <u,6,3,5>: Cost 3 vext2 <3,5,u,6>, <3,5,u,6> - 3001847012U, // <u,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6> - 1880329526U, // <u,6,3,7>: Cost 2 vzipr LHS, RHS - 1880329527U, // <u,6,3,u>: Cost 2 vzipr LHS, RHS - 2567864422U, // <u,6,4,0>: Cost 3 vext1 <2,u,6,4>, LHS - 2733011558U, // <u,6,4,1>: Cost 3 vext3 LHS, <6,4,1,3> - 2567866484U, // <u,6,4,2>: Cost 3 vext1 <2,u,6,4>, <2,u,6,4> - 2638458005U, // <u,6,4,3>: Cost 3 vext2 <3,4,u,6>, <4,3,6,u> - 1570540772U, // <u,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6> - 1545473334U, // <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS - 1572015512U, // <u,6,4,6>: Cost 2 vext2 <4,6,u,6>, <4,6,u,6> - 2960715062U, // <u,6,4,7>: Cost 3 vzipr <1,2,u,4>, RHS - 1545473577U, // <u,6,4,u>: Cost 2 vext2 <0,2,u,6>, RHS - 2567872614U, // <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS - 2645757648U, // <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3> - 2567874490U, // <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7> - 2576501250U, // <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6> - 1576660943U, // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6> - 2645757956U, // <u,6,5,5>: Cost 3 vext2 
<4,6,u,6>, <5,5,5,5> - 2645758050U, // <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0> - 2824080694U, // <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS - 1182626795U, // <u,6,5,u>: Cost 2 vrev <6,u,u,5> - 1506082918U, // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS - 2579825398U, // <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2> - 2645758458U, // <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3> - 2579826838U, // <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2> - 1506086198U, // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS - 2579828432U, // <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3> - 296144182U, // <u,6,6,6>: Cost 1 vdup2 RHS - 1638331202U, // <u,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7> - 296144182U, // <u,6,6,u>: Cost 1 vdup2 RHS - 432349286U, // <u,6,7,0>: Cost 1 vext1 RHS, LHS - 1506091766U, // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2> - 1506092648U, // <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2> - 1506093206U, // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2> - 432352809U, // <u,6,7,4>: Cost 1 vext1 RHS, RHS - 1506094800U, // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3> - 1506095610U, // <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3> - 1906904374U, // <u,6,7,7>: Cost 2 vzipr RHS, RHS - 432355118U, // <u,6,7,u>: Cost 1 vext1 RHS, LHS - 432357478U, // <u,6,u,0>: Cost 1 vext1 RHS, LHS - 1545475886U, // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS - 1506100840U, // <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2> - 1506101398U, // <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2> - 432361002U, // <u,6,u,4>: Cost 1 vext1 RHS, RHS - 1545476250U, // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS - 296144182U, // <u,6,u,6>: Cost 1 vdup2 RHS - 1880370486U, // <u,6,u,7>: Cost 2 vzipr LHS, RHS - 432363310U, // <u,6,u,u>: Cost 1 vext1 RHS, LHS - 1571356672U, // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0> - 497614950U, // <u,7,0,1>: Cost 1 vext2 RHS, LHS - 1571356836U, // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2> - 2573880146U, // <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0> - 1571357010U, // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5> - 1512083716U, // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0> - 2621874741U, // <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7> - 2585826298U, // <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2> - 497615517U, // <u,7,0,u>: Cost 1 vext2 RHS, LHS - 1571357430U, // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2> - 1571357492U, // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1> - 1571357590U, // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0> - 1552114715U, // <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7> - 2573888822U, // <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS - 1553441981U, // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7> - 2627847438U, // <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7> - 2727408775U, // <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u> - 1555432880U, // <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7> - 2629838337U, // <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7> - 1188058754U, // <u,7,2,1>: Cost 2 vrev <7,u,1,2> - 1571358312U, // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2> - 1571358374U, // <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1> - 2632492869U, // <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7> - 2633156502U, // <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7> - 1560078311U, // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7> - 2728072408U, // <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u> - 1561405577U, // <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7> - 1571358870U, // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2> - 2627184913U, // <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u> - 2633820523U, // <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u> - 1571359132U, // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3> - 1571359234U, // <u,7,3,4>: Cost 2 
vext2 RHS, <3,4,5,6> - 1512108295U, // <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3> - 1518080992U, // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3> - 2640456465U, // <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7> - 1571359518U, // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2> - 1571359634U, // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1> - 2573911067U, // <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7> - 2645101622U, // <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3> - 2573912918U, // <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4> - 1571359952U, // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4> - 497618248U, // <u,7,4,5>: Cost 1 vext2 RHS, RHS - 1571360116U, // <u,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6> - 2645102024U, // <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0> - 497618473U, // <u,7,4,u>: Cost 1 vext2 RHS, RHS - 2645102152U, // <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2> - 1571360464U, // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3> - 2645102334U, // <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4> - 2645102447U, // <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0> - 1571360710U, // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6> - 1571360772U, // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5> - 1571360866U, // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0> - 1571360936U, // <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7> - 1571361017U, // <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7> - 1530044518U, // <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS - 2645103016U, // <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2> - 1571361274U, // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3> - 2645103154U, // <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5> - 1530047798U, // <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS - 1188386474U, // <u,7,6,5>: Cost 2 vrev <7,u,5,6> - 1571361592U, // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6> - 1571361614U, // <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1> - 1571361695U, // <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1> - 1571361786U, // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2> - 2573935616U, // <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7> - 2645103781U, // <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2> - 2573937497U, // <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7> - 1571362150U, // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6> - 1512141067U, // <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7> - 1518113764U, // <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7> - 363253046U, // <u,7,7,7>: Cost 1 vdup3 RHS - 363253046U, // <u,7,7,u>: Cost 1 vdup3 RHS - 1571362515U, // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2> - 497620782U, // <u,7,u,1>: Cost 1 vext2 RHS, LHS - 1571362693U, // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0> - 1571362748U, // <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1> - 1571362879U, // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6> - 497621146U, // <u,7,u,5>: Cost 1 vext2 RHS, RHS - 1571363024U, // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7> - 363253046U, // <u,7,u,7>: Cost 1 vdup3 RHS - 497621349U, // <u,7,u,u>: Cost 1 vext2 RHS, LHS - 135053414U, // <u,u,0,0>: Cost 1 vdup0 LHS - 471081121U, // <u,u,0,1>: Cost 1 vext2 LHS, LHS - 1544822948U, // <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2> - 1616140005U, // <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2> - 1544823122U, // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5> - 1512157453U, // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0> - 1662220032U, // <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2> - 1194457487U, // <u,u,0,7>: Cost 2 vrev <u,u,7,0> - 471081629U, // <u,u,0,u>: Cost 1 vext2 LHS, LHS - 1544823542U, // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2> - 202162278U, // <u,u,1,1>: Cost 1 vdup1 LHS - 537753390U, // <u,u,1,2>: Cost 1 vext3 LHS, LHS - 1544823768U, // <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3> - 1494248758U, // <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS - 
1544823952U, // <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7> - 1518138343U, // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1> - 1640322907U, // <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3> - 537753444U, // <u,u,1,u>: Cost 1 vext3 LHS, LHS - 1482309734U, // <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, LHS - 1194031451U, // <u,u,2,1>: Cost 2 vrev <u,u,1,2> - 269271142U, // <u,u,2,2>: Cost 1 vdup2 LHS - 835584U, // <u,u,2,3>: Cost 0 copy LHS - 1482313014U, // <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS - 2618566504U, // <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6> - 1544824762U, // <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7> - 1638479788U, // <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3> - 835584U, // <u,u,2,u>: Cost 0 copy LHS - 408576723U, // <u,u,3,0>: Cost 1 vext1 LHS, LHS - 1482318582U, // <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2> - 120371557U, // <u,u,3,2>: Cost 1 vrev LHS - 336380006U, // <u,u,3,3>: Cost 1 vdup3 LHS - 408579382U, // <u,u,3,4>: Cost 1 vext1 LHS, RHS - 1616140271U, // <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7> - 1530098170U, // <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3> - 1880329544U, // <u,u,3,7>: Cost 2 vzipr LHS, RHS - 408581934U, // <u,u,3,u>: Cost 1 vext1 LHS, LHS - 1488298086U, // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS - 1488299437U, // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4> - 1659271204U, // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6> - 1194195311U, // <u,u,4,3>: Cost 2 vrev <u,u,3,4> - 161926454U, // <u,u,4,4>: Cost 1 vdup0 RHS - 471084342U, // <u,u,4,5>: Cost 1 vext2 LHS, RHS - 1571368308U, // <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6> - 1640323153U, // <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6> - 471084585U, // <u,u,4,u>: Cost 1 vext2 LHS, RHS - 1494278246U, // <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS - 1571368656U, // <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3> - 1494280327U, // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5> - 1616140415U, // <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7> - 1494281526U, // <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS - 229035318U, // <u,u,5,5>: Cost 1 vdup1 RHS - 537753754U, // <u,u,5,6>: Cost 1 vext3 LHS, RHS - 1750355254U, // <u,u,5,7>: Cost 2 vuzpr LHS, RHS - 537753772U, // <u,u,5,u>: Cost 1 vext3 LHS, RHS - 1482342502U, // <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS - 2556084982U, // <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2> - 1571369466U, // <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3> - 1611938000U, // <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7> - 1482345782U, // <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS - 1194359171U, // <u,u,6,5>: Cost 2 vrev <u,u,5,6> - 296144182U, // <u,u,6,6>: Cost 1 vdup2 RHS - 27705344U, // <u,u,6,7>: Cost 0 copy RHS - 27705344U, // <u,u,6,u>: Cost 0 copy RHS - 432496742U, // <u,u,7,0>: Cost 1 vext1 RHS, LHS - 1488324016U, // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7> - 1494296713U, // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7> - 1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS - 432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS - 1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3> - 120699277U, // <u,u,7,6>: Cost 1 vrev RHS - 363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS - 432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS - 408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS - 471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS - 537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS - 835584U, // <u,u,u,3>: Cost 0 copy LHS - 408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS - 471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS - 537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS - 27705344U, // <u,u,u,7>: Cost 0 copy RHS - 835584U, // <u,u,u,u>: Cost 0 copy LHS - 0 -}; diff --git 
a/lib/Target/ARM64/ARM64PromoteConstant.cpp b/lib/Target/ARM64/ARM64PromoteConstant.cpp deleted file mode 100644 index 9fbaedb..0000000 --- a/lib/Target/ARM64/ARM64PromoteConstant.cpp +++ /dev/null @@ -1,585 +0,0 @@ - -//===-- ARM64PromoteConstant.cpp --- Promote constant to global for ARM64 -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ARM64PromoteConstant pass which promotes constant -// to global variables when this is likely to be more efficient. -// Currently only types related to constant vector (i.e., constant vector, array -// of constant vectors, constant structure with a constant vector field, etc.) -// are promoted to global variables. -// Indeed, constant vector are likely to be lowered in target constant pool -// during instruction selection. -// Therefore, the access will remain the same (memory load), but the structures -// types are not split into different constant pool accesses for each field. -// The bonus side effect is that created globals may be merged by the global -// merge pass. -// -// FIXME: This pass may be useful for other targets too. -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-promote-const" -#include "ARM64.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/InlineAsm.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" - -using namespace llvm; - -// Stress testing mode - disable heuristics. -static cl::opt<bool> Stress("arm64-stress-promote-const", cl::Hidden, - cl::desc("Promote all vector constants")); - -STATISTIC(NumPromoted, "Number of promoted constants"); -STATISTIC(NumPromotedUses, "Number of promoted constants uses"); - -//===----------------------------------------------------------------------===// -// ARM64PromoteConstant -//===----------------------------------------------------------------------===// - -namespace { -/// Promotes interesting constant into global variables. -/// The motivating example is: -/// static const uint16_t TableA[32] = { -/// 41944, 40330, 38837, 37450, 36158, 34953, 33826, 32768, -/// 31776, 30841, 29960, 29128, 28340, 27595, 26887, 26215, -/// 25576, 24967, 24386, 23832, 23302, 22796, 22311, 21846, -/// 21400, 20972, 20561, 20165, 19785, 19419, 19066, 18725, -/// }; -/// -/// uint8x16x4_t LoadStatic(void) { -/// uint8x16x4_t ret; -/// ret.val[0] = vld1q_u16(TableA + 0); -/// ret.val[1] = vld1q_u16(TableA + 8); -/// ret.val[2] = vld1q_u16(TableA + 16); -/// ret.val[3] = vld1q_u16(TableA + 24); -/// return ret; -/// } -/// -/// The constants in that example are folded into the uses. Thus, 4 different -/// constants are created. -/// As their type is vector the cheapest way to create them is to load them -/// for the memory. -/// Therefore the final assembly final has 4 different load. -/// With this pass enabled, only one load is issued for the constants. 
-class ARM64PromoteConstant : public ModulePass { - -public: - static char ID; - ARM64PromoteConstant() : ModulePass(ID) {} - - virtual const char *getPassName() const { return "ARM64 Promote Constant"; } - - /// Iterate over the functions and promote the interesting constants into - /// global variables with module scope. - bool runOnModule(Module &M) { - DEBUG(dbgs() << getPassName() << '\n'); - bool Changed = false; - for (auto &MF: M) { - Changed |= runOnFunction(MF); - } - return Changed; - } - -private: - /// Look for interesting constants used within the given function. - /// Promote them into global variables, load these global variables within - /// the related function, so that the number of inserted load is minimal. - bool runOnFunction(Function &F); - - // This transformation requires dominator info - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - } - - /// Type to store a list of User - typedef SmallVector<Value::user_iterator, 4> Users; - /// Map an insertion point to all the uses it dominates. - typedef DenseMap<Instruction *, Users> InsertionPoints; - /// Map a function to the required insertion point of load for a - /// global variable - typedef DenseMap<Function *, InsertionPoints> InsertionPointsPerFunc; - - /// Find the closest point that dominates the given Use. - Instruction *findInsertionPoint(Value::user_iterator &Use); - - /// Check if the given insertion point is dominated by an existing - /// insertion point. - /// If true, the given use is added to the list of dominated uses for - /// the related existing point. - /// \param NewPt the insertion point to be checked - /// \param UseIt the use to be added into the list of dominated uses - /// \param InsertPts existing insertion points - /// \pre NewPt and all instruction in InsertPts belong to the same function - /// \return true if one of the insertion point in InsertPts dominates NewPt, - /// false otherwise - bool isDominated(Instruction *NewPt, Value::user_iterator &UseIt, - InsertionPoints &InsertPts); - - /// Check if the given insertion point can be merged with an existing - /// insertion point in a common dominator. - /// If true, the given use is added to the list of the created insertion - /// point. - /// \param NewPt the insertion point to be checked - /// \param UseIt the use to be added into the list of dominated uses - /// \param InsertPts existing insertion points - /// \pre NewPt and all instruction in InsertPts belong to the same function - /// \pre isDominated returns false for the exact same parameters. - /// \return true if it exists an insertion point in InsertPts that could - /// have been merged with NewPt in a common dominator, - /// false otherwise - bool tryAndMerge(Instruction *NewPt, Value::user_iterator &UseIt, - InsertionPoints &InsertPts); - - /// Compute the minimal insertion points to dominates all the interesting - /// uses of value. - /// Insertion points are group per function and each insertion point - /// contains a list of all the uses it dominates within the related function - /// \param Val constant to be examined - /// \param[out] InsPtsPerFunc output storage of the analysis - void computeInsertionPoints(Constant *Val, - InsertionPointsPerFunc &InsPtsPerFunc); - - /// Insert a definition of a new global variable at each point contained in - /// InsPtsPerFunc and update the related uses (also contained in - /// InsPtsPerFunc). 
- bool insertDefinitions(Constant *Cst, InsertionPointsPerFunc &InsPtsPerFunc); - - /// Compute the minimal insertion points to dominate all the interesting - /// uses of Val and insert a definition of a new global variable - /// at these points. - /// Also update the uses of Val accordingly. - /// Currently a use of Val is considered interesting if: - /// - Val is not UndefValue - /// - Val is not zeroinitialized - /// - Replacing Val per a load of a global variable is valid. - /// \see shouldConvert for more details - bool computeAndInsertDefinitions(Constant *Val); - - /// Promote the given constant into a global variable if it is expected to - /// be profitable. - /// \return true if Cst has been promoted - bool promoteConstant(Constant *Cst); - - /// Transfer the list of dominated uses of IPI to NewPt in InsertPts. - /// Append UseIt to this list and delete the entry of IPI in InsertPts. - static void appendAndTransferDominatedUses(Instruction *NewPt, - Value::user_iterator &UseIt, - InsertionPoints::iterator &IPI, - InsertionPoints &InsertPts) { - // Record the dominated use - IPI->second.push_back(UseIt); - // Transfer the dominated uses of IPI to NewPt - // Inserting into the DenseMap may invalidate existing iterator. - // Keep a copy of the key to find the iterator to erase. - Instruction *OldInstr = IPI->first; - InsertPts.insert(InsertionPoints::value_type(NewPt, IPI->second)); - // Erase IPI - IPI = InsertPts.find(OldInstr); - InsertPts.erase(IPI); - } -}; -} // end anonymous namespace - -char ARM64PromoteConstant::ID = 0; - -namespace llvm { -void initializeARM64PromoteConstantPass(PassRegistry &); -} - -INITIALIZE_PASS_BEGIN(ARM64PromoteConstant, "arm64-promote-const", - "ARM64 Promote Constant Pass", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(ARM64PromoteConstant, "arm64-promote-const", - "ARM64 Promote Constant Pass", false, false) - -ModulePass *llvm::createARM64PromoteConstantPass() { - return new ARM64PromoteConstant(); -} - -/// Check if the given type uses a vector type. -static bool isConstantUsingVectorTy(const Type *CstTy) { - if (CstTy->isVectorTy()) - return true; - if (CstTy->isStructTy()) { - for (unsigned EltIdx = 0, EndEltIdx = CstTy->getStructNumElements(); - EltIdx < EndEltIdx; ++EltIdx) - if (isConstantUsingVectorTy(CstTy->getStructElementType(EltIdx))) - return true; - } else if (CstTy->isArrayTy()) - return isConstantUsingVectorTy(CstTy->getArrayElementType()); - return false; -} - -/// Check if the given use (Instruction + OpIdx) of Cst should be converted into -/// a load of a global variable initialized with Cst. -/// A use should be converted if it is legal to do so. -/// For instance, it is not legal to turn the mask operand of a shuffle vector -/// into a load of a global variable. -static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr, - unsigned OpIdx) { - // shufflevector instruction expects a const for the mask argument, i.e., the - // third argument. Do not promote this use in that case. 
-  if (isa<const ShuffleVectorInst>(Instr) && OpIdx == 2)
-    return false;
-
-  // extractvalue instruction expects a const idx
-  if (isa<const ExtractValueInst>(Instr) && OpIdx > 0)
-    return false;
-
-  // insertvalue instruction expects a const idx
-  if (isa<const InsertValueInst>(Instr) && OpIdx > 1)
-    return false;
-
-  if (isa<const AllocaInst>(Instr) && OpIdx > 0)
-    return false;
-
-  // Alignment argument must be constant
-  if (isa<const LoadInst>(Instr) && OpIdx > 0)
-    return false;
-
-  // Alignment argument must be constant
-  if (isa<const StoreInst>(Instr) && OpIdx > 1)
-    return false;
-
-  // Index must be constant
-  if (isa<const GetElementPtrInst>(Instr) && OpIdx > 0)
-    return false;
-
-  // Personality function and filters must be constant.
-  // Give up on that instruction.
-  if (isa<const LandingPadInst>(Instr))
-    return false;
-
-  // switch instruction expects constants to compare to
-  if (isa<const SwitchInst>(Instr))
-    return false;
-
-  // Expected address must be a constant
-  if (isa<const IndirectBrInst>(Instr))
-    return false;
-
-  // Do not mess with intrinsics
-  if (isa<const IntrinsicInst>(Instr))
-    return false;
-
-  // Do not mess with inline asm
-  const CallInst *CI = dyn_cast<const CallInst>(Instr);
-  if (CI && isa<const InlineAsm>(CI->getCalledValue()))
-    return false;
-
-  return true;
-}
-
-/// Check if the given Cst should be converted into
-/// a load of a global variable initialized with Cst.
-/// A constant should be converted if it is likely that the materialization of
-/// the constant will be tricky. Thus, we give up on zero or undef values.
-///
-/// \todo Currently, accept only vector-related types.
-/// Also, we give up on all simple vector types to keep the existing
-/// behavior. Otherwise, we should push here all the checks of the lowering of
-/// BUILD_VECTOR. By giving up, we lose the potential benefit of merging
-/// constants via global merge and the fact that the same constant is stored
-/// only once with this method (versus as many times as there are functions
-/// using the constant with the regular approach, even for floats).
-/// Again, the simplest solution would be to promote every
-/// constant and rematerialize them when they are actually cheap to create.
-static bool shouldConvert(const Constant *Cst) {
-  if (isa<const UndefValue>(Cst))
-    return false;
-
-  // FIXME: In some cases, it may be interesting to promote in memory
-  // a zero-initialized constant.
-  // E.g., when the type of Cst requires more instructions than the
-  // adrp/add/load sequence or when this sequence can be shared by several
-  // instances of Cst.
-  // Ideally, we could promote this into a global and rematerialize the
-  // constant when promotion turns out to be a bad idea.
- if (Cst->isZeroValue()) - return false; - - if (Stress) - return true; - - // FIXME: see function \todo - if (Cst->getType()->isVectorTy()) - return false; - return isConstantUsingVectorTy(Cst->getType()); -} - -Instruction * -ARM64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) { - // If this user is a phi, the insertion point is in the related - // incoming basic block - PHINode *PhiInst = dyn_cast<PHINode>(*Use); - Instruction *InsertionPoint; - if (PhiInst) - InsertionPoint = - PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator(); - else - InsertionPoint = dyn_cast<Instruction>(*Use); - assert(InsertionPoint && "User is not an instruction!"); - return InsertionPoint; -} - -bool ARM64PromoteConstant::isDominated(Instruction *NewPt, - Value::user_iterator &UseIt, - InsertionPoints &InsertPts) { - - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( - *NewPt->getParent()->getParent()).getDomTree(); - - // Traverse all the existing insertion point and check if one is dominating - // NewPt - for (InsertionPoints::iterator IPI = InsertPts.begin(), - EndIPI = InsertPts.end(); - IPI != EndIPI; ++IPI) { - if (NewPt == IPI->first || DT.dominates(IPI->first, NewPt) || - // When IPI->first is a terminator instruction, DT may think that - // the result is defined on the edge. - // Here we are testing the insertion point, not the definition. - (IPI->first->getParent() != NewPt->getParent() && - DT.dominates(IPI->first->getParent(), NewPt->getParent()))) { - // No need to insert this point - // Record the dominated use - DEBUG(dbgs() << "Insertion point dominated by:\n"); - DEBUG(IPI->first->print(dbgs())); - DEBUG(dbgs() << '\n'); - IPI->second.push_back(UseIt); - return true; - } - } - return false; -} - -bool ARM64PromoteConstant::tryAndMerge(Instruction *NewPt, - Value::user_iterator &UseIt, - InsertionPoints &InsertPts) { - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( - *NewPt->getParent()->getParent()).getDomTree(); - BasicBlock *NewBB = NewPt->getParent(); - - // Traverse all the existing insertion point and check if one is dominated by - // NewPt and thus useless or can be combined with NewPt into a common - // dominator - for (InsertionPoints::iterator IPI = InsertPts.begin(), - EndIPI = InsertPts.end(); - IPI != EndIPI; ++IPI) { - BasicBlock *CurBB = IPI->first->getParent(); - if (NewBB == CurBB) { - // Instructions are in the same block. - // By construction, NewPt is dominating the other. - // Indeed, isDominated returned false with the exact same arguments. 
- DEBUG(dbgs() << "Merge insertion point with:\n"); - DEBUG(IPI->first->print(dbgs())); - DEBUG(dbgs() << "\nat considered insertion point.\n"); - appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts); - return true; - } - - // Look for a common dominator - BasicBlock *CommonDominator = DT.findNearestCommonDominator(NewBB, CurBB); - // If none exists, we cannot merge these two points - if (!CommonDominator) - continue; - - if (CommonDominator != NewBB) { - // By construction, the CommonDominator cannot be CurBB - assert(CommonDominator != CurBB && - "Instruction has not been rejected during isDominated check!"); - // Take the last instruction of the CommonDominator as insertion point - NewPt = CommonDominator->getTerminator(); - } - // else, CommonDominator is the block of NewBB, hence NewBB is the last - // possible insertion point in that block - DEBUG(dbgs() << "Merge insertion point with:\n"); - DEBUG(IPI->first->print(dbgs())); - DEBUG(dbgs() << '\n'); - DEBUG(NewPt->print(dbgs())); - DEBUG(dbgs() << '\n'); - appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts); - return true; - } - return false; -} - -void ARM64PromoteConstant::computeInsertionPoints( - Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) { - DEBUG(dbgs() << "** Compute insertion points **\n"); - for (Value::user_iterator UseIt = Val->user_begin(), - EndUseIt = Val->user_end(); - UseIt != EndUseIt; ++UseIt) { - // If the user is not an Instruction, we cannot modify it - if (!isa<Instruction>(*UseIt)) - continue; - - // Filter out uses that should not be converted - if (!shouldConvertUse(Val, cast<Instruction>(*UseIt), UseIt.getOperandNo())) - continue; - - DEBUG(dbgs() << "Considered use, opidx " << UseIt.getOperandNo() << ":\n"); - DEBUG((*UseIt)->print(dbgs())); - DEBUG(dbgs() << '\n'); - - Instruction *InsertionPoint = findInsertionPoint(UseIt); - - DEBUG(dbgs() << "Considered insertion point:\n"); - DEBUG(InsertionPoint->print(dbgs())); - DEBUG(dbgs() << '\n'); - - // Check if the current insertion point is useless, i.e., it is dominated - // by another one. - InsertionPoints &InsertPts = - InsPtsPerFunc[InsertionPoint->getParent()->getParent()]; - if (isDominated(InsertionPoint, UseIt, InsertPts)) - continue; - // This insertion point is useful, check if we can merge some insertion - // point in a common dominator or if NewPt dominates an existing one. 
-    if (tryAndMerge(InsertionPoint, UseIt, InsertPts))
-      continue;
-
-    DEBUG(dbgs() << "Keep considered insertion point\n");
-
-    // It is definitely useful on its own
-    InsertPts[InsertionPoint].push_back(UseIt);
-  }
-}
-
-bool
-ARM64PromoteConstant::insertDefinitions(Constant *Cst,
-                                        InsertionPointsPerFunc &InsPtsPerFunc) {
-  // We will create one global variable per Module
-  DenseMap<Module *, GlobalVariable *> ModuleToMergedGV;
-  bool HasChanged = false;
-
-  // Traverse all insertion points in all the functions
-  for (InsertionPointsPerFunc::iterator FctToInstPtsIt = InsPtsPerFunc.begin(),
-                                        EndIt = InsPtsPerFunc.end();
-       FctToInstPtsIt != EndIt; ++FctToInstPtsIt) {
-    InsertionPoints &InsertPts = FctToInstPtsIt->second;
-// Do more checks for debug purposes
-#ifndef NDEBUG
-    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
-        *FctToInstPtsIt->first).getDomTree();
-#endif
-    GlobalVariable *PromotedGV;
-    assert(!InsertPts.empty() && "Empty uses do not need a definition");
-
-    Module *M = FctToInstPtsIt->first->getParent();
-    DenseMap<Module *, GlobalVariable *>::iterator MapIt =
-        ModuleToMergedGV.find(M);
-    if (MapIt == ModuleToMergedGV.end()) {
-      PromotedGV = new GlobalVariable(
-          *M, Cst->getType(), true, GlobalValue::InternalLinkage, 0,
-          "_PromotedConst", 0, GlobalVariable::NotThreadLocal);
-      PromotedGV->setInitializer(Cst);
-      ModuleToMergedGV[M] = PromotedGV;
-      DEBUG(dbgs() << "Global replacement: ");
-      DEBUG(PromotedGV->print(dbgs()));
-      DEBUG(dbgs() << '\n');
-      ++NumPromoted;
-      HasChanged = true;
-    } else {
-      PromotedGV = MapIt->second;
-    }
-
-    for (InsertionPoints::iterator IPI = InsertPts.begin(),
-                                   EndIPI = InsertPts.end();
-         IPI != EndIPI; ++IPI) {
-      // Create the load of the global variable
-      IRBuilder<> Builder(IPI->first->getParent(), IPI->first);
-      LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV);
-      DEBUG(dbgs() << "**********\n");
-      DEBUG(dbgs() << "New def: ");
-      DEBUG(LoadedCst->print(dbgs()));
-      DEBUG(dbgs() << '\n');
-
-      // Update the dominated uses
-      Users &DominatedUsers = IPI->second;
-      for (Users::iterator UseIt = DominatedUsers.begin(),
-                           EndIt = DominatedUsers.end();
-           UseIt != EndIt; ++UseIt) {
-#ifndef NDEBUG
-        assert((DT.dominates(LoadedCst, cast<Instruction>(**UseIt)) ||
-                (isa<PHINode>(**UseIt) &&
-                 DT.dominates(LoadedCst, findInsertionPoint(*UseIt)))) &&
-               "Inserted definition does not dominate all its uses!");
-#endif
-        DEBUG(dbgs() << "Use to update " << UseIt->getOperandNo() << ":");
-        DEBUG((*UseIt)->print(dbgs()));
-        DEBUG(dbgs() << '\n');
-        (*UseIt)->setOperand(UseIt->getOperandNo(), LoadedCst);
-        ++NumPromotedUses;
-      }
-    }
-  }
-  return HasChanged;
-}
-
-bool ARM64PromoteConstant::computeAndInsertDefinitions(Constant *Val) {
-  InsertionPointsPerFunc InsertPtsPerFunc;
-  computeInsertionPoints(Val, InsertPtsPerFunc);
-  return insertDefinitions(Val, InsertPtsPerFunc);
-}
-
-bool ARM64PromoteConstant::promoteConstant(Constant *Cst) {
-  assert(Cst && "Given variable is not a valid constant.");
-
-  if (!shouldConvert(Cst))
-    return false;
-
-  DEBUG(dbgs() << "******************************\n");
-  DEBUG(dbgs() << "Candidate constant: ");
-  DEBUG(Cst->print(dbgs()));
-  DEBUG(dbgs() << '\n');
-
-  return computeAndInsertDefinitions(Cst);
-}
-
-bool ARM64PromoteConstant::runOnFunction(Function &F) {
-  // Look for instructions using constant vectors.
-  // Promote those constants to global variables.
-  // Create as few loads of these variables as possible and update the uses
-  // accordingly.
-  bool LocalChange = false;
-  SmallSet<Constant *, 8> AlreadyChecked;
-
-  for (auto &MBB : F) {
-    for (auto &MI : MBB) {
-      // Traverse the operands, looking for constant vectors.
-      // Replace them with a load of a global variable of constant vector type.
-      for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands();
-           OpIdx != EndOpIdx; ++OpIdx) {
-        Constant *Cst = dyn_cast<Constant>(MI.getOperand(OpIdx));
-        // There is no point in promoting global values; they are already
-        // global. Do not promote constant expressions, as they may require
-        // some code expansion.
-        if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) &&
-            AlreadyChecked.insert(Cst))
-          LocalChange |= promoteConstant(Cst);
-      }
-    }
-  }
-  return LocalChange;
-}
diff --git a/lib/Target/ARM64/ARM64RegisterInfo.cpp b/lib/Target/ARM64/ARM64RegisterInfo.cpp
deleted file mode 100644
index 4c7fc8a..0000000
--- a/lib/Target/ARM64/ARM64RegisterInfo.cpp
+++ /dev/null
@@ -1,400 +0,0 @@
-//===- ARM64RegisterInfo.cpp - ARM64 Register Information -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the ARM64 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM64RegisterInfo.h"
-#include "ARM64FrameLowering.h"
-#include "ARM64InstrInfo.h"
-#include "ARM64Subtarget.h"
-#include "MCTargetDesc/ARM64AddressingModes.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetOptions.h"
-
-#define GET_REGINFO_TARGET_DESC
-#include "ARM64GenRegisterInfo.inc"
-
-using namespace llvm;
-
-ARM64RegisterInfo::ARM64RegisterInfo(const ARM64InstrInfo *tii,
-                                     const ARM64Subtarget *sti)
-    : ARM64GenRegisterInfo(ARM64::LR), TII(tii), STI(sti) {}
-
-const uint16_t *
-ARM64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-  assert(MF && "Invalid MachineFunction pointer.");
-  if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
-    return CSR_ARM64_AllRegs_SaveList;
-  else
-    return CSR_ARM64_AAPCS_SaveList;
-}
-
-const uint32_t *
-ARM64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
-  if (CC == CallingConv::AnyReg)
-    return CSR_ARM64_AllRegs_RegMask;
-  else
-    return CSR_ARM64_AAPCS_RegMask;
-}
-
-const uint32_t *ARM64RegisterInfo::getTLSCallPreservedMask() const {
-  if (STI->isTargetDarwin())
-    return CSR_ARM64_TLS_Darwin_RegMask;
-
-  assert(STI->isTargetELF() && "only expect Darwin or ELF TLS");
-  return CSR_ARM64_TLS_ELF_RegMask;
-}
-
-const uint32_t *
-ARM64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
-  // This should return a register mask that is the same as that returned by
-  // getCallPreservedMask but that additionally preserves the register used for
-  // the first i64 argument (which must also be the register used to return a
-  // single i64 return value)
-  //
-  // In case that the calling convention does not use the same register
for - // both, the function should return NULL (does not currently apply) - return CSR_ARM64_AAPCS_ThisReturn_RegMask; -} - -BitVector ARM64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - // FIXME: avoid re-calculating this everytime. - BitVector Reserved(getNumRegs()); - Reserved.set(ARM64::SP); - Reserved.set(ARM64::XZR); - Reserved.set(ARM64::WSP); - Reserved.set(ARM64::WZR); - - if (TFI->hasFP(MF) || STI->isTargetDarwin()) { - Reserved.set(ARM64::FP); - Reserved.set(ARM64::W29); - } - - if (STI->isTargetDarwin()) { - Reserved.set(ARM64::X18); // Platform register - Reserved.set(ARM64::W18); - } - - if (hasBasePointer(MF)) { - Reserved.set(ARM64::X19); - Reserved.set(ARM64::W19); - } - - return Reserved; -} - -bool ARM64RegisterInfo::isReservedReg(const MachineFunction &MF, - unsigned Reg) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - switch (Reg) { - default: - break; - case ARM64::SP: - case ARM64::XZR: - case ARM64::WSP: - case ARM64::WZR: - return true; - case ARM64::X18: - case ARM64::W18: - return STI->isTargetDarwin(); - case ARM64::FP: - case ARM64::W29: - return TFI->hasFP(MF) || STI->isTargetDarwin(); - case ARM64::W19: - case ARM64::X19: - return hasBasePointer(MF); - } - - return false; -} - -const TargetRegisterClass * -ARM64RegisterInfo::getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const { - return &ARM64::GPR64RegClass; -} - -const TargetRegisterClass * -ARM64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { - if (RC == &ARM64::CCRRegClass) - return NULL; // Can't copy CPSR. - return RC; -} - -unsigned ARM64RegisterInfo::getBaseRegister() const { return ARM64::X19; } - -bool ARM64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - - // In the presence of variable sized objects, if the fixed stack size is - // large enough that referencing from the FP won't result in things being - // in range relatively often, we can use a base pointer to allow access - // from the other direction like the SP normally works. - if (MFI->hasVarSizedObjects()) { - // Conservatively estimate whether the negative offset from the frame - // pointer will be sufficient to reach. If a function has a smallish - // frame, it's less likely to have lots of spills and callee saved - // space, so it's all more likely to be within range of the frame pointer. - // If it's wrong, we'll materialize the constant and still get to the - // object; it's just suboptimal. Negative offsets use the unscaled - // load/store instructions, which have a 9-bit signed immediate. - if (MFI->getLocalFrameSize() < 256) - return false; - return true; - } - - return false; -} - -unsigned ARM64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - return TFI->hasFP(MF) ? 
ARM64::FP : ARM64::SP; -} - -bool -ARM64RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - return true; -} - -bool ARM64RegisterInfo::requiresVirtualBaseRegisters(const MachineFunction &MF) - const { - return true; -} - -bool -ARM64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - // ARM64FrameLowering::resolveFrameIndexReference() can always fall back - // to the stack pointer, so only put the emergency spill slot next to the - // FP when there's no better way to access it (SP or base pointer). - return MFI->hasVarSizedObjects() && !hasBasePointer(MF); -} - -bool ARM64RegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) - const { - return true; -} - -bool ARM64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - // Only consider eliminating leaf frames. - if (MFI->hasCalls() || (MF.getTarget().Options.DisableFramePointerElim(MF) && - MFI->adjustsStack())) - return true; - return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); -} - -/// needsFrameBaseReg - Returns true if the instruction's frame index -/// reference would be better served by a base register other than FP -/// or SP. Used by LocalStackFrameAllocation to determine which frame index -/// references it should create new base registers for. -bool ARM64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, - int64_t Offset) const { - for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) - assert(i < MI->getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - - // It's the load/store FI references that cause issues, as it can be difficult - // to materialize the offset if it won't fit in the literal field. Estimate - // based on the size of the local frame and some conservative assumptions - // about the rest of the stack frame (note, this is pre-regalloc, so - // we don't know everything for certain yet) whether this offset is likely - // to be out of range of the immediate. Return true if so. - - // We only generate virtual base registers for loads and stores, so - // return false for everything else. - if (!MI->mayLoad() && !MI->mayStore()) - return false; - - // Without a virtual base register, if the function has variable sized - // objects, all fixed-size local references will be via the frame pointer, - // Approximate the offset and see if it's legal for the instruction. - // Note that the incoming offset is based on the SP value at function entry, - // so it'll be negative. - MachineFunction &MF = *MI->getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - - // Estimate an offset from the frame pointer. - // Conservatively assume all GPR callee-saved registers get pushed. - // FP, LR, X19-X28, D8-D15. 64-bits each. - int64_t FPOffset = Offset - 16 * 20; - // Estimate an offset from the stack pointer. - // The incoming offset is relating to the SP at the start of the function, - // but when we access the local it'll be relative to the SP after local - // allocation, so adjust our SP-relative offset by that allocation size. - Offset += MFI->getLocalFrameSize(); - // Assume that we'll have at least some spill slots allocated. - // FIXME: This is a total SWAG number. We should run some statistics - // and pick a real one. - Offset += 128; // 128 bytes of spill slots - - // If there is a frame pointer, try using it. 
- // The FP is only available if there is no dynamic realignment. We - // don't know for sure yet whether we'll need that, so we guess based - // on whether there are any local variables that would trigger it. - if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset)) - return false; - - // If we can reference via the stack pointer or base pointer, try that. - // FIXME: This (and the code that resolves the references) can be improved - // to only disallow SP relative references in the live range of - // the VLA(s). In practice, it's unclear how much difference that - // would make, but it may be worth doing. - if (isFrameOffsetLegal(MI, Offset)) - return false; - - // The offset likely isn't legal; we want to allocate a virtual base register. - return true; -} - -bool ARM64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, - int64_t Offset) const { - assert(Offset <= INT_MAX && "Offset too big to fit in int."); - assert(MI && "Unable to get the legal offset for nil instruction."); - int SaveOffset = Offset; - return isARM64FrameOffsetLegal(*MI, SaveOffset) & ARM64FrameOffsetIsLegal; -} - -/// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx -/// at the beginning of the basic block. -void ARM64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, - unsigned BaseReg, - int FrameIdx, - int64_t Offset) const { - MachineBasicBlock::iterator Ins = MBB->begin(); - DebugLoc DL; // Defaults to "unknown" - if (Ins != MBB->end()) - DL = Ins->getDebugLoc(); - - const MCInstrDesc &MCID = TII->get(ARM64::ADDXri); - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - const MachineFunction &MF = *MBB->getParent(); - MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF)); - unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0); - - BuildMI(*MBB, Ins, DL, MCID, BaseReg) - .addFrameIndex(FrameIdx) - .addImm(Offset) - .addImm(Shifter); -} - -void ARM64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, - int64_t Offset) const { - int Off = Offset; // ARM doesn't need the general 64-bit offsets - unsigned i = 0; - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - bool Done = rewriteARM64FrameIndex(MI, i, BaseReg, Off, TII); - assert(Done && "Unable to resolve frame index!"); - (void)Done; -} - -void ARM64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, unsigned FIOperandNum, - RegScavenger *RS) const { - assert(SPAdj == 0 && "Unexpected"); - - MachineInstr &MI = *II; - MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - const ARM64FrameLowering *TFI = static_cast<const ARM64FrameLowering *>( - MF.getTarget().getFrameLowering()); - - int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); - unsigned FrameReg; - int Offset; - - // Special handling of dbg_value, stackmap and patchpoint instructions. 
- if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP || - MI.getOpcode() == TargetOpcode::PATCHPOINT) { - Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, - /*PreferFP=*/true); - Offset += MI.getOperand(FIOperandNum + 1).getImm(); - MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); - return; - } - - // Modify MI as necessary to handle as much of 'Offset' as possible - Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg); - if (rewriteARM64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII)) - return; - - assert((!RS || !RS->isScavengingFrameIndex(FrameIndex)) && - "Emergency spill slot is out of reach"); - - // If we get here, the immediate doesn't fit into the instruction. We folded - // as much as possible above. Handle the rest, providing a register that is - // SP+LargeImm. - unsigned ScratchReg = - MF.getRegInfo().createVirtualRegister(&ARM64::GPR64RegClass); - emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII); - MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true); -} - -namespace llvm { - -unsigned ARM64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - switch (RC->getID()) { - default: - return 0; - case ARM64::GPR32RegClassID: - case ARM64::GPR32spRegClassID: - case ARM64::GPR32allRegClassID: - case ARM64::GPR64spRegClassID: - case ARM64::GPR64allRegClassID: - case ARM64::GPR64RegClassID: - case ARM64::GPR32commonRegClassID: - case ARM64::GPR64commonRegClassID: - return 32 - 1 // XZR/SP - - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP - - STI->isTargetDarwin() // X18 reserved as platform register - - hasBasePointer(MF); // X19 - case ARM64::FPR8RegClassID: - case ARM64::FPR16RegClassID: - case ARM64::FPR32RegClassID: - case ARM64::FPR64RegClassID: - case ARM64::FPR128RegClassID: - return 32; - - case ARM64::DDRegClassID: - case ARM64::DDDRegClassID: - case ARM64::DDDDRegClassID: - case ARM64::QQRegClassID: - case ARM64::QQQRegClassID: - case ARM64::QQQQRegClassID: - return 32; - - case ARM64::FPR128_loRegClassID: - return 16; - } -} - -} // namespace llvm diff --git a/lib/Target/ARM64/ARM64RegisterInfo.h b/lib/Target/ARM64/ARM64RegisterInfo.h deleted file mode 100644 index 31d9242..0000000 --- a/lib/Target/ARM64/ARM64RegisterInfo.h +++ /dev/null @@ -1,101 +0,0 @@ -//===- ARM64RegisterInfo.h - ARM64 Register Information Impl ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM64 implementation of the MRegisterInfo class. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_ARM64REGISTERINFO_H -#define LLVM_TARGET_ARM64REGISTERINFO_H - -#define GET_REGINFO_HEADER -#include "ARM64GenRegisterInfo.inc" - -namespace llvm { - -class ARM64InstrInfo; -class ARM64Subtarget; -class MachineFunction; -class RegScavenger; -class TargetRegisterClass; - -struct ARM64RegisterInfo : public ARM64GenRegisterInfo { -private: - const ARM64InstrInfo *TII; - const ARM64Subtarget *STI; - -public: - ARM64RegisterInfo(const ARM64InstrInfo *tii, const ARM64Subtarget *sti); - - bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; - - /// Code Generation virtual methods... - const uint16_t * - getCalleeSavedRegs(const MachineFunction *MF = 0) const override; - const uint32_t *getCallPreservedMask(CallingConv::ID) const override; - - unsigned getCSRFirstUseCost() const { - // The cost will be compared against BlockFrequency where entry has the - // value of 1 << 14. A value of 5 will choose to spill or split really - // cold path instead of using a callee-saved register. - return 5; - } - - // Calls involved in thread-local variable lookup save more registers than - // normal calls, so they need a different mask to represent this. - const uint32_t *getTLSCallPreservedMask() const; - - /// getThisReturnPreservedMask - Returns a call preserved mask specific to the - /// case that 'returned' is on an i64 first argument if the calling convention - /// is one that can (partially) model this attribute with a preserved mask - /// (i.e. it is a calling convention that uses the same register for the first - /// i64 argument and an i64 return value) - /// - /// Should return NULL in the case that the calling convention does not have - /// this property - const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; - - BitVector getReservedRegs(const MachineFunction &MF) const override; - const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, - unsigned Kind = 0) const override; - const TargetRegisterClass * - getCrossCopyRegClass(const TargetRegisterClass *RC) const override; - - bool requiresRegisterScavenging(const MachineFunction &MF) const override; - bool useFPForScavengingIndex(const MachineFunction &MF) const override; - bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; - - bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; - bool isFrameOffsetLegal(const MachineInstr *MI, - int64_t Offset) const override; - void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, - int FrameIdx, - int64_t Offset) const override; - void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, - int64_t Offset) const override; - void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - unsigned FIOperandNum, - RegScavenger *RS = NULL) const override; - bool cannotEliminateFrame(const MachineFunction &MF) const; - - bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override; - bool hasBasePointer(const MachineFunction &MF) const; - unsigned getBaseRegister() const; - - // Debug information queries. 
- unsigned getFrameRegister(const MachineFunction &MF) const override; - - unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const override; -}; - -} // end namespace llvm - -#endif // LLVM_TARGET_ARM64REGISTERINFO_H diff --git a/lib/Target/ARM64/ARM64RegisterInfo.td b/lib/Target/ARM64/ARM64RegisterInfo.td deleted file mode 100644 index 96001c5..0000000 --- a/lib/Target/ARM64/ARM64RegisterInfo.td +++ /dev/null @@ -1,561 +0,0 @@ -//===- ARM64RegisterInfo.td - Describe the ARM64 Regisers --*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - - -class ARM64Reg<bits<16> enc, string n, list<Register> subregs = [], - list<string> altNames = []> - : Register<n, altNames> { - let HWEncoding = enc; - let Namespace = "ARM64"; - let SubRegs = subregs; -} - -let Namespace = "ARM64" in { - def sub_32 : SubRegIndex<32>; - - def bsub : SubRegIndex<8>; - def hsub : SubRegIndex<16>; - def ssub : SubRegIndex<32>; - def dsub : SubRegIndex<32>; - def qhisub : SubRegIndex<64>; - def qsub : SubRegIndex<64>; - // Note: Code depends on these having consecutive numbers - def dsub0 : SubRegIndex<64>; - def dsub1 : SubRegIndex<64>; - def dsub2 : SubRegIndex<64>; - def dsub3 : SubRegIndex<64>; - // Note: Code depends on these having consecutive numbers - def qsub0 : SubRegIndex<128>; - def qsub1 : SubRegIndex<128>; - def qsub2 : SubRegIndex<128>; - def qsub3 : SubRegIndex<128>; -} - -let Namespace = "ARM64" in { - def vreg : RegAltNameIndex; - def vlist1 : RegAltNameIndex; -} - -//===----------------------------------------------------------------------===// -// Registers -//===----------------------------------------------------------------------===// -def W0 : ARM64Reg<0, "w0" >, DwarfRegNum<[0]>; -def W1 : ARM64Reg<1, "w1" >, DwarfRegNum<[1]>; -def W2 : ARM64Reg<2, "w2" >, DwarfRegNum<[2]>; -def W3 : ARM64Reg<3, "w3" >, DwarfRegNum<[3]>; -def W4 : ARM64Reg<4, "w4" >, DwarfRegNum<[4]>; -def W5 : ARM64Reg<5, "w5" >, DwarfRegNum<[5]>; -def W6 : ARM64Reg<6, "w6" >, DwarfRegNum<[6]>; -def W7 : ARM64Reg<7, "w7" >, DwarfRegNum<[7]>; -def W8 : ARM64Reg<8, "w8" >, DwarfRegNum<[8]>; -def W9 : ARM64Reg<9, "w9" >, DwarfRegNum<[9]>; -def W10 : ARM64Reg<10, "w10">, DwarfRegNum<[10]>; -def W11 : ARM64Reg<11, "w11">, DwarfRegNum<[11]>; -def W12 : ARM64Reg<12, "w12">, DwarfRegNum<[12]>; -def W13 : ARM64Reg<13, "w13">, DwarfRegNum<[13]>; -def W14 : ARM64Reg<14, "w14">, DwarfRegNum<[14]>; -def W15 : ARM64Reg<15, "w15">, DwarfRegNum<[15]>; -def W16 : ARM64Reg<16, "w16">, DwarfRegNum<[16]>; -def W17 : ARM64Reg<17, "w17">, DwarfRegNum<[17]>; -def W18 : ARM64Reg<18, "w18">, DwarfRegNum<[18]>; -def W19 : ARM64Reg<19, "w19">, DwarfRegNum<[19]>; -def W20 : ARM64Reg<20, "w20">, DwarfRegNum<[20]>; -def W21 : ARM64Reg<21, "w21">, DwarfRegNum<[21]>; -def W22 : ARM64Reg<22, "w22">, DwarfRegNum<[22]>; -def W23 : ARM64Reg<23, "w23">, DwarfRegNum<[23]>; -def W24 : ARM64Reg<24, "w24">, DwarfRegNum<[24]>; -def W25 : ARM64Reg<25, "w25">, DwarfRegNum<[25]>; -def W26 : ARM64Reg<26, "w26">, DwarfRegNum<[26]>; -def W27 : ARM64Reg<27, "w27">, DwarfRegNum<[27]>; -def W28 : ARM64Reg<28, "w28">, DwarfRegNum<[28]>; -def W29 : ARM64Reg<29, "w29">, DwarfRegNum<[29]>; -def W30 : ARM64Reg<30, "w30">, DwarfRegNum<[30]>; -def WSP : 
ARM64Reg<31, "wsp">, DwarfRegNum<[31]>; -def WZR : ARM64Reg<31, "wzr">, DwarfRegAlias<WSP>; - -let SubRegIndices = [sub_32] in { -def X0 : ARM64Reg<0, "x0", [W0]>, DwarfRegAlias<W0>; -def X1 : ARM64Reg<1, "x1", [W1]>, DwarfRegAlias<W1>; -def X2 : ARM64Reg<2, "x2", [W2]>, DwarfRegAlias<W2>; -def X3 : ARM64Reg<3, "x3", [W3]>, DwarfRegAlias<W3>; -def X4 : ARM64Reg<4, "x4", [W4]>, DwarfRegAlias<W4>; -def X5 : ARM64Reg<5, "x5", [W5]>, DwarfRegAlias<W5>; -def X6 : ARM64Reg<6, "x6", [W6]>, DwarfRegAlias<W6>; -def X7 : ARM64Reg<7, "x7", [W7]>, DwarfRegAlias<W7>; -def X8 : ARM64Reg<8, "x8", [W8]>, DwarfRegAlias<W8>; -def X9 : ARM64Reg<9, "x9", [W9]>, DwarfRegAlias<W9>; -def X10 : ARM64Reg<10, "x10", [W10]>, DwarfRegAlias<W10>; -def X11 : ARM64Reg<11, "x11", [W11]>, DwarfRegAlias<W11>; -def X12 : ARM64Reg<12, "x12", [W12]>, DwarfRegAlias<W12>; -def X13 : ARM64Reg<13, "x13", [W13]>, DwarfRegAlias<W13>; -def X14 : ARM64Reg<14, "x14", [W14]>, DwarfRegAlias<W14>; -def X15 : ARM64Reg<15, "x15", [W15]>, DwarfRegAlias<W15>; -def X16 : ARM64Reg<16, "x16", [W16]>, DwarfRegAlias<W16>; -def X17 : ARM64Reg<17, "x17", [W17]>, DwarfRegAlias<W17>; -def X18 : ARM64Reg<18, "x18", [W18]>, DwarfRegAlias<W18>; -def X19 : ARM64Reg<19, "x19", [W19]>, DwarfRegAlias<W19>; -def X20 : ARM64Reg<20, "x20", [W20]>, DwarfRegAlias<W20>; -def X21 : ARM64Reg<21, "x21", [W21]>, DwarfRegAlias<W21>; -def X22 : ARM64Reg<22, "x22", [W22]>, DwarfRegAlias<W22>; -def X23 : ARM64Reg<23, "x23", [W23]>, DwarfRegAlias<W23>; -def X24 : ARM64Reg<24, "x24", [W24]>, DwarfRegAlias<W24>; -def X25 : ARM64Reg<25, "x25", [W25]>, DwarfRegAlias<W25>; -def X26 : ARM64Reg<26, "x26", [W26]>, DwarfRegAlias<W26>; -def X27 : ARM64Reg<27, "x27", [W27]>, DwarfRegAlias<W27>; -def X28 : ARM64Reg<28, "x28", [W28]>, DwarfRegAlias<W28>; -def FP : ARM64Reg<29, "fp", [W29]>, DwarfRegAlias<W29>; -def LR : ARM64Reg<30, "lr", [W30]>, DwarfRegAlias<W30>; -def SP : ARM64Reg<31, "sp", [WSP]>, DwarfRegAlias<WSP>; -def XZR : ARM64Reg<31, "xzr", [WZR]>, DwarfRegAlias<WSP>; -} - -// Condition code register. -def CPSR : ARM64Reg<0, "cpsr">; - -// GPR register classes with the intersections of GPR32/GPR32sp and -// GPR64/GPR64sp for use by the coalescer. -def GPR32common : RegisterClass<"ARM64", [i32], 32, (sequence "W%u", 0, 30)> { - let AltOrders = [(rotl GPR32common, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64common : RegisterClass<"ARM64", [i64], 64, - (add (sequence "X%u", 0, 28), FP, LR)> { - let AltOrders = [(rotl GPR64common, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -// GPR register classes which exclude SP/WSP. -def GPR32 : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR)> { - let AltOrders = [(rotl GPR32, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64 : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR)> { - let AltOrders = [(rotl GPR64, 8)]; - let AltOrderSelect = [{ return 1; }]; -} - -// GPR register classes which include SP/WSP. -def GPR32sp : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WSP)> { - let AltOrders = [(rotl GPR32sp, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64sp : RegisterClass<"ARM64", [i64], 64, (add GPR64common, SP)> { - let AltOrders = [(rotl GPR64sp, 8)]; - let AltOrderSelect = [{ return 1; }]; -} - -// GPR register classes which include WZR/XZR AND SP/WSP. This is not a -// constraint used by any instructions, it is used as a common super-class. 
-def GPR32all : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR, WSP)>; -def GPR64all : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR, SP)>; - -// For tail calls, we can't use callee-saved registers, as they are restored -// to the saved value before the tail call, which would clobber a call address. -// This is for indirect tail calls to store the address of the destination. -def tcGPR64 : RegisterClass<"ARM64", [i64], 64, (sub GPR64common, X19, X20, X21, - X22, X23, X24, X25, X26, - X27, X28)>; - -// GPR register classes for post increment ammount of vector load/store that -// has alternate printing when Rm=31 and prints a constant immediate value -// equal to the total number of bytes transferred. -def GPR64pi1 : RegisterOperand<GPR64, "printPostIncOperand1">; -def GPR64pi2 : RegisterOperand<GPR64, "printPostIncOperand2">; -def GPR64pi3 : RegisterOperand<GPR64, "printPostIncOperand3">; -def GPR64pi4 : RegisterOperand<GPR64, "printPostIncOperand4">; -def GPR64pi6 : RegisterOperand<GPR64, "printPostIncOperand6">; -def GPR64pi8 : RegisterOperand<GPR64, "printPostIncOperand8">; -def GPR64pi12 : RegisterOperand<GPR64, "printPostIncOperand12">; -def GPR64pi16 : RegisterOperand<GPR64, "printPostIncOperand16">; -def GPR64pi24 : RegisterOperand<GPR64, "printPostIncOperand24">; -def GPR64pi32 : RegisterOperand<GPR64, "printPostIncOperand32">; -def GPR64pi48 : RegisterOperand<GPR64, "printPostIncOperand48">; -def GPR64pi64 : RegisterOperand<GPR64, "printPostIncOperand64">; - -// Condition code regclass. -def CCR : RegisterClass<"ARM64", [i32], 32, (add CPSR)> { - let CopyCost = -1; // Don't allow copying of status registers. - - // CCR is not allocatable. - let isAllocatable = 0; -} - -//===----------------------------------------------------------------------===// -// Floating Point Scalar Registers -//===----------------------------------------------------------------------===// - -def B0 : ARM64Reg<0, "b0">, DwarfRegNum<[64]>; -def B1 : ARM64Reg<1, "b1">, DwarfRegNum<[65]>; -def B2 : ARM64Reg<2, "b2">, DwarfRegNum<[66]>; -def B3 : ARM64Reg<3, "b3">, DwarfRegNum<[67]>; -def B4 : ARM64Reg<4, "b4">, DwarfRegNum<[68]>; -def B5 : ARM64Reg<5, "b5">, DwarfRegNum<[69]>; -def B6 : ARM64Reg<6, "b6">, DwarfRegNum<[70]>; -def B7 : ARM64Reg<7, "b7">, DwarfRegNum<[71]>; -def B8 : ARM64Reg<8, "b8">, DwarfRegNum<[72]>; -def B9 : ARM64Reg<9, "b9">, DwarfRegNum<[73]>; -def B10 : ARM64Reg<10, "b10">, DwarfRegNum<[74]>; -def B11 : ARM64Reg<11, "b11">, DwarfRegNum<[75]>; -def B12 : ARM64Reg<12, "b12">, DwarfRegNum<[76]>; -def B13 : ARM64Reg<13, "b13">, DwarfRegNum<[77]>; -def B14 : ARM64Reg<14, "b14">, DwarfRegNum<[78]>; -def B15 : ARM64Reg<15, "b15">, DwarfRegNum<[79]>; -def B16 : ARM64Reg<16, "b16">, DwarfRegNum<[80]>; -def B17 : ARM64Reg<17, "b17">, DwarfRegNum<[81]>; -def B18 : ARM64Reg<18, "b18">, DwarfRegNum<[82]>; -def B19 : ARM64Reg<19, "b19">, DwarfRegNum<[83]>; -def B20 : ARM64Reg<20, "b20">, DwarfRegNum<[84]>; -def B21 : ARM64Reg<21, "b21">, DwarfRegNum<[85]>; -def B22 : ARM64Reg<22, "b22">, DwarfRegNum<[86]>; -def B23 : ARM64Reg<23, "b23">, DwarfRegNum<[87]>; -def B24 : ARM64Reg<24, "b24">, DwarfRegNum<[88]>; -def B25 : ARM64Reg<25, "b25">, DwarfRegNum<[89]>; -def B26 : ARM64Reg<26, "b26">, DwarfRegNum<[90]>; -def B27 : ARM64Reg<27, "b27">, DwarfRegNum<[91]>; -def B28 : ARM64Reg<28, "b28">, DwarfRegNum<[92]>; -def B29 : ARM64Reg<29, "b29">, DwarfRegNum<[93]>; -def B30 : ARM64Reg<30, "b30">, DwarfRegNum<[94]>; -def B31 : ARM64Reg<31, "b31">, DwarfRegNum<[95]>; - -let SubRegIndices = 
[bsub] in { -def H0 : ARM64Reg<0, "h0", [B0]>, DwarfRegAlias<B0>; -def H1 : ARM64Reg<1, "h1", [B1]>, DwarfRegAlias<B1>; -def H2 : ARM64Reg<2, "h2", [B2]>, DwarfRegAlias<B2>; -def H3 : ARM64Reg<3, "h3", [B3]>, DwarfRegAlias<B3>; -def H4 : ARM64Reg<4, "h4", [B4]>, DwarfRegAlias<B4>; -def H5 : ARM64Reg<5, "h5", [B5]>, DwarfRegAlias<B5>; -def H6 : ARM64Reg<6, "h6", [B6]>, DwarfRegAlias<B6>; -def H7 : ARM64Reg<7, "h7", [B7]>, DwarfRegAlias<B7>; -def H8 : ARM64Reg<8, "h8", [B8]>, DwarfRegAlias<B8>; -def H9 : ARM64Reg<9, "h9", [B9]>, DwarfRegAlias<B9>; -def H10 : ARM64Reg<10, "h10", [B10]>, DwarfRegAlias<B10>; -def H11 : ARM64Reg<11, "h11", [B11]>, DwarfRegAlias<B11>; -def H12 : ARM64Reg<12, "h12", [B12]>, DwarfRegAlias<B12>; -def H13 : ARM64Reg<13, "h13", [B13]>, DwarfRegAlias<B13>; -def H14 : ARM64Reg<14, "h14", [B14]>, DwarfRegAlias<B14>; -def H15 : ARM64Reg<15, "h15", [B15]>, DwarfRegAlias<B15>; -def H16 : ARM64Reg<16, "h16", [B16]>, DwarfRegAlias<B16>; -def H17 : ARM64Reg<17, "h17", [B17]>, DwarfRegAlias<B17>; -def H18 : ARM64Reg<18, "h18", [B18]>, DwarfRegAlias<B18>; -def H19 : ARM64Reg<19, "h19", [B19]>, DwarfRegAlias<B19>; -def H20 : ARM64Reg<20, "h20", [B20]>, DwarfRegAlias<B20>; -def H21 : ARM64Reg<21, "h21", [B21]>, DwarfRegAlias<B21>; -def H22 : ARM64Reg<22, "h22", [B22]>, DwarfRegAlias<B22>; -def H23 : ARM64Reg<23, "h23", [B23]>, DwarfRegAlias<B23>; -def H24 : ARM64Reg<24, "h24", [B24]>, DwarfRegAlias<B24>; -def H25 : ARM64Reg<25, "h25", [B25]>, DwarfRegAlias<B25>; -def H26 : ARM64Reg<26, "h26", [B26]>, DwarfRegAlias<B26>; -def H27 : ARM64Reg<27, "h27", [B27]>, DwarfRegAlias<B27>; -def H28 : ARM64Reg<28, "h28", [B28]>, DwarfRegAlias<B28>; -def H29 : ARM64Reg<29, "h29", [B29]>, DwarfRegAlias<B29>; -def H30 : ARM64Reg<30, "h30", [B30]>, DwarfRegAlias<B30>; -def H31 : ARM64Reg<31, "h31", [B31]>, DwarfRegAlias<B31>; -} - -let SubRegIndices = [hsub] in { -def S0 : ARM64Reg<0, "s0", [H0]>, DwarfRegAlias<B0>; -def S1 : ARM64Reg<1, "s1", [H1]>, DwarfRegAlias<B1>; -def S2 : ARM64Reg<2, "s2", [H2]>, DwarfRegAlias<B2>; -def S3 : ARM64Reg<3, "s3", [H3]>, DwarfRegAlias<B3>; -def S4 : ARM64Reg<4, "s4", [H4]>, DwarfRegAlias<B4>; -def S5 : ARM64Reg<5, "s5", [H5]>, DwarfRegAlias<B5>; -def S6 : ARM64Reg<6, "s6", [H6]>, DwarfRegAlias<B6>; -def S7 : ARM64Reg<7, "s7", [H7]>, DwarfRegAlias<B7>; -def S8 : ARM64Reg<8, "s8", [H8]>, DwarfRegAlias<B8>; -def S9 : ARM64Reg<9, "s9", [H9]>, DwarfRegAlias<B9>; -def S10 : ARM64Reg<10, "s10", [H10]>, DwarfRegAlias<B10>; -def S11 : ARM64Reg<11, "s11", [H11]>, DwarfRegAlias<B11>; -def S12 : ARM64Reg<12, "s12", [H12]>, DwarfRegAlias<B12>; -def S13 : ARM64Reg<13, "s13", [H13]>, DwarfRegAlias<B13>; -def S14 : ARM64Reg<14, "s14", [H14]>, DwarfRegAlias<B14>; -def S15 : ARM64Reg<15, "s15", [H15]>, DwarfRegAlias<B15>; -def S16 : ARM64Reg<16, "s16", [H16]>, DwarfRegAlias<B16>; -def S17 : ARM64Reg<17, "s17", [H17]>, DwarfRegAlias<B17>; -def S18 : ARM64Reg<18, "s18", [H18]>, DwarfRegAlias<B18>; -def S19 : ARM64Reg<19, "s19", [H19]>, DwarfRegAlias<B19>; -def S20 : ARM64Reg<20, "s20", [H20]>, DwarfRegAlias<B20>; -def S21 : ARM64Reg<21, "s21", [H21]>, DwarfRegAlias<B21>; -def S22 : ARM64Reg<22, "s22", [H22]>, DwarfRegAlias<B22>; -def S23 : ARM64Reg<23, "s23", [H23]>, DwarfRegAlias<B23>; -def S24 : ARM64Reg<24, "s24", [H24]>, DwarfRegAlias<B24>; -def S25 : ARM64Reg<25, "s25", [H25]>, DwarfRegAlias<B25>; -def S26 : ARM64Reg<26, "s26", [H26]>, DwarfRegAlias<B26>; -def S27 : ARM64Reg<27, "s27", [H27]>, DwarfRegAlias<B27>; -def S28 : ARM64Reg<28, "s28", [H28]>, DwarfRegAlias<B28>; -def 
S29 : ARM64Reg<29, "s29", [H29]>, DwarfRegAlias<B29>; -def S30 : ARM64Reg<30, "s30", [H30]>, DwarfRegAlias<B30>; -def S31 : ARM64Reg<31, "s31", [H31]>, DwarfRegAlias<B31>; -} - -let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in { -def D0 : ARM64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias<B0>; -def D1 : ARM64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias<B1>; -def D2 : ARM64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias<B2>; -def D3 : ARM64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias<B3>; -def D4 : ARM64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias<B4>; -def D5 : ARM64Reg<5, "d5", [S5], ["v5", ""]>, DwarfRegAlias<B5>; -def D6 : ARM64Reg<6, "d6", [S6], ["v6", ""]>, DwarfRegAlias<B6>; -def D7 : ARM64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias<B7>; -def D8 : ARM64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias<B8>; -def D9 : ARM64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias<B9>; -def D10 : ARM64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias<B10>; -def D11 : ARM64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias<B11>; -def D12 : ARM64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias<B12>; -def D13 : ARM64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias<B13>; -def D14 : ARM64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias<B14>; -def D15 : ARM64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias<B15>; -def D16 : ARM64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias<B16>; -def D17 : ARM64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias<B17>; -def D18 : ARM64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias<B18>; -def D19 : ARM64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias<B19>; -def D20 : ARM64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias<B20>; -def D21 : ARM64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias<B21>; -def D22 : ARM64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias<B22>; -def D23 : ARM64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias<B23>; -def D24 : ARM64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias<B24>; -def D25 : ARM64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias<B25>; -def D26 : ARM64Reg<26, "d26", [S26], ["v26", ""]>, DwarfRegAlias<B26>; -def D27 : ARM64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias<B27>; -def D28 : ARM64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias<B28>; -def D29 : ARM64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias<B29>; -def D30 : ARM64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias<B30>; -def D31 : ARM64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias<B31>; -} - -let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in { -def Q0 : ARM64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias<B0>; -def Q1 : ARM64Reg<1, "q1", [D1], ["v1", ""]>, DwarfRegAlias<B1>; -def Q2 : ARM64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias<B2>; -def Q3 : ARM64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias<B3>; -def Q4 : ARM64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias<B4>; -def Q5 : ARM64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias<B5>; -def Q6 : ARM64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias<B6>; -def Q7 : ARM64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias<B7>; -def Q8 : ARM64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias<B8>; -def Q9 : ARM64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias<B9>; -def Q10 : ARM64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias<B10>; -def Q11 : ARM64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias<B11>; -def Q12 : ARM64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias<B12>; -def Q13 : ARM64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias<B13>; -def Q14 : ARM64Reg<14, "q14", [D14], 
["v14", ""]>, DwarfRegAlias<B14>; -def Q15 : ARM64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias<B15>; -def Q16 : ARM64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias<B16>; -def Q17 : ARM64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias<B17>; -def Q18 : ARM64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias<B18>; -def Q19 : ARM64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias<B19>; -def Q20 : ARM64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias<B20>; -def Q21 : ARM64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias<B21>; -def Q22 : ARM64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias<B22>; -def Q23 : ARM64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias<B23>; -def Q24 : ARM64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias<B24>; -def Q25 : ARM64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias<B25>; -def Q26 : ARM64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias<B26>; -def Q27 : ARM64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias<B27>; -def Q28 : ARM64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias<B28>; -def Q29 : ARM64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias<B29>; -def Q30 : ARM64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias<B30>; -def Q31 : ARM64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>; -} - -def FPR8 : RegisterClass<"ARM64", [untyped], 8, (sequence "B%u", 0, 31)> { - let Size = 8; -} -def FPR16 : RegisterClass<"ARM64", [untyped], 16, (sequence "H%u", 0, 31)> { - let Size = 16; -} -def FPR32 : RegisterClass<"ARM64", [f32, i32], 32,(sequence "S%u", 0, 31)>; -def FPR64 : RegisterClass<"ARM64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32, - v1i64], - 64, (sequence "D%u", 0, 31)>; -// We don't (yet) have an f128 legal type, so don't use that here. We -// normalize 128-bit vectors to v2f64 for arg passing and such, so use -// that here. -def FPR128 : RegisterClass<"ARM64", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128], - 128, (sequence "Q%u", 0, 31)>; - -// The lower 16 vector registers. Some instructions can only take registers -// in this range. -def FPR128_lo : RegisterClass<"ARM64", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - 128, (trunc FPR128, 16)>; - -// Pairs, triples, and quads of 64-bit vector registers. -def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>; -def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2)]>; -def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2), (rotl FPR64, 3)]>; -def DD : RegisterClass<"ARM64", [untyped], 64, (add DSeqPairs)> { - let Size = 128; -} -def DDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqTriples)> { - let Size = 196; -} -def DDDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqQuads)> { - let Size = 256; -} - -// Pairs, triples, and quads of 128-bit vector registers. -def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>; -def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2)]>; -def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2), (rotl FPR128, 3)]>; -def QQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqPairs)> { - let Size = 256; -} -def QQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqTriples)> { - let Size = 384; -} -def QQQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqQuads)> { - let Size = 512; -} - - -// Vector operand versions of the FP registers. 
Alternate name printing and -// assmebler matching. -def VectorRegAsmOperand : AsmOperandClass { let Name = "VectorReg"; } -let ParserMatchClass = VectorRegAsmOperand in { -def V64 : RegisterOperand<FPR64, "printVRegOperand">; -def V128 : RegisterOperand<FPR128, "printVRegOperand">; -def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand">; -} - -class TypedVecListAsmOperand<int count, int regsize, int lanes, string kind> - : AsmOperandClass { - let Name = "TypedVectorList" # count # "_" # lanes # kind; - - let PredicateMethod - = "isTypedVectorList<" # count # ", " # lanes # ", '" # kind # "'>"; - let RenderMethod = "addVectorList" # regsize # "Operands<" # count # ">"; -} - -class TypedVecListRegOperand<RegisterClass Reg, int lanes, string kind> - : RegisterOperand<Reg, "printTypedVectorList<" # lanes # ", '" - # kind # "'>">; - -multiclass VectorList<int count, RegisterClass Reg64, RegisterClass Reg128> { - // With implicit types (probably on instruction instead). E.g. { v0, v1 } - def _64AsmOperand : AsmOperandClass { - let Name = NAME # "64"; - let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">"; - let RenderMethod = "addVectorList64Operands<" # count # ">"; - } - - def "64" : RegisterOperand<Reg64, "printImplicitlyTypedVectorList"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_64AsmOperand"); - } - - def _128AsmOperand : AsmOperandClass { - let Name = NAME # "128"; - let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">"; - let RenderMethod = "addVectorList128Operands<" # count # ">"; - } - - def "128" : RegisterOperand<Reg128, "printImplicitlyTypedVectorList"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_128AsmOperand"); - } - - // 64-bit register lists with explicit type. - - // { v0.8b, v1.8b } - def _8bAsmOperand : TypedVecListAsmOperand<count, 64, 8, "b">; - def "8b" : TypedVecListRegOperand<Reg64, 8, "b"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8bAsmOperand"); - } - - // { v0.4h, v1.4h } - def _4hAsmOperand : TypedVecListAsmOperand<count, 64, 4, "h">; - def "4h" : TypedVecListRegOperand<Reg64, 4, "h"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4hAsmOperand"); - } - - // { v0.2s, v1.2s } - def _2sAsmOperand : TypedVecListAsmOperand<count, 64, 2, "s">; - def "2s" : TypedVecListRegOperand<Reg64, 2, "s"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_2sAsmOperand"); - } - - // { v0.1d, v1.1d } - def _1dAsmOperand : TypedVecListAsmOperand<count, 64, 1, "d">; - def "1d" : TypedVecListRegOperand<Reg64, 1, "d"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_1dAsmOperand"); - } - - // 128-bit register lists with explicit type - - // { v0.16b, v1.16b } - def _16bAsmOperand : TypedVecListAsmOperand<count, 128, 16, "b">; - def "16b" : TypedVecListRegOperand<Reg128, 16, "b"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_16bAsmOperand"); - } - - // { v0.8h, v1.8h } - def _8hAsmOperand : TypedVecListAsmOperand<count, 128, 8, "h">; - def "8h" : TypedVecListRegOperand<Reg128, 8, "h"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8hAsmOperand"); - } - - // { v0.4s, v1.4s } - def _4sAsmOperand : TypedVecListAsmOperand<count, 128, 4, "s">; - def "4s" : TypedVecListRegOperand<Reg128, 4, "s"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4sAsmOperand"); - } - - // { v0.2d, v1.2d } - def _2dAsmOperand : TypedVecListAsmOperand<count, 128, 2, "d">; - def "2d" : TypedVecListRegOperand<Reg128, 2, "d"> { - let ParserMatchClass = 
!cast<AsmOperandClass>(NAME # "_2dAsmOperand"); - } - - // { v0.b, v1.b } - def _bAsmOperand : TypedVecListAsmOperand<count, 128, 0, "b">; - def "b" : TypedVecListRegOperand<Reg128, 0, "b"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_bAsmOperand"); - } - - // { v0.h, v1.h } - def _hAsmOperand : TypedVecListAsmOperand<count, 128, 0, "h">; - def "h" : TypedVecListRegOperand<Reg128, 0, "h"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_hAsmOperand"); - } - - // { v0.s, v1.s } - def _sAsmOperand : TypedVecListAsmOperand<count, 128, 0, "s">; - def "s" : TypedVecListRegOperand<Reg128, 0, "s"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_sAsmOperand"); - } - - // { v0.d, v1.d } - def _dAsmOperand : TypedVecListAsmOperand<count, 128, 0, "d">; - def "d" : TypedVecListRegOperand<Reg128, 0, "d"> { - let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_dAsmOperand"); - } - - -} - -defm VecListOne : VectorList<1, FPR64, FPR128>; -defm VecListTwo : VectorList<2, DD, QQ>; -defm VecListThree : VectorList<3, DDD, QQQ>; -defm VecListFour : VectorList<4, DDDD, QQQQ>; - - -// Register operand versions of the scalar FP registers. -def FPR16Op : RegisterOperand<FPR16, "printOperand">; -def FPR32Op : RegisterOperand<FPR32, "printOperand">; -def FPR64Op : RegisterOperand<FPR64, "printOperand">; -def FPR128Op : RegisterOperand<FPR128, "printOperand">; diff --git a/lib/Target/ARM64/ARM64SchedCyclone.td b/lib/Target/ARM64/ARM64SchedCyclone.td deleted file mode 100644 index 65c68b3..0000000 --- a/lib/Target/ARM64/ARM64SchedCyclone.td +++ /dev/null @@ -1,852 +0,0 @@ -//=- ARMSchedCyclone.td - ARM64 Cyclone Scheduling Defs ------*- tablegen -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for ARM64 Cyclone to support -// instruction scheduling and other instruction cost heuristics. -// -//===----------------------------------------------------------------------===// - -def CycloneModel : SchedMachineModel { - let IssueWidth = 6; // 6 micro-ops are dispatched per cycle. - let MicroOpBufferSize = 192; // Based on the reorder buffer. - let LoadLatency = 4; // Optimistic load latency. - let MispredictPenalty = 16; // 14-19 cycles are typical. -} - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available on Cyclone. - -// 4 integer pipes -def CyUnitI : ProcResource<4> { - let BufferSize = 48; -} - -// 2 branch units: I[0..1] -def CyUnitB : ProcResource<2> { - let Super = CyUnitI; - let BufferSize = 24; -} - -// 1 indirect-branch unit: I[0] -def CyUnitBR : ProcResource<1> { - let Super = CyUnitB; -} - -// 2 shifter pipes: I[2..3] -// When an instruction consumes a CyUnitIS, it also consumes a CyUnitI -def CyUnitIS : ProcResource<2> { - let Super = CyUnitI; - let BufferSize = 24; -} - -// 1 mul pipe: I[0] -def CyUnitIM : ProcResource<1> { - let Super = CyUnitBR; - let BufferSize = 32; -} - -// 1 div pipe: I[1] -def CyUnitID : ProcResource<1> { - let Super = CyUnitB; - let BufferSize = 16; -} - -// 1 integer division unit. This is driven by the ID pipe, but only -// consumes the pipe for one cycle at issue and another cycle at writeback. -def CyUnitIntDiv : ProcResource<1>; - -// 2 ld/st pipes. 
-def CyUnitLS : ProcResource<2> { - let BufferSize = 28; -} - -// 3 fp/vector pipes. -def CyUnitV : ProcResource<3> { - let BufferSize = 48; -} -// 2 fp/vector arithmetic and multiply pipes: V[0-1] -def CyUnitVM : ProcResource<2> { - let Super = CyUnitV; - let BufferSize = 32; -} -// 1 fp/vector division/sqrt pipe: V[2] -def CyUnitVD : ProcResource<1> { - let Super = CyUnitV; - let BufferSize = 16; -} -// 1 fp compare pipe: V[0] -def CyUnitVC : ProcResource<1> { - let Super = CyUnitVM; - let BufferSize = 16; -} - -// 2 fp division/square-root units. These are driven by the VD pipe, -// but only consume the pipe for one cycle at issue and a cycle at writeback. -def CyUnitFloatDiv : ProcResource<2>; - -//===----------------------------------------------------------------------===// -// Define scheduler read/write resources and latency on Cyclone. -// This mirrors sections 7.7-7.9 of the Tuning Guide v1.0.1. - -let SchedModel = CycloneModel in { - -//--- -// 7.8.1. Moves -//--- - -// A single nop micro-op (uX). -def WriteX : SchedWriteRes<[]> { let Latency = 0; } - -// Move zero is a register rename (to machine register zero). -// The move is replaced by a single nop micro-op. -// MOVZ Rd, #0 -// AND Rd, Rzr, #imm -def WriteZPred : SchedPredicate<[{TII->isGPRZero(MI)}]>; -def WriteImmZ : SchedWriteVariant<[ - SchedVar<WriteZPred, [WriteX]>, - SchedVar<NoSchedPred, [WriteImm]>]>; -def : InstRW<[WriteImmZ], (instrs MOVZWi,MOVZXi,ANDWri,ANDXri)>; - -// Move GPR is a register rename and single nop micro-op. -// ORR Xd, XZR, Xm -// ADD Xd, Xn, #0 -def WriteIMovPred : SchedPredicate<[{TII->isGPRCopy(MI)}]>; -def WriteVMovPred : SchedPredicate<[{TII->isFPRCopy(MI)}]>; -def WriteMov : SchedWriteVariant<[ - SchedVar<WriteIMovPred, [WriteX]>, - SchedVar<WriteVMovPred, [WriteX]>, - SchedVar<NoSchedPred, [WriteI]>]>; -def : InstRW<[WriteMov], (instrs COPY,ORRXrr,ADDXrr)>; - -// Move non-zero immediate is an integer ALU op. -// MOVN,MOVZ,MOVK -def : WriteRes<WriteImm, [CyUnitI]>; - -//--- -// 7.8.2-7.8.5. Arithmetic and Logical, Comparison, Conditional, -// Shifts and Bitfield Operations -//--- - -// ADR,ADRP -// ADD(S)ri,SUB(S)ri,AND(S)ri,EORri,ORRri -// ADD(S)rr,SUB(S)rr,AND(S)rr,BIC(S)rr,EONrr,EORrr,ORNrr,ORRrr -// ADC(S),SBC(S) -// Aliases: CMN, CMP, TST -// -// Conditional operations. -// CCMNi,CCMPi,CCMNr,CCMPr, -// CSEL,CSINC,CSINV,CSNEG -// -// Bit counting and reversal operations. -// CLS,CLZ,RBIT,REV,REV16,REV32 -def : WriteRes<WriteI, [CyUnitI]>; - -// ADD with shifted register operand is a single micro-op that -// consumes a shift pipeline for two cycles. -// ADD(S)rs,SUB(S)rs,AND(S)rs,BIC(S)rs,EONrs,EORrs,ORNrs,ORRrs -// EXAMPLE: ADDrs Xn, Xm LSL #imm -def : WriteRes<WriteISReg, [CyUnitIS]> { - let Latency = 2; - let ResourceCycles = [2]; -} - -// ADD with extended register operand is the same as shifted reg operand. -// ADD(S)re,SUB(S)re -// EXAMPLE: ADDXre Xn, Xm, UXTB #1 -def : WriteRes<WriteIEReg, [CyUnitIS]> { - let Latency = 2; - let ResourceCycles = [2]; -} - -// Variable shift and bitfield operations. -// ASRV,LSLV,LSRV,RORV,BFM,SBFM,UBFM -def : WriteRes<WriteIS, [CyUnitIS]>; - -// EXTR Shifts a pair of registers and requires two micro-ops. -// The second micro-op is delayed, as modeled by ReadExtrHi. -// EXTR Xn, Xm, #imm -def : WriteRes<WriteExtr, [CyUnitIS, CyUnitIS]> { - let Latency = 2; - let NumMicroOps = 2; -} - -// EXTR's first register read is delayed by one cycle, effectively -// shortening its writer's latency. 
-// EXTR Xn, Xm, #imm -def : ReadAdvance<ReadExtrHi, 1>; - -//--- -// 7.8.6. Multiplies -//--- - -// MUL/MNEG are aliases for MADD/MSUB. -// MADDW,MSUBW,SMADDL,SMSUBL,UMADDL,UMSUBL -def : WriteRes<WriteIM32, [CyUnitIM]> { - let Latency = 4; -} -// MADDX,MSUBX,SMULH,UMULH -def : WriteRes<WriteIM64, [CyUnitIM]> { - let Latency = 5; -} - -//--- -// 7.8.7. Divide -//--- - -// 32-bit divide takes 7-13 cycles. 10 cycles covers a 20-bit quotient. -// The ID pipe is consumed for 2 cycles: issue and writeback. -// SDIVW,UDIVW -def : WriteRes<WriteID32, [CyUnitID, CyUnitIntDiv]> { - let Latency = 10; - let ResourceCycles = [2, 10]; -} -// 64-bit divide takes 7-21 cycles. 13 cycles covers a 32-bit quotient. -// The ID pipe is consumed for 2 cycles: issue and writeback. -// SDIVX,UDIVX -def : WriteRes<WriteID64, [CyUnitID, CyUnitIntDiv]> { - let Latency = 13; - let ResourceCycles = [2, 13]; -} - -//--- -// 7.8.8,7.8.10. Load/Store, single element -//--- - -// Integer loads take 4 cycles and use one LS unit for one cycle. -def : WriteRes<WriteLD, [CyUnitLS]> { - let Latency = 4; -} - -// Store-load forwarding is 4 cycles. -// -// Note: The store-exclusive sequence incorporates this -// latency. However, general heuristics should not model the -// dependence between a store and subsequent may-alias load because -// hardware speculation works. -def : WriteRes<WriteST, [CyUnitLS]> { - let Latency = 4; -} - -// Load from base address plus an optionally scaled register offset. -// Rt latency is latency WriteIS + WriteLD. -// EXAMPLE: LDR Xn, Xm [, lsl 3] -def CyWriteLDIdx : SchedWriteVariant<[ - SchedVar<ScaledIdxPred, [WriteIS, WriteLD]>, // Load from scaled register. - SchedVar<NoSchedPred, [WriteLD]>]>; // Load from register offset. -def : SchedAlias<WriteLDIdx, CyWriteLDIdx>; // Map ARM64->Cyclone type. - -// EXAMPLE: STR Xn, Xm [, lsl 3] -def CyWriteSTIdx : SchedWriteVariant<[ - SchedVar<ScaledIdxPred, [WriteIS, WriteST]>, // Store to scaled register. - SchedVar<NoSchedPred, [WriteST]>]>; // Store to register offset. -def : SchedAlias<WriteSTIdx, CyWriteSTIdx>; // Map ARM64->Cyclone type. - -// Read the (unshifted) base register Xn in the second micro-op one cycle later. -// EXAMPLE: LDR Xn, Xm [, lsl 3] -def ReadBaseRS : SchedReadAdvance<1>; -def CyReadAdrBase : SchedReadVariant<[ - SchedVar<ScaledIdxPred, [ReadBaseRS]>, // Read base reg after shifting offset. - SchedVar<NoSchedPred, [ReadDefault]>]>; // Read base reg with no shift. -def : SchedAlias<ReadAdrBase, CyReadAdrBase>; // Map ARM64->Cyclone type. - -//--- -// 7.8.9,7.8.11. Load/Store, paired -//--- - -// Address pre/post increment is a simple ALU op with one cycle latency. -def : WriteRes<WriteAdr, [CyUnitI]>; - -// LDP high register write is fused with the load, but a nop micro-op remains. -def : WriteRes<WriteLDHi, []> { - let Latency = 4; -} - -// STP is a vector op and store, except for QQ, which is just two stores. -def : SchedAlias<WriteSTP, WriteVSTShuffle>; -def : InstRW<[WriteST, WriteST], (instrs STPQi)>; - -//--- -// 7.8.13. Branches -//--- - -// Branches take a single micro-op. -// The misprediction penalty is defined as a SchedMachineModel property. -def : WriteRes<WriteBr, [CyUnitB]> {let Latency = 0;} -def : WriteRes<WriteBrReg, [CyUnitBR]> {let Latency = 0;} - -//--- -// 7.8.14. 
Never-issued Instructions, Barrier and Hint Operations -//--- - -// NOP,SEV,SEVL,WFE,WFI,YIELD -def : WriteRes<WriteHint, []> {let Latency = 0;} -// ISB -def : InstRW<[WriteI], (instrs ISB)>; -// SLREX,DMB,DSB -def : WriteRes<WriteBarrier, [CyUnitLS]>; - -// System instructions get an invalid latency because the latency of -// other operations across them is meaningless. -def : WriteRes<WriteSys, []> {let Latency = -1;} - -//===----------------------------------------------------------------------===// -// 7.9 Vector Unit Instructions - -// Simple vector operations take 2 cycles. -def : WriteRes<WriteV, [CyUnitV]> {let Latency = 2;} - -// Define some longer latency vector op types for Cyclone. -def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;} -def CyWriteV4 : SchedWriteRes<[CyUnitV]> {let Latency = 4;} -def CyWriteV5 : SchedWriteRes<[CyUnitV]> {let Latency = 5;} -def CyWriteV6 : SchedWriteRes<[CyUnitV]> {let Latency = 6;} - -// Simple floating-point operations take 2 cycles. -def : WriteRes<WriteF, [CyUnitV]> {let Latency = 2;} - -//--- -// 7.9.1 Vector Moves -//--- - -// TODO: Add Cyclone-specific zero-cycle zeros. LLVM currently -// generates expensive int-float conversion instead: -// FMOVDi Dd, #0.0 -// FMOVv2f64ns Vd.2d, #0.0 - -// FMOVSi,FMOVDi -def : WriteRes<WriteFImm, [CyUnitV]> {let Latency = 2;} - -// MOVI,MVNI are WriteV -// FMOVv2f32ns,FMOVv2f64ns,FMOVv4f32ns are WriteV - -// Move FPR is a register rename and single nop micro-op. -// ORR.16b Vd,Vn,Vn -// COPY is handled above in the WriteMov Variant. -def WriteVMov : SchedWriteVariant<[ - SchedVar<WriteVMovPred, [WriteX]>, - SchedVar<NoSchedPred, [WriteV]>]>; -def : InstRW<[WriteVMov], (instrs ORRv16i8)>; - -// FMOVSr,FMOVDr are WriteF. - -// MOV V,V is a WriteV. - -// CPY D,V[x] is a WriteV - -// INS V[x],V[y] is a WriteV. - -// FMOVWSr,FMOVXDr,FMOVXDHighr -def : SchedAlias<WriteFCopy, WriteVLD>; - -// FMOVSWr,FMOVDXr -def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>; - -// INS V[x],R -def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>; -def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>; - -// SMOV,UMOV R,V[x] -def CyWriteCopyToGPR : WriteSequence<[WriteLD, WriteI]>; -def : InstRW<[CyWriteCopyToGPR], (instregex "SMOVv","UMOVv")>; - -// DUP V,R -def : InstRW<[CyWriteCopyToFPR], (instregex "DUPv")>; - -// DUP V,V[x] is a WriteV. - -//--- -// 7.9.2 Integer Arithmetic, Logical, and Comparisons -//--- - -// BIC,ORR V,#imm are WriteV - -def : InstRW<[CyWriteV3], (instregex "ABSv")>; - -// MVN,NEG,NOT are WriteV - -def : InstRW<[CyWriteV3], (instregex "SQABSv","SQNEGv")>; - -// ADDP is a WriteV. -def CyWriteVADDLP : SchedWriteRes<[CyUnitV]> {let Latency = 2;} -def : InstRW<[CyWriteVADDLP], (instregex "SADDLPv","UADDLPv")>; - -def : InstRW<[CyWriteV3], - (instregex "ADDVv","SMAXVv","UMAXVv","SMINVv","UMINVv")>; - -def : InstRW<[CyWriteV3], (instregex "SADDLV","UADDLV")>; - -// ADD,SUB are WriteV - -// Forward declare. -def CyWriteVABD : SchedWriteRes<[CyUnitV]> {let Latency = 3;} - -// Add/Diff and accumulate uses the vector multiply unit. 
-def CyWriteVAccum : SchedWriteRes<[CyUnitVM]> {let Latency = 3;} -def CyReadVAccum : SchedReadAdvance<1, - [CyWriteVAccum, CyWriteVADDLP, CyWriteVABD]>; - -def : InstRW<[CyWriteVAccum, CyReadVAccum], - (instregex "SADALP","UADALP")>; - -def : InstRW<[CyWriteVAccum, CyReadVAccum], - (instregex "SABAv","UABAv","SABALv","UABALv")>; - -def : InstRW<[CyWriteV3], (instregex "SQADDv","SQSUBv","UQADDv","UQSUBv")>; - -def : InstRW<[CyWriteV3], (instregex "SUQADDv","USQADDv")>; - -def : InstRW<[CyWriteV4], (instregex "ADDHNv","RADDHNv", "RSUBHNv", "SUBHNv")>; - -// WriteV includes: -// AND,BIC,CMTST,EOR,ORN,ORR -// ADDP -// SHADD,SHSUB,SRHADD,UHADD,UHSUB,URHADD -// SADDL,SSUBL,UADDL,USUBL -// SADDW,SSUBW,UADDW,USUBW - -def : InstRW<[CyWriteV3], (instregex "CMEQv","CMGEv","CMGTv", - "CMLEv","CMLTv", - "CMHIv","CMHSv")>; - -def : InstRW<[CyWriteV3], (instregex "SMAXv","SMINv","UMAXv","UMINv", - "SMAXPv","SMINPv","UMAXPv","UMINPv")>; - -def : InstRW<[CyWriteVABD], (instregex "SABDv","UABDv", - "SABDLv","UABDLv")>; - -//--- -// 7.9.3 Floating Point Arithmetic and Comparisons -//--- - -// FABS,FNEG are WriteF - -def : InstRW<[CyWriteV4], (instrs FADDPv2i32p)>; -def : InstRW<[CyWriteV5], (instrs FADDPv2i64p)>; - -def : InstRW<[CyWriteV3], (instregex "FMAXPv2i","FMAXNMPv2i", - "FMINPv2i","FMINNMPv2i")>; - -def : InstRW<[CyWriteV4], (instregex "FMAXVv","FMAXNMVv","FMINVv","FMINNMVv")>; - -def : InstRW<[CyWriteV4], (instrs FADDSrr,FADDv2f32,FADDv4f32, - FSUBSrr,FSUBv2f32,FSUBv4f32, - FADDPv2f32,FADDPv4f32, - FABD32,FABDv2f32,FABDv4f32)>; -def : InstRW<[CyWriteV5], (instrs FADDDrr,FADDv2f64, - FSUBDrr,FSUBv2f64, - FADDPv2f64, - FABD64,FABDv2f64)>; - -def : InstRW<[CyWriteV3], (instregex "FCMEQ","FCMGT","FCMLE","FCMLT")>; - -def : InstRW<[CyWriteV3], (instregex "FACGE","FACGT", - "FMAXS","FMAXD","FMAXv", - "FMINS","FMIND","FMINv", - "FMAXNMS","FMAXNMD","FMAXNMv", - "FMINNMS","FMINNMD","FMINNMv", - "FMAXPv2f","FMAXPv4f", - "FMINPv2f","FMINPv4f", - "FMAXNMPv2f","FMAXNMPv4f", - "FMINNMPv2f","FMINNMPv4f")>; - -// FCMP,FCMPE,FCCMP,FCCMPE -def : WriteRes<WriteFCmp, [CyUnitVC]> {let Latency = 4;} - -// FCSEL is a WriteF. - -//--- -// 7.9.4 Shifts and Bitfield Operations -//--- - -// SHL is a WriteV - -def CyWriteVSHR : SchedWriteRes<[CyUnitV]> {let Latency = 2;} -def : InstRW<[CyWriteVSHR], (instregex "SSHRv","USHRv")>; - -def CyWriteVSRSHR : SchedWriteRes<[CyUnitV]> {let Latency = 3;} -def : InstRW<[CyWriteVSRSHR], (instregex "SRSHRv","URSHRv")>; - -// Shift and accumulate uses the vector multiply unit. -def CyWriteVShiftAcc : SchedWriteRes<[CyUnitVM]> {let Latency = 3;} -def CyReadVShiftAcc : SchedReadAdvance<1, - [CyWriteVShiftAcc, CyWriteVSHR, CyWriteVSRSHR]>; -def : InstRW<[CyWriteVShiftAcc, CyReadVShiftAcc], - (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>; - -// SSHL,USHL are WriteV. - -def : InstRW<[CyWriteV3], (instregex "SRSHLv","URSHLv")>; - -// SQSHL,SQSHLU,UQSHL are WriteV. - -def : InstRW<[CyWriteV3], (instregex "SQRSHLv","UQRSHLv")>; - -// WriteV includes: -// SHLL,SSHLL,USHLL -// SLI,SRI -// BIF,BIT,BSL -// EXT -// CLS,CLZ,CNT,RBIT,REV16,REV32,REV64,XTN -// XTN2 - -def : InstRW<[CyWriteV4], - (instregex "RSHRNv","SHRNv", - "SQRSHRNv","SQRSHRUNv","SQSHRNv","SQSHRUNv", - "UQRSHRNv","UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>; - -//--- -// 7.9.5 Multiplication -//--- - -def CyWriteVMul : SchedWriteRes<[CyUnitVM]> { let Latency = 4;} -def : InstRW<[CyWriteVMul], (instregex "MULv","SMULLv","UMULLv", - "SQDMULLv","SQDMULHv","SQRDMULHv")>; - -// FMUL,FMULX,FNMUL default to WriteFMul. 
-def : WriteRes<WriteFMul, [CyUnitVM]> { let Latency = 4;} - -def CyWriteV64Mul : SchedWriteRes<[CyUnitVM]> { let Latency = 5;} -def : InstRW<[CyWriteV64Mul], (instrs FMULDrr,FMULv2f64,FMULv2i64_indexed, - FNMULDrr,FMULX64,FMULXv2f64,FMULXv2i64_indexed)>; - -def CyReadVMulAcc : SchedReadAdvance<1, [CyWriteVMul, CyWriteV64Mul]>; -def : InstRW<[CyWriteVMul, CyReadVMulAcc], - (instregex "MLA","MLS","SMLAL","SMLSL","UMLAL","UMLSL", - "SQDMLAL","SQDMLSL")>; - -def CyWriteSMul : SchedWriteRes<[CyUnitVM]> { let Latency = 8;} -def CyWriteDMul : SchedWriteRes<[CyUnitVM]> { let Latency = 10;} -def CyReadSMul : SchedReadAdvance<4, [CyWriteSMul]>; -def CyReadDMul : SchedReadAdvance<5, [CyWriteDMul]>; - -def : InstRW<[CyWriteSMul, CyReadSMul], - (instrs FMADDSrrr,FMSUBSrrr,FNMADDSrrr,FNMSUBSrrr, - FMLAv2f32,FMLAv4f32, - FMLAv1i32_indexed,FMLAv1i64_indexed,FMLAv2i32_indexed)>; -def : InstRW<[CyWriteDMul, CyReadDMul], - (instrs FMADDDrrr,FMSUBDrrr,FNMADDDrrr,FNMSUBDrrr, - FMLAv2f64,FMLAv2i64_indexed, - FMLSv2f64,FMLSv2i64_indexed)>; - -def CyWritePMUL : SchedWriteRes<[CyUnitVD]> { let Latency = 3; } -def : InstRW<[CyWritePMUL], (instregex "PMULv", "PMULLv")>; - -//--- -// 7.9.6 Divide and Square Root -//--- - -// FDIV,FSQRT -// TODO: Add 64-bit variant with 19 cycle latency. -// TODO: Specialize FSQRT for longer latency. -def : WriteRes<WriteFDiv, [CyUnitVD, CyUnitFloatDiv]> { - let Latency = 17; - let ResourceCycles = [2, 17]; -} - -def : InstRW<[CyWriteV4], (instregex "FRECPEv","FRECPXv","URECPEv","URSQRTEv")>; - -def WriteFRSQRTE : SchedWriteRes<[CyUnitVM]> { let Latency = 4; } -def : InstRW<[WriteFRSQRTE], (instregex "FRSQRTEv")>; - -def WriteFRECPS : SchedWriteRes<[CyUnitVM]> { let Latency = 8; } -def WriteFRSQRTS : SchedWriteRes<[CyUnitVM]> { let Latency = 10; } -def : InstRW<[WriteFRECPS], (instregex "FRECPSv")>; -def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>; - -//--- -// 7.9.7 Integer-FP Conversions -//--- - -// FCVT lengthen f16/s32 -def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>; - -// FCVT,FCVTN,FCVTXN -// SCVTF,UCVTF V,V -// FRINT(AIMNPXZ) V,V -def : WriteRes<WriteFCvt, [CyUnitV]> {let Latency = 4;} - -// SCVT/UCVT S/D, Rd = VLD5+V4: 9 cycles. -def CyWriteCvtToFPR : WriteSequence<[WriteVLD, CyWriteV4]>; -def : InstRW<[CyWriteCopyToFPR], (instregex "FCVT[AMNPZ][SU][SU][WX][SD]r")>; - -// FCVT Rd, S/D = V6+LD4: 10 cycles -def CyWriteCvtToGPR : WriteSequence<[CyWriteV6, WriteLD]>; -def : InstRW<[CyWriteCvtToGPR], (instregex "[SU]CVTF[SU][WX][SD]r")>; - -// FCVTL is a WriteV - -//--- -// 7.9.8-7.9.10 Cryptography, Data Transposition, Table Lookup -//--- - -def CyWriteCrypto2 : SchedWriteRes<[CyUnitVD]> {let Latency = 2;} -def : InstRW<[CyWriteCrypto2], (instrs AESIMCrr, AESMCrr, SHA1Hrr, - AESDrr, AESErr, SHA1SU1rr, SHA256SU0rr, - SHA1SU0rrr)>; - -def CyWriteCrypto3 : SchedWriteRes<[CyUnitVD]> {let Latency = 3;} -def : InstRW<[CyWriteCrypto3], (instrs SHA256SU1rrr)>; - -def CyWriteCrypto6 : SchedWriteRes<[CyUnitVD]> {let Latency = 6;} -def : InstRW<[CyWriteCrypto6], (instrs SHA1Crrr, SHA1Mrrr, SHA1Prrr, - SHA256Hrrr,SHA256H2rrr)>; - -// TRN,UZP,ZUP are WriteV. - -// TBL,TBX are WriteV. - -//--- -// 7.9.11-7.9.14 Load/Store, single element and paired -//--- - -// Loading into the vector unit takes 5 cycles vs 4 for integer loads. -def : WriteRes<WriteVLD, [CyUnitLS]> { - let Latency = 5; -} - -// Store-load forwarding is 4 cycles. -def : WriteRes<WriteVST, [CyUnitLS]> { - let Latency = 4; -} - -// WriteVLDPair/VSTPair sequences are expanded by the target description. 
- -//--- -// 7.9.15 Load, element operations -//--- - -// Only the first WriteVLD and WriteAdr for writeback matches def operands. -// Subsequent WriteVLDs consume resources. Since all loaded values have the -// same latency, this is acceptable. - -// Vd is read 5 cycles after issuing the vector load. -def : ReadAdvance<ReadVLD, 5>; - -def : InstRW<[WriteVLD], - (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr], - (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; - -// Register writes from the load's high half are fused micro-ops. -def : InstRW<[WriteVLD], - (instregex "LD1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[WriteVLD, WriteAdr], - (instregex "LD1Twov(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVLD, WriteVLD], - (instregex "LD1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD], - (instregex "LD1Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLD, WriteVLD], - (instregex "LD1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD], - (instregex "LD1Threev(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVLD, WriteVLD, WriteVLD], - (instregex "LD1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD], - (instregex "LD1Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLD, WriteVLD], - (instregex "LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD], - (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVLD, WriteVLD, WriteVLD, WriteVLD], - (instregex "LD1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD, WriteVLD], - (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD], - (instregex "LD1i(8|16|32)$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr], - (instregex "LD1i(8|16|32)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD], (instrs LD1i64)>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],(instrs LD1i64_POST)>; - -def : InstRW<[WriteVLDShuffle], - (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr], - (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[WriteVLDShuffle, WriteV], - (instregex "LD2Twov(8b|4h|2s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV], - (instregex "LD2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle], - (instregex "LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle], - (instregex "LD2Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV], - (instregex "LD2i(8|16|32)$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV], - (instregex "LD2i(8|16|32)_POST")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV], - (instregex "LD2i64$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV], - (instregex "LD2i64_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteV], - (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV], - (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV], - (instregex "LD3Threev(8b|4h|2s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV], - (instregex "LD3Threev(8b|4h|2s)_POST")>; -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle], - (instregex "LD3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle], - (instregex "LD3Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV], - 
(instregex "LD3i(8|16|32)$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV], - (instregex "LD3i(8|16|32)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV], - (instregex "LD3i64$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV], - (instregex "LD3i64_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteV, WriteV], - (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV], - (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV], - (instrs LD3Rv1d,LD3Rv2d)>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV], - (instrs LD3Rv2d_POST,LD3Rv2d_POST)>; - -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV], - (instregex "LD4Fourv(8b|4h|2s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV], - (instregex "LD4Fourv(8b|4h|2s)_POST")>; -def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle, - WriteVLDPairShuffle, WriteVLDPairShuffle], - (instregex "LD4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle, - WriteVLDPairShuffle, WriteVLDPairShuffle], - (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV], - (instregex "LD4i(8|16|32)$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV], - (instregex "LD4i(8|16|32)_POST")>; - - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV], - (instrs LD4i64)>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV], - (instrs LD4i64_POST)>; - -def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV], - (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV], - (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV], - (instrs LD4Rv1d,LD4Rv2d)>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV], - (instrs LD4Rv1d_POST,LD4Rv2d_POST)>; - -//--- -// 7.9.16 Store, element operations -//--- - -// Only the WriteAdr for writeback matches a def operands. -// Subsequent WriteVLDs only consume resources. 
- -def : InstRW<[WriteVST], - (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVST], - (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle], - (instregex "ST1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], - (instregex "ST1Twov(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVST, WriteVST], - (instregex "ST1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVST, WriteVST], - (instregex "ST1Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle, WriteVST], - (instregex "ST1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVST], - (instregex "ST1Threev(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVST, WriteVST, WriteVST], - (instregex "ST1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST], - (instregex "ST1Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST1Fourv(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], - (instregex "ST1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST, WriteVST], - (instregex "ST1Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle], (instregex "ST1i(8|16|32)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST1i(8|16|32)_POST")>; - -def : InstRW<[WriteVSTShuffle], (instrs ST1i64)>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST1i64_POST)>; - -def : InstRW<[WriteVSTShuffle], - (instregex "ST2Twov(8b|4h|2s)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], - (instregex "ST2Twov(8b|4h|2s)_POST")>; -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST2Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle], (instregex "ST2i(8|16|32)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST2i(8|16|32)_POST")>; -def : InstRW<[WriteVSTShuffle], (instrs ST2i64)>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST2i64_POST)>; - -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST3Threev(8b|4h|2s)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST3Threev(8b|4h|2s)_POST")>; -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST3Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle], (instregex "ST3i(8|16|32)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST3i(8|16|32)_POST")>; - -def :InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64)>; -def :InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64_POST)>; - -def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle], - (instregex "ST4Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle], - (instregex "ST4Fourv(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle, - WriteVSTPairShuffle, WriteVSTPairShuffle], - (instregex "ST4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle, - WriteVSTPairShuffle, WriteVSTPairShuffle], - (instregex "ST4Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTPairShuffle], (instregex "ST4i(8|16|32)$")>; -def : InstRW<[WriteAdr, WriteVSTPairShuffle], (instregex 
"ST4i(8|16|32)_POST")>; - -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST4i64)>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>; - -} // SchedModel = CycloneModel diff --git a/lib/Target/ARM64/ARM64Schedule.td b/lib/Target/ARM64/ARM64Schedule.td deleted file mode 100644 index 52f9262..0000000 --- a/lib/Target/ARM64/ARM64Schedule.td +++ /dev/null @@ -1,92 +0,0 @@ -//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -// Define TII for use in SchedVariant Predicates. -// const MachineInstr *MI and const TargetSchedModel *SchedModel -// are defined by default. -def : PredicateProlog<[{ - const ARM64InstrInfo *TII = - static_cast<const ARM64InstrInfo*>(SchedModel->getInstrInfo()); - (void)TII; -}]>; - -// ARM64 Scheduler Definitions - -def WriteImm : SchedWrite; // MOVN, MOVZ -// TODO: Provide variants for MOV32/64imm Pseudos that dynamically -// select the correct sequence of WriteImms. - -def WriteI : SchedWrite; // ALU -def WriteISReg : SchedWrite; // ALU of Shifted-Reg -def WriteIEReg : SchedWrite; // ALU of Extended-Reg -def WriteExtr : SchedWrite; // EXTR shifts a reg pair -def ReadExtrHi : SchedRead; // Read the high reg of the EXTR pair -def WriteIS : SchedWrite; // Shift/Scale -def WriteID32 : SchedWrite; // 32-bit Divide -def WriteID64 : SchedWrite; // 64-bit Divide -def WriteIM32 : SchedWrite; // 32-bit Multiply -def WriteIM64 : SchedWrite; // 64-bit Multiply -def WriteBr : SchedWrite; // Branch -def WriteBrReg : SchedWrite; // Indirect Branch - -def WriteLD : SchedWrite; // Load from base addr plus immediate offset -def WriteST : SchedWrite; // Store to base addr plus immediate offset -def WriteSTP : SchedWrite; // Store a register pair. -def WriteAdr : SchedWrite; // Address pre/post increment. - -def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled). -def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled). -def ReadAdrBase : SchedRead; // Read the base resister of a reg-offset LD/ST. - -// ScaledIdxPred is true if a WriteLDIdx operand will be -// scaled. Subtargets can use this to dynamically select resources and -// latency for WriteLDIdx and ReadAdrBase. -def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(MI)}]>; - -// Serialized two-level address load. -// EXAMPLE: LOADGot -def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>; - -// Serialized two-level address lookup. -// EXAMPLE: MOVaddr... -def WriteAdrAdr : WriteSequence<[WriteAdr, WriteAdr]>; - -// The second register of a load-pair. -// LDP,LDPSW,LDNP,LDXP,LDAXP -def WriteLDHi : SchedWrite; - -// Store-exclusive is a store followed by a dependent load. -def WriteSTX : WriteSequence<[WriteST, WriteLD]>; - -def WriteSys : SchedWrite; // Long, variable latency system ops. -def WriteBarrier : SchedWrite; // Memory barrier. -def WriteHint : SchedWrite; // Hint instruction. - -def WriteF : SchedWrite; // General floating-point ops. -def WriteFCmp : SchedWrite; // Floating-point compare. -def WriteFCvt : SchedWrite; // Float conversion. -def WriteFCopy : SchedWrite; // Float-int register copy. -def WriteFImm : SchedWrite; // Floating-point immediate. -def WriteFMul : SchedWrite; // Floating-point multiply. -def WriteFDiv : SchedWrite; // Floating-point division. 
- -def WriteV : SchedWrite; // Vector ops. -def WriteVLD : SchedWrite; // Vector loads. -def WriteVST : SchedWrite; // Vector stores. - -// Read the unwritten lanes of the VLD's destination registers. -def ReadVLD : SchedRead; - -// Sequential vector load and shuffle. -def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>; -def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>; - -// Store a shuffled vector. -def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>; -def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>; diff --git a/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp b/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp deleted file mode 100644 index 79d507f..0000000 --- a/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp +++ /dev/null @@ -1,57 +0,0 @@ -//===-- ARM64SelectionDAGInfo.cpp - ARM64 SelectionDAG Info ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ARM64SelectionDAGInfo class. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-selectiondag-info" -#include "ARM64TargetMachine.h" -using namespace llvm; - -ARM64SelectionDAGInfo::ARM64SelectionDAGInfo(const TargetMachine &TM) - : TargetSelectionDAGInfo(TM), - Subtarget(&TM.getSubtarget<ARM64Subtarget>()) {} - -ARM64SelectionDAGInfo::~ARM64SelectionDAGInfo() {} - -SDValue ARM64SelectionDAGInfo::EmitTargetCodeForMemset( - SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVolatile, - MachinePointerInfo DstPtrInfo) const { - // Check to see if there is a specialized entry-point for memory zeroing. - ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); - ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size); - const char *bzeroEntry = - (V && V->isNullValue()) ? Subtarget->getBZeroEntry() : 0; - // For small size (< 256), it is not beneficial to use bzero - // instead of memset. - if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) { - const ARM64TargetLowering &TLI = *static_cast<const ARM64TargetLowering *>( - DAG.getTarget().getTargetLowering()); - - EVT IntPtr = TLI.getPointerTy(); - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Dst; - Entry.Ty = IntPtrTy; - Args.push_back(Entry); - Entry.Node = Size; - Args.push_back(Entry); - TargetLowering::CallLoweringInfo CLI( - Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, - 0, CallingConv::C, /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/false, - DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); - std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); - return CallResult.second; - } - return SDValue(); -} diff --git a/lib/Target/ARM64/ARM64SelectionDAGInfo.h b/lib/Target/ARM64/ARM64SelectionDAGInfo.h deleted file mode 100644 index 770775f..0000000 --- a/lib/Target/ARM64/ARM64SelectionDAGInfo.h +++ /dev/null @@ -1,37 +0,0 @@ -//===-- ARM64SelectionDAGInfo.h - ARM64 SelectionDAG Info -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file defines the ARM64 subclass for TargetSelectionDAGInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64SELECTIONDAGINFO_H -#define ARM64SELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class ARM64SelectionDAGInfo : public TargetSelectionDAGInfo { - /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can - /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; - -public: - explicit ARM64SelectionDAGInfo(const TargetMachine &TM); - ~ARM64SelectionDAGInfo(); - - SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, - SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool isVolatile, - MachinePointerInfo DstPtrInfo) const override; -}; -} - -#endif diff --git a/lib/Target/ARM64/ARM64StorePairSuppress.cpp b/lib/Target/ARM64/ARM64StorePairSuppress.cpp deleted file mode 100644 index 6521d13..0000000 --- a/lib/Target/ARM64/ARM64StorePairSuppress.cpp +++ /dev/null @@ -1,167 +0,0 @@ -//===---- ARM64StorePairSuppress.cpp --- Suppress store pair formation ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass identifies floating point stores that should not be combined into -// store pairs. Later we may do the same for floating point loads. -// ===---------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-stp-suppress" -#include "ARM64InstrInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineTraceMetrics.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { -class ARM64StorePairSuppress : public MachineFunctionPass { - const ARM64InstrInfo *TII; - const TargetRegisterInfo *TRI; - const MachineRegisterInfo *MRI; - MachineFunction *MF; - TargetSchedModel SchedModel; - MachineTraceMetrics *Traces; - MachineTraceMetrics::Ensemble *MinInstr; - -public: - static char ID; - ARM64StorePairSuppress() : MachineFunctionPass(ID) {} - - virtual const char *getPassName() const override { - return "ARM64 Store Pair Suppression"; - } - - bool runOnMachineFunction(MachineFunction &F) override; - -private: - bool shouldAddSTPToBlock(const MachineBasicBlock *BB); - - bool isNarrowFPStore(const MachineInstr &MI); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<MachineTraceMetrics>(); - AU.addPreserved<MachineTraceMetrics>(); - MachineFunctionPass::getAnalysisUsage(AU); - } -}; -char ARM64StorePairSuppress::ID = 0; -} // anonymous - -FunctionPass *llvm::createARM64StorePairSuppressPass() { - return new ARM64StorePairSuppress(); -} - -/// Return true if an STP can be added to this block without increasing the -/// critical resource height. STP is good to form in Ld/St limited blocks and -/// bad to form in float-point limited blocks. This is true independent of the -/// critical path. 
If the critical path is longer than the resource height, the -/// extra vector ops can limit physreg renaming. Otherwise, it could simply -/// oversaturate the vector units. -bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { - if (!MinInstr) - MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); - - MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB); - unsigned ResLength = BBTrace.getResourceLength(); - - // Get the machine model's scheduling class for STPQi. - // Bypass TargetSchedule's SchedClass resolution since we only have an opcode. - unsigned SCIdx = TII->get(ARM64::STPDi).getSchedClass(); - const MCSchedClassDesc *SCDesc = - SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx); - - // If a subtarget does not define resources for STPQi, bail here. - if (SCDesc->isValid() && !SCDesc->isVariant()) { - unsigned ResLenWithSTP = BBTrace.getResourceLength( - ArrayRef<const MachineBasicBlock *>(), SCDesc); - if (ResLenWithSTP > ResLength) { - DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber() - << " resources " << ResLength << " -> " << ResLenWithSTP - << "\n"); - return false; - } - } - return true; -} - -/// Return true if this is a floating-point store smaller than the V reg. On -/// cyclone, these require a vector shuffle before storing a pair. -/// Ideally we would call getMatchingPairOpcode() and have the machine model -/// tell us if it's profitable with no cpu knowledge here. -/// -/// FIXME: We plan to develop a decent Target abstraction for simple loads and -/// stores. Until then use a nasty switch similar to ARM64LoadStoreOptimizer. -bool ARM64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) { - switch (MI.getOpcode()) { - default: - return false; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STURSi: - case ARM64::STURDi: - return true; - } -} - -bool ARM64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) { - MF = &mf; - TII = static_cast<const ARM64InstrInfo *>(MF->getTarget().getInstrInfo()); - TRI = MF->getTarget().getRegisterInfo(); - MRI = &MF->getRegInfo(); - const TargetSubtargetInfo &ST = - MF->getTarget().getSubtarget<TargetSubtargetInfo>(); - SchedModel.init(*ST.getSchedModel(), &ST, TII); - - Traces = &getAnalysis<MachineTraceMetrics>(); - MinInstr = 0; - - DEBUG(dbgs() << "*** " << getPassName() << ": " << MF->getName() << '\n'); - - if (!SchedModel.hasInstrSchedModel()) { - DEBUG(dbgs() << " Skipping pass: no machine model present.\n"); - return false; - } - - // Check for a sequence of stores to the same base address. We don't need to - // precisely determine whether a store pair can be formed. But we do want to - // filter out most situations where we can't form store pairs to avoid - // computing trace metrics in those cases. - for (auto &MBB: *MF) { - bool SuppressSTP = false; - unsigned PrevBaseReg = 0; - for (auto &MI: MBB) { - if (!isNarrowFPStore(MI)) - continue; - unsigned BaseReg; - unsigned Offset; - if (TII->getLdStBaseRegImmOfs(&MI, BaseReg, Offset, TRI)) { - if (PrevBaseReg == BaseReg) { - // If this block can take STPs, skip ahead to the next block. - if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent())) - break; - // Otherwise, continue unpairing the stores in this block. - DEBUG(dbgs() << "Unpairing store " << MI << "\n"); - SuppressSTP = true; - TII->suppressLdStPair(&MI); - } - PrevBaseReg = BaseReg; - } else - PrevBaseReg = 0; - } - } - // This pass just sets some internal MachineMemOperand flags. It can't really - // invalidate anything. 
- return false; -} diff --git a/lib/Target/ARM64/ARM64Subtarget.cpp b/lib/Target/ARM64/ARM64Subtarget.cpp deleted file mode 100644 index 14b5444..0000000 --- a/lib/Target/ARM64/ARM64Subtarget.cpp +++ /dev/null @@ -1,100 +0,0 @@ -//===-- ARM64Subtarget.cpp - ARM64 Subtarget Information --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ARM64 specific subclass of TargetSubtarget. -// -//===----------------------------------------------------------------------===// - -#include "ARM64InstrInfo.h" -#include "ARM64Subtarget.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineScheduler.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_SUBTARGETINFO_CTOR -#define GET_SUBTARGETINFO_TARGET_DESC -#include "ARM64GenSubtargetInfo.inc" - -using namespace llvm; - -ARM64Subtarget::ARM64Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS) - : ARM64GenSubtargetInfo(TT, CPU, FS), HasZeroCycleRegMove(false), - HasZeroCycleZeroing(false), CPUString(CPU), TargetTriple(TT) { - // Determine default and user-specified characteristics - - if (CPUString.empty()) - // We default to Cyclone for now. - CPUString = "cyclone"; - - ParseSubtargetFeatures(CPUString, FS); -} - -/// ClassifyGlobalReference - Find the target operand flags that describe -/// how a global value should be referenced for the current subtarget. -unsigned char -ARM64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, - const TargetMachine &TM) const { - - // Determine whether this is a reference to a definition or a declaration. - // Materializable GVs (in JIT lazy compilation mode) do not require an extra - // load from stub. - bool isDecl = GV->hasAvailableExternallyLinkage(); - if (GV->isDeclaration() && !GV->isMaterializable()) - isDecl = true; - - // MachO large model always goes via a GOT, simply to get a single 8-byte - // absolute relocation on all global addresses. - if (TM.getCodeModel() == CodeModel::Large && isTargetMachO()) - return ARM64II::MO_GOT; - - // The small code mode's direct accesses use ADRP, which cannot necessarily - // produce the value 0 (if the code is above 4GB). Therefore they must use the - // GOT. - if (TM.getCodeModel() == CodeModel::Small && GV->isWeakForLinker() && isDecl) - return ARM64II::MO_GOT; - - // If symbol visibility is hidden, the extra load is not needed if - // the symbol is definitely defined in the current translation unit. - - // The handling of non-hidden symbols in PIC mode is rather target-dependent: - // + On MachO, if the symbol is defined in this module the GOT can be - // skipped. - // + On ELF, the R_AARCH64_COPY relocation means that even symbols actually - // defined could end up in unexpected places. Use a GOT. - if (TM.getRelocationModel() != Reloc::Static && GV->hasDefaultVisibility()) { - if (isTargetMachO()) - return (isDecl || GV->isWeakForLinker()) ? ARM64II::MO_GOT - : ARM64II::MO_NO_FLAG; - else - return ARM64II::MO_GOT; - } - - return ARM64II::MO_NO_FLAG; -} - -/// This function returns the name of a function which has an interface -/// like the non-standard bzero function, if such a function exists on -/// the current subtarget and it is considered prefereable over -/// memset with zero passed as the second argument. 
Otherwise it -/// returns null. -const char *ARM64Subtarget::getBZeroEntry() const { - // At the moment, always prefer bzero. - return "bzero"; -} - -void ARM64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, - MachineInstr *begin, MachineInstr *end, - unsigned NumRegionInstrs) const { - // LNT run (at least on Cyclone) showed reasonably significant gains for - // bi-directional scheduling. 253.perlbmk. - Policy.OnlyTopDown = false; - Policy.OnlyBottomUp = false; -} diff --git a/lib/Target/ARM64/ARM64Subtarget.h b/lib/Target/ARM64/ARM64Subtarget.h deleted file mode 100644 index 1cbd79e..0000000 --- a/lib/Target/ARM64/ARM64Subtarget.h +++ /dev/null @@ -1,87 +0,0 @@ -//=====---- ARM64Subtarget.h - Define Subtarget for the ARM64 -*- C++ -*--====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the ARM64 specific subclass of TargetSubtarget. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64SUBTARGET_H -#define ARM64SUBTARGET_H - -#include "llvm/Target/TargetSubtargetInfo.h" -#include "ARM64RegisterInfo.h" -#include <string> - -#define GET_SUBTARGETINFO_HEADER -#include "ARM64GenSubtargetInfo.inc" - -namespace llvm { -class GlobalValue; -class StringRef; - -class ARM64Subtarget : public ARM64GenSubtargetInfo { -protected: - // HasZeroCycleRegMove - Has zero-cycle register mov instructions. - bool HasZeroCycleRegMove; - - // HasZeroCycleZeroing - Has zero-cycle zeroing instructions. - bool HasZeroCycleZeroing; - - /// CPUString - String name of used CPU. - std::string CPUString; - - /// TargetTriple - What processor and OS we're targeting. - Triple TargetTriple; - -public: - /// This constructor initializes the data members to match that - /// of the specified triple. - ARM64Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS); - - bool enableMachineScheduler() const override { return true; } - - bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } - - bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } - - bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } - - bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } - - bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } - - bool isCyclone() const { return CPUString == "cyclone"; } - - /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size - /// that still makes it profitable to inline the call. - unsigned getMaxInlineSizeThreshold() const { return 64; } - - /// ParseSubtargetFeatures - Parses features string setting specified - /// subtarget options. Definition of function is auto generated by tblgen. - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - - /// ClassifyGlobalReference - Find the target operand flags that describe - /// how a global value should be referenced for the current subtarget. - unsigned char ClassifyGlobalReference(const GlobalValue *GV, - const TargetMachine &TM) const; - - /// This function returns the name of a function which has an interface - /// like the non-standard bzero function, if such a function exists on - /// the current subtarget and it is considered prefereable over - /// memset with zero passed as the second argument. Otherwise it - /// returns null. 
- const char *getBZeroEntry() const; - - void overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin, - MachineInstr *end, unsigned NumRegionInstrs) const; -}; -} // End llvm namespace - -#endif // ARM64SUBTARGET_H diff --git a/lib/Target/ARM64/ARM64TargetMachine.cpp b/lib/Target/ARM64/ARM64TargetMachine.cpp deleted file mode 100644 index 101dc25..0000000 --- a/lib/Target/ARM64/ARM64TargetMachine.cpp +++ /dev/null @@ -1,157 +0,0 @@ -//===-- ARM64TargetMachine.cpp - Define TargetMachine for ARM64 -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#include "ARM64.h" -#include "ARM64TargetMachine.h" -#include "llvm/PassManager.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Transforms/Scalar.h" -using namespace llvm; - -static cl::opt<bool> EnableCCMP("arm64-ccmp", - cl::desc("Enable the CCMP formation pass"), - cl::init(true)); - -static cl::opt<bool> EnableStPairSuppress("arm64-stp-suppress", cl::Hidden, - cl::desc("Suppress STP for ARM64"), - cl::init(true)); - -static cl::opt<bool> -EnablePromoteConstant("arm64-promote-const", cl::Hidden, - cl::desc("Enable the promote constant pass"), - cl::init(true)); - -static cl::opt<bool> -EnableCollectLOH("arm64-collect-loh", cl::Hidden, - cl::desc("Enable the pass that emits the linker" - " optimization hints (LOH)"), - cl::init(true)); - -extern "C" void LLVMInitializeARM64Target() { - // Register the target. - RegisterTargetMachine<ARM64TargetMachine> X(TheARM64Target); -} - -/// TargetMachine ctor - Create an ARM64 architecture model. -/// -ARM64TargetMachine::ARM64TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS), - DL(Subtarget.isTargetMachO() ? "e-m:o-i64:64-i128:128-n32:64-S128" - : "e-m:e-i64:64-i128:128-n32:64-S128"), - InstrInfo(Subtarget), TLInfo(*this), FrameLowering(*this, Subtarget), - TSInfo(*this) { - initAsmInfo(); -} - -namespace { -/// ARM64 Code Generator Pass Configuration Options. -class ARM64PassConfig : public TargetPassConfig { -public: - ARM64PassConfig(ARM64TargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} - - ARM64TargetMachine &getARM64TargetMachine() const { - return getTM<ARM64TargetMachine>(); - } - - virtual bool addPreISel(); - virtual bool addInstSelector(); - virtual bool addILPOpts(); - virtual bool addPreRegAlloc(); - virtual bool addPostRegAlloc(); - virtual bool addPreSched2(); - virtual bool addPreEmitPass(); -}; -} // namespace - -void ARM64TargetMachine::addAnalysisPasses(PassManagerBase &PM) { - // Add first the target-independent BasicTTI pass, then our ARM64 pass. This - // allows the ARM64 pass to delegate to the target independent layer when - // appropriate. 
- PM.add(createBasicTargetTransformInfoPass(this)); - PM.add(createARM64TargetTransformInfoPass(this)); -} - -TargetPassConfig *ARM64TargetMachine::createPassConfig(PassManagerBase &PM) { - return new ARM64PassConfig(this, PM); -} - -// Pass Pipeline Configuration -bool ARM64PassConfig::addPreISel() { - // Run promote constant before global merge, so that the promoted constants - // get a chance to be merged - if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant) - addPass(createARM64PromoteConstantPass()); - if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createGlobalMergePass(TM)); - if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createARM64AddressTypePromotionPass()); - return false; -} - -bool ARM64PassConfig::addInstSelector() { - addPass(createARM64ISelDag(getARM64TargetMachine(), getOptLevel())); - - // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many - // references to _TLS_MODULE_BASE_ as possible. - if (TM->getSubtarget<ARM64Subtarget>().isTargetELF() && - getOptLevel() != CodeGenOpt::None) - addPass(createARM64CleanupLocalDynamicTLSPass()); - - return false; -} - -bool ARM64PassConfig::addILPOpts() { - if (EnableCCMP) - addPass(createARM64ConditionalCompares()); - addPass(&EarlyIfConverterID); - if (EnableStPairSuppress) - addPass(createARM64StorePairSuppressPass()); - return true; -} - -bool ARM64PassConfig::addPreRegAlloc() { - // Use AdvSIMD scalar instructions whenever profitable. - addPass(createARM64AdvSIMDScalar()); - return true; -} - -bool ARM64PassConfig::addPostRegAlloc() { - // Change dead register definitions to refer to the zero register. - addPass(createARM64DeadRegisterDefinitions()); - return true; -} - -bool ARM64PassConfig::addPreSched2() { - // Expand some pseudo instructions to allow proper scheduling. - addPass(createARM64ExpandPseudoPass()); - // Use load/store pair instructions when possible. - addPass(createARM64LoadStoreOptimizationPass()); - return true; -} - -bool ARM64PassConfig::addPreEmitPass() { - // Relax conditional branch instructions if they're otherwise out of - // range of their destination. - addPass(createARM64BranchRelaxation()); - if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH) - addPass(createARM64CollectLOHPass()); - return true; -} diff --git a/lib/Target/ARM64/ARM64TargetMachine.h b/lib/Target/ARM64/ARM64TargetMachine.h deleted file mode 100644 index 8274550..0000000 --- a/lib/Target/ARM64/ARM64TargetMachine.h +++ /dev/null @@ -1,69 +0,0 @@ -//===-- ARM64TargetMachine.h - Define TargetMachine for ARM64 ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the ARM64 specific subclass of TargetMachine. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ARM64TARGETMACHINE_H -#define ARM64TARGETMACHINE_H - -#include "ARM64InstrInfo.h" -#include "ARM64ISelLowering.h" -#include "ARM64Subtarget.h" -#include "ARM64FrameLowering.h" -#include "ARM64SelectionDAGInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/MC/MCStreamer.h" - -namespace llvm { - -class ARM64TargetMachine : public LLVMTargetMachine { -protected: - ARM64Subtarget Subtarget; - -private: - const DataLayout DL; - ARM64InstrInfo InstrInfo; - ARM64TargetLowering TLInfo; - ARM64FrameLowering FrameLowering; - ARM64SelectionDAGInfo TSInfo; - -public: - ARM64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Reloc::Model RM, - CodeModel::Model CM, CodeGenOpt::Level OL); - - const ARM64Subtarget *getSubtargetImpl() const override { return &Subtarget; } - const ARM64TargetLowering *getTargetLowering() const override { - return &TLInfo; - } - const DataLayout *getDataLayout() const override { return &DL; } - const ARM64FrameLowering *getFrameLowering() const override { - return &FrameLowering; - } - const ARM64InstrInfo *getInstrInfo() const override { return &InstrInfo; } - const ARM64RegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); - } - const ARM64SelectionDAGInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } - - // Pass Pipeline Configuration - TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - - /// \brief Register ARM64 analysis passes with a pass manager. - void addAnalysisPasses(PassManagerBase &PM) override; -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/ARM64TargetObjectFile.cpp b/lib/Target/ARM64/ARM64TargetObjectFile.cpp deleted file mode 100644 index cde01e5..0000000 --- a/lib/Target/ARM64/ARM64TargetObjectFile.cpp +++ /dev/null @@ -1,52 +0,0 @@ -//===-- ARM64TargetObjectFile.cpp - ARM64 Object Info ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "ARM64TargetObjectFile.h" -#include "ARM64TargetMachine.h" -#include "llvm/IR/Mangler.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/Dwarf.h" -using namespace llvm; -using namespace dwarf; - -void ARM64_ELFTargetObjectFile::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - TargetLoweringObjectFileELF::Initialize(Ctx, TM); - InitializeELF(TM.Options.UseInitArray); -} - -const MCExpr *ARM64_MachoTargetObjectFile::getTTypeGlobalReference( - const GlobalValue *GV, unsigned Encoding, Mangler &Mang, - const TargetMachine &TM, MachineModuleInfo *MMI, - MCStreamer &Streamer) const { - // On Darwin, we can reference dwarf symbols with foo@GOT-., which - // is an indirect pc-relative reference. The default implementation - // won't reference using the GOT, so we need this target-specific - // version. 
- if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) { - const MCSymbol *Sym = TM.getSymbol(GV, Mang); - const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext()); - MCSymbol *PCSym = getContext().CreateTempSymbol(); - Streamer.EmitLabel(PCSym); - const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); - return MCBinaryExpr::CreateSub(Res, PC, getContext()); - } - - return TargetLoweringObjectFileMachO::getTTypeGlobalReference( - GV, Encoding, Mang, TM, MMI, Streamer); -} - -MCSymbol *ARM64_MachoTargetObjectFile::getCFIPersonalitySymbol( - const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, - MachineModuleInfo *MMI) const { - return TM.getSymbol(GV, Mang); -} diff --git a/lib/Target/ARM64/ARM64TargetObjectFile.h b/lib/Target/ARM64/ARM64TargetObjectFile.h deleted file mode 100644 index 62446f9..0000000 --- a/lib/Target/ARM64/ARM64TargetObjectFile.h +++ /dev/null @@ -1,40 +0,0 @@ -//===-- ARM64TargetObjectFile.h - ARM64 Object Info -*- C++ -------------*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_ARM64_TARGETOBJECTFILE_H -#define LLVM_TARGET_ARM64_TARGETOBJECTFILE_H - -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Target/TargetLoweringObjectFile.h" - -namespace llvm { -class ARM64TargetMachine; - -/// This implementation is used for AArch64 ELF targets (Linux in particular). -class ARM64_ELFTargetObjectFile : public TargetLoweringObjectFileELF { - void Initialize(MCContext &Ctx, const TargetMachine &TM) override; -}; - -/// ARM64_MachoTargetObjectFile - This TLOF implementation is used for Darwin. -class ARM64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { -public: - const MCExpr *getTTypeGlobalReference(const GlobalValue *GV, - unsigned Encoding, Mangler &Mang, - const TargetMachine &TM, - MachineModuleInfo *MMI, - MCStreamer &Streamer) const override; - - MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang, - const TargetMachine &TM, - MachineModuleInfo *MMI) const override; -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp b/lib/Target/ARM64/ARM64TargetTransformInfo.cpp deleted file mode 100644 index 9b598d7..0000000 --- a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp +++ /dev/null @@ -1,326 +0,0 @@ -//===-- ARM64TargetTransformInfo.cpp - ARM64 specific TTI pass ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements a TargetTransformInfo analysis pass specific to the -/// ARM64 target machine. It uses the target's detailed information to provide -/// more precise answers to certain TTI queries, while letting the target -/// independent and default TTI implementations handle the rest. 
-/// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64tti" -#include "ARM64.h" -#include "ARM64TargetMachine.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/CostTable.h" -#include "llvm/Target/TargetLowering.h" -using namespace llvm; - -// Declare the pass initialization routine locally as target-specific passes -// don't havve a target-wide initialization entry point, and so we rely on the -// pass constructor initialization. -namespace llvm { -void initializeARM64TTIPass(PassRegistry &); -} - -namespace { - -class ARM64TTI final : public ImmutablePass, public TargetTransformInfo { - const ARM64TargetMachine *TM; - const ARM64Subtarget *ST; - const ARM64TargetLowering *TLI; - - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; - -public: - ARM64TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { - llvm_unreachable("This pass cannot be directly constructed"); - } - - ARM64TTI(const ARM64TargetMachine *TM) - : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), - TLI(TM->getTargetLowering()) { - initializeARM64TTIPass(*PassRegistry::getPassRegistry()); - } - - void initializePass() override { pushTTIStack(this); } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - TargetTransformInfo::getAnalysisUsage(AU); - } - - /// Pass identification. - static char ID; - - /// Provide necessary pointer adjustments for the two base classes. - void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &TargetTransformInfo::ID) - return (TargetTransformInfo *)this; - return this; - } - - /// \name Scalar TTI Implementations - /// @{ - - unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override; - PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override; - - /// @} - - /// \name Vector TTI Implementations - /// @{ - - unsigned getNumberOfRegisters(bool Vector) const override { - if (Vector) - return 32; - - return 31; - } - - unsigned getRegisterBitWidth(bool Vector) const override { - if (Vector) - return 128; - - return 64; - } - - unsigned getMaximumUnrollFactor() const override { return 2; } - - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const - override; - - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const - override; - - unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind Opd1Info = OK_AnyValue, - OperandValueKind Opd2Info = OK_AnyValue) const - override; - - unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override; - - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const - override; - - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const override; - /// @} -}; - -} // end anonymous namespace - -INITIALIZE_AG_PASS(ARM64TTI, TargetTransformInfo, "arm64tti", - "ARM64 Target Transform Info", true, true, false) -char ARM64TTI::ID = 0; - -ImmutablePass * -llvm::createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM) { - return new ARM64TTI(TM); -} - -unsigned ARM64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { - assert(Ty->isIntegerTy()); - - unsigned BitSize = Ty->getPrimitiveSizeInBits(); - if (BitSize == 0) - return ~0U; - - 
int64_t Val = Imm.getSExtValue(); - if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize)) - return 1; - - if ((int64_t)Val < 0) - Val = ~Val; - if (BitSize == 32) - Val &= (1LL << 32) - 1; - - unsigned LZ = countLeadingZeros((uint64_t)Val); - unsigned Shift = (63 - LZ) / 16; - // MOVZ is free so return true for one or fewer MOVK. - return (Shift == 0) ? 1 : Shift; -} - -ARM64TTI::PopcntSupportKind ARM64TTI::getPopcntSupport(unsigned TyWidth) const { - assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); - if (TyWidth == 32 || TyWidth == 64) - return PSK_FastHardware; - // TODO: ARM64TargetLowering::LowerCTPOP() supports 128bit popcount. - return PSK_Software; -} - -unsigned ARM64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - EVT SrcTy = TLI->getValueType(Src); - EVT DstTy = TLI->getValueType(Dst); - - if (!SrcTy.isSimple() || !DstTy.isSimple()) - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); - - static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = { - // LowerVectorINT_TO_FP: - { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, - { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, - // LowerVectorFP_TO_INT - { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, - { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 }, - { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, - { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, - { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 }, - { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 }, - { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 }, - { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 }, - }; - - int Idx = ConvertCostTableLookup<MVT>( - ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(), - SrcTy.getSimpleVT()); - if (Idx != -1) - return ConversionTbl[Idx].Cost; - - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); -} - -unsigned ARM64TTI::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { - assert(Val->isVectorTy() && "This must be a vector type"); - - if (Index != -1U) { - // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val); - - // This type is legalized to a scalar type. - if (!LT.second.isVector()) - return 0; - - // The type may be split. Normalize the index to the new type. - unsigned Width = LT.second.getVectorNumElements(); - Index = Index % Width; - - // The element at index zero is already inside the vector. - if (Index == 0) - return 0; - } - - // All other insert/extracts cost this much. - return 2; -} - -unsigned ARM64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind Opd1Info, - OperandValueKind Opd2Info) const { - // Legalize the type. 
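Editorial aside: the integer-immediate cost logic above counts how many 16-bit chunks a value spans; MOVZ materializes the first chunk and each additional chunk costs one MOVK. A standalone sketch of that counting follows, with the logical-immediate special case omitted; approxImmCost is a hypothetical name, not the LLVM API, and __builtin_clzll is the GCC/Clang builtin.

#include <cstdint>
#include <cstdio>

// Hypothetical restatement of the chunk-counting heuristic. The original code
// also treats valid logical immediates as cost 1 before reaching this point.
static unsigned approxImmCost(int64_t Val, unsigned BitSize) {
  if (Val == 0)
    return 1;                        // a single MOVZ
  if (Val < 0)
    Val = ~Val;                      // negative values are covered by MOVN
  if (BitSize == 32)
    Val &= (1LL << 32) - 1;
  if (Val == 0)
    return 1;
  unsigned LZ = __builtin_clzll((uint64_t)Val);
  unsigned Shift = (63 - LZ) / 16;   // index of the highest non-zero chunk
  return Shift == 0 ? 1 : Shift;     // roughly the MOVK count, floor of 1
}

int main() {
  printf("%u %u %u\n", approxImmCost(0x1234, 64),   // 1
         approxImmCost(0x123456789LL, 64),          // 2
         approxImmCost(-2, 64));                    // 1
}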
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); - - int ISD = TLI->InstructionOpcodeToISD(Opcode); - - switch (ISD) { - default: - return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info, - Opd2Info); - case ISD::ADD: - case ISD::MUL: - case ISD::XOR: - case ISD::OR: - case ISD::AND: - // These nodes are marked as 'custom' for combining purposes only. - // We know that they are legal. See LowerAdd in ISelLowering. - return 1 * LT.first; - } -} - -unsigned ARM64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { - // Address computations in vectorized code with non-consecutive addresses will - // likely result in more instructions compared to scalar code where the - // computation can more often be merged into the index mode. The resulting - // extra micro-ops can significantly decrease throughput. - unsigned NumVectorInstToHideOverhead = 10; - - if (Ty->isVectorTy() && IsComplex) - return NumVectorInstToHideOverhead; - - // In many cases the address computation is not merged into the instruction - // addressing mode. - return 1; -} - -unsigned ARM64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const { - - int ISD = TLI->InstructionOpcodeToISD(Opcode); - // We don't lower vector selects well that are wider than the register width. - if (ValTy->isVectorTy() && ISD == ISD::SELECT) { - // We would need this many instructions to hide the scalarization happening. - unsigned AmortizationCost = 20; - static const TypeConversionCostTblEntry<MVT::SimpleValueType> - VectorSelectTbl[] = { - { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost }, - { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost }, - { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost }, - { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost }, - { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost }, - { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost } - }; - - EVT SelCondTy = TLI->getValueType(CondTy); - EVT SelValTy = TLI->getValueType(ValTy); - if (SelCondTy.isSimple() && SelValTy.isSimple()) { - int Idx = - ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(), - SelValTy.getSimpleVT()); - if (Idx != -1) - return VectorSelectTbl[Idx].Cost; - } - } - return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); -} - -unsigned ARM64TTI::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); - - if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 && - Src->getVectorElementType()->isIntegerTy(64)) { - // Unaligned stores are extremely inefficient. We don't split - // unaligned v2i64 stores because the negative impact that has shown in - // practice on inlined memcpy code. - // We make v2i64 stores expensive so that we will only vectorize if there - // are 6 other instructions getting vectorized. - unsigned AmortizationCost = 6; - - return LT.first * 2 * AmortizationCost; - } - - if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) && - Src->getVectorNumElements() < 8) { - // We scalarize the loads/stores because there is not v.4b register and we - // have to promote the elements to v.4h. - unsigned NumVecElts = Src->getVectorNumElements(); - unsigned NumVectorizableInstsToAmortize = NumVecElts * 2; - // We generate 2 instructions per vector element. 
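Editorial aside: the two store-cost special cases described in the comments above amount to simple arithmetic: unaligned v2i64 stores are multiplied by an amortization factor of 6, and i8 vectors with fewer than 8 elements are scalarized via v.4h at roughly two instructions per element. A hypothetical standalone restatement follows; the function name and parameters are illustrative, not LLVM API, and LegalizedParts stands in for LT.first.

#include <cstdio>

static unsigned approxStoreCost(unsigned LegalizedParts, bool UnalignedV2I64,
                                unsigned NumI8Elts) {
  if (UnalignedV2I64) {
    const unsigned AmortizationCost = 6;          // needs ~6 other vectorized
    return LegalizedParts * 2 * AmortizationCost; // instructions to pay off
  }
  if (NumI8Elts > 0 && NumI8Elts < 8) {
    // No v.4b register: elements are promoted to v.4h and each element costs
    // roughly two instructions, amortized over 2 * NumI8Elts instructions.
    unsigned InstsToAmortize = NumI8Elts * 2;
    return InstsToAmortize * NumI8Elts * 2;
  }
  return LegalizedParts;                          // default: cost per legal part
}

int main() {
  printf("%u\n", approxStoreCost(1, true, 0));    // 12: unaligned v2i64 store
  printf("%u\n", approxStoreCost(1, false, 4));   // 64: scalarized v4i8 access
}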
- return NumVectorizableInstsToAmortize * NumVecElts * 2; - } - - return LT.first; -} diff --git a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp b/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp deleted file mode 100644 index 38a61d8..0000000 --- a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp +++ /dev/null @@ -1,4832 +0,0 @@ -//===-- ARM64AsmParser.cpp - Parse ARM64 assembly to MCInst instructions --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCTargetAsmParser.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" -#include <cstdio> -using namespace llvm; - -namespace { - -class ARM64Operand; - -class ARM64AsmParser : public MCTargetAsmParser { -public: - typedef SmallVectorImpl<MCParsedAsmOperand *> OperandVector; - -private: - StringRef Mnemonic; ///< Instruction mnemonic. 
- MCSubtargetInfo &STI; - MCAsmParser &Parser; - - MCAsmParser &getParser() const { return Parser; } - MCAsmLexer &getLexer() const { return Parser.getLexer(); } - - SMLoc getLoc() const { return Parser.getTok().getLoc(); } - - bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands); - unsigned parseCondCodeString(StringRef Cond); - bool parseCondCode(OperandVector &Operands, bool invertCondCode); - int tryParseRegister(); - int tryMatchVectorRegister(StringRef &Kind); - bool parseOptionalShift(OperandVector &Operands); - bool parseOptionalExtend(OperandVector &Operands); - bool parseRegister(OperandVector &Operands); - bool parseMemory(OperandVector &Operands); - bool parseSymbolicImmVal(const MCExpr *&ImmVal); - bool parseVectorList(OperandVector &Operands); - bool parseOperand(OperandVector &Operands, bool isCondCode, - bool invertCondCode); - - void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } - bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } - bool showMatchError(SMLoc Loc, unsigned ErrCode); - - bool parseDirectiveWord(unsigned Size, SMLoc L); - bool parseDirectiveTLSDescCall(SMLoc L); - - bool parseDirectiveLOH(StringRef LOH, SMLoc L); - - bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc); - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - OperandVector &Operands, MCStreamer &Out, - unsigned &ErrorInfo, bool MatchingInlineAsm); -/// @name Auto-generated Match Functions -/// { - -#define GET_ASSEMBLER_HEADER -#include "ARM64GenAsmMatcher.inc" - - /// } - - OperandMatchResultTy tryParseNoIndexMemory(OperandVector &Operands); - OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands); - OperandMatchResultTy tryParseSystemRegister(OperandVector &Operands); - OperandMatchResultTy tryParseCPSRField(OperandVector &Operands); - OperandMatchResultTy tryParseSysCROperand(OperandVector &Operands); - OperandMatchResultTy tryParsePrefetch(OperandVector &Operands); - OperandMatchResultTy tryParseAdrpLabel(OperandVector &Operands); - OperandMatchResultTy tryParseAdrLabel(OperandVector &Operands); - OperandMatchResultTy tryParseFPImm(OperandVector &Operands); - bool tryParseVectorRegister(OperandVector &Operands); - -public: - enum ARM64MatchResultTy { - Match_InvalidSuffix = FIRST_TARGET_MATCH_RESULT_TY, -#define GET_OPERAND_DIAGNOSTIC_TYPES -#include "ARM64GenAsmMatcher.inc" - }; - ARM64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &MII) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { - MCAsmParserExtension::Initialize(_Parser); - } - - virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, OperandVector &Operands); - virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); - virtual bool ParseDirective(AsmToken DirectiveID); - unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind); - - static bool classifySymbolRef(const MCExpr *Expr, - ARM64MCExpr::VariantKind &ELFRefKind, - MCSymbolRefExpr::VariantKind &DarwinRefKind, - const MCConstantExpr *&Addend); -}; -} // end anonymous namespace - -namespace { - -/// ARM64Operand - Instances of this class represent a parsed ARM64 machine -/// instruction. 
-class ARM64Operand : public MCParsedAsmOperand { -public: - enum MemIdxKindTy { - ImmediateOffset, // pre-indexed, no writeback - RegisterOffset // register offset, with optional extend - }; - -private: - enum KindTy { - k_Immediate, - k_Memory, - k_Register, - k_VectorList, - k_VectorIndex, - k_Token, - k_SysCR, - k_Prefetch, - k_Shifter, - k_Extend, - k_FPImm, - k_Barrier, - k_SystemRegister, - k_CPSRField - } Kind; - - SMLoc StartLoc, EndLoc, OffsetLoc; - - struct TokOp { - const char *Data; - unsigned Length; - bool IsSuffix; // Is the operand actually a suffix on the mnemonic. - }; - - struct RegOp { - unsigned RegNum; - bool isVector; - }; - - struct VectorListOp { - unsigned RegNum; - unsigned Count; - unsigned NumElements; - unsigned ElementKind; - }; - - struct VectorIndexOp { - unsigned Val; - }; - - struct ImmOp { - const MCExpr *Val; - }; - - struct FPImmOp { - unsigned Val; // Encoded 8-bit representation. - }; - - struct BarrierOp { - unsigned Val; // Not the enum since not all values have names. - }; - - struct SystemRegisterOp { - // 16-bit immediate, usually from the ARM64SYS::SystermRegister enum, - // but not limited to those values. - uint16_t Val; - }; - - struct CPSRFieldOp { - ARM64SYS::CPSRField Field; - }; - - struct SysCRImmOp { - unsigned Val; - }; - - struct PrefetchOp { - unsigned Val; - }; - - struct ShifterOp { - unsigned Val; - }; - - struct ExtendOp { - unsigned Val; - }; - - // This is for all forms of ARM64 address expressions - struct MemOp { - unsigned BaseRegNum, OffsetRegNum; - ARM64_AM::ExtendType ExtType; - unsigned ShiftVal; - bool ExplicitShift; - const MCExpr *OffsetImm; - MemIdxKindTy Mode; - }; - - union { - struct TokOp Tok; - struct RegOp Reg; - struct VectorListOp VectorList; - struct VectorIndexOp VectorIndex; - struct ImmOp Imm; - struct FPImmOp FPImm; - struct BarrierOp Barrier; - struct SystemRegisterOp SystemRegister; - struct CPSRFieldOp CPSRField; - struct SysCRImmOp SysCRImm; - struct PrefetchOp Prefetch; - struct ShifterOp Shifter; - struct ExtendOp Extend; - struct MemOp Mem; - }; - - // Keep the MCContext around as the MCExprs may need manipulated during - // the add<>Operands() calls. - MCContext &Ctx; - - ARM64Operand(KindTy K, MCContext &_Ctx) - : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {} - -public: - ARM64Operand(const ARM64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) { - Kind = o.Kind; - StartLoc = o.StartLoc; - EndLoc = o.EndLoc; - switch (Kind) { - case k_Token: - Tok = o.Tok; - break; - case k_Immediate: - Imm = o.Imm; - break; - case k_FPImm: - FPImm = o.FPImm; - break; - case k_Barrier: - Barrier = o.Barrier; - break; - case k_SystemRegister: - SystemRegister = o.SystemRegister; - break; - case k_CPSRField: - CPSRField = o.CPSRField; - break; - case k_Register: - Reg = o.Reg; - break; - case k_VectorList: - VectorList = o.VectorList; - break; - case k_VectorIndex: - VectorIndex = o.VectorIndex; - break; - case k_SysCR: - SysCRImm = o.SysCRImm; - break; - case k_Prefetch: - Prefetch = o.Prefetch; - break; - case k_Memory: - Mem = o.Mem; - break; - case k_Shifter: - Shifter = o.Shifter; - break; - case k_Extend: - Extend = o.Extend; - break; - } - } - - /// getStartLoc - Get the location of the first token of this operand. - SMLoc getStartLoc() const { return StartLoc; } - /// getEndLoc - Get the location of the last token of this operand. - SMLoc getEndLoc() const { return EndLoc; } - /// getOffsetLoc - Get the location of the offset of this memory operand. 
- SMLoc getOffsetLoc() const { return OffsetLoc; } - - StringRef getToken() const { - assert(Kind == k_Token && "Invalid access!"); - return StringRef(Tok.Data, Tok.Length); - } - - bool isTokenSuffix() const { - assert(Kind == k_Token && "Invalid access!"); - return Tok.IsSuffix; - } - - const MCExpr *getImm() const { - assert(Kind == k_Immediate && "Invalid access!"); - return Imm.Val; - } - - unsigned getFPImm() const { - assert(Kind == k_FPImm && "Invalid access!"); - return FPImm.Val; - } - - unsigned getBarrier() const { - assert(Kind == k_Barrier && "Invalid access!"); - return Barrier.Val; - } - - uint16_t getSystemRegister() const { - assert(Kind == k_SystemRegister && "Invalid access!"); - return SystemRegister.Val; - } - - ARM64SYS::CPSRField getCPSRField() const { - assert(Kind == k_CPSRField && "Invalid access!"); - return CPSRField.Field; - } - - unsigned getReg() const { - assert(Kind == k_Register && "Invalid access!"); - return Reg.RegNum; - } - - unsigned getVectorListStart() const { - assert(Kind == k_VectorList && "Invalid access!"); - return VectorList.RegNum; - } - - unsigned getVectorListCount() const { - assert(Kind == k_VectorList && "Invalid access!"); - return VectorList.Count; - } - - unsigned getVectorIndex() const { - assert(Kind == k_VectorIndex && "Invalid access!"); - return VectorIndex.Val; - } - - unsigned getSysCR() const { - assert(Kind == k_SysCR && "Invalid access!"); - return SysCRImm.Val; - } - - unsigned getPrefetch() const { - assert(Kind == k_Prefetch && "Invalid access!"); - return Prefetch.Val; - } - - unsigned getShifter() const { - assert(Kind == k_Shifter && "Invalid access!"); - return Shifter.Val; - } - - unsigned getExtend() const { - assert(Kind == k_Extend && "Invalid access!"); - return Extend.Val; - } - - bool isImm() const { return Kind == k_Immediate; } - bool isSImm9() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= -256 && Val < 256); - } - bool isSImm7s4() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= -256 && Val <= 252 && (Val & 3) == 0); - } - bool isSImm7s8() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= -512 && Val <= 504 && (Val & 7) == 0); - } - bool isSImm7s16() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= -1024 && Val <= 1008 && (Val & 15) == 0); - } - bool isImm0_7() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 8); - } - bool isImm1_8() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val > 0 && Val < 9); - } - bool isImm0_15() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 16); - } - bool isImm1_16() const { - if (!isImm()) - return false; - const MCConstantExpr 
*MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val > 0 && Val < 17); - } - bool isImm0_31() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 32); - } - bool isImm1_31() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 1 && Val < 32); - } - bool isImm1_32() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 1 && Val < 33); - } - bool isImm0_63() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 64); - } - bool isImm1_63() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 1 && Val < 64); - } - bool isImm1_64() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 1 && Val < 65); - } - bool isImm0_127() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 128); - } - bool isImm0_255() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 256); - } - bool isImm0_65535() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 65536); - } - bool isLogicalImm32() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - return ARM64_AM::isLogicalImmediate(MCE->getValue(), 32); - } - bool isLogicalImm64() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - return ARM64_AM::isLogicalImmediate(MCE->getValue(), 64); - } - bool isSIMDImmType10() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return false; - return ARM64_AM::isAdvSIMDModImmType10(MCE->getValue()); - } - bool isBranchTarget26() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return true; - int64_t Val = MCE->getValue(); - if (Val & 0x3) - return false; - return (Val >= -(0x2000000 << 2) && Val <= (0x1ffffff << 2)); - } - bool isBranchTarget19() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - return true; - int64_t Val = MCE->getValue(); - if (Val & 0x3) - return false; - return (Val >= -(0x40000 << 2) && Val <= (0x3ffff << 2)); - } - bool isBranchTarget14() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) - 
return true; - int64_t Val = MCE->getValue(); - if (Val & 0x3) - return false; - return (Val >= -(0x2000 << 2) && Val <= (0x1fff << 2)); - } - - bool isMovWSymbol(ArrayRef<ARM64MCExpr::VariantKind> AllowedModifiers) const { - if (!isImm()) - return false; - - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - const MCConstantExpr *Addend; - if (!ARM64AsmParser::classifySymbolRef(getImm(), ELFRefKind, DarwinRefKind, - Addend)) { - return false; - } - if (DarwinRefKind != MCSymbolRefExpr::VK_None) - return false; - - for (unsigned i = 0; i != AllowedModifiers.size(); ++i) { - if (ELFRefKind == AllowedModifiers[i]) - return Addend == 0; - } - - return false; - } - - bool isMovZSymbolG3() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G3 }; - return isMovWSymbol(Variants); - } - - bool isMovZSymbolG2() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G2, - ARM64MCExpr::VK_TPREL_G2, - ARM64MCExpr::VK_DTPREL_G2 }; - return isMovWSymbol(Variants); - } - - bool isMovZSymbolG1() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G1, - ARM64MCExpr::VK_GOTTPREL_G1, - ARM64MCExpr::VK_TPREL_G1, - ARM64MCExpr::VK_DTPREL_G1, }; - return isMovWSymbol(Variants); - } - - bool isMovZSymbolG0() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G0, - ARM64MCExpr::VK_TPREL_G0, - ARM64MCExpr::VK_DTPREL_G0 }; - return isMovWSymbol(Variants); - } - - bool isMovKSymbolG2() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G2_NC }; - return isMovWSymbol(Variants); - } - - bool isMovKSymbolG1() const { - static ARM64MCExpr::VariantKind Variants[] = { - ARM64MCExpr::VK_ABS_G1_NC, ARM64MCExpr::VK_TPREL_G1_NC, - ARM64MCExpr::VK_DTPREL_G1_NC - }; - return isMovWSymbol(Variants); - } - - bool isMovKSymbolG0() const { - static ARM64MCExpr::VariantKind Variants[] = { - ARM64MCExpr::VK_ABS_G0_NC, ARM64MCExpr::VK_GOTTPREL_G0_NC, - ARM64MCExpr::VK_TPREL_G0_NC, ARM64MCExpr::VK_DTPREL_G0_NC - }; - return isMovWSymbol(Variants); - } - - bool isFPImm() const { return Kind == k_FPImm; } - bool isBarrier() const { return Kind == k_Barrier; } - bool isSystemRegister() const { - if (Kind == k_SystemRegister) - return true; - // SPSel is legal for both the system register and the CPSR-field - // variants of MSR, so special case that. Fugly. - return (Kind == k_CPSRField && getCPSRField() == ARM64SYS::cpsr_SPSel); - } - bool isSystemCPSRField() const { return Kind == k_CPSRField; } - bool isReg() const { return Kind == k_Register && !Reg.isVector; } - bool isVectorReg() const { return Kind == k_Register && Reg.isVector; } - - /// Is this a vector list with the type implicit (presumably attached to the - /// instruction itself)? 
- template <unsigned NumRegs> bool isImplicitlyTypedVectorList() const { - return Kind == k_VectorList && VectorList.Count == NumRegs && - !VectorList.ElementKind; - } - - template <unsigned NumRegs, unsigned NumElements, char ElementKind> - bool isTypedVectorList() const { - if (Kind != k_VectorList) - return false; - if (VectorList.Count != NumRegs) - return false; - if (VectorList.ElementKind != ElementKind) - return false; - return VectorList.NumElements == NumElements; - } - - bool isVectorIndexB() const { - return Kind == k_VectorIndex && VectorIndex.Val < 16; - } - bool isVectorIndexH() const { - return Kind == k_VectorIndex && VectorIndex.Val < 8; - } - bool isVectorIndexS() const { - return Kind == k_VectorIndex && VectorIndex.Val < 4; - } - bool isVectorIndexD() const { - return Kind == k_VectorIndex && VectorIndex.Val < 2; - } - bool isToken() const { return Kind == k_Token; } - bool isTokenEqual(StringRef Str) const { - return Kind == k_Token && getToken() == Str; - } - bool isMem() const { return Kind == k_Memory; } - bool isSysCR() const { return Kind == k_SysCR; } - bool isPrefetch() const { return Kind == k_Prefetch; } - bool isShifter() const { return Kind == k_Shifter; } - bool isExtend() const { - // lsl is an alias for UXTX but will be a parsed as a k_Shifter operand. - if (isShifter()) { - ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val); - return ST == ARM64_AM::LSL; - } - return Kind == k_Extend; - } - bool isExtend64() const { - if (Kind != k_Extend) - return false; - // UXTX and SXTX require a 64-bit source register (the ExtendLSL64 class). - ARM64_AM::ExtendType ET = ARM64_AM::getArithExtendType(Extend.Val); - return ET != ARM64_AM::UXTX && ET != ARM64_AM::SXTX; - } - bool isExtendLSL64() const { - // lsl is an alias for UXTX but will be a parsed as a k_Shifter operand. - if (isShifter()) { - ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val); - return ST == ARM64_AM::LSL; - } - if (Kind != k_Extend) - return false; - ARM64_AM::ExtendType ET = ARM64_AM::getArithExtendType(Extend.Val); - return ET == ARM64_AM::UXTX || ET == ARM64_AM::SXTX; - } - - bool isArithmeticShifter() const { - if (!isShifter()) - return false; - - // An arithmetic shifter is LSL, LSR, or ASR. - ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val); - return ST == ARM64_AM::LSL || ST == ARM64_AM::LSR || ST == ARM64_AM::ASR; - } - - bool isMovImm32Shifter() const { - if (!isShifter()) - return false; - - // A MOVi shifter is LSL of 0, 16, 32, or 48. - ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val); - if (ST != ARM64_AM::LSL) - return false; - uint64_t Val = ARM64_AM::getShiftValue(Shifter.Val); - return (Val == 0 || Val == 16); - } - - bool isMovImm64Shifter() const { - if (!isShifter()) - return false; - - // A MOVi shifter is LSL of 0 or 16. - ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val); - if (ST != ARM64_AM::LSL) - return false; - uint64_t Val = ARM64_AM::getShiftValue(Shifter.Val); - return (Val == 0 || Val == 16 || Val == 32 || Val == 48); - } - - bool isAddSubShifter() const { - if (!isShifter()) - return false; - - // An ADD/SUB shifter is either 'lsl #0' or 'lsl #12'. - unsigned Val = Shifter.Val; - return ARM64_AM::getShiftType(Val) == ARM64_AM::LSL && - (ARM64_AM::getShiftValue(Val) == 0 || - ARM64_AM::getShiftValue(Val) == 12); - } - - bool isLogicalVecShifter() const { - if (!isShifter()) - return false; - - // A logical vector shifter is a left shift by 0, 8, 16, or 24. 
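Editorial aside: the value checks in the shifter predicates above encode the ISA rules directly: ADD/SUB immediates take lsl #0 or #12, the 32-bit MOV-immediate form takes lsl #0 or #16, and the 64-bit form additionally allows #32 and #48 (per the checked values, whatever the adjacent comments suggest). A hypothetical standalone restatement of those checks, not LLVM code:

#include <cstdio>

static bool isAddSubShiftAmount(unsigned Amount) {
  return Amount == 0 || Amount == 12;          // 12-bit immediate, optionally << 12
}
static bool isWideImmShiftAmount(unsigned Amount, unsigned RegBits) {
  return Amount % 16 == 0 && Amount < RegBits; // 0/16 for 32-bit, up to 48 for 64-bit
}

int main() {
  printf("%d %d %d\n", isAddSubShiftAmount(12),     // 1
         isWideImmShiftAmount(48, 64),              // 1
         isWideImmShiftAmount(32, 32));             // 0
}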
- unsigned Val = Shifter.Val; - unsigned Shift = ARM64_AM::getShiftValue(Val); - return ARM64_AM::getShiftType(Val) == ARM64_AM::LSL && - (Shift == 0 || Shift == 8 || Shift == 16 || Shift == 24); - } - - bool isLogicalVecHalfWordShifter() const { - if (!isLogicalVecShifter()) - return false; - - // A logical vector shifter is a left shift by 0 or 8. - unsigned Val = Shifter.Val; - unsigned Shift = ARM64_AM::getShiftValue(Val); - return ARM64_AM::getShiftType(Val) == ARM64_AM::LSL && - (Shift == 0 || Shift == 8); - } - - bool isMoveVecShifter() const { - if (!isShifter()) - return false; - - // A logical vector shifter is a left shift by 8 or 16. - unsigned Val = Shifter.Val; - unsigned Shift = ARM64_AM::getShiftValue(Val); - return ARM64_AM::getShiftType(Val) == ARM64_AM::MSL && - (Shift == 8 || Shift == 16); - } - - bool isMemoryRegisterOffset8() const { - return isMem() && Mem.Mode == RegisterOffset && Mem.ShiftVal == 0; - } - - bool isMemoryRegisterOffset16() const { - return isMem() && Mem.Mode == RegisterOffset && - (Mem.ShiftVal == 0 || Mem.ShiftVal == 1); - } - - bool isMemoryRegisterOffset32() const { - return isMem() && Mem.Mode == RegisterOffset && - (Mem.ShiftVal == 0 || Mem.ShiftVal == 2); - } - - bool isMemoryRegisterOffset64() const { - return isMem() && Mem.Mode == RegisterOffset && - (Mem.ShiftVal == 0 || Mem.ShiftVal == 3); - } - - bool isMemoryRegisterOffset128() const { - return isMem() && Mem.Mode == RegisterOffset && - (Mem.ShiftVal == 0 || Mem.ShiftVal == 4); - } - - bool isMemoryUnscaled() const { - if (!isMem()) - return false; - if (Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - // Make sure the immediate value is valid. - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm); - if (!CE) - return false; - // The offset must fit in a signed 9-bit unscaled immediate. - int64_t Value = CE->getValue(); - return (Value >= -256 && Value < 256); - } - // Fallback unscaled operands are for aliases of LDR/STR that fall back - // to LDUR/STUR when the offset is not legal for the former but is for - // the latter. As such, in addition to checking for being a legal unscaled - // address, also check that it is not a legal scaled address. This avoids - // ambiguity in the matcher. - bool isMemoryUnscaledFB8() const { - return isMemoryUnscaled() && !isMemoryIndexed8(); - } - bool isMemoryUnscaledFB16() const { - return isMemoryUnscaled() && !isMemoryIndexed16(); - } - bool isMemoryUnscaledFB32() const { - return isMemoryUnscaled() && !isMemoryIndexed32(); - } - bool isMemoryUnscaledFB64() const { - return isMemoryUnscaled() && !isMemoryIndexed64(); - } - bool isMemoryUnscaledFB128() const { - return isMemoryUnscaled() && !isMemoryIndexed128(); - } - bool isMemoryIndexed(unsigned Scale) const { - if (!isMem()) - return false; - if (Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - // Make sure the immediate value is valid. - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm); - - if (CE) { - // The offset must be a positive multiple of the scale and in range of - // encoding with a 12-bit immediate. - int64_t Value = CE->getValue(); - return (Value >= 0 && (Value % Scale) == 0 && Value <= (4095 * Scale)); - } - - // If it's not a constant, check for some expressions we know. 
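Editorial aside: the memory-operand predicates above distinguish two immediate forms: a signed 9-bit byte offset for the unscaled (LDUR/STUR-style) mode, and an unsigned 12-bit offset that must be a non-negative multiple of the access size for the scaled mode. A hypothetical standalone restatement of the two range checks:

#include <cstdint>
#include <cstdio>

static bool fitsUnscaled(int64_t Offset) {
  return Offset >= -256 && Offset < 256;              // signed 9-bit field
}
static bool fitsScaled(int64_t Offset, unsigned Scale) {
  return Offset >= 0 && Offset % Scale == 0 &&
         Offset <= 4095 * (int64_t)Scale;             // unsigned 12-bit field
}

int main() {
  printf("%d %d\n", fitsUnscaled(-17), fitsScaled(-17, 8));     // 1 0
  printf("%d %d\n", fitsUnscaled(32760), fitsScaled(32760, 8)); // 0 1
}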
- const MCExpr *Expr = Mem.OffsetImm; - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - const MCConstantExpr *Addend; - if (!ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, - Addend)) { - // If we don't understand the expression, assume the best and - // let the fixup and relocation code deal with it. - return true; - } - - if (DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF || - ELFRefKind == ARM64MCExpr::VK_LO12 || - ELFRefKind == ARM64MCExpr::VK_GOT_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_GOTTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12) { - // Note that we don't range-check the addend. It's adjusted modulo page - // size when converted, so there is no "out of range" condition when using - // @pageoff. - int64_t Value = Addend ? Addend->getValue() : 0; - return Value >= 0 && (Value % Scale) == 0; - } else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF || - DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) { - // @gotpageoff/@tlvppageoff can only be used directly, not with an addend. - return Addend == 0; - } - - return false; - } - bool isMemoryIndexed128() const { return isMemoryIndexed(16); } - bool isMemoryIndexed64() const { return isMemoryIndexed(8); } - bool isMemoryIndexed32() const { return isMemoryIndexed(4); } - bool isMemoryIndexed16() const { return isMemoryIndexed(2); } - bool isMemoryIndexed8() const { return isMemoryIndexed(1); } - bool isMemoryNoIndex() const { - if (!isMem()) - return false; - if (Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - - // Make sure the immediate value is valid. Only zero is allowed. 
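Editorial aside: the note above about @pageoff addends needing no range check follows from how the page offset is formed: only the low 12 bits of the final address end up in the instruction, so any addend simply wraps within the 4 KiB page. A small illustration with an invented symbol address:

#include <cstdint>
#include <cstdio>

static uint64_t pageOff(uint64_t Addr) { return Addr & 0xfff; } // low 12 bits

int main() {
  const uint64_t Sym = 0x100002f80ULL;   // hypothetical symbol address
  printf("0x%llx 0x%llx\n",
         (unsigned long long)pageOff(Sym + 8),
         (unsigned long long)pageOff(Sym + 0x2000)); // wraps within the page
}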
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm); - if (!CE || CE->getValue() != 0) - return false; - return true; - } - bool isMemorySIMDNoIndex() const { - if (!isMem()) - return false; - if (Mem.Mode != ImmediateOffset) - return false; - return Mem.OffsetImm == 0; - } - bool isMemoryIndexedSImm9() const { - if (!isMem() || Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm); - assert(CE && "Non-constant pre-indexed offset!"); - int64_t Value = CE->getValue(); - return Value >= -256 && Value <= 255; - } - bool isMemoryIndexed32SImm7() const { - if (!isMem() || Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm); - assert(CE && "Non-constant pre-indexed offset!"); - int64_t Value = CE->getValue(); - return ((Value % 4) == 0) && Value >= -256 && Value <= 252; - } - bool isMemoryIndexed64SImm7() const { - if (!isMem() || Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm); - assert(CE && "Non-constant pre-indexed offset!"); - int64_t Value = CE->getValue(); - return ((Value % 8) == 0) && Value >= -512 && Value <= 504; - } - bool isMemoryIndexed128SImm7() const { - if (!isMem() || Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm); - assert(CE && "Non-constant pre-indexed offset!"); - int64_t Value = CE->getValue(); - return ((Value % 16) == 0) && Value >= -1024 && Value <= 1008; - } - - bool isAdrpLabel() const { - // Validation was handled during parsing, so we just sanity check that - // something didn't go haywire. - return isImm(); - } - - bool isAdrLabel() const { - // Validation was handled during parsing, so we just sanity check that - // something didn't go haywire. - return isImm(); - } - - void addExpr(MCInst &Inst, const MCExpr *Expr) const { - // Add as immediates when possible. Null MCExpr = 0. 
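Editorial aside: the SImm7-scaled predicates above (multiples of 4 in [-256, 252], of 8 in [-512, 504], of 16 in [-1024, 1008]) are all the same signed 7-bit field scaled by the access size, as used by the load/store-pair forms. A hypothetical standalone restatement:

#include <cstdint>
#include <cstdio>

static bool fitsPairOffset(int64_t Offset, unsigned Scale) {
  if (Offset % (int64_t)Scale != 0)
    return false;                         // must be a multiple of the access size
  int64_t Scaled = Offset / (int64_t)Scale;
  return Scaled >= -64 && Scaled <= 63;   // signed 7-bit field
}

int main() {
  printf("%d %d %d\n", fitsPairOffset(252, 4), fitsPairOffset(504, 8),
         fitsPairOffset(1008, 16));       // 1 1 1
  printf("%d\n", fitsPairOffset(-520, 8)); // 0: -65 after scaling
}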
- if (Expr == 0) - Inst.addOperand(MCOperand::CreateImm(0)); - else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); - else - Inst.addOperand(MCOperand::CreateExpr(Expr)); - } - - void addRegOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getReg())); - } - - void addVectorRegOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getReg())); - } - - template <unsigned NumRegs> - void addVectorList64Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - static unsigned FirstRegs[] = { ARM64::D0, ARM64::D0_D1, - ARM64::D0_D1_D2, ARM64::D0_D1_D2_D3 }; - unsigned FirstReg = FirstRegs[NumRegs - 1]; - - Inst.addOperand( - MCOperand::CreateReg(FirstReg + getVectorListStart() - ARM64::Q0)); - } - - template <unsigned NumRegs> - void addVectorList128Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - static unsigned FirstRegs[] = { ARM64::Q0, ARM64::Q0_Q1, - ARM64::Q0_Q1_Q2, ARM64::Q0_Q1_Q2_Q3 }; - unsigned FirstReg = FirstRegs[NumRegs - 1]; - - Inst.addOperand( - MCOperand::CreateReg(FirstReg + getVectorListStart() - ARM64::Q0)); - } - - void addVectorIndexBOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); - } - - void addVectorIndexHOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); - } - - void addVectorIndexSOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); - } - - void addVectorIndexDOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); - } - - void addImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // If this is a pageoff symrefexpr with an addend, adjust the addend - // to be only the page-offset portion. Otherwise, just add the expr - // as-is. 
- addExpr(Inst, getImm()); - } - - void addAdrpLabelOperands(MCInst &Inst, unsigned N) const { - addImmOperands(Inst, N); - } - - void addAdrLabelOperands(MCInst &Inst, unsigned N) const { - addImmOperands(Inst, N); - } - - void addSImm9Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addSImm7s4Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 4)); - } - - void addSImm7s8Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 8)); - } - - void addSImm7s16Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 16)); - } - - void addImm0_7Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm1_8Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm0_15Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm1_16Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm0_31Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm1_31Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm1_32Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm0_63Operands(MCInst &Inst, unsigned N) 
const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm1_63Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm1_64Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm0_127Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm0_255Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm0_65535Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addLogicalImm32Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid logical immediate operand!"); - uint64_t encoding = ARM64_AM::encodeLogicalImmediate(MCE->getValue(), 32); - Inst.addOperand(MCOperand::CreateImm(encoding)); - } - - void addLogicalImm64Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid logical immediate operand!"); - uint64_t encoding = ARM64_AM::encodeLogicalImmediate(MCE->getValue(), 64); - Inst.addOperand(MCOperand::CreateImm(encoding)); - } - - void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - assert(MCE && "Invalid immediate operand!"); - uint64_t encoding = ARM64_AM::encodeAdvSIMDModImmType10(MCE->getValue()); - Inst.addOperand(MCOperand::CreateImm(encoding)); - } - - void addBranchTarget26Operands(MCInst &Inst, unsigned N) const { - // Branch operands don't encode the low bits, so shift them off - // here. If it's a label, however, just put it on directly as there's - // not enough information now to do anything. - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) { - addExpr(Inst, getImm()); - return; - } - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2)); - } - - void addBranchTarget19Operands(MCInst &Inst, unsigned N) const { - // Branch operands don't encode the low bits, so shift them off - // here. 
If it's a label, however, just put it on directly as there's - // not enough information now to do anything. - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) { - addExpr(Inst, getImm()); - return; - } - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2)); - } - - void addBranchTarget14Operands(MCInst &Inst, unsigned N) const { - // Branch operands don't encode the low bits, so shift them off - // here. If it's a label, however, just put it on directly as there's - // not enough information now to do anything. - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); - if (!MCE) { - addExpr(Inst, getImm()); - return; - } - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2)); - } - - void addFPImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getFPImm())); - } - - void addBarrierOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getBarrier())); - } - - void addSystemRegisterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - if (Kind == k_SystemRegister) - Inst.addOperand(MCOperand::CreateImm(getSystemRegister())); - else { - assert(Kind == k_CPSRField && getCPSRField() == ARM64SYS::cpsr_SPSel); - Inst.addOperand(MCOperand::CreateImm(ARM64SYS::SPSel)); - } - } - - void addSystemCPSRFieldOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getCPSRField())); - } - - void addSysCROperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getSysCR())); - } - - void addPrefetchOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getPrefetch())); - } - - void addShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addArithmeticShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addMovImm32ShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addMovImm64ShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addAddSubShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addLogicalVecShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addLogicalVecHalfWordShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addMoveVecShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of 
operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addExtendOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // lsl is an alias for UXTX but will be a parsed as a k_Shifter operand. - if (isShifter()) { - assert(ARM64_AM::getShiftType(getShifter()) == ARM64_AM::LSL); - unsigned imm = getArithExtendImm(ARM64_AM::UXTX, - ARM64_AM::getShiftValue(getShifter())); - Inst.addOperand(MCOperand::CreateImm(imm)); - } else - Inst.addOperand(MCOperand::CreateImm(getExtend())); - } - - void addExtend64Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getExtend())); - } - - void addExtendLSL64Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // lsl is an alias for UXTX but will be a parsed as a k_Shifter operand. - if (isShifter()) { - assert(ARM64_AM::getShiftType(getShifter()) == ARM64_AM::LSL); - unsigned imm = getArithExtendImm(ARM64_AM::UXTX, - ARM64_AM::getShiftValue(getShifter())); - Inst.addOperand(MCOperand::CreateImm(imm)); - } else - Inst.addOperand(MCOperand::CreateImm(getExtend())); - } - - void addMemoryRegisterOffsetOperands(MCInst &Inst, unsigned N, bool DoShift) { - assert(N == 3 && "Invalid number of operands!"); - - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - Inst.addOperand(MCOperand::CreateReg(Mem.OffsetRegNum)); - unsigned ExtendImm = ARM64_AM::getMemExtendImm(Mem.ExtType, DoShift); - Inst.addOperand(MCOperand::CreateImm(ExtendImm)); - } - - void addMemoryRegisterOffset8Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ExplicitShift); - } - - void addMemoryRegisterOffset16Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 1); - } - - void addMemoryRegisterOffset32Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 2); - } - - void addMemoryRegisterOffset64Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 3); - } - - void addMemoryRegisterOffset128Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 4); - } - - void addMemoryIndexedOperands(MCInst &Inst, unsigned N, - unsigned Scale) const { - // Add the base register operand. - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - - if (!Mem.OffsetImm) { - // There isn't an offset. - Inst.addOperand(MCOperand::CreateImm(0)); - return; - } - - // Add the offset operand. - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm)) { - assert(CE->getValue() % Scale == 0 && - "Offset operand must be multiple of the scale!"); - - // The MCInst offset operand doesn't include the low bits (like the - // instruction encoding). - Inst.addOperand(MCOperand::CreateImm(CE->getValue() / Scale)); - } - - // If this is a pageoff symrefexpr with an addend, the linker will - // do the scaling of the addend. - // - // Otherwise we don't know what this is, so just add the scaling divide to - // the expression and let the MC fixup evaluation code deal with it. 
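Editorial aside: the addBranchTargetNNOperands methods a little above pair with the isBranchTargetNN predicates earlier in the operand class: the byte offset must be word-aligned and within the signed range of the field, and the encoded operand drops the low two bits. A hypothetical standalone sketch for the 26-bit case, not LLVM code:

#include <cstdint>
#include <cstdio>

// Returns true and writes the encoded field if ByteOffset is representable.
static bool encodeBranch26(int64_t ByteOffset, int32_t &Field) {
  if (ByteOffset & 0x3)
    return false;                               // must be 4-byte aligned
  if (ByteOffset < -(0x2000000LL << 2) || ByteOffset > (0x1ffffffLL << 2))
    return false;                               // roughly +/-128 MiB reach
  Field = (int32_t)(ByteOffset >> 2);           // low two bits are dropped
  return true;
}

int main() {
  int32_t Field;
  if (encodeBranch26(1024, Field))
    printf("field=%d\n", Field);                // field=256
  printf("%d\n", encodeBranch26(3, Field));     // 0: unaligned target rejected
}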
- const MCExpr *Expr = Mem.OffsetImm; - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - const MCConstantExpr *Addend; - if (Scale > 1 && - (!ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, - Addend) || - (Addend != 0 && DarwinRefKind != MCSymbolRefExpr::VK_PAGEOFF))) { - Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(Scale, Ctx), - Ctx); - } - - Inst.addOperand(MCOperand::CreateExpr(Expr)); - } - - void addMemoryUnscaledOperands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryUnscaled() && "Invalid number of operands!"); - // Add the base register operand. - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - - // Add the offset operand. - if (!Mem.OffsetImm) - Inst.addOperand(MCOperand::CreateImm(0)); - else { - // Only constant offsets supported. - const MCConstantExpr *CE = cast<MCConstantExpr>(Mem.OffsetImm); - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); - } - } - - void addMemoryIndexed128Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed128() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 16); - } - - void addMemoryIndexed64Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed64() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 8); - } - - void addMemoryIndexed32Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed32() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 4); - } - - void addMemoryIndexed16Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed16() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 2); - } - - void addMemoryIndexed8Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed8() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 1); - } - - void addMemoryNoIndexOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && isMemoryNoIndex() && "Invalid number of operands!"); - // Add the base register operand (the offset is always zero, so ignore it). - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - } - - void addMemorySIMDNoIndexOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && isMemorySIMDNoIndex() && "Invalid number of operands!"); - // Add the base register operand (the offset is always zero, so ignore it). - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - } - - void addMemoryWritebackIndexedOperands(MCInst &Inst, unsigned N, - unsigned Scale) const { - assert(N == 2 && "Invalid number of operands!"); - - // Add the base register operand. - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - - // Add the offset operand. 
- int64_t Offset = 0; - if (Mem.OffsetImm) { - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm); - assert(CE && "Non-constant indexed offset operand!"); - Offset = CE->getValue(); - } - - if (Scale != 1) { - assert(Offset % Scale == 0 && - "Offset operand must be a multiple of the scale!"); - Offset /= Scale; - } - - Inst.addOperand(MCOperand::CreateImm(Offset)); - } - - void addMemoryIndexedSImm9Operands(MCInst &Inst, unsigned N) const { - addMemoryWritebackIndexedOperands(Inst, N, 1); - } - - void addMemoryIndexed32SImm7Operands(MCInst &Inst, unsigned N) const { - addMemoryWritebackIndexedOperands(Inst, N, 4); - } - - void addMemoryIndexed64SImm7Operands(MCInst &Inst, unsigned N) const { - addMemoryWritebackIndexedOperands(Inst, N, 8); - } - - void addMemoryIndexed128SImm7Operands(MCInst &Inst, unsigned N) const { - addMemoryWritebackIndexedOperands(Inst, N, 16); - } - - virtual void print(raw_ostream &OS) const; - - static ARM64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Token, Ctx); - Op->Tok.Data = Str.data(); - Op->Tok.Length = Str.size(); - Op->Tok.IsSuffix = IsSuffix; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateReg(unsigned RegNum, bool isVector, SMLoc S, - SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Register, Ctx); - Op->Reg.RegNum = RegNum; - Op->Reg.isVector = isVector; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateVectorList(unsigned RegNum, unsigned Count, - unsigned NumElements, char ElementKind, - SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_VectorList, Ctx); - Op->VectorList.RegNum = RegNum; - Op->VectorList.Count = Count; - Op->VectorList.NumElements = NumElements; - Op->VectorList.ElementKind = ElementKind; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_VectorIndex, Ctx); - Op->VectorIndex.Val = Idx; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Immediate, Ctx); - Op->Imm.Val = Val; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_FPImm, Ctx); - Op->FPImm.Val = Val; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateBarrier(unsigned Val, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Barrier, Ctx); - Op->Barrier.Val = Val; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateSystemRegister(uint16_t Val, SMLoc S, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_SystemRegister, Ctx); - Op->SystemRegister.Val = Val; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateCPSRField(ARM64SYS::CPSRField Field, SMLoc S, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_CPSRField, Ctx); - Op->CPSRField.Field = Field; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateMem(unsigned BaseRegNum, const MCExpr *Off, - SMLoc S, SMLoc E, SMLoc OffsetLoc, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Memory, Ctx); - Op->Mem.BaseRegNum = BaseRegNum; - Op->Mem.OffsetRegNum = 0; - 
Op->Mem.OffsetImm = Off; - Op->Mem.ExtType = ARM64_AM::UXTX; - Op->Mem.ShiftVal = 0; - Op->Mem.ExplicitShift = false; - Op->Mem.Mode = ImmediateOffset; - Op->OffsetLoc = OffsetLoc; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateRegOffsetMem(unsigned BaseReg, unsigned OffsetReg, - ARM64_AM::ExtendType ExtType, - unsigned ShiftVal, bool ExplicitShift, - SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Memory, Ctx); - Op->Mem.BaseRegNum = BaseReg; - Op->Mem.OffsetRegNum = OffsetReg; - Op->Mem.OffsetImm = 0; - Op->Mem.ExtType = ExtType; - Op->Mem.ShiftVal = ShiftVal; - Op->Mem.ExplicitShift = ExplicitShift; - Op->Mem.Mode = RegisterOffset; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_SysCR, Ctx); - Op->SysCRImm.Val = Val; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreatePrefetch(unsigned Val, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Prefetch, Ctx); - Op->Prefetch.Val = Val; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateShifter(ARM64_AM::ShiftType ShOp, unsigned Val, - SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Shifter, Ctx); - Op->Shifter.Val = ARM64_AM::getShifterImm(ShOp, Val); - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateExtend(ARM64_AM::ExtendType ExtOp, unsigned Val, - SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Extend, Ctx); - Op->Extend.Val = ARM64_AM::getArithExtendImm(ExtOp, Val); - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } -}; - -} // end anonymous namespace. 
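The Create* factories above are how the operand-parsing routines further down populate the operand list. A minimal, hypothetical sketch (the values and surrounding variables are illustrative only, not taken from the parser) of pushing the immediate and shifter operands for an instruction such as "add x0, x1, #4, lsl #12":

    // Assumes the factory signatures declared above; S and E are source
    // locations already obtained from the lexer, and Operands is the
    // parser's operand vector.
    const MCExpr *Val = MCConstantExpr::Create(4, getContext());
    Operands.push_back(ARM64Operand::CreateImm(Val, S, E, getContext()));
    Operands.push_back(
        ARM64Operand::CreateShifter(ARM64_AM::LSL, 12, S, E, getContext()));

CreateShifter packs the shift type and amount into a single immediate via ARM64_AM::getShifterImm, which is what addShifterOperands later hands to the MCInst.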
- -void ARM64Operand::print(raw_ostream &OS) const { - switch (Kind) { - case k_FPImm: - OS << "<fpimm " << getFPImm() << "(" << ARM64_AM::getFPImmFloat(getFPImm()) - << ") >"; - break; - case k_Barrier: { - const char *Name = - ARM64SYS::getBarrierOptName((ARM64SYS::BarrierOption)getBarrier()); - OS << "<barrier "; - if (Name) - OS << Name; - else - OS << getBarrier(); - OS << ">"; - break; - } - case k_SystemRegister: { - const char *Name = ARM64SYS::getSystemRegisterName( - (ARM64SYS::SystemRegister)getSystemRegister()); - OS << "<systemreg "; - if (Name) - OS << Name; - else - OS << "#" << getSystemRegister(); - OS << ">"; - break; - } - case k_CPSRField: { - const char *Name = ARM64SYS::getCPSRFieldName(getCPSRField()); - OS << "<cpsrfield " << Name << ">"; - break; - } - case k_Immediate: - getImm()->print(OS); - break; - case k_Memory: - OS << "<memory>"; - break; - case k_Register: - OS << "<register " << getReg() << ">"; - break; - case k_VectorList: { - OS << "<vectorlist "; - unsigned Reg = getVectorListStart(); - for (unsigned i = 0, e = getVectorListCount(); i != e; ++i) - OS << Reg + i << " "; - OS << ">"; - break; - } - case k_VectorIndex: - OS << "<vectorindex " << getVectorIndex() << ">"; - break; - case k_Token: - OS << "'" << getToken() << "'"; - break; - case k_SysCR: - OS << "c" << getSysCR(); - break; - case k_Prefetch: - OS << "<prfop "; - if (ARM64_AM::isNamedPrefetchOp(getPrefetch())) - OS << ARM64_AM::getPrefetchOpName((ARM64_AM::PrefetchOp)getPrefetch()); - else - OS << "#" << getPrefetch(); - OS << ">"; - break; - case k_Shifter: { - unsigned Val = getShifter(); - OS << "<" << ARM64_AM::getShiftName(ARM64_AM::getShiftType(Val)) << " #" - << ARM64_AM::getShiftValue(Val) << ">"; - break; - } - case k_Extend: { - unsigned Val = getExtend(); - OS << "<" << ARM64_AM::getExtendName(ARM64_AM::getArithExtendType(Val)) - << " #" << ARM64_AM::getArithShiftValue(Val) << ">"; - break; - } - } -} - -/// @name Auto-generated Match Functions -/// { - -static unsigned MatchRegisterName(StringRef Name); - -/// } - -static unsigned matchVectorRegName(StringRef Name) { - return StringSwitch<unsigned>(Name) - .Case("v0", ARM64::Q0) - .Case("v1", ARM64::Q1) - .Case("v2", ARM64::Q2) - .Case("v3", ARM64::Q3) - .Case("v4", ARM64::Q4) - .Case("v5", ARM64::Q5) - .Case("v6", ARM64::Q6) - .Case("v7", ARM64::Q7) - .Case("v8", ARM64::Q8) - .Case("v9", ARM64::Q9) - .Case("v10", ARM64::Q10) - .Case("v11", ARM64::Q11) - .Case("v12", ARM64::Q12) - .Case("v13", ARM64::Q13) - .Case("v14", ARM64::Q14) - .Case("v15", ARM64::Q15) - .Case("v16", ARM64::Q16) - .Case("v17", ARM64::Q17) - .Case("v18", ARM64::Q18) - .Case("v19", ARM64::Q19) - .Case("v20", ARM64::Q20) - .Case("v21", ARM64::Q21) - .Case("v22", ARM64::Q22) - .Case("v23", ARM64::Q23) - .Case("v24", ARM64::Q24) - .Case("v25", ARM64::Q25) - .Case("v26", ARM64::Q26) - .Case("v27", ARM64::Q27) - .Case("v28", ARM64::Q28) - .Case("v29", ARM64::Q29) - .Case("v30", ARM64::Q30) - .Case("v31", ARM64::Q31) - .Default(0); -} - -static bool isValidVectorKind(StringRef Name) { - return StringSwitch<bool>(Name.lower()) - .Case(".8b", true) - .Case(".16b", true) - .Case(".4h", true) - .Case(".8h", true) - .Case(".2s", true) - .Case(".4s", true) - .Case(".1d", true) - .Case(".2d", true) - .Case(".1q", true) - // Accept the width neutral ones, too, for verbose syntax. If those - // aren't used in the right places, the token operand won't match so - // all will work out. 
- .Case(".b", true) - .Case(".h", true) - .Case(".s", true) - .Case(".d", true) - .Default(false); -} - -static void parseValidVectorKind(StringRef Name, unsigned &NumElements, - char &ElementKind) { - assert(isValidVectorKind(Name)); - - ElementKind = Name.lower()[Name.size() - 1]; - NumElements = 0; - - if (Name.size() == 2) - return; - - // Parse the lane count - Name = Name.drop_front(); - while (isdigit(Name.front())) { - NumElements = 10 * NumElements + (Name.front() - '0'); - Name = Name.drop_front(); - } -} - -bool ARM64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, - SMLoc &EndLoc) { - StartLoc = getLoc(); - RegNo = tryParseRegister(); - EndLoc = SMLoc::getFromPointer(getLoc().getPointer() - 1); - return (RegNo == (unsigned)-1); -} - -/// tryParseRegister - Try to parse a register name. The token must be an -/// Identifier when called, and if it is a register name the token is eaten and -/// the register is added to the operand list. -int ARM64AsmParser::tryParseRegister() { - const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); - - std::string lowerCase = Tok.getString().lower(); - unsigned RegNum = MatchRegisterName(lowerCase); - // Also handle a few aliases of registers. - if (RegNum == 0) - RegNum = StringSwitch<unsigned>(lowerCase) - .Case("x29", ARM64::FP) - .Case("x30", ARM64::LR) - .Case("x31", ARM64::XZR) - .Case("w31", ARM64::WZR) - .Default(0); - - if (RegNum == 0) - return -1; - - Parser.Lex(); // Eat identifier token. - return RegNum; -} - -/// tryMatchVectorRegister - Try to parse a vector register name with optional -/// kind specifier. If it is a register specifier, eat the token and return it. -int ARM64AsmParser::tryMatchVectorRegister(StringRef &Kind) { - if (Parser.getTok().isNot(AsmToken::Identifier)) { - TokError("vector register expected"); - return -1; - } - - StringRef Name = Parser.getTok().getString(); - // If there is a kind specifier, it's separated from the register name by - // a '.'. - size_t Start = 0, Next = Name.find('.'); - StringRef Head = Name.slice(Start, Next); - unsigned RegNum = matchVectorRegName(Head); - if (RegNum) { - if (Next != StringRef::npos) { - Kind = Name.slice(Next, StringRef::npos); - if (!isValidVectorKind(Kind)) { - TokError("invalid vector kind qualifier"); - return -1; - } - } - Parser.Lex(); // Eat the register token. - return RegNum; - } - return -1; -} - -static int MatchSysCRName(StringRef Name) { - // Use the same layout as the tablegen'erated register name matcher. Ugly, - // but efficient. - switch (Name.size()) { - default: - break; - case 2: - if (Name[0] != 'c' && Name[0] != 'C') - return -1; - switch (Name[1]) { - default: - return -1; - case '0': - return 0; - case '1': - return 1; - case '2': - return 2; - case '3': - return 3; - case '4': - return 4; - case '5': - return 5; - case '6': - return 6; - case '7': - return 7; - case '8': - return 8; - case '9': - return 9; - } - break; - case 3: - if ((Name[0] != 'c' && Name[0] != 'C') || Name[1] != '1') - return -1; - switch (Name[2]) { - default: - return -1; - case '0': - return 10; - case '1': - return 11; - case '2': - return 12; - case '3': - return 13; - case '4': - return 14; - case '5': - return 15; - } - break; - } - - llvm_unreachable("Unhandled SysCR operand string!"); - return -1; -} - -/// tryParseSysCROperand - Try to parse a system instruction CR operand name. 
-ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseSysCROperand(OperandVector &Operands) { - SMLoc S = getLoc(); - const AsmToken &Tok = Parser.getTok(); - if (Tok.isNot(AsmToken::Identifier)) - return MatchOperand_NoMatch; - - int Num = MatchSysCRName(Tok.getString()); - if (Num == -1) - return MatchOperand_NoMatch; - - Parser.Lex(); // Eat identifier token. - Operands.push_back(ARM64Operand::CreateSysCR(Num, S, getLoc(), getContext())); - return MatchOperand_Success; -} - -/// tryParsePrefetch - Try to parse a prefetch operand. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParsePrefetch(OperandVector &Operands) { - SMLoc S = getLoc(); - const AsmToken &Tok = Parser.getTok(); - // Either an identifier for named values or a 5-bit immediate. - if (Tok.is(AsmToken::Hash)) { - Parser.Lex(); // Eat hash token. - const MCExpr *ImmVal; - if (getParser().parseExpression(ImmVal)) - return MatchOperand_ParseFail; - - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) { - TokError("immediate value expected for prefetch operand"); - return MatchOperand_ParseFail; - } - unsigned prfop = MCE->getValue(); - if (prfop > 31) { - TokError("prefetch operand out of range, [0,31] expected"); - return MatchOperand_ParseFail; - } - - Operands.push_back(ARM64Operand::CreatePrefetch(prfop, S, getContext())); - return MatchOperand_Success; - } - - if (Tok.isNot(AsmToken::Identifier)) { - TokError("pre-fetch hint expected"); - return MatchOperand_ParseFail; - } - - unsigned prfop = StringSwitch<unsigned>(Tok.getString()) - .Case("pldl1keep", ARM64_AM::PLDL1KEEP) - .Case("pldl1strm", ARM64_AM::PLDL1STRM) - .Case("pldl2keep", ARM64_AM::PLDL2KEEP) - .Case("pldl2strm", ARM64_AM::PLDL2STRM) - .Case("pldl3keep", ARM64_AM::PLDL3KEEP) - .Case("pldl3strm", ARM64_AM::PLDL3STRM) - .Case("pstl1keep", ARM64_AM::PSTL1KEEP) - .Case("pstl1strm", ARM64_AM::PSTL1STRM) - .Case("pstl2keep", ARM64_AM::PSTL2KEEP) - .Case("pstl2strm", ARM64_AM::PSTL2STRM) - .Case("pstl3keep", ARM64_AM::PSTL3KEEP) - .Case("pstl3strm", ARM64_AM::PSTL3STRM) - .Default(0xff); - if (prfop == 0xff) { - TokError("pre-fetch hint expected"); - return MatchOperand_ParseFail; - } - - Parser.Lex(); // Eat identifier token. - Operands.push_back(ARM64Operand::CreatePrefetch(prfop, S, getContext())); - return MatchOperand_Success; -} - -/// tryParseAdrpLabel - Parse and validate a source label for the ADRP -/// instruction. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { - SMLoc S = getLoc(); - const MCExpr *Expr; - if (parseSymbolicImmVal(Expr)) - return MatchOperand_ParseFail; - - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - const MCConstantExpr *Addend; - if (!classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) { - Error(S, "modified label reference + constant expected"); - return MatchOperand_ParseFail; - } - - if (DarwinRefKind == MCSymbolRefExpr::VK_None && - ELFRefKind == ARM64MCExpr::VK_INVALID) { - // No modifier was specified at all; this is the syntax for an ELF basic - // ADRP relocation (unfortunately). 
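// For example, a plain "adrp x0, symbol" takes this path and is wrapped as a
// VK_ABS_PAGE reference, while "adrp x0, :got:symbol" or "adrp x0, _var@PAGE"
// carry an explicit modifier and are validated below.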
- Expr = ARM64MCExpr::Create(Expr, ARM64MCExpr::VK_ABS_PAGE, getContext()); - } else if ((DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGE || - DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGE) && - Addend != 0) { - Error(S, "gotpage label reference not allowed an addend"); - return MatchOperand_ParseFail; - } else if (DarwinRefKind != MCSymbolRefExpr::VK_PAGE && - DarwinRefKind != MCSymbolRefExpr::VK_GOTPAGE && - DarwinRefKind != MCSymbolRefExpr::VK_TLVPPAGE && - ELFRefKind != ARM64MCExpr::VK_GOT_PAGE && - ELFRefKind != ARM64MCExpr::VK_GOTTPREL_PAGE && - ELFRefKind != ARM64MCExpr::VK_TLSDESC_PAGE) { - // The operand must be an @page or @gotpage qualified symbolref. - Error(S, "page or gotpage label reference expected"); - return MatchOperand_ParseFail; - } - - // We have a label reference possibly with addend. The addend is a raw value - // here. The linker will adjust it to only reference the page. - SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext())); - - return MatchOperand_Success; -} - -/// tryParseAdrLabel - Parse and validate a source label for the ADR -/// instruction. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseAdrLabel(OperandVector &Operands) { - SMLoc S = getLoc(); - const MCExpr *Expr; - if (getParser().parseExpression(Expr)) - return MatchOperand_ParseFail; - - // The operand must be an un-qualified assembler local symbolref. - // FIXME: wrong for ELF. - if (const MCSymbolRefExpr *SRE = dyn_cast<const MCSymbolRefExpr>(Expr)) { - // FIXME: Should reference the MachineAsmInfo to get the private prefix. - bool isTemporary = SRE->getSymbol().getName().startswith("L"); - if (!isTemporary || SRE->getKind() != MCSymbolRefExpr::VK_None) { - Error(S, "unqualified, assembler-local label name expected"); - return MatchOperand_ParseFail; - } - } - - SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext())); - - return MatchOperand_Success; -} - -/// tryParseFPImm - A floating point immediate expression operand. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseFPImm(OperandVector &Operands) { - SMLoc S = getLoc(); - - if (Parser.getTok().isNot(AsmToken::Hash)) - return MatchOperand_NoMatch; - Parser.Lex(); // Eat the '#'. - - // Handle negation, as that still comes through as a separate token. - bool isNegative = false; - if (Parser.getTok().is(AsmToken::Minus)) { - isNegative = true; - Parser.Lex(); - } - const AsmToken &Tok = Parser.getTok(); - if (Tok.is(AsmToken::Real)) { - APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); - uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); - // If we had a '-' in front, toggle the sign bit. - IntVal ^= (uint64_t)isNegative << 63; - int Val = ARM64_AM::getFP64Imm(APInt(64, IntVal)); - Parser.Lex(); // Eat the token. - // Check for out of range values. As an exception, we let Zero through, - // as we handle that special case in post-processing before matching in - // order to use the zero register for it. 
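// e.g. "fmov d0, #0.5" encodes directly as an 8-bit FP immediate, while
// "#0.0" is not representable; it is accepted here so post-processing can
// substitute the zero register.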
- if (Val == -1 && !RealVal.isZero()) { - TokError("floating point value out of range"); - return MatchOperand_ParseFail; - } - Operands.push_back(ARM64Operand::CreateFPImm(Val, S, getContext())); - return MatchOperand_Success; - } - if (Tok.is(AsmToken::Integer)) { - int64_t Val; - if (!isNegative && Tok.getString().startswith("0x")) { - Val = Tok.getIntVal(); - if (Val > 255 || Val < 0) { - TokError("encoded floating point value out of range"); - return MatchOperand_ParseFail; - } - } else { - APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); - uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); - // If we had a '-' in front, toggle the sign bit. - IntVal ^= (uint64_t)isNegative << 63; - Val = ARM64_AM::getFP64Imm(APInt(64, IntVal)); - } - Parser.Lex(); // Eat the token. - Operands.push_back(ARM64Operand::CreateFPImm(Val, S, getContext())); - return MatchOperand_Success; - } - - TokError("invalid floating point immediate"); - return MatchOperand_ParseFail; -} - -/// parseCondCodeString - Parse a Condition Code string. -unsigned ARM64AsmParser::parseCondCodeString(StringRef Cond) { - unsigned CC = StringSwitch<unsigned>(Cond) - .Case("eq", ARM64CC::EQ) - .Case("ne", ARM64CC::NE) - .Case("cs", ARM64CC::CS) - .Case("hs", ARM64CC::CS) - .Case("cc", ARM64CC::CC) - .Case("lo", ARM64CC::CC) - .Case("mi", ARM64CC::MI) - .Case("pl", ARM64CC::PL) - .Case("vs", ARM64CC::VS) - .Case("vc", ARM64CC::VC) - .Case("hi", ARM64CC::HI) - .Case("ls", ARM64CC::LS) - .Case("ge", ARM64CC::GE) - .Case("lt", ARM64CC::LT) - .Case("gt", ARM64CC::GT) - .Case("le", ARM64CC::LE) - .Case("al", ARM64CC::AL) - // Upper case works too. Not mixed case, though. - .Case("EQ", ARM64CC::EQ) - .Case("NE", ARM64CC::NE) - .Case("CS", ARM64CC::CS) - .Case("HS", ARM64CC::CS) - .Case("CC", ARM64CC::CC) - .Case("LO", ARM64CC::CC) - .Case("MI", ARM64CC::MI) - .Case("PL", ARM64CC::PL) - .Case("VS", ARM64CC::VS) - .Case("VC", ARM64CC::VC) - .Case("HI", ARM64CC::HI) - .Case("LS", ARM64CC::LS) - .Case("GE", ARM64CC::GE) - .Case("LT", ARM64CC::LT) - .Case("GT", ARM64CC::GT) - .Case("LE", ARM64CC::LE) - .Case("AL", ARM64CC::AL) - .Default(~0U); - return CC; -} - -/// parseCondCode - Parse a Condition Code operand. -bool ARM64AsmParser::parseCondCode(OperandVector &Operands, - bool invertCondCode) { - SMLoc S = getLoc(); - const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); - - StringRef Cond = Tok.getString(); - unsigned CC = parseCondCodeString(Cond); - if (CC == ~0U) - return TokError("invalid condition code"); - Parser.Lex(); // Eat identifier token. - - if (invertCondCode) - CC = ARM64CC::getInvertedCondCode(ARM64CC::CondCode(CC)); - - const MCExpr *CCExpr = MCConstantExpr::Create(CC, getContext()); - Operands.push_back( - ARM64Operand::CreateImm(CCExpr, S, getLoc(), getContext())); - return false; -} - -/// ParseOptionalShift - Some operands take an optional shift argument. Parse -/// them if present. 
-bool ARM64AsmParser::parseOptionalShift(OperandVector &Operands) { - const AsmToken &Tok = Parser.getTok(); - ARM64_AM::ShiftType ShOp = StringSwitch<ARM64_AM::ShiftType>(Tok.getString()) - .Case("lsl", ARM64_AM::LSL) - .Case("lsr", ARM64_AM::LSR) - .Case("asr", ARM64_AM::ASR) - .Case("ror", ARM64_AM::ROR) - .Case("msl", ARM64_AM::MSL) - .Case("LSL", ARM64_AM::LSL) - .Case("LSR", ARM64_AM::LSR) - .Case("ASR", ARM64_AM::ASR) - .Case("ROR", ARM64_AM::ROR) - .Case("MSL", ARM64_AM::MSL) - .Default(ARM64_AM::InvalidShift); - if (ShOp == ARM64_AM::InvalidShift) - return true; - - SMLoc S = Tok.getLoc(); - Parser.Lex(); - - // We expect a number here. - if (getLexer().isNot(AsmToken::Hash)) - return TokError("immediate value expected for shifter operand"); - Parser.Lex(); // Eat the '#'. - - SMLoc ExprLoc = getLoc(); - const MCExpr *ImmVal; - if (getParser().parseExpression(ImmVal)) - return true; - - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) - return TokError("immediate value expected for shifter operand"); - - if ((MCE->getValue() & 0x3f) != MCE->getValue()) - return Error(ExprLoc, "immediate value too large for shifter operand"); - - SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back( - ARM64Operand::CreateShifter(ShOp, MCE->getValue(), S, E, getContext())); - return false; -} - -/// parseOptionalExtend - Some operands take an optional extend argument. Parse -/// them if present. -bool ARM64AsmParser::parseOptionalExtend(OperandVector &Operands) { - const AsmToken &Tok = Parser.getTok(); - ARM64_AM::ExtendType ExtOp = - StringSwitch<ARM64_AM::ExtendType>(Tok.getString()) - .Case("uxtb", ARM64_AM::UXTB) - .Case("uxth", ARM64_AM::UXTH) - .Case("uxtw", ARM64_AM::UXTW) - .Case("uxtx", ARM64_AM::UXTX) - .Case("lsl", ARM64_AM::UXTX) // Alias for UXTX - .Case("sxtb", ARM64_AM::SXTB) - .Case("sxth", ARM64_AM::SXTH) - .Case("sxtw", ARM64_AM::SXTW) - .Case("sxtx", ARM64_AM::SXTX) - .Case("UXTB", ARM64_AM::UXTB) - .Case("UXTH", ARM64_AM::UXTH) - .Case("UXTW", ARM64_AM::UXTW) - .Case("UXTX", ARM64_AM::UXTX) - .Case("LSL", ARM64_AM::UXTX) // Alias for UXTX - .Case("SXTB", ARM64_AM::SXTB) - .Case("SXTH", ARM64_AM::SXTH) - .Case("SXTW", ARM64_AM::SXTW) - .Case("SXTX", ARM64_AM::SXTX) - .Default(ARM64_AM::InvalidExtend); - if (ExtOp == ARM64_AM::InvalidExtend) - return true; - - SMLoc S = Tok.getLoc(); - Parser.Lex(); - - if (getLexer().is(AsmToken::EndOfStatement) || - getLexer().is(AsmToken::Comma)) { - SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back( - ARM64Operand::CreateExtend(ExtOp, 0, S, E, getContext())); - return false; - } - - if (getLexer().isNot(AsmToken::Hash)) { - SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back( - ARM64Operand::CreateExtend(ExtOp, 0, S, E, getContext())); - return false; - } - - Parser.Lex(); // Eat the '#'. - - const MCExpr *ImmVal; - if (getParser().parseExpression(ImmVal)) - return true; - - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) - return TokError("immediate value expected for extend operand"); - - SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back( - ARM64Operand::CreateExtend(ExtOp, MCE->getValue(), S, E, getContext())); - return false; -} - -/// parseSysAlias - The IC, DC, AT, and TLBI instructions are simple aliases for -/// the SYS instruction. Parse them specially so that we create a SYS MCInst. 
-bool ARM64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, - OperandVector &Operands) { - if (Name.find('.') != StringRef::npos) - return TokError("invalid operand"); - - Mnemonic = Name; - Operands.push_back( - ARM64Operand::CreateToken("sys", false, NameLoc, getContext())); - - const AsmToken &Tok = Parser.getTok(); - StringRef Op = Tok.getString(); - SMLoc S = Tok.getLoc(); - - const MCExpr *Expr = 0; - -#define SYS_ALIAS(op1, Cn, Cm, op2) \ - do { \ - Expr = MCConstantExpr::Create(op1, getContext()); \ - Operands.push_back( \ - ARM64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ - Operands.push_back( \ - ARM64Operand::CreateSysCR(Cn, S, getLoc(), getContext())); \ - Operands.push_back( \ - ARM64Operand::CreateSysCR(Cm, S, getLoc(), getContext())); \ - Expr = MCConstantExpr::Create(op2, getContext()); \ - Operands.push_back( \ - ARM64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ - } while (0) - - if (Mnemonic == "ic") { - if (!Op.compare_lower("ialluis")) { - // SYS #0, C7, C1, #0 - SYS_ALIAS(0, 7, 1, 0); - } else if (!Op.compare_lower("iallu")) { - // SYS #0, C7, C5, #0 - SYS_ALIAS(0, 7, 5, 0); - } else if (!Op.compare_lower("ivau")) { - // SYS #3, C7, C5, #1 - SYS_ALIAS(3, 7, 5, 1); - } else { - return TokError("invalid operand for IC instruction"); - } - } else if (Mnemonic == "dc") { - if (!Op.compare_lower("zva")) { - // SYS #3, C7, C4, #1 - SYS_ALIAS(3, 7, 4, 1); - } else if (!Op.compare_lower("ivac")) { - // SYS #3, C7, C6, #1 - SYS_ALIAS(0, 7, 6, 1); - } else if (!Op.compare_lower("isw")) { - // SYS #0, C7, C6, #2 - SYS_ALIAS(0, 7, 6, 2); - } else if (!Op.compare_lower("cvac")) { - // SYS #3, C7, C10, #1 - SYS_ALIAS(3, 7, 10, 1); - } else if (!Op.compare_lower("csw")) { - // SYS #0, C7, C10, #2 - SYS_ALIAS(0, 7, 10, 2); - } else if (!Op.compare_lower("cvau")) { - // SYS #3, C7, C11, #1 - SYS_ALIAS(3, 7, 11, 1); - } else if (!Op.compare_lower("civac")) { - // SYS #3, C7, C14, #1 - SYS_ALIAS(3, 7, 14, 1); - } else if (!Op.compare_lower("cisw")) { - // SYS #0, C7, C14, #2 - SYS_ALIAS(0, 7, 14, 2); - } else { - return TokError("invalid operand for DC instruction"); - } - } else if (Mnemonic == "at") { - if (!Op.compare_lower("s1e1r")) { - // SYS #0, C7, C8, #0 - SYS_ALIAS(0, 7, 8, 0); - } else if (!Op.compare_lower("s1e2r")) { - // SYS #4, C7, C8, #0 - SYS_ALIAS(4, 7, 8, 0); - } else if (!Op.compare_lower("s1e3r")) { - // SYS #6, C7, C8, #0 - SYS_ALIAS(6, 7, 8, 0); - } else if (!Op.compare_lower("s1e1w")) { - // SYS #0, C7, C8, #1 - SYS_ALIAS(0, 7, 8, 1); - } else if (!Op.compare_lower("s1e2w")) { - // SYS #4, C7, C8, #1 - SYS_ALIAS(4, 7, 8, 1); - } else if (!Op.compare_lower("s1e3w")) { - // SYS #6, C7, C8, #1 - SYS_ALIAS(6, 7, 8, 1); - } else if (!Op.compare_lower("s1e0r")) { - // SYS #0, C7, C8, #3 - SYS_ALIAS(0, 7, 8, 2); - } else if (!Op.compare_lower("s1e0w")) { - // SYS #0, C7, C8, #3 - SYS_ALIAS(0, 7, 8, 3); - } else if (!Op.compare_lower("s12e1r")) { - // SYS #4, C7, C8, #4 - SYS_ALIAS(4, 7, 8, 4); - } else if (!Op.compare_lower("s12e1w")) { - // SYS #4, C7, C8, #5 - SYS_ALIAS(4, 7, 8, 5); - } else if (!Op.compare_lower("s12e0r")) { - // SYS #4, C7, C8, #6 - SYS_ALIAS(4, 7, 8, 6); - } else if (!Op.compare_lower("s12e0w")) { - // SYS #4, C7, C8, #7 - SYS_ALIAS(4, 7, 8, 7); - } else { - return TokError("invalid operand for AT instruction"); - } - } else if (Mnemonic == "tlbi") { - if (!Op.compare_lower("vmalle1is")) { - // SYS #0, C8, C3, #0 - SYS_ALIAS(0, 8, 3, 0); - } else if (!Op.compare_lower("alle2is")) { - // SYS #4, C8, C3, #0 - SYS_ALIAS(4, 
8, 3, 0); - } else if (!Op.compare_lower("alle3is")) { - // SYS #6, C8, C3, #0 - SYS_ALIAS(6, 8, 3, 0); - } else if (!Op.compare_lower("vae1is")) { - // SYS #0, C8, C3, #1 - SYS_ALIAS(0, 8, 3, 1); - } else if (!Op.compare_lower("vae2is")) { - // SYS #4, C8, C3, #1 - SYS_ALIAS(4, 8, 3, 1); - } else if (!Op.compare_lower("vae3is")) { - // SYS #6, C8, C3, #1 - SYS_ALIAS(6, 8, 3, 1); - } else if (!Op.compare_lower("aside1is")) { - // SYS #0, C8, C3, #2 - SYS_ALIAS(0, 8, 3, 2); - } else if (!Op.compare_lower("vaae1is")) { - // SYS #0, C8, C3, #3 - SYS_ALIAS(0, 8, 3, 3); - } else if (!Op.compare_lower("alle1is")) { - // SYS #4, C8, C3, #4 - SYS_ALIAS(4, 8, 3, 4); - } else if (!Op.compare_lower("vale1is")) { - // SYS #0, C8, C3, #5 - SYS_ALIAS(0, 8, 3, 5); - } else if (!Op.compare_lower("vaale1is")) { - // SYS #0, C8, C3, #7 - SYS_ALIAS(0, 8, 3, 7); - } else if (!Op.compare_lower("vmalle1")) { - // SYS #0, C8, C7, #0 - SYS_ALIAS(0, 8, 7, 0); - } else if (!Op.compare_lower("alle2")) { - // SYS #4, C8, C7, #0 - SYS_ALIAS(4, 8, 7, 0); - } else if (!Op.compare_lower("vale2is")) { - // SYS #4, C8, C3, #5 - SYS_ALIAS(4, 8, 3, 5); - } else if (!Op.compare_lower("vale3is")) { - // SYS #6, C8, C3, #5 - SYS_ALIAS(6, 8, 3, 5); - } else if (!Op.compare_lower("alle3")) { - // SYS #6, C8, C7, #0 - SYS_ALIAS(6, 8, 7, 0); - } else if (!Op.compare_lower("vae1")) { - // SYS #0, C8, C7, #1 - SYS_ALIAS(0, 8, 7, 1); - } else if (!Op.compare_lower("vae2")) { - // SYS #4, C8, C7, #1 - SYS_ALIAS(4, 8, 7, 1); - } else if (!Op.compare_lower("vae3")) { - // SYS #6, C8, C7, #1 - SYS_ALIAS(6, 8, 7, 1); - } else if (!Op.compare_lower("aside1")) { - // SYS #0, C8, C7, #2 - SYS_ALIAS(0, 8, 7, 2); - } else if (!Op.compare_lower("vaae1")) { - // SYS #0, C8, C7, #3 - SYS_ALIAS(0, 8, 7, 3); - } else if (!Op.compare_lower("alle1")) { - // SYS #4, C8, C7, #4 - SYS_ALIAS(4, 8, 7, 4); - } else if (!Op.compare_lower("vale1")) { - // SYS #0, C8, C7, #5 - SYS_ALIAS(0, 8, 7, 5); - } else if (!Op.compare_lower("vale2")) { - // SYS #4, C8, C7, #5 - SYS_ALIAS(4, 8, 7, 5); - } else if (!Op.compare_lower("vale3")) { - // SYS #6, C8, C7, #5 - SYS_ALIAS(6, 8, 7, 5); - } else if (!Op.compare_lower("vaale1")) { - // SYS #0, C8, C7, #7 - SYS_ALIAS(0, 8, 7, 7); - } else if (!Op.compare_lower("ipas2e1")) { - // SYS #4, C8, C4, #1 - SYS_ALIAS(4, 8, 4, 1); - } else if (!Op.compare_lower("ipas2le1")) { - // SYS #4, C8, C4, #5 - SYS_ALIAS(4, 8, 4, 5); - } else if (!Op.compare_lower("vmalls12e1")) { - // SYS #4, C8, C7, #6 - SYS_ALIAS(4, 8, 7, 6); - } else if (!Op.compare_lower("vmalls12e1is")) { - // SYS #4, C8, C3, #6 - SYS_ALIAS(4, 8, 3, 6); - } else { - return TokError("invalid operand for TLBI instruction"); - } - } - -#undef SYS_ALIAS - - Parser.Lex(); // Eat operand. - - // Check for the optional register operand. - if (getLexer().is(AsmToken::Comma)) { - Parser.Lex(); // Eat comma. - - if (Tok.isNot(AsmToken::Identifier) || parseRegister(Operands)) - return TokError("expected register operand"); - } - - if (getLexer().isNot(AsmToken::EndOfStatement)) { - Parser.eatToEndOfStatement(); - return TokError("unexpected token in argument list"); - } - - Parser.Lex(); // Consume the EndOfStatement - return false; -} - -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { - const AsmToken &Tok = Parser.getTok(); - - // Can be either a #imm style literal or an option name - if (Tok.is(AsmToken::Hash)) { - // Immediate operand. 
- Parser.Lex(); // Eat the '#' - const MCExpr *ImmVal; - SMLoc ExprLoc = getLoc(); - if (getParser().parseExpression(ImmVal)) - return MatchOperand_ParseFail; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) { - Error(ExprLoc, "immediate value expected for barrier operand"); - return MatchOperand_ParseFail; - } - if (MCE->getValue() < 0 || MCE->getValue() > 15) { - Error(ExprLoc, "barrier operand out of range"); - return MatchOperand_ParseFail; - } - Operands.push_back( - ARM64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext())); - return MatchOperand_Success; - } - - if (Tok.isNot(AsmToken::Identifier)) { - TokError("invalid operand for instruction"); - return MatchOperand_ParseFail; - } - - unsigned Opt = StringSwitch<unsigned>(Tok.getString()) - .Case("oshld", ARM64SYS::OSHLD) - .Case("oshst", ARM64SYS::OSHST) - .Case("osh", ARM64SYS::OSH) - .Case("nshld", ARM64SYS::NSHLD) - .Case("nshst", ARM64SYS::NSHST) - .Case("nsh", ARM64SYS::NSH) - .Case("ishld", ARM64SYS::ISHLD) - .Case("ishst", ARM64SYS::ISHST) - .Case("ish", ARM64SYS::ISH) - .Case("ld", ARM64SYS::LD) - .Case("st", ARM64SYS::ST) - .Case("sy", ARM64SYS::SY) - .Default(ARM64SYS::InvalidBarrier); - if (Opt == ARM64SYS::InvalidBarrier) { - TokError("invalid barrier option name"); - return MatchOperand_ParseFail; - } - - // The only valid named option for ISB is 'sy' - if (Mnemonic == "isb" && Opt != ARM64SYS::SY) { - TokError("'sy' or #imm operand expected"); - return MatchOperand_ParseFail; - } - - Operands.push_back(ARM64Operand::CreateBarrier(Opt, getLoc(), getContext())); - Parser.Lex(); // Consume the option - - return MatchOperand_Success; -} - -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseSystemRegister(OperandVector &Operands) { - const AsmToken &Tok = Parser.getTok(); - - // It can be specified as a symbolic name. 
- if (Tok.isNot(AsmToken::Identifier)) - return MatchOperand_NoMatch; - - auto ID = Tok.getString().lower(); - ARM64SYS::SystemRegister Reg = - StringSwitch<ARM64SYS::SystemRegister>(ID) - .Case("spsr_el1", ARM64SYS::SPSR_svc) - .Case("spsr_svc", ARM64SYS::SPSR_svc) - .Case("elr_el1", ARM64SYS::ELR_EL1) - .Case("sp_el0", ARM64SYS::SP_EL0) - .Case("spsel", ARM64SYS::SPSel) - .Case("daif", ARM64SYS::DAIF) - .Case("currentel", ARM64SYS::CurrentEL) - .Case("nzcv", ARM64SYS::NZCV) - .Case("fpcr", ARM64SYS::FPCR) - .Case("fpsr", ARM64SYS::FPSR) - .Case("dspsr", ARM64SYS::DSPSR) - .Case("dlr", ARM64SYS::DLR) - .Case("spsr_el2", ARM64SYS::SPSR_hyp) - .Case("spsr_hyp", ARM64SYS::SPSR_hyp) - .Case("elr_el2", ARM64SYS::ELR_EL2) - .Case("sp_el1", ARM64SYS::SP_EL1) - .Case("spsr_irq", ARM64SYS::SPSR_irq) - .Case("spsr_abt", ARM64SYS::SPSR_abt) - .Case("spsr_und", ARM64SYS::SPSR_und) - .Case("spsr_fiq", ARM64SYS::SPSR_fiq) - .Case("spsr_el3", ARM64SYS::SPSR_EL3) - .Case("elr_el3", ARM64SYS::ELR_EL3) - .Case("sp_el2", ARM64SYS::SP_EL2) - .Case("midr_el1", ARM64SYS::MIDR_EL1) - .Case("ctr_el0", ARM64SYS::CTR_EL0) - .Case("mpidr_el1", ARM64SYS::MPIDR_EL1) - .Case("ecoidr_el1", ARM64SYS::ECOIDR_EL1) - .Case("dczid_el0", ARM64SYS::DCZID_EL0) - .Case("mvfr0_el1", ARM64SYS::MVFR0_EL1) - .Case("mvfr1_el1", ARM64SYS::MVFR1_EL1) - .Case("id_aa64pfr0_el1", ARM64SYS::ID_AA64PFR0_EL1) - .Case("id_aa64pfr1_el1", ARM64SYS::ID_AA64PFR1_EL1) - .Case("id_aa64dfr0_el1", ARM64SYS::ID_AA64DFR0_EL1) - .Case("id_aa64dfr1_el1", ARM64SYS::ID_AA64DFR1_EL1) - .Case("id_aa64isar0_el1", ARM64SYS::ID_AA64ISAR0_EL1) - .Case("id_aa64isar1_el1", ARM64SYS::ID_AA64ISAR1_EL1) - .Case("id_aa64mmfr0_el1", ARM64SYS::ID_AA64MMFR0_EL1) - .Case("id_aa64mmfr1_el1", ARM64SYS::ID_AA64MMFR1_EL1) - .Case("ccsidr_el1", ARM64SYS::CCSIDR_EL1) - .Case("clidr_el1", ARM64SYS::CLIDR_EL1) - .Case("aidr_el1", ARM64SYS::AIDR_EL1) - .Case("csselr_el1", ARM64SYS::CSSELR_EL1) - .Case("vpidr_el2", ARM64SYS::VPIDR_EL2) - .Case("vmpidr_el2", ARM64SYS::VMPIDR_EL2) - .Case("sctlr_el1", ARM64SYS::SCTLR_EL1) - .Case("sctlr_el2", ARM64SYS::SCTLR_EL2) - .Case("sctlr_el3", ARM64SYS::SCTLR_EL3) - .Case("actlr_el1", ARM64SYS::ACTLR_EL1) - .Case("actlr_el2", ARM64SYS::ACTLR_EL2) - .Case("actlr_el3", ARM64SYS::ACTLR_EL3) - .Case("cpacr_el1", ARM64SYS::CPACR_EL1) - .Case("cptr_el2", ARM64SYS::CPTR_EL2) - .Case("cptr_el3", ARM64SYS::CPTR_EL3) - .Case("scr_el3", ARM64SYS::SCR_EL3) - .Case("hcr_el2", ARM64SYS::HCR_EL2) - .Case("mdcr_el2", ARM64SYS::MDCR_EL2) - .Case("mdcr_el3", ARM64SYS::MDCR_EL3) - .Case("hstr_el2", ARM64SYS::HSTR_EL2) - .Case("hacr_el2", ARM64SYS::HACR_EL2) - .Case("ttbr0_el1", ARM64SYS::TTBR0_EL1) - .Case("ttbr1_el1", ARM64SYS::TTBR1_EL1) - .Case("ttbr0_el2", ARM64SYS::TTBR0_EL2) - .Case("ttbr0_el3", ARM64SYS::TTBR0_EL3) - .Case("vttbr_el2", ARM64SYS::VTTBR_EL2) - .Case("tcr_el1", ARM64SYS::TCR_EL1) - .Case("tcr_el2", ARM64SYS::TCR_EL2) - .Case("tcr_el3", ARM64SYS::TCR_EL3) - .Case("vtcr_el2", ARM64SYS::VTCR_EL2) - .Case("adfsr_el1", ARM64SYS::ADFSR_EL1) - .Case("aifsr_el1", ARM64SYS::AIFSR_EL1) - .Case("adfsr_el2", ARM64SYS::ADFSR_EL2) - .Case("aifsr_el2", ARM64SYS::AIFSR_EL2) - .Case("adfsr_el3", ARM64SYS::ADFSR_EL3) - .Case("aifsr_el3", ARM64SYS::AIFSR_EL3) - .Case("esr_el1", ARM64SYS::ESR_EL1) - .Case("esr_el2", ARM64SYS::ESR_EL2) - .Case("esr_el3", ARM64SYS::ESR_EL3) - .Case("far_el1", ARM64SYS::FAR_EL1) - .Case("far_el2", ARM64SYS::FAR_EL2) - .Case("far_el3", ARM64SYS::FAR_EL3) - .Case("hpfar_el2", ARM64SYS::HPFAR_EL2) - .Case("par_el1", 
ARM64SYS::PAR_EL1) - .Case("mair_el1", ARM64SYS::MAIR_EL1) - .Case("mair_el2", ARM64SYS::MAIR_EL2) - .Case("mair_el3", ARM64SYS::MAIR_EL3) - .Case("amair_el1", ARM64SYS::AMAIR_EL1) - .Case("amair_el2", ARM64SYS::AMAIR_EL2) - .Case("amair_el3", ARM64SYS::AMAIR_EL3) - .Case("vbar_el1", ARM64SYS::VBAR_EL1) - .Case("vbar_el2", ARM64SYS::VBAR_EL2) - .Case("vbar_el3", ARM64SYS::VBAR_EL3) - .Case("rvbar_el1", ARM64SYS::RVBAR_EL1) - .Case("rvbar_el2", ARM64SYS::RVBAR_EL2) - .Case("rvbar_el3", ARM64SYS::RVBAR_EL3) - .Case("isr_el1", ARM64SYS::ISR_EL1) - .Case("contextidr_el1", ARM64SYS::CONTEXTIDR_EL1) - .Case("tpidr_el0", ARM64SYS::TPIDR_EL0) - .Case("tpidrro_el0", ARM64SYS::TPIDRRO_EL0) - .Case("tpidr_el1", ARM64SYS::TPIDR_EL1) - .Case("tpidr_el2", ARM64SYS::TPIDR_EL2) - .Case("tpidr_el3", ARM64SYS::TPIDR_EL3) - .Case("teecr32_el1", ARM64SYS::TEECR32_EL1) - .Case("cntfrq_el0", ARM64SYS::CNTFRQ_EL0) - .Case("cntpct_el0", ARM64SYS::CNTPCT_EL0) - .Case("cntvct_el0", ARM64SYS::CNTVCT_EL0) - .Case("cntvoff_el2", ARM64SYS::CNTVOFF_EL2) - .Case("cntkctl_el1", ARM64SYS::CNTKCTL_EL1) - .Case("cnthctl_el2", ARM64SYS::CNTHCTL_EL2) - .Case("cntp_tval_el0", ARM64SYS::CNTP_TVAL_EL0) - .Case("cntp_ctl_el0", ARM64SYS::CNTP_CTL_EL0) - .Case("cntp_cval_el0", ARM64SYS::CNTP_CVAL_EL0) - .Case("cntv_tval_el0", ARM64SYS::CNTV_TVAL_EL0) - .Case("cntv_ctl_el0", ARM64SYS::CNTV_CTL_EL0) - .Case("cntv_cval_el0", ARM64SYS::CNTV_CVAL_EL0) - .Case("cnthp_tval_el2", ARM64SYS::CNTHP_TVAL_EL2) - .Case("cnthp_ctl_el2", ARM64SYS::CNTHP_CTL_EL2) - .Case("cnthp_cval_el2", ARM64SYS::CNTHP_CVAL_EL2) - .Case("cntps_tval_el1", ARM64SYS::CNTPS_TVAL_EL1) - .Case("cntps_ctl_el1", ARM64SYS::CNTPS_CTL_EL1) - .Case("cntps_cval_el1", ARM64SYS::CNTPS_CVAL_EL1) - .Case("dacr32_el2", ARM64SYS::DACR32_EL2) - .Case("ifsr32_el2", ARM64SYS::IFSR32_EL2) - .Case("teehbr32_el1", ARM64SYS::TEEHBR32_EL1) - .Case("sder32_el3", ARM64SYS::SDER32_EL3) - .Case("fpexc32_el2", ARM64SYS::FPEXC32_EL2) - .Case("current_el", ARM64SYS::CurrentEL) - .Case("pmevcntr0_el0", ARM64SYS::PMEVCNTR0_EL0) - .Case("pmevcntr1_el0", ARM64SYS::PMEVCNTR1_EL0) - .Case("pmevcntr2_el0", ARM64SYS::PMEVCNTR2_EL0) - .Case("pmevcntr3_el0", ARM64SYS::PMEVCNTR3_EL0) - .Case("pmevcntr4_el0", ARM64SYS::PMEVCNTR4_EL0) - .Case("pmevcntr5_el0", ARM64SYS::PMEVCNTR5_EL0) - .Case("pmevcntr6_el0", ARM64SYS::PMEVCNTR6_EL0) - .Case("pmevcntr7_el0", ARM64SYS::PMEVCNTR7_EL0) - .Case("pmevcntr8_el0", ARM64SYS::PMEVCNTR8_EL0) - .Case("pmevcntr9_el0", ARM64SYS::PMEVCNTR9_EL0) - .Case("pmevcntr10_el0", ARM64SYS::PMEVCNTR10_EL0) - .Case("pmevcntr11_el0", ARM64SYS::PMEVCNTR11_EL0) - .Case("pmevcntr12_el0", ARM64SYS::PMEVCNTR12_EL0) - .Case("pmevcntr13_el0", ARM64SYS::PMEVCNTR13_EL0) - .Case("pmevcntr14_el0", ARM64SYS::PMEVCNTR14_EL0) - .Case("pmevcntr15_el0", ARM64SYS::PMEVCNTR15_EL0) - .Case("pmevcntr16_el0", ARM64SYS::PMEVCNTR16_EL0) - .Case("pmevcntr17_el0", ARM64SYS::PMEVCNTR17_EL0) - .Case("pmevcntr18_el0", ARM64SYS::PMEVCNTR18_EL0) - .Case("pmevcntr19_el0", ARM64SYS::PMEVCNTR19_EL0) - .Case("pmevcntr20_el0", ARM64SYS::PMEVCNTR20_EL0) - .Case("pmevcntr21_el0", ARM64SYS::PMEVCNTR21_EL0) - .Case("pmevcntr22_el0", ARM64SYS::PMEVCNTR22_EL0) - .Case("pmevcntr23_el0", ARM64SYS::PMEVCNTR23_EL0) - .Case("pmevcntr24_el0", ARM64SYS::PMEVCNTR24_EL0) - .Case("pmevcntr25_el0", ARM64SYS::PMEVCNTR25_EL0) - .Case("pmevcntr26_el0", ARM64SYS::PMEVCNTR26_EL0) - .Case("pmevcntr27_el0", ARM64SYS::PMEVCNTR27_EL0) - .Case("pmevcntr28_el0", ARM64SYS::PMEVCNTR28_EL0) - .Case("pmevcntr29_el0", ARM64SYS::PMEVCNTR29_EL0) - 
.Case("pmevcntr30_el0", ARM64SYS::PMEVCNTR30_EL0) - .Case("pmevtyper0_el0", ARM64SYS::PMEVTYPER0_EL0) - .Case("pmevtyper1_el0", ARM64SYS::PMEVTYPER1_EL0) - .Case("pmevtyper2_el0", ARM64SYS::PMEVTYPER2_EL0) - .Case("pmevtyper3_el0", ARM64SYS::PMEVTYPER3_EL0) - .Case("pmevtyper4_el0", ARM64SYS::PMEVTYPER4_EL0) - .Case("pmevtyper5_el0", ARM64SYS::PMEVTYPER5_EL0) - .Case("pmevtyper6_el0", ARM64SYS::PMEVTYPER6_EL0) - .Case("pmevtyper7_el0", ARM64SYS::PMEVTYPER7_EL0) - .Case("pmevtyper8_el0", ARM64SYS::PMEVTYPER8_EL0) - .Case("pmevtyper9_el0", ARM64SYS::PMEVTYPER9_EL0) - .Case("pmevtyper10_el0", ARM64SYS::PMEVTYPER10_EL0) - .Case("pmevtyper11_el0", ARM64SYS::PMEVTYPER11_EL0) - .Case("pmevtyper12_el0", ARM64SYS::PMEVTYPER12_EL0) - .Case("pmevtyper13_el0", ARM64SYS::PMEVTYPER13_EL0) - .Case("pmevtyper14_el0", ARM64SYS::PMEVTYPER14_EL0) - .Case("pmevtyper15_el0", ARM64SYS::PMEVTYPER15_EL0) - .Case("pmevtyper16_el0", ARM64SYS::PMEVTYPER16_EL0) - .Case("pmevtyper17_el0", ARM64SYS::PMEVTYPER17_EL0) - .Case("pmevtyper18_el0", ARM64SYS::PMEVTYPER18_EL0) - .Case("pmevtyper19_el0", ARM64SYS::PMEVTYPER19_EL0) - .Case("pmevtyper20_el0", ARM64SYS::PMEVTYPER20_EL0) - .Case("pmevtyper21_el0", ARM64SYS::PMEVTYPER21_EL0) - .Case("pmevtyper22_el0", ARM64SYS::PMEVTYPER22_EL0) - .Case("pmevtyper23_el0", ARM64SYS::PMEVTYPER23_EL0) - .Case("pmevtyper24_el0", ARM64SYS::PMEVTYPER24_EL0) - .Case("pmevtyper25_el0", ARM64SYS::PMEVTYPER25_EL0) - .Case("pmevtyper26_el0", ARM64SYS::PMEVTYPER26_EL0) - .Case("pmevtyper27_el0", ARM64SYS::PMEVTYPER27_EL0) - .Case("pmevtyper28_el0", ARM64SYS::PMEVTYPER28_EL0) - .Case("pmevtyper29_el0", ARM64SYS::PMEVTYPER29_EL0) - .Case("pmevtyper30_el0", ARM64SYS::PMEVTYPER30_EL0) - .Case("pmccfiltr_el0", ARM64SYS::PMCCFILTR_EL0) - .Case("rmr_el3", ARM64SYS::RMR_EL3) - .Case("rmr_el2", ARM64SYS::RMR_EL2) - .Case("rmr_el1", ARM64SYS::RMR_EL1) - .Case("cpm_ioacc_ctl_el3", ARM64SYS::CPM_IOACC_CTL_EL3) - .Case("mdccsr_el0", ARM64SYS::MDCCSR_EL0) - .Case("mdccint_el1", ARM64SYS::MDCCINT_EL1) - .Case("dbgdtr_el0", ARM64SYS::DBGDTR_EL0) - .Case("dbgdtrrx_el0", ARM64SYS::DBGDTRRX_EL0) - .Case("dbgdtrtx_el0", ARM64SYS::DBGDTRTX_EL0) - .Case("dbgvcr32_el2", ARM64SYS::DBGVCR32_EL2) - .Case("osdtrrx_el1", ARM64SYS::OSDTRRX_EL1) - .Case("mdscr_el1", ARM64SYS::MDSCR_EL1) - .Case("osdtrtx_el1", ARM64SYS::OSDTRTX_EL1) - .Case("oseccr_el11", ARM64SYS::OSECCR_EL11) - .Case("dbgbvr0_el1", ARM64SYS::DBGBVR0_EL1) - .Case("dbgbvr1_el1", ARM64SYS::DBGBVR1_EL1) - .Case("dbgbvr2_el1", ARM64SYS::DBGBVR2_EL1) - .Case("dbgbvr3_el1", ARM64SYS::DBGBVR3_EL1) - .Case("dbgbvr4_el1", ARM64SYS::DBGBVR4_EL1) - .Case("dbgbvr5_el1", ARM64SYS::DBGBVR5_EL1) - .Case("dbgbvr6_el1", ARM64SYS::DBGBVR6_EL1) - .Case("dbgbvr7_el1", ARM64SYS::DBGBVR7_EL1) - .Case("dbgbvr8_el1", ARM64SYS::DBGBVR8_EL1) - .Case("dbgbvr9_el1", ARM64SYS::DBGBVR9_EL1) - .Case("dbgbvr10_el1", ARM64SYS::DBGBVR10_EL1) - .Case("dbgbvr11_el1", ARM64SYS::DBGBVR11_EL1) - .Case("dbgbvr12_el1", ARM64SYS::DBGBVR12_EL1) - .Case("dbgbvr13_el1", ARM64SYS::DBGBVR13_EL1) - .Case("dbgbvr14_el1", ARM64SYS::DBGBVR14_EL1) - .Case("dbgbvr15_el1", ARM64SYS::DBGBVR15_EL1) - .Case("dbgbcr0_el1", ARM64SYS::DBGBCR0_EL1) - .Case("dbgbcr1_el1", ARM64SYS::DBGBCR1_EL1) - .Case("dbgbcr2_el1", ARM64SYS::DBGBCR2_EL1) - .Case("dbgbcr3_el1", ARM64SYS::DBGBCR3_EL1) - .Case("dbgbcr4_el1", ARM64SYS::DBGBCR4_EL1) - .Case("dbgbcr5_el1", ARM64SYS::DBGBCR5_EL1) - .Case("dbgbcr6_el1", ARM64SYS::DBGBCR6_EL1) - .Case("dbgbcr7_el1", ARM64SYS::DBGBCR7_EL1) - .Case("dbgbcr8_el1", ARM64SYS::DBGBCR8_EL1) - 
.Case("dbgbcr9_el1", ARM64SYS::DBGBCR9_EL1) - .Case("dbgbcr10_el1", ARM64SYS::DBGBCR10_EL1) - .Case("dbgbcr11_el1", ARM64SYS::DBGBCR11_EL1) - .Case("dbgbcr12_el1", ARM64SYS::DBGBCR12_EL1) - .Case("dbgbcr13_el1", ARM64SYS::DBGBCR13_EL1) - .Case("dbgbcr14_el1", ARM64SYS::DBGBCR14_EL1) - .Case("dbgbcr15_el1", ARM64SYS::DBGBCR15_EL1) - .Case("dbgwvr0_el1", ARM64SYS::DBGWVR0_EL1) - .Case("dbgwvr1_el1", ARM64SYS::DBGWVR1_EL1) - .Case("dbgwvr2_el1", ARM64SYS::DBGWVR2_EL1) - .Case("dbgwvr3_el1", ARM64SYS::DBGWVR3_EL1) - .Case("dbgwvr4_el1", ARM64SYS::DBGWVR4_EL1) - .Case("dbgwvr5_el1", ARM64SYS::DBGWVR5_EL1) - .Case("dbgwvr6_el1", ARM64SYS::DBGWVR6_EL1) - .Case("dbgwvr7_el1", ARM64SYS::DBGWVR7_EL1) - .Case("dbgwvr8_el1", ARM64SYS::DBGWVR8_EL1) - .Case("dbgwvr9_el1", ARM64SYS::DBGWVR9_EL1) - .Case("dbgwvr10_el1", ARM64SYS::DBGWVR10_EL1) - .Case("dbgwvr11_el1", ARM64SYS::DBGWVR11_EL1) - .Case("dbgwvr12_el1", ARM64SYS::DBGWVR12_EL1) - .Case("dbgwvr13_el1", ARM64SYS::DBGWVR13_EL1) - .Case("dbgwvr14_el1", ARM64SYS::DBGWVR14_EL1) - .Case("dbgwvr15_el1", ARM64SYS::DBGWVR15_EL1) - .Case("dbgwcr0_el1", ARM64SYS::DBGWCR0_EL1) - .Case("dbgwcr1_el1", ARM64SYS::DBGWCR1_EL1) - .Case("dbgwcr2_el1", ARM64SYS::DBGWCR2_EL1) - .Case("dbgwcr3_el1", ARM64SYS::DBGWCR3_EL1) - .Case("dbgwcr4_el1", ARM64SYS::DBGWCR4_EL1) - .Case("dbgwcr5_el1", ARM64SYS::DBGWCR5_EL1) - .Case("dbgwcr6_el1", ARM64SYS::DBGWCR6_EL1) - .Case("dbgwcr7_el1", ARM64SYS::DBGWCR7_EL1) - .Case("dbgwcr8_el1", ARM64SYS::DBGWCR8_EL1) - .Case("dbgwcr9_el1", ARM64SYS::DBGWCR9_EL1) - .Case("dbgwcr10_el1", ARM64SYS::DBGWCR10_EL1) - .Case("dbgwcr11_el1", ARM64SYS::DBGWCR11_EL1) - .Case("dbgwcr12_el1", ARM64SYS::DBGWCR12_EL1) - .Case("dbgwcr13_el1", ARM64SYS::DBGWCR13_EL1) - .Case("dbgwcr14_el1", ARM64SYS::DBGWCR14_EL1) - .Case("dbgwcr15_el1", ARM64SYS::DBGWCR15_EL1) - .Case("mdrar_el1", ARM64SYS::MDRAR_EL1) - .Case("oslar_el1", ARM64SYS::OSLAR_EL1) - .Case("oslsr_el1", ARM64SYS::OSLSR_EL1) - .Case("osdlr_el1", ARM64SYS::OSDLR_EL1) - .Case("dbgprcr_el1", ARM64SYS::DBGPRCR_EL1) - .Case("dbgclaimset_el1", ARM64SYS::DBGCLAIMSET_EL1) - .Case("dbgclaimclr_el1", ARM64SYS::DBGCLAIMCLR_EL1) - .Case("dbgauthstatus_el1", ARM64SYS::DBGAUTHSTATUS_EL1) - .Case("dbgdevid2", ARM64SYS::DBGDEVID2) - .Case("dbgdevid1", ARM64SYS::DBGDEVID1) - .Case("dbgdevid0", ARM64SYS::DBGDEVID0) - .Case("id_pfr0_el1", ARM64SYS::ID_PFR0_EL1) - .Case("id_pfr1_el1", ARM64SYS::ID_PFR1_EL1) - .Case("id_dfr0_el1", ARM64SYS::ID_DFR0_EL1) - .Case("id_afr0_el1", ARM64SYS::ID_AFR0_EL1) - .Case("id_isar0_el1", ARM64SYS::ID_ISAR0_EL1) - .Case("id_isar1_el1", ARM64SYS::ID_ISAR1_EL1) - .Case("id_isar2_el1", ARM64SYS::ID_ISAR2_EL1) - .Case("id_isar3_el1", ARM64SYS::ID_ISAR3_EL1) - .Case("id_isar4_el1", ARM64SYS::ID_ISAR4_EL1) - .Case("id_isar5_el1", ARM64SYS::ID_ISAR5_EL1) - .Case("afsr1_el1", ARM64SYS::AFSR1_EL1) - .Case("afsr0_el1", ARM64SYS::AFSR0_EL1) - .Case("revidr_el1", ARM64SYS::REVIDR_EL1) - .Default(ARM64SYS::InvalidSystemReg); - if (Reg != ARM64SYS::InvalidSystemReg) { - // We matched a reg name, so create the operand. - Operands.push_back( - ARM64Operand::CreateSystemRegister(Reg, getLoc(), getContext())); - Parser.Lex(); // Consume the register name. - return MatchOperand_Success; - } - - // Or we may have an identifier that encodes the sub-operands. - // For example, s3_2_c15_c0_0. 
- unsigned op0, op1, CRn, CRm, op2; - std::string Desc = ID; - if (std::sscanf(Desc.c_str(), "s%u_%u_c%u_c%u_%u", &op0, &op1, &CRn, &CRm, - &op2) != 5) - return MatchOperand_NoMatch; - if ((op0 != 2 && op0 != 3) || op1 > 7 || CRn > 15 || CRm > 15 || op2 > 7) - return MatchOperand_NoMatch; - - unsigned Val = op0 << 14 | op1 << 11 | CRn << 7 | CRm << 3 | op2; - Operands.push_back( - ARM64Operand::CreateSystemRegister(Val, getLoc(), getContext())); - Parser.Lex(); // Consume the register name. - - return MatchOperand_Success; -} - -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseCPSRField(OperandVector &Operands) { - const AsmToken &Tok = Parser.getTok(); - - if (Tok.isNot(AsmToken::Identifier)) - return MatchOperand_NoMatch; - - ARM64SYS::CPSRField Field = - StringSwitch<ARM64SYS::CPSRField>(Tok.getString().lower()) - .Case("spsel", ARM64SYS::cpsr_SPSel) - .Case("daifset", ARM64SYS::cpsr_DAIFSet) - .Case("daifclr", ARM64SYS::cpsr_DAIFClr) - .Default(ARM64SYS::InvalidCPSRField); - if (Field == ARM64SYS::InvalidCPSRField) - return MatchOperand_NoMatch; - Operands.push_back( - ARM64Operand::CreateCPSRField(Field, getLoc(), getContext())); - Parser.Lex(); // Consume the register name. - - return MatchOperand_Success; -} - -/// tryParseVectorRegister - Parse a vector register operand. -bool ARM64AsmParser::tryParseVectorRegister(OperandVector &Operands) { - if (Parser.getTok().isNot(AsmToken::Identifier)) - return true; - - SMLoc S = getLoc(); - // Check for a vector register specifier first. - StringRef Kind; - int64_t Reg = tryMatchVectorRegister(Kind); - if (Reg == -1) - return true; - Operands.push_back( - ARM64Operand::CreateReg(Reg, true, S, getLoc(), getContext())); - // If there was an explicit qualifier, that goes on as a literal text - // operand. - if (!Kind.empty()) - Operands.push_back(ARM64Operand::CreateToken(Kind, false, S, getContext())); - - // If there is an index specifier following the register, parse that too. - if (Parser.getTok().is(AsmToken::LBrac)) { - SMLoc SIdx = getLoc(); - Parser.Lex(); // Eat left bracket token. - - const MCExpr *ImmVal; - if (getParser().parseExpression(ImmVal)) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) { - TokError("immediate value expected for vector index"); - return false; - } - - SMLoc E = getLoc(); - if (Parser.getTok().isNot(AsmToken::RBrac)) { - Error(E, "']' expected"); - return false; - } - - Parser.Lex(); // Eat right bracket token. - - Operands.push_back(ARM64Operand::CreateVectorIndex(MCE->getValue(), SIdx, E, - getContext())); - } - - return false; -} - -/// parseRegister - Parse a non-vector register operand. -bool ARM64AsmParser::parseRegister(OperandVector &Operands) { - SMLoc S = getLoc(); - // Try for a vector register. - if (!tryParseVectorRegister(Operands)) - return false; - - // Try for a scalar register. - int64_t Reg = tryParseRegister(); - if (Reg == -1) - return true; - Operands.push_back( - ARM64Operand::CreateReg(Reg, false, S, getLoc(), getContext())); - - // A small number of instructions (FMOVXDhighr, for example) have "[1]" - // as a string token in the instruction itself. 
- if (getLexer().getKind() == AsmToken::LBrac) { - SMLoc LBracS = getLoc(); - Parser.Lex(); - const AsmToken &Tok = Parser.getTok(); - if (Tok.is(AsmToken::Integer)) { - SMLoc IntS = getLoc(); - int64_t Val = Tok.getIntVal(); - if (Val == 1) { - Parser.Lex(); - if (getLexer().getKind() == AsmToken::RBrac) { - SMLoc RBracS = getLoc(); - Parser.Lex(); - Operands.push_back( - ARM64Operand::CreateToken("[", false, LBracS, getContext())); - Operands.push_back( - ARM64Operand::CreateToken("1", false, IntS, getContext())); - Operands.push_back( - ARM64Operand::CreateToken("]", false, RBracS, getContext())); - return false; - } - } - } - } - - return false; -} - -/// tryParseNoIndexMemory - Custom parser method for memory operands that -/// do not allow base regisrer writeback modes, -/// or those that handle writeback separately from -/// the memory operand (like the AdvSIMD ldX/stX -/// instructions. -ARM64AsmParser::OperandMatchResultTy -ARM64AsmParser::tryParseNoIndexMemory(OperandVector &Operands) { - if (Parser.getTok().isNot(AsmToken::LBrac)) - return MatchOperand_NoMatch; - SMLoc S = getLoc(); - Parser.Lex(); // Eat left bracket token. - - const AsmToken &BaseRegTok = Parser.getTok(); - if (BaseRegTok.isNot(AsmToken::Identifier)) { - Error(BaseRegTok.getLoc(), "register expected"); - return MatchOperand_ParseFail; - } - - int64_t Reg = tryParseRegister(); - if (Reg == -1) { - Error(BaseRegTok.getLoc(), "register expected"); - return MatchOperand_ParseFail; - } - - SMLoc E = getLoc(); - if (Parser.getTok().isNot(AsmToken::RBrac)) { - Error(E, "']' expected"); - return MatchOperand_ParseFail; - } - - Parser.Lex(); // Eat right bracket token. - - Operands.push_back(ARM64Operand::CreateMem(Reg, 0, S, E, E, getContext())); - return MatchOperand_Success; -} - -/// parseMemory - Parse a memory operand for a basic load/store instruction. -bool ARM64AsmParser::parseMemory(OperandVector &Operands) { - assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a Left Bracket"); - SMLoc S = getLoc(); - Parser.Lex(); // Eat left bracket token. - - const AsmToken &BaseRegTok = Parser.getTok(); - if (BaseRegTok.isNot(AsmToken::Identifier)) - return Error(BaseRegTok.getLoc(), "register expected"); - - int64_t Reg = tryParseRegister(); - if (Reg == -1) - return Error(BaseRegTok.getLoc(), "register expected"); - - // If there is an offset expression, parse it. - const MCExpr *OffsetExpr = 0; - SMLoc OffsetLoc; - if (Parser.getTok().is(AsmToken::Comma)) { - Parser.Lex(); // Eat the comma. - OffsetLoc = getLoc(); - - // Register offset - const AsmToken &OffsetRegTok = Parser.getTok(); - int Reg2 = OffsetRegTok.is(AsmToken::Identifier) ? tryParseRegister() : -1; - if (Reg2 != -1) { - // Default shift is LSL, with an omitted shift. We use the third bit of - // the extend value to indicate presence/omission of the immediate offset. - ARM64_AM::ExtendType ExtOp = ARM64_AM::UXTX; - int64_t ShiftVal = 0; - bool ExplicitShift = false; - - if (Parser.getTok().is(AsmToken::Comma)) { - // Embedded extend operand. 
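// e.g. the third component of "[x1, x2, lsl #3]" or "[x1, w2, sxtw #2]";
// the extend keyword and its optional #imm are parsed next.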
- Parser.Lex(); // Eat the comma - - SMLoc ExtLoc = getLoc(); - const AsmToken &Tok = Parser.getTok(); - ExtOp = StringSwitch<ARM64_AM::ExtendType>(Tok.getString()) - .Case("uxtw", ARM64_AM::UXTW) - .Case("lsl", ARM64_AM::UXTX) // Alias for UXTX - .Case("sxtw", ARM64_AM::SXTW) - .Case("sxtx", ARM64_AM::SXTX) - .Case("UXTW", ARM64_AM::UXTW) - .Case("LSL", ARM64_AM::UXTX) // Alias for UXTX - .Case("SXTW", ARM64_AM::SXTW) - .Case("SXTX", ARM64_AM::SXTX) - .Default(ARM64_AM::InvalidExtend); - if (ExtOp == ARM64_AM::InvalidExtend) - return Error(ExtLoc, "expected valid extend operation"); - - Parser.Lex(); // Eat the extend op. - - if (getLexer().is(AsmToken::RBrac)) { - // No immediate operand. - if (ExtOp == ARM64_AM::UXTX) - return Error(ExtLoc, "LSL extend requires immediate operand"); - } else if (getLexer().is(AsmToken::Hash)) { - // Immediate operand. - Parser.Lex(); // Eat the '#' - const MCExpr *ImmVal; - SMLoc ExprLoc = getLoc(); - if (getParser().parseExpression(ImmVal)) - return true; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) - return TokError("immediate value expected for extend operand"); - - ExplicitShift = true; - ShiftVal = MCE->getValue(); - if (ShiftVal < 0 || ShiftVal > 4) - return Error(ExprLoc, "immediate operand out of range"); - } else - return Error(getLoc(), "expected immediate operand"); - } - - if (Parser.getTok().isNot(AsmToken::RBrac)) - return Error(getLoc(), "']' expected"); - - Parser.Lex(); // Eat right bracket token. - - SMLoc E = getLoc(); - Operands.push_back(ARM64Operand::CreateRegOffsetMem( - Reg, Reg2, ExtOp, ShiftVal, ExplicitShift, S, E, getContext())); - return false; - - // Immediate expressions. - } else if (Parser.getTok().is(AsmToken::Hash)) { - Parser.Lex(); // Eat hash token. - - if (parseSymbolicImmVal(OffsetExpr)) - return true; - } else { - // FIXME: We really should make sure that we're dealing with a LDR/STR - // instruction that can legally have a symbolic expression here. - // Symbol reference. - if (Parser.getTok().isNot(AsmToken::Identifier) && - Parser.getTok().isNot(AsmToken::String)) - return Error(getLoc(), "identifier or immediate expression expected"); - if (getParser().parseExpression(OffsetExpr)) - return true; - // If this is a plain ref, Make sure a legal variant kind was specified. - // Otherwise, it's a more complicated expression and we have to just - // assume it's OK and let the relocation stuff puke if it's not. - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - const MCConstantExpr *Addend; - if (classifySymbolRef(OffsetExpr, ELFRefKind, DarwinRefKind, Addend) && - Addend == 0) { - assert(ELFRefKind == ARM64MCExpr::VK_INVALID && - "ELF symbol modifiers not supported here yet"); - - switch (DarwinRefKind) { - default: - return Error(getLoc(), "expected @pageoff or @gotpageoff modifier"); - case MCSymbolRefExpr::VK_GOTPAGEOFF: - case MCSymbolRefExpr::VK_PAGEOFF: - case MCSymbolRefExpr::VK_TLVPPAGEOFF: - // These are what we're expecting. - break; - } - } - } - } - - SMLoc E = getLoc(); - if (Parser.getTok().isNot(AsmToken::RBrac)) - return Error(E, "']' expected"); - - Parser.Lex(); // Eat right bracket token. - - // Create the memory operand. - Operands.push_back( - ARM64Operand::CreateMem(Reg, OffsetExpr, S, E, OffsetLoc, getContext())); - - // Check for a '!', indicating pre-indexed addressing with writeback. - if (Parser.getTok().is(AsmToken::Exclaim)) { - // There needs to have been an immediate or wback doesn't make sense. 
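// e.g. "ldr x0, [x1, #8]!" writes x1 + 8 back into x1; a bare "[x1]!" has
// no offset to write back and is rejected below.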
- if (!OffsetExpr) - return Error(E, "missing offset for pre-indexed addressing"); - // Pre-indexed with writeback must have a constant expression for the - // offset. FIXME: Theoretically, we'd like to allow fixups so long - // as they don't require a relocation. - if (!isa<MCConstantExpr>(OffsetExpr)) - return Error(OffsetLoc, "constant immediate expression expected"); - - // Create the Token operand for the '!'. - Operands.push_back(ARM64Operand::CreateToken( - "!", false, Parser.getTok().getLoc(), getContext())); - Parser.Lex(); // Eat the '!' token. - } - - return false; -} - -bool ARM64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { - bool HasELFModifier = false; - ARM64MCExpr::VariantKind RefKind; - - if (Parser.getTok().is(AsmToken::Colon)) { - Parser.Lex(); // Eat ':" - HasELFModifier = true; - - if (Parser.getTok().isNot(AsmToken::Identifier)) { - Error(Parser.getTok().getLoc(), - "expect relocation specifier in operand after ':'"); - return true; - } - - std::string LowerCase = Parser.getTok().getIdentifier().lower(); - RefKind = StringSwitch<ARM64MCExpr::VariantKind>(LowerCase) - .Case("lo12", ARM64MCExpr::VK_LO12) - .Case("abs_g3", ARM64MCExpr::VK_ABS_G3) - .Case("abs_g2", ARM64MCExpr::VK_ABS_G2) - .Case("abs_g2_nc", ARM64MCExpr::VK_ABS_G2_NC) - .Case("abs_g1", ARM64MCExpr::VK_ABS_G1) - .Case("abs_g1_nc", ARM64MCExpr::VK_ABS_G1_NC) - .Case("abs_g0", ARM64MCExpr::VK_ABS_G0) - .Case("abs_g0_nc", ARM64MCExpr::VK_ABS_G0_NC) - .Case("dtprel_g2", ARM64MCExpr::VK_DTPREL_G2) - .Case("dtprel_g1", ARM64MCExpr::VK_DTPREL_G1) - .Case("dtprel_g1_nc", ARM64MCExpr::VK_DTPREL_G1_NC) - .Case("dtprel_g0", ARM64MCExpr::VK_DTPREL_G0) - .Case("dtprel_g0_nc", ARM64MCExpr::VK_DTPREL_G0_NC) - .Case("dtprel_lo12", ARM64MCExpr::VK_DTPREL_LO12) - .Case("dtprel_lo12_nc", ARM64MCExpr::VK_DTPREL_LO12_NC) - .Case("tprel_g2", ARM64MCExpr::VK_TPREL_G2) - .Case("tprel_g1", ARM64MCExpr::VK_TPREL_G1) - .Case("tprel_g1_nc", ARM64MCExpr::VK_TPREL_G1_NC) - .Case("tprel_g0", ARM64MCExpr::VK_TPREL_G0) - .Case("tprel_g0_nc", ARM64MCExpr::VK_TPREL_G0_NC) - .Case("tprel_lo12", ARM64MCExpr::VK_TPREL_LO12) - .Case("tprel_lo12_nc", ARM64MCExpr::VK_TPREL_LO12_NC) - .Case("tlsdesc_lo12", ARM64MCExpr::VK_TLSDESC_LO12) - .Case("got", ARM64MCExpr::VK_GOT_PAGE) - .Case("got_lo12", ARM64MCExpr::VK_GOT_LO12) - .Case("gottprel", ARM64MCExpr::VK_GOTTPREL_PAGE) - .Case("gottprel_lo12", ARM64MCExpr::VK_GOTTPREL_LO12_NC) - .Case("gottprel_g1", ARM64MCExpr::VK_GOTTPREL_G1) - .Case("gottprel_g0_nc", ARM64MCExpr::VK_GOTTPREL_G0_NC) - .Case("tlsdesc", ARM64MCExpr::VK_TLSDESC_PAGE) - .Default(ARM64MCExpr::VK_INVALID); - - if (RefKind == ARM64MCExpr::VK_INVALID) { - Error(Parser.getTok().getLoc(), - "expect relocation specifier in operand after ':'"); - return true; - } - - Parser.Lex(); // Eat identifier - - if (Parser.getTok().isNot(AsmToken::Colon)) { - Error(Parser.getTok().getLoc(), "expect ':' after relocation specifier"); - return true; - } - Parser.Lex(); // Eat ':' - } - - if (getParser().parseExpression(ImmVal)) - return true; - - if (HasELFModifier) - ImmVal = ARM64MCExpr::Create(ImmVal, RefKind, getContext()); - - return false; -} - -/// parseVectorList - Parse a vector list operand for AdvSIMD instructions. -bool ARM64AsmParser::parseVectorList(OperandVector &Operands) { - assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Bracket"); - SMLoc S = getLoc(); - Parser.Lex(); // Eat left bracket token. 
- StringRef Kind; - int64_t FirstReg = tryMatchVectorRegister(Kind); - if (FirstReg == -1) - return Error(getLoc(), "vector register expected"); - int64_t PrevReg = FirstReg; - unsigned Count = 1; - while (Parser.getTok().isNot(AsmToken::RCurly)) { - if (Parser.getTok().is(AsmToken::EndOfStatement)) - Error(getLoc(), "'}' expected"); - - if (Parser.getTok().isNot(AsmToken::Comma)) - return Error(getLoc(), "',' expected"); - Parser.Lex(); // Eat the comma token. - - SMLoc Loc = getLoc(); - StringRef NextKind; - int64_t Reg = tryMatchVectorRegister(NextKind); - if (Reg == -1) - return Error(Loc, "vector register expected"); - // Any Kind suffices must match on all regs in the list. - if (Kind != NextKind) - return Error(Loc, "mismatched register size suffix"); - - // Registers must be incremental (with wraparound at 31) - if (getContext().getRegisterInfo()->getEncodingValue(Reg) != - (getContext().getRegisterInfo()->getEncodingValue(PrevReg) + 1) % 32) - return Error(Loc, "registers must be sequential"); - - PrevReg = Reg; - ++Count; - } - Parser.Lex(); // Eat the '}' token. - - unsigned NumElements = 0; - char ElementKind = 0; - if (!Kind.empty()) - parseValidVectorKind(Kind, NumElements, ElementKind); - - Operands.push_back(ARM64Operand::CreateVectorList( - FirstReg, Count, NumElements, ElementKind, S, getLoc(), getContext())); - - // If there is an index specifier following the list, parse that too. - if (Parser.getTok().is(AsmToken::LBrac)) { - SMLoc SIdx = getLoc(); - Parser.Lex(); // Eat left bracket token. - - const MCExpr *ImmVal; - if (getParser().parseExpression(ImmVal)) - return false; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) { - TokError("immediate value expected for vector index"); - return false; - } - - SMLoc E = getLoc(); - if (Parser.getTok().isNot(AsmToken::RBrac)) { - Error(E, "']' expected"); - return false; - } - - Parser.Lex(); // Eat right bracket token. - - Operands.push_back(ARM64Operand::CreateVectorIndex(MCE->getValue(), SIdx, E, - getContext())); - } - return false; -} - -/// parseOperand - Parse a arm instruction operand. For now this parses the -/// operand regardless of the mnemonic. -bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, - bool invertCondCode) { - // Check if the current operand has a custom associated parser, if so, try to - // custom parse the operand, or fallback to the general approach. - OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); - if (ResTy == MatchOperand_Success) - return false; - // If there wasn't a custom match, try the generic matcher below. Otherwise, - // there was a match, but an error occurred, in which case, just return that - // the operand parsing failed. - if (ResTy == MatchOperand_ParseFail) - return true; - - // Nothing custom, so do general case parsing. - SMLoc S, E; - switch (getLexer().getKind()) { - default: { - SMLoc S = getLoc(); - const MCExpr *Expr; - if (parseSymbolicImmVal(Expr)) - return Error(S, "invalid operand"); - - SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext())); - return false; - } - case AsmToken::LBrac: - return parseMemory(Operands); - case AsmToken::LCurly: - return parseVectorList(Operands); - case AsmToken::Identifier: { - // If we're expecting a Condition Code operand, then just parse that. - if (isCondCode) - return parseCondCode(Operands, invertCondCode); - - // If it's a register name, parse it. 
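Editorial aside, not part of the deleted source: the vector-list loop above accepts register lists whose encodings are consecutive modulo 32, so a list may wrap from V31 back to V0, and any gap triggers the "registers must be sequential" diagnostic. A minimal standalone sketch of that rule on raw encoding values follows; the helper name and sample encodings are placeholders, not LLVM API.

  #include <cassert>

  // Consecutive-with-wraparound rule used for AdvSIMD register lists:
  // the next register's encoding must be (previous + 1) mod 32.
  static bool isSequential(unsigned PrevEnc, unsigned NextEnc) {
    return NextEnc == (PrevEnc + 1) % 32;
  }

  int main() {
    assert(isSequential(30, 31)); // { v30.4s, v31.4s, ... }
    assert(isSequential(31, 0));  // ... the list may wrap around to v0.4s
    assert(!isSequential(3, 5));  // gap: rejected by the parser
    return 0;
  }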
- if (!parseRegister(Operands)) - return false; - - // This could be an optional "shift" operand. - if (!parseOptionalShift(Operands)) - return false; - - // Or maybe it could be an optional "extend" operand. - if (!parseOptionalExtend(Operands)) - return false; - - // This was not a register so parse other operands that start with an - // identifier (like labels) as expressions and create them as immediates. - const MCExpr *IdVal; - S = getLoc(); - if (getParser().parseExpression(IdVal)) - return true; - - E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(IdVal, S, E, getContext())); - return false; - } - case AsmToken::Hash: { - // #42 -> immediate. - S = getLoc(); - Parser.Lex(); - - // The only Real that should come through here is a literal #0.0 for - // the fcmp[e] r, #0.0 instructions. They expect raw token operands, - // so convert the value. - const AsmToken &Tok = Parser.getTok(); - if (Tok.is(AsmToken::Real)) { - APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); - uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); - if (IntVal != 0 || (Mnemonic != "fcmp" && Mnemonic != "fcmpe")) - return TokError("unexpected floating point literal"); - Parser.Lex(); // Eat the token. - - Operands.push_back( - ARM64Operand::CreateToken("#0", false, S, getContext())); - Operands.push_back( - ARM64Operand::CreateToken(".0", false, S, getContext())); - return false; - } - - const MCExpr *ImmVal; - if (parseSymbolicImmVal(ImmVal)) - return true; - - E = SMLoc::getFromPointer(getLoc().getPointer() - 1); - Operands.push_back(ARM64Operand::CreateImm(ImmVal, S, E, getContext())); - return false; - } - } -} - -/// ParseInstruction - Parse an ARM64 instruction mnemonic followed by its -/// operands. -bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info, - StringRef Name, SMLoc NameLoc, - OperandVector &Operands) { - // Create the leading tokens for the mnemonic, split by '.' characters. - size_t Start = 0, Next = Name.find('.'); - StringRef Head = Name.slice(Start, Next); - - // IC, DC, AT, and TLBI instructions are aliases for the SYS instruction. - if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi") - return parseSysAlias(Head, NameLoc, Operands); - - Operands.push_back( - ARM64Operand::CreateToken(Head, false, NameLoc, getContext())); - Mnemonic = Head; - - // Handle condition codes for a branch mnemonic - if (Head == "b" && Next != StringRef::npos) { - Start = Next; - Next = Name.find('.', Start + 1); - Head = Name.slice(Start + 1, Next); - - SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() + - (Head.data() - Name.data())); - unsigned CC = parseCondCodeString(Head); - if (CC == ~0U) - return Error(SuffixLoc, "invalid condition code"); - const MCExpr *CCExpr = MCConstantExpr::Create(CC, getContext()); - Operands.push_back( - ARM64Operand::CreateImm(CCExpr, NameLoc, NameLoc, getContext())); - } - - // Add the remaining tokens in the mnemonic. - while (Next != StringRef::npos) { - Start = Next; - Next = Name.find('.', Start + 1); - Head = Name.slice(Start, Next); - SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() + - (Head.data() - Name.data()) + 1); - Operands.push_back( - ARM64Operand::CreateToken(Head, true, SuffixLoc, getContext())); - } - - // Conditional compare instructions have a Condition Code operand, which needs - // to be parsed and an immediate operand created. 
- bool condCodeFourthOperand = - (Head == "ccmp" || Head == "ccmn" || Head == "fccmp" || - Head == "fccmpe" || Head == "fcsel" || Head == "csel" || - Head == "csinc" || Head == "csinv" || Head == "csneg"); - - // These instructions are aliases to some of the conditional select - // instructions. However, the condition code is inverted in the aliased - // instruction. - // - // FIXME: Is this the correct way to handle these? Or should the parser - // generate the aliased instructions directly? - bool condCodeSecondOperand = (Head == "cset" || Head == "csetm"); - bool condCodeThirdOperand = - (Head == "cinc" || Head == "cinv" || Head == "cneg"); - - // Read the remaining operands. - if (getLexer().isNot(AsmToken::EndOfStatement)) { - // Read the first operand. - if (parseOperand(Operands, false, false)) { - Parser.eatToEndOfStatement(); - return true; - } - - unsigned N = 2; - while (getLexer().is(AsmToken::Comma)) { - Parser.Lex(); // Eat the comma. - - // Parse and remember the operand. - if (parseOperand(Operands, (N == 4 && condCodeFourthOperand) || - (N == 3 && condCodeThirdOperand) || - (N == 2 && condCodeSecondOperand), - condCodeSecondOperand || condCodeThirdOperand)) { - Parser.eatToEndOfStatement(); - return true; - } - - ++N; - } - } - - if (getLexer().isNot(AsmToken::EndOfStatement)) { - SMLoc Loc = Parser.getTok().getLoc(); - Parser.eatToEndOfStatement(); - return Error(Loc, "unexpected token in argument list"); - } - - Parser.Lex(); // Consume the EndOfStatement - return false; -} - -/// isFPR32Register - Check if a register is in the FPR32 register class. -/// (The parser does not have the target register info to check the register -/// class directly.) -static bool isFPR32Register(unsigned Reg) { - using namespace ARM64; - switch (Reg) { - default: - break; - case S0: case S1: case S2: case S3: case S4: case S5: case S6: - case S7: case S8: case S9: case S10: case S11: case S12: case S13: - case S14: case S15: case S16: case S17: case S18: case S19: case S20: - case S21: case S22: case S23: case S24: case S25: case S26: case S27: - case S28: case S29: case S30: case S31: - return true; - } - return false; -} - -/// isGPR32Register - Check if a register is in the GPR32sp register class. -/// (The parser does not have the target register info to check the register -/// class directly.) -static bool isGPR32Register(unsigned Reg) { - using namespace ARM64; - switch (Reg) { - default: - break; - case W0: case W1: case W2: case W3: case W4: case W5: case W6: - case W7: case W8: case W9: case W10: case W11: case W12: case W13: - case W14: case W15: case W16: case W17: case W18: case W19: case W20: - case W21: case W22: case W23: case W24: case W25: case W26: case W27: - case W28: case W29: case W30: case WSP: - return true; - } - return false; -} - -static bool isGPR64Reg(unsigned Reg) { - using namespace ARM64; - switch (Reg) { - case X0: case X1: case X2: case X3: case X4: case X5: case X6: - case X7: case X8: case X9: case X10: case X11: case X12: case X13: - case X14: case X15: case X16: case X17: case X18: case X19: case X20: - case X21: case X22: case X23: case X24: case X25: case X26: case X27: - case X28: case FP: case LR: case SP: case XZR: - return true; - default: - return false; - } -} - - -// FIXME: This entire function is a giant hack to provide us with decent -// operand range validation/diagnostics until TableGen/MC can be extended -// to support autogeneration of this kind of validation. 
-bool ARM64AsmParser::validateInstruction(MCInst &Inst, - SmallVectorImpl<SMLoc> &Loc) { - const MCRegisterInfo *RI = getContext().getRegisterInfo(); - // Check for indexed addressing modes w/ the base register being the - // same as a destination/source register or pair load where - // the Rt == Rt2. All of those are undefined behaviour. - switch (Inst.getOpcode()) { - case ARM64::LDPSWpre: - case ARM64::LDPWpost: - case ARM64::LDPWpre: - case ARM64::LDPXpost: - case ARM64::LDPXpre: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rt2 = Inst.getOperand(1).getReg(); - unsigned Rn = Inst.getOperand(2).getReg(); - if (RI->isSubRegisterEq(Rn, Rt)) - return Error(Loc[0], "unpredictable LDP instruction, writeback base " - "is also a destination"); - if (RI->isSubRegisterEq(Rn, Rt2)) - return Error(Loc[1], "unpredictable LDP instruction, writeback base " - "is also a destination"); - // FALLTHROUGH - } - case ARM64::LDPDpost: - case ARM64::LDPDpre: - case ARM64::LDPQpost: - case ARM64::LDPQpre: - case ARM64::LDPSpost: - case ARM64::LDPSpre: - case ARM64::LDPSWpost: - case ARM64::LDPDi: - case ARM64::LDPQi: - case ARM64::LDPSi: - case ARM64::LDPSWi: - case ARM64::LDPWi: - case ARM64::LDPXi: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rt2 = Inst.getOperand(1).getReg(); - if (Rt == Rt2) - return Error(Loc[1], "unpredictable LDP instruction, Rt2==Rt"); - break; - } - case ARM64::STPDpost: - case ARM64::STPDpre: - case ARM64::STPQpost: - case ARM64::STPQpre: - case ARM64::STPSpost: - case ARM64::STPSpre: - case ARM64::STPWpost: - case ARM64::STPWpre: - case ARM64::STPXpost: - case ARM64::STPXpre: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rt2 = Inst.getOperand(1).getReg(); - unsigned Rn = Inst.getOperand(2).getReg(); - if (RI->isSubRegisterEq(Rn, Rt)) - return Error(Loc[0], "unpredictable STP instruction, writeback base " - "is also a source"); - if (RI->isSubRegisterEq(Rn, Rt2)) - return Error(Loc[1], "unpredictable STP instruction, writeback base " - "is also a source"); - break; - } - case ARM64::LDRBBpre: - case ARM64::LDRBpre: - case ARM64::LDRHHpre: - case ARM64::LDRHpre: - case ARM64::LDRSBWpre: - case ARM64::LDRSBXpre: - case ARM64::LDRSHWpre: - case ARM64::LDRSHXpre: - case ARM64::LDRSWpre: - case ARM64::LDRWpre: - case ARM64::LDRXpre: - case ARM64::LDRBBpost: - case ARM64::LDRBpost: - case ARM64::LDRHHpost: - case ARM64::LDRHpost: - case ARM64::LDRSBWpost: - case ARM64::LDRSBXpost: - case ARM64::LDRSHWpost: - case ARM64::LDRSHXpost: - case ARM64::LDRSWpost: - case ARM64::LDRWpost: - case ARM64::LDRXpost: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rn = Inst.getOperand(1).getReg(); - if (RI->isSubRegisterEq(Rn, Rt)) - return Error(Loc[0], "unpredictable LDR instruction, writeback base " - "is also a source"); - break; - } - case ARM64::STRBBpost: - case ARM64::STRBpost: - case ARM64::STRHHpost: - case ARM64::STRHpost: - case ARM64::STRWpost: - case ARM64::STRXpost: - case ARM64::STRBBpre: - case ARM64::STRBpre: - case ARM64::STRHHpre: - case ARM64::STRHpre: - case ARM64::STRWpre: - case ARM64::STRXpre: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rn = Inst.getOperand(1).getReg(); - if (RI->isSubRegisterEq(Rn, Rt)) - return Error(Loc[0], "unpredictable STR instruction, writeback base " - "is also a source"); - break; - } - } - - // Now check immediate ranges. Separate from the above as there is overlap - // in the instructions being checked and this keeps the nested conditionals - // to a minimum. 
- switch (Inst.getOpcode()) { - case ARM64::ANDWrs: - case ARM64::ANDSWrs: - case ARM64::EORWrs: - case ARM64::ORRWrs: { - if (!Inst.getOperand(3).isImm()) - return Error(Loc[3], "immediate value expected"); - int64_t shifter = Inst.getOperand(3).getImm(); - ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(shifter); - if (ST == ARM64_AM::LSL && shifter > 31) - return Error(Loc[3], "shift value out of range"); - return false; - } - case ARM64::ADDSWri: - case ARM64::ADDSXri: - case ARM64::ADDWri: - case ARM64::ADDXri: - case ARM64::SUBSWri: - case ARM64::SUBSXri: - case ARM64::SUBWri: - case ARM64::SUBXri: { - if (!Inst.getOperand(3).isImm()) - return Error(Loc[3], "immediate value expected"); - int64_t shifter = Inst.getOperand(3).getImm(); - if (shifter != 0 && shifter != 12) - return Error(Loc[3], "shift value out of range"); - // The imm12 operand can be an expression. Validate that it's legit. - // FIXME: We really, really want to allow arbitrary expressions here - // and resolve the value and validate the result at fixup time, but - // that's hard as we have long since lost any source information we - // need to generate good diagnostics by that point. - if (Inst.getOpcode() == ARM64::ADDXri && Inst.getOperand(2).isExpr()) { - const MCExpr *Expr = Inst.getOperand(2).getExpr(); - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - const MCConstantExpr *Addend; - if (!classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) { - return Error(Loc[2], "invalid immediate expression"); - } - - if (DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF || - DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF || - ELFRefKind == ARM64MCExpr::VK_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12) { - // Note that we don't range-check the addend. It's adjusted - // modulo page size when converted, so there is no "out of range" - // condition when using @pageoff. Any validity checking for the value - // was done in the is*() predicate function. - return false; - } else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF) { - // @gotpageoff can only be used directly, not with an addend. - return Addend != 0; - } - - // Otherwise, we're not sure, so don't allow it for now. - return Error(Loc[2], "invalid immediate expression"); - } - - // If it's anything but an immediate, it's not legit. 
- if (!Inst.getOperand(2).isImm()) - return Error(Loc[2], "invalid immediate expression"); - int64_t imm = Inst.getOperand(2).getImm(); - if (imm > 4095 || imm < 0) - return Error(Loc[2], "immediate value out of range"); - return false; - } - case ARM64::LDRBpre: - case ARM64::LDRHpre: - case ARM64::LDRSBWpre: - case ARM64::LDRSBXpre: - case ARM64::LDRSHWpre: - case ARM64::LDRSHXpre: - case ARM64::LDRWpre: - case ARM64::LDRXpre: - case ARM64::LDRSpre: - case ARM64::LDRDpre: - case ARM64::LDRQpre: - case ARM64::STRBpre: - case ARM64::STRHpre: - case ARM64::STRWpre: - case ARM64::STRXpre: - case ARM64::STRSpre: - case ARM64::STRDpre: - case ARM64::STRQpre: - case ARM64::LDRBpost: - case ARM64::LDRHpost: - case ARM64::LDRSBWpost: - case ARM64::LDRSBXpost: - case ARM64::LDRSHWpost: - case ARM64::LDRSHXpost: - case ARM64::LDRWpost: - case ARM64::LDRXpost: - case ARM64::LDRSpost: - case ARM64::LDRDpost: - case ARM64::LDRQpost: - case ARM64::STRBpost: - case ARM64::STRHpost: - case ARM64::STRWpost: - case ARM64::STRXpost: - case ARM64::STRSpost: - case ARM64::STRDpost: - case ARM64::STRQpost: - case ARM64::LDTRXi: - case ARM64::LDTRWi: - case ARM64::LDTRHi: - case ARM64::LDTRBi: - case ARM64::LDTRSHWi: - case ARM64::LDTRSHXi: - case ARM64::LDTRSBWi: - case ARM64::LDTRSBXi: - case ARM64::LDTRSWi: - case ARM64::STTRWi: - case ARM64::STTRXi: - case ARM64::STTRHi: - case ARM64::STTRBi: - case ARM64::LDURWi: - case ARM64::LDURXi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURHi: - case ARM64::LDURBi: - case ARM64::LDURSHWi: - case ARM64::LDURSHXi: - case ARM64::LDURSBWi: - case ARM64::LDURSBXi: - case ARM64::LDURSWi: - case ARM64::PRFUMi: - case ARM64::STURWi: - case ARM64::STURXi: - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURHi: - case ARM64::STURBi: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. - if (!Inst.getOperand(2).isImm()) - return Error(Loc[1], "immediate value expected"); - int64_t offset = Inst.getOperand(2).getImm(); - if (offset > 255 || offset < -256) - return Error(Loc[1], "offset value out of range"); - return false; - } - case ARM64::LDRSro: - case ARM64::LDRWro: - case ARM64::LDRSWro: - case ARM64::STRWro: - case ARM64::STRSro: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. - if (!Inst.getOperand(3).isImm()) - return Error(Loc[1], "immediate value expected"); - int64_t shift = Inst.getOperand(3).getImm(); - ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift); - if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX && - type != ARM64_AM::SXTW && type != ARM64_AM::SXTX) - return Error(Loc[1], "shift type invalid"); - return false; - } - case ARM64::LDRDro: - case ARM64::LDRQro: - case ARM64::LDRXro: - case ARM64::PRFMro: - case ARM64::STRXro: - case ARM64::STRDro: - case ARM64::STRQro: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. 
- if (!Inst.getOperand(3).isImm()) - return Error(Loc[1], "immediate value expected"); - int64_t shift = Inst.getOperand(3).getImm(); - ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift); - if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX && - type != ARM64_AM::SXTW && type != ARM64_AM::SXTX) - return Error(Loc[1], "shift type invalid"); - return false; - } - case ARM64::LDRHro: - case ARM64::LDRHHro: - case ARM64::LDRSHWro: - case ARM64::LDRSHXro: - case ARM64::STRHro: - case ARM64::STRHHro: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. - if (!Inst.getOperand(3).isImm()) - return Error(Loc[1], "immediate value expected"); - int64_t shift = Inst.getOperand(3).getImm(); - ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift); - if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX && - type != ARM64_AM::SXTW && type != ARM64_AM::SXTX) - return Error(Loc[1], "shift type invalid"); - return false; - } - case ARM64::LDRBro: - case ARM64::LDRBBro: - case ARM64::LDRSBWro: - case ARM64::LDRSBXro: - case ARM64::STRBro: - case ARM64::STRBBro: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. - if (!Inst.getOperand(3).isImm()) - return Error(Loc[1], "immediate value expected"); - int64_t shift = Inst.getOperand(3).getImm(); - ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift); - if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX && - type != ARM64_AM::SXTW && type != ARM64_AM::SXTX) - return Error(Loc[1], "shift type invalid"); - return false; - } - case ARM64::LDPWi: - case ARM64::LDPXi: - case ARM64::LDPSi: - case ARM64::LDPDi: - case ARM64::LDPQi: - case ARM64::LDPSWi: - case ARM64::STPWi: - case ARM64::STPXi: - case ARM64::STPSi: - case ARM64::STPDi: - case ARM64::STPQi: - case ARM64::LDPWpre: - case ARM64::LDPXpre: - case ARM64::LDPSpre: - case ARM64::LDPDpre: - case ARM64::LDPQpre: - case ARM64::LDPSWpre: - case ARM64::STPWpre: - case ARM64::STPXpre: - case ARM64::STPSpre: - case ARM64::STPDpre: - case ARM64::STPQpre: - case ARM64::LDPWpost: - case ARM64::LDPXpost: - case ARM64::LDPSpost: - case ARM64::LDPDpost: - case ARM64::LDPQpost: - case ARM64::LDPSWpost: - case ARM64::STPWpost: - case ARM64::STPXpost: - case ARM64::STPSpost: - case ARM64::STPDpost: - case ARM64::STPQpost: - case ARM64::LDNPWi: - case ARM64::LDNPXi: - case ARM64::LDNPSi: - case ARM64::LDNPDi: - case ARM64::LDNPQi: - case ARM64::STNPWi: - case ARM64::STNPXi: - case ARM64::STNPSi: - case ARM64::STNPDi: - case ARM64::STNPQi: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. 
- if (!Inst.getOperand(3).isImm()) - return Error(Loc[2], "immediate value expected"); - int64_t offset = Inst.getOperand(3).getImm(); - if (offset > 63 || offset < -64) - return Error(Loc[2], "offset value out of range"); - return false; - } - default: - return false; - } -} - -static void rewriteMOV(ARM64AsmParser::OperandVector &Operands, - StringRef mnemonic, uint64_t imm, unsigned shift, - MCContext &Context) { - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[0]); - ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]); - Operands[0] = - ARM64Operand::CreateToken(mnemonic, false, Op->getStartLoc(), Context); - - const MCExpr *NewImm = MCConstantExpr::Create(imm >> shift, Context); - Operands[2] = ARM64Operand::CreateImm(NewImm, Op2->getStartLoc(), - Op2->getEndLoc(), Context); - - Operands.push_back(ARM64Operand::CreateShifter( - ARM64_AM::LSL, shift, Op2->getStartLoc(), Op2->getEndLoc(), Context)); - delete Op2; - delete Op; -} - -bool ARM64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { - switch (ErrCode) { - case Match_MissingFeature: - return Error(Loc, - "instruction requires a CPU feature not currently enabled"); - case Match_InvalidOperand: - return Error(Loc, "invalid operand for instruction"); - case Match_InvalidSuffix: - return Error(Loc, "invalid type suffix for instruction"); - case Match_InvalidMemoryIndexedSImm9: - return Error(Loc, "index must be an integer in range [-256,255]."); - case Match_InvalidMemoryIndexed32SImm7: - return Error(Loc, "index must be a multiple of 4 in range [-256,252]."); - case Match_InvalidMemoryIndexed64SImm7: - return Error(Loc, "index must be a multiple of 8 in range [-512,504]."); - case Match_InvalidMemoryIndexed128SImm7: - return Error(Loc, "index must be a multiple of 16 in range [-1024,1008]."); - case Match_InvalidMemoryIndexed8: - return Error(Loc, "index must be an integer in range [0,4095]."); - case Match_InvalidMemoryIndexed16: - return Error(Loc, "index must be a multiple of 2 in range [0,8190]."); - case Match_InvalidMemoryIndexed32: - return Error(Loc, "index must be a multiple of 4 in range [0,16380]."); - case Match_InvalidMemoryIndexed64: - return Error(Loc, "index must be a multiple of 8 in range [0,32760]."); - case Match_InvalidMemoryIndexed128: - return Error(Loc, "index must be a multiple of 16 in range [0,65520]."); - case Match_InvalidImm1_8: - return Error(Loc, "immediate must be an integer in range [1,8]."); - case Match_InvalidImm1_16: - return Error(Loc, "immediate must be an integer in range [1,16]."); - case Match_InvalidImm1_32: - return Error(Loc, "immediate must be an integer in range [1,32]."); - case Match_InvalidImm1_64: - return Error(Loc, "immediate must be an integer in range [1,64]."); - case Match_MnemonicFail: - return Error(Loc, "unrecognized instruction mnemonic"); - default: - assert(0 && "unexpected error code!"); - return Error(Loc, "invalid instruction format"); - } -} - -bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - OperandVector &Operands, - MCStreamer &Out, - unsigned &ErrorInfo, - bool MatchingInlineAsm) { - assert(!Operands.empty() && "Unexpect empty operand list!"); - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[0]); - assert(Op->isToken() && "Leading operand should always be a mnemonic!"); - - StringRef Tok = Op->getToken(); - // Translate CMN/CMP pseudos to ADDS/SUBS with zero register destination. - // This needs to be done before the special handling of ADD/SUB immediates. 
- if (Tok == "cmp" || Tok == "cmn") { - // Replace the opcode with either ADDS or SUBS. - const char *Repl = StringSwitch<const char *>(Tok) - .Case("cmp", "subs") - .Case("cmn", "adds") - .Default(0); - assert(Repl && "Unknown compare instruction"); - delete Operands[0]; - Operands[0] = ARM64Operand::CreateToken(Repl, false, IDLoc, getContext()); - - // Insert WZR or XZR as destination operand. - ARM64Operand *RegOp = static_cast<ARM64Operand *>(Operands[1]); - unsigned ZeroReg; - if (RegOp->isReg() && - (isGPR32Register(RegOp->getReg()) || RegOp->getReg() == ARM64::WZR)) - ZeroReg = ARM64::WZR; - else - ZeroReg = ARM64::XZR; - Operands.insert( - Operands.begin() + 1, - ARM64Operand::CreateReg(ZeroReg, false, IDLoc, IDLoc, getContext())); - // Update since we modified it above. - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[0]); - Tok = Op->getToken(); - } - - unsigned NumOperands = Operands.size(); - - if (Tok == "mov" && NumOperands == 3) { - // The MOV mnemomic is aliased to movn/movz, depending on the value of - // the immediate being instantiated. - // FIXME: Catching this here is a total hack, and we should use tblgen - // support to implement this instead as soon as it is available. - - ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]); - if (Op2->isImm()) { - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op2->getImm())) { - uint64_t Val = CE->getValue(); - uint64_t NVal = ~Val; - - // If this is a 32-bit register and the value has none of the upper - // set, clear the complemented upper 32-bits so the logic below works - // for 32-bit registers too. - ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]); - if (Op1->isReg() && isGPR32Register(Op1->getReg()) && - (Val & 0xFFFFFFFFULL) == Val) - NVal &= 0x00000000FFFFFFFFULL; - - // MOVK Rd, imm << 0 - if ((Val & 0xFFFF) == Val) - rewriteMOV(Operands, "movz", Val, 0, getContext()); - - // MOVK Rd, imm << 16 - else if ((Val & 0xFFFF0000ULL) == Val) - rewriteMOV(Operands, "movz", Val, 16, getContext()); - - // MOVK Rd, imm << 32 - else if ((Val & 0xFFFF00000000ULL) == Val) - rewriteMOV(Operands, "movz", Val, 32, getContext()); - - // MOVK Rd, imm << 48 - else if ((Val & 0xFFFF000000000000ULL) == Val) - rewriteMOV(Operands, "movz", Val, 48, getContext()); - - // MOVN Rd, (~imm << 0) - else if ((NVal & 0xFFFFULL) == NVal) - rewriteMOV(Operands, "movn", NVal, 0, getContext()); - - // MOVN Rd, ~(imm << 16) - else if ((NVal & 0xFFFF0000ULL) == NVal) - rewriteMOV(Operands, "movn", NVal, 16, getContext()); - - // MOVN Rd, ~(imm << 32) - else if ((NVal & 0xFFFF00000000ULL) == NVal) - rewriteMOV(Operands, "movn", NVal, 32, getContext()); - - // MOVN Rd, ~(imm << 48) - else if ((NVal & 0xFFFF000000000000ULL) == NVal) - rewriteMOV(Operands, "movn", NVal, 48, getContext()); - } - } - } else if (NumOperands == 4) { - if (Tok == "add" || Tok == "adds" || Tok == "sub" || Tok == "subs") { - // Handle the uimm24 immediate form, where the shift is not specified. 
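Editorial aside, not part of the deleted source: the MOV-immediate handling above selects MOVZ when the value occupies a single 16-bit halfword and MOVN when its bitwise complement does, turning the halfword position into an LSL amount of 0, 16, 32 or 48 (the 32-bit register case additionally masks the complement, as noted in the code). A minimal standalone sketch of that selection rule follows; the register name x0 and the sample value are placeholders.

  #include <cstdint>
  #include <cstdio>

  // Returns true and sets Shift (0, 16, 32 or 48) when Val fits entirely in
  // one 16-bit halfword at that position.
  static bool fitsInOneHalfword(uint64_t Val, unsigned &Shift) {
    for (Shift = 0; Shift < 64; Shift += 16)
      if ((Val & (0xFFFFULL << Shift)) == Val)
        return true;
    return false;
  }

  int main() {
    uint64_t Val = 0x12340000ULL; // placeholder immediate
    unsigned Shift;
    if (fitsInOneHalfword(Val, Shift))
      std::printf("movz x0, #0x%llx, lsl #%u\n",
                  (unsigned long long)(Val >> Shift), Shift);
    else if (fitsInOneHalfword(~Val, Shift))
      std::printf("movn x0, #0x%llx, lsl #%u\n",
                  (unsigned long long)(~Val >> Shift), Shift);
    else
      std::printf("needs a MOVZ/MOVN plus MOVK sequence\n");
    return 0; // prints: movz x0, #0x1234, lsl #16
  }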
- ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]); - if (Op3->isImm()) { - if (const MCConstantExpr *CE = - dyn_cast<MCConstantExpr>(Op3->getImm())) { - uint64_t Val = CE->getValue(); - if (Val >= (1 << 24)) { - Error(IDLoc, "immediate value is too large"); - return true; - } - if (Val < (1 << 12)) { - Operands.push_back(ARM64Operand::CreateShifter( - ARM64_AM::LSL, 0, IDLoc, IDLoc, getContext())); - } else if ((Val & 0xfff) == 0) { - delete Operands[3]; - CE = MCConstantExpr::Create(Val >> 12, getContext()); - Operands[3] = - ARM64Operand::CreateImm(CE, IDLoc, IDLoc, getContext()); - Operands.push_back(ARM64Operand::CreateShifter( - ARM64_AM::LSL, 12, IDLoc, IDLoc, getContext())); - } else { - Error(IDLoc, "immediate value is too large"); - return true; - } - } else { - Operands.push_back(ARM64Operand::CreateShifter( - ARM64_AM::LSL, 0, IDLoc, IDLoc, getContext())); - } - } - - // FIXME: Horible hack to handle the LSL -> UBFM alias. - } else if (NumOperands == 4 && Tok == "lsl") { - ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]); - ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]); - if (Op2->isReg() && Op3->isImm()) { - const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm()); - if (Op3CE) { - uint64_t Op3Val = Op3CE->getValue(); - uint64_t NewOp3Val = 0; - uint64_t NewOp4Val = 0; - if (isGPR32Register(Op2->getReg()) || Op2->getReg() == ARM64::WZR) { - NewOp3Val = (32 - Op3Val) & 0x1f; - NewOp4Val = 31 - Op3Val; - } else { - NewOp3Val = (64 - Op3Val) & 0x3f; - NewOp4Val = 63 - Op3Val; - } - - const MCExpr *NewOp3 = - MCConstantExpr::Create(NewOp3Val, getContext()); - const MCExpr *NewOp4 = - MCConstantExpr::Create(NewOp4Val, getContext()); - - Operands[0] = ARM64Operand::CreateToken( - "ubfm", false, Op->getStartLoc(), getContext()); - Operands[3] = ARM64Operand::CreateImm(NewOp3, Op3->getStartLoc(), - Op3->getEndLoc(), getContext()); - Operands.push_back(ARM64Operand::CreateImm( - NewOp4, Op3->getStartLoc(), Op3->getEndLoc(), getContext())); - delete Op3; - delete Op; - } - } - - // FIXME: Horrible hack to handle the optional LSL shift for vector - // instructions. - } else if (NumOperands == 4 && (Tok == "bic" || Tok == "orr")) { - ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]); - ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]); - ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]); - if ((Op1->isToken() && Op2->isVectorReg() && Op3->isImm()) || - (Op1->isVectorReg() && Op2->isToken() && Op3->isImm())) - Operands.push_back(ARM64Operand::CreateShifter(ARM64_AM::LSL, 0, IDLoc, - IDLoc, getContext())); - } else if (NumOperands == 4 && (Tok == "movi" || Tok == "mvni")) { - ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]); - ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]); - ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]); - if ((Op1->isToken() && Op2->isVectorReg() && Op3->isImm()) || - (Op1->isVectorReg() && Op2->isToken() && Op3->isImm())) { - StringRef Suffix = Op1->isToken() ? Op1->getToken() : Op2->getToken(); - // Canonicalize on lower-case for ease of comparison. 
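Editorial aside, not part of the deleted source: the LSL-to-UBFM rewrite above encodes a left shift as a bitfield move with immr = (regsize - shift) mod regsize and imms = regsize - 1 - shift (the 32-bit form uses 32 and 31 in place of 64 and 63). A short worked sketch of the 64-bit case; the register names and shift amount are placeholders.

  #include <cstdio>

  int main() {
    unsigned Shift = 4;                  // lsl x0, x1, #4
    unsigned Immr = (64 - Shift) & 0x3f; // rotate amount: 60
    unsigned Imms = 63 - Shift;          // highest source bit copied: 59
    std::printf("ubfm x0, x1, #%u, #%u\n", Immr, Imms);
    return 0; // prints: ubfm x0, x1, #60, #59
  }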
- std::string CanonicalSuffix = Suffix.lower(); - if (Tok != "movi" || - (CanonicalSuffix != ".1d" && CanonicalSuffix != ".2d" && - CanonicalSuffix != ".8b" && CanonicalSuffix != ".16b")) - Operands.push_back(ARM64Operand::CreateShifter( - ARM64_AM::LSL, 0, IDLoc, IDLoc, getContext())); - } - } - } else if (NumOperands == 5) { - // FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and - // UBFIZ -> UBFM aliases. - if (Tok == "bfi" || Tok == "sbfiz" || Tok == "ubfiz") { - ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]); - ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]); - ARM64Operand *Op4 = static_cast<ARM64Operand *>(Operands[4]); - - if (Op1->isReg() && Op3->isImm() && Op4->isImm()) { - const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm()); - const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm()); - - if (Op3CE && Op4CE) { - uint64_t Op3Val = Op3CE->getValue(); - uint64_t Op4Val = Op4CE->getValue(); - - uint64_t NewOp3Val = 0; - if (isGPR32Register(Op1->getReg())) - NewOp3Val = (32 - Op3Val) & 0x1f; - else - NewOp3Val = (64 - Op3Val) & 0x3f; - - uint64_t NewOp4Val = Op4Val - 1; - - const MCExpr *NewOp3 = - MCConstantExpr::Create(NewOp3Val, getContext()); - const MCExpr *NewOp4 = - MCConstantExpr::Create(NewOp4Val, getContext()); - Operands[3] = ARM64Operand::CreateImm(NewOp3, Op3->getStartLoc(), - Op3->getEndLoc(), getContext()); - Operands[4] = ARM64Operand::CreateImm(NewOp4, Op4->getStartLoc(), - Op4->getEndLoc(), getContext()); - if (Tok == "bfi") - Operands[0] = ARM64Operand::CreateToken( - "bfm", false, Op->getStartLoc(), getContext()); - else if (Tok == "sbfiz") - Operands[0] = ARM64Operand::CreateToken( - "sbfm", false, Op->getStartLoc(), getContext()); - else if (Tok == "ubfiz") - Operands[0] = ARM64Operand::CreateToken( - "ubfm", false, Op->getStartLoc(), getContext()); - else - llvm_unreachable("No valid mnemonic for alias?"); - - delete Op; - delete Op3; - delete Op4; - } - } - - // FIXME: Horrible hack to handle the BFXIL->BFM, SBFX->SBFM, and - // UBFX -> UBFM aliases. - } else if (NumOperands == 5 && - (Tok == "bfxil" || Tok == "sbfx" || Tok == "ubfx")) { - ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]); - ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]); - ARM64Operand *Op4 = static_cast<ARM64Operand *>(Operands[4]); - - if (Op1->isReg() && Op3->isImm() && Op4->isImm()) { - const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm()); - const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm()); - - if (Op3CE && Op4CE) { - uint64_t Op3Val = Op3CE->getValue(); - uint64_t Op4Val = Op4CE->getValue(); - uint64_t NewOp4Val = Op3Val + Op4Val - 1; - - if (NewOp4Val >= Op3Val) { - const MCExpr *NewOp4 = - MCConstantExpr::Create(NewOp4Val, getContext()); - Operands[4] = ARM64Operand::CreateImm( - NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext()); - if (Tok == "bfxil") - Operands[0] = ARM64Operand::CreateToken( - "bfm", false, Op->getStartLoc(), getContext()); - else if (Tok == "sbfx") - Operands[0] = ARM64Operand::CreateToken( - "sbfm", false, Op->getStartLoc(), getContext()); - else if (Tok == "ubfx") - Operands[0] = ARM64Operand::CreateToken( - "ubfm", false, Op->getStartLoc(), getContext()); - else - llvm_unreachable("No valid mnemonic for alias?"); - - delete Op; - delete Op4; - } - } - } - } - } - // FIXME: Horrible hack for tbz and tbnz with Wn register operand. - // InstAlias can't quite handle this since the reg classes aren't - // subclasses. 
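Editorial aside, not part of the deleted source: both alias families handled above lower to the same BFM/SBFM/UBFM encoding, the insert-in-zero forms (bfi, sbfiz, ubfiz) via immr = (regsize - lsb) mod regsize and imms = width - 1, and the extract forms (bfxil, sbfx, ubfx) via immr = lsb and imms = lsb + width - 1. A short worked sketch of the two 64-bit UBFM cases; the operand names and field values are placeholders.

  #include <cstdio>

  int main() {
    unsigned Lsb = 8, Width = 4; // placeholder field position and width
    // ubfiz x0, x1, #8, #4  (insert a 4-bit field at bit 8)
    std::printf("ubfm x0, x1, #%u, #%u\n", (64 - Lsb) & 0x3f, Width - 1); // #56, #3
    // ubfx x0, x1, #8, #4   (extract a 4-bit field starting at bit 8)
    std::printf("ubfm x0, x1, #%u, #%u\n", Lsb, Lsb + Width - 1);         // #8, #11
    return 0;
  }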
- if (NumOperands == 4 && (Tok == "tbz" || Tok == "tbnz")) { - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[2]); - if (Op->isImm()) { - if (const MCConstantExpr *OpCE = dyn_cast<MCConstantExpr>(Op->getImm())) { - if (OpCE->getValue() < 32) { - // The source register can be Wn here, but the matcher expects a - // GPR64. Twiddle it here if necessary. - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[1]); - if (Op->isReg()) { - unsigned Reg = getXRegFromWReg(Op->getReg()); - Operands[1] = ARM64Operand::CreateReg( - Reg, false, Op->getStartLoc(), Op->getEndLoc(), getContext()); - delete Op; - } - } - } - } - } - // FIXME: Horrible hack for sxtw and uxtw with Wn src and Xd dst operands. - // InstAlias can't quite handle this since the reg classes aren't - // subclasses. - if (NumOperands == 3 && (Tok == "sxtw" || Tok == "uxtw")) { - // The source register can be Wn here, but the matcher expects a - // GPR64. Twiddle it here if necessary. - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[2]); - if (Op->isReg()) { - unsigned Reg = getXRegFromWReg(Op->getReg()); - Operands[2] = ARM64Operand::CreateReg(Reg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); - delete Op; - } - } - // FIXME: Likewise for [su]xt[bh] with a Xd dst operand - else if (NumOperands == 3 && - (Tok == "sxtb" || Tok == "uxtb" || Tok == "sxth" || Tok == "uxth")) { - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[1]); - if (Op->isReg() && isGPR64Reg(Op->getReg())) { - // The source register can be Wn here, but the matcher expects a - // GPR64. Twiddle it here if necessary. - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[2]); - if (Op->isReg()) { - unsigned Reg = getXRegFromWReg(Op->getReg()); - Operands[2] = ARM64Operand::CreateReg(Reg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); - delete Op; - } - } - } - - // Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR. - if (NumOperands == 3 && Tok == "fmov") { - ARM64Operand *RegOp = static_cast<ARM64Operand *>(Operands[1]); - ARM64Operand *ImmOp = static_cast<ARM64Operand *>(Operands[2]); - if (RegOp->isReg() && ImmOp->isFPImm() && - ImmOp->getFPImm() == (unsigned)-1) { - unsigned zreg = - isFPR32Register(RegOp->getReg()) ? ARM64::WZR : ARM64::XZR; - Operands[2] = ARM64Operand::CreateReg(zreg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); - delete ImmOp; - } - } - - // FIXME: Horrible hack to handle the literal .d[1] vector index on - // FMOV instructions. The index isn't an actual instruction operand - // but rather syntactic sugar. It really should be part of the mnemonic, - // not the operand, but whatever. - if ((NumOperands == 5) && Tok == "fmov") { - // If the last operand is a vectorindex of '1', then replace it with - // a '[' '1' ']' token sequence, which is what the matcher - // (annoyingly) expects for a literal vector index operand. - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[NumOperands - 1]); - if (Op->isVectorIndexD() && Op->getVectorIndex() == 1) { - SMLoc Loc = Op->getStartLoc(); - Operands.pop_back(); - Operands.push_back( - ARM64Operand::CreateToken("[", false, Loc, getContext())); - Operands.push_back( - ARM64Operand::CreateToken("1", false, Loc, getContext())); - Operands.push_back( - ARM64Operand::CreateToken("]", false, Loc, getContext())); - } else if (Op->isReg()) { - // Similarly, check the destination operand for the GPR->High-lane - // variant. 
- unsigned OpNo = NumOperands - 2; - ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[OpNo]); - if (Op->isVectorIndexD() && Op->getVectorIndex() == 1) { - SMLoc Loc = Op->getStartLoc(); - Operands[OpNo] = - ARM64Operand::CreateToken("[", false, Loc, getContext()); - Operands.insert( - Operands.begin() + OpNo + 1, - ARM64Operand::CreateToken("1", false, Loc, getContext())); - Operands.insert( - Operands.begin() + OpNo + 2, - ARM64Operand::CreateToken("]", false, Loc, getContext())); - } - } - } - - MCInst Inst; - // First try to match against the secondary set of tables containing the - // short-form NEON instructions (e.g. "fadd.2s v0, v1, v2"). - unsigned MatchResult = - MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 1); - - // If that fails, try against the alternate table containing long-form NEON: - // "fadd v0.2s, v1.2s, v2.2s" - if (MatchResult != Match_Success) - MatchResult = - MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 0); - - switch (MatchResult) { - case Match_Success: { - // Perform range checking and other semantic validations - SmallVector<SMLoc, 8> OperandLocs; - NumOperands = Operands.size(); - for (unsigned i = 1; i < NumOperands; ++i) - OperandLocs.push_back(Operands[i]->getStartLoc()); - if (validateInstruction(Inst, OperandLocs)) - return true; - - Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, STI); - return false; - } - case Match_MissingFeature: - case Match_MnemonicFail: - return showMatchError(IDLoc, MatchResult); - case Match_InvalidOperand: { - SMLoc ErrorLoc = IDLoc; - if (ErrorInfo != ~0U) { - if (ErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); - - ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc(); - if (ErrorLoc == SMLoc()) - ErrorLoc = IDLoc; - } - // If the match failed on a suffix token operand, tweak the diagnostic - // accordingly. - if (((ARM64Operand *)Operands[ErrorInfo])->isToken() && - ((ARM64Operand *)Operands[ErrorInfo])->isTokenSuffix()) - MatchResult = Match_InvalidSuffix; - - return showMatchError(ErrorLoc, MatchResult); - } - case Match_InvalidMemoryIndexedSImm9: { - // If there is not a '!' after the memory operand that failed, we really - // want the diagnostic for the non-pre-indexed instruction variant instead. - // Be careful to check for the post-indexed variant as well, which also - // uses this match diagnostic. Also exclude the explicitly unscaled - // mnemonics, as they want the unscaled diagnostic as well. - if (Operands.size() == ErrorInfo + 1 && - !((ARM64Operand *)Operands[ErrorInfo])->isImm() && - !Tok.startswith("stur") && !Tok.startswith("ldur")) { - // whether we want an Indexed64 or Indexed32 diagnostic depends on - // the register class of the previous operand. Default to 64 in case - // we see something unexpected. - MatchResult = Match_InvalidMemoryIndexed64; - if (ErrorInfo) { - ARM64Operand *PrevOp = (ARM64Operand *)Operands[ErrorInfo - 1]; - if (PrevOp->isReg() && ARM64MCRegisterClasses[ARM64::GPR32RegClassID] - .contains(PrevOp->getReg())) - MatchResult = Match_InvalidMemoryIndexed32; - } - } - SMLoc ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc(); - if (ErrorLoc == SMLoc()) - ErrorLoc = IDLoc; - return showMatchError(ErrorLoc, MatchResult); - } - case Match_InvalidMemoryIndexed32: - case Match_InvalidMemoryIndexed64: - case Match_InvalidMemoryIndexed128: - // If there is a '!' after the memory operand that failed, we really - // want the diagnostic for the pre-indexed instruction variant instead. 
- if (Operands.size() > ErrorInfo + 1 && - ((ARM64Operand *)Operands[ErrorInfo + 1])->isTokenEqual("!")) - MatchResult = Match_InvalidMemoryIndexedSImm9; - // FALL THROUGH - case Match_InvalidMemoryIndexed8: - case Match_InvalidMemoryIndexed16: - case Match_InvalidMemoryIndexed32SImm7: - case Match_InvalidMemoryIndexed64SImm7: - case Match_InvalidMemoryIndexed128SImm7: - case Match_InvalidImm1_8: - case Match_InvalidImm1_16: - case Match_InvalidImm1_32: - case Match_InvalidImm1_64: { - // Any time we get here, there's nothing fancy to do. Just get the - // operand SMLoc and display the diagnostic. - SMLoc ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc(); - // If it's a memory operand, the error is with the offset immediate, - // so get that location instead. - if (((ARM64Operand *)Operands[ErrorInfo])->isMem()) - ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getOffsetLoc(); - if (ErrorLoc == SMLoc()) - ErrorLoc = IDLoc; - return showMatchError(ErrorLoc, MatchResult); - } - } - - llvm_unreachable("Implement any new match types added!"); - return true; -} - -/// ParseDirective parses the arm specific directives -bool ARM64AsmParser::ParseDirective(AsmToken DirectiveID) { - StringRef IDVal = DirectiveID.getIdentifier(); - SMLoc Loc = DirectiveID.getLoc(); - if (IDVal == ".hword") - return parseDirectiveWord(2, Loc); - if (IDVal == ".word") - return parseDirectiveWord(4, Loc); - if (IDVal == ".xword") - return parseDirectiveWord(8, Loc); - if (IDVal == ".tlsdesccall") - return parseDirectiveTLSDescCall(Loc); - - return parseDirectiveLOH(IDVal, Loc); -} - -/// parseDirectiveWord -/// ::= .word [ expression (, expression)* ] -bool ARM64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { - if (getLexer().isNot(AsmToken::EndOfStatement)) { - for (;;) { - const MCExpr *Value; - if (getParser().parseExpression(Value)) - return true; - - getParser().getStreamer().EmitValue(Value, Size); - - if (getLexer().is(AsmToken::EndOfStatement)) - break; - - // FIXME: Improve diagnostic. - if (getLexer().isNot(AsmToken::Comma)) - return Error(L, "unexpected token in directive"); - Parser.Lex(); - } - } - - Parser.Lex(); - return false; -} - -// parseDirectiveTLSDescCall: -// ::= .tlsdesccall symbol -bool ARM64AsmParser::parseDirectiveTLSDescCall(SMLoc L) { - StringRef Name; - if (getParser().parseIdentifier(Name)) - return Error(L, "expected symbol after directive"); - - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext()); - Expr = ARM64MCExpr::Create(Expr, ARM64MCExpr::VK_TLSDESC, getContext()); - - MCInst Inst; - Inst.setOpcode(ARM64::TLSDESCCALL); - Inst.addOperand(MCOperand::CreateExpr(Expr)); - - getParser().getStreamer().EmitInstruction(Inst, STI); - return false; -} - -/// ::= .loh <lohName | lohId> label1, ..., labelN -/// The number of arguments depends on the loh identifier. -bool ARM64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) { - if (IDVal != MCLOHDirectiveName()) - return true; - MCLOHType Kind; - if (getParser().getTok().isNot(AsmToken::Identifier)) { - if (getParser().getTok().isNot(AsmToken::Integer)) - return TokError("expected an identifier or a number in directive"); - // We successfully get a numeric value for the identifier. - // Check if it is valid. - int64_t Id = getParser().getTok().getIntVal(); - Kind = (MCLOHType)Id; - // Check that Id does not overflow MCLOHType. 
- if (!isValidMCLOHType(Kind) || Id != Kind) - return TokError("invalid numeric identifier in directive"); - } else { - StringRef Name = getTok().getIdentifier(); - // We successfully parse an identifier. - // Check if it is a recognized one. - int Id = MCLOHNameToId(Name); - - if (Id == -1) - return TokError("invalid identifier in directive"); - Kind = (MCLOHType)Id; - } - // Consume the identifier. - Lex(); - // Get the number of arguments of this LOH. - int NbArgs = MCLOHIdToNbArgs(Kind); - - assert(NbArgs != -1 && "Invalid number of arguments"); - - SmallVector<MCSymbol *, 3> Args; - for (int Idx = 0; Idx < NbArgs; ++Idx) { - StringRef Name; - if (getParser().parseIdentifier(Name)) - return TokError("expected identifier in directive"); - Args.push_back(getContext().GetOrCreateSymbol(Name)); - - if (Idx + 1 == NbArgs) - break; - if (getLexer().isNot(AsmToken::Comma)) - return TokError("unexpected token in '" + Twine(IDVal) + "' directive"); - Lex(); - } - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '" + Twine(IDVal) + "' directive"); - - getStreamer().EmitLOHDirective((MCLOHType)Kind, Args); - return false; -} - -bool -ARM64AsmParser::classifySymbolRef(const MCExpr *Expr, - ARM64MCExpr::VariantKind &ELFRefKind, - MCSymbolRefExpr::VariantKind &DarwinRefKind, - const MCConstantExpr *&Addend) { - ELFRefKind = ARM64MCExpr::VK_INVALID; - DarwinRefKind = MCSymbolRefExpr::VK_None; - - if (const ARM64MCExpr *AE = dyn_cast<ARM64MCExpr>(Expr)) { - ELFRefKind = AE->getKind(); - Expr = AE->getSubExpr(); - } - - const MCSymbolRefExpr *SE = dyn_cast<MCSymbolRefExpr>(Expr); - if (SE) { - // It's a simple symbol reference with no addend. - DarwinRefKind = SE->getKind(); - Addend = 0; - return true; - } - - const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr); - if (!BE) - return false; - - SE = dyn_cast<MCSymbolRefExpr>(BE->getLHS()); - if (!SE) - return false; - DarwinRefKind = SE->getKind(); - - if (BE->getOpcode() != MCBinaryExpr::Add) - return false; - - // See if the addend is is a constant, otherwise there's more going - // on here than we can deal with. - Addend = dyn_cast<MCConstantExpr>(BE->getRHS()); - if (!Addend) - return false; - - // It's some symbol reference + a constant addend, but really - // shouldn't use both Darwin and ELF syntax. - return ELFRefKind == ARM64MCExpr::VK_INVALID || - DarwinRefKind == MCSymbolRefExpr::VK_None; -} - -/// Force static initialization. -extern "C" void LLVMInitializeARM64AsmParser() { - RegisterMCAsmParser<ARM64AsmParser> X(TheARM64Target); -} - -#define GET_REGISTER_MATCHER -#define GET_MATCHER_IMPLEMENTATION -#include "ARM64GenAsmMatcher.inc" - -// Define this matcher function after the auto-generated include so we -// have the match class enum definitions. -unsigned ARM64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, - unsigned Kind) { - ARM64Operand *Op = static_cast<ARM64Operand *>(AsmOp); - // If the kind is a token for a literal immediate, check if our asm - // operand matches. This is for InstAliases which have a fixed-value - // immediate in the syntax. 
- int64_t ExpectedVal; - switch (Kind) { - default: - return Match_InvalidOperand; - case MCK__35_0: - ExpectedVal = 0; - break; - case MCK__35_1: - ExpectedVal = 1; - break; - case MCK__35_12: - ExpectedVal = 12; - break; - case MCK__35_16: - ExpectedVal = 16; - break; - case MCK__35_2: - ExpectedVal = 2; - break; - case MCK__35_24: - ExpectedVal = 24; - break; - case MCK__35_3: - ExpectedVal = 3; - break; - case MCK__35_32: - ExpectedVal = 32; - break; - case MCK__35_4: - ExpectedVal = 4; - break; - case MCK__35_48: - ExpectedVal = 48; - break; - case MCK__35_6: - ExpectedVal = 6; - break; - case MCK__35_64: - ExpectedVal = 64; - break; - case MCK__35_8: - ExpectedVal = 8; - break; - } - if (!Op->isImm()) - return Match_InvalidOperand; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); - if (!CE) - return Match_InvalidOperand; - if (CE->getValue() == ExpectedVal) - return Match_Success; - return Match_InvalidOperand; -} diff --git a/lib/Target/ARM64/AsmParser/CMakeLists.txt b/lib/Target/ARM64/AsmParser/CMakeLists.txt deleted file mode 100644 index 826158b..0000000 --- a/lib/Target/ARM64/AsmParser/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARM64AsmParser - ARM64AsmParser.cpp - ) - diff --git a/lib/Target/ARM64/AsmParser/LLVMBuild.txt b/lib/Target/ARM64/AsmParser/LLVMBuild.txt deleted file mode 100644 index 2c8fafe..0000000 --- a/lib/Target/ARM64/AsmParser/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/ARM64/AsmParser/LLVMBuild.txt ---------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ARM64AsmParser -parent = ARM64 -required_libraries = ARM64Desc ARM64Info MC MCParser Support -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/AsmParser/Makefile b/lib/Target/ARM64/AsmParser/Makefile deleted file mode 100644 index d25c47f..0000000 --- a/lib/Target/ARM64/AsmParser/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64AsmParser - -# Hack: we need to include 'main' ARM target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/CMakeLists.txt b/lib/Target/ARM64/CMakeLists.txt deleted file mode 100644 index 6de861c..0000000 --- a/lib/Target/ARM64/CMakeLists.txt +++ /dev/null @@ -1,50 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS ARM64.td) - -tablegen(LLVM ARM64GenRegisterInfo.inc -gen-register-info) -tablegen(LLVM ARM64GenInstrInfo.inc -gen-instr-info) -tablegen(LLVM ARM64GenMCCodeEmitter.inc -gen-emitter -mc-emitter) -tablegen(LLVM ARM64GenMCPseudoLowering.inc -gen-pseudo-lowering) -tablegen(LLVM ARM64GenAsmWriter.inc -gen-asm-writer) -tablegen(LLVM ARM64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) -tablegen(LLVM ARM64GenAsmMatcher.inc -gen-asm-matcher) -tablegen(LLVM ARM64GenDAGISel.inc -gen-dag-isel) -tablegen(LLVM ARM64GenFastISel.inc -gen-fast-isel) -tablegen(LLVM ARM64GenCallingConv.inc -gen-callingconv) -tablegen(LLVM ARM64GenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM ARM64GenDisassemblerTables.inc -gen-disassembler) -add_public_tablegen_target(ARM64CommonTableGen) - -add_llvm_target(ARM64CodeGen - ARM64AddressTypePromotion.cpp - ARM64AdvSIMDScalarPass.cpp - ARM64AsmPrinter.cpp - ARM64BranchRelaxation.cpp - ARM64CleanupLocalDynamicTLSPass.cpp - ARM64CollectLOH.cpp - ARM64ConditionalCompares.cpp - ARM64DeadRegisterDefinitionsPass.cpp - ARM64ExpandPseudoInsts.cpp - ARM64FastISel.cpp - ARM64FrameLowering.cpp - ARM64ISelDAGToDAG.cpp - ARM64ISelLowering.cpp - ARM64InstrInfo.cpp - ARM64LoadStoreOptimizer.cpp - ARM64MCInstLower.cpp - ARM64PromoteConstant.cpp - ARM64RegisterInfo.cpp - ARM64SelectionDAGInfo.cpp - ARM64StorePairSuppress.cpp - ARM64Subtarget.cpp - ARM64TargetMachine.cpp - ARM64TargetObjectFile.cpp - ARM64TargetTransformInfo.cpp -) - -add_dependencies(LLVMARM64CodeGen intrinsics_gen) - -add_subdirectory(TargetInfo) -add_subdirectory(AsmParser) -add_subdirectory(Disassembler) -add_subdirectory(InstPrinter) -add_subdirectory(MCTargetDesc) diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp b/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp deleted file mode 100644 index 44c501f..0000000 --- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp +++ /dev/null @@ -1,2142 +0,0 @@ -//===- ARM64Disassembler.cpp - Disassembler for ARM64 -----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-disassembler" - -#include "ARM64Disassembler.h" -#include "ARM64Subtarget.h" -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCFixedLenDisassembler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MemoryObject.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" - -// Pull DecodeStatus and its enum values into the global namespace. -typedef llvm::MCDisassembler::DecodeStatus DecodeStatus; - -// Forward declare these because the autogenerated code will reference them. -// Definitions are further down. 
-static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR128_loRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPR64spRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPR32spRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeQQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeQQQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeDDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeDDDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeFixedPointScaleImm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeCondBranchTarget(llvm::MCInst &Inst, unsigned Imm, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeSystemRegister(llvm::MCInst &Inst, unsigned Imm, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, - uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeRegOffsetLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Address, - const void *Decoder); -static 
DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeSystemCPSRInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSIMDLdStPost(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeSIMDLdStSingle(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeSIMDLdStSingleTied(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder); - -static DecodeStatus DecodeVecShiftR64Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeVecShiftR64ImmNarrow(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder); -static DecodeStatus DecodeVecShiftR32Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeVecShiftR32ImmNarrow(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder); -static DecodeStatus DecodeVecShiftR16Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeVecShiftR16ImmNarrow(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder); -static DecodeStatus DecodeVecShiftR8Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeVecShiftL64Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeVecShiftL32Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); -static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder); - -#include "ARM64GenDisassemblerTables.inc" -#include "ARM64GenInstrInfo.inc" - -using namespace llvm; - -#define Success llvm::MCDisassembler::Success -#define Fail llvm::MCDisassembler::Fail - -static MCDisassembler *createARM64Disassembler(const Target &T, - const MCSubtargetInfo &STI) { - return new ARM64Disassembler(STI); -} - -DecodeStatus ARM64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, - const MemoryObject &Region, - uint64_t Address, - raw_ostream &os, - raw_ostream &cs) const { - CommentStream = &cs; - - uint8_t bytes[4]; - - Size = 0; - // We want to read exactly 4 bytes of data. - if (Region.readBytes(Address, 4, (uint8_t *)bytes) == -1) - return Fail; - Size = 4; - - // Encoded as a small-endian 32-bit word in the stream. - uint32_t insn = - (bytes[3] << 24) | (bytes[2] << 16) | (bytes[1] << 8) | (bytes[0] << 0); - - // Calling the auto-generated decoder function. 
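As a concrete illustration of the byte-to-word assembly in getInstruction above (the example bytes and their disassembly are mine, not from this commit): ARM64 instructions are fixed 4-byte words stored little-endian, so the lowest-addressed byte ends up in the least significant byte of insn before the word is handed to the generated decoder table.

#include <cstdint>
#include <cstdio>

int main() {
  // Bytes as they would appear in memory for "mov x0, xzr" (an ORR with the
  // zero register); believed correct, but treat the encoding as illustrative.
  uint8_t bytes[4] = {0xE0, 0x03, 0x1F, 0xAA};
  uint32_t insn = ((uint32_t)bytes[3] << 24) | ((uint32_t)bytes[2] << 16) |
                  ((uint32_t)bytes[1] << 8) | (uint32_t)bytes[0];
  printf("0x%08X\n", insn); // prints 0xAA1F03E0, the value passed to decodeInstruction
  return 0;
}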
- DecodeStatus result = - decodeInstruction(DecoderTable32, MI, insn, Address, this, STI); - if (!result) - return Fail; - - return Success; -} - -static MCSymbolRefExpr::VariantKind -getVariant(uint64_t LLVMDisassembler_VariantKind) { - switch (LLVMDisassembler_VariantKind) { - case LLVMDisassembler_VariantKind_None: - return MCSymbolRefExpr::VK_None; - case LLVMDisassembler_VariantKind_ARM64_PAGE: - return MCSymbolRefExpr::VK_PAGE; - case LLVMDisassembler_VariantKind_ARM64_PAGEOFF: - return MCSymbolRefExpr::VK_PAGEOFF; - case LLVMDisassembler_VariantKind_ARM64_GOTPAGE: - return MCSymbolRefExpr::VK_GOTPAGE; - case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF: - return MCSymbolRefExpr::VK_GOTPAGEOFF; - case LLVMDisassembler_VariantKind_ARM64_TLVP: - case LLVMDisassembler_VariantKind_ARM64_TLVOFF: - default: - assert(0 && "bad LLVMDisassembler_VariantKind"); - return MCSymbolRefExpr::VK_None; - } -} - -/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic -/// operand in place of the immediate Value in the MCInst. The immediate -/// Value has not had any PC adjustment made by the caller. If the instruction -/// is a branch that adds the PC to the immediate Value then isBranch is -/// Success, else Fail. If the getOpInfo() function was set as part of the -/// setupForSymbolicDisassembly() call then that function is called to get any -/// symbolic information at the Address for this instruction. If that returns -/// non-zero then the symbolic information it returns is used to create an -/// MCExpr and that is added as an operand to the MCInst. If getOpInfo() -/// returns zero and isBranch is Success then a symbol look up for -/// Address + Value is done and if a symbol is found an MCExpr is created with -/// that, else an MCExpr with Address + Value is created. If getOpInfo() -/// returns zero and isBranch is Fail then the Opcode of the MCInst is -/// tested and for ADRP and other instructions that help to load pointers -/// a symbol look up is done to see if it returns a specific reference type -/// to add to the comment stream. This function returns Success if it adds -/// an operand to the MCInst and Fail otherwise.
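The doc comment above describes the disassembler-side half of symbolic operands; the getOpInfo/SymbolLookUp callbacks it refers to are installed by the client, typically through the llvm-c disassembler interface. The sketch below shows one plausible wiring so that a branch target is printed as a symbol name rather than a raw immediate. It is reconstructed from memory of the llvm-c API of that era (LLVMCreateDisasm, LLVMDisasmInstruction, LLVMDisasmDispose); the triple, the byte sequence, and the fake symbol table are assumptions, not taken from this commit.

#include "llvm-c/Disassembler.h"
#include "llvm-c/Target.h"
#include <cstdint>
#include <cstdio>

// Toy symbol lookup: pretend address 0x4000 is a function called "my_func".
// A real client would consult its own symbol table through the DisInfo pointer.
static const char *symbolLookup(void *DisInfo, uint64_t ReferenceValue,
                                uint64_t *ReferenceType, uint64_t ReferencePC,
                                const char **ReferenceName) {
  (void)DisInfo; (void)ReferencePC;
  *ReferenceName = nullptr;
  *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
  return ReferenceValue == 0x4000 ? "my_func" : nullptr;
}

int main() {
  LLVMInitializeAllTargetInfos();
  LLVMInitializeAllTargetMCs();
  LLVMInitializeAllDisassemblers();

  // Triple spelling is an assumption; this tree still names the target "arm64".
  LLVMDisasmContextRef DC = LLVMCreateDisasm("arm64-apple-darwin", nullptr, 0,
                                             nullptr, symbolLookup);
  if (!DC) return 1;

  uint8_t bytes[] = {0x00, 0x10, 0x00, 0x94}; // intended to be "bl 0x4000" at PC 0
  char text[128];
  size_t n = LLVMDisasmInstruction(DC, bytes, sizeof(bytes), /*PC=*/0,
                                   text, sizeof(text));
  if (n)
    printf("%s\n", text); // expected to read roughly "bl my_func" when the lookup hits
  LLVMDisasmDispose(DC);
  return 0;
}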
-bool ARM64Disassembler::tryAddingSymbolicOperand(uint64_t Address, int Value, - bool isBranch, - uint64_t InstSize, MCInst &MI, - uint32_t insn) const { - LLVMOpInfoCallback getOpInfo = getLLVMOpInfoCallback(); - - struct LLVMOpInfo1 SymbolicOp; - memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); - SymbolicOp.Value = Value; - void *DisInfo = getDisInfoBlock(); - uint64_t ReferenceType; - const char *ReferenceName; - const char *Name; - LLVMSymbolLookupCallback SymbolLookUp = getLLVMSymbolLookupCallback(); - if (!getOpInfo || - !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { - if (isBranch) { - if (SymbolLookUp) { - ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; - Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, - &ReferenceName); - if (Name) { - SymbolicOp.AddSymbol.Name = Name; - SymbolicOp.AddSymbol.Present = Success; - SymbolicOp.Value = 0; - } else { - SymbolicOp.Value = Address + Value; - } - if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) - (*CommentStream) << "symbol stub for: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Message) - (*CommentStream) << "Objc message: " << ReferenceName; - } else { - return false; - } - } else if (MI.getOpcode() == ARM64::ADRP) { - if (SymbolLookUp) { - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP; - Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address, - &ReferenceName); - (*CommentStream) << format("0x%llx", - 0xfffffffffffff000LL & (Address + Value)); - } else { - return false; - } - } else if (MI.getOpcode() == ARM64::ADDXri || - MI.getOpcode() == ARM64::LDRXui || - MI.getOpcode() == ARM64::LDRXl || MI.getOpcode() == ARM64::ADR) { - if (SymbolLookUp) { - if (MI.getOpcode() == ARM64::ADDXri) - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri; - else if (MI.getOpcode() == ARM64::LDRXui) - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui; - if (MI.getOpcode() == ARM64::LDRXl) { - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl; - Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, - &ReferenceName); - } else if (MI.getOpcode() == ARM64::ADR) { - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR; - Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, - &ReferenceName); - } else { - Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address, - &ReferenceName); - } - if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) - (*CommentStream) << "literal pool symbol address: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) - (*CommentStream) << "literal pool for: \"" << ReferenceName << "\""; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) - (*CommentStream) << "Objc cfstring ref: @\"" << ReferenceName << "\""; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Message) - (*CommentStream) << "Objc message: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) - (*CommentStream) << "Objc message ref: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) - (*CommentStream) << "Objc selector ref: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) - (*CommentStream) << "Objc class ref: " << ReferenceName; - // For these instructions, the SymbolLookUp() 
above is just to get the - // ReferenceType and ReferenceName. We want to make sure not to - // fall through so we don't build an MCExpr to leave the disassembly - // of the immediate values of these instructions to the InstPrinter. - return false; - } else { - return false; - } - } else { - return false; - } - } - - MCContext *Ctx = getMCContext(); - const MCExpr *Add = NULL; - if (SymbolicOp.AddSymbol.Present) { - if (SymbolicOp.AddSymbol.Name) { - StringRef Name(SymbolicOp.AddSymbol.Name); - MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); - MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind); - if (Variant != MCSymbolRefExpr::VK_None) - Add = MCSymbolRefExpr::Create(Sym, Variant, *Ctx); - else - Add = MCSymbolRefExpr::Create(Sym, *Ctx); - } else { - Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx); - } - } - - const MCExpr *Sub = NULL; - if (SymbolicOp.SubtractSymbol.Present) { - if (SymbolicOp.SubtractSymbol.Name) { - StringRef Name(SymbolicOp.SubtractSymbol.Name); - MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); - Sub = MCSymbolRefExpr::Create(Sym, *Ctx); - } else { - Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx); - } - } - - const MCExpr *Off = NULL; - if (SymbolicOp.Value != 0) - Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); - - const MCExpr *Expr; - if (Sub) { - const MCExpr *LHS; - if (Add) - LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); - else - LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); - if (Off != 0) - Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); - else - Expr = LHS; - } else if (Add) { - if (Off != 0) - Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); - else - Expr = Add; - } else { - if (Off != 0) - Expr = Off; - else - Expr = MCConstantExpr::Create(0, *Ctx); - } - - MI.addOperand(MCOperand::CreateExpr(Expr)); - - return true; -} - -extern "C" void LLVMInitializeARM64Disassembler() { - TargetRegistry::RegisterMCDisassembler(TheARM64Target, - createARM64Disassembler); -} - -static const unsigned FPR128DecoderTable[] = { - ARM64::Q0, ARM64::Q1, ARM64::Q2, ARM64::Q3, ARM64::Q4, ARM64::Q5, - ARM64::Q6, ARM64::Q7, ARM64::Q8, ARM64::Q9, ARM64::Q10, ARM64::Q11, - ARM64::Q12, ARM64::Q13, ARM64::Q14, ARM64::Q15, ARM64::Q16, ARM64::Q17, - ARM64::Q18, ARM64::Q19, ARM64::Q20, ARM64::Q21, ARM64::Q22, ARM64::Q23, - ARM64::Q24, ARM64::Q25, ARM64::Q26, ARM64::Q27, ARM64::Q28, ARM64::Q29, - ARM64::Q30, ARM64::Q31 -}; - -static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = FPR128DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 15) - return Fail; - return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder); -} - -static const unsigned FPR64DecoderTable[] = { - ARM64::D0, ARM64::D1, ARM64::D2, ARM64::D3, ARM64::D4, ARM64::D5, - ARM64::D6, ARM64::D7, ARM64::D8, ARM64::D9, ARM64::D10, ARM64::D11, - ARM64::D12, ARM64::D13, ARM64::D14, ARM64::D15, ARM64::D16, ARM64::D17, - ARM64::D18, ARM64::D19, ARM64::D20, ARM64::D21, ARM64::D22, ARM64::D23, - ARM64::D24, ARM64::D25, ARM64::D26, ARM64::D27, ARM64::D28, ARM64::D29, - ARM64::D30, ARM64::D31 -}; - -static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = 
FPR64DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned FPR32DecoderTable[] = { - ARM64::S0, ARM64::S1, ARM64::S2, ARM64::S3, ARM64::S4, ARM64::S5, - ARM64::S6, ARM64::S7, ARM64::S8, ARM64::S9, ARM64::S10, ARM64::S11, - ARM64::S12, ARM64::S13, ARM64::S14, ARM64::S15, ARM64::S16, ARM64::S17, - ARM64::S18, ARM64::S19, ARM64::S20, ARM64::S21, ARM64::S22, ARM64::S23, - ARM64::S24, ARM64::S25, ARM64::S26, ARM64::S27, ARM64::S28, ARM64::S29, - ARM64::S30, ARM64::S31 -}; - -static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = FPR32DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned FPR16DecoderTable[] = { - ARM64::H0, ARM64::H1, ARM64::H2, ARM64::H3, ARM64::H4, ARM64::H5, - ARM64::H6, ARM64::H7, ARM64::H8, ARM64::H9, ARM64::H10, ARM64::H11, - ARM64::H12, ARM64::H13, ARM64::H14, ARM64::H15, ARM64::H16, ARM64::H17, - ARM64::H18, ARM64::H19, ARM64::H20, ARM64::H21, ARM64::H22, ARM64::H23, - ARM64::H24, ARM64::H25, ARM64::H26, ARM64::H27, ARM64::H28, ARM64::H29, - ARM64::H30, ARM64::H31 -}; - -static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = FPR16DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned FPR8DecoderTable[] = { - ARM64::B0, ARM64::B1, ARM64::B2, ARM64::B3, ARM64::B4, ARM64::B5, - ARM64::B6, ARM64::B7, ARM64::B8, ARM64::B9, ARM64::B10, ARM64::B11, - ARM64::B12, ARM64::B13, ARM64::B14, ARM64::B15, ARM64::B16, ARM64::B17, - ARM64::B18, ARM64::B19, ARM64::B20, ARM64::B21, ARM64::B22, ARM64::B23, - ARM64::B24, ARM64::B25, ARM64::B26, ARM64::B27, ARM64::B28, ARM64::B29, - ARM64::B30, ARM64::B31 -}; - -static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = FPR8DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned GPR64DecoderTable[] = { - ARM64::X0, ARM64::X1, ARM64::X2, ARM64::X3, ARM64::X4, ARM64::X5, - ARM64::X6, ARM64::X7, ARM64::X8, ARM64::X9, ARM64::X10, ARM64::X11, - ARM64::X12, ARM64::X13, ARM64::X14, ARM64::X15, ARM64::X16, ARM64::X17, - ARM64::X18, ARM64::X19, ARM64::X20, ARM64::X21, ARM64::X22, ARM64::X23, - ARM64::X24, ARM64::X25, ARM64::X26, ARM64::X27, ARM64::X28, ARM64::FP, - ARM64::LR, ARM64::XZR -}; - -static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = GPR64DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = GPR64DecoderTable[RegNo]; - if (Register == ARM64::XZR) - Register = ARM64::SP; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned GPR32DecoderTable[] = { - ARM64::W0, ARM64::W1, ARM64::W2, ARM64::W3, ARM64::W4, ARM64::W5, - ARM64::W6, ARM64::W7, ARM64::W8, ARM64::W9, ARM64::W10, ARM64::W11, - ARM64::W12, ARM64::W13, ARM64::W14, ARM64::W15, ARM64::W16, ARM64::W17, - ARM64::W18, 
ARM64::W19, ARM64::W20, ARM64::W21, ARM64::W22, ARM64::W23, - ARM64::W24, ARM64::W25, ARM64::W26, ARM64::W27, ARM64::W28, ARM64::W29, - ARM64::W30, ARM64::WZR -}; - -static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = GPR32DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = GPR32DecoderTable[RegNo]; - if (Register == ARM64::WZR) - Register = ARM64::WSP; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned VectorDecoderTable[] = { - ARM64::Q0, ARM64::Q1, ARM64::Q2, ARM64::Q3, ARM64::Q4, ARM64::Q5, - ARM64::Q6, ARM64::Q7, ARM64::Q8, ARM64::Q9, ARM64::Q10, ARM64::Q11, - ARM64::Q12, ARM64::Q13, ARM64::Q14, ARM64::Q15, ARM64::Q16, ARM64::Q17, - ARM64::Q18, ARM64::Q19, ARM64::Q20, ARM64::Q21, ARM64::Q22, ARM64::Q23, - ARM64::Q24, ARM64::Q25, ARM64::Q26, ARM64::Q27, ARM64::Q28, ARM64::Q29, - ARM64::Q30, ARM64::Q31 -}; - -static DecodeStatus DecodeVectorRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = VectorDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned QQDecoderTable[] = { - ARM64::Q0_Q1, ARM64::Q1_Q2, ARM64::Q2_Q3, ARM64::Q3_Q4, - ARM64::Q4_Q5, ARM64::Q5_Q6, ARM64::Q6_Q7, ARM64::Q7_Q8, - ARM64::Q8_Q9, ARM64::Q9_Q10, ARM64::Q10_Q11, ARM64::Q11_Q12, - ARM64::Q12_Q13, ARM64::Q13_Q14, ARM64::Q14_Q15, ARM64::Q15_Q16, - ARM64::Q16_Q17, ARM64::Q17_Q18, ARM64::Q18_Q19, ARM64::Q19_Q20, - ARM64::Q20_Q21, ARM64::Q21_Q22, ARM64::Q22_Q23, ARM64::Q23_Q24, - ARM64::Q24_Q25, ARM64::Q25_Q26, ARM64::Q26_Q27, ARM64::Q27_Q28, - ARM64::Q28_Q29, ARM64::Q29_Q30, ARM64::Q30_Q31, ARM64::Q31_Q0 -}; - -static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = QQDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned QQQDecoderTable[] = { - ARM64::Q0_Q1_Q2, ARM64::Q1_Q2_Q3, ARM64::Q2_Q3_Q4, - ARM64::Q3_Q4_Q5, ARM64::Q4_Q5_Q6, ARM64::Q5_Q6_Q7, - ARM64::Q6_Q7_Q8, ARM64::Q7_Q8_Q9, ARM64::Q8_Q9_Q10, - ARM64::Q9_Q10_Q11, ARM64::Q10_Q11_Q12, ARM64::Q11_Q12_Q13, - ARM64::Q12_Q13_Q14, ARM64::Q13_Q14_Q15, ARM64::Q14_Q15_Q16, - ARM64::Q15_Q16_Q17, ARM64::Q16_Q17_Q18, ARM64::Q17_Q18_Q19, - ARM64::Q18_Q19_Q20, ARM64::Q19_Q20_Q21, ARM64::Q20_Q21_Q22, - ARM64::Q21_Q22_Q23, ARM64::Q22_Q23_Q24, ARM64::Q23_Q24_Q25, - ARM64::Q24_Q25_Q26, ARM64::Q25_Q26_Q27, ARM64::Q26_Q27_Q28, - ARM64::Q27_Q28_Q29, ARM64::Q28_Q29_Q30, ARM64::Q29_Q30_Q31, - ARM64::Q30_Q31_Q0, ARM64::Q31_Q0_Q1 -}; - -static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = QQQDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned QQQQDecoderTable[] = { - ARM64::Q0_Q1_Q2_Q3, ARM64::Q1_Q2_Q3_Q4, ARM64::Q2_Q3_Q4_Q5, - ARM64::Q3_Q4_Q5_Q6, ARM64::Q4_Q5_Q6_Q7, ARM64::Q5_Q6_Q7_Q8, - ARM64::Q6_Q7_Q8_Q9, ARM64::Q7_Q8_Q9_Q10, ARM64::Q8_Q9_Q10_Q11, - ARM64::Q9_Q10_Q11_Q12, ARM64::Q10_Q11_Q12_Q13, 
ARM64::Q11_Q12_Q13_Q14, - ARM64::Q12_Q13_Q14_Q15, ARM64::Q13_Q14_Q15_Q16, ARM64::Q14_Q15_Q16_Q17, - ARM64::Q15_Q16_Q17_Q18, ARM64::Q16_Q17_Q18_Q19, ARM64::Q17_Q18_Q19_Q20, - ARM64::Q18_Q19_Q20_Q21, ARM64::Q19_Q20_Q21_Q22, ARM64::Q20_Q21_Q22_Q23, - ARM64::Q21_Q22_Q23_Q24, ARM64::Q22_Q23_Q24_Q25, ARM64::Q23_Q24_Q25_Q26, - ARM64::Q24_Q25_Q26_Q27, ARM64::Q25_Q26_Q27_Q28, ARM64::Q26_Q27_Q28_Q29, - ARM64::Q27_Q28_Q29_Q30, ARM64::Q28_Q29_Q30_Q31, ARM64::Q29_Q30_Q31_Q0, - ARM64::Q30_Q31_Q0_Q1, ARM64::Q31_Q0_Q1_Q2 -}; - -static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = QQQQDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned DDDecoderTable[] = { - ARM64::D0_D1, ARM64::D1_D2, ARM64::D2_D3, ARM64::D3_D4, - ARM64::D4_D5, ARM64::D5_D6, ARM64::D6_D7, ARM64::D7_D8, - ARM64::D8_D9, ARM64::D9_D10, ARM64::D10_D11, ARM64::D11_D12, - ARM64::D12_D13, ARM64::D13_D14, ARM64::D14_D15, ARM64::D15_D16, - ARM64::D16_D17, ARM64::D17_D18, ARM64::D18_D19, ARM64::D19_D20, - ARM64::D20_D21, ARM64::D21_D22, ARM64::D22_D23, ARM64::D23_D24, - ARM64::D24_D25, ARM64::D25_D26, ARM64::D26_D27, ARM64::D27_D28, - ARM64::D28_D29, ARM64::D29_D30, ARM64::D30_D31, ARM64::D31_D0 -}; - -static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = DDDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned DDDDecoderTable[] = { - ARM64::D0_D1_D2, ARM64::D1_D2_D3, ARM64::D2_D3_D4, - ARM64::D3_D4_D5, ARM64::D4_D5_D6, ARM64::D5_D6_D7, - ARM64::D6_D7_D8, ARM64::D7_D8_D9, ARM64::D8_D9_D10, - ARM64::D9_D10_D11, ARM64::D10_D11_D12, ARM64::D11_D12_D13, - ARM64::D12_D13_D14, ARM64::D13_D14_D15, ARM64::D14_D15_D16, - ARM64::D15_D16_D17, ARM64::D16_D17_D18, ARM64::D17_D18_D19, - ARM64::D18_D19_D20, ARM64::D19_D20_D21, ARM64::D20_D21_D22, - ARM64::D21_D22_D23, ARM64::D22_D23_D24, ARM64::D23_D24_D25, - ARM64::D24_D25_D26, ARM64::D25_D26_D27, ARM64::D26_D27_D28, - ARM64::D27_D28_D29, ARM64::D28_D29_D30, ARM64::D29_D30_D31, - ARM64::D30_D31_D0, ARM64::D31_D0_D1 -}; - -static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = DDDDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned DDDDDecoderTable[] = { - ARM64::D0_D1_D2_D3, ARM64::D1_D2_D3_D4, ARM64::D2_D3_D4_D5, - ARM64::D3_D4_D5_D6, ARM64::D4_D5_D6_D7, ARM64::D5_D6_D7_D8, - ARM64::D6_D7_D8_D9, ARM64::D7_D8_D9_D10, ARM64::D8_D9_D10_D11, - ARM64::D9_D10_D11_D12, ARM64::D10_D11_D12_D13, ARM64::D11_D12_D13_D14, - ARM64::D12_D13_D14_D15, ARM64::D13_D14_D15_D16, ARM64::D14_D15_D16_D17, - ARM64::D15_D16_D17_D18, ARM64::D16_D17_D18_D19, ARM64::D17_D18_D19_D20, - ARM64::D18_D19_D20_D21, ARM64::D19_D20_D21_D22, ARM64::D20_D21_D22_D23, - ARM64::D21_D22_D23_D24, ARM64::D22_D23_D24_D25, ARM64::D23_D24_D25_D26, - ARM64::D24_D25_D26_D27, ARM64::D25_D26_D27_D28, ARM64::D26_D27_D28_D29, - ARM64::D27_D28_D29_D30, ARM64::D28_D29_D30_D31, ARM64::D29_D30_D31_D0, - ARM64::D30_D31_D0_D1, ARM64::D31_D0_D1_D2 -}; - -static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = 
DDDDDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static DecodeStatus DecodeFixedPointScaleImm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(64 - Imm)); - return Success; -} - -static DecodeStatus DecodeCondBranchTarget(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - int64_t ImmVal = Imm; - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - // Sign-extend 19-bit immediate. - if (ImmVal & (1 << (19 - 1))) - ImmVal |= ~((1LL << 19) - 1); - - if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal << 2, - Inst.getOpcode() != ARM64::LDRXl, 4, Inst)) - Inst.addOperand(MCOperand::CreateImm(ImmVal)); - return Success; -} - -static DecodeStatus DecodeSystemRegister(llvm::MCInst &Inst, unsigned Imm, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(Imm | 0x8000)); - return Success; -} - -static DecodeStatus DecodeVecShiftRImm(llvm::MCInst &Inst, unsigned Imm, - unsigned Add) { - Inst.addOperand(MCOperand::CreateImm(Add - Imm)); - return Success; -} - -static DecodeStatus DecodeVecShiftLImm(llvm::MCInst &Inst, unsigned Imm, - unsigned Add) { - Inst.addOperand(MCOperand::CreateImm((Imm + Add) & (Add - 1))); - return Success; -} - -static DecodeStatus DecodeVecShiftR64Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm, 64); -} - -static DecodeStatus DecodeVecShiftR64ImmNarrow(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm | 0x20, 64); -} - -static DecodeStatus DecodeVecShiftR32Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm, 32); -} - -static DecodeStatus DecodeVecShiftR32ImmNarrow(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm | 0x10, 32); -} - -static DecodeStatus DecodeVecShiftR16Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm, 16); -} - -static DecodeStatus DecodeVecShiftR16ImmNarrow(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm | 0x8, 16); -} - -static DecodeStatus DecodeVecShiftR8Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm, 8); -} - -static DecodeStatus DecodeVecShiftL64Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftLImm(Inst, Imm, 64); -} - -static DecodeStatus DecodeVecShiftL32Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftLImm(Inst, Imm, 32); -} - -static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftLImm(Inst, Imm, 16); -} - -static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftLImm(Inst, Imm, 8); -} - -static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Rm = fieldFromInstruction(insn, 16, 5); - unsigned shiftHi = fieldFromInstruction(insn, 22, 2); - unsigned shiftLo 
= fieldFromInstruction(insn, 10, 6); - unsigned shift = (shiftHi << 6) | shiftLo; - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::ANDWrs: - case ARM64::ANDSWrs: - case ARM64::BICWrs: - case ARM64::BICSWrs: - case ARM64::ORRWrs: - case ARM64::ORNWrs: - case ARM64::EORWrs: - case ARM64::EONWrs: - case ARM64::ADDWrs: - case ARM64::ADDSWrs: - case ARM64::SUBWrs: - case ARM64::SUBSWrs: { - DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); - break; - } - case ARM64::ANDXrs: - case ARM64::ANDSXrs: - case ARM64::BICXrs: - case ARM64::BICSXrs: - case ARM64::ORRXrs: - case ARM64::ORNXrs: - case ARM64::EORXrs: - case ARM64::EONXrs: - case ARM64::ADDXrs: - case ARM64::ADDSXrs: - case ARM64::SUBXrs: - case ARM64::SUBSXrs: - DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - break; - } - - Inst.addOperand(MCOperand::CreateImm(shift)); - return Success; -} - -static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned imm = fieldFromInstruction(insn, 5, 16); - unsigned shift = fieldFromInstruction(insn, 21, 2); - shift <<= 4; - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::MOVZWi: - case ARM64::MOVNWi: - case ARM64::MOVKWi: - DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::MOVZXi: - case ARM64::MOVNXi: - case ARM64::MOVKXi: - DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - break; - } - - if (Inst.getOpcode() == ARM64::MOVKWi || Inst.getOpcode() == ARM64::MOVKXi) - Inst.addOperand(Inst.getOperand(0)); - - Inst.addOperand(MCOperand::CreateImm(imm)); - Inst.addOperand(MCOperand::CreateImm(shift)); - return Success; -} - -static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned offset = fieldFromInstruction(insn, 10, 12); - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::PRFMui: - // Rt is an immediate in prefetch. 
- Inst.addOperand(MCOperand::CreateImm(Rt)); - break; - case ARM64::STRBBui: - case ARM64::LDRBBui: - case ARM64::LDRSBWui: - case ARM64::STRHHui: - case ARM64::LDRHHui: - case ARM64::LDRSHWui: - case ARM64::STRWui: - case ARM64::LDRWui: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSBXui: - case ARM64::LDRSHXui: - case ARM64::LDRSWui: - case ARM64::STRXui: - case ARM64::LDRXui: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRQui: - case ARM64::STRQui: - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRDui: - case ARM64::STRDui: - DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSui: - case ARM64::STRSui: - DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRHui: - case ARM64::STRHui: - DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRBui: - case ARM64::STRBui: - DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder); - break; - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - if (!Dis->tryAddingSymbolicOperand(Addr, offset, Fail, 4, Inst, insn)) - Inst.addOperand(MCOperand::CreateImm(offset)); - return Success; -} - -static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - int64_t offset = fieldFromInstruction(insn, 12, 9); - - // offset is a 9-bit signed immediate, so sign extend it to - // fill the unsigned. - if (offset & (1 << (9 - 1))) - offset |= ~((1LL << 9) - 1); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::PRFUMi: - // Rt is an immediate in prefetch. - Inst.addOperand(MCOperand::CreateImm(Rt)); - break; - case ARM64::STURBBi: - case ARM64::LDURBBi: - case ARM64::LDURSBWi: - case ARM64::STURHHi: - case ARM64::LDURHHi: - case ARM64::LDURSHWi: - case ARM64::STURWi: - case ARM64::LDURWi: - case ARM64::LDTRSBWi: - case ARM64::LDTRSHWi: - case ARM64::STTRWi: - case ARM64::LDTRWi: - case ARM64::STTRHi: - case ARM64::LDTRHi: - case ARM64::LDTRBi: - case ARM64::STTRBi: - case ARM64::LDRSBWpre: - case ARM64::LDRSHWpre: - case ARM64::STRBBpre: - case ARM64::LDRBBpre: - case ARM64::STRHHpre: - case ARM64::LDRHHpre: - case ARM64::STRWpre: - case ARM64::LDRWpre: - case ARM64::LDRSBWpost: - case ARM64::LDRSHWpost: - case ARM64::STRBBpost: - case ARM64::LDRBBpost: - case ARM64::STRHHpost: - case ARM64::LDRHHpost: - case ARM64::STRWpost: - case ARM64::LDRWpost: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDURSBXi: - case ARM64::LDURSHXi: - case ARM64::LDURSWi: - case ARM64::STURXi: - case ARM64::LDURXi: - case ARM64::LDTRSBXi: - case ARM64::LDTRSHXi: - case ARM64::LDTRSWi: - case ARM64::STTRXi: - case ARM64::LDTRXi: - case ARM64::LDRSBXpre: - case ARM64::LDRSHXpre: - case ARM64::STRXpre: - case ARM64::LDRSWpre: - case ARM64::LDRXpre: - case ARM64::LDRSBXpost: - case ARM64::LDRSHXpost: - case ARM64::STRXpost: - case ARM64::LDRSWpost: - case ARM64::LDRXpost: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDURQi: - case ARM64::STURQi: - case ARM64::LDRQpre: - case ARM64::STRQpre: - case ARM64::LDRQpost: - case ARM64::STRQpost: - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDURDi: - case ARM64::STURDi: - case ARM64::LDRDpre: - case ARM64::STRDpre: - case ARM64::LDRDpost: - case ARM64::STRDpost: - DecodeFPR64RegisterClass(Inst, Rt, Addr, 
Decoder); - break; - case ARM64::LDURSi: - case ARM64::STURSi: - case ARM64::LDRSpre: - case ARM64::STRSpre: - case ARM64::LDRSpost: - case ARM64::STRSpost: - DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDURHi: - case ARM64::STURHi: - case ARM64::LDRHpre: - case ARM64::STRHpre: - case ARM64::LDRHpost: - case ARM64::STRHpost: - DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDURBi: - case ARM64::STURBi: - case ARM64::LDRBpre: - case ARM64::STRBpre: - case ARM64::LDRBpost: - case ARM64::STRBpost: - DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder); - break; - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - Inst.addOperand(MCOperand::CreateImm(offset)); - return Success; -} - -static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Rt2 = fieldFromInstruction(insn, 10, 5); - unsigned Rs = fieldFromInstruction(insn, 16, 5); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::STLXRW: - case ARM64::STLXRB: - case ARM64::STLXRH: - case ARM64::STXRW: - case ARM64::STXRB: - case ARM64::STXRH: - DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); - // FALLTHROUGH - case ARM64::LDARW: - case ARM64::LDARB: - case ARM64::LDARH: - case ARM64::LDAXRW: - case ARM64::LDAXRB: - case ARM64::LDAXRH: - case ARM64::LDXRW: - case ARM64::LDXRB: - case ARM64::LDXRH: - case ARM64::STLRW: - case ARM64::STLRB: - case ARM64::STLRH: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::STLXRX: - case ARM64::STXRX: - DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); - // FALLTHROUGH - case ARM64::LDARX: - case ARM64::LDAXRX: - case ARM64::LDXRX: - case ARM64::STLRX: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::STLXPW: - case ARM64::STXPW: - DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); - // FALLTHROUGH - case ARM64::LDAXPW: - case ARM64::LDXPW: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder); - break; - case ARM64::STLXPX: - case ARM64::STXPX: - DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); - // FALLTHROUGH - case ARM64::LDAXPX: - case ARM64::LDXPX: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder); - break; - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - return Success; -} - -static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Rt2 = fieldFromInstruction(insn, 10, 5); - int64_t offset = fieldFromInstruction(insn, 15, 7); - - // offset is a 7-bit signed immediate, so sign extend it to - // fill the unsigned. 
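The comment above, and the same idiom in several other decoders in this file, sign-extends a narrow unsigned field by hand: if the field's top bit is set, OR in ones above it. A standalone trace of the 7-bit case, using my own example value:

#include <cassert>
#include <cstdint>

int main() {
  // The raw 7-bit field 0b1111100 (0x7C) encodes -4 in two's complement.
  int64_t offset = 0x7C;
  if (offset & (1 << (7 - 1)))   // bit 6, the sign bit, is set...
    offset |= ~((1LL << 7) - 1); // ...so OR in ones above bit 6 (~0x7F)
  assert(offset == -4);          // i.e. 0xFFFFFFFFFFFFFFFC
  return 0;
}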
- if (offset & (1 << (7 - 1))) - offset |= ~((1LL << 7) - 1); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::LDNPXi: - case ARM64::STNPXi: - case ARM64::LDPXpost: - case ARM64::STPXpost: - case ARM64::LDPSWpost: - case ARM64::LDPXi: - case ARM64::STPXi: - case ARM64::LDPSWi: - case ARM64::LDPXpre: - case ARM64::STPXpre: - case ARM64::LDPSWpre: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder); - break; - case ARM64::LDNPWi: - case ARM64::STNPWi: - case ARM64::LDPWpost: - case ARM64::STPWpost: - case ARM64::LDPWi: - case ARM64::STPWi: - case ARM64::LDPWpre: - case ARM64::STPWpre: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder); - break; - case ARM64::LDNPQi: - case ARM64::STNPQi: - case ARM64::LDPQpost: - case ARM64::STPQpost: - case ARM64::LDPQi: - case ARM64::STPQi: - case ARM64::LDPQpre: - case ARM64::STPQpre: - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - DecodeFPR128RegisterClass(Inst, Rt2, Addr, Decoder); - break; - case ARM64::LDNPDi: - case ARM64::STNPDi: - case ARM64::LDPDpost: - case ARM64::STPDpost: - case ARM64::LDPDi: - case ARM64::STPDi: - case ARM64::LDPDpre: - case ARM64::STPDpre: - DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); - DecodeFPR64RegisterClass(Inst, Rt2, Addr, Decoder); - break; - case ARM64::LDNPSi: - case ARM64::STNPSi: - case ARM64::LDPSpost: - case ARM64::STPSpost: - case ARM64::LDPSi: - case ARM64::STPSi: - case ARM64::LDPSpre: - case ARM64::STPSpre: - DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); - DecodeFPR32RegisterClass(Inst, Rt2, Addr, Decoder); - break; - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - Inst.addOperand(MCOperand::CreateImm(offset)); - return Success; -} - -static DecodeStatus DecodeRegOffsetLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Rm = fieldFromInstruction(insn, 16, 5); - unsigned extendHi = fieldFromInstruction(insn, 13, 3); - unsigned extendLo = fieldFromInstruction(insn, 12, 1); - unsigned extend = 0; - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::LDRSWro: - extend = (extendHi << 1) | extendLo; - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRXro: - case ARM64::STRXro: - extend = (extendHi << 1) | extendLo; - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRWro: - case ARM64::STRWro: - extend = (extendHi << 1) | extendLo; - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRQro: - case ARM64::STRQro: - extend = (extendHi << 1) | extendLo; - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRDro: - case ARM64::STRDro: - extend = (extendHi << 1) | extendLo; - DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSro: - case ARM64::STRSro: - extend = (extendHi << 1) | extendLo; - DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRHro: - extend = (extendHi << 1) | extendLo; - DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRBro: - extend = (extendHi << 1) | extendLo; - DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRBBro: - case ARM64::STRBBro: - case ARM64::LDRSBWro: - extend = (extendHi << 1) | extendLo; - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case 
ARM64::LDRHHro: - case ARM64::STRHHro: - case ARM64::LDRSHWro: - extend = (extendHi << 1) | extendLo; - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSHXro: - extend = (extendHi << 1) | extendLo; - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSBXro: - extend = (extendHi << 1) | extendLo; - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::PRFMro: - extend = (extendHi << 1) | extendLo; - Inst.addOperand(MCOperand::CreateImm(Rt)); - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - - if (extendHi == 0x3) - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - else - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - - Inst.addOperand(MCOperand::CreateImm(extend)); - return Success; -} - -static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Rm = fieldFromInstruction(insn, 16, 5); - unsigned extend = fieldFromInstruction(insn, 10, 6); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::ADDWrx: - case ARM64::SUBWrx: - DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); - break; - case ARM64::ADDSWrx: - case ARM64::SUBSWrx: - DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); - break; - case ARM64::ADDXrx: - case ARM64::SUBXrx: - DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); - break; - case ARM64::ADDSXrx: - case ARM64::SUBSXrx: - DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); - break; - case ARM64::ADDXrx64: - case ARM64::ADDSXrx64: - case ARM64::SUBXrx64: - case ARM64::SUBSXrx64: - DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - break; - } - - Inst.addOperand(MCOperand::CreateImm(extend)); - return Success; -} - -static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Datasize = fieldFromInstruction(insn, 31, 1); - unsigned imm; - - if (Datasize) { - DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); - imm = fieldFromInstruction(insn, 10, 13); - if (!ARM64_AM::isValidDecodeLogicalImmediate(imm, 64)) - return Fail; - } else { - DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rn, Addr, Decoder); - imm = fieldFromInstruction(insn, 10, 12); - if (!ARM64_AM::isValidDecodeLogicalImmediate(imm, 32)) - return Fail; - } - Inst.addOperand(MCOperand::CreateImm(imm)); - return Success; -} - -static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned cmode = fieldFromInstruction(insn, 12, 4); - unsigned imm = fieldFromInstruction(insn, 16, 3) << 5; - imm |= 
fieldFromInstruction(insn, 5, 5); - - if (Inst.getOpcode() == ARM64::MOVID) - DecodeFPR64RegisterClass(Inst, Rd, Addr, Decoder); - else - DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder); - - Inst.addOperand(MCOperand::CreateImm(imm)); - - switch (Inst.getOpcode()) { - default: - break; - case ARM64::MOVIv4i16: - case ARM64::MOVIv8i16: - case ARM64::MVNIv4i16: - case ARM64::MVNIv8i16: - case ARM64::MOVIv2i32: - case ARM64::MOVIv4i32: - case ARM64::MVNIv2i32: - case ARM64::MVNIv4i32: - Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2)); - break; - case ARM64::MOVIv2s_msl: - case ARM64::MOVIv4s_msl: - case ARM64::MVNIv2s_msl: - case ARM64::MVNIv4s_msl: - Inst.addOperand(MCOperand::CreateImm(cmode & 1 ? 0x110 : 0x108)); - break; - } - - return Success; -} - -static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned cmode = fieldFromInstruction(insn, 12, 4); - unsigned imm = fieldFromInstruction(insn, 16, 3) << 5; - imm |= fieldFromInstruction(insn, 5, 5); - - // Tied operands added twice. - DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder); - DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder); - - Inst.addOperand(MCOperand::CreateImm(imm)); - Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2)); - - return Success; -} - -static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - int64_t imm = fieldFromInstruction(insn, 5, 19) << 2; - imm |= fieldFromInstruction(insn, 29, 2); - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - // Sign-extend the 21-bit immediate. - if (imm & (1 << (21 - 1))) - imm |= ~((1LL << 21) - 1); - - DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - if (!Dis->tryAddingSymbolicOperand(Addr, imm, Fail, 4, Inst, insn)) - Inst.addOperand(MCOperand::CreateImm(imm)); - - return Success; -} - -static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Imm = fieldFromInstruction(insn, 10, 14); - unsigned S = fieldFromInstruction(insn, 29, 1); - unsigned Datasize = fieldFromInstruction(insn, 31, 1); - - unsigned ShifterVal = (Imm >> 12) & 3; - unsigned ImmVal = Imm & 0xFFF; - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - if (ShifterVal != 0 && ShifterVal != 1) - return Fail; - - if (Datasize) { - if (Rd == 31 && !S) - DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); - else - DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - } else { - if (Rd == 31 && !S) - DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder); - else - DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); - } - - if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal, Fail, 4, Inst, insn)) - Inst.addOperand(MCOperand::CreateImm(ImmVal)); - Inst.addOperand(MCOperand::CreateImm(12 * ShifterVal)); - return Success; -} - -static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { - int64_t imm = fieldFromInstruction(insn, 0, 26); - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - // Sign-extend the 26-bit immediate. 
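For orientation, a worked example of what this 26-bit field means once decoded (the example addresses are mine, not from the commit): B/BL store a word offset, so the decoder scales the sign-extended field by four before the symbolic lookup, and the branch target is the instruction's own address plus that byte offset.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Addr = 0x100004000;   // address of the branch instruction (example)
  int64_t imm = 0x3FFFFFF;       // raw 26-bit field: all ones, i.e. -1 words
  if (imm & (1 << (26 - 1)))     // same sign-extension idiom as elsewhere in this file
    imm |= ~((1LL << 26) - 1);
  uint64_t target = Addr + imm * 4; // the deleted code passes (imm << 2) to the lookup
  assert(target == 0x100003FFC);    // one instruction (4 bytes) back
  return 0;
}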
- if (imm & (1 << (26 - 1))) - imm |= ~((1LL << 26) - 1); - - if (!Dis->tryAddingSymbolicOperand(Addr, imm << 2, true, 4, Inst)) - Inst.addOperand(MCOperand::CreateImm(imm)); - - return Success; -} - -static DecodeStatus DecodeSystemCPSRInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - uint64_t op1 = fieldFromInstruction(insn, 16, 3); - uint64_t op2 = fieldFromInstruction(insn, 5, 3); - uint64_t crm = fieldFromInstruction(insn, 8, 4); - - Inst.addOperand(MCOperand::CreateImm((op1 << 3) | op2)); - Inst.addOperand(MCOperand::CreateImm(crm)); - - return Success; -} - -static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder) { - uint64_t Rt = fieldFromInstruction(insn, 0, 5); - uint64_t bit = fieldFromInstruction(insn, 31, 1) << 5; - bit |= fieldFromInstruction(insn, 19, 5); - int64_t dst = fieldFromInstruction(insn, 5, 14); - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - // Sign-extend 14-bit immediate. - if (dst & (1 << (14 - 1))) - dst |= ~((1LL << 14) - 1); - - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - Inst.addOperand(MCOperand::CreateImm(bit)); - if (!Dis->tryAddingSymbolicOperand(Addr, dst << 2, true, 4, Inst)) - Inst.addOperand(MCOperand::CreateImm(dst)); - - return Success; -} - -static DecodeStatus DecodeSIMDLdStPost(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder) { - uint64_t Rd = fieldFromInstruction(insn, 0, 5); - uint64_t Rn = fieldFromInstruction(insn, 5, 5); - uint64_t Rm = fieldFromInstruction(insn, 16, 5); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::ST1Onev8b_POST: - case ARM64::ST1Onev4h_POST: - case ARM64::ST1Onev2s_POST: - case ARM64::ST1Onev1d_POST: - case ARM64::LD1Onev8b_POST: - case ARM64::LD1Onev4h_POST: - case ARM64::LD1Onev2s_POST: - case ARM64::LD1Onev1d_POST: - DecodeFPR64RegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Onev16b_POST: - case ARM64::ST1Onev8h_POST: - case ARM64::ST1Onev4s_POST: - case ARM64::ST1Onev2d_POST: - case ARM64::LD1Onev16b_POST: - case ARM64::LD1Onev8h_POST: - case ARM64::LD1Onev4s_POST: - case ARM64::LD1Onev2d_POST: - DecodeFPR128RegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Twov8b_POST: - case ARM64::ST1Twov4h_POST: - case ARM64::ST1Twov2s_POST: - case ARM64::ST1Twov1d_POST: - case ARM64::ST2Twov8b_POST: - case ARM64::ST2Twov4h_POST: - case ARM64::ST2Twov2s_POST: - case ARM64::LD1Twov8b_POST: - case ARM64::LD1Twov4h_POST: - case ARM64::LD1Twov2s_POST: - case ARM64::LD1Twov1d_POST: - case ARM64::LD2Twov8b_POST: - case ARM64::LD2Twov4h_POST: - case ARM64::LD2Twov2s_POST: - DecodeDDRegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Threev8b_POST: - case ARM64::ST1Threev4h_POST: - case ARM64::ST1Threev2s_POST: - case ARM64::ST1Threev1d_POST: - case ARM64::ST3Threev8b_POST: - case ARM64::ST3Threev4h_POST: - case ARM64::ST3Threev2s_POST: - case ARM64::LD1Threev8b_POST: - case ARM64::LD1Threev4h_POST: - case ARM64::LD1Threev2s_POST: - case ARM64::LD1Threev1d_POST: - case ARM64::LD3Threev8b_POST: - case ARM64::LD3Threev4h_POST: - case ARM64::LD3Threev2s_POST: - DecodeDDDRegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Fourv8b_POST: - case ARM64::ST1Fourv4h_POST: - case ARM64::ST1Fourv2s_POST: - case ARM64::ST1Fourv1d_POST: - case ARM64::ST4Fourv8b_POST: - case ARM64::ST4Fourv4h_POST: - case ARM64::ST4Fourv2s_POST: - case ARM64::LD1Fourv8b_POST: - case ARM64::LD1Fourv4h_POST: - case 
ARM64::LD1Fourv2s_POST: - case ARM64::LD1Fourv1d_POST: - case ARM64::LD4Fourv8b_POST: - case ARM64::LD4Fourv4h_POST: - case ARM64::LD4Fourv2s_POST: - DecodeDDDDRegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Twov16b_POST: - case ARM64::ST1Twov8h_POST: - case ARM64::ST1Twov4s_POST: - case ARM64::ST1Twov2d_POST: - case ARM64::ST2Twov16b_POST: - case ARM64::ST2Twov8h_POST: - case ARM64::ST2Twov4s_POST: - case ARM64::ST2Twov2d_POST: - case ARM64::LD1Twov16b_POST: - case ARM64::LD1Twov8h_POST: - case ARM64::LD1Twov4s_POST: - case ARM64::LD1Twov2d_POST: - case ARM64::LD2Twov16b_POST: - case ARM64::LD2Twov8h_POST: - case ARM64::LD2Twov4s_POST: - case ARM64::LD2Twov2d_POST: - DecodeQQRegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Threev16b_POST: - case ARM64::ST1Threev8h_POST: - case ARM64::ST1Threev4s_POST: - case ARM64::ST1Threev2d_POST: - case ARM64::ST3Threev16b_POST: - case ARM64::ST3Threev8h_POST: - case ARM64::ST3Threev4s_POST: - case ARM64::ST3Threev2d_POST: - case ARM64::LD1Threev16b_POST: - case ARM64::LD1Threev8h_POST: - case ARM64::LD1Threev4s_POST: - case ARM64::LD1Threev2d_POST: - case ARM64::LD3Threev16b_POST: - case ARM64::LD3Threev8h_POST: - case ARM64::LD3Threev4s_POST: - case ARM64::LD3Threev2d_POST: - DecodeQQQRegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Fourv16b_POST: - case ARM64::ST1Fourv8h_POST: - case ARM64::ST1Fourv4s_POST: - case ARM64::ST1Fourv2d_POST: - case ARM64::ST4Fourv16b_POST: - case ARM64::ST4Fourv8h_POST: - case ARM64::ST4Fourv4s_POST: - case ARM64::ST4Fourv2d_POST: - case ARM64::LD1Fourv16b_POST: - case ARM64::LD1Fourv8h_POST: - case ARM64::LD1Fourv4s_POST: - case ARM64::LD1Fourv2d_POST: - case ARM64::LD4Fourv16b_POST: - case ARM64::LD4Fourv8h_POST: - case ARM64::LD4Fourv4s_POST: - case ARM64::LD4Fourv2d_POST: - DecodeQQQQRegisterClass(Inst, Rd, Addr, Decoder); - break; - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - return Success; -} - -static DecodeStatus DecodeSIMDLdStSingle(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder) { - uint64_t Rt = fieldFromInstruction(insn, 0, 5); - uint64_t Rn = fieldFromInstruction(insn, 5, 5); - uint64_t Rm = fieldFromInstruction(insn, 16, 5); - uint64_t size = fieldFromInstruction(insn, 10, 2); - uint64_t S = fieldFromInstruction(insn, 12, 1); - uint64_t Q = fieldFromInstruction(insn, 30, 1); - uint64_t index = 0; - - switch (Inst.getOpcode()) { - case ARM64::ST1i8: - case ARM64::ST1i8_POST: - case ARM64::ST2i8: - case ARM64::ST2i8_POST: - case ARM64::ST3i8_POST: - case ARM64::ST3i8: - case ARM64::ST4i8_POST: - case ARM64::ST4i8: - index = (Q << 3) | (S << 2) | size; - break; - case ARM64::ST1i16: - case ARM64::ST1i16_POST: - case ARM64::ST2i16: - case ARM64::ST2i16_POST: - case ARM64::ST3i16_POST: - case ARM64::ST3i16: - case ARM64::ST4i16_POST: - case ARM64::ST4i16: - index = (Q << 2) | (S << 1) | (size >> 1); - break; - case ARM64::ST1i32: - case ARM64::ST1i32_POST: - case ARM64::ST2i32: - case ARM64::ST2i32_POST: - case ARM64::ST3i32_POST: - case ARM64::ST3i32: - case ARM64::ST4i32_POST: - case ARM64::ST4i32: - index = (Q << 1) | S; - break; - case ARM64::ST1i64: - case ARM64::ST1i64_POST: - case ARM64::ST2i64: - case ARM64::ST2i64_POST: - case ARM64::ST3i64_POST: - case ARM64::ST3i64: - case ARM64::ST4i64_POST: - case ARM64::ST4i64: - index = Q; - break; - } - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::LD1Rv8b: - case ARM64::LD1Rv8b_POST: - case 
ARM64::LD1Rv4h: - case ARM64::LD1Rv4h_POST: - case ARM64::LD1Rv2s: - case ARM64::LD1Rv2s_POST: - case ARM64::LD1Rv1d: - case ARM64::LD1Rv1d_POST: - DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD1Rv16b: - case ARM64::LD1Rv16b_POST: - case ARM64::LD1Rv8h: - case ARM64::LD1Rv8h_POST: - case ARM64::LD1Rv4s: - case ARM64::LD1Rv4s_POST: - case ARM64::LD1Rv2d: - case ARM64::LD1Rv2d_POST: - case ARM64::ST1i8: - case ARM64::ST1i8_POST: - case ARM64::ST1i16: - case ARM64::ST1i16_POST: - case ARM64::ST1i32: - case ARM64::ST1i32_POST: - case ARM64::ST1i64: - case ARM64::ST1i64_POST: - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD2Rv16b: - case ARM64::LD2Rv16b_POST: - case ARM64::LD2Rv8h: - case ARM64::LD2Rv8h_POST: - case ARM64::LD2Rv4s: - case ARM64::LD2Rv4s_POST: - case ARM64::LD2Rv2d: - case ARM64::LD2Rv2d_POST: - case ARM64::ST2i8: - case ARM64::ST2i8_POST: - case ARM64::ST2i16: - case ARM64::ST2i16_POST: - case ARM64::ST2i32: - case ARM64::ST2i32_POST: - case ARM64::ST2i64: - case ARM64::ST2i64_POST: - DecodeQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD2Rv8b: - case ARM64::LD2Rv8b_POST: - case ARM64::LD2Rv4h: - case ARM64::LD2Rv4h_POST: - case ARM64::LD2Rv2s: - case ARM64::LD2Rv2s_POST: - case ARM64::LD2Rv1d: - case ARM64::LD2Rv1d_POST: - DecodeDDRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD3Rv8b: - case ARM64::LD3Rv8b_POST: - case ARM64::LD3Rv4h: - case ARM64::LD3Rv4h_POST: - case ARM64::LD3Rv2s: - case ARM64::LD3Rv2s_POST: - case ARM64::LD3Rv1d: - case ARM64::LD3Rv1d_POST: - DecodeDDDRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD3Rv16b: - case ARM64::LD3Rv16b_POST: - case ARM64::LD3Rv8h: - case ARM64::LD3Rv8h_POST: - case ARM64::LD3Rv4s: - case ARM64::LD3Rv4s_POST: - case ARM64::LD3Rv2d: - case ARM64::LD3Rv2d_POST: - case ARM64::ST3i8: - case ARM64::ST3i8_POST: - case ARM64::ST3i16: - case ARM64::ST3i16_POST: - case ARM64::ST3i32: - case ARM64::ST3i32_POST: - case ARM64::ST3i64: - case ARM64::ST3i64_POST: - DecodeQQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD4Rv8b: - case ARM64::LD4Rv8b_POST: - case ARM64::LD4Rv4h: - case ARM64::LD4Rv4h_POST: - case ARM64::LD4Rv2s: - case ARM64::LD4Rv2s_POST: - case ARM64::LD4Rv1d: - case ARM64::LD4Rv1d_POST: - DecodeDDDDRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD4Rv16b: - case ARM64::LD4Rv16b_POST: - case ARM64::LD4Rv8h: - case ARM64::LD4Rv8h_POST: - case ARM64::LD4Rv4s: - case ARM64::LD4Rv4s_POST: - case ARM64::LD4Rv2d: - case ARM64::LD4Rv2d_POST: - case ARM64::ST4i8: - case ARM64::ST4i8_POST: - case ARM64::ST4i16: - case ARM64::ST4i16_POST: - case ARM64::ST4i32: - case ARM64::ST4i32_POST: - case ARM64::ST4i64: - case ARM64::ST4i64_POST: - DecodeQQQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - } - - switch (Inst.getOpcode()) { - case ARM64::LD1Rv8b: - case ARM64::LD1Rv8b_POST: - case ARM64::LD1Rv16b: - case ARM64::LD1Rv16b_POST: - case ARM64::LD1Rv4h: - case ARM64::LD1Rv4h_POST: - case ARM64::LD1Rv8h: - case ARM64::LD1Rv8h_POST: - case ARM64::LD1Rv4s: - case ARM64::LD1Rv4s_POST: - case ARM64::LD1Rv2s: - case ARM64::LD1Rv2s_POST: - case ARM64::LD1Rv1d: - case ARM64::LD1Rv1d_POST: - case ARM64::LD1Rv2d: - case ARM64::LD1Rv2d_POST: - case ARM64::LD2Rv8b: - case ARM64::LD2Rv8b_POST: - case ARM64::LD2Rv16b: - case ARM64::LD2Rv16b_POST: - case ARM64::LD2Rv4h: - case ARM64::LD2Rv4h_POST: - case ARM64::LD2Rv8h: - case ARM64::LD2Rv8h_POST: - case ARM64::LD2Rv2s: - case ARM64::LD2Rv2s_POST: - case 
ARM64::LD2Rv4s: - case ARM64::LD2Rv4s_POST: - case ARM64::LD2Rv2d: - case ARM64::LD2Rv2d_POST: - case ARM64::LD2Rv1d: - case ARM64::LD2Rv1d_POST: - case ARM64::LD3Rv8b: - case ARM64::LD3Rv8b_POST: - case ARM64::LD3Rv16b: - case ARM64::LD3Rv16b_POST: - case ARM64::LD3Rv4h: - case ARM64::LD3Rv4h_POST: - case ARM64::LD3Rv8h: - case ARM64::LD3Rv8h_POST: - case ARM64::LD3Rv2s: - case ARM64::LD3Rv2s_POST: - case ARM64::LD3Rv4s: - case ARM64::LD3Rv4s_POST: - case ARM64::LD3Rv2d: - case ARM64::LD3Rv2d_POST: - case ARM64::LD3Rv1d: - case ARM64::LD3Rv1d_POST: - case ARM64::LD4Rv8b: - case ARM64::LD4Rv8b_POST: - case ARM64::LD4Rv16b: - case ARM64::LD4Rv16b_POST: - case ARM64::LD4Rv4h: - case ARM64::LD4Rv4h_POST: - case ARM64::LD4Rv8h: - case ARM64::LD4Rv8h_POST: - case ARM64::LD4Rv2s: - case ARM64::LD4Rv2s_POST: - case ARM64::LD4Rv4s: - case ARM64::LD4Rv4s_POST: - case ARM64::LD4Rv2d: - case ARM64::LD4Rv2d_POST: - case ARM64::LD4Rv1d: - case ARM64::LD4Rv1d_POST: - break; - default: - Inst.addOperand(MCOperand::CreateImm(index)); - } - - DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); - - switch (Inst.getOpcode()) { - case ARM64::ST1i8_POST: - case ARM64::ST1i16_POST: - case ARM64::ST1i32_POST: - case ARM64::ST1i64_POST: - case ARM64::LD1Rv8b_POST: - case ARM64::LD1Rv16b_POST: - case ARM64::LD1Rv4h_POST: - case ARM64::LD1Rv8h_POST: - case ARM64::LD1Rv2s_POST: - case ARM64::LD1Rv4s_POST: - case ARM64::LD1Rv1d_POST: - case ARM64::LD1Rv2d_POST: - case ARM64::ST2i8_POST: - case ARM64::ST2i16_POST: - case ARM64::ST2i32_POST: - case ARM64::ST2i64_POST: - case ARM64::LD2Rv8b_POST: - case ARM64::LD2Rv16b_POST: - case ARM64::LD2Rv4h_POST: - case ARM64::LD2Rv8h_POST: - case ARM64::LD2Rv2s_POST: - case ARM64::LD2Rv4s_POST: - case ARM64::LD2Rv2d_POST: - case ARM64::LD2Rv1d_POST: - case ARM64::ST3i8_POST: - case ARM64::ST3i16_POST: - case ARM64::ST3i32_POST: - case ARM64::ST3i64_POST: - case ARM64::LD3Rv8b_POST: - case ARM64::LD3Rv16b_POST: - case ARM64::LD3Rv4h_POST: - case ARM64::LD3Rv8h_POST: - case ARM64::LD3Rv2s_POST: - case ARM64::LD3Rv4s_POST: - case ARM64::LD3Rv2d_POST: - case ARM64::LD3Rv1d_POST: - case ARM64::ST4i8_POST: - case ARM64::ST4i16_POST: - case ARM64::ST4i32_POST: - case ARM64::ST4i64_POST: - case ARM64::LD4Rv8b_POST: - case ARM64::LD4Rv16b_POST: - case ARM64::LD4Rv4h_POST: - case ARM64::LD4Rv8h_POST: - case ARM64::LD4Rv2s_POST: - case ARM64::LD4Rv4s_POST: - case ARM64::LD4Rv2d_POST: - case ARM64::LD4Rv1d_POST: - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - break; - } - return Success; -} - -static DecodeStatus DecodeSIMDLdStSingleTied(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { - uint64_t Rt = fieldFromInstruction(insn, 0, 5); - uint64_t Rn = fieldFromInstruction(insn, 5, 5); - uint64_t Rm = fieldFromInstruction(insn, 16, 5); - uint64_t size = fieldFromInstruction(insn, 10, 2); - uint64_t S = fieldFromInstruction(insn, 12, 1); - uint64_t Q = fieldFromInstruction(insn, 30, 1); - uint64_t index = 0; - - switch (Inst.getOpcode()) { - case ARM64::LD1i8: - case ARM64::LD1i8_POST: - case ARM64::LD2i8: - case ARM64::LD2i8_POST: - case ARM64::LD3i8_POST: - case ARM64::LD3i8: - case ARM64::LD4i8_POST: - case ARM64::LD4i8: - index = (Q << 3) | (S << 2) | size; - break; - case ARM64::LD1i16: - case ARM64::LD1i16_POST: - case ARM64::LD2i16: - case ARM64::LD2i16_POST: - case ARM64::LD3i16_POST: - case ARM64::LD3i16: - case ARM64::LD4i16_POST: - case ARM64::LD4i16: - index = (Q << 2) | (S << 1) | (size >> 1); - break; - case ARM64::LD1i32: - case 
ARM64::LD1i32_POST: - case ARM64::LD2i32: - case ARM64::LD2i32_POST: - case ARM64::LD3i32_POST: - case ARM64::LD3i32: - case ARM64::LD4i32_POST: - case ARM64::LD4i32: - index = (Q << 1) | S; - break; - case ARM64::LD1i64: - case ARM64::LD1i64_POST: - case ARM64::LD2i64: - case ARM64::LD2i64_POST: - case ARM64::LD3i64_POST: - case ARM64::LD3i64: - case ARM64::LD4i64_POST: - case ARM64::LD4i64: - index = Q; - break; - } - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::LD1i8: - case ARM64::LD1i8_POST: - case ARM64::LD1i16: - case ARM64::LD1i16_POST: - case ARM64::LD1i32: - case ARM64::LD1i32_POST: - case ARM64::LD1i64: - case ARM64::LD1i64_POST: - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD2i8: - case ARM64::LD2i8_POST: - case ARM64::LD2i16: - case ARM64::LD2i16_POST: - case ARM64::LD2i32: - case ARM64::LD2i32_POST: - case ARM64::LD2i64: - case ARM64::LD2i64_POST: - DecodeQQRegisterClass(Inst, Rt, Addr, Decoder); - DecodeQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD3i8: - case ARM64::LD3i8_POST: - case ARM64::LD3i16: - case ARM64::LD3i16_POST: - case ARM64::LD3i32: - case ARM64::LD3i32_POST: - case ARM64::LD3i64: - case ARM64::LD3i64_POST: - DecodeQQQRegisterClass(Inst, Rt, Addr, Decoder); - DecodeQQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD4i8: - case ARM64::LD4i8_POST: - case ARM64::LD4i16: - case ARM64::LD4i16_POST: - case ARM64::LD4i32: - case ARM64::LD4i32_POST: - case ARM64::LD4i64: - case ARM64::LD4i64_POST: - DecodeQQQQRegisterClass(Inst, Rt, Addr, Decoder); - DecodeQQQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - } - - Inst.addOperand(MCOperand::CreateImm(index)); - DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); - - switch (Inst.getOpcode()) { - case ARM64::LD1i8_POST: - case ARM64::LD1i16_POST: - case ARM64::LD1i32_POST: - case ARM64::LD1i64_POST: - case ARM64::LD2i8_POST: - case ARM64::LD2i16_POST: - case ARM64::LD2i32_POST: - case ARM64::LD2i64_POST: - case ARM64::LD3i8_POST: - case ARM64::LD3i16_POST: - case ARM64::LD3i32_POST: - case ARM64::LD3i64_POST: - case ARM64::LD4i8_POST: - case ARM64::LD4i16_POST: - case ARM64::LD4i32_POST: - case ARM64::LD4i64_POST: - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - break; - } - return Success; -} diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.h b/lib/Target/ARM64/Disassembler/ARM64Disassembler.h deleted file mode 100644 index 35efc8d..0000000 --- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.h +++ /dev/null @@ -1,54 +0,0 @@ -//===- ARM64Disassembler.h - Disassembler for ARM64 -------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64DISASSEMBLER_H -#define ARM64DISASSEMBLER_H - -#include "llvm/MC/MCDisassembler.h" - -namespace llvm { - -class MCInst; -class MemoryObject; -class raw_ostream; - -class ARM64Disassembler : public MCDisassembler { -public: - ARM64Disassembler(const MCSubtargetInfo &STI) : MCDisassembler(STI) {} - - ~ARM64Disassembler() {} - - /// getInstruction - See MCDisassembler. 
- MCDisassembler::DecodeStatus getInstruction(MCInst &instr, uint64_t &size, - const MemoryObject ®ion, - uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const; - - /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic - /// operand in place of the immediate Value in the MCInst. The immediate - /// Value has not had any PC adjustment made by the caller. If the instruction - /// adds the PC to the immediate Value then InstsAddsAddressToValue is true, - /// else false. If the getOpInfo() function was set as part of the - /// setupForSymbolicDisassembly() call then that function is called to get any - /// symbolic information at the Address for this instrution. If that returns - /// non-zero then the symbolic information it returns is used to create an - /// MCExpr and that is added as an operand to the MCInst. This function - /// returns true if it adds an operand to the MCInst and false otherwise. - bool tryAddingSymbolicOperand(uint64_t Address, int Value, - bool InstsAddsAddressToValue, uint64_t InstSize, - MCInst &MI, uint32_t insn = 0) const; -}; - -} // namespace llvm - -#endif diff --git a/lib/Target/ARM64/Disassembler/CMakeLists.txt b/lib/Target/ARM64/Disassembler/CMakeLists.txt deleted file mode 100644 index ad998c2..0000000 --- a/lib/Target/ARM64/Disassembler/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARM64Disassembler - ARM64Disassembler.cpp - ) -# workaround for hanging compilation on MSVC8, 9 and 10 -#if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) -#set_property( -# SOURCE ARMDisassembler.cpp -# PROPERTY COMPILE_FLAGS "/Od" -# ) -#endif() -add_dependencies(LLVMARM64Disassembler ARM64CommonTableGen) diff --git a/lib/Target/ARM64/Disassembler/LLVMBuild.txt b/lib/Target/ARM64/Disassembler/LLVMBuild.txt deleted file mode 100644 index 5935ee6..0000000 --- a/lib/Target/ARM64/Disassembler/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/ARM64/Disassembler/LLVMBuild.txt ------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ARM64Disassembler -parent = ARM64 -required_libraries = ARM64Desc ARM64Info MC Support -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/Disassembler/Makefile b/lib/Target/ARM64/Disassembler/Makefile deleted file mode 100644 index 479d00c..0000000 --- a/lib/Target/ARM64/Disassembler/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/ARM64/Disassembler/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Disassembler - -# Hack: we need to include 'main' arm target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. 
-I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp b/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp deleted file mode 100644 index bb90707..0000000 --- a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp +++ /dev/null @@ -1,1428 +0,0 @@ -//===-- ARM64InstPrinter.cpp - Convert ARM64 MCInst to assembly syntax ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints an ARM64 MCInst to a .s file. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "ARM64InstPrinter.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -#define GET_INSTRUCTION_NAME -#define PRINT_ALIAS_INSTR -#include "ARM64GenAsmWriter.inc" -#define GET_INSTRUCTION_NAME -#define PRINT_ALIAS_INSTR -#include "ARM64GenAsmWriter1.inc" - -ARM64InstPrinter::ARM64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); -} - -ARM64AppleInstPrinter::ARM64AppleInstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : ARM64InstPrinter(MAI, MII, MRI, STI) {} - -void ARM64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - // This is for .cfi directives. - OS << getRegisterName(RegNo); -} - -void ARM64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - // Check for special encodings and print the cannonical alias instead. - - unsigned Opcode = MI->getOpcode(); - - if (Opcode == ARM64::SYS || Opcode == ARM64::SYSxt) - if (printSysAlias(MI, O)) { - printAnnotation(O, Annot); - return; - } - - // TBZ/TBNZ should print the register operand as a Wreg if the bit - // number is < 32. - if ((Opcode == ARM64::TBNZ || Opcode == ARM64::TBZ) && - MI->getOperand(1).getImm() < 32) { - MCInst newMI = *MI; - unsigned Reg = MI->getOperand(0).getReg(); - newMI.getOperand(0).setReg(getWRegFromXReg(Reg)); - printInstruction(&newMI, O); - printAnnotation(O, Annot); - return; - } - - // SBFM/UBFM should print to a nicer aliased form if possible. - if (Opcode == ARM64::SBFMXri || Opcode == ARM64::SBFMWri || - Opcode == ARM64::UBFMXri || Opcode == ARM64::UBFMWri) { - const MCOperand &Op0 = MI->getOperand(0); - const MCOperand &Op1 = MI->getOperand(1); - const MCOperand &Op2 = MI->getOperand(2); - const MCOperand &Op3 = MI->getOperand(3); - - if (Op2.isImm() && Op2.getImm() == 0 && Op3.isImm()) { - bool IsSigned = (Opcode == ARM64::SBFMXri || Opcode == ARM64::SBFMWri); - const char *AsmMnemonic = 0; - - switch (Op3.getImm()) { - default: - break; - case 7: - AsmMnemonic = IsSigned ? "sxtb" : "uxtb"; - break; - case 15: - AsmMnemonic = IsSigned ? "sxth" : "uxth"; - break; - case 31: - AsmMnemonic = IsSigned ? 
"sxtw" : "uxtw"; - break; - } - - if (AsmMnemonic) { - O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg()) - << ", " << getRegisterName(Op1.getReg()); - printAnnotation(O, Annot); - return; - } - } - - // All immediate shifts are aliases, implemented using the Bitfield - // instruction. In all cases the immediate shift amount shift must be in - // the range 0 to (reg.size -1). - if (Op2.isImm() && Op3.isImm()) { - const char *AsmMnemonic = 0; - int shift = 0; - int64_t immr = Op2.getImm(); - int64_t imms = Op3.getImm(); - if (Opcode == ARM64::UBFMWri && imms != 0x1F && ((imms + 1) == immr)) { - AsmMnemonic = "lsl"; - shift = 31 - imms; - } else if (Opcode == ARM64::UBFMXri && imms != 0x3f && - ((imms + 1 == immr))) { - AsmMnemonic = "lsl"; - shift = 63 - imms; - } else if (Opcode == ARM64::UBFMWri && imms == 0x1f) { - AsmMnemonic = "lsr"; - shift = immr; - } else if (Opcode == ARM64::UBFMXri && imms == 0x3f) { - AsmMnemonic = "lsr"; - shift = immr; - } else if (Opcode == ARM64::SBFMWri && imms == 0x1f) { - AsmMnemonic = "asr"; - shift = immr; - } else if (Opcode == ARM64::SBFMXri && imms == 0x3f) { - AsmMnemonic = "asr"; - shift = immr; - } - if (AsmMnemonic) { - O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg()) - << ", " << getRegisterName(Op1.getReg()) << ", #" << shift; - printAnnotation(O, Annot); - return; - } - } - } - - // Symbolic operands for MOVZ, MOVN and MOVK already imply a shift - // (e.g. :gottprel_g1: is always going to be "lsl #16") so it should not be - // printed. - if ((Opcode == ARM64::MOVZXi || Opcode == ARM64::MOVZWi || - Opcode == ARM64::MOVNXi || Opcode == ARM64::MOVNWi) && - MI->getOperand(1).isExpr()) { - if (Opcode == ARM64::MOVZXi || Opcode == ARM64::MOVZWi) - O << "\tmovz\t"; - else - O << "\tmovn\t"; - - O << getRegisterName(MI->getOperand(0).getReg()) << ", #" - << *MI->getOperand(1).getExpr(); - return; - } - - if ((Opcode == ARM64::MOVKXi || Opcode == ARM64::MOVKWi) && - MI->getOperand(2).isExpr()) { - O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #" - << *MI->getOperand(2).getExpr(); - return; - } - - // ANDS WZR, Wn, #imm ==> TST Wn, #imm - // ANDS XZR, Xn, #imm ==> TST Xn, #imm - if (Opcode == ARM64::ANDSWri && MI->getOperand(0).getReg() == ARM64::WZR) { - O << "\ttst\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printLogicalImm32(MI, 2, O); - return; - } - if (Opcode == ARM64::ANDSXri && MI->getOperand(0).getReg() == ARM64::XZR) { - O << "\ttst\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printLogicalImm64(MI, 2, O); - return; - } - // ANDS WZR, Wn, Wm{, lshift #imm} ==> TST Wn{, lshift #imm} - // ANDS XZR, Xn, Xm{, lshift #imm} ==> TST Xn{, lshift #imm} - if ((Opcode == ARM64::ANDSWrs && MI->getOperand(0).getReg() == ARM64::WZR) || - (Opcode == ARM64::ANDSXrs && MI->getOperand(0).getReg() == ARM64::XZR)) { - O << "\ttst\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printShiftedRegister(MI, 2, O); - return; - } - - // SUBS WZR, Wn, #imm ==> CMP Wn, #imm - // SUBS XZR, Xn, #imm ==> CMP Xn, #imm - if ((Opcode == ARM64::SUBSWri && MI->getOperand(0).getReg() == ARM64::WZR) || - (Opcode == ARM64::SUBSXri && MI->getOperand(0).getReg() == ARM64::XZR)) { - O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printAddSubImm(MI, 2, O); - return; - } - // SUBS WZR, Wn, Wm{, lshift #imm} ==> CMP Wn, Wm{, lshift #imm} - // SUBS XZR, Xn, Xm{, lshift #imm} ==> CMP Xn, Xm{, lshift #imm} - if ((Opcode == ARM64::SUBSWrs && MI->getOperand(0).getReg() == 
ARM64::WZR) || - (Opcode == ARM64::SUBSXrs && MI->getOperand(0).getReg() == ARM64::XZR)) { - O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printShiftedRegister(MI, 2, O); - return; - } - // SUBS XZR, Xn, Wm, uxtb #imm ==> CMP Xn, uxtb #imm - // SUBS WZR, Wn, Xm, uxtb #imm ==> CMP Wn, uxtb #imm - if ((Opcode == ARM64::SUBSXrx && MI->getOperand(0).getReg() == ARM64::XZR) || - (Opcode == ARM64::SUBSWrx && MI->getOperand(0).getReg() == ARM64::WZR)) { - O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printExtendedRegister(MI, 2, O); - return; - } - // SUBS XZR, Xn, Xm, uxtx #imm ==> CMP Xn, uxtb #imm - if (Opcode == ARM64::SUBSXrx64 && MI->getOperand(0).getReg() == ARM64::XZR) { - O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", " - << getRegisterName(MI->getOperand(2).getReg()); - printExtend(MI, 3, O); - return; - } - - // ADDS WZR, Wn, #imm ==> CMN Wn, #imm - // ADDS XZR, Xn, #imm ==> CMN Xn, #imm - if ((Opcode == ARM64::ADDSWri && MI->getOperand(0).getReg() == ARM64::WZR) || - (Opcode == ARM64::ADDSXri && MI->getOperand(0).getReg() == ARM64::XZR)) { - O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printAddSubImm(MI, 2, O); - return; - } - // ADDS WZR, Wn, Wm{, lshift #imm} ==> CMN Wn, Wm{, lshift #imm} - // ADDS XZR, Xn, Xm{, lshift #imm} ==> CMN Xn, Xm{, lshift #imm} - if ((Opcode == ARM64::ADDSWrs && MI->getOperand(0).getReg() == ARM64::WZR) || - (Opcode == ARM64::ADDSXrs && MI->getOperand(0).getReg() == ARM64::XZR)) { - O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printShiftedRegister(MI, 2, O); - return; - } - // ADDS XZR, Xn, Wm, uxtb #imm ==> CMN Xn, uxtb #imm - if (Opcode == ARM64::ADDSXrx && MI->getOperand(0).getReg() == ARM64::XZR) { - O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printExtendedRegister(MI, 2, O); - return; - } - // ADDS XZR, Xn, Xm, uxtx #imm ==> CMN Xn, uxtb #imm - if (Opcode == ARM64::ADDSXrx64 && MI->getOperand(0).getReg() == ARM64::XZR) { - O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", " - << getRegisterName(MI->getOperand(2).getReg()); - printExtend(MI, 3, O); - return; - } - - if (!printAliasInstr(MI, O)) - printInstruction(MI, O); - - printAnnotation(O, Annot); -} - -static bool isTblTbxInstruction(unsigned Opcode, StringRef &Layout, - bool &IsTbx) { - switch (Opcode) { - case ARM64::TBXv8i8One: - case ARM64::TBXv8i8Two: - case ARM64::TBXv8i8Three: - case ARM64::TBXv8i8Four: - IsTbx = true; - Layout = ".8b"; - return true; - case ARM64::TBLv8i8One: - case ARM64::TBLv8i8Two: - case ARM64::TBLv8i8Three: - case ARM64::TBLv8i8Four: - IsTbx = false; - Layout = ".8b"; - return true; - case ARM64::TBXv16i8One: - case ARM64::TBXv16i8Two: - case ARM64::TBXv16i8Three: - case ARM64::TBXv16i8Four: - IsTbx = true; - Layout = ".16b"; - return true; - case ARM64::TBLv16i8One: - case ARM64::TBLv16i8Two: - case ARM64::TBLv16i8Three: - case ARM64::TBLv16i8Four: - IsTbx = false; - Layout = ".16b"; - return true; - default: - return false; - } -} - -struct LdStNInstrDesc { - unsigned Opcode; - const char *Mnemonic; - const char *Layout; - int LaneOperand; - int NaturalOffset; -}; - -static LdStNInstrDesc LdStNInstInfo[] = { - { ARM64::LD1i8, "ld1", ".b", 2, 0 }, - { ARM64::LD1i16, "ld1", ".h", 2, 0 }, - { ARM64::LD1i32, "ld1", ".s", 2, 0 }, - { ARM64::LD1i64, "ld1", ".d", 2, 0 }, - { ARM64::LD1i8_POST, "ld1", ".b", 2, 1 }, - { ARM64::LD1i16_POST, "ld1", ".h", 2, 2 }, - { ARM64::LD1i32_POST, "ld1", ".s", 2, 
4 }, - { ARM64::LD1i64_POST, "ld1", ".d", 2, 8 }, - { ARM64::LD1Rv16b, "ld1r", ".16b", 0, 0 }, - { ARM64::LD1Rv8h, "ld1r", ".8h", 0, 0 }, - { ARM64::LD1Rv4s, "ld1r", ".4s", 0, 0 }, - { ARM64::LD1Rv2d, "ld1r", ".2d", 0, 0 }, - { ARM64::LD1Rv8b, "ld1r", ".8b", 0, 0 }, - { ARM64::LD1Rv4h, "ld1r", ".4h", 0, 0 }, - { ARM64::LD1Rv2s, "ld1r", ".2s", 0, 0 }, - { ARM64::LD1Rv1d, "ld1r", ".1d", 0, 0 }, - { ARM64::LD1Rv16b_POST, "ld1r", ".16b", 0, 1 }, - { ARM64::LD1Rv8h_POST, "ld1r", ".8h", 0, 2 }, - { ARM64::LD1Rv4s_POST, "ld1r", ".4s", 0, 4 }, - { ARM64::LD1Rv2d_POST, "ld1r", ".2d", 0, 8 }, - { ARM64::LD1Rv8b_POST, "ld1r", ".8b", 0, 1 }, - { ARM64::LD1Rv4h_POST, "ld1r", ".4h", 0, 2 }, - { ARM64::LD1Rv2s_POST, "ld1r", ".2s", 0, 4 }, - { ARM64::LD1Rv1d_POST, "ld1r", ".1d", 0, 8 }, - { ARM64::LD1Onev16b, "ld1", ".16b", 0, 0 }, - { ARM64::LD1Onev8h, "ld1", ".8h", 0, 0 }, - { ARM64::LD1Onev4s, "ld1", ".4s", 0, 0 }, - { ARM64::LD1Onev2d, "ld1", ".2d", 0, 0 }, - { ARM64::LD1Onev8b, "ld1", ".8b", 0, 0 }, - { ARM64::LD1Onev4h, "ld1", ".4h", 0, 0 }, - { ARM64::LD1Onev2s, "ld1", ".2s", 0, 0 }, - { ARM64::LD1Onev1d, "ld1", ".1d", 0, 0 }, - { ARM64::LD1Onev16b_POST, "ld1", ".16b", 0, 16 }, - { ARM64::LD1Onev8h_POST, "ld1", ".8h", 0, 16 }, - { ARM64::LD1Onev4s_POST, "ld1", ".4s", 0, 16 }, - { ARM64::LD1Onev2d_POST, "ld1", ".2d", 0, 16 }, - { ARM64::LD1Onev8b_POST, "ld1", ".8b", 0, 8 }, - { ARM64::LD1Onev4h_POST, "ld1", ".4h", 0, 8 }, - { ARM64::LD1Onev2s_POST, "ld1", ".2s", 0, 8 }, - { ARM64::LD1Onev1d_POST, "ld1", ".1d", 0, 8 }, - { ARM64::LD1Twov16b, "ld1", ".16b", 0, 0 }, - { ARM64::LD1Twov8h, "ld1", ".8h", 0, 0 }, - { ARM64::LD1Twov4s, "ld1", ".4s", 0, 0 }, - { ARM64::LD1Twov2d, "ld1", ".2d", 0, 0 }, - { ARM64::LD1Twov8b, "ld1", ".8b", 0, 0 }, - { ARM64::LD1Twov4h, "ld1", ".4h", 0, 0 }, - { ARM64::LD1Twov2s, "ld1", ".2s", 0, 0 }, - { ARM64::LD1Twov1d, "ld1", ".1d", 0, 0 }, - { ARM64::LD1Twov16b_POST, "ld1", ".16b", 0, 32 }, - { ARM64::LD1Twov8h_POST, "ld1", ".8h", 0, 32 }, - { ARM64::LD1Twov4s_POST, "ld1", ".4s", 0, 32 }, - { ARM64::LD1Twov2d_POST, "ld1", ".2d", 0, 32 }, - { ARM64::LD1Twov8b_POST, "ld1", ".8b", 0, 16 }, - { ARM64::LD1Twov4h_POST, "ld1", ".4h", 0, 16 }, - { ARM64::LD1Twov2s_POST, "ld1", ".2s", 0, 16 }, - { ARM64::LD1Twov1d_POST, "ld1", ".1d", 0, 16 }, - { ARM64::LD1Threev16b, "ld1", ".16b", 0, 0 }, - { ARM64::LD1Threev8h, "ld1", ".8h", 0, 0 }, - { ARM64::LD1Threev4s, "ld1", ".4s", 0, 0 }, - { ARM64::LD1Threev2d, "ld1", ".2d", 0, 0 }, - { ARM64::LD1Threev8b, "ld1", ".8b", 0, 0 }, - { ARM64::LD1Threev4h, "ld1", ".4h", 0, 0 }, - { ARM64::LD1Threev2s, "ld1", ".2s", 0, 0 }, - { ARM64::LD1Threev1d, "ld1", ".1d", 0, 0 }, - { ARM64::LD1Threev16b_POST, "ld1", ".16b", 0, 48 }, - { ARM64::LD1Threev8h_POST, "ld1", ".8h", 0, 48 }, - { ARM64::LD1Threev4s_POST, "ld1", ".4s", 0, 48 }, - { ARM64::LD1Threev2d_POST, "ld1", ".2d", 0, 48 }, - { ARM64::LD1Threev8b_POST, "ld1", ".8b", 0, 24 }, - { ARM64::LD1Threev4h_POST, "ld1", ".4h", 0, 24 }, - { ARM64::LD1Threev2s_POST, "ld1", ".2s", 0, 24 }, - { ARM64::LD1Threev1d_POST, "ld1", ".1d", 0, 24 }, - { ARM64::LD1Fourv16b, "ld1", ".16b", 0, 0 }, - { ARM64::LD1Fourv8h, "ld1", ".8h", 0, 0 }, - { ARM64::LD1Fourv4s, "ld1", ".4s", 0, 0 }, - { ARM64::LD1Fourv2d, "ld1", ".2d", 0, 0 }, - { ARM64::LD1Fourv8b, "ld1", ".8b", 0, 0 }, - { ARM64::LD1Fourv4h, "ld1", ".4h", 0, 0 }, - { ARM64::LD1Fourv2s, "ld1", ".2s", 0, 0 }, - { ARM64::LD1Fourv1d, "ld1", ".1d", 0, 0 }, - { ARM64::LD1Fourv16b_POST, "ld1", ".16b", 0, 64 }, - { ARM64::LD1Fourv8h_POST, "ld1", ".8h", 0, 64 }, - { 
ARM64::LD1Fourv4s_POST, "ld1", ".4s", 0, 64 }, - { ARM64::LD1Fourv2d_POST, "ld1", ".2d", 0, 64 }, - { ARM64::LD1Fourv8b_POST, "ld1", ".8b", 0, 32 }, - { ARM64::LD1Fourv4h_POST, "ld1", ".4h", 0, 32 }, - { ARM64::LD1Fourv2s_POST, "ld1", ".2s", 0, 32 }, - { ARM64::LD1Fourv1d_POST, "ld1", ".1d", 0, 32 }, - { ARM64::LD2i8, "ld2", ".b", 2, 0 }, - { ARM64::LD2i16, "ld2", ".h", 2, 0 }, - { ARM64::LD2i32, "ld2", ".s", 2, 0 }, - { ARM64::LD2i64, "ld2", ".d", 2, 0 }, - { ARM64::LD2i8_POST, "ld2", ".b", 2, 2 }, - { ARM64::LD2i16_POST, "ld2", ".h", 2, 4 }, - { ARM64::LD2i32_POST, "ld2", ".s", 2, 8 }, - { ARM64::LD2i64_POST, "ld2", ".d", 2, 16 }, - { ARM64::LD2Rv16b, "ld2r", ".16b", 0, 0 }, - { ARM64::LD2Rv8h, "ld2r", ".8h", 0, 0 }, - { ARM64::LD2Rv4s, "ld2r", ".4s", 0, 0 }, - { ARM64::LD2Rv2d, "ld2r", ".2d", 0, 0 }, - { ARM64::LD2Rv8b, "ld2r", ".8b", 0, 0 }, - { ARM64::LD2Rv4h, "ld2r", ".4h", 0, 0 }, - { ARM64::LD2Rv2s, "ld2r", ".2s", 0, 0 }, - { ARM64::LD2Rv1d, "ld2r", ".1d", 0, 0 }, - { ARM64::LD2Rv16b_POST, "ld2r", ".16b", 0, 2 }, - { ARM64::LD2Rv8h_POST, "ld2r", ".8h", 0, 4 }, - { ARM64::LD2Rv4s_POST, "ld2r", ".4s", 0, 8 }, - { ARM64::LD2Rv2d_POST, "ld2r", ".2d", 0, 16 }, - { ARM64::LD2Rv8b_POST, "ld2r", ".8b", 0, 2 }, - { ARM64::LD2Rv4h_POST, "ld2r", ".4h", 0, 4 }, - { ARM64::LD2Rv2s_POST, "ld2r", ".2s", 0, 8 }, - { ARM64::LD2Rv1d_POST, "ld2r", ".1d", 0, 16 }, - { ARM64::LD2Twov16b, "ld2", ".16b", 0, 0 }, - { ARM64::LD2Twov8h, "ld2", ".8h", 0, 0 }, - { ARM64::LD2Twov4s, "ld2", ".4s", 0, 0 }, - { ARM64::LD2Twov2d, "ld2", ".2d", 0, 0 }, - { ARM64::LD2Twov8b, "ld2", ".8b", 0, 0 }, - { ARM64::LD2Twov4h, "ld2", ".4h", 0, 0 }, - { ARM64::LD2Twov2s, "ld2", ".2s", 0, 0 }, - { ARM64::LD2Twov16b_POST, "ld2", ".16b", 0, 32 }, - { ARM64::LD2Twov8h_POST, "ld2", ".8h", 0, 32 }, - { ARM64::LD2Twov4s_POST, "ld2", ".4s", 0, 32 }, - { ARM64::LD2Twov2d_POST, "ld2", ".2d", 0, 32 }, - { ARM64::LD2Twov8b_POST, "ld2", ".8b", 0, 16 }, - { ARM64::LD2Twov4h_POST, "ld2", ".4h", 0, 16 }, - { ARM64::LD2Twov2s_POST, "ld2", ".2s", 0, 16 }, - { ARM64::LD3i8, "ld3", ".b", 2, 0 }, - { ARM64::LD3i16, "ld3", ".h", 2, 0 }, - { ARM64::LD3i32, "ld3", ".s", 2, 0 }, - { ARM64::LD3i64, "ld3", ".d", 2, 0 }, - { ARM64::LD3i8_POST, "ld3", ".b", 2, 3 }, - { ARM64::LD3i16_POST, "ld3", ".h", 2, 6 }, - { ARM64::LD3i32_POST, "ld3", ".s", 2, 12 }, - { ARM64::LD3i64_POST, "ld3", ".d", 2, 24 }, - { ARM64::LD3Rv16b, "ld3r", ".16b", 0, 0 }, - { ARM64::LD3Rv8h, "ld3r", ".8h", 0, 0 }, - { ARM64::LD3Rv4s, "ld3r", ".4s", 0, 0 }, - { ARM64::LD3Rv2d, "ld3r", ".2d", 0, 0 }, - { ARM64::LD3Rv8b, "ld3r", ".8b", 0, 0 }, - { ARM64::LD3Rv4h, "ld3r", ".4h", 0, 0 }, - { ARM64::LD3Rv2s, "ld3r", ".2s", 0, 0 }, - { ARM64::LD3Rv1d, "ld3r", ".1d", 0, 0 }, - { ARM64::LD3Rv16b_POST, "ld3r", ".16b", 0, 3 }, - { ARM64::LD3Rv8h_POST, "ld3r", ".8h", 0, 6 }, - { ARM64::LD3Rv4s_POST, "ld3r", ".4s", 0, 12 }, - { ARM64::LD3Rv2d_POST, "ld3r", ".2d", 0, 24 }, - { ARM64::LD3Rv8b_POST, "ld3r", ".8b", 0, 3 }, - { ARM64::LD3Rv4h_POST, "ld3r", ".4h", 0, 6 }, - { ARM64::LD3Rv2s_POST, "ld3r", ".2s", 0, 12 }, - { ARM64::LD3Rv1d_POST, "ld3r", ".1d", 0, 24 }, - { ARM64::LD3Threev16b, "ld3", ".16b", 0, 0 }, - { ARM64::LD3Threev8h, "ld3", ".8h", 0, 0 }, - { ARM64::LD3Threev4s, "ld3", ".4s", 0, 0 }, - { ARM64::LD3Threev2d, "ld3", ".2d", 0, 0 }, - { ARM64::LD3Threev8b, "ld3", ".8b", 0, 0 }, - { ARM64::LD3Threev4h, "ld3", ".4h", 0, 0 }, - { ARM64::LD3Threev2s, "ld3", ".2s", 0, 0 }, - { ARM64::LD3Threev16b_POST, "ld3", ".16b", 0, 48 }, - { ARM64::LD3Threev8h_POST, "ld3", ".8h", 0, 48 }, - { 
ARM64::LD3Threev4s_POST, "ld3", ".4s", 0, 48 }, - { ARM64::LD3Threev2d_POST, "ld3", ".2d", 0, 48 }, - { ARM64::LD3Threev8b_POST, "ld3", ".8b", 0, 24 }, - { ARM64::LD3Threev4h_POST, "ld3", ".4h", 0, 24 }, - { ARM64::LD3Threev2s_POST, "ld3", ".2s", 0, 24 }, - { ARM64::LD4i8, "ld4", ".b", 2, 0 }, - { ARM64::LD4i16, "ld4", ".h", 2, 0 }, - { ARM64::LD4i32, "ld4", ".s", 2, 0 }, - { ARM64::LD4i64, "ld4", ".d", 2, 0 }, - { ARM64::LD4i8_POST, "ld4", ".b", 2, 4 }, - { ARM64::LD4i16_POST, "ld4", ".h", 2, 8 }, - { ARM64::LD4i32_POST, "ld4", ".s", 2, 16 }, - { ARM64::LD4i64_POST, "ld4", ".d", 2, 32 }, - { ARM64::LD4Rv16b, "ld4r", ".16b", 0, 0 }, - { ARM64::LD4Rv8h, "ld4r", ".8h", 0, 0 }, - { ARM64::LD4Rv4s, "ld4r", ".4s", 0, 0 }, - { ARM64::LD4Rv2d, "ld4r", ".2d", 0, 0 }, - { ARM64::LD4Rv8b, "ld4r", ".8b", 0, 0 }, - { ARM64::LD4Rv4h, "ld4r", ".4h", 0, 0 }, - { ARM64::LD4Rv2s, "ld4r", ".2s", 0, 0 }, - { ARM64::LD4Rv1d, "ld4r", ".1d", 0, 0 }, - { ARM64::LD4Rv16b_POST, "ld4r", ".16b", 0, 4 }, - { ARM64::LD4Rv8h_POST, "ld4r", ".8h", 0, 8 }, - { ARM64::LD4Rv4s_POST, "ld4r", ".4s", 0, 16 }, - { ARM64::LD4Rv2d_POST, "ld4r", ".2d", 0, 32 }, - { ARM64::LD4Rv8b_POST, "ld4r", ".8b", 0, 4 }, - { ARM64::LD4Rv4h_POST, "ld4r", ".4h", 0, 8 }, - { ARM64::LD4Rv2s_POST, "ld4r", ".2s", 0, 16 }, - { ARM64::LD4Rv1d_POST, "ld4r", ".1d", 0, 32 }, - { ARM64::LD4Fourv16b, "ld4", ".16b", 0, 0 }, - { ARM64::LD4Fourv8h, "ld4", ".8h", 0, 0 }, - { ARM64::LD4Fourv4s, "ld4", ".4s", 0, 0 }, - { ARM64::LD4Fourv2d, "ld4", ".2d", 0, 0 }, - { ARM64::LD4Fourv8b, "ld4", ".8b", 0, 0 }, - { ARM64::LD4Fourv4h, "ld4", ".4h", 0, 0 }, - { ARM64::LD4Fourv2s, "ld4", ".2s", 0, 0 }, - { ARM64::LD4Fourv16b_POST, "ld4", ".16b", 0, 64 }, - { ARM64::LD4Fourv8h_POST, "ld4", ".8h", 0, 64 }, - { ARM64::LD4Fourv4s_POST, "ld4", ".4s", 0, 64 }, - { ARM64::LD4Fourv2d_POST, "ld4", ".2d", 0, 64 }, - { ARM64::LD4Fourv8b_POST, "ld4", ".8b", 0, 32 }, - { ARM64::LD4Fourv4h_POST, "ld4", ".4h", 0, 32 }, - { ARM64::LD4Fourv2s_POST, "ld4", ".2s", 0, 32 }, - { ARM64::ST1i8, "st1", ".b", 1, 0 }, - { ARM64::ST1i16, "st1", ".h", 1, 0 }, - { ARM64::ST1i32, "st1", ".s", 1, 0 }, - { ARM64::ST1i64, "st1", ".d", 1, 0 }, - { ARM64::ST1i8_POST, "st1", ".b", 1, 1 }, - { ARM64::ST1i16_POST, "st1", ".h", 1, 2 }, - { ARM64::ST1i32_POST, "st1", ".s", 1, 4 }, - { ARM64::ST1i64_POST, "st1", ".d", 1, 8 }, - { ARM64::ST1Onev16b, "st1", ".16b", 0, 0 }, - { ARM64::ST1Onev8h, "st1", ".8h", 0, 0 }, - { ARM64::ST1Onev4s, "st1", ".4s", 0, 0 }, - { ARM64::ST1Onev2d, "st1", ".2d", 0, 0 }, - { ARM64::ST1Onev8b, "st1", ".8b", 0, 0 }, - { ARM64::ST1Onev4h, "st1", ".4h", 0, 0 }, - { ARM64::ST1Onev2s, "st1", ".2s", 0, 0 }, - { ARM64::ST1Onev1d, "st1", ".1d", 0, 0 }, - { ARM64::ST1Onev16b_POST, "st1", ".16b", 0, 16 }, - { ARM64::ST1Onev8h_POST, "st1", ".8h", 0, 16 }, - { ARM64::ST1Onev4s_POST, "st1", ".4s", 0, 16 }, - { ARM64::ST1Onev2d_POST, "st1", ".2d", 0, 16 }, - { ARM64::ST1Onev8b_POST, "st1", ".8b", 0, 8 }, - { ARM64::ST1Onev4h_POST, "st1", ".4h", 0, 8 }, - { ARM64::ST1Onev2s_POST, "st1", ".2s", 0, 8 }, - { ARM64::ST1Onev1d_POST, "st1", ".1d", 0, 8 }, - { ARM64::ST1Twov16b, "st1", ".16b", 0, 0 }, - { ARM64::ST1Twov8h, "st1", ".8h", 0, 0 }, - { ARM64::ST1Twov4s, "st1", ".4s", 0, 0 }, - { ARM64::ST1Twov2d, "st1", ".2d", 0, 0 }, - { ARM64::ST1Twov8b, "st1", ".8b", 0, 0 }, - { ARM64::ST1Twov4h, "st1", ".4h", 0, 0 }, - { ARM64::ST1Twov2s, "st1", ".2s", 0, 0 }, - { ARM64::ST1Twov1d, "st1", ".1d", 0, 0 }, - { ARM64::ST1Twov16b_POST, "st1", ".16b", 0, 32 }, - { ARM64::ST1Twov8h_POST, "st1", ".8h", 0, 32 }, 
- { ARM64::ST1Twov4s_POST, "st1", ".4s", 0, 32 }, - { ARM64::ST1Twov2d_POST, "st1", ".2d", 0, 32 }, - { ARM64::ST1Twov8b_POST, "st1", ".8b", 0, 16 }, - { ARM64::ST1Twov4h_POST, "st1", ".4h", 0, 16 }, - { ARM64::ST1Twov2s_POST, "st1", ".2s", 0, 16 }, - { ARM64::ST1Twov1d_POST, "st1", ".1d", 0, 16 }, - { ARM64::ST1Threev16b, "st1", ".16b", 0, 0 }, - { ARM64::ST1Threev8h, "st1", ".8h", 0, 0 }, - { ARM64::ST1Threev4s, "st1", ".4s", 0, 0 }, - { ARM64::ST1Threev2d, "st1", ".2d", 0, 0 }, - { ARM64::ST1Threev8b, "st1", ".8b", 0, 0 }, - { ARM64::ST1Threev4h, "st1", ".4h", 0, 0 }, - { ARM64::ST1Threev2s, "st1", ".2s", 0, 0 }, - { ARM64::ST1Threev1d, "st1", ".1d", 0, 0 }, - { ARM64::ST1Threev16b_POST, "st1", ".16b", 0, 48 }, - { ARM64::ST1Threev8h_POST, "st1", ".8h", 0, 48 }, - { ARM64::ST1Threev4s_POST, "st1", ".4s", 0, 48 }, - { ARM64::ST1Threev2d_POST, "st1", ".2d", 0, 48 }, - { ARM64::ST1Threev8b_POST, "st1", ".8b", 0, 24 }, - { ARM64::ST1Threev4h_POST, "st1", ".4h", 0, 24 }, - { ARM64::ST1Threev2s_POST, "st1", ".2s", 0, 24 }, - { ARM64::ST1Threev1d_POST, "st1", ".1d", 0, 24 }, - { ARM64::ST1Fourv16b, "st1", ".16b", 0, 0 }, - { ARM64::ST1Fourv8h, "st1", ".8h", 0, 0 }, - { ARM64::ST1Fourv4s, "st1", ".4s", 0, 0 }, - { ARM64::ST1Fourv2d, "st1", ".2d", 0, 0 }, - { ARM64::ST1Fourv8b, "st1", ".8b", 0, 0 }, - { ARM64::ST1Fourv4h, "st1", ".4h", 0, 0 }, - { ARM64::ST1Fourv2s, "st1", ".2s", 0, 0 }, - { ARM64::ST1Fourv1d, "st1", ".1d", 0, 0 }, - { ARM64::ST1Fourv16b_POST, "st1", ".16b", 0, 64 }, - { ARM64::ST1Fourv8h_POST, "st1", ".8h", 0, 64 }, - { ARM64::ST1Fourv4s_POST, "st1", ".4s", 0, 64 }, - { ARM64::ST1Fourv2d_POST, "st1", ".2d", 0, 64 }, - { ARM64::ST1Fourv8b_POST, "st1", ".8b", 0, 32 }, - { ARM64::ST1Fourv4h_POST, "st1", ".4h", 0, 32 }, - { ARM64::ST1Fourv2s_POST, "st1", ".2s", 0, 32 }, - { ARM64::ST1Fourv1d_POST, "st1", ".1d", 0, 32 }, - { ARM64::ST2i8, "st2", ".b", 1, 0 }, - { ARM64::ST2i16, "st2", ".h", 1, 0 }, - { ARM64::ST2i32, "st2", ".s", 1, 0 }, - { ARM64::ST2i64, "st2", ".d", 1, 0 }, - { ARM64::ST2i8_POST, "st2", ".b", 1, 2 }, - { ARM64::ST2i16_POST, "st2", ".h", 1, 4 }, - { ARM64::ST2i32_POST, "st2", ".s", 1, 8 }, - { ARM64::ST2i64_POST, "st2", ".d", 1, 16 }, - { ARM64::ST2Twov16b, "st2", ".16b", 0, 0 }, - { ARM64::ST2Twov8h, "st2", ".8h", 0, 0 }, - { ARM64::ST2Twov4s, "st2", ".4s", 0, 0 }, - { ARM64::ST2Twov2d, "st2", ".2d", 0, 0 }, - { ARM64::ST2Twov8b, "st2", ".8b", 0, 0 }, - { ARM64::ST2Twov4h, "st2", ".4h", 0, 0 }, - { ARM64::ST2Twov2s, "st2", ".2s", 0, 0 }, - { ARM64::ST2Twov16b_POST, "st2", ".16b", 0, 32 }, - { ARM64::ST2Twov8h_POST, "st2", ".8h", 0, 32 }, - { ARM64::ST2Twov4s_POST, "st2", ".4s", 0, 32 }, - { ARM64::ST2Twov2d_POST, "st2", ".2d", 0, 32 }, - { ARM64::ST2Twov8b_POST, "st2", ".8b", 0, 16 }, - { ARM64::ST2Twov4h_POST, "st2", ".4h", 0, 16 }, - { ARM64::ST2Twov2s_POST, "st2", ".2s", 0, 16 }, - { ARM64::ST3i8, "st3", ".b", 1, 0 }, - { ARM64::ST3i16, "st3", ".h", 1, 0 }, - { ARM64::ST3i32, "st3", ".s", 1, 0 }, - { ARM64::ST3i64, "st3", ".d", 1, 0 }, - { ARM64::ST3i8_POST, "st3", ".b", 1, 3 }, - { ARM64::ST3i16_POST, "st3", ".h", 1, 6 }, - { ARM64::ST3i32_POST, "st3", ".s", 1, 12 }, - { ARM64::ST3i64_POST, "st3", ".d", 1, 24 }, - { ARM64::ST3Threev16b, "st3", ".16b", 0, 0 }, - { ARM64::ST3Threev8h, "st3", ".8h", 0, 0 }, - { ARM64::ST3Threev4s, "st3", ".4s", 0, 0 }, - { ARM64::ST3Threev2d, "st3", ".2d", 0, 0 }, - { ARM64::ST3Threev8b, "st3", ".8b", 0, 0 }, - { ARM64::ST3Threev4h, "st3", ".4h", 0, 0 }, - { ARM64::ST3Threev2s, "st3", ".2s", 0, 0 }, - { ARM64::ST3Threev16b_POST, 
"st3", ".16b", 0, 48 }, - { ARM64::ST3Threev8h_POST, "st3", ".8h", 0, 48 }, - { ARM64::ST3Threev4s_POST, "st3", ".4s", 0, 48 }, - { ARM64::ST3Threev2d_POST, "st3", ".2d", 0, 48 }, - { ARM64::ST3Threev8b_POST, "st3", ".8b", 0, 24 }, - { ARM64::ST3Threev4h_POST, "st3", ".4h", 0, 24 }, - { ARM64::ST3Threev2s_POST, "st3", ".2s", 0, 24 }, - { ARM64::ST4i8, "st4", ".b", 1, 0 }, - { ARM64::ST4i16, "st4", ".h", 1, 0 }, - { ARM64::ST4i32, "st4", ".s", 1, 0 }, - { ARM64::ST4i64, "st4", ".d", 1, 0 }, - { ARM64::ST4i8_POST, "st4", ".b", 1, 4 }, - { ARM64::ST4i16_POST, "st4", ".h", 1, 8 }, - { ARM64::ST4i32_POST, "st4", ".s", 1, 16 }, - { ARM64::ST4i64_POST, "st4", ".d", 1, 32 }, - { ARM64::ST4Fourv16b, "st4", ".16b", 0, 0 }, - { ARM64::ST4Fourv8h, "st4", ".8h", 0, 0 }, - { ARM64::ST4Fourv4s, "st4", ".4s", 0, 0 }, - { ARM64::ST4Fourv2d, "st4", ".2d", 0, 0 }, - { ARM64::ST4Fourv8b, "st4", ".8b", 0, 0 }, - { ARM64::ST4Fourv4h, "st4", ".4h", 0, 0 }, - { ARM64::ST4Fourv2s, "st4", ".2s", 0, 0 }, - { ARM64::ST4Fourv16b_POST, "st4", ".16b", 0, 64 }, - { ARM64::ST4Fourv8h_POST, "st4", ".8h", 0, 64 }, - { ARM64::ST4Fourv4s_POST, "st4", ".4s", 0, 64 }, - { ARM64::ST4Fourv2d_POST, "st4", ".2d", 0, 64 }, - { ARM64::ST4Fourv8b_POST, "st4", ".8b", 0, 32 }, - { ARM64::ST4Fourv4h_POST, "st4", ".4h", 0, 32 }, - { ARM64::ST4Fourv2s_POST, "st4", ".2s", 0, 32 }, -}; - -static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) { - unsigned Idx; - for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx) - if (LdStNInstInfo[Idx].Opcode == Opcode) - return &LdStNInstInfo[Idx]; - - return 0; -} - -void ARM64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - unsigned Opcode = MI->getOpcode(); - StringRef Layout, Mnemonic; - - bool IsTbx; - if (isTblTbxInstruction(MI->getOpcode(), Layout, IsTbx)) { - O << "\t" << (IsTbx ? "tbx" : "tbl") << Layout << '\t' - << getRegisterName(MI->getOperand(0).getReg(), ARM64::vreg) << ", "; - - unsigned ListOpNum = IsTbx ? 2 : 1; - printVectorList(MI, ListOpNum, O, ""); - - O << ", " - << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), ARM64::vreg); - printAnnotation(O, Annot); - return; - } - - if (LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) { - O << "\t" << LdStDesc->Mnemonic << LdStDesc->Layout << '\t'; - - // Now onto the operands: first a vector list with possible lane - // specifier. E.g. { v0 }[2] - printVectorList(MI, 0, O, ""); - - if (LdStDesc->LaneOperand != 0) - O << '[' << MI->getOperand(LdStDesc->LaneOperand).getImm() << ']'; - - // Next the address: [xN] - unsigned AddrOpNum = LdStDesc->LaneOperand + 1; - unsigned AddrReg = MI->getOperand(AddrOpNum).getReg(); - O << ", [" << getRegisterName(AddrReg) << ']'; - - // Finally, there might be a post-indexed offset. 
- if (LdStDesc->NaturalOffset != 0) { - unsigned Reg = MI->getOperand(AddrOpNum + 1).getReg(); - if (Reg != ARM64::XZR) - O << ", " << getRegisterName(Reg); - else { - assert(LdStDesc->NaturalOffset && "no offset on post-inc instruction?"); - O << ", #" << LdStDesc->NaturalOffset; - } - } - - printAnnotation(O, Annot); - return; - } - - ARM64InstPrinter::printInst(MI, O, Annot); -} - -bool ARM64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { -#ifndef NDEBUG - unsigned Opcode = MI->getOpcode(); - assert((Opcode == ARM64::SYS || Opcode == ARM64::SYSxt) && - "Invalid opcode for SYS alias!"); -#endif - - const char *Asm = 0; - const MCOperand &Op1 = MI->getOperand(0); - const MCOperand &Cn = MI->getOperand(1); - const MCOperand &Cm = MI->getOperand(2); - const MCOperand &Op2 = MI->getOperand(3); - - unsigned Op1Val = Op1.getImm(); - unsigned CnVal = Cn.getImm(); - unsigned CmVal = Cm.getImm(); - unsigned Op2Val = Op2.getImm(); - - if (CnVal == 7) { - switch (CmVal) { - default: - break; - - // IC aliases - case 1: - if (Op1Val == 0 && Op2Val == 0) - Asm = "ic\tialluis"; - break; - case 5: - if (Op1Val == 0 && Op2Val == 0) - Asm = "ic\tiallu"; - else if (Op1Val == 3 && Op2Val == 1) - Asm = "ic\tivau"; - break; - - // DC aliases - case 4: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tzva"; - break; - case 6: - if (Op1Val == 0 && Op2Val == 1) - Asm = "dc\tivac"; - if (Op1Val == 0 && Op2Val == 2) - Asm = "dc\tisw"; - break; - case 10: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tcvac"; - else if (Op1Val == 0 && Op2Val == 2) - Asm = "dc\tcsw"; - break; - case 11: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tcvau"; - break; - case 14: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tcivac"; - else if (Op1Val == 0 && Op2Val == 2) - Asm = "dc\tcisw"; - break; - - // AT aliases - case 8: - switch (Op1Val) { - default: - break; - case 0: - switch (Op2Val) { - default: - break; - case 0: Asm = "at\ts1e1r"; break; - case 1: Asm = "at\ts1e1w"; break; - case 2: Asm = "at\ts1e0r"; break; - case 3: Asm = "at\ts1e0w"; break; - } - break; - case 4: - switch (Op2Val) { - default: - break; - case 0: Asm = "at\ts1e2r"; break; - case 1: Asm = "at\ts1e2w"; break; - case 4: Asm = "at\ts12e1r"; break; - case 5: Asm = "at\ts12e1w"; break; - case 6: Asm = "at\ts12e0r"; break; - case 7: Asm = "at\ts12e0w"; break; - } - break; - case 6: - switch (Op2Val) { - default: - break; - case 0: Asm = "at\ts1e3r"; break; - case 1: Asm = "at\ts1e3w"; break; - } - break; - } - break; - } - } else if (CnVal == 8) { - // TLBI aliases - switch (CmVal) { - default: - break; - case 3: - switch (Op1Val) { - default: - break; - case 0: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\tvmalle1is"; break; - case 1: Asm = "tlbi\tvae1is"; break; - case 2: Asm = "tlbi\taside1is"; break; - case 3: Asm = "tlbi\tvaae1is"; break; - case 5: Asm = "tlbi\tvale1is"; break; - case 7: Asm = "tlbi\tvaale1is"; break; - } - break; - case 4: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle2is"; break; - case 1: Asm = "tlbi\tvae2is"; break; - case 4: Asm = "tlbi\talle1is"; break; - case 5: Asm = "tlbi\tvale2is"; break; - case 6: Asm = "tlbi\tvmalls12e1is"; break; - } - break; - case 6: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle3is"; break; - case 1: Asm = "tlbi\tvae3is"; break; - case 5: Asm = "tlbi\tvale3is"; break; - } - break; - } - break; - case 4: - switch (Op1Val) { - default: - break; - case 4: - switch (Op2Val) { - default: - break; - case 1: Asm = "tlbi\tipas2e1"; break; - case 
5: Asm = "tlbi\tipas2le1"; break; - } - break; - } - break; - case 7: - switch (Op1Val) { - default: - break; - case 0: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\tvmalle1"; break; - case 1: Asm = "tlbi\tvae1"; break; - case 2: Asm = "tlbi\taside1"; break; - case 3: Asm = "tlbi\tvaae1"; break; - case 5: Asm = "tlbi\tvale1"; break; - case 7: Asm = "tlbi\tvaale1"; break; - } - break; - case 4: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle2"; break; - case 1: Asm = "tlbi\tvae2"; break; - case 4: Asm = "tlbi\talle1"; break; - case 5: Asm = "tlbi\tvale2"; break; - case 6: Asm = "tlbi\tvmalls12e1"; break; - } - break; - case 6: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle3"; break; - case 1: Asm = "tlbi\tvae3"; break; - case 5: Asm = "tlbi\tvale3"; break; - } - break; - } - break; - } - } - - if (Asm) { - O << '\t' << Asm; - if (MI->getNumOperands() == 5) - O << ", " << getRegisterName(MI->getOperand(4).getReg()); - } - - return Asm != 0; -} - -void ARM64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - unsigned Reg = Op.getReg(); - O << getRegisterName(Reg); - } else if (Op.isImm()) { - O << '#' << Op.getImm(); - } else { - assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << *Op.getExpr(); - } -} - -void ARM64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo, - unsigned Imm, raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - unsigned Reg = Op.getReg(); - if (Reg == ARM64::XZR) - O << "#" << Imm; - else - O << getRegisterName(Reg); - } else - assert(0 && "unknown operand kind in printPostIncOperand64"); -} - -void ARM64InstPrinter::printPostIncOperand1(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 1, O); -} - -void ARM64InstPrinter::printPostIncOperand2(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 2, O); -} - -void ARM64InstPrinter::printPostIncOperand3(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 3, O); -} - -void ARM64InstPrinter::printPostIncOperand4(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 4, O); -} - -void ARM64InstPrinter::printPostIncOperand6(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 6, O); -} - -void ARM64InstPrinter::printPostIncOperand8(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 8, O); -} - -void ARM64InstPrinter::printPostIncOperand12(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 12, O); -} - -void ARM64InstPrinter::printPostIncOperand16(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 16, O); -} - -void ARM64InstPrinter::printPostIncOperand24(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 24, O); -} - -void ARM64InstPrinter::printPostIncOperand32(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 32, O); -} - -void ARM64InstPrinter::printPostIncOperand48(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 48, O); -} - -void ARM64InstPrinter::printPostIncOperand64(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 64, O); -} - -void ARM64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, - 
raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - assert(Op.isReg() && "Non-register vreg operand!"); - unsigned Reg = Op.getReg(); - O << getRegisterName(Reg, ARM64::vreg); -} - -void ARM64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - assert(Op.isImm() && "System instruction C[nm] operands must be immediates!"); - O << "c" << Op.getImm(); -} - -void ARM64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - if (MO.isImm()) { - unsigned Val = (MO.getImm() & 0xfff); - assert(Val == MO.getImm() && "Add/sub immediate out of range!"); - unsigned Shift = - ARM64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm()); - O << '#' << (Val << Shift); - // Distinguish "0, lsl #12" from "0, lsl #0". - if (Val == 0 && Shift != 0) - printShifter(MI, OpNum + 1, O); - } else { - assert(MO.isExpr() && "Unexpected operand type!"); - O << *MO.getExpr(); - printShifter(MI, OpNum + 1, O); - } -} - -void ARM64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - uint64_t Val = MI->getOperand(OpNum).getImm(); - O << "#0x"; - O.write_hex(ARM64_AM::decodeLogicalImmediate(Val, 32)); -} - -void ARM64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - uint64_t Val = MI->getOperand(OpNum).getImm(); - O << "#0x"; - O.write_hex(ARM64_AM::decodeLogicalImmediate(Val, 64)); -} - -void ARM64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNum).getImm(); - // LSL #0 should not be printed. - if (ARM64_AM::getShiftType(Val) == ARM64_AM::LSL && - ARM64_AM::getShiftValue(Val) == 0) - return; - O << ", " << ARM64_AM::getShiftName(ARM64_AM::getShiftType(Val)) << " #" - << ARM64_AM::getShiftValue(Val); -} - -void ARM64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << getRegisterName(MI->getOperand(OpNum).getReg()); - printShifter(MI, OpNum + 1, O); -} - -void ARM64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << getRegisterName(MI->getOperand(OpNum).getReg()); - printExtend(MI, OpNum + 1, O); -} - -void ARM64InstPrinter::printExtend(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNum).getImm(); - ARM64_AM::ExtendType ExtType = ARM64_AM::getArithExtendType(Val); - unsigned ShiftVal = ARM64_AM::getArithShiftValue(Val); - - // If the destination or first source register operand is [W]SP, print - // UXTW/UXTX as LSL, and if the shift amount is also zero, print nothing at - // all. - if (ExtType == ARM64_AM::UXTW || ExtType == ARM64_AM::UXTX) { - unsigned Dest = MI->getOperand(0).getReg(); - unsigned Src1 = MI->getOperand(1).getReg(); - if (Dest == ARM64::SP || Dest == ARM64::WSP || Src1 == ARM64::SP || - Src1 == ARM64::WSP) { - if (ShiftVal != 0) - O << ", lsl #" << ShiftVal; - return; - } - } - O << ", " << ARM64_AM::getExtendName(ExtType); - if (ShiftVal != 0) - O << " #" << ShiftVal; -} - -void ARM64InstPrinter::printDotCondCode(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(OpNum).getImm(); - if (CC != ARM64CC::AL) - O << '.' 
<< ARM64CC::getCondCodeName(CC); -} - -void ARM64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(OpNum).getImm(); - O << ARM64CC::getCondCodeName(CC); -} - -void ARM64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']'; -} - -void ARM64InstPrinter::printImmScale4(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '#' << 4 * MI->getOperand(OpNum).getImm(); -} - -void ARM64InstPrinter::printImmScale8(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '#' << 8 * MI->getOperand(OpNum).getImm(); -} - -void ARM64InstPrinter::printImmScale16(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '#' << 16 * MI->getOperand(OpNum).getImm(); -} - -void ARM64InstPrinter::printAMIndexed(const MCInst *MI, unsigned OpNum, - unsigned Scale, raw_ostream &O) { - const MCOperand MO1 = MI->getOperand(OpNum + 1); - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()); - if (MO1.isImm()) { - if (MO1.getImm() != 0) - O << ", #" << (MO1.getImm() * Scale); - } else { - assert(MO1.isExpr() && "Unexpected operand type!"); - O << ", " << *MO1.getExpr(); - } - O << ']'; -} - -void ARM64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned prfop = MI->getOperand(OpNum).getImm(); - if (ARM64_AM::isNamedPrefetchOp(prfop)) - O << ARM64_AM::getPrefetchOpName((ARM64_AM::PrefetchOp)prfop); - else - O << '#' << prfop; -} - -void ARM64InstPrinter::printMemoryPostIndexed32(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #" - << 4 * MI->getOperand(OpNum + 1).getImm(); -} - -void ARM64InstPrinter::printMemoryPostIndexed64(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #" - << 8 * MI->getOperand(OpNum + 1).getImm(); -} - -void ARM64InstPrinter::printMemoryPostIndexed128(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #" - << 16 * MI->getOperand(OpNum + 1).getImm(); -} - -void ARM64InstPrinter::printMemoryPostIndexed(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #" - << MI->getOperand(OpNum + 1).getImm(); -} - -void ARM64InstPrinter::printMemoryRegOffset(const MCInst *MI, unsigned OpNum, - raw_ostream &O, int LegalShiftAmt) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " - << getRegisterName(MI->getOperand(OpNum + 1).getReg()); - - unsigned Val = MI->getOperand(OpNum + 2).getImm(); - ARM64_AM::ExtendType ExtType = ARM64_AM::getMemExtendType(Val); - bool DoShift = ARM64_AM::getMemDoShift(Val); - - if (ExtType == ARM64_AM::UXTX) { - if (DoShift) - O << ", lsl"; - } else - O << ", " << ARM64_AM::getExtendName(ExtType); - - if (DoShift) - O << " #" << LegalShiftAmt; - - O << "]"; -} - -void ARM64InstPrinter::printMemoryRegOffset8(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printMemoryRegOffset(MI, OpNum, O, 0); -} - -void ARM64InstPrinter::printMemoryRegOffset16(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printMemoryRegOffset(MI, OpNum, O, 1); -} - -void ARM64InstPrinter::printMemoryRegOffset32(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printMemoryRegOffset(MI, OpNum, O, 2); -} - -void 
ARM64InstPrinter::printMemoryRegOffset64(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printMemoryRegOffset(MI, OpNum, O, 3); -} - -void ARM64InstPrinter::printMemoryRegOffset128(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printMemoryRegOffset(MI, OpNum, O, 4); -} - -void ARM64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - O << '#'; - if (MO.isFPImm()) - // FIXME: Should this ever happen? - O << MO.getFPImm(); - else - O << ARM64_AM::getFPImmFloat(MO.getImm()); -} - -static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { - while (Stride--) { - switch (Reg) { - default: - assert(0 && "Vector register expected!"); - case ARM64::Q0: Reg = ARM64::Q1; break; - case ARM64::Q1: Reg = ARM64::Q2; break; - case ARM64::Q2: Reg = ARM64::Q3; break; - case ARM64::Q3: Reg = ARM64::Q4; break; - case ARM64::Q4: Reg = ARM64::Q5; break; - case ARM64::Q5: Reg = ARM64::Q6; break; - case ARM64::Q6: Reg = ARM64::Q7; break; - case ARM64::Q7: Reg = ARM64::Q8; break; - case ARM64::Q8: Reg = ARM64::Q9; break; - case ARM64::Q9: Reg = ARM64::Q10; break; - case ARM64::Q10: Reg = ARM64::Q11; break; - case ARM64::Q11: Reg = ARM64::Q12; break; - case ARM64::Q12: Reg = ARM64::Q13; break; - case ARM64::Q13: Reg = ARM64::Q14; break; - case ARM64::Q14: Reg = ARM64::Q15; break; - case ARM64::Q15: Reg = ARM64::Q16; break; - case ARM64::Q16: Reg = ARM64::Q17; break; - case ARM64::Q17: Reg = ARM64::Q18; break; - case ARM64::Q18: Reg = ARM64::Q19; break; - case ARM64::Q19: Reg = ARM64::Q20; break; - case ARM64::Q20: Reg = ARM64::Q21; break; - case ARM64::Q21: Reg = ARM64::Q22; break; - case ARM64::Q22: Reg = ARM64::Q23; break; - case ARM64::Q23: Reg = ARM64::Q24; break; - case ARM64::Q24: Reg = ARM64::Q25; break; - case ARM64::Q25: Reg = ARM64::Q26; break; - case ARM64::Q26: Reg = ARM64::Q27; break; - case ARM64::Q27: Reg = ARM64::Q28; break; - case ARM64::Q28: Reg = ARM64::Q29; break; - case ARM64::Q29: Reg = ARM64::Q30; break; - case ARM64::Q30: Reg = ARM64::Q31; break; - // Vector lists can wrap around. - case ARM64::Q31: - Reg = ARM64::Q0; - break; - } - } - return Reg; -} - -void ARM64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, - raw_ostream &O, StringRef LayoutSuffix) { - unsigned Reg = MI->getOperand(OpNum).getReg(); - - O << "{ "; - - // Work out how many registers there are in the list (if there is an actual - // list). - unsigned NumRegs = 1; - if (MRI.getRegClass(ARM64::DDRegClassID).contains(Reg) || - MRI.getRegClass(ARM64::QQRegClassID).contains(Reg)) - NumRegs = 2; - else if (MRI.getRegClass(ARM64::DDDRegClassID).contains(Reg) || - MRI.getRegClass(ARM64::QQQRegClassID).contains(Reg)) - NumRegs = 3; - else if (MRI.getRegClass(ARM64::DDDDRegClassID).contains(Reg) || - MRI.getRegClass(ARM64::QQQQRegClassID).contains(Reg)) - NumRegs = 4; - - // Now forget about the list and find out what the first register is. - if (unsigned FirstReg = MRI.getSubReg(Reg, ARM64::dsub0)) - Reg = FirstReg; - else if (unsigned FirstReg = MRI.getSubReg(Reg, ARM64::qsub0)) - Reg = FirstReg; - - // If it's a D-reg, we need to promote it to the equivalent Q-reg before - // printing (otherwise getRegisterName fails). 
- if (MRI.getRegClass(ARM64::FPR64RegClassID).contains(Reg)) { - const MCRegisterClass &FPR128RC = MRI.getRegClass(ARM64::FPR128RegClassID); - Reg = MRI.getMatchingSuperReg(Reg, ARM64::dsub, &FPR128RC); - } - - for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) { - O << getRegisterName(Reg, ARM64::vreg) << LayoutSuffix; - if (i + 1 != NumRegs) - O << ", "; - } - - O << " }"; -} - -void ARM64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - printVectorList(MI, OpNum, O, ""); -} - -template <unsigned NumLanes, char LaneKind> -void ARM64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - std::string Suffix("."); - if (NumLanes) - Suffix += itostr(NumLanes) + LaneKind; - else - Suffix += LaneKind; - - printVectorList(MI, OpNum, O, Suffix); -} - -void ARM64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "[" << MI->getOperand(OpNum).getImm() << "]"; -} - -void ARM64InstPrinter::printAlignedBranchTarget(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNum); - - // If the label has already been resolved to an immediate offset (say, when - // we're running the disassembler), just print the immediate. - if (Op.isImm()) { - O << "#" << (Op.getImm() << 2); - return; - } - - // If the branch target is simply an address then print it in hex. - const MCConstantExpr *BranchTarget = - dyn_cast<MCConstantExpr>(MI->getOperand(OpNum).getExpr()); - int64_t Address; - if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { - O << "0x"; - O.write_hex(Address); - } else { - // Otherwise, just print the expression. - O << *MI->getOperand(OpNum).getExpr(); - } -} - -void ARM64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNum); - - // If the label has already been resolved to an immediate offset (say, when - // we're running the disassembler), just print the immediate. - if (Op.isImm()) { - O << "#" << (Op.getImm() << 12); - return; - } - - // Otherwise, just print the expression. 
- O << *MI->getOperand(OpNum).getExpr(); -} - -void ARM64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - const char *Name = ARM64SYS::getBarrierOptName((ARM64SYS::BarrierOption)Val); - if (Name) - O << Name; - else - O << "#" << Val; -} - -void ARM64InstPrinter::printSystemRegister(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - const char *Name = - ARM64SYS::getSystemRegisterName((ARM64SYS::SystemRegister)Val); - if (Name) { - O << Name; - return; - } - - unsigned Op0 = 2 | ((Val >> 14) & 1); - unsigned Op1 = (Val >> 11) & 7; - unsigned CRn = (Val >> 7) & 0xf; - unsigned CRm = (Val >> 3) & 0xf; - unsigned Op2 = Val & 7; - - O << 'S' << Op0 << '_' << Op1 << "_C" << CRn << "_C" << CRm << '_' << Op2; -} - -void ARM64InstPrinter::printSystemCPSRField(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - const char *Name = ARM64SYS::getCPSRFieldName((ARM64SYS::CPSRField)Val); - O << Name; -} - -void ARM64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned RawVal = MI->getOperand(OpNo).getImm(); - uint64_t Val = ARM64_AM::decodeAdvSIMDModImmType10(RawVal); - O << format("#%#016llx", Val); -} diff --git a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h b/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h deleted file mode 100644 index ff66ff0..0000000 --- a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h +++ /dev/null @@ -1,157 +0,0 @@ -//===-- ARM64InstPrinter.h - Convert ARM64 MCInst to assembly syntax ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints an ARM64 MCInst to a .s file. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64INSTPRINTER_H -#define ARM64INSTPRINTER_H - -#include "MCTargetDesc/ARM64MCTargetDesc.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCSubtargetInfo.h" - -namespace llvm { - -class MCOperand; - -class ARM64InstPrinter : public MCInstPrinter { -public: - ARM64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); - - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - - // Autogenerated by tblgen. 
- virtual void printInstruction(const MCInst *MI, raw_ostream &O); - virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O); - virtual StringRef getRegName(unsigned RegNo) const { - return getRegisterName(RegNo); - } - static const char *getRegisterName(unsigned RegNo, - unsigned AltIdx = ARM64::NoRegAltName); - -protected: - bool printSysAlias(const MCInst *MI, raw_ostream &O); - // Operand printers - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm, - raw_ostream &O); - void printPostIncOperand1(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand2(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand3(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand4(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand6(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand8(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand12(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand16(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand24(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand32(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand48(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand64(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printDotCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAlignedBranchTarget(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printAMIndexed(const MCInst *MI, unsigned OpNum, unsigned Scale, - raw_ostream &O); - void printAMIndexed128(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 16, O); - } - - void printAMIndexed64(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 8, O); - } - - void printAMIndexed32(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 4, O); - } - - void printAMIndexed16(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 2, O); - } - - void printAMIndexed8(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 1, O); - } - void printAMUnscaled(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 1, O); - } - void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printImmScale4(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printImmScale8(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printImmScale16(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printPrefetchOp(const MCInst *MI, unsigned 
OpNum, raw_ostream &O); - void printMemoryPostIndexed(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMemoryPostIndexed32(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printMemoryPostIndexed64(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printMemoryPostIndexed128(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printMemoryRegOffset(const MCInst *MI, unsigned OpNum, raw_ostream &O, - int LegalShiftAmt); - void printMemoryRegOffset8(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMemoryRegOffset16(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMemoryRegOffset32(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMemoryRegOffset64(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMemoryRegOffset128(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O, - StringRef LayoutSuffix); - - /// Print a list of vector registers where the type suffix is implicit - /// (i.e. attached to the instruction rather than the registers). - void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - template <unsigned NumLanes, char LaneKind> - void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSystemCPSRField(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); -}; - -class ARM64AppleInstPrinter : public ARM64InstPrinter { -public: - ARM64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); - - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - - virtual void printInstruction(const MCInst *MI, raw_ostream &O); - virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O); - virtual StringRef getRegName(unsigned RegNo) const { - return getRegisterName(RegNo); - } - static const char *getRegisterName(unsigned RegNo, - unsigned AltIdx = ARM64::NoRegAltName); -}; -} - -#endif diff --git a/lib/Target/ARM64/InstPrinter/CMakeLists.txt b/lib/Target/ARM64/InstPrinter/CMakeLists.txt deleted file mode 100644 index b8ee12c..0000000 --- a/lib/Target/ARM64/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARM64AsmPrinter - ARM64InstPrinter.cpp - ) - -add_dependencies(LLVMARM64AsmPrinter ARM64CommonTableGen) diff --git a/lib/Target/ARM64/InstPrinter/LLVMBuild.txt b/lib/Target/ARM64/InstPrinter/LLVMBuild.txt deleted file mode 100644 index 2ec83d2..0000000 --- a/lib/Target/ARM64/InstPrinter/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/ARM64/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. 
-; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ARM64AsmPrinter -parent = ARM64 -required_libraries = MC Support -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/InstPrinter/Makefile b/lib/Target/ARM64/InstPrinter/Makefile deleted file mode 100644 index a59efb0..0000000 --- a/lib/Target/ARM64/InstPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM64/AsmPrinter/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64AsmPrinter - -# Hack: we need to include 'main' arm target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/LLVMBuild.txt b/lib/Target/ARM64/LLVMBuild.txt deleted file mode 100644 index 45b0628..0000000 --- a/lib/Target/ARM64/LLVMBuild.txt +++ /dev/null @@ -1,36 +0,0 @@ -;===- ./lib/Target/ARM64/LLVMBuild.txt -------------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[common] -subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo - -[component_0] -type = TargetGroup -name = ARM64 -parent = Target -has_asmparser = 1 -has_asmprinter = 1 -has_disassembler = 1 -has_jit = 1 - -[component_1] -type = Library -name = ARM64CodeGen -parent = ARM64 -required_libraries = ARM64AsmPrinter ARM64Desc ARM64Info Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h b/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h deleted file mode 100644 index 7717743..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h +++ /dev/null @@ -1,758 +0,0 @@ -//===- ARM64AddressingModes.h - ARM64 Addressing Modes ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM64 addressing mode implementation stuff. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H -#define LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H - -#include "llvm/ADT/APFloat.h" -#include "llvm/ADT/APInt.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include <cassert> - -namespace llvm { - -/// ARM64_AM - ARM64 Addressing Mode Stuff -namespace ARM64_AM { - -//===----------------------------------------------------------------------===// -// Shifts -// - -enum ShiftType { - InvalidShift = -1, - LSL = 0, - LSR = 1, - ASR = 2, - ROR = 3, - MSL = 4 -}; - -/// getShiftName - Get the string encoding for the shift type. -static inline const char *getShiftName(ARM64_AM::ShiftType ST) { - switch (ST) { - default: assert(false && "unhandled shift type!"); - case ARM64_AM::LSL: return "lsl"; - case ARM64_AM::LSR: return "lsr"; - case ARM64_AM::ASR: return "asr"; - case ARM64_AM::ROR: return "ror"; - case ARM64_AM::MSL: return "msl"; - } - return 0; -} - -/// getShiftType - Extract the shift type. -static inline ARM64_AM::ShiftType getShiftType(unsigned Imm) { - return ARM64_AM::ShiftType((Imm >> 6) & 0x7); -} - -/// getShiftValue - Extract the shift value. -static inline unsigned getShiftValue(unsigned Imm) { - return Imm & 0x3f; -} - -/// getShifterImm - Encode the shift type and amount: -/// imm: 6-bit shift amount -/// shifter: 000 ==> lsl -/// 001 ==> lsr -/// 010 ==> asr -/// 011 ==> ror -/// 100 ==> msl -/// {8-6} = shifter -/// {5-0} = imm -static inline unsigned getShifterImm(ARM64_AM::ShiftType ST, unsigned Imm) { - assert((Imm & 0x3f) == Imm && "Illegal shifted immedate value!"); - return (unsigned(ST) << 6) | (Imm & 0x3f); -} - -//===----------------------------------------------------------------------===// -// Extends -// - -enum ExtendType { - InvalidExtend = -1, - UXTB = 0, - UXTH = 1, - UXTW = 2, - UXTX = 3, - SXTB = 4, - SXTH = 5, - SXTW = 6, - SXTX = 7 -}; - -/// getExtendName - Get the string encoding for the extend type. -static inline const char *getExtendName(ARM64_AM::ExtendType ET) { - switch (ET) { - default: assert(false && "unhandled extend type!"); - case ARM64_AM::UXTB: return "uxtb"; - case ARM64_AM::UXTH: return "uxth"; - case ARM64_AM::UXTW: return "uxtw"; - case ARM64_AM::UXTX: return "uxtx"; - case ARM64_AM::SXTB: return "sxtb"; - case ARM64_AM::SXTH: return "sxth"; - case ARM64_AM::SXTW: return "sxtw"; - case ARM64_AM::SXTX: return "sxtx"; - } - return 0; -} - -/// getArithShiftValue - get the arithmetic shift value. -static inline unsigned getArithShiftValue(unsigned Imm) { - return Imm & 0x7; -} - -/// getExtendType - Extract the extend type for operands of arithmetic ops. -static inline ARM64_AM::ExtendType getArithExtendType(unsigned Imm) { - return ARM64_AM::ExtendType((Imm >> 3) & 0x7); -} - -/// getArithExtendImm - Encode the extend type and shift amount for an -/// arithmetic instruction: -/// imm: 3-bit extend amount -/// shifter: 000 ==> uxtb -/// 001 ==> uxth -/// 010 ==> uxtw -/// 011 ==> uxtx -/// 100 ==> sxtb -/// 101 ==> sxth -/// 110 ==> sxtw -/// 111 ==> sxtx -/// {5-3} = shifter -/// {2-0} = imm3 -static inline unsigned getArithExtendImm(ARM64_AM::ExtendType ET, - unsigned Imm) { - assert((Imm & 0x7) == Imm && "Illegal shifted immedate value!"); - return (unsigned(ET) << 3) | (Imm & 0x7); -} - -/// getMemDoShift - Extract the "do shift" flag value for load/store -/// instructions. 
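// For illustration, assuming the encoders above behave as written: the
// shifted-register operand packs the shift kind into bits {8-6} and the
// amount into bits {5-0}, so
//   getShifterImm(ARM64_AM::ASR, 3) == (2 << 6) | 3 == 0x83,
// and getShiftType(0x83)/getShiftValue(0x83) recover ASR and 3. The
// extended-register operand is analogous:
//   getArithExtendImm(ARM64_AM::SXTW, 2) == (6 << 3) | 2 == 0x32,
// which getArithExtendType/getArithShiftValue decode back to SXTW and 2.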
-static inline bool getMemDoShift(unsigned Imm) { - return (Imm & 0x1) != 0; -} - -/// getExtendType - Extract the extend type for the offset operand of -/// loads/stores. -static inline ARM64_AM::ExtendType getMemExtendType(unsigned Imm) { - return ARM64_AM::ExtendType((Imm >> 1) & 0x7); -} - -/// getExtendImm - Encode the extend type and amount for a load/store inst: -/// doshift: should the offset be scaled by the access size -/// shifter: 000 ==> uxtb -/// 001 ==> uxth -/// 010 ==> uxtw -/// 011 ==> uxtx -/// 100 ==> sxtb -/// 101 ==> sxth -/// 110 ==> sxtw -/// 111 ==> sxtx -/// {3-1} = shifter -/// {0} = doshift -static inline unsigned getMemExtendImm(ARM64_AM::ExtendType ET, bool DoShift) { - return (unsigned(ET) << 1) | unsigned(DoShift); -} - -//===----------------------------------------------------------------------===// -// Prefetch -// - -/// Pre-fetch operator names. -/// The enum values match the encoding values: -/// prfop<4:3> 00=preload data, 10=prepare for store -/// prfop<2:1> 00=target L1 cache, 01=target L2 cache, 10=target L3 cache, -/// prfop<0> 0=non-streaming (temporal), 1=streaming (non-temporal) -enum PrefetchOp { - InvalidPrefetchOp = -1, - PLDL1KEEP = 0x00, - PLDL1STRM = 0x01, - PLDL2KEEP = 0x02, - PLDL2STRM = 0x03, - PLDL3KEEP = 0x04, - PLDL3STRM = 0x05, - PSTL1KEEP = 0x10, - PSTL1STRM = 0x11, - PSTL2KEEP = 0x12, - PSTL2STRM = 0x13, - PSTL3KEEP = 0x14, - PSTL3STRM = 0x15 -}; - -/// isNamedPrefetchOp - Check if the prefetch-op 5-bit value has a name. -static inline bool isNamedPrefetchOp(unsigned prfop) { - switch (prfop) { - default: return false; - case ARM64_AM::PLDL1KEEP: case ARM64_AM::PLDL1STRM: case ARM64_AM::PLDL2KEEP: - case ARM64_AM::PLDL2STRM: case ARM64_AM::PLDL3KEEP: case ARM64_AM::PLDL3STRM: - case ARM64_AM::PSTL1KEEP: case ARM64_AM::PSTL1STRM: case ARM64_AM::PSTL2KEEP: - case ARM64_AM::PSTL2STRM: case ARM64_AM::PSTL3KEEP: case ARM64_AM::PSTL3STRM: - return true; - } -} - - -/// getPrefetchOpName - Get the string encoding for the prefetch operator. -static inline const char *getPrefetchOpName(ARM64_AM::PrefetchOp prfop) { - switch (prfop) { - default: assert(false && "unhandled prefetch-op type!"); - case ARM64_AM::PLDL1KEEP: return "pldl1keep"; - case ARM64_AM::PLDL1STRM: return "pldl1strm"; - case ARM64_AM::PLDL2KEEP: return "pldl2keep"; - case ARM64_AM::PLDL2STRM: return "pldl2strm"; - case ARM64_AM::PLDL3KEEP: return "pldl3keep"; - case ARM64_AM::PLDL3STRM: return "pldl3strm"; - case ARM64_AM::PSTL1KEEP: return "pstl1keep"; - case ARM64_AM::PSTL1STRM: return "pstl1strm"; - case ARM64_AM::PSTL2KEEP: return "pstl2keep"; - case ARM64_AM::PSTL2STRM: return "pstl2strm"; - case ARM64_AM::PSTL3KEEP: return "pstl3keep"; - case ARM64_AM::PSTL3STRM: return "pstl3strm"; - } - return 0; -} - -static inline uint64_t ror(uint64_t elt, unsigned size) { - return ((elt & 1) << (size-1)) | (elt >> 1); -} - -/// processLogicalImmediate - Determine if an immediate value can be encoded -/// as the immediate operand of a logical instruction for the given register -/// size. If so, return true with "encoding" set to the encoded value in -/// the form N:immr:imms. -static inline bool processLogicalImmediate(uint64_t imm, unsigned regSize, - uint64_t &encoding) { - if (imm == 0ULL || imm == ~0ULL || - (regSize != 64 && (imm >> regSize != 0 || imm == ~0U))) - return false; - - unsigned size = 2; - uint64_t eltVal = imm; - - // First, determine the element size. 
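// For example, assuming this search behaves as written: with
// imm == 0x00FF00FF00FF00FFULL and regSize == 64, element sizes 2, 4 and 8
// all fail the replication check (the low 0xFF byte is not mirrored in the
// byte above it), so the loop settles on size == 16 with eltVal == 0x00FF,
// i.e. a 16-bit element replicated four times across the register.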
- while (size < regSize) { - unsigned numElts = regSize / size; - unsigned mask = (1ULL << size) - 1; - uint64_t lowestEltVal = imm & mask; - - bool allMatched = true; - for (unsigned i = 1; i < numElts; ++i) { - uint64_t currEltVal = (imm >> (i*size)) & mask; - if (currEltVal != lowestEltVal) { - allMatched = false; - break; - } - } - - if (allMatched) { - eltVal = lowestEltVal; - break; - } - - size *= 2; - } - - // Second, determine the rotation to make the element be: 0^m 1^n. - for (unsigned i = 0; i < size; ++i) { - eltVal = ror(eltVal, size); - uint32_t clz = countLeadingZeros(eltVal) - (64 - size); - uint32_t cto = CountTrailingOnes_64(eltVal); - - if (clz + cto == size) { - // Encode in immr the number of RORs it would take to get *from* this - // element value to our target value, where i+1 is the number of RORs - // to go the opposite direction. - unsigned immr = size - (i + 1); - - // If size has a 1 in the n'th bit, create a value that has zeroes in - // bits [0, n] and ones above that. - uint64_t nimms = ~(size-1) << 1; - - // Or the CTO value into the low bits, which must be below the Nth bit - // bit mentioned above. - nimms |= (cto-1); - - // Extract the seventh bit and toggle it to create the N field. - unsigned N = ((nimms >> 6) & 1) ^ 1; - - encoding = (N << 12) | (immr << 6) | (nimms & 0x3f); - return true; - } - } - - return false; -} - -/// isLogicalImmediate - Return true if the immediate is valid for a logical -/// immediate instruction of the given register size. Return false otherwise. -static inline bool isLogicalImmediate(uint64_t imm, unsigned regSize) { - uint64_t encoding; - return processLogicalImmediate(imm, regSize, encoding); -} - -/// encodeLogicalImmediate - Return the encoded immediate value for a logical -/// immediate instruction of the given register size. -static inline uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize) { - uint64_t encoding = 0; - bool res = processLogicalImmediate(imm, regSize, encoding); - assert(res && "invalid logical immediate"); - (void)res; - return encoding; -} - -/// decodeLogicalImmediate - Decode a logical immediate value in the form -/// "N:immr:imms" (where the immr and imms fields are each 6 bits) into the -/// integer value it represents with regSize bits. -static inline uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize) { - // Extract the N, imms, and immr fields. - unsigned N = (val >> 12) & 1; - unsigned immr = (val >> 6) & 0x3f; - unsigned imms = val & 0x3f; - - assert((regSize == 64 || N == 0) && "undefined logical immediate encoding"); - int len = 31 - countLeadingZeros((N << 6) | (~imms & 0x3f)); - assert(len >= 0 && "undefined logical immediate encoding"); - unsigned size = (1 << len); - unsigned R = immr & (size - 1); - unsigned S = imms & (size - 1); - assert(S != size - 1 && "undefined logical immediate encoding"); - uint64_t pattern = (1ULL << (S + 1)) - 1; - for (unsigned i = 0; i < R; ++i) - pattern = ror(pattern, size); - - // Replicate the pattern to fill the regSize. - while (size != regSize) { - pattern |= (pattern << size); - size *= 2; - } - return pattern; -} - -/// isValidDecodeLogicalImmediate - Check to see if the logical immediate value -/// in the form "N:immr:imms" (where the immr and imms fields are each 6 bits) -/// is a valid encoding for an integer value with regSize bits. -static inline bool isValidDecodeLogicalImmediate(uint64_t val, - unsigned regSize) { - // Extract the N and imms fields needed for checking. 
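// Usage sketch (assuming the helpers above): isLogicalImmediate accepts a
// replicated mask such as 0x00FF00FF00FF00FFULL for regSize == 64, and
//   decodeLogicalImmediate(encodeLogicalImmediate(Imm, 64), 64) == Imm
// should hold for any immediate the encoder accepts; a value like
// 0x0000000012345678ULL, which has no single rotated run of ones, is rejected.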
- unsigned N = (val >> 12) & 1; - unsigned imms = val & 0x3f; - - if (regSize == 32 && N != 0) // undefined logical immediate encoding - return false; - int len = 31 - countLeadingZeros((N << 6) | (~imms & 0x3f)); - if (len < 0) // undefined logical immediate encoding - return false; - unsigned size = (1 << len); - unsigned S = imms & (size - 1); - if (S == size - 1) // undefined logical immediate encoding - return false; - - return true; -} - -//===----------------------------------------------------------------------===// -// Floating-point Immediates -// -static inline float getFPImmFloat(unsigned Imm) { - // We expect an 8-bit binary encoding of a floating-point number here. - union { - uint32_t I; - float F; - } FPUnion; - - uint8_t Sign = (Imm >> 7) & 0x1; - uint8_t Exp = (Imm >> 4) & 0x7; - uint8_t Mantissa = Imm & 0xf; - - // 8-bit FP iEEEE Float Encoding - // abcd efgh aBbbbbbc defgh000 00000000 00000000 - // - // where B = NOT(b); - - FPUnion.I = 0; - FPUnion.I |= Sign << 31; - FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30; - FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25; - FPUnion.I |= (Exp & 0x3) << 23; - FPUnion.I |= Mantissa << 19; - return FPUnion.F; -} - -/// getFP32Imm - Return an 8-bit floating-point version of the 32-bit -/// floating-point value. If the value cannot be represented as an 8-bit -/// floating-point value, then return -1. -static inline int getFP32Imm(const APInt &Imm) { - uint32_t Sign = Imm.lshr(31).getZExtValue() & 1; - int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127 - int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits - - // We can handle 4 bits of mantissa. - // mantissa = (16+UInt(e:f:g:h))/16. - if (Mantissa & 0x7ffff) - return -1; - Mantissa >>= 19; - if ((Mantissa & 0xf) != Mantissa) - return -1; - - // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 - if (Exp < -3 || Exp > 4) - return -1; - Exp = ((Exp+3) & 0x7) ^ 4; - - return ((int)Sign << 7) | (Exp << 4) | Mantissa; -} - -static inline int getFP32Imm(const APFloat &FPImm) { - return getFP32Imm(FPImm.bitcastToAPInt()); -} - -/// getFP64Imm - Return an 8-bit floating-point version of the 64-bit -/// floating-point value. If the value cannot be represented as an 8-bit -/// floating-point value, then return -1. -static inline int getFP64Imm(const APInt &Imm) { - uint64_t Sign = Imm.lshr(63).getZExtValue() & 1; - int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023 - uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL; - - // We can handle 4 bits of mantissa. - // mantissa = (16+UInt(e:f:g:h))/16. 
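// Worked example, assuming this encoder and getFPImmFloat above are inverses:
// 2.0f (sign 0, unbiased exponent 1, zero mantissa) encodes as imm8 0x00 and
// 1.0f encodes as 0x70; feeding 0x00 and 0x70 back through getFPImmFloat
// should reproduce 2.0f and 1.0f respectively.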
- if (Mantissa & 0xffffffffffffULL) - return -1; - Mantissa >>= 48; - if ((Mantissa & 0xf) != Mantissa) - return -1; - - // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 - if (Exp < -3 || Exp > 4) - return -1; - Exp = ((Exp+3) & 0x7) ^ 4; - - return ((int)Sign << 7) | (Exp << 4) | Mantissa; -} - -static inline int getFP64Imm(const APFloat &FPImm) { - return getFP64Imm(FPImm.bitcastToAPInt()); -} - -//===--------------------------------------------------------------------===// -// AdvSIMD Modified Immediates -//===--------------------------------------------------------------------===// - -// 0x00 0x00 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh -static inline bool isAdvSIMDModImmType1(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0xffffff00ffffff00ULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType1(uint64_t Imm) { - return (Imm & 0xffULL); -} - -static inline uint64_t decodeAdvSIMDModImmType1(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 32) | EncVal; -} - -// 0x00 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh 0x00 -static inline bool isAdvSIMDModImmType2(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0xffff00ffffff00ffULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType2(uint64_t Imm) { - return (Imm & 0xff00ULL) >> 8; -} - -static inline uint64_t decodeAdvSIMDModImmType2(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 40) | (EncVal << 8); -} - -// 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh 0x00 0x00 -static inline bool isAdvSIMDModImmType3(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0xff00ffffff00ffffULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType3(uint64_t Imm) { - return (Imm & 0xff0000ULL) >> 16; -} - -static inline uint64_t decodeAdvSIMDModImmType3(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 48) | (EncVal << 16); -} - -// abcdefgh 0x00 0x00 0x00 abcdefgh 0x00 0x00 0x00 -static inline bool isAdvSIMDModImmType4(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0x00ffffff00ffffffULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType4(uint64_t Imm) { - return (Imm & 0xff000000ULL) >> 24; -} - -static inline uint64_t decodeAdvSIMDModImmType4(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 56) | (EncVal << 24); -} - -// 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh -static inline bool isAdvSIMDModImmType5(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - (((Imm & 0x00ff0000ULL) >> 16) == (Imm & 0x000000ffULL)) && - ((Imm & 0xff00ff00ff00ff00ULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType5(uint64_t Imm) { - return (Imm & 0xffULL); -} - -static inline uint64_t decodeAdvSIMDModImmType5(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 48) | (EncVal << 32) | (EncVal << 16) | EncVal; -} - -// abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00 -static inline bool isAdvSIMDModImmType6(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - (((Imm & 0xff000000ULL) >> 16) == (Imm & 0x0000ff00ULL)) && - ((Imm & 0x00ff00ff00ff00ffULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType6(uint64_t Imm) { - return (Imm & 0xff00ULL) >> 8; -} - -static inline uint64_t decodeAdvSIMDModImmType6(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 56) | (EncVal << 40) | (EncVal << 24) | (EncVal << 8); -} - -// 0x00 0x00 abcdefgh 0xFF 0x00 0x00 abcdefgh 0xFF -static inline bool isAdvSIMDModImmType7(uint64_t 
Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0xffff00ffffff00ffULL) == 0x000000ff000000ffULL); -} - -static inline uint8_t encodeAdvSIMDModImmType7(uint64_t Imm) { - return (Imm & 0xff00ULL) >> 8; -} - -static inline uint64_t decodeAdvSIMDModImmType7(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 40) | (EncVal << 8) | 0x000000ff000000ffULL; -} - -// 0x00 abcdefgh 0xFF 0xFF 0x00 abcdefgh 0xFF 0xFF -static inline bool isAdvSIMDModImmType8(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0xff00ffffff00ffffULL) == 0x0000ffff0000ffffULL); -} - -static inline uint64_t decodeAdvSIMDModImmType8(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 48) | (EncVal << 16) | 0x0000ffff0000ffffULL; -} - -static inline uint8_t encodeAdvSIMDModImmType8(uint64_t Imm) { - return (Imm & 0x00ff0000ULL) >> 16; -} - -// abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh -static inline bool isAdvSIMDModImmType9(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm >> 48) == (Imm & 0x0000ffffULL)) && - ((Imm >> 56) == (Imm & 0x000000ffULL)); -} - -static inline uint8_t encodeAdvSIMDModImmType9(uint64_t Imm) { - return (Imm & 0xffULL); -} - -static inline uint64_t decodeAdvSIMDModImmType9(uint8_t Imm) { - uint64_t EncVal = Imm; - EncVal |= (EncVal << 8); - EncVal |= (EncVal << 16); - EncVal |= (EncVal << 32); - return EncVal; -} - -// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh -// cmode: 1110, op: 1 -static inline bool isAdvSIMDModImmType10(uint64_t Imm) { - uint64_t ByteA = Imm & 0xff00000000000000ULL; - uint64_t ByteB = Imm & 0x00ff000000000000ULL; - uint64_t ByteC = Imm & 0x0000ff0000000000ULL; - uint64_t ByteD = Imm & 0x000000ff00000000ULL; - uint64_t ByteE = Imm & 0x00000000ff000000ULL; - uint64_t ByteF = Imm & 0x0000000000ff0000ULL; - uint64_t ByteG = Imm & 0x000000000000ff00ULL; - uint64_t ByteH = Imm & 0x00000000000000ffULL; - - return (ByteA == 0ULL || ByteA == 0xff00000000000000ULL) && - (ByteB == 0ULL || ByteB == 0x00ff000000000000ULL) && - (ByteC == 0ULL || ByteC == 0x0000ff0000000000ULL) && - (ByteD == 0ULL || ByteD == 0x000000ff00000000ULL) && - (ByteE == 0ULL || ByteE == 0x00000000ff000000ULL) && - (ByteF == 0ULL || ByteF == 0x0000000000ff0000ULL) && - (ByteG == 0ULL || ByteG == 0x000000000000ff00ULL) && - (ByteH == 0ULL || ByteH == 0x00000000000000ffULL); -} - -static inline uint8_t encodeAdvSIMDModImmType10(uint64_t Imm) { - uint8_t BitA = (Imm & 0xff00000000000000ULL) != 0; - uint8_t BitB = (Imm & 0x00ff000000000000ULL) != 0; - uint8_t BitC = (Imm & 0x0000ff0000000000ULL) != 0; - uint8_t BitD = (Imm & 0x000000ff00000000ULL) != 0; - uint8_t BitE = (Imm & 0x00000000ff000000ULL) != 0; - uint8_t BitF = (Imm & 0x0000000000ff0000ULL) != 0; - uint8_t BitG = (Imm & 0x000000000000ff00ULL) != 0; - uint8_t BitH = (Imm & 0x00000000000000ffULL) != 0; - - uint8_t EncVal = BitA; - EncVal <<= 1; - EncVal |= BitB; - EncVal <<= 1; - EncVal |= BitC; - EncVal <<= 1; - EncVal |= BitD; - EncVal <<= 1; - EncVal |= BitE; - EncVal <<= 1; - EncVal |= BitF; - EncVal <<= 1; - EncVal |= BitG; - EncVal <<= 1; - EncVal |= BitH; - return EncVal; -} - -static inline uint64_t decodeAdvSIMDModImmType10(uint8_t Imm) { - uint64_t EncVal = 0; - if (Imm & 0x80) EncVal |= 0xff00000000000000ULL; - if (Imm & 0x40) EncVal |= 0x00ff000000000000ULL; - if (Imm & 0x20) EncVal |= 0x0000ff0000000000ULL; - if (Imm & 0x10) EncVal |= 0x000000ff00000000ULL; - if (Imm & 0x08) EncVal |= 0x00000000ff000000ULL; - 
if (Imm & 0x04) EncVal |= 0x0000000000ff0000ULL; - if (Imm & 0x02) EncVal |= 0x000000000000ff00ULL; - if (Imm & 0x01) EncVal |= 0x00000000000000ffULL; - return EncVal; -} - -// aBbbbbbc defgh000 0x00 0x00 aBbbbbbc defgh000 0x00 0x00 -static inline bool isAdvSIMDModImmType11(uint64_t Imm) { - uint64_t BString = (Imm & 0x7E000000ULL) >> 25; - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - (BString == 0x1f || BString == 0x20) && - ((Imm & 0x0007ffff0007ffffULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType11(uint64_t Imm) { - uint8_t BitA = (Imm & 0x80000000ULL) != 0; - uint8_t BitB = (Imm & 0x20000000ULL) != 0; - uint8_t BitC = (Imm & 0x01000000ULL) != 0; - uint8_t BitD = (Imm & 0x00800000ULL) != 0; - uint8_t BitE = (Imm & 0x00400000ULL) != 0; - uint8_t BitF = (Imm & 0x00200000ULL) != 0; - uint8_t BitG = (Imm & 0x00100000ULL) != 0; - uint8_t BitH = (Imm & 0x00080000ULL) != 0; - - uint8_t EncVal = BitA; - EncVal <<= 1; - EncVal |= BitB; - EncVal <<= 1; - EncVal |= BitC; - EncVal <<= 1; - EncVal |= BitD; - EncVal <<= 1; - EncVal |= BitE; - EncVal <<= 1; - EncVal |= BitF; - EncVal <<= 1; - EncVal |= BitG; - EncVal <<= 1; - EncVal |= BitH; - return EncVal; -} - -static inline uint64_t decodeAdvSIMDModImmType11(uint8_t Imm) { - uint64_t EncVal = 0; - if (Imm & 0x80) EncVal |= 0x80000000ULL; - if (Imm & 0x40) EncVal |= 0x3e000000ULL; - else EncVal |= 0x40000000ULL; - if (Imm & 0x20) EncVal |= 0x01000000ULL; - if (Imm & 0x10) EncVal |= 0x00800000ULL; - if (Imm & 0x08) EncVal |= 0x00400000ULL; - if (Imm & 0x04) EncVal |= 0x00200000ULL; - if (Imm & 0x02) EncVal |= 0x00100000ULL; - if (Imm & 0x01) EncVal |= 0x00080000ULL; - return (EncVal << 32) | EncVal; -} - -// aBbbbbbb bbcdefgh 0x00 0x00 0x00 0x00 0x00 0x00 -static inline bool isAdvSIMDModImmType12(uint64_t Imm) { - uint64_t BString = (Imm & 0x7fc0000000000000ULL) >> 54; - return ((BString == 0xff || BString == 0x100) && - ((Imm & 0x0000ffffffffffffULL) == 0)); -} - -static inline uint8_t encodeAdvSIMDModImmType12(uint64_t Imm) { - uint8_t BitA = (Imm & 0x8000000000000000ULL) != 0; - uint8_t BitB = (Imm & 0x0040000000000000ULL) != 0; - uint8_t BitC = (Imm & 0x0020000000000000ULL) != 0; - uint8_t BitD = (Imm & 0x0010000000000000ULL) != 0; - uint8_t BitE = (Imm & 0x0008000000000000ULL) != 0; - uint8_t BitF = (Imm & 0x0004000000000000ULL) != 0; - uint8_t BitG = (Imm & 0x0002000000000000ULL) != 0; - uint8_t BitH = (Imm & 0x0001000000000000ULL) != 0; - - uint8_t EncVal = BitA; - EncVal <<= 1; - EncVal |= BitB; - EncVal <<= 1; - EncVal |= BitC; - EncVal <<= 1; - EncVal |= BitD; - EncVal <<= 1; - EncVal |= BitE; - EncVal <<= 1; - EncVal |= BitF; - EncVal <<= 1; - EncVal |= BitG; - EncVal <<= 1; - EncVal |= BitH; - return EncVal; -} - -static inline uint64_t decodeAdvSIMDModImmType12(uint8_t Imm) { - uint64_t EncVal = 0; - if (Imm & 0x80) EncVal |= 0x8000000000000000ULL; - if (Imm & 0x40) EncVal |= 0x3fc0000000000000ULL; - else EncVal |= 0x4000000000000000ULL; - if (Imm & 0x20) EncVal |= 0x0020000000000000ULL; - if (Imm & 0x10) EncVal |= 0x0010000000000000ULL; - if (Imm & 0x08) EncVal |= 0x0008000000000000ULL; - if (Imm & 0x04) EncVal |= 0x0004000000000000ULL; - if (Imm & 0x02) EncVal |= 0x0002000000000000ULL; - if (Imm & 0x01) EncVal |= 0x0001000000000000ULL; - return (EncVal << 32) | EncVal; -} - -} // end namespace ARM64_AM - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp deleted file mode 100644 index 26813e2..0000000 --- 
a/lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp +++ /dev/null @@ -1,533 +0,0 @@ -//===-- ARM64AsmBackend.cpp - ARM64 Assembler Backend ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "ARM64.h" -#include "ARM64RegisterInfo.h" -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "llvm/ADT/Triple.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCDirectives.h" -#include "llvm/MC/MCFixupKindInfo.h" -#include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" -using namespace llvm; - -namespace { - -class ARM64AsmBackend : public MCAsmBackend { - static const unsigned PCRelFlagVal = - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits | MCFixupKindInfo::FKF_IsPCRel; - -public: - ARM64AsmBackend(const Target &T) : MCAsmBackend() {} - - unsigned getNumFixupKinds() const { return ARM64::NumTargetFixupKinds; } - - const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { - const static MCFixupKindInfo Infos[ARM64::NumTargetFixupKinds] = { - // This table *must* be in the order that the fixup_* kinds are defined in - // ARM64FixupKinds.h. - // - // Name Offset (bits) Size (bits) Flags - { "fixup_arm64_pcrel_adr_imm21", 0, 32, PCRelFlagVal }, - { "fixup_arm64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal }, - { "fixup_arm64_add_imm12", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale1", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale2", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale4", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale8", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale16", 10, 12, 0 }, - { "fixup_arm64_movw", 5, 16, 0 }, - { "fixup_arm64_pcrel_branch14", 5, 14, PCRelFlagVal }, - { "fixup_arm64_pcrel_imm19", 5, 19, PCRelFlagVal }, - { "fixup_arm64_pcrel_branch26", 0, 26, PCRelFlagVal }, - { "fixup_arm64_pcrel_call26", 0, 26, PCRelFlagVal }, - { "fixup_arm64_tlsdesc_call", 0, 0, 0 } - }; - - if (Kind < FirstTargetFixupKind) - return MCAsmBackend::getFixupKindInfo(Kind); - - assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && - "Invalid kind!"); - return Infos[Kind - FirstTargetFixupKind]; - } - - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel) const; - - bool mayNeedRelaxation(const MCInst &Inst) const; - bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const; - void relaxInstruction(const MCInst &Inst, MCInst &Res) const; - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const; - - void HandleAssemblerFlag(MCAssemblerFlag Flag) {} - - unsigned getPointerSize() const { return 8; } -}; - -} // end anonymous namespace - -/// \brief The number of bytes the fixup may change. 
-static unsigned getFixupKindNumBytes(unsigned Kind) { - switch (Kind) { - default: - assert(0 && "Unknown fixup kind!"); - - case ARM64::fixup_arm64_tlsdesc_call: - return 0; - - case FK_Data_1: - return 1; - - case FK_Data_2: - case ARM64::fixup_arm64_movw: - return 2; - - case ARM64::fixup_arm64_pcrel_branch14: - case ARM64::fixup_arm64_add_imm12: - case ARM64::fixup_arm64_ldst_imm12_scale1: - case ARM64::fixup_arm64_ldst_imm12_scale2: - case ARM64::fixup_arm64_ldst_imm12_scale4: - case ARM64::fixup_arm64_ldst_imm12_scale8: - case ARM64::fixup_arm64_ldst_imm12_scale16: - case ARM64::fixup_arm64_pcrel_imm19: - return 3; - - case ARM64::fixup_arm64_pcrel_adr_imm21: - case ARM64::fixup_arm64_pcrel_adrp_imm21: - case ARM64::fixup_arm64_pcrel_branch26: - case ARM64::fixup_arm64_pcrel_call26: - case FK_Data_4: - return 4; - - case FK_Data_8: - return 8; - } -} - -static unsigned AdrImmBits(unsigned Value) { - unsigned lo2 = Value & 0x3; - unsigned hi19 = (Value & 0x1ffffc) >> 2; - return (hi19 << 5) | (lo2 << 29); -} - -static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { - int64_t SignedValue = static_cast<int64_t>(Value); - switch (Kind) { - default: - assert(false && "Unknown fixup kind!"); - case ARM64::fixup_arm64_pcrel_adr_imm21: - if (SignedValue > 2097151 || SignedValue < -2097152) - report_fatal_error("fixup value out of range"); - return AdrImmBits(Value & 0x1fffffULL); - case ARM64::fixup_arm64_pcrel_adrp_imm21: - return AdrImmBits((Value & 0x1fffff000ULL) >> 12); - case ARM64::fixup_arm64_pcrel_imm19: - // Signed 21-bit immediate - if (SignedValue > 2097151 || SignedValue < -2097152) - report_fatal_error("fixup value out of range"); - // Low two bits are not encoded. - return (Value >> 2) & 0x7ffff; - case ARM64::fixup_arm64_add_imm12: - case ARM64::fixup_arm64_ldst_imm12_scale1: - // Unsigned 12-bit immediate - if (Value >= 0x1000) - report_fatal_error("invalid imm12 fixup value"); - return Value; - case ARM64::fixup_arm64_ldst_imm12_scale2: - // Unsigned 12-bit immediate which gets multiplied by 2 - if (Value & 1 || Value >= 0x2000) - report_fatal_error("invalid imm12 fixup value"); - return Value >> 1; - case ARM64::fixup_arm64_ldst_imm12_scale4: - // Unsigned 12-bit immediate which gets multiplied by 4 - if (Value & 3 || Value >= 0x4000) - report_fatal_error("invalid imm12 fixup value"); - return Value >> 2; - case ARM64::fixup_arm64_ldst_imm12_scale8: - // Unsigned 12-bit immediate which gets multiplied by 8 - if (Value & 7 || Value >= 0x8000) - report_fatal_error("invalid imm12 fixup value"); - return Value >> 3; - case ARM64::fixup_arm64_ldst_imm12_scale16: - // Unsigned 12-bit immediate which gets multiplied by 16 - if (Value & 15 || Value >= 0x10000) - report_fatal_error("invalid imm12 fixup value"); - return Value >> 4; - case ARM64::fixup_arm64_movw: - report_fatal_error("no resolvable MOVZ/MOVK fixups supported yet"); - return Value; - case ARM64::fixup_arm64_pcrel_branch14: - // Signed 16-bit immediate - if (SignedValue > 32767 || SignedValue < -32768) - report_fatal_error("fixup value out of range"); - // Low two bits are not encoded (4-byte alignment assumed). - if (Value & 0x3) - report_fatal_error("fixup not sufficiently aligned"); - return (Value >> 2) & 0x3fff; - case ARM64::fixup_arm64_pcrel_branch26: - case ARM64::fixup_arm64_pcrel_call26: - // Signed 28-bit immediate - if (SignedValue > 134217727 || SignedValue < -134217728) - report_fatal_error("fixup value out of range"); - // Low two bits are not encoded (4-byte alignment assumed). 
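// For instance, assuming this adjustment runs as written: a branch26
// displacement of +0x2000 bytes passes the range and alignment checks and is
// encoded as (0x2000 >> 2) & 0x3ffffff == 0x800. ADR/ADRP payloads are
// repacked by AdrImmBits above, e.g. AdrImmBits(0x1000) == 0x8000, with the
// low two bits of the value placed at bits 30:29 of the instruction word.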
- if (Value & 0x3) - report_fatal_error("fixup not sufficiently aligned"); - return (Value >> 2) & 0x3ffffff; - case FK_Data_1: - case FK_Data_2: - case FK_Data_4: - case FK_Data_8: - return Value; - } -} - -void ARM64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel) const { - unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); - if (!Value) - return; // Doesn't change encoding. - MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind()); - // Apply any target-specific value adjustments. - Value = adjustFixupValue(Fixup.getKind(), Value); - - // Shift the value into position. - Value <<= Info.TargetOffset; - - unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); - - // For each byte of the fragment that the fixup touches, mask in the - // bits from the fixup value. - for (unsigned i = 0; i != NumBytes; ++i) - Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); -} - -bool ARM64AsmBackend::mayNeedRelaxation(const MCInst &Inst) const { - return false; -} - -bool ARM64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const { - // FIXME: This isn't correct for ARM64. Just moving the "generic" logic - // into the targets for now. - // - // Relax if the value is too big for a (signed) i8. - return int64_t(Value) != int64_t(int8_t(Value)); -} - -void ARM64AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { - assert(false && "ARM64AsmBackend::relaxInstruction() unimplemented"); -} - -bool ARM64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { - // If the count is not 4-byte aligned, we must be writing data into the text - // section (otherwise we have unaligned instructions, and thus have far - // bigger problems), so just write zeros instead. - if ((Count & 3) != 0) { - for (uint64_t i = 0, e = (Count & 3); i != e; ++i) - OW->Write8(0); - } - - // We are properly aligned, so write NOPs as requested. - Count /= 4; - for (uint64_t i = 0; i != Count; ++i) - OW->Write32(0xd503201f); - return true; -} - -namespace { - -namespace CU { - -/// \brief Compact unwind encoding values. -enum CompactUnwindEncodings { - /// \brief A "frameless" leaf function, where no non-volatile registers are - /// saved. The return remains in LR throughout the function. - UNWIND_ARM64_MODE_FRAMELESS = 0x02000000, - - /// \brief No compact unwind encoding available. Instead the low 23-bits of - /// the compact unwind encoding is the offset of the DWARF FDE in the - /// __eh_frame section. This mode is never used in object files. It is only - /// generated by the linker in final linked images, which have only DWARF info - /// for a function. - UNWIND_ARM64_MODE_DWARF = 0x03000000, - - /// \brief This is a standard arm64 prologue where FP/LR are immediately - /// pushed on the stack, then SP is copied to FP. If there are any - /// non-volatile register saved, they are copied into the stack fame in pairs - /// in a contiguous ranger right below the saved FP/LR pair. Any subset of the - /// five X pairs and four D pairs can be saved, but the memory layout must be - /// in register number order. - UNWIND_ARM64_MODE_FRAME = 0x04000000, - - /// \brief Frame register pair encodings. 
- UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001, - UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002, - UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004, - UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008, - UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010, - UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100, - UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200, - UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400, - UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800 -}; - -} // end CU namespace - -// FIXME: This should be in a separate file. -class DarwinARM64AsmBackend : public ARM64AsmBackend { - const MCRegisterInfo &MRI; - - /// \brief Encode compact unwind stack adjustment for frameless functions. - /// See UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK in compact_unwind_encoding.h. - /// The stack size always needs to be 16 byte aligned. - uint32_t encodeStackAdjustment(uint32_t StackSize) const { - return (StackSize / 16) << 12; - } - -public: - DarwinARM64AsmBackend(const Target &T, const MCRegisterInfo &MRI) - : ARM64AsmBackend(T), MRI(MRI) {} - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createARM64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64, - MachO::CPU_SUBTYPE_ARM64_ALL); - } - - virtual bool doesSectionRequireSymbols(const MCSection &Section) const { - // Any section for which the linker breaks things into atoms needs to - // preserve symbols, including assembler local symbols, to identify - // those atoms. These sections are: - // Sections of type: - // - // S_CSTRING_LITERALS (e.g. __cstring) - // S_LITERAL_POINTERS (e.g. objc selector pointers) - // S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS - // - // Sections named: - // - // __TEXT,__eh_frame - // __TEXT,__ustring - // __DATA,__cfstring - // __DATA,__objc_classrefs - // __DATA,__objc_catlist - // - // FIXME: It would be better if the compiler used actual linker local - // symbols for each of these sections rather than preserving what - // are ostensibly assembler local symbols. - const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section); - return (SMO.getType() == MachO::S_CSTRING_LITERALS || - SMO.getType() == MachO::S_4BYTE_LITERALS || - SMO.getType() == MachO::S_8BYTE_LITERALS || - SMO.getType() == MachO::S_16BYTE_LITERALS || - SMO.getType() == MachO::S_LITERAL_POINTERS || - (SMO.getSegmentName() == "__TEXT" && - (SMO.getSectionName() == "__eh_frame" || - SMO.getSectionName() == "__ustring")) || - (SMO.getSegmentName() == "__DATA" && - (SMO.getSectionName() == "__cfstring" || - SMO.getSectionName() == "__objc_classrefs" || - SMO.getSectionName() == "__objc_catlist"))); - } - - /// \brief Generate the compact unwind encoding from the CFI directives. - virtual uint32_t - generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const - override { - if (Instrs.empty()) - return CU::UNWIND_ARM64_MODE_FRAMELESS; - - bool HasFP = false; - unsigned StackSize = 0; - - uint32_t CompactUnwindEncoding = 0; - for (size_t i = 0, e = Instrs.size(); i != e; ++i) { - const MCCFIInstruction &Inst = Instrs[i]; - - switch (Inst.getOperation()) { - default: - // Cannot handle this directive: bail out. - return CU::UNWIND_ARM64_MODE_DWARF; - case MCCFIInstruction::OpDefCfa: { - // Defines a frame pointer. 
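// The directive sequence this case expects is the usual AArch64 framed
// prologue, roughly:
//   .cfi_def_cfa w29, 16
//   .cfi_offset w30, -8
//   .cfi_offset w29, -16
// i.e. an OpDefCfa on the frame pointer followed immediately by the LR and
// FP pushes (checked by the asserts below); any directive this switch does
// not recognize hits the default case and returns UNWIND_ARM64_MODE_DWARF.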
- assert(getXRegFromWReg(MRI.getLLVMRegNum(Inst.getRegister(), true)) == - ARM64::FP && - "Invalid frame pointer!"); - assert(i + 2 < e && "Insufficient CFI instructions to define a frame!"); - - const MCCFIInstruction &LRPush = Instrs[++i]; - assert(LRPush.getOperation() == MCCFIInstruction::OpOffset && - "Link register not pushed!"); - const MCCFIInstruction &FPPush = Instrs[++i]; - assert(FPPush.getOperation() == MCCFIInstruction::OpOffset && - "Frame pointer not pushed!"); - - unsigned LRReg = MRI.getLLVMRegNum(LRPush.getRegister(), true); - unsigned FPReg = MRI.getLLVMRegNum(FPPush.getRegister(), true); - - LRReg = getXRegFromWReg(LRReg); - FPReg = getXRegFromWReg(FPReg); - - assert(LRReg == ARM64::LR && FPReg == ARM64::FP && - "Pushing invalid registers for frame!"); - - // Indicate that the function has a frame. - CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAME; - HasFP = true; - break; - } - case MCCFIInstruction::OpDefCfaOffset: { - assert(StackSize == 0 && "We already have the CFA offset!"); - StackSize = std::abs(Inst.getOffset()); - break; - } - case MCCFIInstruction::OpOffset: { - // Registers are saved in pairs. We expect there to be two consecutive - // `.cfi_offset' instructions with the appropriate registers specified. - unsigned Reg1 = MRI.getLLVMRegNum(Inst.getRegister(), true); - if (i + 1 == e) - return CU::UNWIND_ARM64_MODE_DWARF; - - const MCCFIInstruction &Inst2 = Instrs[++i]; - if (Inst2.getOperation() != MCCFIInstruction::OpOffset) - return CU::UNWIND_ARM64_MODE_DWARF; - unsigned Reg2 = MRI.getLLVMRegNum(Inst2.getRegister(), true); - - // N.B. The encodings must be in register number order, and the X - // registers before the D registers. - - // X19/X20 pair = 0x00000001, - // X21/X22 pair = 0x00000002, - // X23/X24 pair = 0x00000004, - // X25/X26 pair = 0x00000008, - // X27/X28 pair = 0x00000010 - Reg1 = getXRegFromWReg(Reg1); - Reg2 = getXRegFromWReg(Reg2); - - if (Reg1 == ARM64::X19 && Reg2 == ARM64::X20 && - (CompactUnwindEncoding & 0xF1E) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X19_X20_PAIR; - else if (Reg1 == ARM64::X21 && Reg2 == ARM64::X22 && - (CompactUnwindEncoding & 0xF1C) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X21_X22_PAIR; - else if (Reg1 == ARM64::X23 && Reg2 == ARM64::X24 && - (CompactUnwindEncoding & 0xF18) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X23_X24_PAIR; - else if (Reg1 == ARM64::X25 && Reg2 == ARM64::X26 && - (CompactUnwindEncoding & 0xF10) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X25_X26_PAIR; - else if (Reg1 == ARM64::X27 && Reg2 == ARM64::X28 && - (CompactUnwindEncoding & 0xF00) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X27_X28_PAIR; - else { - Reg1 = getDRegFromBReg(Reg1); - Reg2 = getDRegFromBReg(Reg2); - - // D8/D9 pair = 0x00000100, - // D10/D11 pair = 0x00000200, - // D12/D13 pair = 0x00000400, - // D14/D15 pair = 0x00000800 - if (Reg1 == ARM64::D8 && Reg2 == ARM64::D9 && - (CompactUnwindEncoding & 0xE00) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D8_D9_PAIR; - else if (Reg1 == ARM64::D10 && Reg2 == ARM64::D11 && - (CompactUnwindEncoding & 0xC00) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D10_D11_PAIR; - else if (Reg1 == ARM64::D12 && Reg2 == ARM64::D13 && - (CompactUnwindEncoding & 0x800) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D12_D13_PAIR; - else if (Reg1 == ARM64::D14 && Reg2 == ARM64::D15) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D14_D15_PAIR; - else - // A pair was pushed which we cannot handle. 
- return CU::UNWIND_ARM64_MODE_DWARF; - } - - break; - } - } - } - - if (!HasFP) { - // With compact unwind info we can only represent stack adjustments of up - // to 65520 bytes. - if (StackSize > 65520) - return CU::UNWIND_ARM64_MODE_DWARF; - - CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAMELESS; - CompactUnwindEncoding |= encodeStackAdjustment(StackSize); - } - - return CompactUnwindEncoding; - } -}; - -} // end anonymous namespace - -namespace { - -class ELFARM64AsmBackend : public ARM64AsmBackend { -public: - uint8_t OSABI; - - ELFARM64AsmBackend(const Target &T, uint8_t OSABI) - : ARM64AsmBackend(T), OSABI(OSABI) {} - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createARM64ELFObjectWriter(OS, OSABI); - } - - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; -}; - -void ELFARM64AsmBackend::processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, - const MCFragment *DF, - const MCValue &Target, - uint64_t &Value, bool &IsResolved) { - // The ADRP instruction adds some multiple of 0x1000 to the current PC & - // ~0xfff. This means that the required offset to reach a symbol can vary by - // up to one step depending on where the ADRP is in memory. For example: - // - // ADRP x0, there - // there: - // - // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and - // we'll need that as an offset. At any other address "there" will be in the - // same page as the ADRP and the instruction should encode 0x0. Assuming the - // section isn't 0x1000-aligned, we therefore need to delegate this decision - // to the linker -- a relocation! - if ((uint32_t)Fixup.getKind() == ARM64::fixup_arm64_pcrel_adrp_imm21) - IsResolved = false; -} -} - -MCAsmBackend *llvm::createARM64AsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin()) - return new DarwinARM64AsmBackend(T, MRI); - - assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target"); - return new ELFARM64AsmBackend(T, TheTriple.getOS()); -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64BaseInfo.h b/lib/Target/ARM64/MCTargetDesc/ARM64BaseInfo.h deleted file mode 100644 index d3c2cf7..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64BaseInfo.h +++ /dev/null @@ -1,998 +0,0 @@ -//===-- ARM64BaseInfo.h - Top level definitions for ARM64 -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains small standalone helper functions and enum definitions for -// the ARM64 target useful for the compiler back-end and the MC libraries. -// As such, it deliberately does not include references to LLVM core -// code gen types, passes, etc.. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ARM64BASEINFO_H -#define ARM64BASEINFO_H - -#include "ARM64MCTargetDesc.h" -#include "llvm/Support/ErrorHandling.h" - -namespace llvm { - -inline static unsigned getWRegFromXReg(unsigned Reg) { - switch (Reg) { - case ARM64::X0: return ARM64::W0; - case ARM64::X1: return ARM64::W1; - case ARM64::X2: return ARM64::W2; - case ARM64::X3: return ARM64::W3; - case ARM64::X4: return ARM64::W4; - case ARM64::X5: return ARM64::W5; - case ARM64::X6: return ARM64::W6; - case ARM64::X7: return ARM64::W7; - case ARM64::X8: return ARM64::W8; - case ARM64::X9: return ARM64::W9; - case ARM64::X10: return ARM64::W10; - case ARM64::X11: return ARM64::W11; - case ARM64::X12: return ARM64::W12; - case ARM64::X13: return ARM64::W13; - case ARM64::X14: return ARM64::W14; - case ARM64::X15: return ARM64::W15; - case ARM64::X16: return ARM64::W16; - case ARM64::X17: return ARM64::W17; - case ARM64::X18: return ARM64::W18; - case ARM64::X19: return ARM64::W19; - case ARM64::X20: return ARM64::W20; - case ARM64::X21: return ARM64::W21; - case ARM64::X22: return ARM64::W22; - case ARM64::X23: return ARM64::W23; - case ARM64::X24: return ARM64::W24; - case ARM64::X25: return ARM64::W25; - case ARM64::X26: return ARM64::W26; - case ARM64::X27: return ARM64::W27; - case ARM64::X28: return ARM64::W28; - case ARM64::FP: return ARM64::W29; - case ARM64::LR: return ARM64::W30; - case ARM64::SP: return ARM64::WSP; - case ARM64::XZR: return ARM64::WZR; - } - // For anything else, return it unchanged. - return Reg; -} - -inline static unsigned getXRegFromWReg(unsigned Reg) { - switch (Reg) { - case ARM64::W0: return ARM64::X0; - case ARM64::W1: return ARM64::X1; - case ARM64::W2: return ARM64::X2; - case ARM64::W3: return ARM64::X3; - case ARM64::W4: return ARM64::X4; - case ARM64::W5: return ARM64::X5; - case ARM64::W6: return ARM64::X6; - case ARM64::W7: return ARM64::X7; - case ARM64::W8: return ARM64::X8; - case ARM64::W9: return ARM64::X9; - case ARM64::W10: return ARM64::X10; - case ARM64::W11: return ARM64::X11; - case ARM64::W12: return ARM64::X12; - case ARM64::W13: return ARM64::X13; - case ARM64::W14: return ARM64::X14; - case ARM64::W15: return ARM64::X15; - case ARM64::W16: return ARM64::X16; - case ARM64::W17: return ARM64::X17; - case ARM64::W18: return ARM64::X18; - case ARM64::W19: return ARM64::X19; - case ARM64::W20: return ARM64::X20; - case ARM64::W21: return ARM64::X21; - case ARM64::W22: return ARM64::X22; - case ARM64::W23: return ARM64::X23; - case ARM64::W24: return ARM64::X24; - case ARM64::W25: return ARM64::X25; - case ARM64::W26: return ARM64::X26; - case ARM64::W27: return ARM64::X27; - case ARM64::W28: return ARM64::X28; - case ARM64::W29: return ARM64::FP; - case ARM64::W30: return ARM64::LR; - case ARM64::WSP: return ARM64::SP; - case ARM64::WZR: return ARM64::XZR; - } - // For anything else, return it unchanged. 
- return Reg; -} - -static inline unsigned getBRegFromDReg(unsigned Reg) { - switch (Reg) { - case ARM64::D0: return ARM64::B0; - case ARM64::D1: return ARM64::B1; - case ARM64::D2: return ARM64::B2; - case ARM64::D3: return ARM64::B3; - case ARM64::D4: return ARM64::B4; - case ARM64::D5: return ARM64::B5; - case ARM64::D6: return ARM64::B6; - case ARM64::D7: return ARM64::B7; - case ARM64::D8: return ARM64::B8; - case ARM64::D9: return ARM64::B9; - case ARM64::D10: return ARM64::B10; - case ARM64::D11: return ARM64::B11; - case ARM64::D12: return ARM64::B12; - case ARM64::D13: return ARM64::B13; - case ARM64::D14: return ARM64::B14; - case ARM64::D15: return ARM64::B15; - case ARM64::D16: return ARM64::B16; - case ARM64::D17: return ARM64::B17; - case ARM64::D18: return ARM64::B18; - case ARM64::D19: return ARM64::B19; - case ARM64::D20: return ARM64::B20; - case ARM64::D21: return ARM64::B21; - case ARM64::D22: return ARM64::B22; - case ARM64::D23: return ARM64::B23; - case ARM64::D24: return ARM64::B24; - case ARM64::D25: return ARM64::B25; - case ARM64::D26: return ARM64::B26; - case ARM64::D27: return ARM64::B27; - case ARM64::D28: return ARM64::B28; - case ARM64::D29: return ARM64::B29; - case ARM64::D30: return ARM64::B30; - case ARM64::D31: return ARM64::B31; - } - // For anything else, return it unchanged. - return Reg; -} - - -static inline unsigned getDRegFromBReg(unsigned Reg) { - switch (Reg) { - case ARM64::B0: return ARM64::D0; - case ARM64::B1: return ARM64::D1; - case ARM64::B2: return ARM64::D2; - case ARM64::B3: return ARM64::D3; - case ARM64::B4: return ARM64::D4; - case ARM64::B5: return ARM64::D5; - case ARM64::B6: return ARM64::D6; - case ARM64::B7: return ARM64::D7; - case ARM64::B8: return ARM64::D8; - case ARM64::B9: return ARM64::D9; - case ARM64::B10: return ARM64::D10; - case ARM64::B11: return ARM64::D11; - case ARM64::B12: return ARM64::D12; - case ARM64::B13: return ARM64::D13; - case ARM64::B14: return ARM64::D14; - case ARM64::B15: return ARM64::D15; - case ARM64::B16: return ARM64::D16; - case ARM64::B17: return ARM64::D17; - case ARM64::B18: return ARM64::D18; - case ARM64::B19: return ARM64::D19; - case ARM64::B20: return ARM64::D20; - case ARM64::B21: return ARM64::D21; - case ARM64::B22: return ARM64::D22; - case ARM64::B23: return ARM64::D23; - case ARM64::B24: return ARM64::D24; - case ARM64::B25: return ARM64::D25; - case ARM64::B26: return ARM64::D26; - case ARM64::B27: return ARM64::D27; - case ARM64::B28: return ARM64::D28; - case ARM64::B29: return ARM64::D29; - case ARM64::B30: return ARM64::D30; - case ARM64::B31: return ARM64::D31; - } - // For anything else, return it unchanged. - return Reg; -} - -namespace ARM64CC { - -// The CondCodes constants map directly to the 4-bit encoding of the condition -// field for predicated instructions. 
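// Aside, a minimal stand-alone sketch (helper/test code invented for
// illustration, not from the original sources): in the CondCode encoding
// below, each condition and its inverse form an even/odd pair, so the
// getInvertedCondCode() table further down is equivalent to flipping bit 0
// (AL, 0xe, has no inverse and is excluded).
  #include <cassert>
  int main() {
    // Values mirror the CondCode encodings below.
    const unsigned EQ = 0x0, NE = 0x1, CS = 0x2, CC = 0x3,
                   GE = 0xa, LT = 0xb, GT = 0xc, LE = 0xd;
    assert((EQ ^ 1) == NE && (CS ^ 1) == CC); // eq<->ne, cs<->cc
    assert((GE ^ 1) == LT && (GT ^ 1) == LE); // ge<->lt, gt<->le
    return 0;
  }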
-enum CondCode { // Meaning (integer) Meaning (floating-point) - EQ = 0x0, // Equal Equal - NE = 0x1, // Not equal Not equal, or unordered - CS = 0x2, // Carry set >, ==, or unordered - CC = 0x3, // Carry clear Less than - MI = 0x4, // Minus, negative Less than - PL = 0x5, // Plus, positive or zero >, ==, or unordered - VS = 0x6, // Overflow Unordered - VC = 0x7, // No overflow Not unordered - HI = 0x8, // Unsigned higher Greater than, or unordered - LS = 0x9, // Unsigned lower or same Less than or equal - GE = 0xa, // Greater than or equal Greater than or equal - LT = 0xb, // Less than Less than, or unordered - GT = 0xc, // Greater than Greater than - LE = 0xd, // Less than or equal <, ==, or unordered - AL = 0xe // Always (unconditional) Always (unconditional) -}; - -inline static const char *getCondCodeName(CondCode Code) { - // cond<0> is ignored when cond<3:1> = 111, where 1110 is 0xe (aka AL). - if ((Code & AL) == AL) - Code = AL; - switch (Code) { - case EQ: return "eq"; - case NE: return "ne"; - case CS: return "cs"; - case CC: return "cc"; - case MI: return "mi"; - case PL: return "pl"; - case VS: return "vs"; - case VC: return "vc"; - case HI: return "hi"; - case LS: return "ls"; - case GE: return "ge"; - case LT: return "lt"; - case GT: return "gt"; - case LE: return "le"; - case AL: return "al"; - } - llvm_unreachable("Unknown condition code"); -} - -inline static CondCode getInvertedCondCode(CondCode Code) { - switch (Code) { - default: llvm_unreachable("Unknown condition code"); - case EQ: return NE; - case NE: return EQ; - case CS: return CC; - case CC: return CS; - case MI: return PL; - case PL: return MI; - case VS: return VC; - case VC: return VS; - case HI: return LS; - case LS: return HI; - case GE: return LT; - case LT: return GE; - case GT: return LE; - case LE: return GT; - } -} - -/// Given a condition code, return NZCV flags that would satisfy that condition. -/// The flag bits are in the format expected by the ccmp instructions. -/// Note that many different flag settings can satisfy a given condition code, -/// this function just returns one of them. -inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) { - // NZCV flags encoded as expected by ccmp instructions, ARMv8 ISA 5.5.7. 
- enum { N = 8, Z = 4, C = 2, V = 1 }; - switch (Code) { - default: llvm_unreachable("Unknown condition code"); - case EQ: return Z; // Z == 1 - case NE: return 0; // Z == 0 - case CS: return C; // C == 1 - case CC: return 0; // C == 0 - case MI: return N; // N == 1 - case PL: return 0; // N == 0 - case VS: return V; // V == 1 - case VC: return 0; // V == 0 - case HI: return C; // C == 1 && Z == 0 - case LS: return 0; // C == 0 || Z == 1 - case GE: return 0; // N == V - case LT: return N; // N != V - case GT: return 0; // Z == 0 && N == V - case LE: return Z; // Z == 1 || N != V - } -} -} // end namespace ARM64CC - -namespace ARM64SYS { -enum BarrierOption { - InvalidBarrier = 0xff, - OSHLD = 0x1, - OSHST = 0x2, - OSH = 0x3, - NSHLD = 0x5, - NSHST = 0x6, - NSH = 0x7, - ISHLD = 0x9, - ISHST = 0xa, - ISH = 0xb, - LD = 0xd, - ST = 0xe, - SY = 0xf -}; - -inline static const char *getBarrierOptName(BarrierOption Opt) { - switch (Opt) { - default: return NULL; - case 0x1: return "oshld"; - case 0x2: return "oshst"; - case 0x3: return "osh"; - case 0x5: return "nshld"; - case 0x6: return "nshst"; - case 0x7: return "nsh"; - case 0x9: return "ishld"; - case 0xa: return "ishst"; - case 0xb: return "ish"; - case 0xd: return "ld"; - case 0xe: return "st"; - case 0xf: return "sy"; - } -} - -#define A64_SYSREG_ENC(op0,CRn,op2,CRm,op1) ((op0) << 14 | (op1) << 11 | \ - (CRn) << 7 | (CRm) << 3 | (op2)) -enum SystemRegister { - InvalidSystemReg = 0, - // Table in section 3.10.3 - SPSR_EL1 = 0xc200, - SPSR_svc = SPSR_EL1, - ELR_EL1 = 0xc201, - SP_EL0 = 0xc208, - SPSel = 0xc210, - CurrentEL = 0xc212, - DAIF = 0xda11, - NZCV = 0xda10, - FPCR = 0xda20, - FPSR = 0xda21, - DSPSR = 0xda28, - DLR = 0xda29, - SPSR_EL2 = 0xe200, - SPSR_hyp = SPSR_EL2, - ELR_EL2 = 0xe201, - SP_EL1 = 0xe208, - SPSR_irq = 0xe218, - SPSR_abt = 0xe219, - SPSR_und = 0xe21a, - SPSR_fiq = 0xe21b, - SPSR_EL3 = 0xf200, - ELR_EL3 = 0xf201, - SP_EL2 = 0xf208, - - - // Table in section 3.10.8 - MIDR_EL1 = 0xc000, - CTR_EL0 = 0xd801, - MPIDR_EL1 = 0xc005, - ECOIDR_EL1 = 0xc006, - DCZID_EL0 = 0xd807, - MVFR0_EL1 = 0xc018, - MVFR1_EL1 = 0xc019, - ID_AA64PFR0_EL1 = 0xc020, - ID_AA64PFR1_EL1 = 0xc021, - ID_AA64DFR0_EL1 = 0xc028, - ID_AA64DFR1_EL1 = 0xc029, - ID_AA64ISAR0_EL1 = 0xc030, - ID_AA64ISAR1_EL1 = 0xc031, - ID_AA64MMFR0_EL1 = 0xc038, - ID_AA64MMFR1_EL1 = 0xc039, - CCSIDR_EL1 = 0xc800, - CLIDR_EL1 = 0xc801, - AIDR_EL1 = 0xc807, - CSSELR_EL1 = 0xd000, - VPIDR_EL2 = 0xe000, - VMPIDR_EL2 = 0xe005, - SCTLR_EL1 = 0xc080, - SCTLR_EL2 = 0xe080, - SCTLR_EL3 = 0xf080, - ACTLR_EL1 = 0xc081, - ACTLR_EL2 = 0xe081, - ACTLR_EL3 = 0xf081, - CPACR_EL1 = 0xc082, - CPTR_EL2 = 0xe08a, - CPTR_EL3 = 0xf08a, - SCR_EL3 = 0xf088, - HCR_EL2 = 0xe088, - MDCR_EL2 = 0xe089, - MDCR_EL3 = 0xf099, - HSTR_EL2 = 0xe08b, - HACR_EL2 = 0xe08f, - TTBR0_EL1 = 0xc100, - TTBR1_EL1 = 0xc101, - TTBR0_EL2 = 0xe100, - TTBR0_EL3 = 0xf100, - VTTBR_EL2 = 0xe108, - TCR_EL1 = 0xc102, - TCR_EL2 = 0xe102, - TCR_EL3 = 0xf102, - VTCR_EL2 = 0xe10a, - ADFSR_EL1 = 0xc288, - AIFSR_EL1 = 0xc289, - ADFSR_EL2 = 0xe288, - AIFSR_EL2 = 0xe289, - ADFSR_EL3 = 0xf288, - AIFSR_EL3 = 0xf289, - ESR_EL1 = 0xc290, - ESR_EL2 = 0xe290, - ESR_EL3 = 0xf290, - FAR_EL1 = 0xc300, - FAR_EL2 = 0xe300, - FAR_EL3 = 0xf300, - HPFAR_EL2 = 0xe304, - PAR_EL1 = 0xc3a0, - MAIR_EL1 = 0xc510, - MAIR_EL2 = 0xe510, - MAIR_EL3 = 0xf510, - AMAIR_EL1 = 0xc518, - AMAIR_EL2 = 0xe518, - AMAIR_EL3 = 0xf518, - VBAR_EL1 = 0xc600, - VBAR_EL2 = 0xe600, - VBAR_EL3 = 0xf600, - RVBAR_EL1 = 0xc601, - RVBAR_EL2 = 0xe601, - RVBAR_EL3 = 0xf601, - 
ISR_EL1 = 0xc608, - CONTEXTIDR_EL1 = 0xc681, - TPIDR_EL0 = 0xde82, - TPIDRRO_EL0 = 0xde83, - TPIDR_EL1 = 0xc684, - TPIDR_EL2 = 0xe682, - TPIDR_EL3 = 0xf682, - TEECR32_EL1 = 0x9000, - CNTFRQ_EL0 = 0xdf00, - CNTPCT_EL0 = 0xdf01, - CNTVCT_EL0 = 0xdf02, - CNTVOFF_EL2 = 0xe703, - CNTKCTL_EL1 = 0xc708, - CNTHCTL_EL2 = 0xe708, - CNTP_TVAL_EL0 = 0xdf10, - CNTP_CTL_EL0 = 0xdf11, - CNTP_CVAL_EL0 = 0xdf12, - CNTV_TVAL_EL0 = 0xdf18, - CNTV_CTL_EL0 = 0xdf19, - CNTV_CVAL_EL0 = 0xdf1a, - CNTHP_TVAL_EL2 = 0xe710, - CNTHP_CTL_EL2 = 0xe711, - CNTHP_CVAL_EL2 = 0xe712, - CNTPS_TVAL_EL1 = 0xff10, - CNTPS_CTL_EL1 = 0xff11, - CNTPS_CVAL_EL1= 0xff12, - - PMEVCNTR0_EL0 = 0xdf40, - PMEVCNTR1_EL0 = 0xdf41, - PMEVCNTR2_EL0 = 0xdf42, - PMEVCNTR3_EL0 = 0xdf43, - PMEVCNTR4_EL0 = 0xdf44, - PMEVCNTR5_EL0 = 0xdf45, - PMEVCNTR6_EL0 = 0xdf46, - PMEVCNTR7_EL0 = 0xdf47, - PMEVCNTR8_EL0 = 0xdf48, - PMEVCNTR9_EL0 = 0xdf49, - PMEVCNTR10_EL0 = 0xdf4a, - PMEVCNTR11_EL0 = 0xdf4b, - PMEVCNTR12_EL0 = 0xdf4c, - PMEVCNTR13_EL0 = 0xdf4d, - PMEVCNTR14_EL0 = 0xdf4e, - PMEVCNTR15_EL0 = 0xdf4f, - PMEVCNTR16_EL0 = 0xdf50, - PMEVCNTR17_EL0 = 0xdf51, - PMEVCNTR18_EL0 = 0xdf52, - PMEVCNTR19_EL0 = 0xdf53, - PMEVCNTR20_EL0 = 0xdf54, - PMEVCNTR21_EL0 = 0xdf55, - PMEVCNTR22_EL0 = 0xdf56, - PMEVCNTR23_EL0 = 0xdf57, - PMEVCNTR24_EL0 = 0xdf58, - PMEVCNTR25_EL0 = 0xdf59, - PMEVCNTR26_EL0 = 0xdf5a, - PMEVCNTR27_EL0 = 0xdf5b, - PMEVCNTR28_EL0 = 0xdf5c, - PMEVCNTR29_EL0 = 0xdf5d, - PMEVCNTR30_EL0 = 0xdf5e, - - PMEVTYPER0_EL0 = 0xdf60, - PMEVTYPER1_EL0 = 0xdf61, - PMEVTYPER2_EL0 = 0xdf62, - PMEVTYPER3_EL0 = 0xdf63, - PMEVTYPER4_EL0 = 0xdf64, - PMEVTYPER5_EL0 = 0xdf65, - PMEVTYPER6_EL0 = 0xdf66, - PMEVTYPER7_EL0 = 0xdf67, - PMEVTYPER8_EL0 = 0xdf68, - PMEVTYPER9_EL0 = 0xdf69, - PMEVTYPER10_EL0 = 0xdf6a, - PMEVTYPER11_EL0 = 0xdf6b, - PMEVTYPER12_EL0 = 0xdf6c, - PMEVTYPER13_EL0 = 0xdf6d, - PMEVTYPER14_EL0 = 0xdf6e, - PMEVTYPER15_EL0 = 0xdf6f, - PMEVTYPER16_EL0 = 0xdf70, - PMEVTYPER17_EL0 = 0xdf71, - PMEVTYPER18_EL0 = 0xdf72, - PMEVTYPER19_EL0 = 0xdf73, - PMEVTYPER20_EL0 = 0xdf74, - PMEVTYPER21_EL0 = 0xdf75, - PMEVTYPER22_EL0 = 0xdf76, - PMEVTYPER23_EL0 = 0xdf77, - PMEVTYPER24_EL0 = 0xdf78, - PMEVTYPER25_EL0 = 0xdf79, - PMEVTYPER26_EL0 = 0xdf7a, - PMEVTYPER27_EL0 = 0xdf7b, - PMEVTYPER28_EL0 = 0xdf7c, - PMEVTYPER29_EL0 = 0xdf7d, - PMEVTYPER30_EL0 = 0xdf7e, - - PMCCFILTR_EL0 = 0xdf7f, - - RMR_EL3 = 0xf602, - RMR_EL2 = 0xd602, - RMR_EL1 = 0xce02, - - // Debug Architecture 5.3, Table 17. 
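// Aside, a small stand-alone sketch of the encoding formula (the enc() helper
// is invented for illustration, not from the original sources): the
// A64_SYSREG_ENC macro above packs the five MRS/MSR fields into 16 bits as
// op0[15:14] | op1[13:11] | CRn[10:7] | CRm[6:3] | op2[2:0]; note the macro
// takes its arguments in the order (op0, CRn, op2, CRm, op1). The literal
// constants earlier in this enum use the same packing.
  #include <cstdint>
  // Hypothetical helper mirroring A64_SYSREG_ENC, fields in op0..op2 order.
  constexpr uint16_t enc(unsigned op0, unsigned op1, unsigned CRn,
                         unsigned CRm, unsigned op2) {
    return (op0 << 14) | (op1 << 11) | (CRn << 7) | (CRm << 3) | op2;
  }
  static_assert(enc(3, 0, 4, 0, 0) == 0xc200, "matches SPSR_EL1 above");
  static_assert(enc(2, 3, 0, 1, 0) == 0x9808, "matches MDCCSR_EL0 below");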
- MDCCSR_EL0 = A64_SYSREG_ENC(2, 0, 0, 1, 3), - MDCCINT_EL1 = A64_SYSREG_ENC(2, 0, 0, 2, 0), - DBGDTR_EL0 = A64_SYSREG_ENC(2, 0, 0, 4, 3), - DBGDTRRX_EL0 = A64_SYSREG_ENC(2, 0, 0, 5, 3), - DBGDTRTX_EL0 = DBGDTRRX_EL0, - DBGVCR32_EL2 = A64_SYSREG_ENC(2, 0, 0, 7, 4), - OSDTRRX_EL1 = A64_SYSREG_ENC(2, 0, 2, 0, 0), - MDSCR_EL1 = A64_SYSREG_ENC(2, 0, 2, 2, 0), - OSDTRTX_EL1 = A64_SYSREG_ENC(2, 0, 2, 3, 0), - OSECCR_EL11 = A64_SYSREG_ENC(2, 0, 2, 6, 0), - - DBGBVR0_EL1 = A64_SYSREG_ENC(2, 0, 4, 0, 0), - DBGBVR1_EL1 = A64_SYSREG_ENC(2, 0, 4, 1, 0), - DBGBVR2_EL1 = A64_SYSREG_ENC(2, 0, 4, 2, 0), - DBGBVR3_EL1 = A64_SYSREG_ENC(2, 0, 4, 3, 0), - DBGBVR4_EL1 = A64_SYSREG_ENC(2, 0, 4, 4, 0), - DBGBVR5_EL1 = A64_SYSREG_ENC(2, 0, 4, 5, 0), - DBGBVR6_EL1 = A64_SYSREG_ENC(2, 0, 4, 6, 0), - DBGBVR7_EL1 = A64_SYSREG_ENC(2, 0, 4, 7, 0), - DBGBVR8_EL1 = A64_SYSREG_ENC(2, 0, 4, 8, 0), - DBGBVR9_EL1 = A64_SYSREG_ENC(2, 0, 4, 9, 0), - DBGBVR10_EL1 = A64_SYSREG_ENC(2, 0, 4, 10, 0), - DBGBVR11_EL1 = A64_SYSREG_ENC(2, 0, 4, 11, 0), - DBGBVR12_EL1 = A64_SYSREG_ENC(2, 0, 4, 12, 0), - DBGBVR13_EL1 = A64_SYSREG_ENC(2, 0, 4, 13, 0), - DBGBVR14_EL1 = A64_SYSREG_ENC(2, 0, 4, 14, 0), - DBGBVR15_EL1 = A64_SYSREG_ENC(2, 0, 4, 15, 0), - - DBGBCR0_EL1 = A64_SYSREG_ENC(2, 0, 5, 0, 0), - DBGBCR1_EL1 = A64_SYSREG_ENC(2, 0, 5, 1, 0), - DBGBCR2_EL1 = A64_SYSREG_ENC(2, 0, 5, 2, 0), - DBGBCR3_EL1 = A64_SYSREG_ENC(2, 0, 5, 3, 0), - DBGBCR4_EL1 = A64_SYSREG_ENC(2, 0, 5, 4, 0), - DBGBCR5_EL1 = A64_SYSREG_ENC(2, 0, 5, 5, 0), - DBGBCR6_EL1 = A64_SYSREG_ENC(2, 0, 5, 6, 0), - DBGBCR7_EL1 = A64_SYSREG_ENC(2, 0, 5, 7, 0), - DBGBCR8_EL1 = A64_SYSREG_ENC(2, 0, 5, 8, 0), - DBGBCR9_EL1 = A64_SYSREG_ENC(2, 0, 5, 9, 0), - DBGBCR10_EL1 = A64_SYSREG_ENC(2, 0, 5, 10, 0), - DBGBCR11_EL1 = A64_SYSREG_ENC(2, 0, 5, 11, 0), - DBGBCR12_EL1 = A64_SYSREG_ENC(2, 0, 5, 12, 0), - DBGBCR13_EL1 = A64_SYSREG_ENC(2, 0, 5, 13, 0), - DBGBCR14_EL1 = A64_SYSREG_ENC(2, 0, 5, 14, 0), - DBGBCR15_EL1 = A64_SYSREG_ENC(2, 0, 5, 15, 0), - - DBGWVR0_EL1 = A64_SYSREG_ENC(2, 0, 6, 0, 0), - DBGWVR1_EL1 = A64_SYSREG_ENC(2, 0, 6, 1, 0), - DBGWVR2_EL1 = A64_SYSREG_ENC(2, 0, 6, 2, 0), - DBGWVR3_EL1 = A64_SYSREG_ENC(2, 0, 6, 3, 0), - DBGWVR4_EL1 = A64_SYSREG_ENC(2, 0, 6, 4, 0), - DBGWVR5_EL1 = A64_SYSREG_ENC(2, 0, 6, 5, 0), - DBGWVR6_EL1 = A64_SYSREG_ENC(2, 0, 6, 6, 0), - DBGWVR7_EL1 = A64_SYSREG_ENC(2, 0, 6, 7, 0), - DBGWVR8_EL1 = A64_SYSREG_ENC(2, 0, 6, 8, 0), - DBGWVR9_EL1 = A64_SYSREG_ENC(2, 0, 6, 9, 0), - DBGWVR10_EL1 = A64_SYSREG_ENC(2, 0, 6, 10, 0), - DBGWVR11_EL1 = A64_SYSREG_ENC(2, 0, 6, 11, 0), - DBGWVR12_EL1 = A64_SYSREG_ENC(2, 0, 6, 12, 0), - DBGWVR13_EL1 = A64_SYSREG_ENC(2, 0, 6, 13, 0), - DBGWVR14_EL1 = A64_SYSREG_ENC(2, 0, 6, 14, 0), - DBGWVR15_EL1 = A64_SYSREG_ENC(2, 0, 6, 15, 0), - - DBGWCR0_EL1 = A64_SYSREG_ENC(2, 0, 7, 0, 0), - DBGWCR1_EL1 = A64_SYSREG_ENC(2, 0, 7, 1, 0), - DBGWCR2_EL1 = A64_SYSREG_ENC(2, 0, 7, 2, 0), - DBGWCR3_EL1 = A64_SYSREG_ENC(2, 0, 7, 3, 0), - DBGWCR4_EL1 = A64_SYSREG_ENC(2, 0, 7, 4, 0), - DBGWCR5_EL1 = A64_SYSREG_ENC(2, 0, 7, 5, 0), - DBGWCR6_EL1 = A64_SYSREG_ENC(2, 0, 7, 6, 0), - DBGWCR7_EL1 = A64_SYSREG_ENC(2, 0, 7, 7, 0), - DBGWCR8_EL1 = A64_SYSREG_ENC(2, 0, 7, 8, 0), - DBGWCR9_EL1 = A64_SYSREG_ENC(2, 0, 7, 9, 0), - DBGWCR10_EL1 = A64_SYSREG_ENC(2, 0, 7, 10, 0), - DBGWCR11_EL1 = A64_SYSREG_ENC(2, 0, 7, 11, 0), - DBGWCR12_EL1 = A64_SYSREG_ENC(2, 0, 7, 12, 0), - DBGWCR13_EL1 = A64_SYSREG_ENC(2, 0, 7, 13, 0), - DBGWCR14_EL1 = A64_SYSREG_ENC(2, 0, 7, 14, 0), - DBGWCR15_EL1 = A64_SYSREG_ENC(2, 0, 7, 15, 0), - - MDRAR_EL1 = A64_SYSREG_ENC(2, 1, 
0, 0, 0), - OSLAR_EL1 = A64_SYSREG_ENC(2, 1, 4, 0, 0), - OSLSR_EL1 = A64_SYSREG_ENC(2, 1, 4, 1, 0), - OSDLR_EL1 = A64_SYSREG_ENC(2, 1, 4, 3, 0), - DBGPRCR_EL1 = A64_SYSREG_ENC(2, 1, 4, 4, 0), - - DBGCLAIMSET_EL1 = A64_SYSREG_ENC(2, 7, 6, 8, 0), - DBGCLAIMCLR_EL1 = A64_SYSREG_ENC(2, 7, 6, 9, 0), - DBGAUTHSTATUS_EL1 = A64_SYSREG_ENC(2, 7, 6, 14, 0), - - DBGDEVID2 = A64_SYSREG_ENC(2, 7, 7, 0, 0), - DBGDEVID1 = A64_SYSREG_ENC(2, 7, 7, 1, 0), - DBGDEVID0 = A64_SYSREG_ENC(2, 7, 7, 2, 0), - - // The following registers are defined to allow access from AArch64 to - // registers which are only used in the AArch32 architecture. - DACR32_EL2 = 0xe180, - IFSR32_EL2 = 0xe281, - TEEHBR32_EL1 = 0x9080, - SDER32_EL3 = 0xf089, - FPEXC32_EL2 = 0xe298, - - // Cyclone specific system registers - CPM_IOACC_CTL_EL3 = 0xff90, - - // Architectural system registers - ID_PFR0_EL1 = 0xc008, - ID_PFR1_EL1 = 0xc009, - ID_DFR0_EL1 = 0xc00a, - ID_AFR0_EL1 = 0xc00b, - ID_ISAR0_EL1 = 0xc010, - ID_ISAR1_EL1 = 0xc011, - ID_ISAR2_EL1 = 0xc012, - ID_ISAR3_EL1 = 0xc013, - ID_ISAR4_EL1 = 0xc014, - ID_ISAR5_EL1 = 0xc015, - AFSR1_EL1 = 0xc289, // note same as old AIFSR_EL1 - AFSR0_EL1 = 0xc288, // note same as old ADFSR_EL1 - REVIDR_EL1 = 0xc006 // note same as old ECOIDR_EL1 - -}; -#undef A64_SYSREG_ENC - -static inline const char *getSystemRegisterName(SystemRegister Reg) { - switch(Reg) { - default: return NULL; // Caller is responsible for handling invalid value. - case SPSR_EL1: return "SPSR_EL1"; - case ELR_EL1: return "ELR_EL1"; - case SP_EL0: return "SP_EL0"; - case SPSel: return "SPSel"; - case DAIF: return "DAIF"; - case CurrentEL: return "CurrentEL"; - case NZCV: return "NZCV"; - case FPCR: return "FPCR"; - case FPSR: return "FPSR"; - case DSPSR: return "DSPSR"; - case DLR: return "DLR"; - case SPSR_EL2: return "SPSR_EL2"; - case ELR_EL2: return "ELR_EL2"; - case SP_EL1: return "SP_EL1"; - case SPSR_irq: return "SPSR_irq"; - case SPSR_abt: return "SPSR_abt"; - case SPSR_und: return "SPSR_und"; - case SPSR_fiq: return "SPSR_fiq"; - case SPSR_EL3: return "SPSR_EL3"; - case ELR_EL3: return "ELR_EL3"; - case SP_EL2: return "SP_EL2"; - case MIDR_EL1: return "MIDR_EL1"; - case CTR_EL0: return "CTR_EL0"; - case MPIDR_EL1: return "MPIDR_EL1"; - case DCZID_EL0: return "DCZID_EL0"; - case MVFR0_EL1: return "MVFR0_EL1"; - case MVFR1_EL1: return "MVFR1_EL1"; - case ID_AA64PFR0_EL1: return "ID_AA64PFR0_EL1"; - case ID_AA64PFR1_EL1: return "ID_AA64PFR1_EL1"; - case ID_AA64DFR0_EL1: return "ID_AA64DFR0_EL1"; - case ID_AA64DFR1_EL1: return "ID_AA64DFR1_EL1"; - case ID_AA64ISAR0_EL1: return "ID_AA64ISAR0_EL1"; - case ID_AA64ISAR1_EL1: return "ID_AA64ISAR1_EL1"; - case ID_AA64MMFR0_EL1: return "ID_AA64MMFR0_EL1"; - case ID_AA64MMFR1_EL1: return "ID_AA64MMFR1_EL1"; - case CCSIDR_EL1: return "CCSIDR_EL1"; - case CLIDR_EL1: return "CLIDR_EL1"; - case AIDR_EL1: return "AIDR_EL1"; - case CSSELR_EL1: return "CSSELR_EL1"; - case VPIDR_EL2: return "VPIDR_EL2"; - case VMPIDR_EL2: return "VMPIDR_EL2"; - case SCTLR_EL1: return "SCTLR_EL1"; - case SCTLR_EL2: return "SCTLR_EL2"; - case SCTLR_EL3: return "SCTLR_EL3"; - case ACTLR_EL1: return "ACTLR_EL1"; - case ACTLR_EL2: return "ACTLR_EL2"; - case ACTLR_EL3: return "ACTLR_EL3"; - case CPACR_EL1: return "CPACR_EL1"; - case CPTR_EL2: return "CPTR_EL2"; - case CPTR_EL3: return "CPTR_EL3"; - case SCR_EL3: return "SCR_EL3"; - case HCR_EL2: return "HCR_EL2"; - case MDCR_EL2: return "MDCR_EL2"; - case MDCR_EL3: return "MDCR_EL3"; - case HSTR_EL2: return "HSTR_EL2"; - case HACR_EL2: return "HACR_EL2"; - 
case TTBR0_EL1: return "TTBR0_EL1"; - case TTBR1_EL1: return "TTBR1_EL1"; - case TTBR0_EL2: return "TTBR0_EL2"; - case TTBR0_EL3: return "TTBR0_EL3"; - case VTTBR_EL2: return "VTTBR_EL2"; - case TCR_EL1: return "TCR_EL1"; - case TCR_EL2: return "TCR_EL2"; - case TCR_EL3: return "TCR_EL3"; - case VTCR_EL2: return "VTCR_EL2"; - case ADFSR_EL2: return "ADFSR_EL2"; - case AIFSR_EL2: return "AIFSR_EL2"; - case ADFSR_EL3: return "ADFSR_EL3"; - case AIFSR_EL3: return "AIFSR_EL3"; - case ESR_EL1: return "ESR_EL1"; - case ESR_EL2: return "ESR_EL2"; - case ESR_EL3: return "ESR_EL3"; - case FAR_EL1: return "FAR_EL1"; - case FAR_EL2: return "FAR_EL2"; - case FAR_EL3: return "FAR_EL3"; - case HPFAR_EL2: return "HPFAR_EL2"; - case PAR_EL1: return "PAR_EL1"; - case MAIR_EL1: return "MAIR_EL1"; - case MAIR_EL2: return "MAIR_EL2"; - case MAIR_EL3: return "MAIR_EL3"; - case AMAIR_EL1: return "AMAIR_EL1"; - case AMAIR_EL2: return "AMAIR_EL2"; - case AMAIR_EL3: return "AMAIR_EL3"; - case VBAR_EL1: return "VBAR_EL1"; - case VBAR_EL2: return "VBAR_EL2"; - case VBAR_EL3: return "VBAR_EL3"; - case RVBAR_EL1: return "RVBAR_EL1"; - case RVBAR_EL2: return "RVBAR_EL2"; - case RVBAR_EL3: return "RVBAR_EL3"; - case ISR_EL1: return "ISR_EL1"; - case CONTEXTIDR_EL1: return "CONTEXTIDR_EL1"; - case TPIDR_EL0: return "TPIDR_EL0"; - case TPIDRRO_EL0: return "TPIDRRO_EL0"; - case TPIDR_EL1: return "TPIDR_EL1"; - case TPIDR_EL2: return "TPIDR_EL2"; - case TPIDR_EL3: return "TPIDR_EL3"; - case TEECR32_EL1: return "TEECR32_EL1"; - case CNTFRQ_EL0: return "CNTFRQ_EL0"; - case CNTPCT_EL0: return "CNTPCT_EL0"; - case CNTVCT_EL0: return "CNTVCT_EL0"; - case CNTVOFF_EL2: return "CNTVOFF_EL2"; - case CNTKCTL_EL1: return "CNTKCTL_EL1"; - case CNTHCTL_EL2: return "CNTHCTL_EL2"; - case CNTP_TVAL_EL0: return "CNTP_TVAL_EL0"; - case CNTP_CTL_EL0: return "CNTP_CTL_EL0"; - case CNTP_CVAL_EL0: return "CNTP_CVAL_EL0"; - case CNTV_TVAL_EL0: return "CNTV_TVAL_EL0"; - case CNTV_CTL_EL0: return "CNTV_CTL_EL0"; - case CNTV_CVAL_EL0: return "CNTV_CVAL_EL0"; - case CNTHP_TVAL_EL2: return "CNTHP_TVAL_EL2"; - case CNTHP_CTL_EL2: return "CNTHP_CTL_EL2"; - case CNTHP_CVAL_EL2: return "CNTHP_CVAL_EL2"; - case CNTPS_TVAL_EL1: return "CNTPS_TVAL_EL1"; - case CNTPS_CTL_EL1: return "CNTPS_CTL_EL1"; - case CNTPS_CVAL_EL1: return "CNTPS_CVAL_EL1"; - case DACR32_EL2: return "DACR32_EL2"; - case IFSR32_EL2: return "IFSR32_EL2"; - case TEEHBR32_EL1: return "TEEHBR32_EL1"; - case SDER32_EL3: return "SDER32_EL3"; - case FPEXC32_EL2: return "FPEXC32_EL2"; - case PMEVCNTR0_EL0: return "PMEVCNTR0_EL0"; - case PMEVCNTR1_EL0: return "PMEVCNTR1_EL0"; - case PMEVCNTR2_EL0: return "PMEVCNTR2_EL0"; - case PMEVCNTR3_EL0: return "PMEVCNTR3_EL0"; - case PMEVCNTR4_EL0: return "PMEVCNTR4_EL0"; - case PMEVCNTR5_EL0: return "PMEVCNTR5_EL0"; - case PMEVCNTR6_EL0: return "PMEVCNTR6_EL0"; - case PMEVCNTR7_EL0: return "PMEVCNTR7_EL0"; - case PMEVCNTR8_EL0: return "PMEVCNTR8_EL0"; - case PMEVCNTR9_EL0: return "PMEVCNTR9_EL0"; - case PMEVCNTR10_EL0: return "PMEVCNTR10_EL0"; - case PMEVCNTR11_EL0: return "PMEVCNTR11_EL0"; - case PMEVCNTR12_EL0: return "PMEVCNTR12_EL0"; - case PMEVCNTR13_EL0: return "PMEVCNTR13_EL0"; - case PMEVCNTR14_EL0: return "PMEVCNTR14_EL0"; - case PMEVCNTR15_EL0: return "PMEVCNTR15_EL0"; - case PMEVCNTR16_EL0: return "PMEVCNTR16_EL0"; - case PMEVCNTR17_EL0: return "PMEVCNTR17_EL0"; - case PMEVCNTR18_EL0: return "PMEVCNTR18_EL0"; - case PMEVCNTR19_EL0: return "PMEVCNTR19_EL0"; - case PMEVCNTR20_EL0: return "PMEVCNTR20_EL0"; - case PMEVCNTR21_EL0: return 
"PMEVCNTR21_EL0"; - case PMEVCNTR22_EL0: return "PMEVCNTR22_EL0"; - case PMEVCNTR23_EL0: return "PMEVCNTR23_EL0"; - case PMEVCNTR24_EL0: return "PMEVCNTR24_EL0"; - case PMEVCNTR25_EL0: return "PMEVCNTR25_EL0"; - case PMEVCNTR26_EL0: return "PMEVCNTR26_EL0"; - case PMEVCNTR27_EL0: return "PMEVCNTR27_EL0"; - case PMEVCNTR28_EL0: return "PMEVCNTR28_EL0"; - case PMEVCNTR29_EL0: return "PMEVCNTR29_EL0"; - case PMEVCNTR30_EL0: return "PMEVCNTR30_EL0"; - case PMEVTYPER0_EL0: return "PMEVTYPER0_EL0"; - case PMEVTYPER1_EL0: return "PMEVTYPER1_EL0"; - case PMEVTYPER2_EL0: return "PMEVTYPER2_EL0"; - case PMEVTYPER3_EL0: return "PMEVTYPER3_EL0"; - case PMEVTYPER4_EL0: return "PMEVTYPER4_EL0"; - case PMEVTYPER5_EL0: return "PMEVTYPER5_EL0"; - case PMEVTYPER6_EL0: return "PMEVTYPER6_EL0"; - case PMEVTYPER7_EL0: return "PMEVTYPER7_EL0"; - case PMEVTYPER8_EL0: return "PMEVTYPER8_EL0"; - case PMEVTYPER9_EL0: return "PMEVTYPER9_EL0"; - case PMEVTYPER10_EL0: return "PMEVTYPER10_EL0"; - case PMEVTYPER11_EL0: return "PMEVTYPER11_EL0"; - case PMEVTYPER12_EL0: return "PMEVTYPER12_EL0"; - case PMEVTYPER13_EL0: return "PMEVTYPER13_EL0"; - case PMEVTYPER14_EL0: return "PMEVTYPER14_EL0"; - case PMEVTYPER15_EL0: return "PMEVTYPER15_EL0"; - case PMEVTYPER16_EL0: return "PMEVTYPER16_EL0"; - case PMEVTYPER17_EL0: return "PMEVTYPER17_EL0"; - case PMEVTYPER18_EL0: return "PMEVTYPER18_EL0"; - case PMEVTYPER19_EL0: return "PMEVTYPER19_EL0"; - case PMEVTYPER20_EL0: return "PMEVTYPER20_EL0"; - case PMEVTYPER21_EL0: return "PMEVTYPER21_EL0"; - case PMEVTYPER22_EL0: return "PMEVTYPER22_EL0"; - case PMEVTYPER23_EL0: return "PMEVTYPER23_EL0"; - case PMEVTYPER24_EL0: return "PMEVTYPER24_EL0"; - case PMEVTYPER25_EL0: return "PMEVTYPER25_EL0"; - case PMEVTYPER26_EL0: return "PMEVTYPER26_EL0"; - case PMEVTYPER27_EL0: return "PMEVTYPER27_EL0"; - case PMEVTYPER28_EL0: return "PMEVTYPER28_EL0"; - case PMEVTYPER29_EL0: return "PMEVTYPER29_EL0"; - case PMEVTYPER30_EL0: return "PMEVTYPER30_EL0"; - case PMCCFILTR_EL0: return "PMCCFILTR_EL0"; - case RMR_EL3: return "RMR_EL3"; - case RMR_EL2: return "RMR_EL2"; - case RMR_EL1: return "RMR_EL1"; - case CPM_IOACC_CTL_EL3: return "CPM_IOACC_CTL_EL3"; - case MDCCSR_EL0: return "MDCCSR_EL0"; - case MDCCINT_EL1: return "MDCCINT_EL1"; - case DBGDTR_EL0: return "DBGDTR_EL0"; - case DBGDTRRX_EL0: return "DBGDTRRX_EL0"; - case DBGVCR32_EL2: return "DBGVCR32_EL2"; - case OSDTRRX_EL1: return "OSDTRRX_EL1"; - case MDSCR_EL1: return "MDSCR_EL1"; - case OSDTRTX_EL1: return "OSDTRTX_EL1"; - case OSECCR_EL11: return "OSECCR_EL11"; - case DBGBVR0_EL1: return "DBGBVR0_EL1"; - case DBGBVR1_EL1: return "DBGBVR1_EL1"; - case DBGBVR2_EL1: return "DBGBVR2_EL1"; - case DBGBVR3_EL1: return "DBGBVR3_EL1"; - case DBGBVR4_EL1: return "DBGBVR4_EL1"; - case DBGBVR5_EL1: return "DBGBVR5_EL1"; - case DBGBVR6_EL1: return "DBGBVR6_EL1"; - case DBGBVR7_EL1: return "DBGBVR7_EL1"; - case DBGBVR8_EL1: return "DBGBVR8_EL1"; - case DBGBVR9_EL1: return "DBGBVR9_EL1"; - case DBGBVR10_EL1: return "DBGBVR10_EL1"; - case DBGBVR11_EL1: return "DBGBVR11_EL1"; - case DBGBVR12_EL1: return "DBGBVR12_EL1"; - case DBGBVR13_EL1: return "DBGBVR13_EL1"; - case DBGBVR14_EL1: return "DBGBVR14_EL1"; - case DBGBVR15_EL1: return "DBGBVR15_EL1"; - case DBGBCR0_EL1: return "DBGBCR0_EL1"; - case DBGBCR1_EL1: return "DBGBCR1_EL1"; - case DBGBCR2_EL1: return "DBGBCR2_EL1"; - case DBGBCR3_EL1: return "DBGBCR3_EL1"; - case DBGBCR4_EL1: return "DBGBCR4_EL1"; - case DBGBCR5_EL1: return "DBGBCR5_EL1"; - case DBGBCR6_EL1: return "DBGBCR6_EL1"; - case DBGBCR7_EL1: 
return "DBGBCR7_EL1"; - case DBGBCR8_EL1: return "DBGBCR8_EL1"; - case DBGBCR9_EL1: return "DBGBCR9_EL1"; - case DBGBCR10_EL1: return "DBGBCR10_EL1"; - case DBGBCR11_EL1: return "DBGBCR11_EL1"; - case DBGBCR12_EL1: return "DBGBCR12_EL1"; - case DBGBCR13_EL1: return "DBGBCR13_EL1"; - case DBGBCR14_EL1: return "DBGBCR14_EL1"; - case DBGBCR15_EL1: return "DBGBCR15_EL1"; - case DBGWVR0_EL1: return "DBGWVR0_EL1"; - case DBGWVR1_EL1: return "DBGWVR1_EL1"; - case DBGWVR2_EL1: return "DBGWVR2_EL1"; - case DBGWVR3_EL1: return "DBGWVR3_EL1"; - case DBGWVR4_EL1: return "DBGWVR4_EL1"; - case DBGWVR5_EL1: return "DBGWVR5_EL1"; - case DBGWVR6_EL1: return "DBGWVR6_EL1"; - case DBGWVR7_EL1: return "DBGWVR7_EL1"; - case DBGWVR8_EL1: return "DBGWVR8_EL1"; - case DBGWVR9_EL1: return "DBGWVR9_EL1"; - case DBGWVR10_EL1: return "DBGWVR10_EL1"; - case DBGWVR11_EL1: return "DBGWVR11_EL1"; - case DBGWVR12_EL1: return "DBGWVR12_EL1"; - case DBGWVR13_EL1: return "DBGWVR13_EL1"; - case DBGWVR14_EL1: return "DBGWVR14_EL1"; - case DBGWVR15_EL1: return "DBGWVR15_EL1"; - case DBGWCR0_EL1: return "DBGWCR0_EL1"; - case DBGWCR1_EL1: return "DBGWCR1_EL1"; - case DBGWCR2_EL1: return "DBGWCR2_EL1"; - case DBGWCR3_EL1: return "DBGWCR3_EL1"; - case DBGWCR4_EL1: return "DBGWCR4_EL1"; - case DBGWCR5_EL1: return "DBGWCR5_EL1"; - case DBGWCR6_EL1: return "DBGWCR6_EL1"; - case DBGWCR7_EL1: return "DBGWCR7_EL1"; - case DBGWCR8_EL1: return "DBGWCR8_EL1"; - case DBGWCR9_EL1: return "DBGWCR9_EL1"; - case DBGWCR10_EL1: return "DBGWCR10_EL1"; - case DBGWCR11_EL1: return "DBGWCR11_EL1"; - case DBGWCR12_EL1: return "DBGWCR12_EL1"; - case DBGWCR13_EL1: return "DBGWCR13_EL1"; - case DBGWCR14_EL1: return "DBGWCR14_EL1"; - case DBGWCR15_EL1: return "DBGWCR15_EL1"; - case MDRAR_EL1: return "MDRAR_EL1"; - case OSLAR_EL1: return "OSLAR_EL1"; - case OSLSR_EL1: return "OSLSR_EL1"; - case OSDLR_EL1: return "OSDLR_EL1"; - case DBGPRCR_EL1: return "DBGPRCR_EL1"; - case DBGCLAIMSET_EL1: return "DBGCLAIMSET_EL1"; - case DBGCLAIMCLR_EL1: return "DBGCLAIMCLR_EL1"; - case DBGAUTHSTATUS_EL1: return "DBGAUTHSTATUS_EL1"; - case DBGDEVID2: return "DBGDEVID2"; - case DBGDEVID1: return "DBGDEVID1"; - case DBGDEVID0: return "DBGDEVID0"; - case ID_PFR0_EL1: return "ID_PFR0_EL1"; - case ID_PFR1_EL1: return "ID_PFR1_EL1"; - case ID_DFR0_EL1: return "ID_DFR0_EL1"; - case ID_AFR0_EL1: return "ID_AFR0_EL1"; - case ID_ISAR0_EL1: return "ID_ISAR0_EL1"; - case ID_ISAR1_EL1: return "ID_ISAR1_EL1"; - case ID_ISAR2_EL1: return "ID_ISAR2_EL1"; - case ID_ISAR3_EL1: return "ID_ISAR3_EL1"; - case ID_ISAR4_EL1: return "ID_ISAR4_EL1"; - case ID_ISAR5_EL1: return "ID_ISAR5_EL1"; - case AFSR1_EL1: return "AFSR1_EL1"; - case AFSR0_EL1: return "AFSR0_EL1"; - case REVIDR_EL1: return "REVIDR_EL1"; - } -} - -enum CPSRField { - InvalidCPSRField = 0xff, - cpsr_SPSel = 0x5, - cpsr_DAIFSet = 0x1e, - cpsr_DAIFClr = 0x1f -}; - -static inline const char *getCPSRFieldName(CPSRField Val) { - switch(Val) { - default: assert(0 && "Invalid system register value!"); - case cpsr_SPSel: return "SPSel"; - case cpsr_DAIFSet: return "DAIFSet"; - case cpsr_DAIFClr: return "DAIFClr"; - } -} - -} // end namespace ARM64SYS - -namespace ARM64II { - /// Target Operand Flag enum. - enum TOF { - //===------------------------------------------------------------------===// - // ARM64 Specific MachineOperand flags. - - MO_NO_FLAG, - - MO_FRAGMENT = 0x7, - - /// MO_PAGE - A symbol operand with this flag represents the pc-relative - /// offset of the 4K page containing the symbol. 
This is used with the - /// ADRP instruction. - MO_PAGE = 1, - - /// MO_PAGEOFF - A symbol operand with this flag represents the offset of - /// that symbol within a 4K page. This offset is added to the page address - /// to produce the complete address. - MO_PAGEOFF = 2, - - /// MO_G3 - A symbol operand with this flag (granule 3) represents the high - /// 16-bits of a 64-bit address, used in a MOVZ or MOVK instruction - MO_G3 = 3, - - /// MO_G2 - A symbol operand with this flag (granule 2) represents the bits - /// 32-47 of a 64-bit address, used in a MOVZ or MOVK instruction - MO_G2 = 4, - - /// MO_G1 - A symbol operand with this flag (granule 1) represents the bits - /// 16-31 of a 64-bit address, used in a MOVZ or MOVK instruction - MO_G1 = 5, - - /// MO_G0 - A symbol operand with this flag (granule 0) represents the bits - /// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction - MO_G0 = 6, - - /// MO_GOT - This flag indicates that a symbol operand represents the - /// address of the GOT entry for the symbol, rather than the address of - /// the symbol itself. - MO_GOT = 8, - - /// MO_NC - Indicates whether the linker is expected to check the symbol - /// reference for overflow. For example in an ADRP/ADD pair of relocations - /// the ADRP usually does check, but not the ADD. - MO_NC = 0x10, - - /// MO_TLS - Indicates that the operand being accessed is some kind of - /// thread-local symbol. On Darwin, only one type of thread-local access - /// exists (pre linker-relaxation), but on ELF the TLSModel used for the - /// referee will affect interpretation. - MO_TLS = 0x20 - }; -} // end namespace ARM64II - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp deleted file mode 100644 index 1a132a1..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp +++ /dev/null @@ -1,237 +0,0 @@ -//===-- ARM64ELFObjectWriter.cpp - ARM64 ELF Writer -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file handles ELF-specific object emission, converting LLVM's internal -// fixups into the appropriate relocations. 
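// Aside, a minimal stand-alone sketch (page/pageoff/granule are hypothetical
// helpers for illustration, not from the original sources): the MO_PAGE,
// MO_PAGEOFF and MO_G0..MO_G3 flags above describe how a 64-bit address is
// decomposed for ADRP/ADD and MOVZ/MOVK sequences; the relocation selection
// below then maps the corresponding fixups onto ELF relocation types.
  #include <cstdint>
  // 4 KB page address an ADRP result points at (MO_PAGE).
  constexpr uint64_t page(uint64_t A) { return A & ~UINT64_C(0xfff); }
  // Low 12 bits supplied by the following ADD or load/store (MO_PAGEOFF).
  constexpr uint64_t pageoff(uint64_t A) { return A & UINT64_C(0xfff); }
  // 16-bit slice N consumed by MOVZ/MOVK (MO_G0..MO_G3).
  constexpr uint64_t granule(uint64_t A, unsigned N) { return (A >> (16 * N)) & 0xffff; }
  static_assert(page(0x123456789) + pageoff(0x123456789) == 0x123456789, "");
  static_assert(granule(0x0011223344556677, 3) == 0x0011, "high 16 bits (MO_G3)");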
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "MCTargetDesc/ARM64MCTargetDesc.h" -#include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/MC/MCValue.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; - -namespace { -class ARM64ELFObjectWriter : public MCELFObjectTargetWriter { -public: - ARM64ELFObjectWriter(uint8_t OSABI); - - virtual ~ARM64ELFObjectWriter(); - -protected: - unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel) const override; - -private: -}; -} - -ARM64ELFObjectWriter::ARM64ELFObjectWriter(uint8_t OSABI) - : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64, - /*HasRelocationAddend*/ true) {} - -ARM64ELFObjectWriter::~ARM64ELFObjectWriter() {} - -unsigned ARM64ELFObjectWriter::GetRelocType(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const { - ARM64MCExpr::VariantKind RefKind = - static_cast<ARM64MCExpr::VariantKind>(Target.getRefKind()); - ARM64MCExpr::VariantKind SymLoc = ARM64MCExpr::getSymbolLoc(RefKind); - bool IsNC = ARM64MCExpr::isNotChecked(RefKind); - - assert((!Target.getSymA() || - Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None) && - "Should only be expression-level modifiers here"); - - assert((!Target.getSymB() || - Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None) && - "Should only be expression-level modifiers here"); - - if (IsPCRel) { - switch ((unsigned)Fixup.getKind()) { - case FK_Data_2: - return ELF::R_AARCH64_PREL16; - case FK_Data_4: - return ELF::R_AARCH64_PREL32; - case FK_Data_8: - return ELF::R_AARCH64_PREL64; - case ARM64::fixup_arm64_pcrel_adr_imm21: - llvm_unreachable("No ELF relocations supported for ADR at the moment"); - case ARM64::fixup_arm64_pcrel_adrp_imm21: - if (SymLoc == ARM64MCExpr::VK_ABS && !IsNC) - return ELF::R_AARCH64_ADR_PREL_PG_HI21; - if (SymLoc == ARM64MCExpr::VK_GOT && !IsNC) - return ELF::R_AARCH64_ADR_GOT_PAGE; - if (SymLoc == ARM64MCExpr::VK_GOTTPREL && !IsNC) - return ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21; - if (SymLoc == ARM64MCExpr::VK_TLSDESC && !IsNC) - return ELF::R_AARCH64_TLSDESC_ADR_PAGE; - llvm_unreachable("invalid symbol kind for ADRP relocation"); - case ARM64::fixup_arm64_pcrel_branch26: - return ELF::R_AARCH64_JUMP26; - case ARM64::fixup_arm64_pcrel_call26: - return ELF::R_AARCH64_CALL26; - case ARM64::fixup_arm64_pcrel_imm19: - return ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19; - default: - llvm_unreachable("Unsupported pc-relative fixup kind"); - } - } else { - switch ((unsigned)Fixup.getKind()) { - case FK_Data_2: - return ELF::R_AARCH64_ABS16; - case FK_Data_4: - return ELF::R_AARCH64_ABS32; - case FK_Data_8: - return ELF::R_AARCH64_ABS64; - case ARM64::fixup_arm64_add_imm12: - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) - return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) - return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) - return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) - return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TLSDESC && IsNC) - return ELF::R_AARCH64_TLSDESC_ADD_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) - return ELF::R_AARCH64_ADD_ABS_LO12_NC; - - report_fatal_error("invalid fixup for add (uimm12) instruction"); - return 0; - case ARM64::fixup_arm64_ldst_imm12_scale1: - if (SymLoc == 
ARM64MCExpr::VK_ABS && IsNC) - return ELF::R_AARCH64_LDST8_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) - return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) - return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) - return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) - return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; - - report_fatal_error("invalid fixup for 8-bit load/store instruction"); - return 0; - case ARM64::fixup_arm64_ldst_imm12_scale2: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) - return ELF::R_AARCH64_LDST16_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) - return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) - return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) - return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) - return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; - - report_fatal_error("invalid fixup for 16-bit load/store instruction"); - return 0; - case ARM64::fixup_arm64_ldst_imm12_scale4: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) - return ELF::R_AARCH64_LDST32_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) - return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) - return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) - return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) - return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; - - report_fatal_error("invalid fixup for 32-bit load/store instruction"); - return 0; - case ARM64::fixup_arm64_ldst_imm12_scale8: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) - return ELF::R_AARCH64_LDST64_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_GOT && IsNC) - return ELF::R_AARCH64_LD64_GOT_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) - return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) - return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) - return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) - return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_GOTTPREL && IsNC) - return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TLSDESC && IsNC) - return ELF::R_AARCH64_TLSDESC_LD64_LO12_NC; - - report_fatal_error("invalid fixup for 64-bit load/store instruction"); - return 0; - case ARM64::fixup_arm64_ldst_imm12_scale16: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) - return ELF::R_AARCH64_LDST128_ABS_LO12_NC; - - report_fatal_error("invalid fixup for 128-bit load/store instruction"); - return 0; - case ARM64::fixup_arm64_movw: - if (RefKind == ARM64MCExpr::VK_ABS_G3) - return ELF::R_AARCH64_MOVW_UABS_G3; - if (RefKind == ARM64MCExpr::VK_ABS_G2) - return ELF::R_AARCH64_MOVW_UABS_G2; - if (RefKind == ARM64MCExpr::VK_ABS_G2_NC) - return ELF::R_AARCH64_MOVW_UABS_G2_NC; - if (RefKind == ARM64MCExpr::VK_ABS_G1) - return ELF::R_AARCH64_MOVW_UABS_G1; - if (RefKind == ARM64MCExpr::VK_ABS_G1_NC) - return ELF::R_AARCH64_MOVW_UABS_G1_NC; - if (RefKind == ARM64MCExpr::VK_ABS_G0) - return ELF::R_AARCH64_MOVW_UABS_G0; - if (RefKind == ARM64MCExpr::VK_ABS_G0_NC) - return ELF::R_AARCH64_MOVW_UABS_G0_NC; - if (RefKind == 
ARM64MCExpr::VK_DTPREL_G2) - return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2; - if (RefKind == ARM64MCExpr::VK_DTPREL_G1) - return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1; - if (RefKind == ARM64MCExpr::VK_DTPREL_G1_NC) - return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC; - if (RefKind == ARM64MCExpr::VK_DTPREL_G0) - return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0; - if (RefKind == ARM64MCExpr::VK_DTPREL_G0_NC) - return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC; - if (RefKind == ARM64MCExpr::VK_TPREL_G2) - return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2; - if (RefKind == ARM64MCExpr::VK_TPREL_G1) - return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1; - if (RefKind == ARM64MCExpr::VK_TPREL_G1_NC) - return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC; - if (RefKind == ARM64MCExpr::VK_TPREL_G0) - return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0; - if (RefKind == ARM64MCExpr::VK_TPREL_G0_NC) - return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC; - if (RefKind == ARM64MCExpr::VK_GOTTPREL_G1) - return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1; - if (RefKind == ARM64MCExpr::VK_GOTTPREL_G0_NC) - return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC; - report_fatal_error("invalid fixup for movz/movk instruction"); - return 0; - case ARM64::fixup_arm64_tlsdesc_call: - return ELF::R_AARCH64_TLSDESC_CALL; - default: - llvm_unreachable("Unknown ELF relocation type"); - } - } - - llvm_unreachable("Unimplemented fixup -> relocation"); -} - -MCObjectWriter *llvm::createARM64ELFObjectWriter(raw_ostream &OS, - uint8_t OSABI) { - MCELFObjectTargetWriter *MOTW = new ARM64ELFObjectWriter(OSABI); - return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp deleted file mode 100644 index 97a3493..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp +++ /dev/null @@ -1,158 +0,0 @@ -//===- lib/MC/ARM64ELFStreamer.cpp - ELF Object Output for ARM64 ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file assembles .s files and emits AArch64 ELF .o object files. Different -// from generic ELF streamer in emitting mapping symbols ($x and $d) to delimit -// regions of data and code. -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCELFStreamer.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" -#include "llvm/MC/MCELFStreamer.h" -#include "llvm/MC/MCELFSymbolFlags.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCObjectStreamer.h" -#include "llvm/MC/MCSection.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCValue.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { - -/// Extend the generic ELFStreamer class so that it can emit mapping symbols at -/// the appropriate points in the object files. 
These symbols are defined in the -/// AArch64 ELF ABI: -/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf -/// -/// In brief: $x or $d should be emitted at the start of each contiguous region -/// of A64 code or data in a section. In practice, this emission does not rely -/// on explicit assembler directives but on inherent properties of the -/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an -/// instruction). -/// -/// As a result this system is orthogonal to the DataRegion infrastructure used -/// by MachO. Beware! -class ARM64ELFStreamer : public MCELFStreamer { -public: - ARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, - MCCodeEmitter *Emitter) - : MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0), - LastEMS(EMS_None) {} - - ~ARM64ELFStreamer() {} - - virtual void ChangeSection(const MCSection *Section, - const MCExpr *Subsection) { - // We have to keep track of the mapping symbol state of any sections we - // use. Each one should start off as EMS_None, which is provided as the - // default constructor by DenseMap::lookup. - LastMappingSymbols[getPreviousSection().first] = LastEMS; - LastEMS = LastMappingSymbols.lookup(Section); - - MCELFStreamer::ChangeSection(Section, Subsection); - } - - /// This function is the one used to emit instruction data into the ELF - /// streamer. We override it to add the appropriate mapping symbol if - /// necessary. - virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) { - EmitA64MappingSymbol(); - MCELFStreamer::EmitInstruction(Inst, STI); - } - - /// This is one of the functions used to emit data into an ELF section, so the - /// ARM64 streamer overrides it to add the appropriate mapping symbol ($d) - /// if necessary. - virtual void EmitBytes(StringRef Data) { - EmitDataMappingSymbol(); - MCELFStreamer::EmitBytes(Data); - } - - /// This is one of the functions used to emit data into an ELF section, so the - /// ARM64 streamer overrides it to add the appropriate mapping symbol ($d) - /// if necessary. - virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) { - EmitDataMappingSymbol(); - MCELFStreamer::EmitValueImpl(Value, Size); - } - -private: - enum ElfMappingSymbol { - EMS_None, - EMS_A64, - EMS_Data - }; - - void EmitDataMappingSymbol() { - if (LastEMS == EMS_Data) - return; - EmitMappingSymbol("$d"); - LastEMS = EMS_Data; - } - - void EmitA64MappingSymbol() { - if (LastEMS == EMS_A64) - return; - EmitMappingSymbol("$x"); - LastEMS = EMS_A64; - } - - void EmitMappingSymbol(StringRef Name) { - MCSymbol *Start = getContext().CreateTempSymbol(); - EmitLabel(Start); - - MCSymbol *Symbol = getContext().GetOrCreateSymbol( - Name + "." 
+ Twine(MappingSymbolCounter++)); - - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - MCELF::SetType(SD, ELF::STT_NOTYPE); - MCELF::SetBinding(SD, ELF::STB_LOCAL); - SD.setExternal(false); - Symbol->setSection(*getCurrentSection().first); - - const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); - Symbol->setVariableValue(Value); - } - - int64_t MappingSymbolCounter; - - DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols; - ElfMappingSymbol LastEMS; - - /// @} -}; -} - -namespace llvm { -MCELFStreamer *createARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack) { - ARM64ELFStreamer *S = new ARM64ELFStreamer(Context, TAB, OS, Emitter); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); - if (NoExecStack) - S->getAssembler().setNoExecStack(true); - return S; -} -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h b/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h deleted file mode 100644 index 72dadbc..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h +++ /dev/null @@ -1,26 +0,0 @@ -//===-- ARM64ELFStreamer.h - ELF Streamer for ARM64 -------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF streamer information for the ARM64 backend. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64_ELF_STREAMER_H -#define LLVM_AARCH64_ELF_STREAMER_H - -#include "llvm/MC/MCELFStreamer.h" - -namespace llvm { - -MCELFStreamer *createARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack); -} - -#endif // ARM64_ELF_STREAMER_H diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h b/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h deleted file mode 100644 index 02eb91f..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h +++ /dev/null @@ -1,72 +0,0 @@ -//===-- ARM64FixupKinds.h - ARM64 Specific Fixup Entries --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ARM64FIXUPKINDS_H -#define LLVM_ARM64FIXUPKINDS_H - -#include "llvm/MC/MCFixup.h" - -namespace llvm { -namespace ARM64 { - -enum Fixups { - // fixup_arm64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into - // an ADR instruction. - fixup_arm64_pcrel_adr_imm21 = FirstTargetFixupKind, - - // fixup_arm64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into - // an ADRP instruction. - fixup_arm64_pcrel_adrp_imm21, - - // fixup_arm64_imm12 - 12-bit fixup for add/sub instructions. - // No alignment adjustment. All value bits are encoded. - fixup_arm64_add_imm12, - - // fixup_arm64_ldst_imm12_* - unsigned 12-bit fixups for load and - // store instructions. - fixup_arm64_ldst_imm12_scale1, - fixup_arm64_ldst_imm12_scale2, - fixup_arm64_ldst_imm12_scale4, - fixup_arm64_ldst_imm12_scale8, - fixup_arm64_ldst_imm12_scale16, - - // FIXME: comment - fixup_arm64_movw, - - // fixup_arm64_pcrel_imm14 - The high 14 bits of a 21-bit pc-relative - // immediate. 
- fixup_arm64_pcrel_branch14, - - // fixup_arm64_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative - // immediate. Same encoding as fixup_arm64_pcrel_adrhi, except this - // is not used as part of a lo/hi pair and thus generates relocations - // directly when necessary. - fixup_arm64_pcrel_imm19, - - // fixup_arm64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative - // immediate. - fixup_arm64_pcrel_branch26, - - // fixup_arm64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative - // immediate. Distinguished from branch26 only on ELF. - fixup_arm64_pcrel_call26, - - // fixup_arm64_tlsdesc_call - zero-space placeholder for the ELF - // R_AARCH64_TLSDESC_CALL relocation. - fixup_arm64_tlsdesc_call, - - // Marker - LastTargetFixupKind, - NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind -}; - -} // end namespace ARM64 -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp deleted file mode 100644 index 97e0d3c..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp +++ /dev/null @@ -1,92 +0,0 @@ -//===-- ARM64MCAsmInfo.cpp - ARM64 asm properties -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declarations of the ARM64MCAsmInfo properties. -// -//===----------------------------------------------------------------------===// - -#include "ARM64MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/CommandLine.h" -using namespace llvm; - -enum AsmWriterVariantTy { - Default = -1, - Generic = 0, - Apple = 1 -}; - -static cl::opt<AsmWriterVariantTy> AsmWriterVariant( - "arm64-neon-syntax", cl::init(Default), - cl::desc("Choose style of NEON code to emit from ARM64 backend:"), - cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"), - clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly"), - clEnumValEnd)); - -ARM64MCAsmInfoDarwin::ARM64MCAsmInfoDarwin() { - // We prefer NEON instructions to be printed in the short form. - AssemblerDialect = AsmWriterVariant == Default ? 1 : AsmWriterVariant; - - PrivateGlobalPrefix = "L"; - SeparatorString = "%%"; - CommentString = ";"; - PointerSize = CalleeSaveStackSlotSize = 8; - - AlignmentIsInBytes = false; - UsesELFSectionDirectiveForBSS = true; - SupportsDebugInformation = true; - UseDataRegionDirectives = true; - - ExceptionsType = ExceptionHandling::DwarfCFI; -} - -const MCExpr *ARM64MCAsmInfoDarwin::getExprForPersonalitySymbol( - const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const { - // On Darwin, we can reference dwarf symbols with foo@GOT-., which - // is an indirect pc-relative reference. The default implementation - // won't reference using the GOT, so we need this target-specific - // version. - MCContext &Context = Streamer.getContext(); - const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Context); - MCSymbol *PCSym = Context.CreateTempSymbol(); - Streamer.EmitLabel(PCSym); - const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context); - return MCBinaryExpr::CreateSub(Res, PC, Context); -} - -ARM64MCAsmInfoELF::ARM64MCAsmInfoELF() { - // We prefer NEON instructions to be printed in the short form. 
- AssemblerDialect = AsmWriterVariant == Default ? 0 : AsmWriterVariant; - - PointerSize = 8; - - // ".comm align is in bytes but .align is pow-2." - AlignmentIsInBytes = false; - - CommentString = "//"; - PrivateGlobalPrefix = ".L"; - Code32Directive = ".code\t32"; - - Data16bitsDirective = "\t.hword\t"; - Data32bitsDirective = "\t.word\t"; - Data64bitsDirective = "\t.xword\t"; - - UseDataRegionDirectives = false; - - WeakRefDirective = "\t.weak\t"; - - HasLEB128 = true; - SupportsDebugInformation = true; - - // Exceptions handling - ExceptionsType = ExceptionHandling::DwarfCFI; -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h b/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h deleted file mode 100644 index f2d33a7..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h +++ /dev/null @@ -1,36 +0,0 @@ -//=====-- ARM64MCAsmInfo.h - ARM64 asm properties -----------*- C++ -*--====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the ARM64MCAsmInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64TARGETASMINFO_H -#define ARM64TARGETASMINFO_H - -#include "llvm/MC/MCAsmInfoDarwin.h" - -namespace llvm { -class Target; -class StringRef; -class MCStreamer; -struct ARM64MCAsmInfoDarwin : public MCAsmInfoDarwin { - explicit ARM64MCAsmInfoDarwin(); - virtual const MCExpr *getExprForPersonalitySymbol(const MCSymbol *Sym, - unsigned Encoding, - MCStreamer &Streamer) const; -}; - -struct ARM64MCAsmInfoELF : public MCAsmInfo { - explicit ARM64MCAsmInfoELF(); -}; - -} // namespace llvm - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp deleted file mode 100644 index 19559f8..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp +++ /dev/null @@ -1,563 +0,0 @@ -//===-- ARM64/ARM64MCCodeEmitter.cpp - Convert ARM64 code to machine code -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ARM64MCCodeEmitter class. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "mccodeemitter" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -STATISTIC(MCNumEmitted, "Number of MC instructions emitted."); -STATISTIC(MCNumFixups, "Number of MC fixups created."); - -namespace { - -class ARM64MCCodeEmitter : public MCCodeEmitter { - MCContext &Ctx; - - ARM64MCCodeEmitter(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT -public: - ARM64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) - : Ctx(ctx) {} - - ~ARM64MCCodeEmitter() {} - - // getBinaryCodeForInstr - TableGen'erated function for getting the - // binary encoding for an instruction. - uint64_t getBinaryCodeForInstr(const MCInst &MI, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getMachineOpValue - Return binary encoding of operand. If the machine - /// operand requires relocation, record the relocation and return zero. - unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getAMIndexed8OpValue - Return encoding info for base register - /// and 12-bit unsigned immediate attached to a load, store or prfm - /// instruction. If operand requires a relocation, record it and - /// return zero in that part of the encoding. - template <uint32_t FixupKind> - uint32_t getAMIndexed8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label - /// target. - uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getAddSubImmOpValue - Return encoding for the 12-bit immediate value and - /// the 2-bit shift field. - uint32_t getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getCondBranchTargetOpValue - Return the encoded value for a conditional - /// branch target. - uint32_t getCondBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and- - /// branch target. - uint32_t getTestBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getBranchTargetOpValue - Return the encoded value for an unconditional - /// branch target. - uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getMoveWideImmOpValue - Return the encoded value for the immediate operand - /// of a MOVZ or MOVK instruction. 
- uint32_t getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getVecShifterOpValue - Return the encoded value for the vector shifter. - uint32_t getVecShifterOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getMoveVecShifterOpValue - Return the encoded value for the vector move - /// shifter (MSL). - uint32_t getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getFixedPointScaleOpValue - Return the encoded value for the - // FP-to-fixed-point scale factor. - uint32_t getFixedPointScaleOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - uint32_t getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - /// getSIMDShift64OpValue - Return the encoded value for the - // shift-by-immediate AdvSIMD instructions. - uint32_t getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - uint32_t getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - uint32_t getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - uint32_t getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const; - - void EmitByte(unsigned char C, raw_ostream &OS) const { OS << (char)C; } - - void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const { - // Output the constant in little endian byte order. - for (unsigned i = 0; i != Size; ++i) { - EmitByte(Val & 255, OS); - Val >>= 8; - } - } - - void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; -}; - -} // end anonymous namespace - -MCCodeEmitter *llvm::createARM64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new ARM64MCCodeEmitter(MCII, STI, Ctx); -} - -/// getMachineOpValue - Return binary encoding of operand. 
If the machine -/// operand requires relocation, record the relocation and return zero. -unsigned -ARM64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - if (MO.isReg()) - return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); - else { - assert(MO.isImm() && "did not expect relocated expression"); - return static_cast<unsigned>(MO.getImm()); - } - - assert(0 && "Unable to encode MCOperand!"); - return 0; -} - -template <uint32_t FixupKind> -uint32_t -ARM64MCCodeEmitter::getAMIndexed8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - unsigned BaseReg = MI.getOperand(OpIdx).getReg(); - BaseReg = Ctx.getRegisterInfo()->getEncodingValue(BaseReg); - - const MCOperand &MO = MI.getOperand(OpIdx + 1); - uint32_t ImmVal = 0; - - if (MO.isImm()) - ImmVal = static_cast<uint32_t>(MO.getImm()); - else { - assert(MO.isExpr() && "unable to encode load/store imm operand"); - MCFixupKind Kind = MCFixupKind(FixupKind); - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); - ++MCNumFixups; - } - - return BaseReg | (ImmVal << 5); -} - -/// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label -/// target. -uint32_t -ARM64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) - return MO.getImm(); - assert(MO.isExpr() && "Unexpected ADR target type!"); - const MCExpr *Expr = MO.getExpr(); - - MCFixupKind Kind = MI.getOpcode() == ARM64::ADR - ? MCFixupKind(ARM64::fixup_arm64_pcrel_adr_imm21) - : MCFixupKind(ARM64::fixup_arm64_pcrel_adrp_imm21); - Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); - - MCNumFixups += 1; - - // All of the information is in the fixup. - return 0; -} - -/// getAddSubImmOpValue - Return encoding for the 12-bit immediate value and -/// the 2-bit shift field. The shift field is stored in bits 13-14 of the -/// return value. -uint32_t -ARM64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - // Suboperands are [imm, shifter]. - const MCOperand &MO = MI.getOperand(OpIdx); - const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - assert(ARM64_AM::getShiftType(MO1.getImm()) == ARM64_AM::LSL && - "unexpected shift type for add/sub immediate"); - unsigned ShiftVal = ARM64_AM::getShiftValue(MO1.getImm()); - assert((ShiftVal == 0 || ShiftVal == 12) && - "unexpected shift value for add/sub immediate"); - if (MO.isImm()) - return MO.getImm() | (ShiftVal == 0 ? 0 : (1 << 12)); - assert(MO.isExpr() && "Unable to encode MCOperand!"); - const MCExpr *Expr = MO.getExpr(); - assert(ShiftVal == 0 && "shift not allowed on add/sub immediate with fixup"); - - // Encode the 12 bits of the fixup. - MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_add_imm12); - Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); - - ++MCNumFixups; - - return 0; -} - -/// getCondBranchTargetOpValue - Return the encoded value for a conditional -/// branch target. 
-uint32_t ARM64MCCodeEmitter::getCondBranchTargetOpValue( - const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) - return MO.getImm(); - assert(MO.isExpr() && "Unexpected target type!"); - - MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_imm19); - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); - - ++MCNumFixups; - - // All of the information is in the fixup. - return 0; -} - -uint32_t -ARM64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - if (MO.isImm()) - return MO.getImm(); - assert(MO.isExpr() && "Unexpected movz/movk immediate"); - - Fixups.push_back(MCFixup::Create( - 0, MO.getExpr(), MCFixupKind(ARM64::fixup_arm64_movw), MI.getLoc())); - - ++MCNumFixups; - - return 0; -} - -/// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and- -/// branch target. -uint32_t ARM64MCCodeEmitter::getTestBranchTargetOpValue( - const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) - return MO.getImm(); - assert(MO.isExpr() && "Unexpected ADR target type!"); - - MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_branch14); - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); - - ++MCNumFixups; - - // All of the information is in the fixup. - return 0; -} - -/// getBranchTargetOpValue - Return the encoded value for an unconditional -/// branch target. -uint32_t -ARM64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) - return MO.getImm(); - assert(MO.isExpr() && "Unexpected ADR target type!"); - - MCFixupKind Kind = MI.getOpcode() == ARM64::BL - ? MCFixupKind(ARM64::fixup_arm64_pcrel_call26) - : MCFixupKind(ARM64::fixup_arm64_pcrel_branch26); - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); - - ++MCNumFixups; - - // All of the information is in the fixup. 
- return 0; -} - -/// getVecShifterOpValue - Return the encoded value for the vector shifter: -/// -/// 00 -> 0 -/// 01 -> 8 -/// 10 -> 16 -/// 11 -> 24 -uint32_t -ARM64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the shift amount!"); - - switch (MO.getImm()) { - default: - break; - case 0: - return 0; - case 8: - return 1; - case 16: - return 2; - case 24: - return 3; - } - - assert(false && "Invalid value for vector shift amount!"); - return 0; -} - -uint32_t -ARM64MCCodeEmitter::getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the shift amount!"); - return 64 - (MO.getImm()); -} - -uint32_t -ARM64MCCodeEmitter::getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the shift amount!"); - return 64 - (MO.getImm() | 32); -} - -uint32_t -ARM64MCCodeEmitter::getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the shift amount!"); - return 32 - (MO.getImm() | 16); -} - -uint32_t -ARM64MCCodeEmitter::getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the shift amount!"); - return 16 - (MO.getImm() | 8); -} - -/// getFixedPointScaleOpValue - Return the encoded value for the -// FP-to-fixed-point scale factor. 
-uint32_t ARM64MCCodeEmitter::getFixedPointScaleOpValue( - const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return 64 - MO.getImm(); -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return 64 - MO.getImm(); -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return 32 - MO.getImm(); -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return 16 - MO.getImm(); -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return 8 - MO.getImm(); -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return MO.getImm() - 64; -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return MO.getImm() - 32; -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return MO.getImm() - 16; -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return MO.getImm() - 8; -} - -/// getMoveVecShifterOpValue - Return the encoded value for the vector move -/// shifter (MSL). -uint32_t -ARM64MCCodeEmitter::getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && - "Expected an immediate value for the move shift amount!"); - unsigned ShiftVal = ARM64_AM::getShiftValue(MO.getImm()); - assert((ShiftVal == 8 || ShiftVal == 16) && "Invalid shift amount!"); - return ShiftVal == 8 ? 
0 : 1; -} - -unsigned ARM64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const { - // If one of the signed fixup kinds is applied to a MOVZ instruction, the - // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's - // job to ensure that any bits possibly affected by this are 0. This means we - // must zero out bit 30 (essentially emitting a MOVN). - MCOperand UImm16MO = MI.getOperand(1); - - // Nothing to do if there's no fixup. - if (UImm16MO.isImm()) - return EncodedValue; - - return EncodedValue & ~(1u << 30); -} - -void ARM64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - if (MI.getOpcode() == ARM64::TLSDESCCALL) { - // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the - // following (BLR) instruction. It doesn't emit any code itself so it - // doesn't go through the normal TableGenerated channels. - MCFixupKind Fixup = MCFixupKind(ARM64::fixup_arm64_tlsdesc_call); - Fixups.push_back(MCFixup::Create(0, MI.getOperand(0).getExpr(), Fixup)); - return; - } - - uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); - EmitConstant(Binary, 4, OS); - ++MCNumEmitted; // Keep track of the # of mi's emitted. -} - -#include "ARM64GenMCCodeEmitter.inc" diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp deleted file mode 100644 index d4ab140..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp +++ /dev/null @@ -1,168 +0,0 @@ -//===-- ARM64MCExpr.cpp - ARM64 specific MC expression classes --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the implementation of the assembly expression modifiers -// accepted by the AArch64 architecture (e.g. ":lo12:", ":gottprel_g1:", ...). 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "aarch64symbolrefexpr" -#include "ARM64MCExpr.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCValue.h" -#include "llvm/Object/ELF.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; - -const ARM64MCExpr *ARM64MCExpr::Create(const MCExpr *Expr, VariantKind Kind, - MCContext &Ctx) { - return new (Ctx) ARM64MCExpr(Expr, Kind); -} - -StringRef ARM64MCExpr::getVariantKindName() const { - switch (static_cast<uint32_t>(getKind())) { - case VK_CALL: return ""; - case VK_LO12: return ":lo12:"; - case VK_ABS_G3: return ":abs_g3:"; - case VK_ABS_G2: return ":abs_g2:"; - case VK_ABS_G2_NC: return ":abs_g2_nc:"; - case VK_ABS_G1: return ":abs_g1:"; - case VK_ABS_G1_NC: return ":abs_g1_nc:"; - case VK_ABS_G0: return ":abs_g0:"; - case VK_ABS_G0_NC: return ":abs_g0_nc:"; - case VK_DTPREL_G2: return ":dtprel_g2:"; - case VK_DTPREL_G1: return ":dtprel_g1:"; - case VK_DTPREL_G1_NC: return ":dtprel_g1_nc:"; - case VK_DTPREL_G0: return ":dtprel_g0:"; - case VK_DTPREL_G0_NC: return ":dtprel_g0_nc:"; - case VK_DTPREL_LO12: return ":dtprel_lo12:"; - case VK_DTPREL_LO12_NC: return ":dtprel_lo12_nc:"; - case VK_TPREL_G2: return ":tprel_g2:"; - case VK_TPREL_G1: return ":tprel_g1:"; - case VK_TPREL_G1_NC: return ":tprel_g1_nc:"; - case VK_TPREL_G0: return ":tprel_g0:"; - case VK_TPREL_G0_NC: return ":tprel_g0_nc:"; - case VK_TPREL_LO12: return ":tprel_lo12:"; - case VK_TPREL_LO12_NC: return ":tprel_lo12_nc:"; - case VK_TLSDESC_LO12: return ":tlsdesc_lo12:"; - case VK_ABS_PAGE: return ""; - case VK_GOT_PAGE: return ":got:"; - case VK_GOT_LO12: return ":got_lo12:"; - case VK_GOTTPREL_PAGE: return ":gottprel:"; - case VK_GOTTPREL_LO12_NC: return ":gottprel_lo12:"; - case VK_GOTTPREL_G1: return ":gottprel_g1:"; - case VK_GOTTPREL_G0_NC: return ":gottprel_g0_nc:"; - case VK_TLSDESC: return ""; - case VK_TLSDESC_PAGE: return ":tlsdesc:"; - default: - llvm_unreachable("Invalid ELF symbol kind"); - } -} - -void ARM64MCExpr::PrintImpl(raw_ostream &OS) const { - if (getKind() != VK_NONE) - OS << getVariantKindName(); - OS << *Expr; -} - -// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps -// that method should be made public? -// FIXME: really do above: now that two backends are using it. 
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) { - switch (Value->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expr!"); - break; - - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); - AddValueSymbolsImpl(BE->getLHS(), Asm); - AddValueSymbolsImpl(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: - Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); - break; - - case MCExpr::Unary: - AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm); - break; - } -} - -void ARM64MCExpr::AddValueSymbols(MCAssembler *Asm) const { - AddValueSymbolsImpl(getSubExpr(), Asm); -} - -const MCSection *ARM64MCExpr::FindAssociatedSection() const { - llvm_unreachable("FIXME: what goes here?"); -} - -bool ARM64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const { - if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout)) - return false; - - Res = - MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind()); - - return true; -} - -static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { - switch (Expr->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expression"); - break; - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr); - fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); - fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: { - // We're known to be under a TLS fixup, so any symbol should be - // modified. There should be only one. - const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr); - MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol()); - MCELF::SetType(SD, ELF::STT_TLS); - break; - } - - case MCExpr::Unary: - fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm); - break; - } -} - -void ARM64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { - switch (getSymbolLoc(Kind)) { - default: - return; - case VK_DTPREL: - case VK_GOTTPREL: - case VK_TPREL: - case VK_TLSDESC: - break; - } - - fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h b/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h deleted file mode 100644 index a33fe43..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h +++ /dev/null @@ -1,162 +0,0 @@ -//=---- ARM64MCExpr.h - ARM64 specific MC expression classes ------*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes ARM64-specific MCExprs, used for modifiers like -// ":lo12:" or ":gottprel_g1:". -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ARM64MCEXPR_H -#define LLVM_ARM64MCEXPR_H - -#include "llvm/MC/MCExpr.h" -#include "llvm/Support/ErrorHandling.h" - -namespace llvm { - -class ARM64MCExpr : public MCTargetExpr { -public: - enum VariantKind { - VK_NONE = 0x000, - - // Symbol locations specifying (roughly speaking) what calculation should be - // performed to construct the final address for the relocated - // symbol. E.g. direct, via the GOT, ... 
- VK_ABS = 0x001, - VK_SABS = 0x002, - VK_GOT = 0x003, - VK_DTPREL = 0x004, - VK_GOTTPREL = 0x005, - VK_TPREL = 0x006, - VK_TLSDESC = 0x007, - VK_SymLocBits = 0x00f, - - // Variants specifying which part of the final address calculation is - // used. E.g. the low 12 bits for an ADD/LDR, the middle 16 bits for a - // MOVZ/MOVK. - VK_PAGE = 0x010, - VK_PAGEOFF = 0x020, - VK_G0 = 0x030, - VK_G1 = 0x040, - VK_G2 = 0x050, - VK_G3 = 0x060, - VK_AddressFragBits = 0x0f0, - - // Whether the final relocation is a checked one (where a linker should - // perform a range-check on the final address) or not. Note that this field - // is unfortunately sometimes omitted from the assembly syntax. E.g. :lo12: - // on its own is a non-checked relocation. We side with ELF on being - // explicit about this! - VK_NC = 0x100, - - // Convenience definitions for referring to specific textual representations - // of relocation specifiers. Note that this means the "_NC" is sometimes - // omitted in line with assembly syntax here (VK_LO12 rather than VK_LO12_NC - // since a user would write ":lo12:"). - VK_CALL = VK_ABS, - VK_ABS_PAGE = VK_ABS | VK_PAGE, - VK_ABS_G3 = VK_ABS | VK_G3, - VK_ABS_G2 = VK_ABS | VK_G2, - VK_ABS_G2_NC = VK_ABS | VK_G2 | VK_NC, - VK_ABS_G1 = VK_ABS | VK_G1, - VK_ABS_G1_NC = VK_ABS | VK_G1 | VK_NC, - VK_ABS_G0 = VK_ABS | VK_G0, - VK_ABS_G0_NC = VK_ABS | VK_G0 | VK_NC, - VK_LO12 = VK_ABS | VK_PAGEOFF | VK_NC, - VK_GOT_LO12 = VK_GOT | VK_PAGEOFF | VK_NC, - VK_GOT_PAGE = VK_GOT | VK_PAGE, - VK_DTPREL_G2 = VK_DTPREL | VK_G2, - VK_DTPREL_G1 = VK_DTPREL | VK_G1, - VK_DTPREL_G1_NC = VK_DTPREL | VK_G1 | VK_NC, - VK_DTPREL_G0 = VK_DTPREL | VK_G0, - VK_DTPREL_G0_NC = VK_DTPREL | VK_G0 | VK_NC, - VK_DTPREL_LO12 = VK_DTPREL | VK_PAGEOFF, - VK_DTPREL_LO12_NC = VK_DTPREL | VK_PAGEOFF | VK_NC, - VK_GOTTPREL_PAGE = VK_GOTTPREL | VK_PAGE, - VK_GOTTPREL_LO12_NC = VK_GOTTPREL | VK_PAGEOFF | VK_NC, - VK_GOTTPREL_G1 = VK_GOTTPREL | VK_G1, - VK_GOTTPREL_G0_NC = VK_GOTTPREL | VK_G0 | VK_NC, - VK_TPREL_G2 = VK_TPREL | VK_G2, - VK_TPREL_G1 = VK_TPREL | VK_G1, - VK_TPREL_G1_NC = VK_TPREL | VK_G1 | VK_NC, - VK_TPREL_G0 = VK_TPREL | VK_G0, - VK_TPREL_G0_NC = VK_TPREL | VK_G0 | VK_NC, - VK_TPREL_LO12 = VK_TPREL | VK_PAGEOFF, - VK_TPREL_LO12_NC = VK_TPREL | VK_PAGEOFF | VK_NC, - VK_TLSDESC_LO12 = VK_TLSDESC | VK_PAGEOFF | VK_NC, - VK_TLSDESC_PAGE = VK_TLSDESC | VK_PAGE, - - VK_INVALID = 0xfff - }; - -private: - const MCExpr *Expr; - const VariantKind Kind; - - explicit ARM64MCExpr(const MCExpr *Expr, VariantKind Kind) - : Expr(Expr), Kind(Kind) {} - -public: - /// @name Construction - /// @{ - - static const ARM64MCExpr *Create(const MCExpr *Expr, VariantKind Kind, - MCContext &Ctx); - - /// @} - /// @name Accessors - /// @{ - - /// Get the kind of this expression. - VariantKind getKind() const { return static_cast<VariantKind>(Kind); } - - /// Get the expression this modifier applies to. - const MCExpr *getSubExpr() const { return Expr; } - - /// @} - /// @name VariantKind information extractors. - /// @{ - - static VariantKind getSymbolLoc(VariantKind Kind) { - return static_cast<VariantKind>(Kind & VK_SymLocBits); - } - - static VariantKind getAddressFrag(VariantKind Kind) { - return static_cast<VariantKind>(Kind & VK_AddressFragBits); - } - - static bool isNotChecked(VariantKind Kind) { return Kind & VK_NC; } - - /// @} - - /// Convert the variant kind into an ELF-appropriate modifier - /// (e.g. ":got:", ":lo12:"). 
- StringRef getVariantKindName() const; - - void PrintImpl(raw_ostream &OS) const; - - void AddValueSymbols(MCAssembler *) const; - - const MCSection *FindAssociatedSection() const; - - bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const; - - void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const; - - static bool classof(const MCExpr *E) { - return E->getKind() == MCExpr::Target; - } - - static bool classof(const ARM64MCExpr *) { return true; } - -}; -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp deleted file mode 100644 index 8d54412..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp +++ /dev/null @@ -1,167 +0,0 @@ -//===-- ARM64MCTargetDesc.cpp - ARM64 Target Descriptions -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides ARM64 specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#include "ARM64MCTargetDesc.h" -#include "ARM64ELFStreamer.h" -#include "ARM64MCAsmInfo.h" -#include "InstPrinter/ARM64InstPrinter.h" -#include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_INSTRINFO_MC_DESC -#include "ARM64GenInstrInfo.inc" - -#define GET_SUBTARGETINFO_MC_DESC -#include "ARM64GenSubtargetInfo.inc" - -#define GET_REGINFO_MC_DESC -#include "ARM64GenRegisterInfo.inc" - -using namespace llvm; - -static MCInstrInfo *createARM64MCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitARM64MCInstrInfo(X); - return X; -} - -static MCSubtargetInfo *createARM64MCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitARM64MCSubtargetInfo(X, TT, CPU, FS); - return X; -} - -static MCRegisterInfo *createARM64MCRegisterInfo(StringRef Triple) { - MCRegisterInfo *X = new MCRegisterInfo(); - InitARM64MCRegisterInfo(X, ARM64::LR); - return X; -} - -static MCAsmInfo *createARM64MCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { - Triple TheTriple(TT); - - MCAsmInfo *MAI; - if (TheTriple.isOSDarwin()) - MAI = new ARM64MCAsmInfoDarwin(); - else { - assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF"); - MAI = new ARM64MCAsmInfoELF(); - } - - // Initial state of the frame pointer is SP. - unsigned Reg = MRI.getDwarfRegNum(ARM64::SP, true); - MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 0); - MAI->addInitialFrameState(Inst); - - return MAI; -} - -static MCCodeGenInfo *createARM64MCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { - Triple TheTriple(TT); - assert((TheTriple.isOSBinFormatELF() || TheTriple.isOSBinFormatMachO()) && - "Only expect Darwin and ELF targets"); - - if (CM == CodeModel::Default) - CM = CodeModel::Small; - // The default MCJIT memory managers make no guarantees about where they can - // find an executable page; JITed code needs to be able to refer to globals - // no matter how far away they are. 
- else if (CM == CodeModel::JITDefault) - CM = CodeModel::Large; - else if (CM != CodeModel::Small && CM != CodeModel::Large) - report_fatal_error("Only small and large code models are allowed on ARM64"); - - // ARM64 Darwin is always PIC. - if (TheTriple.isOSDarwin()) - RM = Reloc::PIC_; - // On ELF platforms the default static relocation model has a smart enough - // linker to cope with referencing external symbols defined in a shared - // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. - else if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) - RM = Reloc::Static; - - MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM, OL); - return X; -} - -static MCInstPrinter *createARM64MCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - if (SyntaxVariant == 0) - return new ARM64InstPrinter(MAI, MII, MRI, STI); - if (SyntaxVariant == 1) - return new ARM64AppleInstPrinter(MAI, MII, MRI, STI); - - return 0; -} - -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll, - bool NoExecStack) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin()) - return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, - /*LabelSections*/ true); - - return createARM64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack); -} - -// Force static initialization. -extern "C" void LLVMInitializeARM64TargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn X(TheARM64Target, createARM64MCAsmInfo); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target, - createARM64MCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheARM64Target, createARM64MCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheARM64Target, createARM64MCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target, - createARM64MCSubtargetInfo); - - // Register the asm backend. - TargetRegistry::RegisterMCAsmBackend(TheARM64Target, createARM64AsmBackend); - - // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(TheARM64Target, - createARM64MCCodeEmitter); - - // Register the object streamer. - TargetRegistry::RegisterMCObjectStreamer(TheARM64Target, createMCStreamer); - - // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(TheARM64Target, - createARM64MCInstPrinter); -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h b/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h deleted file mode 100644 index 0db2b22..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h +++ /dev/null @@ -1,62 +0,0 @@ -//===-- ARM64MCTargetDesc.h - ARM64 Target Descriptions ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides ARM64 specific target descriptions. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ARM64MCTARGETDESC_H -#define ARM64MCTARGETDESC_H - -#include "llvm/Support/DataTypes.h" -#include <string> - -namespace llvm { -class MCAsmBackend; -class MCCodeEmitter; -class MCContext; -class MCInstrInfo; -class MCRegisterInfo; -class MCObjectWriter; -class MCSubtargetInfo; -class StringRef; -class Target; -class raw_ostream; - -extern Target TheARM64Target; - -MCCodeEmitter *createARM64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx); -MCAsmBackend *createARM64AsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU); - -MCObjectWriter *createARM64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI); - -MCObjectWriter *createARM64MachObjectWriter(raw_ostream &OS, uint32_t CPUType, - uint32_t CPUSubtype); - -} // End llvm namespace - -// Defines symbolic names for ARM64 registers. This defines a mapping from -// register name to register number. -// -#define GET_REGINFO_ENUM -#include "ARM64GenRegisterInfo.inc" - -// Defines symbolic names for the ARM64 instructions. -// -#define GET_INSTRINFO_ENUM -#include "ARM64GenInstrInfo.inc" - -#define GET_SUBTARGETINFO_ENUM -#include "ARM64GenSubtargetInfo.inc" - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp deleted file mode 100644 index 1733dc5..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp +++ /dev/null @@ -1,396 +0,0 @@ -//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "MCTargetDesc/ARM64MCTargetDesc.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCAsmLayout.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCFixup.h" -#include "llvm/MC/MCMachObjectWriter.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCValue.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" -using namespace llvm; - -namespace { -class ARM64MachObjectWriter : public MCMachObjectTargetWriter { - bool getARM64FixupKindMachOInfo(const MCFixup &Fixup, unsigned &RelocType, - const MCSymbolRefExpr *Sym, - unsigned &Log2Size, const MCAssembler &Asm); - -public: - ARM64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype) - : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype, - /*UseAggressiveSymbolFolding=*/true) {} - - void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, - const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue); -}; -} - -bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo( - const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym, - unsigned &Log2Size, const MCAssembler &Asm) { - RelocType = unsigned(MachO::ARM64_RELOC_UNSIGNED); - Log2Size = ~0U; - - switch ((unsigned)Fixup.getKind()) { - default: - return false; - - case FK_Data_1: - Log2Size = llvm::Log2_32(1); - return true; - case FK_Data_2: - Log2Size = llvm::Log2_32(2); - return true; - case FK_Data_4: - Log2Size = llvm::Log2_32(4); - if (Sym->getKind() == MCSymbolRefExpr::VK_GOT) - RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT); - return true; - case FK_Data_8: - Log2Size = llvm::Log2_32(8); - if (Sym->getKind() == MCSymbolRefExpr::VK_GOT) - RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT); - return true; - case ARM64::fixup_arm64_add_imm12: - case ARM64::fixup_arm64_ldst_imm12_scale1: - case ARM64::fixup_arm64_ldst_imm12_scale2: - case ARM64::fixup_arm64_ldst_imm12_scale4: - case ARM64::fixup_arm64_ldst_imm12_scale8: - case ARM64::fixup_arm64_ldst_imm12_scale16: - Log2Size = llvm::Log2_32(4); - switch (Sym->getKind()) { - default: - assert(0 && "Unexpected symbol reference variant kind!"); - case MCSymbolRefExpr::VK_PAGEOFF: - RelocType = unsigned(MachO::ARM64_RELOC_PAGEOFF12); - return true; - case MCSymbolRefExpr::VK_GOTPAGEOFF: - RelocType = unsigned(MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12); - return true; - case MCSymbolRefExpr::VK_TLVPPAGEOFF: - RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12); - return true; - } - case ARM64::fixup_arm64_pcrel_adrp_imm21: - Log2Size = llvm::Log2_32(4); - // This encompasses the relocation for the whole 21-bit value. 
- switch (Sym->getKind()) { - default: - Asm.getContext().FatalError(Fixup.getLoc(), - "ADR/ADRP relocations must be GOT relative"); - case MCSymbolRefExpr::VK_PAGE: - RelocType = unsigned(MachO::ARM64_RELOC_PAGE21); - return true; - case MCSymbolRefExpr::VK_GOTPAGE: - RelocType = unsigned(MachO::ARM64_RELOC_GOT_LOAD_PAGE21); - return true; - case MCSymbolRefExpr::VK_TLVPPAGE: - RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGE21); - return true; - } - return true; - case ARM64::fixup_arm64_pcrel_branch26: - case ARM64::fixup_arm64_pcrel_call26: - Log2Size = llvm::Log2_32(4); - RelocType = unsigned(MachO::ARM64_RELOC_BRANCH26); - return true; - } -} - -void ARM64MachObjectWriter::RecordRelocation( - MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue) { - unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); - - // See <reloc.h>. - uint32_t FixupOffset = Layout.getFragmentOffset(Fragment); - unsigned Log2Size = 0; - int64_t Value = 0; - unsigned Index = 0; - unsigned IsExtern = 0; - unsigned Type = 0; - unsigned Kind = Fixup.getKind(); - - FixupOffset += Fixup.getOffset(); - - // ARM64 pcrel relocation addends do not include the section offset. - if (IsPCRel) - FixedValue += FixupOffset; - - // ADRP fixups use relocations for the whole symbol value and only - // put the addend in the instruction itself. Clear out any value the - // generic code figured out from the sybmol definition. - if (Kind == ARM64::fixup_arm64_pcrel_adrp_imm21 || - Kind == ARM64::fixup_arm64_pcrel_imm19) - FixedValue = 0; - - // imm19 relocations are for conditional branches, which require - // assembler local symbols. If we got here, that's not what we have, - // so complain loudly. - if (Kind == ARM64::fixup_arm64_pcrel_imm19) { - Asm.getContext().FatalError(Fixup.getLoc(), - "conditional branch requires assembler-local" - " label. '" + - Target.getSymA()->getSymbol().getName() + - "' is external."); - return; - } - - // 14-bit branch relocations should only target internal labels, and so - // should never get here. - if (Kind == ARM64::fixup_arm64_pcrel_branch14) { - Asm.getContext().FatalError(Fixup.getLoc(), - "Invalid relocation on conditional branch!"); - return; - } - - if (!getARM64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size, - Asm)) { - Asm.getContext().FatalError(Fixup.getLoc(), "unknown ARM64 fixup kind!"); - return; - } - - Value = Target.getConstant(); - - if (Target.isAbsolute()) { // constant - // FIXME: Should this always be extern? - // SymbolNum of 0 indicates the absolute section. - Type = MachO::ARM64_RELOC_UNSIGNED; - Index = 0; - - if (IsPCRel) { - IsExtern = 1; - Asm.getContext().FatalError(Fixup.getLoc(), - "PC relative absolute relocation!"); - - // FIXME: x86_64 sets the type to a branch reloc here. Should we do - // something similar? - } - } else if (Target.getSymB()) { // A - B + constant - const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData &A_SD = Asm.getSymbolData(*A); - const MCSymbolData *A_Base = Asm.getAtom(&A_SD); - - const MCSymbol *B = &Target.getSymB()->getSymbol(); - MCSymbolData &B_SD = Asm.getSymbolData(*B); - const MCSymbolData *B_Base = Asm.getAtom(&B_SD); - - // Check for "_foo@got - .", which comes through here as: - // Ltmp0: - // ... 
_foo@got - Ltmp0 - if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOT && - Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None && - Layout.getSymbolOffset(&B_SD) == - Layout.getFragmentOffset(Fragment) + Fixup.getOffset()) { - // SymB is the PC, so use a PC-rel pointer-to-GOT relocation. - Index = A_Base->getIndex(); - IsExtern = 1; - Type = MachO::ARM64_RELOC_POINTER_TO_GOT; - IsPCRel = 1; - MachO::any_relocation_info MRE; - MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | - (IsExtern << 27) | (Type << 28)); - Writer->addRelocation(Fragment->getParent(), MRE); - return; - } else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || - Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) - // Otherwise, neither symbol can be modified. - Asm.getContext().FatalError(Fixup.getLoc(), - "unsupported relocation of modified symbol"); - - // We don't support PCrel relocations of differences. - if (IsPCRel) - Asm.getContext().FatalError(Fixup.getLoc(), - "unsupported pc-relative relocation of " - "difference"); - - // ARM64 always uses external relocations. If there is no symbol to use as - // a base address (a local symbol with no preceeding non-local symbol), - // error out. - // - // FIXME: We should probably just synthesize an external symbol and use - // that. - if (!A_Base) - Asm.getContext().FatalError( - Fixup.getLoc(), - "unsupported relocation of local symbol '" + A->getName() + - "'. Must have non-local symbol earlier in section."); - if (!B_Base) - Asm.getContext().FatalError( - Fixup.getLoc(), - "unsupported relocation of local symbol '" + B->getName() + - "'. Must have non-local symbol earlier in section."); - - if (A_Base == B_Base && A_Base) - Asm.getContext().FatalError(Fixup.getLoc(), - "unsupported relocation with identical base"); - - Value += (A_SD.getFragment() == NULL ? 0 : Writer->getSymbolAddress( - &A_SD, Layout)) - - (A_Base == NULL || A_Base->getFragment() == NULL - ? 0 - : Writer->getSymbolAddress(A_Base, Layout)); - Value -= (B_SD.getFragment() == NULL ? 0 : Writer->getSymbolAddress( - &B_SD, Layout)) - - (B_Base == NULL || B_Base->getFragment() == NULL - ? 0 - : Writer->getSymbolAddress(B_Base, Layout)); - - Index = A_Base->getIndex(); - IsExtern = 1; - Type = MachO::ARM64_RELOC_UNSIGNED; - - MachO::any_relocation_info MRE; - MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | - (IsExtern << 27) | (Type << 28)); - Writer->addRelocation(Fragment->getParent(), MRE); - - Index = B_Base->getIndex(); - IsExtern = 1; - Type = MachO::ARM64_RELOC_SUBTRACTOR; - } else { // A + constant - const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); - MCSymbolData &SD = Asm.getSymbolData(*Symbol); - const MCSymbolData *Base = Asm.getAtom(&SD); - const MCSectionMachO &Section = static_cast<const MCSectionMachO &>( - Fragment->getParent()->getSection()); - - // If the symbol is a variable and we weren't able to get a Base for it - // (i.e., it's not in the symbol table associated with a section) resolve - // the relocation based its expansion instead. - if (Symbol->isVariable() && !Base) { - // If the evaluation is an absolute value, just use that directly - // to keep things easy. - int64_t Res; - if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute( - Res, Layout, Writer->getSectionAddressMap())) { - FixedValue = Res; - return; - } - - // FIXME: Will the Target we already have ever have any data in it - // we need to preserve and merge with the new Target? 
How about - // the FixedValue? - if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target, &Layout)) - Asm.getContext().FatalError(Fixup.getLoc(), - "unable to resolve variable '" + - Symbol->getName() + "'"); - return RecordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, - FixedValue); - } - - // Relocations inside debug sections always use local relocations when - // possible. This seems to be done because the debugger doesn't fully - // understand relocation entries and expects to find values that - // have already been fixed up. - if (Symbol->isInSection()) { - if (Section.hasAttribute(MachO::S_ATTR_DEBUG)) - Base = 0; - } - - // ARM64 uses external relocations as much as possible. For debug sections, - // and for pointer-sized relocations (.quad), we allow section relocations. - // It's code sections that run into trouble. - if (Base) { - Index = Base->getIndex(); - IsExtern = 1; - - // Add the local offset, if needed. - if (Base != &SD) - Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); - } else if (Symbol->isInSection()) { - // Pointer-sized relocations can use a local relocation. Otherwise, - // we have to be in a debug info section. - if (!Section.hasAttribute(MachO::S_ATTR_DEBUG) && Log2Size != 3) - Asm.getContext().FatalError( - Fixup.getLoc(), - "unsupported relocation of local symbol '" + Symbol->getName() + - "'. Must have non-local symbol earlier in section."); - // Adjust the relocation to be section-relative. - // The index is the section ordinal (1-based). - const MCSectionData &SymSD = - Asm.getSectionData(SD.getSymbol().getSection()); - Index = SymSD.getOrdinal() + 1; - IsExtern = 0; - Value += Writer->getSymbolAddress(&SD, Layout); - - if (IsPCRel) - Value -= Writer->getFragmentAddress(Fragment, Layout) + - Fixup.getOffset() + (1ULL << Log2Size); - } else { - // Resolve constant variables. - if (SD.getSymbol().isVariable()) { - int64_t Res; - if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute( - Res, Layout, Writer->getSectionAddressMap())) { - FixedValue = Res; - return; - } - } - Asm.getContext().FatalError(Fixup.getLoc(), - "unsupported relocation of variable '" + - Symbol->getName() + "'"); - } - } - - // If the relocation kind is Branch26, Page21, or Pageoff12, any addend - // is represented via an Addend relocation, not encoded directly into - // the instruction. - if ((Type == MachO::ARM64_RELOC_BRANCH26 || - Type == MachO::ARM64_RELOC_PAGE21 || - Type == MachO::ARM64_RELOC_PAGEOFF12) && - Value) { - assert((Value & 0xff000000) == 0 && "Added relocation out of range!"); - - MachO::any_relocation_info MRE; - MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | - (IsExtern << 27) | (Type << 28)); - Writer->addRelocation(Fragment->getParent(), MRE); - - // Now set up the Addend relocation. - Type = MachO::ARM64_RELOC_ADDEND; - Index = Value; - IsPCRel = 0; - Log2Size = 2; - IsExtern = 0; - - // Put zero into the instruction itself. The addend is in the relocation. - Value = 0; - } - - // If there's any addend left to handle, encode it in the instruction. 
- FixedValue = Value; - - // struct relocation_info (8 bytes) - MachO::any_relocation_info MRE; - MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | - (IsExtern << 27) | (Type << 28)); - Writer->addRelocation(Fragment->getParent(), MRE); -} - -MCObjectWriter *llvm::createARM64MachObjectWriter(raw_ostream &OS, - uint32_t CPUType, - uint32_t CPUSubtype) { - return createMachObjectWriter(new ARM64MachObjectWriter(CPUType, CPUSubtype), - OS, /*IsLittleEndian=*/true); -} diff --git a/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index f8665bc..0000000 --- a/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -add_llvm_library(LLVMARM64Desc - ARM64AsmBackend.cpp - ARM64ELFObjectWriter.cpp - ARM64ELFStreamer.cpp - ARM64MCAsmInfo.cpp - ARM64MCCodeEmitter.cpp - ARM64MCExpr.cpp - ARM64MCTargetDesc.cpp - ARM64MachObjectWriter.cpp -) -add_dependencies(LLVMARM64Desc ARM64CommonTableGen) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt deleted file mode 100644 index e4c74d2..0000000 --- a/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ARM64Desc -parent = ARM64 -required_libraries = ARM64AsmPrinter ARM64Info MC Support -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/MCTargetDesc/Makefile b/lib/Target/ARM64/MCTargetDesc/Makefile deleted file mode 100644 index 013cc63..0000000 --- a/lib/Target/ARM64/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/ARM64/TargetDesc/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Desc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/Makefile b/lib/Target/ARM64/Makefile deleted file mode 100644 index 5f0f307..0000000 --- a/lib/Target/ARM64/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -##===- lib/Target/ARM64/Makefile ---------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMARM64CodeGen -TARGET = ARM64 - -# Make sure that tblgen is run, first thing. -BUILT_SOURCES = ARM64GenRegisterInfo.inc ARM64GenInstrInfo.inc \ - ARM64GenAsmWriter.inc ARM64GenAsmWriter1.inc \ - ARM64GenDAGISel.inc \ - ARM64GenCallingConv.inc ARM64GenAsmMatcher.inc \ - ARM64GenSubtargetInfo.inc ARM64GenMCCodeEmitter.inc \ - ARM64GenFastISel.inc ARM64GenDisassemblerTables.inc \ - ARM64GenMCPseudoLowering.inc - -DIRS = TargetInfo InstPrinter AsmParser Disassembler MCTargetDesc - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp b/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp deleted file mode 100644 index dec09ed..0000000 --- a/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp +++ /dev/null @@ -1,21 +0,0 @@ -//===-- ARM64TargetInfo.cpp - ARM64 Target Implementation -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/Triple.h" -#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -namespace llvm { -Target TheARM64Target; -} // end namespace llvm - -extern "C" void LLVMInitializeARM64TargetInfo() { - RegisterTarget<Triple::arm64, /*HasJIT=*/true> X(TheARM64Target, "arm64", - "ARM64"); -} diff --git a/lib/Target/ARM64/TargetInfo/CMakeLists.txt b/lib/Target/ARM64/TargetInfo/CMakeLists.txt deleted file mode 100644 index a0142c4..0000000 --- a/lib/Target/ARM64/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARM64Info - ARM64TargetInfo.cpp - ) - -add_dependencies(LLVMARM64Info ARM64CommonTableGen) diff --git a/lib/Target/ARM64/TargetInfo/LLVMBuild.txt b/lib/Target/ARM64/TargetInfo/LLVMBuild.txt deleted file mode 100644 index 5bea694..0000000 --- a/lib/Target/ARM64/TargetInfo/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/ARM64/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ARM64Info -parent = ARM64 -required_libraries = MC Support -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/TargetInfo/Makefile b/lib/Target/ARM64/TargetInfo/Makefile deleted file mode 100644 index 2d5a1a0..0000000 --- a/lib/Target/ARM64/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM64/TargetInfo/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Info - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common