diff options
author | Tim Northover <Tim.Northover@arm.com> | 2013-01-31 12:12:40 +0000 |
---|---|---|
committer | Tim Northover <Tim.Northover@arm.com> | 2013-01-31 12:12:40 +0000 |
commit | 72062f5744557e270a38192554c3126ea5f97434 (patch) | |
tree | ae6e4c8abb4e0572745e7849b4948c58fea3e8d0 /lib | |
parent | d72b4d321e317327330e1e82d0f652d4e237c171 (diff) | |
download | external_llvm-72062f5744557e270a38192554c3126ea5f97434.zip external_llvm-72062f5744557e270a38192554c3126ea5f97434.tar.gz external_llvm-72062f5744557e270a38192554c3126ea5f97434.tar.bz2 |
Add AArch64 as an experimental target.
This patch adds support for AArch64 (ARM's 64-bit architecture) to
LLVM in the "experimental" category. Currently, it won't be built
unless requested explicitly.
This initial commit should have support for:
+ Assembly of all scalar (i.e. non-NEON, non-Crypto) instructions
(except the late addition CRC instructions).
+ CodeGen features required for C++03 and C99.
+ Compilation for the "small" memory model: code+static data <
4GB.
+ Absolute and position-independent code.
+ GNU-style (i.e. "__thread") TLS.
+ Debugging information.
The principal omission, currently, is performance tuning.
This patch excludes the NEON support that was also reviewed, due to an outbreak of
batshit insanity in our legal department. That will be committed soon, bringing
the changes to precisely what has been approved.
Further reviews would be gratefully received.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174054 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
72 files changed, 22640 insertions, 2 deletions
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index e5b749e..c4c8e6e 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -300,7 +300,9 @@ void MCELFStreamer::EmitFileDirective(StringRef Filename) { void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { switch (expr->getKind()) { - case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!"); + case MCExpr::Target: + cast<MCTargetExpr>(expr)->fixELFSymbolsInTLSFixups(getAssembler()); + break; case MCExpr::Constant: break; diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 1f5548f..ae0abde 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -256,6 +256,25 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { TTypeEncoding = (CMModel == CodeModel::Small) ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr; } + } else if (T.getArch() == Triple::aarch64) { + FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + + // The small model guarantees static code/data size < 4GB, but not where it + // will be in memory. Most of these could end up >2GB away so even a signed + // pc-relative 32-bit address is insufficient, theoretically. 
+ if (RelocM == Reloc::PIC_) { + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata8; + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8; + FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata8; + } else { + PersonalityEncoding = dwarf::DW_EH_PE_absptr; + LSDAEncoding = dwarf::DW_EH_PE_absptr; + FDEEncoding = dwarf::DW_EH_PE_udata4; + TTypeEncoding = dwarf::DW_EH_PE_absptr; + } } else if (T.getArch() == Triple::ppc64) { PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index ad7b189..d2508ac 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -19,6 +19,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { switch (Kind) { case UnknownArch: return "unknown"; + case aarch64: return "aarch64"; case arm: return "arm"; case hexagon: return "hexagon"; case mips: return "mips"; @@ -53,6 +54,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { default: return 0; + case aarch64: return "aarch64"; + case arm: case thumb: return "arm"; @@ -152,6 +155,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) { Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { return StringSwitch<Triple::ArchType>(Name) + .Case("aarch64", aarch64) .Case("arm", arm) .Case("mips", mips) .Case("mipsel", mipsel) @@ -215,6 +219,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("powerpc", Triple::ppc) .Cases("powerpc64", "ppu", Triple::ppc64) .Case("mblaze", Triple::mblaze) + .Case("aarch64", Triple::aarch64) .Cases("arm", "xscale", Triple::arm) // FIXME: It would be good to replace these with explicit names for all the // various suffixes supported. 
@@ -676,6 +681,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::spir: return 32; + case llvm::Triple::aarch64: case llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::nvptx64: @@ -704,6 +710,7 @@ Triple Triple::get32BitArchVariant() const { Triple T(*this); switch (getArch()) { case Triple::UnknownArch: + case Triple::aarch64: case Triple::msp430: T.setArch(UnknownArch); break; @@ -755,6 +762,7 @@ Triple Triple::get64BitArchVariant() const { T.setArch(UnknownArch); break; + case Triple::aarch64: case Triple::spir64: case Triple::mips64: case Triple::mips64el: diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h new file mode 100644 index 0000000..622814d --- /dev/null +++ b/lib/Target/AArch64/AArch64.h @@ -0,0 +1,42 @@ +//==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// AArch64 back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_AARCH64_H +#define LLVM_TARGET_AARCH64_H + +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class AArch64AsmPrinter; +class FunctionPass; +class AArch64TargetMachine; +class MachineInstr; +class MCInst; + +FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM, + CodeGenOpt::Level OptLevel); + +FunctionPass *createAArch64ConstantIslandPass(); + +FunctionPass *createAArch64CleanupLocalDynamicTLSPass(); + +void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + AArch64AsmPrinter &AP); + + +} + +#endif diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td new file mode 100644 index 0000000..750fec7 --- /dev/null +++ b/lib/Target/AArch64/AArch64.td @@ -0,0 +1,68 @@ +//===- AArch64.td - Describe the AArch64 Target Machine ---------*- tblgen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is the top level entry point for the AArch64 target. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// AArch64 Subtarget features. 
+// + +def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", + "Enable Advanced SIMD instructions">; + +def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable cryptographic instructions">; + +//===----------------------------------------------------------------------===// +// AArch64 Processors +// + +include "AArch64Schedule.td" + +def : Processor<"generic", GenericItineraries, [FeatureNEON, FeatureCrypto]>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "AArch64RegisterInfo.td" + +include "AArch64CallingConv.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "AArch64InstrInfo.td" + +def AArch64InstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// Assembly printer +//===----------------------------------------------------------------------===// + +def A64InstPrinter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def AArch64 : Target { + let InstructionSet = AArch64InstrInfo; + let AssemblyWriters = [A64InstPrinter]; +} diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp new file mode 100644 index 0000000..63cc88f --- /dev/null +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -0,0 +1,361 @@ +//===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of 
Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format AArch64 assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "AArch64AsmPrinter.h" +#include "InstPrinter/AArch64InstPrinter.h" +#include "llvm/DebugInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +MachineLocation +AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { + // See emitFrameIndexDebugValue in InstrInfo for where this instruction is + // expected to be created. + assert(MI->getNumOperands() == 4 && MI->getOperand(0).isReg() + && MI->getOperand(1).isImm() && "unexpected custom DBG_VALUE"); + return MachineLocation(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); +} + +/// Try to print a floating-point register as if it belonged to a specified +/// register-class. For example the inline asm operand modifier "b" requires its +/// argument to be printed as "bN". 
+static bool printModifiedFPRAsmOperand(const MachineOperand &MO, + const TargetRegisterInfo *TRI, + const TargetRegisterClass &RegClass, + raw_ostream &O) { + if (!MO.isReg()) + return true; + + for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { + if (RegClass.contains(*AR)) { + O << AArch64InstPrinter::getRegisterName(*AR); + return false; + } + } + return true; +} + +/// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR +/// with the obvious type and an immediate 0 as either wzr or xzr. +static bool printModifiedGPRAsmOperand(const MachineOperand &MO, + const TargetRegisterInfo *TRI, + const TargetRegisterClass &RegClass, + raw_ostream &O) { + char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x'; + + if (MO.isImm() && MO.getImm() == 0) { + O << Prefix << "zr"; + return false; + } else if (MO.isReg()) { + if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) { + O << (Prefix == 'x' ? "sp" : "wsp"); + return false; + } + + for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { + if (RegClass.contains(*AR)) { + O << AArch64InstPrinter::getRegisterName(*AR); + return false; + } + } + } + + return true; +} + +bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO, + bool PrintImmediatePrefix, + StringRef Suffix, raw_ostream &O) { + StringRef Name; + StringRef Modifier; + switch (MO.getType()) { + default: llvm_unreachable("Unexpected operand for symbolic address constraint"); + case MachineOperand::MO_GlobalAddress: + Name = Mang->getSymbol(MO.getGlobal())->getName(); + + // Global variables may be accessed either via a GOT or in various fun and + // interesting TLS-model specific ways. Set the prefix modifier as + // appropriate here. 
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) { + Reloc::Model RelocM = TM.getRelocationModel(); + if (GV->isThreadLocal()) { + switch (TM.getTLSModel(GV)) { + case TLSModel::GeneralDynamic: + Modifier = "tlsdesc"; + break; + case TLSModel::LocalDynamic: + Modifier = "dtprel"; + break; + case TLSModel::InitialExec: + Modifier = "gottprel"; + break; + case TLSModel::LocalExec: + Modifier = "tprel"; + break; + } + } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) { + Modifier = "got"; + } + } + break; + case MachineOperand::MO_BlockAddress: + Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName(); + break; + case MachineOperand::MO_ExternalSymbol: + Name = MO.getSymbolName(); + break; + case MachineOperand::MO_ConstantPoolIndex: + Name = GetCPISymbol(MO.getIndex())->getName(); + break; + } + + // Some instructions (notably ADRP) don't take the # prefix for + // immediates. Only print it if asked to. + if (PrintImmediatePrefix) + O << '#'; + + // Only need the joining "_" if both the prefix and the suffix are + // non-null. This little block simply takes care of the four possibly + // combinations involved there. + if (Modifier == "" && Suffix == "") + O << Name; + else if (Modifier == "" && Suffix != "") + O << ":" << Suffix << ':' << Name; + else if (Modifier != "" && Suffix == "") + O << ":" << Modifier << ':' << Name; + else + O << ":" << Modifier << '_' << Suffix << ':' << Name; + + return false; +} + +bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + if (!ExtraCode || !ExtraCode[0]) { + // There's actually no operand modifier, which leads to a slightly eclectic + // set of behaviour which we have to handle here. 
+ const MachineOperand &MO = MI->getOperand(OpNum); + switch (MO.getType()) { + default: + llvm_unreachable("Unexpected operand for inline assembly"); + case MachineOperand::MO_Register: + // GCC prints the unmodified operand of a 'w' constraint as the vector + // register. Technically, we could allocate the argument as a VPR128, but + // that leads to extremely dodgy copies being generated to get the data + // there. + if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O)) + O << AArch64InstPrinter::getRegisterName(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + O << '#' << MO.getImm(); + break; + case MachineOperand::MO_FPImmediate: + assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected"); + O << "#0.0"; + break; + case MachineOperand::MO_BlockAddress: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + return printSymbolicAddress(MO, false, "", O); + } + return false; + } + + // We have a real modifier to handle. + switch(ExtraCode[0]) { + default: + // See if this is a generic operand + return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O); + case 'c': // Don't print "#" before an immediate operand. + if (!MI->getOperand(OpNum).isImm()) + return true; + O << MI->getOperand(OpNum).getImm(); + return false; + case 'w': + // Output 32-bit general register operand, constant zero as wzr, or stack + // pointer as wsp. Ignored when used with other operand types. + return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::GPR32RegClass, O); + case 'x': + // Output 64-bit general register operand, constant zero as xzr, or stack + // pointer as sp. Ignored when used with other operand types. 
+ return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::GPR64RegClass, O); + case 'H': + // Output higher numbered of a 64-bit general register pair + case 'Q': + // Output least significant register of a 64-bit general register pair + case 'R': + // Output most significant register of a 64-bit general register pair + + // FIXME note: these three operand modifiers will require, to some extent, + // adding a paired GPR64 register class. Initial investigation suggests that + // assertions are hit unless it has a type and is made legal for that type + // in ISelLowering. After that step is made, the number of modifications + // needed explodes (operation legality, calling conventions, stores, reg + // copies ...). + llvm_unreachable("FIXME: Unimplemented register pairs"); + case 'b': + // Output 8-bit FP/SIMD scalar register operand, prefixed with b. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR8RegClass, O); + case 'h': + // Output 16-bit FP/SIMD scalar register operand, prefixed with h. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR16RegClass, O); + case 's': + // Output 32-bit FP/SIMD scalar register operand, prefixed with s. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR32RegClass, O); + case 'd': + // Output 64-bit FP/SIMD scalar register operand, prefixed with d. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR64RegClass, O); + case 'q': + // Output 128-bit FP/SIMD scalar register operand, prefixed with q. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR128RegClass, O); + case 'A': + // Output symbolic address with appropriate relocation modifier (also + // suitable for ADRP). + return printSymbolicAddress(MI->getOperand(OpNum), false, "", O); + case 'L': + // Output bits 11:0 of symbolic address with appropriate :lo12: relocation + // modifier. 
+ return printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O); + case 'G': + // Output bits 23:12 of symbolic address with appropriate :hi12: relocation + // modifier (currently only for TLS local exec). + return printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O); + } + + +} + +bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNum, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + // Currently both the memory constraints (m and Q) behave the same and amount + // to the address as a single register. In future, we may allow "m" to provide + // both a base and an offset. + const MachineOperand &MO = MI->getOperand(OpNum); + assert(MO.isReg() && "unexpected inline assembly memory operand"); + O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']'; + return false; +} + +void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, + raw_ostream &OS) { + unsigned NOps = MI->getNumOperands(); + assert(NOps==4); + OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + OS << V.getName(); + OS << " <- "; + // Frame address. Currently handles register +- offset only. + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + OS << '[' << AArch64InstPrinter::getRegisterName(MI->getOperand(0).getReg()); + OS << '+' << MI->getOperand(1).getImm(); + OS << ']'; + OS << "+" << MI->getOperand(NOps - 2).getImm(); +} + + +#include "AArch64GenMCPseudoLowering.inc" + +void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { + // Do any auto-generated pseudo lowerings. 
+ if (emitPseudoExpansionLowering(OutStreamer, MI)) + return; + + switch (MI->getOpcode()) { + case AArch64::CONSTPOOL_ENTRY: { + unsigned LabelId = (unsigned)MI->getOperand(0).getImm(); + unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex(); + + OutStreamer.EmitLabel(GetCPISymbol(LabelId)); + + const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx]; + if (MCPE.isMachineConstantPoolEntry()) + EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); + else + EmitGlobalConstant(MCPE.Val.ConstVal); + + return; + } + case AArch64::DBG_VALUE: { + if (isVerbose() && OutStreamer.hasRawTextSupport()) { + SmallString<128> TmpStr; + raw_svector_ostream OS(TmpStr); + PrintDebugValueComment(MI, OS); + OutStreamer.EmitRawText(StringRef(OS.str())); + } + return; + } + } + + MCInst TmpInst; + LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this); + OutStreamer.EmitInstruction(TmpInst); +} + +void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { + if (Subtarget->isTargetELF()) { + const TargetLoweringObjectFileELF &TLOFELF = + static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); + + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); + const DataLayout *TD = TM.getDataLayout(); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + OutStreamer.EmitLabel(Stubs[i].first); + OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), + TD->getPointerSize(0), 0); + } + Stubs.clear(); + } + } +} + +bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) { + MCP = MF.getConstantPool(); + return AsmPrinter::runOnMachineFunction(MF); +} + +// Force static initialization. 
+extern "C" void LLVMInitializeAArch64AsmPrinter() { + RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64Target); +} + diff --git a/lib/Target/AArch64/AArch64AsmPrinter.h b/lib/Target/AArch64/AArch64AsmPrinter.h new file mode 100644 index 0000000..492be66 --- /dev/null +++ b/lib/Target/AArch64/AArch64AsmPrinter.h @@ -0,0 +1,85 @@ +// AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// AArch64 Assembly printer class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64ASMPRINTER_H +#define LLVM_AARCH64ASMPRINTER_H + +#include "AArch64.h" +#include "AArch64TargetMachine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +class MCOperand; + +class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter { + + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can + /// make the right decision when printing asm code for different targets. + const AArch64Subtarget *Subtarget; + const MachineConstantPool *MCP; + + // emitPseudoExpansionLowering - tblgen'erated. 
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, + const MachineInstr *MI); + + public: + explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) { + Subtarget = &TM.getSubtarget<AArch64Subtarget>(); + } + + bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const; + + MCOperand lowerSymbolOperand(const MachineOperand &MO, + const MCSymbol *Sym) const; + + void EmitInstruction(const MachineInstr *MI); + void EmitEndOfAsmFile(Module &M); + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + + void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); + + /// printSymbolicAddress - Given some kind of reasonably bare symbolic + /// reference, print out the appropriate asm string to represent it. If + /// appropriate, a relocation-specifier will be produced, composed of a + /// general class derived from the MO parameter and an instruction-specific + /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is + /// given. + bool printSymbolicAddress(const MachineOperand &MO, + bool PrintImmediatePrefix, + StringRef Suffix, raw_ostream &O); + + MachineLocation getDebugValueLocation(const MachineInstr *MI) const; + + virtual const char *getPassName() const { + return "AArch64 Assembly Printer"; + } + + /// A no-op on AArch64 because we emit our constant pool entries inline with + /// the function. 
+ virtual void EmitConstantPool() {} + + virtual bool runOnMachineFunction(MachineFunction &MF); +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/AArch64/AArch64CallingConv.td b/lib/Target/AArch64/AArch64CallingConv.td new file mode 100644 index 0000000..b880d83 --- /dev/null +++ b/lib/Target/AArch64/AArch64CallingConv.td @@ -0,0 +1,196 @@ +//==-- AArch64CallingConv.td - Calling Conventions for ARM ----*- tblgen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for AArch64 architecture. +//===----------------------------------------------------------------------===// + + +// The AArch64 Procedure Call Standard is unfortunately specified at a slightly +// higher level of abstraction than LLVM's target interface presents. In +// particular, it refers (like other ABIs, in fact) directly to +// structs. However, generic LLVM code takes the liberty of lowering structure +// arguments to the component fields before we see them. +// +// As a result, the obvious direct map from LLVM IR to PCS concepts can't be +// implemented, so the goals of this calling convention are, in decreasing +// priority order: +// 1. Expose *some* way to express the concepts required to implement the +// generic PCS from a front-end. +// 2. Provide a sane ABI for pure LLVM. +// 3. Follow the generic PCS as closely as is naturally possible. +// +// The suggested front-end implementation of PCS features is: +// * Integer, float and vector arguments of all sizes which end up in +// registers are passed and returned via the natural LLVM type. +// * Structure arguments with size <= 16 bytes are passed and returned in +// registers as similar integer or composite types. 
For example: +// [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed). +// * HFAs in registers follow rules similar to small structs: appropriate +// composite types. +// * Structure arguments with size > 16 bytes are passed via a pointer, +// handled completely by the front-end. +// * Structure return values > 16 bytes via an sret pointer argument. +// * Other stack-based arguments (not large structs) are passed using byval +// pointers. Padding arguments are added beforehand to guarantee a large +// struct doesn't later use integer registers. +// +// N.b. this means that it is the front-end's responsibility (if it cares about +// PCS compliance) to check whether enough registers are available for an +// argument when deciding how to pass it. + +class CCIfAlign<int Align, CCAction A>: + CCIf<"ArgFlags.getOrigAlign() == " # Align, A>; + +def CC_A64_APCS : CallingConv<[ + // SRet is an LLVM-specific concept, so it takes precedence over general ABI + // concerns. However, this rule will be used by C/C++ frontends to implement + // structure return. + CCIfSRet<CCAssignToReg<[X8]>>, + + // Put ByVal arguments directly on the stack. Minimum size and alignment of a + // slot is 64-bit. + CCIfByVal<CCPassByVal<8, 8>>, + + // Canonicalise the various types that live in different floating-point + // registers. This makes sense because the PCS does not distinguish Short + // Vectors and Floating-point types. + CCIfType<[v2i8], CCBitConvertToType<f16>>, + CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>, + CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCBitConvertToType<f128>>, + + // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision + // Floating-point or Short Vector Type and the NSRN is less than 8, then the + // argument is allocated to the least significant bits of register + // v[NSRN]. The NSRN is incremented by one. The argument has now been + // allocated." 
+ CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated + // SIMD and Floating-point registers (NSRN - number of elements < 8), then the + // argument is allocated to SIMD and Floating-point registers (with one + // register per element of the HFA). The NSRN is incremented by the number of + // registers used. The argument has now been allocated." + // + // N.b. As above, this rule is the responsibility of the front-end. + + // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of + // the argument is rounded up to the nearest multiple of 8 bytes." + // + // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short + // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural + // Alignment of the Argument's type." + // + // It is expected that these will be satisfied by adding dummy arguments to + // the prototype. + + // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point + // type then the size of the argument is set to 8 bytes. The effect is as if + // the argument had been copied to the least significant bits of a 64-bit + // register and the remaining bits filled with unspecified values." + CCIfType<[f16, f32], CCPromoteToType<f64>>, + + // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad- + // precision Floating-point or Short Vector Type, then the argument is copied + // to memory at the adjusted NSAA. The NSAA is incremented by the size of the + // argument. The argument has now been allocated." 
+ CCIfType<[f64], CCAssignToStack<8, 8>>, + CCIfType<[f128], CCAssignToStack<16, 16>>, + + // PCS: "C.7: If the argument is an Integral Type, the size of the argument is + // less than or equal to 8 bytes and the NGRN is less than 8, the argument is + // copied to the least significant bits of x[NGRN]. The NGRN is incremented by + // one. The argument has now been allocated." + + // First we implement C.8 and C.9 (128-bit types get even registers). i128 is + // represented as two i64s, the first one being split. If we delayed this + // operation C.8 would never be reached. + CCIfType<[i64], + CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], [X0, X1, X3, X5]>>>, + + // Note: the promotion also implements C.14. + CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, + + // And now the real implementation of C.7 + CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, + + // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded + // up to the next even number." + // + // "C.9: If the argument is an Integral Type, the size of the argument is + // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN] + // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the + // memory representation of the argument. The NGRN is incremented by two. The + // argument has now been allocated." + // + // Subtlety here: what if alignment is 16 but it is not an integral type? All + // floating-point types have been allocated already, which leaves composite + // types: this is why a front-end may need to produce i128 for a struct <= 16 + // bytes. + + // PCS: "C.10 If the argument is a Composite Type and the size in double-words + // of the argument is not more than 8 minus NGRN, then the argument is copied + // into consecutive general-purpose registers, starting at x[NGRN]. 
The + // argument is passed as though it had been loaded into the registers from a + // double-word aligned address with an appropriate sequence of LDR + // instructions loading consecutive registers from memory (the contents of any + // unused parts of the registers are unspecified by this standard). The NGRN + // is incremented by the number of registers used. The argument has now been + // allocated." + // + // Another one that's the responsibility of the front-end (sigh). + + // PCS: "C.11: The NGRN is set to 8." + CCCustom<"CC_AArch64NoMoreRegs">, + + // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural + // Alignment of the argument's type." + // + // PCS: "C.13: If the argument is a composite type then the argument is copied + // to memory at the adjusted NSAA. The NSAA is incremented by the size of the + // argument. The argument has now been allocated." + // + // Note that the effect of this corresponds to a memcpy rather than register + // stores so that the struct ends up correctly addressable at the adjusted + // NSAA. + + // PCS: "C.14: If the size of the argument is less than 8 bytes then the size + // of the argument is set to 8 bytes. The effect is as if the argument was + // copied to the least significant bits of a 64-bit register and the remaining + // bits filled with unspecified values." + // + // Integer types were widened above. Floating-point and composite types have + // already been allocated completely. Nothing to do. + + // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA + // is incremented by the size of the argument. The argument has now been + // allocated." + CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>, + CCIfType<[i64], CCAssignToStack<8, 8>> + +]>; + +// According to the PCS, X19-X30 are callee-saved, however only the low 64-bits +// of vector registers (8-15) are callee-saved.
The order here is picked up +// by PrologEpilogInserter.cpp to allocate stack slots, starting from top of +// stack upon entry. This gives the customary layout of x30 at [sp-8], x29 at +// [sp-16], ... +def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19), + (sequence "D%u", 15, 8))>; + + +// TLS descriptor calls are extremely restricted in their changes, to allow +// optimisations in the (hopefully) more common fast path where no real action +// is needed. They actually have to preserve all registers, except for the +// unavoidable X30 and the return register X0. +def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1), + (sequence "Q%u", 31, 0))>; diff --git a/lib/Target/AArch64/AArch64ConstantIslandPass.cpp b/lib/Target/AArch64/AArch64ConstantIslandPass.cpp new file mode 100644 index 0000000..7734866 --- /dev/null +++ b/lib/Target/AArch64/AArch64ConstantIslandPass.cpp @@ -0,0 +1,1420 @@ +//===-- AArch64ConstantIslandPass.cpp - AArch64 constant islands ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that splits the constant pool up into 'islands' +// which are scattered through-out the function. This is required due to the +// limited pc-relative displacements that AArch64 has.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "aarch64-cp-islands" +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64Subtarget.h" +#include "AArch64MachineFunctionInfo.h" +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumCPEs, "Number of constpool entries"); +STATISTIC(NumSplit, "Number of uncond branches inserted"); +STATISTIC(NumCBrFixed, "Number of cond branches fixed"); + +// FIXME: This option should be removed once it has received sufficient testing. +static cl::opt<bool> +AlignConstantIslands("aarch64-align-constant-islands", cl::Hidden, + cl::init(true), cl::desc("Align constant islands in code")); + +/// Return the worst case padding that could result from unknown offset bits. +/// This does not include alignment padding caused by known offset bits. +/// +/// @param LogAlign log2(alignment) +/// @param KnownBits Number of known low offset bits. 
+static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { + if (KnownBits < LogAlign) + return (1u << LogAlign) - (1u << KnownBits); + return 0; +} + +namespace { + /// Due to limited PC-relative displacements, AArch64 requires constant pool + /// entries to be scattered among the instructions inside a function. To do + /// this, it completely ignores the normal LLVM constant pool; instead, it + /// places constants wherever it feels like with special instructions. + /// + /// The terminology used in this pass includes: + /// Islands - Clumps of constants placed in the function. + /// Water - Potential places where an island could be formed. + /// CPE - A constant pool entry that has been placed somewhere, which + /// tracks a list of users. + class AArch64ConstantIslands : public MachineFunctionPass { + /// Information about the offset and size of a single basic block. + struct BasicBlockInfo { + /// Distance from the beginning of the function to the beginning of this + /// basic block. + /// + /// Offsets are computed assuming worst case padding before an aligned + /// block. This means that subtracting basic block offsets always gives a + /// conservative estimate of the real distance which may be smaller. + /// + /// Because worst case padding is used, the computed offset of an aligned + /// block may not actually be aligned. + unsigned Offset; + + /// Size of the basic block in bytes. If the block contains inline + /// assembly, this is a worst case estimate. + /// + /// The size does not include any alignment padding whether from the + /// beginning of the block, or from an aligned jump table at the end. + unsigned Size; + + /// The number of low bits in Offset that are known to be exact. The + /// remaining bits of Offset are an upper bound. + uint8_t KnownBits; + + /// When non-zero, the block contains instructions (inline asm) of unknown + /// size. The real size may be smaller than Size bytes by a multiple of 1 + /// << Unalign. 
+ uint8_t Unalign; + + BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {} + + /// Compute the number of known offset bits internally to this block. + /// This number should be used to predict worst case padding when + /// splitting the block. + unsigned internalKnownBits() const { + unsigned Bits = Unalign ? Unalign : KnownBits; + // If the block size isn't a multiple of the known bits, assume the + // worst case padding. + if (Size & ((1u << Bits) - 1)) + Bits = CountTrailingZeros_32(Size); + return Bits; + } + + /// Compute the offset immediately following this block. If LogAlign is + /// specified, return the offset the successor block will get if it has + /// this alignment. + unsigned postOffset(unsigned LogAlign = 0) const { + unsigned PO = Offset + Size; + if (!LogAlign) + return PO; + // Add alignment padding from the terminator. + return PO + UnknownPadding(LogAlign, internalKnownBits()); + } + + /// Compute the number of known low bits of postOffset. If this block + /// contains inline asm, the number of known bits drops to the + /// instruction alignment. An aligned terminator may increase the number + /// of know bits. + /// If LogAlign is given, also consider the alignment of the next block. + unsigned postKnownBits(unsigned LogAlign = 0) const { + return std::max(LogAlign, internalKnownBits()); + } + }; + + std::vector<BasicBlockInfo> BBInfo; + + /// A sorted list of basic blocks where islands could be placed (i.e. blocks + /// that don't fall through to the following block, due to a return, + /// unreachable, or unconditional branch). + std::vector<MachineBasicBlock*> WaterList; + + /// The subset of WaterList that was created since the previous iteration by + /// inserting unconditional branches. 
+ SmallSet<MachineBasicBlock*, 4> NewWaterList; + + typedef std::vector<MachineBasicBlock*>::iterator water_iterator; + + /// One user of a constant pool, keeping the machine instruction pointer, + /// the constant pool being referenced, and the number of bits used by the + /// instruction for displacement. The HighWaterMark records the highest + /// basic block where a new CPEntry can be placed. To ensure this pass + /// terminates, the CP entries are initially placed at the end of the + /// function and then move monotonically to lower addresses. The exception + /// to this rule is when the current CP entry for a particular CPUser is out + /// of range, but there is another CP entry for the same constant value in + /// range. We want to use the existing in-range CP entry, but if it later + /// moves out of range, the search for new water should resume where it left + /// off. The HighWaterMark is used to record that point. + struct CPUser { + MachineInstr *MI; + MachineInstr *CPEMI; + MachineBasicBlock *HighWaterMark; + private: + unsigned OffsetBits; + public: + CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned offsetbits) + : MI(mi), CPEMI(cpemi), OffsetBits(offsetbits) { + HighWaterMark = CPEMI->getParent(); + } + /// Returns the number of bits used to specify the offset. + unsigned getOffsetBits() const { + return OffsetBits; + } + + /// Returns the maximum positive displacement possible from this CPUser + /// (essentially INT<N>_MAX * 4). + unsigned getMaxPosDisp() const { + return (1 << (OffsetBits - 1)) - 1; + } + }; + + /// Keep track of all of the machine instructions that use various constant + /// pools and their max displacement. + std::vector<CPUser> CPUsers; + + /// One per constant pool entry, keeping the machine instruction pointer, + /// the constpool index, and the number of CPUser's which reference this + /// entry. 
+ struct CPEntry { + MachineInstr *CPEMI; + unsigned CPI; + unsigned RefCount; + CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0) + : CPEMI(cpemi), CPI(cpi), RefCount(rc) {} + }; + + /// Keep track of all of the constant pool entry machine instructions. For + /// each original constpool index (i.e. those that existed upon entry to + /// this pass), it keeps a vector of entries. Original elements are cloned + /// as we go along; the clones are put in the vector of the original + /// element, but have distinct CPIs. + std::vector<std::vector<CPEntry> > CPEntries; + + /// One per immediate branch, keeping the machine instruction pointer, + /// conditional or unconditional, the max displacement, and (if IsCond is + /// true) the corresponding inverted branch opcode. + struct ImmBranch { + MachineInstr *MI; + unsigned OffsetBits : 31; + bool IsCond : 1; + ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond) + : MI(mi), OffsetBits(offsetbits), IsCond(cond) {} + }; + + /// Keep track of all the immediate branch instructions. 
+ /// + std::vector<ImmBranch> ImmBranches; + + MachineFunction *MF; + MachineConstantPool *MCP; + const AArch64InstrInfo *TII; + const AArch64Subtarget *STI; + AArch64MachineFunctionInfo *AFI; + public: + static char ID; + AArch64ConstantIslands() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "AArch64 constant island placement pass"; + } + + private: + void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs); + CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); + unsigned getCPELogAlign(const MachineInstr *CPEMI); + void scanFunctionJumpTables(); + void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs); + MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); + void updateForInsertedWaterBlock(MachineBasicBlock *NewBB); + void adjustBBOffsetsAfter(MachineBasicBlock *BB); + bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI); + int findInRangeCPEntry(CPUser& U, unsigned UserOffset); + bool findAvailableWater(CPUser&U, unsigned UserOffset, + water_iterator &WaterIter); + void createNewWater(unsigned CPUserIndex, unsigned UserOffset, + MachineBasicBlock *&NewMBB); + bool handleConstantPoolUser(unsigned CPUserIndex); + void removeDeadCPEMI(MachineInstr *CPEMI); + bool removeUnusedCPEntries(); + bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, + MachineInstr *CPEMI, unsigned OffsetBits, + bool DoDump = false); + bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water, + CPUser &U, unsigned &Growth); + bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, + unsigned OffsetBits); + bool fixupImmediateBr(ImmBranch &Br); + bool fixupConditionalBr(ImmBranch &Br); + + void computeBlockSize(MachineBasicBlock *MBB); + unsigned getOffsetOf(MachineInstr *MI) const; + unsigned getUserOffset(CPUser&) const; + void dumpBBs(); + void verify(); + + bool isOffsetInRange(unsigned UserOffset, unsigned 
TrialOffset, + unsigned BitsAvailable); + bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, + const CPUser &U) { + return isOffsetInRange(UserOffset, TrialOffset, U.getOffsetBits()); + } + }; + char AArch64ConstantIslands::ID = 0; +} + +/// check BBOffsets, BBSizes, alignment of islands +void AArch64ConstantIslands::verify() { +#ifndef NDEBUG + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + unsigned MBBId = MBB->getNumber(); + assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); + } + DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n"); + for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { + CPUser &U = CPUsers[i]; + unsigned UserOffset = getUserOffset(U); + // Verify offset using the real max displacement without the safety + // adjustment. + if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getOffsetBits(), + /* DoDump = */ true)) { + DEBUG(dbgs() << "OK\n"); + continue; + } + DEBUG(dbgs() << "Out of range.\n"); + dumpBBs(); + DEBUG(MF->dump()); + llvm_unreachable("Constant pool entry out of range!"); + } +#endif +} + +/// print block size and offset information - debugging +void AArch64ConstantIslands::dumpBBs() { + DEBUG({ + for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { + const BasicBlockInfo &BBI = BBInfo[J]; + dbgs() << format("%08x BB#%u\t", BBI.Offset, J) + << " kb=" << unsigned(BBI.KnownBits) + << " ua=" << unsigned(BBI.Unalign) + << format(" size=%#x\n", BBInfo[J].Size); + } + }); +} + +/// Returns an instance of the constpool island pass. 
+FunctionPass *llvm::createAArch64ConstantIslandPass() { + return new AArch64ConstantIslands(); +} + +bool AArch64ConstantIslands::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + MCP = mf.getConstantPool(); + + DEBUG(dbgs() << "***** AArch64ConstantIslands: " + << MCP->getConstants().size() << " CP entries, aligned to " + << MCP->getConstantPoolAlignment() << " bytes *****\n"); + + TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo(); + AFI = MF->getInfo<AArch64MachineFunctionInfo>(); + STI = &MF->getTarget().getSubtarget<AArch64Subtarget>(); + + // This pass invalidates liveness information when it splits basic blocks. + MF->getRegInfo().invalidateLiveness(); + + // Renumber all of the machine basic blocks in the function, guaranteeing that + // the numbers agree with the position of the block in the function. + MF->RenumberBlocks(); + + // Perform the initial placement of the constant pool entries. To start with, + // we put them all at the end of the function. + std::vector<MachineInstr*> CPEMIs; + if (!MCP->isEmpty()) + doInitialPlacement(CPEMIs); + + /// The next UID to take is the first unused one. + AFI->initPICLabelUId(CPEMIs.size()); + + // Do the initial scan of the function, building up information about the + // sizes of each block, the location of all the water, and finding all of the + // constant pool users. + initializeFunctionInfo(CPEMIs); + CPEMIs.clear(); + DEBUG(dumpBBs()); + + + /// Remove dead constant pool entries. + bool MadeChange = removeUnusedCPEntries(); + + // Iteratively place constant pool entries and fix up branches until there + // is no change. 
+ unsigned NoCPIters = 0, NoBRIters = 0; + while (true) { + DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n'); + bool CPChange = false; + for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) + CPChange |= handleConstantPoolUser(i); + if (CPChange && ++NoCPIters > 30) + report_fatal_error("Constant Island pass failed to converge!"); + DEBUG(dumpBBs()); + + // Clear NewWaterList now. If we split a block for branches, it should + // appear as "new water" for the next iteration of constant pool placement. + NewWaterList.clear(); + + DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n'); + bool BRChange = false; + for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) + BRChange |= fixupImmediateBr(ImmBranches[i]); + if (BRChange && ++NoBRIters > 30) + report_fatal_error("Branch Fix Up pass failed to converge!"); + DEBUG(dumpBBs()); + + if (!CPChange && !BRChange) + break; + MadeChange = true; + } + + // After a while, this might be made debug-only, but it is not expensive. + verify(); + + DEBUG(dbgs() << '\n'; dumpBBs()); + + BBInfo.clear(); + WaterList.clear(); + CPUsers.clear(); + CPEntries.clear(); + ImmBranches.clear(); + + return MadeChange; +} + +/// Perform the initial placement of the constant pool entries. To start with, +/// we put them all at the end of the function. +void +AArch64ConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { + // Create the basic block to hold the CPE's. + MachineBasicBlock *BB = MF->CreateMachineBasicBlock(); + MF->push_back(BB); + + // MachineConstantPool measures alignment in bytes. We measure in log2(bytes). + unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment()); + + // Mark the basic block as required by the const-pool. + // If AlignConstantIslands isn't set, use 4-byte alignment for everything. + BB->setAlignment(AlignConstantIslands ? MaxAlign : 2); + + // The function needs to be as aligned as the basic blocks. 
The linker may + // move functions around based on their alignment. + MF->ensureAlignment(BB->getAlignment()); + + // Order the entries in BB by descending alignment. That ensures correct + // alignment of all entries as long as BB is sufficiently aligned. Keep + // track of the insertion point for each alignment. We are going to bucket + // sort the entries as they are created. + SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end()); + + // Add all of the constants from the constant pool to the end block, use an + // identity mapping of CPI's to CPE's. + const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants(); + + const DataLayout &TD = *MF->getTarget().getDataLayout(); + for (unsigned i = 0, e = CPs.size(); i != e; ++i) { + unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); + assert(Size >= 4 && "Too small constant pool entry"); + unsigned Align = CPs[i].getAlignment(); + assert(isPowerOf2_32(Align) && "Invalid alignment"); + // Verify that all constant pool entries are a multiple of their alignment. + // If not, we would have to pad them out so that instructions stay aligned. + assert((Size % Align) == 0 && "CP Entry not multiple of 4 bytes!"); + + // Insert CONSTPOOL_ENTRY before entries with a smaller alignment. + unsigned LogAlign = Log2_32(Align); + MachineBasicBlock::iterator InsAt = InsPoint[LogAlign]; + MachineInstr *CPEMI = + BuildMI(*BB, InsAt, DebugLoc(), TII->get(AArch64::CONSTPOOL_ENTRY)) + .addImm(i).addConstantPoolIndex(i).addImm(Size); + CPEMIs.push_back(CPEMI); + + // Ensure that future entries with higher alignment get inserted before + // CPEMI. This is bucket sort with iterators. + for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a) + if (InsPoint[a] == InsAt) + InsPoint[a] = CPEMI; + + // Add a new CPEntry, but no corresponding CPUser yet. 
+ std::vector<CPEntry> CPEs; + CPEs.push_back(CPEntry(CPEMI, i)); + CPEntries.push_back(CPEs); + ++NumCPEs; + DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = " + << Size << ", align = " << Align <<'\n'); + } + DEBUG(BB->dump()); +} + +/// Return true if the specified basic block can fallthrough into the block +/// immediately after it. +static bool BBHasFallthrough(MachineBasicBlock *MBB) { + // Get the next machine basic block in the function. + MachineFunction::iterator MBBI = MBB; + // Can't fall off end of function. + if (llvm::next(MBBI) == MBB->getParent()->end()) + return false; + + MachineBasicBlock *NextBB = llvm::next(MBBI); + for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) + if (*I == NextBB) + return true; + + return false; +} + +/// Given the constpool index and CONSTPOOL_ENTRY MI, look up the corresponding +/// CPEntry. +AArch64ConstantIslands::CPEntry +*AArch64ConstantIslands::findConstPoolEntry(unsigned CPI, + const MachineInstr *CPEMI) { + std::vector<CPEntry> &CPEs = CPEntries[CPI]; + // Number of entries per constpool index should be small, just do a + // linear search. + for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { + if (CPEs[i].CPEMI == CPEMI) + return &CPEs[i]; + } + return NULL; +} + +/// Returns the required alignment of the constant pool entry represented by +/// CPEMI. Alignment is measured in log2(bytes) units. +unsigned AArch64ConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { + assert(CPEMI && CPEMI->getOpcode() == AArch64::CONSTPOOL_ENTRY); + + // Everything is 4-byte aligned unless AlignConstantIslands is set. 
+ if (!AlignConstantIslands) + return 2; + + unsigned CPI = CPEMI->getOperand(1).getIndex(); + assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); + unsigned Align = MCP->getConstants()[CPI].getAlignment(); + assert(isPowerOf2_32(Align) && "Invalid CPE alignment"); + return Log2_32(Align); +} + +/// Do the initial scan of the function, building up information about the sizes +/// of each block, the location of all the water, and finding all of the +/// constant pool users. +void AArch64ConstantIslands:: +initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { + BBInfo.clear(); + BBInfo.resize(MF->getNumBlockIDs()); + + // First thing, compute the size of all basic blocks, and see if the function + // has any inline assembly in it. If so, we have to be conservative about + // alignment assumptions, as we don't know for sure the size of any + // instructions in the inline assembly. + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) + computeBlockSize(I); + + // The known bits of the entry block offset are determined by the function + // alignment. + BBInfo.front().KnownBits = MF->getAlignment(); + + // Compute block offsets and known bits. + adjustBBOffsetsAfter(MF->begin()); + + // Now go back through the instructions and build up our data structures. + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock &MBB = *MBBI; + + // If this block doesn't fall through into the next MBB, then this is + // 'water' that a constant pool island could be placed. + if (!BBHasFallthrough(&MBB)) + WaterList.push_back(&MBB); + + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + if (I->isDebugValue()) + continue; + + int Opc = I->getOpcode(); + if (I->isBranch()) { + bool IsCond = false; + + // The offsets encoded in instructions here scale by the instruction + // size (4 bytes), effectively increasing their range by 2 bits. 
+ unsigned Bits = 0; + switch (Opc) { + default: + continue; // Ignore other JT branches + case AArch64::TBZxii: + case AArch64::TBZwii: + case AArch64::TBNZxii: + case AArch64::TBNZwii: + IsCond = true; + Bits = 14 + 2; + break; + case AArch64::Bcc: + case AArch64::CBZx: + case AArch64::CBZw: + case AArch64::CBNZx: + case AArch64::CBNZw: + IsCond = true; + Bits = 19 + 2; + break; + case AArch64::Bimm: + Bits = 26 + 2; + break; + } + + // Record this immediate branch. + ImmBranches.push_back(ImmBranch(I, Bits, IsCond)); + } + + if (Opc == AArch64::CONSTPOOL_ENTRY) + continue; + + // Scan the instructions for constant pool operands. + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) + if (I->getOperand(op).isCPI()) { + // We found one. The addressing mode tells us the max displacement + // from the PC that this instruction permits. + + // The offsets encoded in instructions here scale by the instruction + // size (4 bytes), effectively increasing their range by 2 bits. + unsigned Bits = 0; + + switch (Opc) { + default: + llvm_unreachable("Unknown addressing mode for CP reference!"); + + case AArch64::LDRw_lit: + case AArch64::LDRx_lit: + case AArch64::LDRs_lit: + case AArch64::LDRd_lit: + case AArch64::LDRq_lit: + case AArch64::LDRSWx_lit: + case AArch64::PRFM_lit: + Bits = 19 + 2; + } + + // Remember that this is a user of a CP entry. + unsigned CPI = I->getOperand(op).getIndex(); + MachineInstr *CPEMI = CPEMIs[CPI]; + CPUsers.push_back(CPUser(I, CPEMI, Bits)); + + // Increment corresponding CPEntry reference count. + CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); + assert(CPE && "Cannot find a corresponding CPEntry!"); + CPE->RefCount++; + + // Instructions can only use one CP entry, don't bother scanning the + // rest of the operands. + break; + } + } + } +} + +/// Compute the size and some alignment information for MBB. This function +/// updates BBInfo directly. 
+void AArch64ConstantIslands::computeBlockSize(MachineBasicBlock *MBB) { + BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; + BBI.Size = 0; + BBI.Unalign = 0; + + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { + BBI.Size += TII->getInstSizeInBytes(*I); + // For inline asm, GetInstSizeInBytes returns a conservative estimate. + // The actual size may be smaller, but still a multiple of the instr size. + if (I->isInlineAsm()) + BBI.Unalign = 2; + } +} + +/// Return the current offset of the specified machine instruction from the +/// start of the function. This offset changes as stuff is moved around inside +/// the function. +unsigned AArch64ConstantIslands::getOffsetOf(MachineInstr *MI) const { + MachineBasicBlock *MBB = MI->getParent(); + + // The offset is composed of two things: the sum of the sizes of all MBB's + // before this instruction's block, and the offset from the start of the block + // it is in. + unsigned Offset = BBInfo[MBB->getNumber()].Offset; + + // Sum instructions before MI in MBB. + for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { + assert(I != MBB->end() && "Didn't find MI in its own basic block?"); + Offset += TII->getInstSizeInBytes(*I); + } + return Offset; +} + +/// Little predicate function to sort the WaterList by MBB ID. +static bool CompareMBBNumbers(const MachineBasicBlock *LHS, + const MachineBasicBlock *RHS) { + return LHS->getNumber() < RHS->getNumber(); +} + +/// When a block is newly inserted into the machine function, it upsets all of +/// the block numbers. Renumber the blocks and update the arrays that parallel +/// this numbering. +void AArch64ConstantIslands:: +updateForInsertedWaterBlock(MachineBasicBlock *NewBB) { + // Renumber the MBB's to keep them consecutive. + NewBB->getParent()->RenumberBlocks(NewBB); + + // Insert an entry into BBInfo to align it properly with the (newly + // renumbered) block numbers. 
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); + + // Next, update WaterList. Specifically, we need to add NewMBB as having + // available water after it. + water_iterator IP = + std::lower_bound(WaterList.begin(), WaterList.end(), NewBB, + CompareMBBNumbers); + WaterList.insert(IP, NewBB); +} + + +/// Split the basic block containing MI into two blocks, which are joined by +/// an unconditional branch. Update data structures and renumber blocks to +/// account for this change and returns the newly created block. +MachineBasicBlock * +AArch64ConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { + MachineBasicBlock *OrigBB = MI->getParent(); + + // Create a new MBB for the code after the OrigBB. + MachineBasicBlock *NewBB = + MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); + MachineFunction::iterator MBBI = OrigBB; ++MBBI; + MF->insert(MBBI, NewBB); + + // Splice the instructions starting with MI over to NewBB. + NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); + + // Add an unconditional branch from OrigBB to NewBB. + // Note the new unconditional branch is not being recorded. + // There doesn't seem to be meaningful DebugInfo available; this doesn't + // correspond to anything in the source. + BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB); + ++NumSplit; + + // Update the CFG. All succs of OrigBB are now succs of NewBB. + NewBB->transferSuccessors(OrigBB); + + // OrigBB branches to NewBB. + OrigBB->addSuccessor(NewBB); + + // Update internal data structures to account for the newly inserted MBB. + // This is almost the same as updateForInsertedWaterBlock, except that + // the Water goes after OrigBB, not NewBB. + MF->RenumberBlocks(NewBB); + + // Insert an entry into BBInfo to align it properly with the (newly + // renumbered) block numbers. + BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); + + // Next, update WaterList. 
Specifically, we need to add OrigMBB as having + // available water after it (but not if it's already there, which happens + // when splitting before a conditional branch that is followed by an + // unconditional branch - in that case we want to insert NewBB). + water_iterator IP = + std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB, + CompareMBBNumbers); + MachineBasicBlock* WaterBB = *IP; + if (WaterBB == OrigBB) + WaterList.insert(llvm::next(IP), NewBB); + else + WaterList.insert(IP, OrigBB); + NewWaterList.insert(OrigBB); + + // Figure out how large the OrigBB is. As the first half of the original + // block, it cannot contain a tablejump. The size includes + // the new jump we added. (It should be possible to do this without + // recounting everything, but it's very confusing, and this is rarely + // executed.) + computeBlockSize(OrigBB); + + // Figure out how large the NewMBB is. As the second half of the original + // block, it may contain a tablejump. + computeBlockSize(NewBB); + + // All BBOffsets following these blocks must be modified. + adjustBBOffsetsAfter(OrigBB); + + return NewBB; +} + +/// Compute the offset of U.MI as seen by the hardware displacement computation. +unsigned AArch64ConstantIslands::getUserOffset(CPUser &U) const { + return getOffsetOf(U.MI); +} + +/// Checks whether UserOffset (the location of a constant pool reference) is +/// within OffsetBits of TrialOffset (a proposed location of a constant pool +/// entry). +bool AArch64ConstantIslands::isOffsetInRange(unsigned UserOffset, + unsigned TrialOffset, + unsigned OffsetBits) { + return isIntN(OffsetBits, static_cast<int64_t>(TrialOffset) - UserOffset); +} + +/// Returns true if a CPE placed after the specified Water (a basic block) will +/// be in range for the specific MI. +/// +/// Compute how much the function will grow by inserting a CPE after Water. 
+bool AArch64ConstantIslands::isWaterInRange(unsigned UserOffset, + MachineBasicBlock* Water, CPUser &U, + unsigned &Growth) { + unsigned CPELogAlign = getCPELogAlign(U.CPEMI); + unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign); + unsigned NextBlockOffset, NextBlockAlignment; + MachineFunction::const_iterator NextBlock = Water; + if (++NextBlock == MF->end()) { + NextBlockOffset = BBInfo[Water->getNumber()].postOffset(); + NextBlockAlignment = 0; + } else { + NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset; + NextBlockAlignment = NextBlock->getAlignment(); + } + unsigned Size = U.CPEMI->getOperand(2).getImm(); + unsigned CPEEnd = CPEOffset + Size; + + // The CPE may be able to hide in the alignment padding before the next + // block. It may also cause more padding to be required if it is more aligned + // that the next block. + if (CPEEnd > NextBlockOffset) { + Growth = CPEEnd - NextBlockOffset; + // Compute the padding that would go at the end of the CPE to align the next + // block. + Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment); + + // If the CPE is to be inserted before the instruction, that will raise + // the offset of the instruction. Also account for unknown alignment padding + // in blocks between CPE and the user. + if (CPEOffset < UserOffset) + UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign); + } else + // CPE fits in existing padding. + Growth = 0; + + return isOffsetInRange(UserOffset, CPEOffset, U); +} + +/// Returns true if the distance between specific MI and specific ConstPool +/// entry instruction can fit in MI's displacement field. 
+bool AArch64ConstantIslands::isCPEntryInRange(MachineInstr *MI, + unsigned UserOffset, + MachineInstr *CPEMI, + unsigned OffsetBits, bool DoDump) { + unsigned CPEOffset = getOffsetOf(CPEMI); + + if (DoDump) { + DEBUG({ + unsigned Block = MI->getParent()->getNumber(); + const BasicBlockInfo &BBI = BBInfo[Block]; + dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm() + << " bits available=" << OffsetBits + << format(" insn address=%#x", UserOffset) + << " in BB#" << Block << ": " + << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI + << format("CPE address=%#x offset=%+d: ", CPEOffset, + int(CPEOffset-UserOffset)); + }); + } + + return isOffsetInRange(UserOffset, CPEOffset, OffsetBits); +} + +#ifndef NDEBUG +/// Return true of the specified basic block's only predecessor unconditionally +/// branches to its only successor. +static bool BBIsJumpedOver(MachineBasicBlock *MBB) { + if (MBB->pred_size() != 1 || MBB->succ_size() != 1) + return false; + + MachineBasicBlock *Succ = *MBB->succ_begin(); + MachineBasicBlock *Pred = *MBB->pred_begin(); + MachineInstr *PredMI = &Pred->back(); + if (PredMI->getOpcode() == AArch64::Bimm) + return PredMI->getOperand(0).getMBB() == Succ; + return false; +} +#endif // NDEBUG + +void AArch64ConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) { + unsigned BBNum = BB->getNumber(); + for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) { + // Get the offset and known bits at the end of the layout predecessor. + // Include the alignment of the current block. + unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment(); + unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); + unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); + + // This is where block i begins. Stop if the offset is already correct, + // and we have updated 2 blocks. This is the maximum number of blocks + // changed before calling this function. 
+ if (i > BBNum + 2 && + BBInfo[i].Offset == Offset && + BBInfo[i].KnownBits == KnownBits) + break; + + BBInfo[i].Offset = Offset; + BBInfo[i].KnownBits = KnownBits; + } +} + +/// Find the constant pool entry with index CPI and instruction CPEMI, and +/// decrement its refcount. If the refcount becomes 0 remove the entry and +/// instruction. Returns true if we removed the entry, false if we didn't. +bool AArch64ConstantIslands::decrementCPEReferenceCount(unsigned CPI, + MachineInstr *CPEMI) { + // Find the old entry. Eliminate it if it is no longer used. + CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); + assert(CPE && "Unexpected!"); + if (--CPE->RefCount == 0) { + removeDeadCPEMI(CPEMI); + CPE->CPEMI = NULL; + --NumCPEs; + return true; + } + return false; +} + +/// See if the currently referenced CPE is in range; if not, see if an in-range +/// clone of the CPE is in range, and if so, change the data structures so the +/// user references the clone. Returns: +/// 0 = no existing entry found +/// 1 = entry found, and there were no code insertions or deletions +/// 2 = entry found, and there were code insertions or deletions +int AArch64ConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) +{ + MachineInstr *UserMI = U.MI; + MachineInstr *CPEMI = U.CPEMI; + + // Check to see if the CPE is already in-range. + if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getOffsetBits(), true)) { + DEBUG(dbgs() << "In range\n"); + return 1; + } + + // No. Look for previously created clones of the CPE that are in range. 
+ unsigned CPI = CPEMI->getOperand(1).getIndex(); + std::vector<CPEntry> &CPEs = CPEntries[CPI]; + for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { + // We already tried this one + if (CPEs[i].CPEMI == CPEMI) + continue; + // Removing CPEs can leave empty entries, skip + if (CPEs[i].CPEMI == NULL) + continue; + if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getOffsetBits())) { + DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" + << CPEs[i].CPI << "\n"); + // Point the CPUser node to the replacement + U.CPEMI = CPEs[i].CPEMI; + // Change the CPI in the instruction operand to refer to the clone. + for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j) + if (UserMI->getOperand(j).isCPI()) { + UserMI->getOperand(j).setIndex(CPEs[i].CPI); + break; + } + // Adjust the refcount of the clone... + CPEs[i].RefCount++; + // ...and the original. If we didn't remove the old entry, none of the + // addresses changed, so we don't need another pass. + return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1; + } + } + return 0; +} + +/// Look for an existing entry in the WaterList in which we can place the CPE +/// referenced from U so it's within range of U's MI. Returns true if found, +/// false if not. If it returns true, WaterIter is set to the WaterList +/// entry. To ensure that this pass terminates, the CPE location for a +/// particular CPUser is only allowed to move to a lower address, so search +/// backward from the end of the list and prefer the first water that is in +/// range. 
+bool AArch64ConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, + water_iterator &WaterIter) { + if (WaterList.empty()) + return false; + + unsigned BestGrowth = ~0u; + for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();; + --IP) { + MachineBasicBlock* WaterBB = *IP; + // Check if water is in range and is either at a lower address than the + // current "high water mark" or a new water block that was created since + // the previous iteration by inserting an unconditional branch. In the + // latter case, we want to allow resetting the high water mark back to + // this new water since we haven't seen it before. Inserting branches + // should be relatively uncommon and when it does happen, we want to be + // sure to take advantage of it for all the CPEs near that block, so that + // we don't insert more branches than necessary. + unsigned Growth; + if (isWaterInRange(UserOffset, WaterBB, U, Growth) && + (WaterBB->getNumber() < U.HighWaterMark->getNumber() || + NewWaterList.count(WaterBB)) && Growth < BestGrowth) { + // This is the least amount of required padding seen so far. + BestGrowth = Growth; + WaterIter = IP; + DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber() + << " Growth=" << Growth << '\n'); + + // Keep looking unless it is perfect. + if (BestGrowth == 0) + return true; + } + if (IP == B) + break; + } + return BestGrowth != ~0u; +} + +/// No existing WaterList entry will work for CPUsers[CPUserIndex], so create a +/// place to put the CPE. The end of the block is used if in range, and the +/// conditional branch munged so control flow is correct. Otherwise the block +/// is split to create a hole with an unconditional branch around it. In either +/// case NewMBB is set to a block following which the new island can be inserted +/// (the WaterList is not adjusted). 
+void AArch64ConstantIslands::createNewWater(unsigned CPUserIndex, + unsigned UserOffset, + MachineBasicBlock *&NewMBB) { + CPUser &U = CPUsers[CPUserIndex]; + MachineInstr *UserMI = U.MI; + MachineInstr *CPEMI = U.CPEMI; + unsigned CPELogAlign = getCPELogAlign(CPEMI); + MachineBasicBlock *UserMBB = UserMI->getParent(); + const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()]; + + // If the block does not end in an unconditional branch already, and if the + // end of the block is within range, make new water there. + if (BBHasFallthrough(UserMBB)) { + // Size of branch to insert. + unsigned InstrSize = 4; + // Compute the offset where the CPE will begin. + unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + InstrSize; + + if (isOffsetInRange(UserOffset, CPEOffset, U)) { + DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() + << format(", expected CPE offset %#x\n", CPEOffset)); + NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); + // Add an unconditional branch from UserMBB to fallthrough block. Record + // it for branch lengthening; this new branch will not get out of range, + // but if the preceding conditional branch is out of range, the targets + // will be exchanged, and the altered branch may be out of range, so the + // machinery has to know about it. + BuildMI(UserMBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewMBB); + + // 26 bits written down, specifying a multiple of 4. + unsigned OffsetBits = 26 + 2; + ImmBranches.push_back(ImmBranch(&UserMBB->back(), OffsetBits, false)); + BBInfo[UserMBB->getNumber()].Size += InstrSize; + adjustBBOffsetsAfter(UserMBB); + return; + } + } + + // What a big block. Find a place within the block to split it. 
We make a + // first guess, then walk through the instructions between the one currently + // being looked at and the possible insertion point, and make sure any other + // instructions that reference CPEs will be able to use the same island area; + // if not, we back up the insertion point. + + // Try to split the block so it's fully aligned. Compute the latest split + // point where we can add a 4-byte branch instruction, and then align to + // LogAlign which is the largest possible alignment in the function. + unsigned LogAlign = MF->getAlignment(); + assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry"); + unsigned KnownBits = UserBBI.internalKnownBits(); + unsigned UPad = UnknownPadding(LogAlign, KnownBits); + unsigned BaseInsertOffset = UserOffset + U.getMaxPosDisp() - UPad; + DEBUG(dbgs() << format("Split in middle of big block before %#x", + BaseInsertOffset)); + + // The 4 in the following is for the unconditional branch we'll be inserting + // Alignment of the island is handled inside isOffsetInRange. + BaseInsertOffset -= 4; + + DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset) + << " la=" << LogAlign + << " kb=" << KnownBits + << " up=" << UPad << '\n'); + + // This could point off the end of the block if we've already got constant + // pool entries following this block; only the last one is in the water list. + // Back past any possible branches (allow for a conditional and a maximally + // long unconditional). 
+ if (BaseInsertOffset + 8 >= UserBBI.postOffset()) { + BaseInsertOffset = UserBBI.postOffset() - UPad - 8; + DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset)); + } + unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad + + CPEMI->getOperand(2).getImm(); + MachineBasicBlock::iterator MI = UserMI; + ++MI; + unsigned CPUIndex = CPUserIndex+1; + unsigned NumCPUsers = CPUsers.size(); + for (unsigned Offset = UserOffset+TII->getInstSizeInBytes(*UserMI); + Offset < BaseInsertOffset; + Offset += TII->getInstSizeInBytes(*MI), + MI = llvm::next(MI)) { + assert(MI != UserMBB->end() && "Fell off end of block"); + if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { + CPUser &U = CPUsers[CPUIndex]; + if (!isOffsetInRange(Offset, EndInsertOffset, U)) { + // Shift intertion point by one unit of alignment so it is within reach. + BaseInsertOffset -= 1u << LogAlign; + EndInsertOffset -= 1u << LogAlign; + } + // This is overly conservative, as we don't account for CPEMIs being + // reused within the block, but it doesn't matter much. Also assume CPEs + // are added in order with alignment padding. We may eventually be able + // to pack the aligned CPEs better. + EndInsertOffset += U.CPEMI->getOperand(2).getImm(); + CPUIndex++; + } + } + + --MI; + NewMBB = splitBlockBeforeInstr(MI); +} + +/// Analyze the specified user, checking to see if it is out-of-range. If so, +/// pick up the constant pool value and move it some place in-range. Return +/// true if we changed any addresses, false otherwise. +bool AArch64ConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { + CPUser &U = CPUsers[CPUserIndex]; + MachineInstr *UserMI = U.MI; + MachineInstr *CPEMI = U.CPEMI; + unsigned CPI = CPEMI->getOperand(1).getIndex(); + unsigned Size = CPEMI->getOperand(2).getImm(); + // Compute this only once, it's expensive. + unsigned UserOffset = getUserOffset(U); + + // See if the current entry is within range, or there is a clone of it + // in range. 
+ int result = findInRangeCPEntry(U, UserOffset); + if (result==1) return false; + else if (result==2) return true; + + // No existing clone of this CPE is within range. + // We will be generating a new clone. Get a UID for it. + unsigned ID = AFI->createPICLabelUId(); + + // Look for water where we can place this CPE. + MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock(); + MachineBasicBlock *NewMBB; + water_iterator IP; + if (findAvailableWater(U, UserOffset, IP)) { + DEBUG(dbgs() << "Found water in range\n"); + MachineBasicBlock *WaterBB = *IP; + + // If the original WaterList entry was "new water" on this iteration, + // propagate that to the new island. This is just keeping NewWaterList + // updated to match the WaterList, which will be updated below. + if (NewWaterList.count(WaterBB)) { + NewWaterList.erase(WaterBB); + NewWaterList.insert(NewIsland); + } + // The new CPE goes before the following block (NewMBB). + NewMBB = llvm::next(MachineFunction::iterator(WaterBB)); + + } else { + // No water found. + DEBUG(dbgs() << "No water found\n"); + createNewWater(CPUserIndex, UserOffset, NewMBB); + + // splitBlockBeforeInstr adds to WaterList, which is important when it is + // called while handling branches so that the water will be seen on the + // next iteration for constant pools, but in this context, we don't want + // it. Check for this so it will be removed from the WaterList. + // Also remove any entry from NewWaterList. + MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB)); + IP = std::find(WaterList.begin(), WaterList.end(), WaterBB); + if (IP != WaterList.end()) + NewWaterList.erase(WaterBB); + + // We are adding new water. Update NewWaterList. + NewWaterList.insert(NewIsland); + } + + // Remove the original WaterList entry; we want subsequent insertions in + // this vicinity to go after the one we're about to insert. 
This + // considerably reduces the number of times we have to move the same CPE + // more than once and is also important to ensure the algorithm terminates. + if (IP != WaterList.end()) + WaterList.erase(IP); + + // Okay, we know we can put an island before NewMBB now, do it! + MF->insert(NewMBB, NewIsland); + + // Update internal data structures to account for the newly inserted MBB. + updateForInsertedWaterBlock(NewIsland); + + // Decrement the old entry, and remove it if refcount becomes 0. + decrementCPEReferenceCount(CPI, CPEMI); + + // Now that we have an island to add the CPE to, clone the original CPE and + // add it to the island. + U.HighWaterMark = NewIsland; + U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(AArch64::CONSTPOOL_ENTRY)) + .addImm(ID).addConstantPoolIndex(CPI).addImm(Size); + CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); + ++NumCPEs; + + // Mark the basic block as aligned as required by the const-pool entry. + NewIsland->setAlignment(getCPELogAlign(U.CPEMI)); + + // Increase the size of the island block to account for the new entry. + BBInfo[NewIsland->getNumber()].Size += Size; + adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland))); + + // Finally, change the CPI in the instruction operand to be ID. + for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) + if (UserMI->getOperand(i).isCPI()) { + UserMI->getOperand(i).setIndex(ID); + break; + } + + DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI + << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset)); + + return true; +} + +/// Remove a dead constant pool entry instruction. Update sizes and offsets of +/// impacted basic blocks. 
+void AArch64ConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { + MachineBasicBlock *CPEBB = CPEMI->getParent(); + unsigned Size = CPEMI->getOperand(2).getImm(); + CPEMI->eraseFromParent(); + BBInfo[CPEBB->getNumber()].Size -= Size; + // All succeeding offsets have the current size value added in, fix this. + if (CPEBB->empty()) { + BBInfo[CPEBB->getNumber()].Size = 0; + + // This block no longer needs to be aligned. <rdar://problem/10534709>. + CPEBB->setAlignment(0); + } else + // Entries are sorted by descending alignment, so realign from the front. + CPEBB->setAlignment(getCPELogAlign(CPEBB->begin())); + + adjustBBOffsetsAfter(CPEBB); + // An island has only one predecessor BB and one successor BB. Check if + // this BB's predecessor jumps directly to this BB's successor. This + // shouldn't happen currently. + assert(!BBIsJumpedOver(CPEBB) && "How did this happen?"); + // FIXME: remove the empty blocks after all the work is done? +} + +/// Remove constant pool entries whose refcounts are zero. +bool AArch64ConstantIslands::removeUnusedCPEntries() { + unsigned MadeChange = false; + for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { + std::vector<CPEntry> &CPEs = CPEntries[i]; + for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) { + if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) { + removeDeadCPEMI(CPEs[j].CPEMI); + CPEs[j].CPEMI = NULL; + MadeChange = true; + } + } + } + return MadeChange; +} + +/// Returns true if the distance between specific MI and specific BB can fit in +/// MI's displacement field. 
+bool AArch64ConstantIslands::isBBInRange(MachineInstr *MI, + MachineBasicBlock *DestBB, + unsigned OffsetBits) { + int64_t BrOffset = getOffsetOf(MI); + int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset; + + DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() + << " from BB#" << MI->getParent()->getNumber() + << " bits available=" << OffsetBits + << " from " << getOffsetOf(MI) << " to " << DestOffset + << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); + + return isIntN(OffsetBits, DestOffset - BrOffset); +} + +/// Fix up an immediate branch whose destination is too far away to fit in its +/// displacement field. +bool AArch64ConstantIslands::fixupImmediateBr(ImmBranch &Br) { + MachineInstr *MI = Br.MI; + MachineBasicBlock *DestBB = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (MI->getOperand(i).isMBB()) { + DestBB = MI->getOperand(i).getMBB(); + break; + } + } + assert(DestBB && "Branch with no destination BB?"); + + // Check to see if the DestBB is already in-range. + if (isBBInRange(MI, DestBB, Br.OffsetBits)) + return false; + + assert(Br.IsCond && "Only conditional branches should need fixup"); + return fixupConditionalBr(Br); +} + +/// Fix up a conditional branch whose destination is too far away to fit in its +/// displacement field. It is converted to an inverse conditional branch + an +/// unconditional branch to the destination. +bool +AArch64ConstantIslands::fixupConditionalBr(ImmBranch &Br) { + MachineInstr *MI = Br.MI; + MachineBasicBlock *MBB = MI->getParent(); + unsigned CondBrMBBOperand = 0; + + // The general idea is to add an unconditional branch to the destination and + // invert the conditional branch to jump over it. Complications occur around + // fallthrough and unreachable ends to the block. + // b.lt L1 + // => + // b.ge L2 + // b L1 + // L2: + + // First we invert the conditional branch, by creating a replacement if + // necessary. 
This if statement contains all the special handling of different + // branch types. + if (MI->getOpcode() == AArch64::Bcc) { + // The basic block is operand number 1 for Bcc + CondBrMBBOperand = 1; + + A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm(); + CC = A64InvertCondCode(CC); + MI->getOperand(0).setImm(CC); + } else { + MachineInstrBuilder InvertedMI; + int InvertedOpcode; + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown branch type"); + case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break; + case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break; + case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break; + case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break; + case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break; + case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break; + case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break; + case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break; + } + + InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode)); + for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) { + InvertedMI.addOperand(MI->getOperand(i)); + if (MI->getOperand(i).isMBB()) + CondBrMBBOperand = i; + } + + MI->eraseFromParent(); + MI = Br.MI = InvertedMI; + } + + // If the branch is at the end of its MBB and that has a fall-through block, + // direct the updated conditional branch to the fall-through + // block. Otherwise, split the MBB before the next instruction. + MachineInstr *BMI = &MBB->back(); + bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); + + ++NumCBrFixed; + if (BMI != MI) { + if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && + BMI->getOpcode() == AArch64::Bimm) { + // Last MI in the BB is an unconditional branch. 
We can swap destinations: + // b.eq L1 (temporarily b.ne L1 after first change) + // b L2 + // => + // b.ne L2 + // b L1 + MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); + if (isBBInRange(MI, NewDest, Br.OffsetBits)) { + DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " + << *BMI); + MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB(); + BMI->getOperand(0).setMBB(DestBB); + MI->getOperand(CondBrMBBOperand).setMBB(NewDest); + return true; + } + } + } + + if (NeedSplit) { + MachineBasicBlock::iterator MBBI = MI; ++MBBI; + splitBlockBeforeInstr(MBBI); + // No need for the branch to the next block. We're adding an unconditional + // branch to the destination. + int delta = TII->getInstSizeInBytes(MBB->back()); + BBInfo[MBB->getNumber()].Size -= delta; + MBB->back().eraseFromParent(); + // BBInfo[SplitBB].Offset is wrong temporarily, fixed below + } + + // After splitting and removing the unconditional branch from the original BB, + // the structure is now: + // oldbb: + // [things] + // b.invertedCC L1 + // splitbb/fallthroughbb: + // [old b L2/real continuation] + // + // We now have to change the conditional branch to point to splitbb and add an + // unconditional branch after it to L1, giving the final structure: + // oldbb: + // [things] + // b.invertedCC splitbb + // b L1 + // splitbb/fallthroughbb: + // [old b L2/real continuation] + MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); + + DEBUG(dbgs() << " Insert B to BB#" + << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber() + << " also invert condition and change dest. to BB#" + << NextBB->getNumber() << "\n"); + + // Insert a new unconditional branch and fixup the destination of the + // conditional one. Also update the ImmBranch as well as adding a new entry + // for the new branch. 
+ BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm)) + .addMBB(MI->getOperand(CondBrMBBOperand).getMBB()); + MI->getOperand(CondBrMBBOperand).setMBB(NextBB); + + BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back()); + + // 26 bits written down in Bimm, specifying a multiple of 4. + unsigned OffsetBits = 26 + 2; + ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false)); + + adjustBBOffsetsAfter(MBB); + return true; +} diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp new file mode 100644 index 0000000..2301114 --- /dev/null +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -0,0 +1,644 @@ +//===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64FrameLowering.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64InstrInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +void AArch64FrameLowering::splitSPAdjustments(uint64_t Total, + uint64_t &Initial, + uint64_t &Residual) const { + // 0x1f0 here is a pessimistic (i.e. 
realistic) boundary: x-register LDP + // instructions have a 7-bit signed immediate scaled by 8, giving a reach of + // 0x1f8, but stack adjustment should always be a multiple of 16. + if (Total <= 0x1f0) { + Initial = Total; + Residual = 0; + } else { + Initial = 0x1f0; + Residual = Total - Initial; + } +} + +void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { + AArch64MachineFunctionInfo *FuncInfo = + MF.getInfo<AArch64MachineFunctionInfo>(); + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + MachineModuleInfo &MMI = MF.getMMI(); + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + bool NeedsFrameMoves = MMI.hasDebugInfo() + || MF.getFunction()->needsUnwindTableEntry(); + + uint64_t NumInitialBytes, NumResidualBytes; + + // Currently we expect the stack to be laid out by + // sub sp, sp, #initial + // stp x29, x30, [sp, #offset] + // ... + // str xxx, [sp, #offset] + // sub sp, sp, #rest (possibly via extra instructions). + if (MFI->getCalleeSavedInfo().size()) { + // If there are callee-saved registers, we want to store them efficiently as + // a block, and virtual base assignment happens too early to do it for us so + // we adjust the stack in two phases: first just for callee-saved fiddling, + // then to allocate the rest of the frame. + splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes); + } else { + // If there aren't any callee-saved registers, two-phase adjustment is + // inefficient. It's more efficient to adjust with NumInitialBytes too + // because when we're in a "callee pops argument space" situation, that pop + // must be tacked onto Initial for correctness. 
+ NumInitialBytes = MFI->getStackSize(); + NumResidualBytes = 0; + } + + // Tell everyone else how much adjustment we're expecting them to use. In + // particular if an adjustment is required for a tail call the epilogue could + // have a different view of things. + FuncInfo->setInitialStackAdjust(NumInitialBytes); + + emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes, + MachineInstr::FrameSetup); + + if (NeedsFrameMoves && NumInitialBytes) { + // We emit this update even if the CFA is set from a frame pointer later so + // that the CFA is valid in the interim. + MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) + .addSym(SPLabel); + + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(AArch64::XSP, NumInitialBytes); + Moves.push_back(MachineMove(SPLabel, Dst, Src)); + } + + // Otherwise we need to set the frame pointer and/or add a second stack + // adjustment. + + bool FPNeedsSetting = hasFP(MF); + for (; MBBI != MBB.end(); ++MBBI) { + // Note that this search makes strong assumptions about the operation used + // to store the frame-pointer: it must be "STP x29, x30, ...". This could + // change in future, but until then there's no point in implementing + // untestable more generic cases. + if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR + && MBBI->getOperand(0).getReg() == AArch64::X29) { + int64_t X29FrameIdx = MBBI->getOperand(2).getIndex(); + FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx)); + + ++MBBI; + emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP, + AArch64::X29, + NumInitialBytes + MFI->getObjectOffset(X29FrameIdx), + MachineInstr::FrameSetup); + + // The offset adjustment used when emitting debugging locations relative + // to whatever frame base is set. AArch64 uses the default frame base (FP + // or SP) and this adjusts the calculations to be correct. 
+ MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx) + - MFI->getStackSize()); + + if (NeedsFrameMoves) { + MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) + .addSym(FPLabel); + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(AArch64::X29, -MFI->getObjectOffset(X29FrameIdx)); + Moves.push_back(MachineMove(FPLabel, Dst, Src)); + } + + FPNeedsSetting = false; + } + + if (!MBBI->getFlag(MachineInstr::FrameSetup)) + break; + } + + assert(!FPNeedsSetting && "Frame pointer couldn't be set"); + + emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes, + MachineInstr::FrameSetup); + + // Now we emit the rest of the frame setup information, if necessary: we've + // already noted the FP and initial SP moves so we're left with the prologue's + // final SP update and callee-saved register locations. + if (!NeedsFrameMoves) + return; + + // Reuse the label if appropriate, so create it in this outer scope. + MCSymbol *CSLabel = 0; + + // The rest of the stack adjustment + if (!hasFP(MF) && NumResidualBytes) { + CSLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) + .addSym(CSLabel); + + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(AArch64::XSP, NumResidualBytes + NumInitialBytes); + Moves.push_back(MachineMove(CSLabel, Dst, Src)); + } + + // And any callee-saved registers (it's fine to leave them to the end here, + // because the old values are still valid at this point. 
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + if (CSI.size()) { + if (!CSLabel) { + CSLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) + .addSym(CSLabel); + } + + for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), + E = CSI.end(); I != E; ++I) { + MachineLocation Dst(MachineLocation::VirtualFP, MFI->getObjectOffset(I->getFrameIdx())); + MachineLocation Src(I->getReg()); + Moves.push_back(MachineMove(CSLabel, Dst, Src)); + } + } +} + +void +AArch64FrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + AArch64MachineFunctionInfo *FuncInfo = + MF.getInfo<AArch64MachineFunctionInfo>(); + + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + DebugLoc DL = MBBI->getDebugLoc(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned RetOpcode = MBBI->getOpcode(); + + // Initial and residual are named for consitency with the prologue. Note that + // in the epilogue, the residual adjustment is executed first. 
+ uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust(); + uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes; + uint64_t ArgumentPopSize = 0; + if (RetOpcode == AArch64::TC_RETURNdi || + RetOpcode == AArch64::TC_RETURNxi) { + MachineOperand &JumpTarget = MBBI->getOperand(0); + MachineOperand &StackAdjust = MBBI->getOperand(1); + + MachineInstrBuilder MIB; + if (RetOpcode == AArch64::TC_RETURNdi) { + MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm)); + if (JumpTarget.isGlobal()) { + MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + } else { + assert(JumpTarget.isSymbol() && "unexpected tail call destination"); + MIB.addExternalSymbol(JumpTarget.getSymbolName(), + JumpTarget.getTargetFlags()); + } + } else { + assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg() + && "Unexpected tail call"); + + MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx)); + MIB.addReg(JumpTarget.getReg(), RegState::Kill); + } + + // Add the extra operands onto the new tail call instruction even though + // they're not used directly (so that liveness is tracked properly etc). + for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) + MIB->addOperand(MBBI->getOperand(i)); + + + // For a tail-call in a callee-pops-arguments environment, some or all of + // the stack may actually be in use for the call's arguments, this is + // calculated during LowerCall and consumed here... + ArgumentPopSize = StackAdjust.getImm(); + + // Delete the pseudo instruction TC_RETURN; its operands die with it. + MachineInstr *NewMI = prior(MBBI); + MBB.erase(MBBI); + MBBI = NewMI; + } else { + // ... otherwise the amount to pop is *all* of the argument space, + // conveniently stored in the MachineFunctionInfo by + // LowerFormalArguments. This will, of course, be zero for the C calling + // convention. 
+ ArgumentPopSize = FuncInfo->getArgumentStackToRestore(); + } + + assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0 + && "refusing to adjust stack by misaligned amt"); + + // We may need to address callee-saved registers differently, so find out the + // bound on the frame indices. + const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + // The "residual" stack update comes first from this direction and guarantees + // that SP is NumInitialBytes below its value on function entry, either by a + // direct update or restoring it from the frame pointer. + if (NumInitialBytes + ArgumentPopSize != 0) { + emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, + NumInitialBytes + ArgumentPopSize); + --MBBI; + } + + + // MBBI now points to the instruction just past the last callee-saved + // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp" + // otherwise). + + // Now we need to find out where to put the bulk of the stack adjustment + MachineBasicBlock::iterator FirstEpilogue = MBBI; + while (MBBI != MBB.begin()) { + --MBBI; + + unsigned FrameOp; + for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) { + if (MBBI->getOperand(FrameOp).isFI()) + break; + } + + // If this instruction doesn't have a frame index we've reached the end of + // the callee-save restoration. + if (FrameOp == MBBI->getNumOperands()) + break; + + // Likewise if it *is* a local reference, but not to a callee-saved object. 
+ int FrameIdx = MBBI->getOperand(FrameOp).getIndex(); + if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI) + break; + + FirstEpilogue = MBBI; + } + + if (MF.getFrameInfo()->hasVarSizedObjects()) { + int64_t StaticFrameBase; + StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset()); + emitRegUpdate(MBB, FirstEpilogue, DL, TII, + AArch64::XSP, AArch64::X29, AArch64::NoRegister, + StaticFrameBase); + } else { + emitSPUpdate(MBB, FirstEpilogue, DL,TII, AArch64::X16, NumResidualBytes); + } +} + +int64_t +AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF, + int FrameIndex, + unsigned &FrameReg, + int SPAdj, + bool IsCalleeSaveOp) const { + AArch64MachineFunctionInfo *FuncInfo = + MF.getInfo<AArch64MachineFunctionInfo>(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex); + + assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0) + && "callee-saved register in unexpected place"); + + // If the frame for this function is particularly large, we adjust the stack + // in two phases which means the callee-save related operations see a + // different (intermediate) stack size. + int64_t FrameRegPos; + if (IsCalleeSaveOp) { + FrameReg = AArch64::XSP; + FrameRegPos = -static_cast<int64_t>(FuncInfo->getInitialStackAdjust()); + } else if (useFPForAddressing(MF)) { + // Have to use the frame pointer since we have no idea where SP is. + FrameReg = AArch64::X29; + FrameRegPos = FuncInfo->getFramePointerOffset(); + } else { + FrameReg = AArch64::XSP; + FrameRegPos = -static_cast<int64_t>(MFI->getStackSize()) + SPAdj; + } + + return TopOfFrameOffset - FrameRegPos; +} + +/// Estimate and return the size of the frame. +static unsigned estimateStackSize(MachineFunction &MF) { + // FIXME: Make generic? Really consider after upstreaming. This code is now + // shared between PEI, ARM *and* here. 
+ const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + unsigned MaxAlign = MFI->getMaxAlignment(); + int Offset = 0; + + // This code is very, very similar to PEI::calculateFrameObjectOffsets(). + // It really should be refactored to share code. Until then, changes + // should keep in mind that there's tight coupling between the two. + + for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -MFI->getObjectOffset(i); + if (FixedOff > Offset) Offset = FixedOff; + } + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isDeadObjectIndex(i)) + continue; + Offset += MFI->getObjectSize(i); + unsigned Align = MFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MaxAlign = std::max(Align, MaxAlign); + } + + if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF)) + Offset += MFI->getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0)) + StackAlign = TFI->getStackAlignment(); + else + StackAlign = TFI->getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. 
+ StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + + return (unsigned)Offset; +} + +void +AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + const AArch64RegisterInfo *RegInfo = + static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo()); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const AArch64InstrInfo &TII = + *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo()); + + if (hasFP(MF)) { + MF.getRegInfo().setPhysRegUsed(AArch64::X29); + MF.getRegInfo().setPhysRegUsed(AArch64::X30); + } + + // If addressing of local variables is going to be more complicated than + // shoving a base register and an offset into the instruction then we may well + // need to scavenge registers. We should either specifically add a + // callee-save register for this purpose or allocate an extra spill slot. + + bool BigStack = + (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)) + || MFI->hasVarSizedObjects() // Access will be from X29: messes things up + || (MFI->adjustsStack() && !hasReservedCallFrame(MF)); + + if (!BigStack) + return; + + // We certainly need some slack space for the scavenger, preferably an extra + // register. + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); + uint16_t ExtraReg = AArch64::NoRegister; + + for (unsigned i = 0; CSRegs[i]; ++i) { + if (AArch64::GPR64RegClass.contains(CSRegs[i]) && + !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) { + ExtraReg = CSRegs[i]; + break; + } + } + + if (ExtraReg != 0) { + MF.getRegInfo().setPhysRegUsed(ExtraReg); + } else { + // Create a stack slot for scavenging purposes. PrologEpilogInserter + // helpfully places it near either SP or FP for us to avoid + // infinite regression during scavenging. 
+ const TargetRegisterClass *RC = &AArch64::GPR64RegClass; + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + } +} + +bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB, + unsigned Reg) const { + // If @llvm.returnaddress is called then it will refer to X30 by some means; + // the prologue store does not kill the register. + if (Reg == AArch64::X30) { + if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken() + && MBB.getParent()->getRegInfo().isLiveIn(Reg)) + return false; + } + + // In all other cases, physical registers are dead after they've been saved + // but live at the beginning of the prologue block. + MBB.addLiveIn(Reg); + return true; +} + +void +AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI, + LoadStoreMethod PossClasses[], + unsigned NumClasses) const { + DebugLoc DL = MBB.findDebugLoc(MBBI); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + + // A certain amount of implicit contract is present here. The actual stack + // offsets haven't been allocated officially yet, so for strictly correct code + // we rely on the fact that the elements of CSI are allocated in order + // starting at SP, purely as dictated by size and alignment. In practice since + // this function handles the only accesses to those slots it's not quite so + // important. 
+ // + // We have also ordered the Callee-saved register list in AArch64CallingConv + // so that the above scheme puts registers in order: in particular we want + // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2) + for (unsigned i = 0, e = CSI.size(); i < e; ++i) { + unsigned Reg = CSI[i].getReg(); + + // First we need to find out which register class the register belongs to so + // that we can use the correct load/store instructions. + unsigned ClassIdx; + for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) { + if (PossClasses[ClassIdx].RegClass->contains(Reg)) + break; + } + assert(ClassIdx != NumClasses + && "Asked to store register in unexpected class"); + const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass; + + // Now we need to decide whether it's possible to emit a paired instruction: + // for this we want the next register to be in the same class. + MachineInstrBuilder NewMI; + bool Pair = false; + if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) { + Pair = true; + unsigned StLow = 0, StHigh = 0; + if (isPrologue) { + // Most of these registers will be live-in to the MBB and killed by our + // store, though there are exceptions (see determinePrologueDeath). 
+ StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg())); + StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg())); + } else { + StLow = RegState::Define; + StHigh = RegState::Define; + } + + NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode)) + .addReg(CSI[i+1].getReg(), StLow) + .addReg(CSI[i].getReg(), StHigh); + + // If it's a paired op, we've consumed two registers + ++i; + } else { + unsigned State; + if (isPrologue) { + State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg())); + } else { + State = RegState::Define; + } + + NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].SingleOpcode)) + .addReg(CSI[i].getReg(), State); + } + + // Note that the FrameIdx refers to the second register in a pair: it will + // be allocated the smaller numeric address and so is the one an LDP/STP + // address must use. + int FrameIdx = CSI[i].getFrameIdx(); + MachineMemOperand::MemOperandFlags Flags; + Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad; + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), + Flags, + Pair ? TheClass.getSize() * 2 : TheClass.getSize(), + MFI.getObjectAlignment(FrameIdx)); + + NewMI.addFrameIndex(FrameIdx) + .addImm(0) // address-register offset + .addMemOperand(MMO); + + if (isPrologue) + NewMI.setMIFlags(MachineInstr::FrameSetup); + + // For aesthetic reasons, during an epilogue we want to emit complementary + // operations to the prologue, but in the opposite order. So we still + // iterate through the CalleeSavedInfo list in order, but we put the + // instructions successively earlier in the MBB. 
+ if (!isPrologue) + --MBBI; + } +} + +bool +AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + static LoadStoreMethod PossibleClasses[] = { + {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR}, + {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR}, + }; + unsigned NumClasses = llvm::array_lengthof(PossibleClasses); + + emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI, + PossibleClasses, NumClasses); + + return true; +} + +bool +AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + + if (CSI.empty()) + return false; + + static LoadStoreMethod PossibleClasses[] = { + {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR}, + {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR}, + }; + unsigned NumClasses = llvm::array_lengthof(PossibleClasses); + + emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI, + PossibleClasses, NumClasses); + + return true; +} + +bool +AArch64FrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo(); + + // This is a decision of ABI compliance. The AArch64 PCS gives various options + // for conformance, and even at the most stringent level more or less permits + // elimination for leaf functions because there's no loss of functionality + // (for debugging etc).. + if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls()) + return true; + + // The following are hard-limits: incorrect code will be generated if we try + // to omit the frame. 
+ return (RI->needsStackRealignment(MF) || + MFI->hasVarSizedObjects() || + MFI->isFrameAddressTaken()); +} + +bool +AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const { + return MF.getFrameInfo()->hasVarSizedObjects(); +} + +bool +AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Of the various reasons for having a frame pointer, it's actually only + // variable-sized objects that prevent reservation of a call frame. + return !(hasFP(MF) && MFI->hasVarSizedObjects()); +} diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h new file mode 100644 index 0000000..dfa66ec --- /dev/null +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -0,0 +1,103 @@ +//==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64_FRAMEINFO_H +#define LLVM_AARCH64_FRAMEINFO_H + +#include "AArch64Subtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { +class AArch64Subtarget; + +class AArch64FrameLowering : public TargetFrameLowering { +private: + // In order to unify the spilling and restoring of callee-saved registers into + // emitFrameMemOps, we need to be able to specify which instructions to use + // for the relevant memory operations on each register class. An array of the + // following struct is populated and passed in to achieve this. + struct LoadStoreMethod { + const TargetRegisterClass *RegClass; // E.g. GPR64RegClass + + // The preferred instruction. + unsigned PairOpcode; // E.g. 
LSPair64_STR + + // Sometimes only a single register can be handled at once. + unsigned SingleOpcode; // E.g. LS64_STR + }; +protected: + const AArch64Subtarget &STI; + +public: + explicit AArch64FrameLowering(const AArch64Subtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16), + STI(sti) { + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + virtual void emitPrologue(MachineFunction &MF) const; + virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + /// Decides how much stack adjustment to perform in each phase of the prologue + /// and epilogue. + void splitSPAdjustments(uint64_t Total, uint64_t &Initial, + uint64_t &Residual) const; + + int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex, + unsigned &FrameReg, int SPAdj, + bool IsCalleeSaveOp) const; + + virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; + + virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + /// If the register is X30 (i.e. LR) and the return address is used in the + /// function then the callee-save store doesn't actually kill the register, + /// otherwise it does. + bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const; + + /// This function emits the loads or stores required during prologue and + /// epilogue as efficiently as possible. + /// + /// The operations involved in setting up and tearing down the frame are + /// similar enough to warrant a shared function, particularly as discrepancies + /// between the two would be disastrous. 
+ void emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI, + LoadStoreMethod PossibleClasses[], + unsigned NumClasses) const; + + + virtual bool hasFP(const MachineFunction &MF) const; + + virtual bool useFPForAddressing(const MachineFunction &MF) const; + + /// On AArch64, only variable-sized objects prevent reserving the call frame. + virtual bool hasReservedCallFrame(const MachineFunction &MF) const; + +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp new file mode 100644 index 0000000..9be8ba1 --- /dev/null +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -0,0 +1,422 @@ +//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the AArch64 target. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "aarch64-isel" +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64Subtarget.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +//===--------------------------------------------------------------------===// +/// AArch64 specific code to select AArch64 machine instructions for +/// SelectionDAG operations. 
+/// +namespace { + +class AArch64DAGToDAGISel : public SelectionDAGISel { + AArch64TargetMachine &TM; + const AArch64InstrInfo *TII; + + /// Keep a pointer to the AArch64Subtarget around so that we can + /// make the right decision when generating code for different targets. + const AArch64Subtarget *Subtarget; + +public: + explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, + CodeGenOpt::Level OptLevel) + : SelectionDAGISel(tm, OptLevel), TM(tm), + TII(static_cast<const AArch64InstrInfo*>(TM.getInstrInfo())), + Subtarget(&TM.getSubtarget<AArch64Subtarget>()) { + } + + virtual const char *getPassName() const { + return "AArch64 Instruction Selection"; + } + + // Include the pieces autogenerated from the target description. +#include "AArch64GenDAGISel.inc" + + template<unsigned MemSize> + bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) { + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN || CN->getZExtValue() % MemSize != 0 + || CN->getZExtValue() / MemSize > 0xfff) + return false; + + UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64); + return true; + } + + template<unsigned RegWidth> + bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { + return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); + } + + bool SelectFPZeroOperand(SDValue N, SDValue &Dummy); + + bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth); + + bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); + + bool SelectLogicalImm(SDValue N, SDValue &Imm); + + template<unsigned RegWidth> + bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) { + return SelectTSTBOperand(N, FixedPos, RegWidth); + } + + bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth); + + SDNode *TrySelectToMoveImm(SDNode *N); + SDNode *SelectToLitPool(SDNode *N); + SDNode *SelectToFPLitPool(SDNode *N); + + SDNode* Select(SDNode*); +private: +}; +} + +bool 
+AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, + unsigned RegWidth) { + const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N); + if (!CN) return false; + + // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits + // is between 1 and 32 for a destination w-register, or 1 and 64 for an + // x-register. + // + // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we + // want THIS_NODE to be 2^fbits. This is much easier to deal with using + // integers. + bool IsExact; + + // fbits is between 1 and 64 in the worst-case, which means the fmul + // could have 2^64 as an actual operand. Need 65 bits of precision. + APSInt IntVal(65, true); + CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); + + // N.b. isPowerOf2 also checks for > 0. + if (!IsExact || !IntVal.isPowerOf2()) return false; + unsigned FBits = IntVal.logBase2(); + + // Checks above should have guaranteed that we haven't lost information in + // finding FBits, but it must still be in range. + if (FBits == 0 || FBits > RegWidth) return false; + + FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32); + return true; +} + +bool +AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps) { + switch (ConstraintCode) { + default: llvm_unreachable("Unrecognised AArch64 memory constraint"); + case 'm': + // FIXME: more freedom is actually permitted for 'm'. We can go + // hunting for a base and an offset if we want. Of course, since + // we don't really know how the operand is going to be used we're + // probably restricted to the load/store pair's simm7 as an offset + // range anyway. 
+ case 'Q': + OutOps.push_back(Op); + } + + return false; +} + +bool +AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) { + ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N); + if (!Imm || !Imm->getValueAPF().isPosZero()) + return false; + + // Doesn't actually carry any information, but keeps TableGen quiet. + Dummy = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) { + uint32_t Bits; + uint32_t RegWidth = N.getValueType().getSizeInBits(); + + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) return false; + + if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits)) + return false; + + Imm = CurDAG->getTargetConstant(Bits, MVT::i32); + return true; +} + +SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) { + SDNode *ResNode; + DebugLoc dl = Node->getDebugLoc(); + EVT DestType = Node->getValueType(0); + unsigned DestWidth = DestType.getSizeInBits(); + + unsigned MOVOpcode; + EVT MOVType; + int UImm16, Shift; + uint32_t LogicalBits; + + uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue(); + if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) { + MOVType = DestType; + MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii; + } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) { + MOVType = DestType; + MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii; + } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) { + // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can + // use a 32-bit instruction: "movn w0, 0xedbc". + MOVType = MVT::i32; + MOVOpcode = AArch64::MOVNwii; + } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) { + MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi; + uint16_t ZR = DestWidth == 64 ? 
AArch64::XZR : AArch64::WZR; + + return CurDAG->getMachineNode(MOVOpcode, dl, DestType, + CurDAG->getRegister(ZR, DestType), + CurDAG->getTargetConstant(LogicalBits, MVT::i32)); + } else { + // Can't handle it in one instruction. There's scope for permitting two (or + // more) instructions, but that'll need more thought. + return NULL; + } + + ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType, + CurDAG->getTargetConstant(UImm16, MVT::i32), + CurDAG->getTargetConstant(Shift, MVT::i32)); + + if (MOVType != DestType) { + ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, + MVT::i64, MVT::i32, MVT::Other, + CurDAG->getTargetConstant(0, MVT::i64), + SDValue(ResNode, 0), + CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32)); + } + + return ResNode; +} + +SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { + DebugLoc dl = Node->getDebugLoc(); + uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue(); + int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue(); + EVT DestType = Node->getValueType(0); + + // Since we may end up loading a 64-bit constant from a 32-bit entry the + // constant in the pool may have a different type to the eventual node. 
+ SDValue PoolEntry; + EVT LoadType; + unsigned LoadInst; + + assert((DestType == MVT::i64 || DestType == MVT::i32) + && "Only expect integer constants at the moment"); + + if (DestType == MVT::i32 || UnsignedVal <= UINT32_MAX) { + // LDR w3, lbl + LoadInst = AArch64::LDRw_lit; + LoadType = MVT::i32; + + PoolEntry = CurDAG->getTargetConstantPool( + ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), UnsignedVal), + MVT::i32); + } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) { + // We can use a sign-extending 32-bit load: LDRSW x3, lbl + LoadInst = AArch64::LDRSWx_lit; + LoadType = MVT::i64; + + PoolEntry = CurDAG->getTargetConstantPool( + ConstantInt::getSigned(Type::getInt32Ty(*CurDAG->getContext()), + SignedVal), + MVT::i32); + } else { + // Full 64-bit load needed: LDR x3, lbl + LoadInst = AArch64::LDRx_lit; + LoadType = MVT::i64; + + PoolEntry = CurDAG->getTargetConstantPool( + ConstantInt::get(Type::getInt64Ty(*CurDAG->getContext()), UnsignedVal), + MVT::i64); + } + + SDNode *ResNode = CurDAG->getMachineNode(LoadInst, dl, + LoadType, MVT::Other, + PoolEntry, CurDAG->getEntryNode()); + + if (DestType != LoadType) { + // We used the implicit zero-extension of "LDR w3, lbl", tell LLVM this + // fact. 
+ assert(DestType == MVT::i64 && LoadType == MVT::i32 + && "Unexpected load combination"); + + ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, + MVT::i64, MVT::i32, MVT::Other, + CurDAG->getTargetConstant(0, MVT::i64), + SDValue(ResNode, 0), + CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32)); + } + + return ResNode; +} + +SDNode *AArch64DAGToDAGISel::SelectToFPLitPool(SDNode *Node) { + DebugLoc dl = Node->getDebugLoc(); + const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue(); + EVT DestType = Node->getValueType(0); + + unsigned LoadInst; + switch (DestType.getSizeInBits()) { + case 32: + LoadInst = AArch64::LDRs_lit; + break; + case 64: + LoadInst = AArch64::LDRd_lit; + break; + case 128: + LoadInst = AArch64::LDRq_lit; + break; + default: llvm_unreachable("cannot select floating-point litpool"); + } + + SDValue PoolEntry = CurDAG->getTargetConstantPool(FV, DestType); + SDNode *ResNode = CurDAG->getMachineNode(LoadInst, dl, + DestType, MVT::Other, + PoolEntry, CurDAG->getEntryNode()); + + return ResNode; +} + +bool +AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos, + unsigned RegWidth) { + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) return false; + + uint64_t Val = CN->getZExtValue(); + + if (!isPowerOf2_64(Val)) return false; + + unsigned TestedBit = Log2_64(Val); + // Checks above should have guaranteed that we haven't lost information in + // finding TestedBit, but it must still be in range. 
+ if (TestedBit >= RegWidth) return false; + + FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64); + return true; +} + +SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { + // Dump information about the Node being selected + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n"); + + if (Node->isMachineOpcode()) { + DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); + return NULL; + } + + switch (Node->getOpcode()) { + case ISD::FrameIndex: { + int FI = cast<FrameIndexSDNode>(Node)->getIndex(); + EVT PtrTy = TLI.getPointerTy(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy); + return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy, + TFI, CurDAG->getTargetConstant(0, PtrTy)); + } + case ISD::ConstantPool: { + // Constant pools are fine, just create a Target entry. + ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node); + const Constant *C = CN->getConstVal(); + SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0)); + + ReplaceUses(SDValue(Node, 0), CP); + return NULL; + } + case ISD::Constant: { + SDNode *ResNode = 0; + if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) { + // XZR and WZR are probably even better than an actual move: most of the + // time they can be folded into another instruction with *no* cost. + + EVT Ty = Node->getValueType(0); + assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type"); + uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR; + ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), + Node->getDebugLoc(), + Register, Ty).getNode(); + } + + // Next best option is a move-immediate, see if we can do that. + if (!ResNode) { + ResNode = TrySelectToMoveImm(Node); + } + + // If even that fails we fall back to a lit-pool entry at the moment. Future + // tuning or restrictions like non-readable code-sections may mandate a + // sequence of MOVZ/MOVN/MOVK instructions. 
+ if (!ResNode) { + ResNode = SelectToLitPool(Node); + } + + assert(ResNode && "We need *some* way to materialise a constant"); + + ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); + return NULL; + } + case ISD::ConstantFP: { + if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) { + // FMOV will take care of it from TableGen + break; + } + + SDNode *ResNode = SelectToFPLitPool(Node); + ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); + return NULL; + } + default: + break; // Let generic code handle it + } + + SDNode *ResNode = SelectCode(Node); + + DEBUG(dbgs() << "=> "; + if (ResNode == NULL || ResNode == Node) + Node->dump(CurDAG); + else + ResNode->dump(CurDAG); + dbgs() << "\n"); + + return ResNode; +} + +/// This pass converts a legalized DAG into an AArch64-specific DAG, ready for +/// instruction scheduling. +FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new AArch64DAGToDAGISel(TM, OptLevel); +} diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp new file mode 100644 index 0000000..42e8f09 --- /dev/null +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -0,0 +1,2957 @@ +//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that AArch64 uses to lower LLVM code into a +// selection DAG. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "aarch64-isel" +#include "AArch64.h" +#include "AArch64ISelLowering.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64TargetMachine.h" +#include "AArch64TargetObjectFile.h" +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/CallingConv.h" + +using namespace llvm; + +static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) { + const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>(); + + if (Subtarget->isTargetLinux()) + return new AArch64LinuxTargetObjectFile(); + if (Subtarget->isTargetELF()) + return new TargetLoweringObjectFileELF(); + llvm_unreachable("unknown subtarget type"); +} + + +AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) + : TargetLowering(TM, createTLOF(TM)), + Subtarget(&TM.getSubtarget<AArch64Subtarget>()), + RegInfo(TM.getRegisterInfo()), + Itins(TM.getInstrItineraryData()) { + + // SIMD compares set the entire lane's bits to 1 + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + + // Scalar register <-> type mapping + addRegisterClass(MVT::i32, &AArch64::GPR32RegClass); + addRegisterClass(MVT::i64, &AArch64::GPR64RegClass); + addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); + addRegisterClass(MVT::f32, &AArch64::FPR32RegClass); + addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); + addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); + + // And the vectors + addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass); + addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass); + addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass); + addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass); 
+ addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass); + addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass); + addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass); + addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass); + addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass); + + computeRegisterProperties(); + + // Some atomic operations can be folded into load-acquire or store-release + // instructions on AArch64. It's marginally simpler to let LLVM expand + // everything out to a barrier and then recombine the (few) barriers we can. + setInsertFencesForAtomic(true); + setTargetDAGCombine(ISD::ATOMIC_FENCE); + setTargetDAGCombine(ISD::ATOMIC_STORE); + + // We combine OR nodes for bitfield and NEON BSL operations. + setTargetDAGCombine(ISD::OR); + + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::SRA); + + // AArch64 does not have i1 loads, or much of anything for i1 really. + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + + setStackPointerRegisterToSaveRestore(AArch64::XSP); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + + // We'll lower globals to wrappers for selection. + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); + + // A64 instructions have the comparison predicate attached to the user of the + // result, but having a separate comparison is valuable for matching. 
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom); + setOperationAction(ISD::BR_CC, MVT::i64, Custom); + setOperationAction(ISD::BR_CC, MVT::f32, Custom); + setOperationAction(ISD::BR_CC, MVT::f64, Custom); + + setOperationAction(ISD::SELECT, MVT::i32, Custom); + setOperationAction(ISD::SELECT, MVT::i64, Custom); + setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::SELECT, MVT::f64, Custom); + + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + + setOperationAction(ISD::BRCOND, MVT::Other, Custom); + + setOperationAction(ISD::SETCC, MVT::i32, Custom); + setOperationAction(ISD::SETCC, MVT::i64, Custom); + setOperationAction(ISD::SETCC, MVT::f32, Custom); + setOperationAction(ISD::SETCC, MVT::f64, Custom); + + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::JumpTable, MVT::i64, Custom); + + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VACOPY, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + + setOperationAction(ISD::BlockAddress, MVT::i64, Custom); + + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Expand); + + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i64, Expand); 
+ + // Legal floating-point operations. + setOperationAction(ISD::FABS, MVT::f32, Legal); + setOperationAction(ISD::FABS, MVT::f64, Legal); + + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + + setOperationAction(ISD::FNEG, MVT::f32, Legal); + setOperationAction(ISD::FNEG, MVT::f64, Legal); + + setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); + + setOperationAction(ISD::FSQRT, MVT::f32, Legal); + setOperationAction(ISD::FSQRT, MVT::f64, Legal); + + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + setOperationAction(ISD::ConstantFP, MVT::f128, Legal); + + // Illegal floating-point operations. 
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + + setOperationAction(ISD::FEXP, MVT::f32, Expand); + setOperationAction(ISD::FEXP, MVT::f64, Expand); + + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + + setOperationAction(ISD::FLOG, MVT::f32, Expand); + setOperationAction(ISD::FLOG, MVT::f64, Expand); + + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + + setOperationAction(ISD::FPOWI, MVT::f32, Expand); + setOperationAction(ISD::FPOWI, MVT::f64, Expand); + + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + + + // Virtually no operation on f128 is legal, but LLVM can't expand them when + // there's a valid register class, so we need custom operations in most cases. 
+ setOperationAction(ISD::FABS, MVT::f128, Expand); + setOperationAction(ISD::FADD, MVT::f128, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); + setOperationAction(ISD::FCOS, MVT::f128, Expand); + setOperationAction(ISD::FDIV, MVT::f128, Custom); + setOperationAction(ISD::FMA, MVT::f128, Expand); + setOperationAction(ISD::FMUL, MVT::f128, Custom); + setOperationAction(ISD::FNEG, MVT::f128, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f128, Expand); + setOperationAction(ISD::FPOW, MVT::f128, Expand); + setOperationAction(ISD::FREM, MVT::f128, Expand); + setOperationAction(ISD::FRINT, MVT::f128, Expand); + setOperationAction(ISD::FSIN, MVT::f128, Expand); + setOperationAction(ISD::FSQRT, MVT::f128, Expand); + setOperationAction(ISD::FSUB, MVT::f128, Custom); + setOperationAction(ISD::FTRUNC, MVT::f128, Expand); + setOperationAction(ISD::SETCC, MVT::f128, Custom); + setOperationAction(ISD::BR_CC, MVT::f128, Custom); + setOperationAction(ISD::SELECT, MVT::f128, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); + + // NOTE(review): FP_EXTEND/f128 is set twice in the list above (first Expand, + // then Custom); presumably the later call wins, leaving the Expand line dead -- + // confirm and drop one of them. + // + // Lowering for many of the conversions is actually specified by the non-f128 + // type. The LowerXXX function will be trivial when f128 isn't involved. 
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); + setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); + setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); + + // This prevents LLVM trying to compress double constants into a floating + // constant-pool entry and trying to load from there. It's of doubtful benefit + // for A64: we'd need LDR followed by FCVT, I believe. + setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); + + setTruncStoreAction(MVT::f128, MVT::f64, Expand); + setTruncStoreAction(MVT::f128, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + + setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); + + setExceptionPointerRegister(AArch64::X0); + setExceptionSelectorRegister(AArch64::X1); +} + +EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const { + // It's reasonably important that this value matches the "natural" legal + // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself + // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64). 
+ if (!VT.isVector()) return MVT::i32; + return VT.changeVectorElementTypeToInteger(); +} + +static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc, + unsigned &strOpc) { + switch (Size) { + default: llvm_unreachable("unsupported size for atomic binary op!"); + case 1: + ldrOpc = AArch64::LDXR_byte; + strOpc = AArch64::STXR_byte; + break; + case 2: + ldrOpc = AArch64::LDXR_hword; + strOpc = AArch64::STXR_hword; + break; + case 4: + ldrOpc = AArch64::LDXR_word; + strOpc = AArch64::STXR_word; + break; + case 8: + ldrOpc = AArch64::LDXR_dword; + strOpc = AArch64::STXR_dword; + break; + } +} + +MachineBasicBlock * +AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, + unsigned Size, + unsigned BinOpcode) const { + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *MF = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned incr = MI->getOperand(2).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + + unsigned ldrOpc, strOpc; + getExclusiveOperation(Size, ldrOpc, strOpc); + + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + const TargetRegisterClass *TRC + = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; + unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); + + // thisMBB: + // ... 
+ // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // loopMBB: + // ldxr dest, ptr + // <binop> scratch, dest, incr + // stxr stxr_status, scratch, ptr + // cmp stxr_status, #0 + // b.ne loopMBB + // fallthrough --> exitMBB + BB = loopMBB; + BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + if (BinOpcode) { + // All arithmetic operations we'll be creating are designed to take an extra + // shift or extend operand, which we can conveniently set to zero. + + // Operand order needs to go the other way for NAND. + if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl) + BuildMI(BB, dl, TII->get(BinOpcode), scratch) + .addReg(incr).addReg(dest).addImm(0); + else + BuildMI(BB, dl, TII->get(BinOpcode), scratch) + .addReg(dest).addReg(incr).addImm(0); + } + + // From the stxr, the register is GPR32; from the cmp it's GPR32wsp + unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); + + BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr); + BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp)) + .addReg(stxr_status).addImm(0); + BuildMI(BB, dl, TII->get(AArch64::Bcc)) + .addImm(A64CC::NE).addMBB(loopMBB); + + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + + MI->eraseFromParent(); // The instruction is gone now. 
+ + return BB; +} + +MachineBasicBlock * +AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size, + unsigned CmpOp, + A64CC::CondCodes Cond) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *MF = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned incr = MI->getOperand(2).getReg(); + unsigned oldval = dest; + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + const TargetRegisterClass *TRC, *TRCsp; + if (Size == 8) { + TRC = &AArch64::GPR64RegClass; + TRCsp = &AArch64::GPR64xspRegClass; + } else { + TRC = &AArch64::GPR32RegClass; + TRCsp = &AArch64::GPR32wspRegClass; + } + + unsigned ldrOpc, strOpc; + getExclusiveOperation(Size, ldrOpc, strOpc); + + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + unsigned scratch = MRI.createVirtualRegister(TRC); + MRI.constrainRegClass(scratch, TRCsp); + + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // loopMBB: + // ldxr dest, ptr + // cmp incr, dest (, sign/zero extend if necessary) + // csel scratch, dest, incr, cond + // stxr stxr_status, scratch, ptr + // cmp stxr_status, #0 + // b.ne loopMBB + // fallthrough --> exitMBB + BB = loopMBB; + BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + + // Build compare and csel instructions. 
+ MRI.constrainRegClass(incr, TRCsp); + BuildMI(BB, dl, TII->get(CmpOp)) + .addReg(incr).addReg(oldval).addImm(0); + + BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc), + scratch) + .addReg(oldval).addReg(incr).addImm(Cond); + + unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); + + BuildMI(BB, dl, TII->get(strOpc), stxr_status) + .addReg(scratch).addReg(ptr); + BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp)) + .addReg(stxr_status).addImm(0); + BuildMI(BB, dl, TII->get(AArch64::Bcc)) + .addImm(A64CC::NE).addMBB(loopMBB); + + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + + MI->eraseFromParent(); // The instruction is gone now. + + return BB; +} + +MachineBasicBlock * +AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size) const { + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned oldval = MI->getOperand(2).getReg(); + unsigned newval = MI->getOperand(3).getReg(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + const TargetRegisterClass *TRCsp; + TRCsp = Size == 8 ? 
&AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass; + + unsigned ldrOpc, strOpc; + getExclusiveOperation(Size, ldrOpc, strOpc); + + MachineFunction *MF = BB->getParent(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; // insert the new blocks after the current block + + MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loop1MBB); + MF->insert(It, loop2MBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + // thisMBB: + // ... + // fallthrough --> loop1MBB + BB->addSuccessor(loop1MBB); + + // loop1MBB: + // ldxr dest, [ptr] + // cmp dest, oldval + // b.ne exitMBB + BB = loop1MBB; + BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + + unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl; + MRI.constrainRegClass(dest, TRCsp); + BuildMI(BB, dl, TII->get(CmpOp)) + .addReg(dest).addReg(oldval).addImm(0); + BuildMI(BB, dl, TII->get(AArch64::Bcc)) + .addImm(A64CC::NE).addMBB(exitMBB); + BB->addSuccessor(loop2MBB); + BB->addSuccessor(exitMBB); + + // loop2MBB: + // stxr stxr_status, newval, [ptr] + // cmp stxr_status, #0 + // b.ne loop1MBB + BB = loop2MBB; + unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); + + BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr); + BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp)) + .addReg(stxr_status).addImm(0); + BuildMI(BB, dl, TII->get(AArch64::Bcc)) + .addImm(A64CC::NE).addMBB(loop1MBB); + BB->addSuccessor(loop1MBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... 
+ BB = exitMBB; + + MI->eraseFromParent(); // The instruction is gone now. + + return BB; +} + +MachineBasicBlock * +AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI, + MachineBasicBlock *MBB) const { + // We materialise the F128CSEL pseudo-instruction using conditional branches + // and loads, giving an instruction sequence like: + // str q0, [sp] + // b.ne IfTrue + // b Finish + // IfTrue: + // str q1, [sp] + // Finish: + // ldr q0, [sp] + // + // Using virtual registers would probably not be beneficial since COPY + // instructions are expensive for f128 (there's no actual instruction to + // implement them). + // + // An alternative would be to do an integer-CSEL on some address. E.g.: + // mov x0, sp + // add x1, sp, #16 + // str q0, [x0] + // str q1, [x1] + // csel x0, x0, x1, ne + // ldr q0, [x0] + // + // It's unclear which approach is actually optimal. + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineFunction *MF = MBB->getParent(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + DebugLoc DL = MI->getDebugLoc(); + MachineFunction::iterator It = MBB; + ++It; + + unsigned DestReg = MI->getOperand(0).getReg(); + unsigned IfTrueReg = MI->getOperand(1).getReg(); + unsigned IfFalseReg = MI->getOperand(2).getReg(); + unsigned CondCode = MI->getOperand(3).getImm(); + bool NZCVKilled = MI->getOperand(4).isKill(); + + MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, TrueBB); + MF->insert(It, EndBB); + + // Transfer rest of current basic-block to EndBB + EndBB->splice(EndBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), + MBB->end()); + EndBB->transferSuccessorsAndUpdatePHIs(MBB); + + // We need somewhere to store the f128 value needed. + int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16); + + // [... start of incoming MBB ...] 
+ // str qIFFALSE, [sp] + // b.cc IfTrue + // b Done + BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR)) + .addReg(IfFalseReg) + .addFrameIndex(ScratchFI) + .addImm(0); + BuildMI(MBB, DL, TII->get(AArch64::Bcc)) + .addImm(CondCode) + .addMBB(TrueBB); + BuildMI(MBB, DL, TII->get(AArch64::Bimm)) + .addMBB(EndBB); + MBB->addSuccessor(TrueBB); + MBB->addSuccessor(EndBB); + + // IfTrue: + // str qIFTRUE, [sp] + BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR)) + .addReg(IfTrueReg) + .addFrameIndex(ScratchFI) + .addImm(0); + + // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the + // blocks. + TrueBB->addSuccessor(EndBB); + + // Done: + // ldr qDEST, [sp] + // [... rest of incoming MBB ...] + if (!NZCVKilled) + EndBB->addLiveIn(AArch64::NZCV); + MachineInstr *StartOfEnd = EndBB->begin(); + BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg) + .addFrameIndex(ScratchFI) + .addImm(0); + + MI->eraseFromParent(); + return EndBB; +} + +MachineBasicBlock * +AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const { + switch (MI->getOpcode()) { + default: llvm_unreachable("Unhandled instruction with custom inserter"); + case AArch64::F128CSEL: + return EmitF128CSEL(MI, MBB); + case AArch64::ATOMIC_LOAD_ADD_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl); + case AArch64::ATOMIC_LOAD_ADD_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl); + case AArch64::ATOMIC_LOAD_ADD_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl); + case AArch64::ATOMIC_LOAD_ADD_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl); + + case AArch64::ATOMIC_LOAD_SUB_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl); + case AArch64::ATOMIC_LOAD_SUB_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl); + case AArch64::ATOMIC_LOAD_SUB_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl); + case AArch64::ATOMIC_LOAD_SUB_I64: + return 
emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl); + + case AArch64::ATOMIC_LOAD_AND_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl); + case AArch64::ATOMIC_LOAD_AND_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl); + case AArch64::ATOMIC_LOAD_AND_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl); + case AArch64::ATOMIC_LOAD_AND_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl); + + case AArch64::ATOMIC_LOAD_OR_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::ORRwww_lsl); + case AArch64::ATOMIC_LOAD_OR_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl); + case AArch64::ATOMIC_LOAD_OR_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl); + case AArch64::ATOMIC_LOAD_OR_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl); + + case AArch64::ATOMIC_LOAD_XOR_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl); + case AArch64::ATOMIC_LOAD_XOR_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl); + case AArch64::ATOMIC_LOAD_XOR_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl); + case AArch64::ATOMIC_LOAD_XOR_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl); + + case AArch64::ATOMIC_LOAD_NAND_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl); + case AArch64::ATOMIC_LOAD_NAND_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl); + case AArch64::ATOMIC_LOAD_NAND_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl); + case AArch64::ATOMIC_LOAD_NAND_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl); + + case AArch64::ATOMIC_LOAD_MIN_I8: + return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT); + case AArch64::ATOMIC_LOAD_MIN_I16: + return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT); + case AArch64::ATOMIC_LOAD_MIN_I32: + return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT); + case AArch64::ATOMIC_LOAD_MIN_I64: + return 
emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT); + + case AArch64::ATOMIC_LOAD_MAX_I8: + return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT); + case AArch64::ATOMIC_LOAD_MAX_I16: + return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT); + case AArch64::ATOMIC_LOAD_MAX_I32: + return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT); + case AArch64::ATOMIC_LOAD_MAX_I64: + return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT); + + case AArch64::ATOMIC_LOAD_UMIN_I8: + return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI); + case AArch64::ATOMIC_LOAD_UMIN_I16: + return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI); + case AArch64::ATOMIC_LOAD_UMIN_I32: + return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI); + case AArch64::ATOMIC_LOAD_UMIN_I64: + return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI); + + case AArch64::ATOMIC_LOAD_UMAX_I8: + return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO); + case AArch64::ATOMIC_LOAD_UMAX_I16: + return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO); + case AArch64::ATOMIC_LOAD_UMAX_I32: + return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO); + case AArch64::ATOMIC_LOAD_UMAX_I64: + return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO); + + case AArch64::ATOMIC_SWAP_I8: + return emitAtomicBinary(MI, MBB, 1, 0); + case AArch64::ATOMIC_SWAP_I16: + return emitAtomicBinary(MI, MBB, 2, 0); + case AArch64::ATOMIC_SWAP_I32: + return emitAtomicBinary(MI, MBB, 4, 0); + case AArch64::ATOMIC_SWAP_I64: + return emitAtomicBinary(MI, MBB, 8, 0); + + case AArch64::ATOMIC_CMP_SWAP_I8: + return emitAtomicCmpSwap(MI, MBB, 1); + case AArch64::ATOMIC_CMP_SWAP_I16: + return emitAtomicCmpSwap(MI, MBB, 2); + case AArch64::ATOMIC_CMP_SWAP_I32: + return emitAtomicCmpSwap(MI, MBB, 4); + case 
AArch64::ATOMIC_CMP_SWAP_I64: + return emitAtomicCmpSwap(MI, MBB, 8); + } +} + + +const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC"; + case AArch64ISD::Call: return "AArch64ISD::Call"; + case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV"; + case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad"; + case AArch64ISD::BFI: return "AArch64ISD::BFI"; + case AArch64ISD::EXTR: return "AArch64ISD::EXTR"; + case AArch64ISD::Ret: return "AArch64ISD::Ret"; + case AArch64ISD::SBFX: return "AArch64ISD::SBFX"; + case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC"; + case AArch64ISD::SETCC: return "AArch64ISD::SETCC"; + case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; + case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; + case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL"; + case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall"; + + default: return NULL; + } +} + +static const uint16_t AArch64FPRArgRegs[] = { + AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, + AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7 +}; +static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs); + +static const uint16_t AArch64ArgRegs[] = { + AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, + AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7 +}; +static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs); + +static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + // Mark all remaining general purpose registers as allocated. We don't + // backtrack: if (for example) an i128 gets put on the stack, no subsequent + // i64 will go in registers (C.11). 
+ for (unsigned i = 0; i < NumArgRegs; ++i) + State.AllocateReg(AArch64ArgRegs[i]); + + return false; +} + +#include "AArch64GenCallingConv.inc" + +CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { + + switch(CC) { + default: llvm_unreachable("Unsupported calling convention"); + case CallingConv::Fast: + case CallingConv::C: + return CC_A64_APCS; + } +} + +void +AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc DL, SDValue &Chain) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + AArch64MachineFunctionInfo *FuncInfo = MF.getInfo<AArch64MachineFunctionInfo>(); + + SmallVector<SDValue, 8> MemOps; + + unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs, + NumArgRegs); + unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs, + NumFPRArgRegs); + + unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR); + int GPRIdx = 0; + if (GPRSaveSize != 0) { + GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false); + + SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy()); + + for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) { + unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass); + SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); + SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, + MachinePointerInfo::getStack(i * 8), + false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, + DAG.getConstant(8, getPointerTy())); + } + } + + unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR); + int FPRIdx = 0; + if (FPRSaveSize != 0) { + FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false); + + SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy()); + + for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { + unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i], + &AArch64::FPR128RegClass); + SDValue Val = 
DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); + SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, + MachinePointerInfo::getStack(i * 16), + false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, + DAG.getConstant(16, getPointerTy())); + } + } + + int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true); + + FuncInfo->setVariadicStackIdx(StackIdx); + FuncInfo->setVariadicGPRIdx(GPRIdx); + FuncInfo->setVariadicGPRSize(GPRSaveSize); + FuncInfo->setVariadicFPRIdx(FPRIdx); + FuncInfo->setVariadicFPRSize(FPRSaveSize); + + if (!MemOps.empty()) { + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], + MemOps.size()); + } +} + + +SDValue +AArch64TargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo<AArch64MachineFunctionInfo>(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv)); + + SmallVector<SDValue, 16> ArgValues; + + SDValue ArgValue; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + + if (Flags.isByVal()) { + // Byval is used for small structs and HFAs in the PCS, but the system + // should work in a non-compliant manner for larger structs. 
+ EVT PtrTy = getPointerTy(); + int Size = Flags.getByValSize(); + unsigned NumRegs = (Size + 7) / 8; + + unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs, + VA.getLocMemOffset(), + false); + SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy); + InVals.push_back(FrameIdxN); + + continue; + } else if (VA.isRegLoc()) { + MVT RegVT = VA.getLocVT(); + const TargetRegisterClass *RC = getRegClassFor(RegVT); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + + ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); + } else { // VA.isRegLoc() + assert(VA.isMemLoc()); + + int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, + VA.getLocMemOffset(), true); + + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, false, 0); + + + } + + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + ArgValue = DAG.getNode(ISD::BITCAST,dl, VA.getValVT(), ArgValue); + break; + case CCValAssign::SExt: + case CCValAssign::ZExt: + case CCValAssign::AExt: { + unsigned DestSize = VA.getValVT().getSizeInBits(); + unsigned DestSubReg; + + switch (DestSize) { + case 8: DestSubReg = AArch64::sub_8; break; + case 16: DestSubReg = AArch64::sub_16; break; + case 32: DestSubReg = AArch64::sub_32; break; + case 64: DestSubReg = AArch64::sub_64; break; + default: llvm_unreachable("Unexpected argument promotion"); + } + + ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, + VA.getValVT(), ArgValue, + DAG.getTargetConstant(DestSubReg, MVT::i32)), + 0); + break; + } + } + + InVals.push_back(ArgValue); + } + + if (isVarArg) + SaveVarArgRegisters(CCInfo, DAG, dl, Chain); + + unsigned StackArgSize = CCInfo.getNextStackOffset(); + if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) { + // This is a non-standard ABI so by fiat I say we're allowed to make full + // use of the 
stack area to be popped, which must be aligned to 16 bytes in + // any case: + StackArgSize = RoundUpToAlignment(StackArgSize, 16); + + // If we're expected to restore the stack (e.g. fastcc) then we'll be adding + // a multiple of 16. + FuncInfo->setArgumentStackToRestore(StackArgSize); + + // This realignment carries over to the available bytes below. Our own + // callers will guarantee the space is free by giving an aligned value to + // CALLSEQ_START. + } + // Even if we're not expected to free up the space, it's useful to know how + // much is there while considering tail calls (because we can reuse it). + FuncInfo->setBytesInStackArgArea(StackArgSize); + + return Chain; +} + +SDValue +AArch64TargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, SelectionDAG &DAG) const { + // CCValAssign - represent the assignment of the return value to a location. + SmallVector<CCValAssign, 16> RVLocs; + + // CCState - Info about the registers and stack slots. + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + + // Analyze outgoing return values. + CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv)); + + // If this is the first return lowered for this function, add + // the regs to the liveout set for the function. 
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + for (unsigned i = 0; i != RVLocs.size(); ++i) + if (RVLocs[i].isRegLoc()) + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); + } + + SDValue Flag; + + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + // PCS: "If the type, T, of the result of a function is such that void func(T + // arg) would require that arg be passed as a value in a register (or set of + // registers) according to the rules in 5.4, then the result is returned in + // the same registers as would be used for such an argument. + // + // Otherwise, the caller shall reserve a block of memory of sufficient + // size and alignment to hold the result. The address of the memory block + // shall be passed as an additional argument to the function in x8." + // + // This is implemented in two places. The register-return values are dealt + // with here, more complex returns are passed as an sret parameter, which + // means we don't have to worry about it during actual return. + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Only register-returns should be created by PCS"); + + + SDValue Arg = OutVals[i]; + + // There's no convenient note in the ABI about this as there is for normal + // arguments, but it says return values are passed in the same registers as + // an argument would be. I believe that includes the comments about + // unspecified higher bits, putting the burden of widening on the *caller* + // for return values. + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + case CCValAssign::ZExt: + case CCValAssign::AExt: + // Floating-point values should only be extended when they're going into + // memory, which can't happen here so an integer extend is acceptable. 
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); + break; + } + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); + Flag = Chain.getValue(1); + } + + if (Flag.getNode()) { + return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other, Chain, Flag); + } else { + return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other, Chain); + } +} + +SDValue +AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; + SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; + SmallVector<SDValue, 32> &OutVals = CLI.OutVals; + SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &IsTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool IsVarArg = CLI.IsVarArg; + + MachineFunction &MF = DAG.getMachineFunction(); + AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo<AArch64MachineFunctionInfo>(); + bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; + bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet(); + bool IsSibCall = false; + + if (IsTailCall) { + IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, + IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), + Outs, OutVals, Ins, DAG); + + // A sibling call is one where we're under the usual C ABI and not planning + // to change that but can still do a tail call: + if (!TailCallOpt && IsTailCall) + IsSibCall = true; + } + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv)); + + // On AArch64 (and all other architectures I'm aware of) the most this has to + // do is adjust the stack pointer. 
+ unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16); + if (IsSibCall) { + // Since we're not changing the ABI to make this a tail call, the memory + // operands are already available in the caller's incoming argument space. + NumBytes = 0; + } + + // FPDiff is the byte offset of the call's argument area from the callee's. + // Stores to callee stack arguments will be placed in FixedStackSlots offset + // by this amount for a tail call. In a sibling call it must be 0 because the + // caller will deallocate the entire stack and the callee still expects its + // arguments to begin at SP+0. Completely unused for non-tail calls. + int FPDiff = 0; + + if (IsTailCall && !IsSibCall) { + unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); + + // FPDiff will be negative if this tail call requires more space than we + // would automatically have in our incoming argument space. Positive if we + // can actually shrink the stack. + FPDiff = NumReusableBytes - NumBytes; + + // The stack pointer must be 16-byte aligned at all times it's used for a + // memory operation, which in practice means at *all* times and in + // particular across call boundaries. Therefore our own arguments started at + // a 16-byte aligned SP and the delta applied for the tail call should + // satisfy the same constraint. + assert(FPDiff % 16 == 0 && "unaligned stack on tail call"); + } + + if (!IsSibCall) + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + + SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP, getPointerTy()); + + SmallVector<SDValue, 8> MemOpChains; + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + SDValue Arg = OutVals[i]; + + // Callee does the actual widening, so all extensions just use an implicit + // definition of the rest of the Loc. 
Aesthetically, this would be nicer as + // an ANY_EXTEND, but that isn't valid for floating-point types and this + // alternative works on integer types too. + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + case CCValAssign::ZExt: + case CCValAssign::AExt: { + unsigned SrcSize = VA.getValVT().getSizeInBits(); + unsigned SrcSubReg; + + switch (SrcSize) { + case 8: SrcSubReg = AArch64::sub_8; break; + case 16: SrcSubReg = AArch64::sub_16; break; + case 32: SrcSubReg = AArch64::sub_32; break; + case 64: SrcSubReg = AArch64::sub_64; break; + default: llvm_unreachable("Unexpected argument promotion"); + } + + Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, + VA.getLocVT(), + DAG.getUNDEF(VA.getLocVT()), + Arg, + DAG.getTargetConstant(SrcSubReg, MVT::i32)), + 0); + + break; + } + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); + break; + } + + if (VA.isRegLoc()) { + // A normal register (sub-) argument. For now we just note it down because + // we want to copy things into registers as late as possible to avoid + // register-pressure (and possibly worse). + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + continue; + } + + assert(VA.isMemLoc() && "unexpected argument location"); + + SDValue DstAddr; + MachinePointerInfo DstInfo; + if (IsTailCall) { + uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() : + VA.getLocVT().getSizeInBits(); + OpSize = (OpSize + 7) / 8; + int32_t Offset = VA.getLocMemOffset() + FPDiff; + int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); + + DstAddr = DAG.getFrameIndex(FI, getPointerTy()); + DstInfo = MachinePointerInfo::getFixedStack(FI); + + // Make sure any stack arguments overlapping with where we're storing are + // loaded before this eventual operation. Otherwise they'll be clobbered. 
+ Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI); + } else { + SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()); + + DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset()); + } + + if (Flags.isByVal()) { + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64); + SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode, + Flags.getByValAlign(), + /*isVolatile = */ false, + /*alwaysInline = */ false, + DstInfo, MachinePointerInfo(0)); + MemOpChains.push_back(Cpy); + } else { + // Normal stack argument, put it where it's needed. + SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo, + false, false, 0); + MemOpChains.push_back(Store); + } + } + + // The loads and stores generated above shouldn't clash with each + // other. Combining them with this TokenFactor notes that fact for the rest of + // the backend. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Most of the rest of the instructions need to be glued together; we don't + // want assignments to actual registers used by a call to be rearranged by a + // well-meaning scheduler. + SDValue InFlag; + + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // The linker is responsible for inserting veneers when necessary to put a + // function call destination in range, so we don't need to bother with a + // wrapper here. 
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); + } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + const char *Sym = S->getSymbol(); + Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy()); + } + + // We don't usually want to end the call-sequence here because we would tidy + // the frame up *after* the call, however in the ABI-changing tail-call case + // we've carefully laid out the parameters so that when sp is reset they'll be + // in the correct location. + if (IsTailCall && !IsSibCall) { + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + InFlag = Chain.getValue(1); + } + + // We produce the following DAG scheme for the actual call instruction: + // (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag? + // + // Most arguments aren't going to be used and just keep the values live as + // far as LLVM is concerned. It's expected to be selected as simply "bl + // callee" (for a direct, non-tail call). + std::vector<SDValue> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + if (IsTailCall) { + // Each tail call may have to adjust the stack by a different amount, so + // this information must travel along with the operation for eventual + // consumption by emitEpilogue. + Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32)); + } + + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + + // Add a register mask operand representing the call-preserved registers. This + // is used later in codegen to constrain register-allocation. 
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // If we needed glue, put it in as the last argument. + if (InFlag.getNode()) + Ops.push_back(InFlag); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + if (IsTailCall) { + return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + } + + Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + // Now we can reclaim the stack, just as well do it before working out where + // our return value is. + if (!IsSibCall) { + uint64_t CalleePopBytes + = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? NumBytes : 0; + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(CalleePopBytes, true), + InFlag); + InFlag = Chain.getValue(1); + } + + return LowerCallResult(Chain, InFlag, CallConv, + IsVarArg, Ins, dl, DAG, InVals); +} + +SDValue +AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv)); + + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign VA = RVLocs[i]; + + // Return values that are too big to fit into registers should use an sret + // pointer, so this can be a lot simpler than the main argument code. 
+ assert(VA.isRegLoc() && "Memory locations not expected for call return"); + + SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), + InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); + break; + case CCValAssign::ZExt: + case CCValAssign::SExt: + case CCValAssign::AExt: + // Floating-point arguments only get extended/truncated if they're going + // in memory, so using the integer operation is acceptable here. + Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); + break; + } + + InVals.push_back(Val); + } + + return Chain; +} + +bool +AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool IsVarArg, + bool IsCalleeStructRet, + bool IsCallerStructRet, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const { + + // For CallingConv::C this function knows whether the ABI needs + // changing. That's not true for other conventions so they will have to opt in + // manually. + if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C) + return false; + + const MachineFunction &MF = DAG.getMachineFunction(); + const Function *CallerF = MF.getFunction(); + CallingConv::ID CallerCC = CallerF->getCallingConv(); + bool CCMatch = CallerCC == CalleeCC; + + // Byval parameters hand the function a pointer directly into the stack area + // we want to reuse during a tail call. Working around this *is* possible (see + // X86) but less efficient and uglier in LowerCall. 
+ for (Function::const_arg_iterator i = CallerF->arg_begin(), + e = CallerF->arg_end(); i != e; ++i) + if (i->hasByValAttr()) + return false; + + if (getTargetMachine().Options.GuaranteedTailCallOpt) { + if (IsTailCallConvention(CalleeCC) && CCMatch) + return true; + return false; + } + + // Now we search for cases where we can use a tail call without changing the + // ABI. Sibcall is used in some places (particularly gcc) to refer to this + // concept. + + // I want anyone implementing a new calling convention to think long and hard + // about this assert. + assert((!IsVarArg || CalleeCC == CallingConv::C) + && "Unexpected variadic calling convention"); + + if (IsVarArg && !Outs.empty()) { + // At least two cases here: if caller is fastcc then we can't have any + // memory arguments (we'd be expected to clean up the stack afterwards). If + // caller is C then we could potentially use its argument area. + + // FIXME: for now we take the most conservative of these in both cases: + // disallow all variadic memory operands. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) + if (!ArgLocs[i].isRegLoc()) + return false; + } + + // If the calling conventions do not match, then we'd better make sure the + // results are returned in the same way as what the caller expects. 
+ if (!CCMatch) { + SmallVector<CCValAssign, 16> RVLocs1; + CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), + getTargetMachine(), RVLocs1, *DAG.getContext()); + CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC)); + + SmallVector<CCValAssign, 16> RVLocs2; + CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), + getTargetMachine(), RVLocs2, *DAG.getContext()); + CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC)); + + if (RVLocs1.size() != RVLocs2.size()) + return false; + for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { + if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) + return false; + if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) + return false; + if (RVLocs1[i].isRegLoc()) { + if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) + return false; + } else { + if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) + return false; + } + } + } + + // Nothing more to check if the callee is taking no arguments + if (Outs.empty()) + return true; + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); + + const AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo<AArch64MachineFunctionInfo>(); + + // If the stack arguments for this call would fit into our own save area then + // the call can be made tail. 
+ return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea(); +} + +bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, + bool TailCallOpt) const { + return CallCC == CallingConv::Fast && TailCallOpt; +} + +bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const { + return CallCC == CallingConv::Fast; +} + +SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, + SelectionDAG &DAG, + MachineFrameInfo *MFI, + int ClobberedFI) const { + SmallVector<SDValue, 8> ArgChains; + int64_t FirstByte = MFI->getObjectOffset(ClobberedFI); + int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1; + + // Include the original chain at the beginning of the list. When this is + // used by target LowerCall hooks, this helps the legalizer find the + // CALLSEQ_START node. + ArgChains.push_back(Chain); + + // Add a chain value for every load that uses the entry node and reads a + // negative-index frame object whose bytes overlap the object about to be + // clobbered (ClobberedFI). + for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(), + UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U) + if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) + if (FI->getIndex() < 0) { + int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex()); + int64_t InLastByte = InFirstByte; + InLastByte += MFI->getObjectSize(FI->getIndex()) - 1; + + // The closed byte ranges [InFirstByte, InLastByte] and + // [FirstByte, LastByte] overlap iff one contains the other's first byte. + if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) || + (FirstByte <= InFirstByte && InFirstByte <= LastByte)) + ArgChains.push_back(SDValue(L, 1)); + } + + // Build a tokenfactor for all the chains.
+ return DAG.getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other, + &ArgChains[0], ArgChains.size()); +} + +static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) { + switch (CC) { + case ISD::SETEQ: return A64CC::EQ; + case ISD::SETGT: return A64CC::GT; + case ISD::SETGE: return A64CC::GE; + case ISD::SETLT: return A64CC::LT; + case ISD::SETLE: return A64CC::LE; + case ISD::SETNE: return A64CC::NE; + case ISD::SETUGT: return A64CC::HI; + case ISD::SETUGE: return A64CC::HS; + case ISD::SETULT: return A64CC::LO; + case ISD::SETULE: return A64CC::LS; + default: llvm_unreachable("Unexpected condition code"); + } +} + +bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const { + // icmp is implemented using adds/subs immediate, which take an unsigned + // 12-bit immediate, optionally shifted left by 12 bits. + + // Symmetric by using adds/subs + if (Val < 0) + Val = -Val; + + return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0; +} + +SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS, + ISD::CondCode CC, SDValue &A64cc, + SelectionDAG &DAG, DebugLoc &dl) const { + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { + int64_t C = 0; + EVT VT = RHSC->getValueType(0); + bool knownInvalid = false; + + // I'm not convinced the rest of LLVM handles these edge cases properly, but + // we can at least get it right. + if (isSignedIntSetCC(CC)) { + C = RHSC->getSExtValue(); + } else if (RHSC->getZExtValue() > INT64_MAX) { + // A 64-bit constant not representable by a signed 64-bit integer is far + // too big to fit into a SUBS immediate anyway. + knownInvalid = true; + } else { + C = RHSC->getZExtValue(); + } + + if (!knownInvalid && !isLegalICmpImmediate(C)) { + // Constant does not fit, try adjusting it by one? + switch (CC) { + default: break; + case ISD::SETLT: + case ISD::SETGE: + if (isLegalICmpImmediate(C-1)) { + CC = (CC == ISD::SETLT) ? 
ISD::SETLE : ISD::SETGT; + RHS = DAG.getConstant(C-1, VT); + } + break; + case ISD::SETULT: + case ISD::SETUGE: + if (isLegalICmpImmediate(C-1)) { + CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; + RHS = DAG.getConstant(C-1, VT); + } + break; + case ISD::SETLE: + case ISD::SETGT: + if (isLegalICmpImmediate(C+1)) { + CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; + RHS = DAG.getConstant(C+1, VT); + } + break; + case ISD::SETULE: + case ISD::SETUGT: + if (isLegalICmpImmediate(C+1)) { + CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; + RHS = DAG.getConstant(C+1, VT); + } + break; + } + } + } + + A64CC::CondCodes CondCode = IntCCToA64CC(CC); + A64cc = DAG.getConstant(CondCode, MVT::i32); + return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, + DAG.getCondCode(CC)); +} + +static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC, + A64CC::CondCodes &Alternative) { + A64CC::CondCodes CondCode = A64CC::Invalid; + Alternative = A64CC::Invalid; + + switch (CC) { + default: llvm_unreachable("Unknown FP condition!"); + case ISD::SETEQ: + case ISD::SETOEQ: CondCode = A64CC::EQ; break; + case ISD::SETGT: + case ISD::SETOGT: CondCode = A64CC::GT; break; + case ISD::SETGE: + case ISD::SETOGE: CondCode = A64CC::GE; break; + case ISD::SETOLT: CondCode = A64CC::MI; break; + case ISD::SETOLE: CondCode = A64CC::LS; break; + case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break; + case ISD::SETO: CondCode = A64CC::VC; break; + case ISD::SETUO: CondCode = A64CC::VS; break; + case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break; + case ISD::SETUGT: CondCode = A64CC::HI; break; + case ISD::SETUGE: CondCode = A64CC::PL; break; + case ISD::SETLT: + case ISD::SETULT: CondCode = A64CC::LT; break; + case ISD::SETLE: + case ISD::SETULE: CondCode = A64CC::LE; break; + case ISD::SETNE: + case ISD::SETUNE: CondCode = A64CC::NE; break; + } + return CondCode; +} + +SDValue +AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) 
const { + DebugLoc DL = Op.getDebugLoc(); + EVT PtrVT = getPointerTy(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + + assert(getTargetMachine().getCodeModel() == CodeModel::Small + && "Only small code model supported at the moment"); + + // The most efficient code is PC-relative anyway for the small memory model, + // so we don't need to worry about relocation model. + return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + DAG.getTargetBlockAddress(BA, PtrVT, 0, + AArch64II::MO_NO_FLAG), + DAG.getTargetBlockAddress(BA, PtrVT, 0, + AArch64II::MO_LO12), + DAG.getConstant(/*Alignment=*/ 4, MVT::i32)); +} + + +// (BRCOND chain, val, dest) +SDValue +AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue Chain = Op.getOperand(0); + SDValue TheBit = Op.getOperand(1); + SDValue DestBB = Op.getOperand(2); + + // AArch64 BooleanContents is the default UndefinedBooleanContent, which means + // that as the consumer we are responsible for ignoring rubbish in higher + // bits. 
+ TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit, + DAG.getConstant(1, MVT::i32)); + + SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit, + DAG.getConstant(0, TheBit.getValueType()), + DAG.getCondCode(ISD::SETNE)); + + return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain, + A64CMP, DAG.getConstant(A64CC::NE, MVT::i32), + DestBB); +} + +// (BR_CC chain, condcode, lhs, rhs, dest) +SDValue +AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue Chain = Op.getOperand(0); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue DestBB = Op.getOperand(4); + + if (LHS.getValueType() == MVT::f128) { + // f128 comparisons are lowered to runtime calls by a routine which sets + // LHS, RHS and CC appropriately for the rest of this function to continue. + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + + // If softenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (RHS.getNode() == 0) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + + if (LHS.getValueType().isInteger()) { + SDValue A64cc; + + // Integers are handled in a separate function because the combinations of + // immediates and tests can get hairy and we may want to fiddle things. + SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); + + return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, + Chain, CmpOp, A64cc, DestBB); + } + + // Note that some LLVM floating-point CondCodes can't be lowered to a single + // conditional branch, hence FPCCToA64CC can set a second test, where either + // passing is sufficient. 
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; + CondCode = FPCCToA64CC(CC, Alternative); + SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); + SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, + DAG.getCondCode(CC)); + SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, + Chain, SetCC, A64cc, DestBB); + + if (Alternative != A64CC::Invalid) { + A64cc = DAG.getConstant(Alternative, MVT::i32); + A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, + A64BR_CC, SetCC, A64cc, DestBB); + + } + + return A64BR_CC; +} + +SDValue +AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG, + RTLIB::Libcall Call) const { + ArgListTy Args; + ArgListEntry Entry; + for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { + EVT ArgVT = Op.getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy; + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy()); + + Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext()); + + // By default, the input chain to this libcall is the entry node of the + // function. If the libcall is going to be emitted as a tail call then + // isUsedByReturnOnly will change it to the right chain if the return + // node which is being folded has a non-entry input chain. + SDValue InChain = DAG.getEntryNode(); + + // isTailCall may be true since the callee does not reference caller stack + // frame. Check if it's in the right position. 
+ SDValue TCChain = InChain; + bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain); + if (isTailCall) + InChain = TCChain; + + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, false, false, false, false, + 0, getLibcallCallingConv(Call), isTailCall, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, Op->getDebugLoc()); + std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); + + if (!CallInfo.second.getNode()) + // It's a tailcall, return the chain (which is the DAG root). + return DAG.getRoot(); + + return CallInfo.first; +} + +SDValue +AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + if (Op.getOperand(0).getValueType() != MVT::f128) { + // It's legal except when f128 is involved + return Op; + } + + RTLIB::Libcall LC; + LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); + + SDValue SrcVal = Op.getOperand(0); + return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, + /*isSigned*/ false, Op.getDebugLoc()); +} + +SDValue +AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); + + RTLIB::Libcall LC; + LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); + + return LowerF128ToCall(Op, DAG, LC); +} + +SDValue +AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, + bool IsSigned) const { + if (Op.getOperand(0).getValueType() != MVT::f128) { + // It's legal except when f128 is involved + return Op; + } + + RTLIB::Libcall LC; + if (IsSigned) + LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType()); + else + LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); + + return LowerF128ToCall(Op, DAG, LC); +} + +SDValue +AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, + SelectionDAG &DAG) const { + // TableGen doesn't have easy access to the CodeModel or RelocationModel, so + // we make that 
distinction here. + + // We support the static, small memory model for now. + assert(getTargetMachine().getCodeModel() == CodeModel::Small); + + EVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op); + const GlobalValue *GV = GN->getGlobal(); + unsigned Alignment = GV->getAlignment(); + + if (Alignment == 0) { + const PointerType *GVPtrTy = cast<PointerType>(GV->getType()); + if (GVPtrTy->getElementType()->isSized()) + Alignment = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType()); + else { + // Be conservative if we can't guess, not that it really matters: + // functions and labels aren't valid for loads, and the methods used to + // actually calculate an address work with any alignment. + Alignment = 1; + } + } + + unsigned char HiFixup, LoFixup; + Reloc::Model RelocM = getTargetMachine().getRelocationModel(); + bool UseGOT = Subtarget->GVIsIndirectSymbol(GV, RelocM); + + if (UseGOT) { + HiFixup = AArch64II::MO_GOT; + LoFixup = AArch64II::MO_GOT_LO12; + Alignment = 8; + } else { + HiFixup = AArch64II::MO_NO_FLAG; + LoFixup = AArch64II::MO_LO12; + } + + // AArch64's small model demands the following sequence: + // ADRP x0, somewhere + // ADD x0, x0, #:lo12:somewhere ; (or LDR directly). 
+ SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + HiFixup), + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + LoFixup), + DAG.getConstant(Alignment, MVT::i32)); + + if (UseGOT) { + GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(), + GlobalRef); + } + + if (GN->getOffset() != 0) + return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef, + DAG.getConstant(GN->getOffset(), PtrVT)); + + return GlobalRef; +} + +SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr, + SDValue DescAddr, + DebugLoc DL, + SelectionDAG &DAG) const { + EVT PtrVT = getPointerTy(); + + // The function we need to call is simply the first entry in the GOT for this + // descriptor, load it in preparation. + SDValue Func, Chain; + Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(), + DescAddr); + + // The function takes only one argument: the address of the descriptor itself + // in X0. + SDValue Glue; + Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue); + Glue = Chain.getValue(1); + + // Finally, there's a special calling-convention which means that the lookup + // must preserve all registers (except X0, obviously). 
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const AArch64RegisterInfo *A64RI + = static_cast<const AArch64RegisterInfo *>(TRI); + const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask(); + + // We're now ready to populate the argument list, as with a normal call: + std::vector<SDValue> Ops; + Ops.push_back(Chain); + Ops.push_back(Func); + Ops.push_back(SymAddr); + Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT)); + Ops.push_back(DAG.getRegisterMask(Mask)); + Ops.push_back(Glue); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0], Ops.size()); + Glue = Chain.getValue(1); + + // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it + // back to the generic handling code. + return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue); +} + +SDValue +AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetELF() && + "TLS not implemented for non-ELF targets"); + const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); + + TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); + + SDValue TPOff; + EVT PtrVT = getPointerTy(); + DebugLoc DL = Op.getDebugLoc(); + const GlobalValue *GV = GA->getGlobal(); + + SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT); + + if (Model == TLSModel::InitialExec) { + TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + AArch64II::MO_GOTTPREL), + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + AArch64II::MO_GOTTPREL_LO12), + DAG.getConstant(8, MVT::i32)); + TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(), + TPOff); + } else if (Model == TLSModel::LocalExec) { + SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, + AArch64II::MO_TPREL_G1); + SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, + 
AArch64II::MO_TPREL_G0_NC); + + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar, + DAG.getTargetConstant(0, MVT::i32)), 0); + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, TPOff, LoVar, + DAG.getTargetConstant(0, MVT::i32)), 0); + } else if (Model == TLSModel::GeneralDynamic) { + // Accesses used in this sequence go via the TLS descriptor which lives in + // the GOT. Prepare an address we can use to handle this. + SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + AArch64II::MO_TLSDESC); + SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + AArch64II::MO_TLSDESC_LO12); + SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + HiDesc, LoDesc, DAG.getConstant(8, MVT::i32)); + SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0); + + TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG); + } else if (Model == TLSModel::LocalDynamic) { + // Local-dynamic accesses proceed in two phases. A general-dynamic TLS + // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate + // the beginning of the module's TLS region, followed by a DTPREL offset + // calculation. + + // These accesses will need deduplicating if there's more than one. 
+ AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction() + .getInfo<AArch64MachineFunctionInfo>(); + MFI->incNumLocalDynamicTLSAccesses(); + + + // Get the location of _TLS_MODULE_BASE_: + SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, + AArch64II::MO_TLSDESC); + SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, + AArch64II::MO_TLSDESC_LO12); + SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + HiDesc, LoDesc, DAG.getConstant(8, MVT::i32)); + SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT); + + ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG); + + // Get the variable's offset from _TLS_MODULE_BASE_ + SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, + AArch64II::MO_DTPREL_G1); + SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, + AArch64II::MO_DTPREL_G0_NC); + + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar, + DAG.getTargetConstant(0, MVT::i32)), 0); + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, TPOff, LoVar, + DAG.getTargetConstant(0, MVT::i32)), 0); + } else + llvm_unreachable("Unsupported TLS access model"); + + + return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff); +} + +SDValue +AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, + bool IsSigned) const { + if (Op.getValueType() != MVT::f128) { + // Legal for everything except f128. 
+ return Op; + } + + RTLIB::Libcall LC; + if (IsSigned) + LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); + else + LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); + + return LowerF128ToCall(Op, DAG, LC); +} + + +SDValue +AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + DebugLoc dl = JT->getDebugLoc(); + + // When compiling PIC, jump tables get put in the code section so a static + // relocation-style is acceptable for both cases. + return DAG.getNode(AArch64ISD::WrapperSmall, dl, getPointerTy(), + DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()), + DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(), + AArch64II::MO_LO12), + DAG.getConstant(1, MVT::i32)); +} + +// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode) +SDValue +AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue IfTrue = Op.getOperand(2); + SDValue IfFalse = Op.getOperand(3); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + + if (LHS.getValueType() == MVT::f128) { + // f128 comparisons are lowered to libcalls, but slot in nicely here + // afterwards. + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + + // If softenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (RHS.getNode() == 0) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + + if (LHS.getValueType().isInteger()) { + SDValue A64cc; + + // Integers are handled in a separate function because the combinations of + // immediates and tests can get hairy and we may want to fiddle things. 
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); + + return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), + CmpOp, IfTrue, IfFalse, A64cc); + } + + // Note that some LLVM floating-point CondCodes can't be lowered to a single + // conditional branch, hence FPCCToA64CC can set a second test, where either + // passing is sufficient. + A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; + CondCode = FPCCToA64CC(CC, Alternative); + SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); + SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, + DAG.getCondCode(CC)); + SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), + SetCC, IfTrue, IfFalse, A64cc); + + if (Alternative != A64CC::Invalid) { + A64cc = DAG.getConstant(Alternative, MVT::i32); + A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), + SetCC, IfTrue, A64SELECT_CC, A64cc); + + } + + return A64SELECT_CC; +} + +// (SELECT testbit, iftrue, iffalse) +SDValue +AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue TheBit = Op.getOperand(0); + SDValue IfTrue = Op.getOperand(1); + SDValue IfFalse = Op.getOperand(2); + + // AArch64 BooleanContents is the default UndefinedBooleanContent, which means + // that as the consumer we are responsible for ignoring rubbish in higher + // bits. 
+ TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit, + DAG.getConstant(1, MVT::i32)); + SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit, + DAG.getConstant(0, TheBit.getValueType()), + DAG.getCondCode(ISD::SETNE)); + + return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), + A64CMP, IfTrue, IfFalse, + DAG.getConstant(A64CC::NE, MVT::i32)); +} + +// (SETCC lhs, rhs, condcode) +SDValue +AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + EVT VT = Op.getValueType(); + + if (LHS.getValueType() == MVT::f128) { + // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS + // for the rest of the function (some i32 or i64 values). + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + + // If softenSetCCOperands returned a scalar, use it. + if (RHS.getNode() == 0) { + assert(LHS.getValueType() == Op.getValueType() && + "Unexpected setcc expansion!"); + return LHS; + } + } + + if (LHS.getValueType().isInteger()) { + SDValue A64cc; + + // Integers are handled in a separate function because the combinations of + // immediates and tests can get hairy and we may want to fiddle things. + SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); + + return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, + CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT), + A64cc); + } + + // Note that some LLVM floating-point CondCodes can't be lowered to a single + // conditional branch, hence FPCCToA64CC can set a second test, where either + // passing is sufficient. 
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; + CondCode = FPCCToA64CC(CC, Alternative); + SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); + SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, + DAG.getCondCode(CC)); + SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, + CmpOp, DAG.getConstant(1, VT), + DAG.getConstant(0, VT), A64cc); + + if (Alternative != A64CC::Invalid) { + A64cc = DAG.getConstant(Alternative, MVT::i32); + A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, + DAG.getConstant(1, VT), A64SELECT_CC, A64cc); + } + + return A64SELECT_CC; +} + +SDValue +AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { + const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); + const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); + + // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes + // rather than just 8. + return DAG.getMemcpy(Op.getOperand(0), Op.getDebugLoc(), + Op.getOperand(1), Op.getOperand(2), + DAG.getConstant(32, MVT::i32), 8, false, false, + MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); +} + +SDValue +AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { + // The layout of the va_list struct is specified in the AArch64 Procedure Call + // Standard, section B.3. 
+ MachineFunction &MF = DAG.getMachineFunction(); + AArch64MachineFunctionInfo *FuncInfo = MF.getInfo<AArch64MachineFunctionInfo>(); + DebugLoc DL = Op.getDebugLoc(); + + SDValue Chain = Op.getOperand(0); + SDValue VAList = Op.getOperand(1); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + SmallVector<SDValue, 4> MemOps; + + // void *__stack at offset 0 + SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(), + getPointerTy()); + MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList, + MachinePointerInfo(SV), false, false, 0)); + + // void *__gr_top at offset 8 + int GPRSize = FuncInfo->getVariadicGPRSize(); + if (GPRSize > 0) { + SDValue GRTop, GRTopAddr; + + GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, + DAG.getConstant(8, getPointerTy())); + + GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy()); + GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop, + DAG.getConstant(GPRSize, getPointerTy())); + + MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr, + MachinePointerInfo(SV, 8), + false, false, 0)); + } + + // void *__vr_top at offset 16 + int FPRSize = FuncInfo->getVariadicFPRSize(); + if (FPRSize > 0) { + SDValue VRTop, VRTopAddr; + VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, + DAG.getConstant(16, getPointerTy())); + + VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy()); + VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop, + DAG.getConstant(FPRSize, getPointerTy())); + + MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr, + MachinePointerInfo(SV, 16), + false, false, 0)); + } + + // int __gr_offs at offset 24 + SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, + DAG.getConstant(24, getPointerTy())); + MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32), + GROffsAddr, MachinePointerInfo(SV, 24), + false, false, 0)); + + // int __vr_offs at offset 28 + SDValue VROffsAddr = 
DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, + DAG.getConstant(28, getPointerTy())); + MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32), + VROffsAddr, MachinePointerInfo(SV, 28), + false, false, 0)); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], + MemOps.size()); +} + +SDValue +AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: llvm_unreachable("Don't know how to custom lower this!"); + case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128); + case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128); + case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128); + case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128); + case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true); + case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false); + case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true); + case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false); + case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); + case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); + + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); + case ISD::BRCOND: return LowerBRCOND(Op, DAG); + case ISD::BR_CC: return LowerBR_CC(Op, DAG); + case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::JumpTable: return LowerJumpTable(Op, DAG); + case ISD::SELECT: return LowerSELECT(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::VACOPY: return LowerVACOPY(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG); + } + + return SDValue(); +} + +static SDValue PerformANDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + // We're looking for an SRA/SHL pair 
which form an SBFX. + + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + if (!isa<ConstantSDNode>(N->getOperand(1))) + return SDValue(); + + uint64_t TruncMask = N->getConstantOperandVal(1); + if (!isMask_64(TruncMask)) + return SDValue(); + + uint64_t Width = CountPopulation_64(TruncMask); + SDValue Shift = N->getOperand(0); + + if (Shift.getOpcode() != ISD::SRL) + return SDValue(); + + if (!isa<ConstantSDNode>(Shift->getOperand(1))) + return SDValue(); + uint64_t LSB = Shift->getConstantOperandVal(1); + + if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits()) + return SDValue(); + + return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0), + DAG.getConstant(LSB, MVT::i64), + DAG.getConstant(LSB + Width - 1, MVT::i64)); +} + +static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode, + TargetLowering::DAGCombinerInfo &DCI) { + // An atomic operation followed by an acquiring atomic fence can be reduced to + // an acquiring load. The atomic operation provides a convenient pointer to + // load from. If the original operation was a load anyway we can actually + // combine the two operations into an acquiring load. + SelectionDAG &DAG = DCI.DAG; + SDValue AtomicOp = FenceNode->getOperand(0); + AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp); + + // A fence on its own can't be optimised + if (!AtomicNode) + return SDValue(); + + uint64_t FenceOrder = FenceNode->getConstantOperandVal(1); + uint64_t FenceScope = FenceNode->getConstantOperandVal(2); + + if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope()) + return SDValue(); + + // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so + // the chain we use should be its input, otherwise we'll put our store after + // it so we use its output chain. + SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ? 
+ AtomicNode->getChain() : AtomicOp; + + // We have an acquire fence with a handy atomic operation nearby, we can + // convert the fence into a load-acquire, discarding the result. + DebugLoc DL = FenceNode->getDebugLoc(); + SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(), + AtomicNode->getValueType(0), + Chain, // Chain + AtomicOp.getOperand(1), // Pointer + AtomicNode->getMemOperand(), Acquire, + static_cast<SynchronizationScope>(FenceScope)); + + if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD) + DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode()); + + return Op.getValue(1); +} + +static SDValue PerformATOMIC_STORECombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // A releasing atomic fence followed by an atomic store can be combined into a + // single store operation. + SelectionDAG &DAG = DCI.DAG; + AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N); + SDValue FenceOp = AtomicNode->getOperand(0); + + if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE) + return SDValue(); + + uint64_t FenceOrder + = cast<ConstantSDNode>(FenceOp.getOperand(1))->getZExtValue(); + uint64_t FenceScope + = cast<ConstantSDNode>(FenceOp.getOperand(2))->getZExtValue(); + + if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope()) + return SDValue(); + + DebugLoc DL = AtomicNode->getDebugLoc(); + return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(), + FenceOp.getOperand(0), // Chain + AtomicNode->getOperand(1), // Pointer + AtomicNode->getOperand(2), // Value + AtomicNode->getMemOperand(), Release, + static_cast<SynchronizationScope>(FenceScope)); +} + +/// For a true bitfield insert, the bits getting into that contiguous mask +/// should come from the low part of an existing value: they must be formed from +/// a compatible SHL operation (unless they're already low). This function +/// checks that condition and returns the least-significant bit that's +/// intended. If the operation not a field preparation, -1 is returned. 
+static int32_t getLSBForBFI(SelectionDAG &DAG, DebugLoc DL, EVT VT, + SDValue &MaskedVal, uint64_t Mask) { + if (!isShiftedMask_64(Mask)) + return -1; + + // Now we need to alter MaskedVal so that it is an appropriate input for a BFI + // instruction. BFI will do a left-shift by LSB before applying the mask we've + // spotted, so in general we should pre-emptively "undo" that by making sure + // the incoming bits have had a right-shift applied to them. + // + // This right shift, however, will combine with existing left/right shifts. In + // the simplest case of a completely straight bitfield operation, it will be + // expected to completely cancel out with an existing SHL. More complicated + // cases (e.g. bitfield to bitfield copy) may still need a real shift before + // the BFI. + + uint64_t LSB = CountTrailingZeros_64(Mask); + int64_t ShiftRightRequired = LSB; + if (MaskedVal.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(MaskedVal.getOperand(1))) { + ShiftRightRequired -= MaskedVal.getConstantOperandVal(1); + MaskedVal = MaskedVal.getOperand(0); + } else if (MaskedVal.getOpcode() == ISD::SRL && + isa<ConstantSDNode>(MaskedVal.getOperand(1))) { + ShiftRightRequired += MaskedVal.getConstantOperandVal(1); + MaskedVal = MaskedVal.getOperand(0); + } + + if (ShiftRightRequired > 0) + MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal, + DAG.getConstant(ShiftRightRequired, MVT::i64)); + else if (ShiftRightRequired < 0) { + // We could actually end up with a residual left shift, for example with + // "struc.bitfield = val << 1". + MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal, + DAG.getConstant(-ShiftRightRequired, MVT::i64)); + } + + return LSB; +} + +/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by +/// a mask and an extension. Returns true if a BFI was found and provides +/// information on its surroundings. 
+static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask,
+                          bool &Extended) {
+  // Look through an optional zero-extension, remembering that we did so: the
+  // caller has to widen the BFI operands to match in that case.
+  Extended = false;
+  if (N.getOpcode() == ISD::ZERO_EXTEND) {
+    Extended = true;
+    N = N.getOperand(0);
+  }
+
+  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
+    Mask = N->getConstantOperandVal(1);
+    N = N.getOperand(0);
+  } else {
+    // Mask is the whole width. "1ULL << 64" is undefined behaviour, so the
+    // 64-bit case has to be handled without shifting by the full width.
+    unsigned Width = N.getValueType().getSizeInBits();
+    Mask = Width >= 64 ? ~0ULL : (1ULL << Width) - 1;
+  }
+
+  if (N.getOpcode() == AArch64ISD::BFI) {
+    BFI = N;
+    return true;
+  }
+
+  return false;
+}
+
+/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which
+/// is roughly equivalent to (and (BFI ...), mask). This form is used because it
+/// can often be further combined with a larger mask. Ultimately, we want mask
+/// to be 2^32-1 or 2^64-1 so the AND can be skipped.
+///
+/// Returns the replacement value, or a null SDValue if \p N does not have the
+/// form (or (and X, C1), (and Y, C2)) with disjoint constant masks, one of
+/// which getLSBForBFI accepts as an insertable field.
+static SDValue tryCombineToBFI(SDNode *N,
+                               TargetLowering::DAGCombinerInfo &DCI,
+                               const AArch64Subtarget *Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+  DebugLoc DL = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+
+  assert(N->getOpcode() == ISD::OR && "Unexpected root");
+
+  // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or
+  // abandon the effort.
+  SDValue LHS = N->getOperand(0);
+  if (LHS.getOpcode() != ISD::AND)
+    return SDValue();
+
+  uint64_t LHSMask;
+  if (isa<ConstantSDNode>(LHS.getOperand(1)))
+    LHSMask = LHS->getConstantOperandVal(1);
+  else
+    return SDValue();
+
+  // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask
+  // is or abandon the effort.
+  SDValue RHS = N->getOperand(1);
+  if (RHS.getOpcode() != ISD::AND)
+    return SDValue();
+
+  uint64_t RHSMask;
+  if (isa<ConstantSDNode>(RHS.getOperand(1)))
+    RHSMask = RHS->getConstantOperandVal(1);
+  else
+    return SDValue();
+
+  // Can't do anything if the masks are incompatible.
+  if (LHSMask & RHSMask)
+    return SDValue();
+
+  // Now we need one of the masks to be a contiguous field. Without loss of
+  // generality that should be the RHS one.
+  SDValue Bitfield = LHS.getOperand(0);
+  if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) {
+    // We know that LHS is a candidate new value, and RHS isn't already a better
+    // one.
+    std::swap(LHS, RHS);
+    std::swap(LHSMask, RHSMask);
+  }
+
+  // We've done our best to put the right operands in the right places, all we
+  // can do now is check whether a BFI exists.
+  Bitfield = RHS.getOperand(0);
+  int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask);
+  if (LSB == -1)
+    return SDValue();
+
+  uint32_t Width = CountPopulation_64(RHSMask);
+  assert(Width && "Expected non-zero bitfield width");
+
+  SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
+                            LHS.getOperand(0), Bitfield,
+                            DAG.getConstant(LSB, MVT::i64),
+                            DAG.getConstant(Width, MVT::i64));
+
+  // Mask is trivial. The all-ones value is computed without shifting by the
+  // full 64 bits, which would be undefined behaviour for an i64 result.
+  unsigned RegWidth = VT.getSizeInBits();
+  uint64_t FullMask = RegWidth >= 64 ? ~0ULL : (1ULL << RegWidth) - 1;
+  if ((LHSMask | RHSMask) == FullMask)
+    return BFI;
+
+  return DAG.getNode(ISD::AND, DL, VT, BFI,
+                     DAG.getConstant(LHSMask | RHSMask, VT));
+}
+
+/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its
+/// original input. This is surprisingly common because SROA splits things up
+/// into i8 chunks, so the originally detected MaskedBFI may actually only act
+/// on the low (say) byte of a word. This is then orred into the rest of the
+/// word afterwards.
+///
+/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)).
+///
+/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the
+/// MaskedBFI. We can also deal with a certain amount of extend/truncate being
+/// involved.
+///
+/// Returns the replacement value, or a null SDValue if no fold is possible.
+static SDValue tryCombineToLargerBFI(SDNode *N,
+                                     TargetLowering::DAGCombinerInfo &DCI,
+                                     const AArch64Subtarget *Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+  DebugLoc DL = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+
+  // First job is to hunt for a MaskedBFI on either the left or right. Swap
+  // operands if it's actually on the right.
+  SDValue BFI;
+  SDValue PossExtraMask;
+  uint64_t ExistingMask = 0;
+  bool Extended = false;
+  if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended))
+    PossExtraMask = N->getOperand(1);
+  else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended))
+    PossExtraMask = N->getOperand(0);
+  else
+    return SDValue();
+
+  // We can only combine a BFI with another compatible mask.
+  if (PossExtraMask.getOpcode() != ISD::AND ||
+      !isa<ConstantSDNode>(PossExtraMask.getOperand(1)))
+    return SDValue();
+
+  uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1);
+
+  // Masks must be compatible.
+  if (ExtraMask & ExistingMask)
+    return SDValue();
+
+  SDValue OldBFIVal = BFI.getOperand(0);
+  SDValue NewBFIVal = BFI.getOperand(1);
+  if (Extended) {
+    // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be
+    // 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments
+    // need to be made compatible.
+    assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32
+           && "Invalid types for BFI");
+    OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal);
+    NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal);
+  }
+
+  // We need the MaskedBFI to be combined with a mask of the *same* value.
+  if (PossExtraMask.getOperand(0) != OldBFIVal)
+    return SDValue();
+
+  BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
+                    OldBFIVal, NewBFIVal,
+                    BFI.getOperand(2), BFI.getOperand(3));
+
+  // If the masking is trivial, we don't need to create it. As elsewhere, avoid
+  // the undefined "1ULL << 64" when building the all-ones comparison value.
+  unsigned RegWidth = VT.getSizeInBits();
+  uint64_t FullMask = RegWidth >= 64 ? ~0ULL : (1ULL << RegWidth) - 1;
+  if ((ExtraMask | ExistingMask) == FullMask)
+    return BFI;
+
+  return DAG.getNode(ISD::AND, DL, VT, BFI,
+                     DAG.getConstant(ExtraMask | ExistingMask, VT));
+}
+
+/// An EXTR instruction is made up of two shifts, ORed together. This helper
+/// searches for and classifies those shifts.
+static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, + bool &FromHi) { + if (N.getOpcode() == ISD::SHL) + FromHi = false; + else if (N.getOpcode() == ISD::SRL) + FromHi = true; + else + return false; + + if (!isa<ConstantSDNode>(N.getOperand(1))) + return false; + + ShiftAmount = N->getConstantOperandVal(1); + Src = N->getOperand(0); + return true; +} + +/// EXTR instruciton extracts a contiguous chunk of bits from two existing +/// registers viewed as a high/low pair. This function looks for the pattern: +/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an +/// EXTR. Can't quite be done in TableGen because the two immediates aren't +/// independent. +static SDValue tryCombineToEXTR(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + assert(N->getOpcode() == ISD::OR && "Unexpected root"); + + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + SDValue LHS; + uint32_t ShiftLHS = 0; + bool LHSFromHi = 0; + if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi)) + return SDValue(); + + SDValue RHS; + uint32_t ShiftRHS = 0; + bool RHSFromHi = 0; + if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi)) + return SDValue(); + + // If they're both trying to come from the high part of the register, they're + // not really an EXTR. 
+ if (LHSFromHi == RHSFromHi) + return SDValue(); + + if (ShiftLHS + ShiftRHS != VT.getSizeInBits()) + return SDValue(); + + if (LHSFromHi) { + std::swap(LHS, RHS); + std::swap(ShiftLHS, ShiftRHS); + } + + return DAG.getNode(AArch64ISD::EXTR, DL, VT, + LHS, RHS, + DAG.getConstant(ShiftRHS, MVT::i64)); +} + +/// Target-specific dag combine xforms for ISD::OR +static SDValue PerformORCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + + if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + // Attempt to recognise bitfield-insert operations. + SDValue Res = tryCombineToBFI(N, DCI, Subtarget); + if (Res.getNode()) + return Res; + + // Attempt to combine an existing MaskedBFI operation into one with a larger + // mask. + Res = tryCombineToLargerBFI(N, DCI, Subtarget); + if (Res.getNode()) + return Res; + + Res = tryCombineToEXTR(N, DCI); + if (Res.getNode()) + return Res; + + return SDValue(); +} + +/// Target-specific dag combine xforms for ISD::SRA +static SDValue PerformSRACombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + // We're looking for an SRA/SHL pair which form an SBFX. 
+ + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + if (!isa<ConstantSDNode>(N->getOperand(1))) + return SDValue(); + + uint64_t ExtraSignBits = N->getConstantOperandVal(1); + SDValue Shift = N->getOperand(0); + + if (Shift.getOpcode() != ISD::SHL) + return SDValue(); + + if (!isa<ConstantSDNode>(Shift->getOperand(1))) + return SDValue(); + + uint64_t BitsOnLeft = Shift->getConstantOperandVal(1); + uint64_t Width = VT.getSizeInBits() - ExtraSignBits; + uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft; + + if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits()) + return SDValue(); + + return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0), + DAG.getConstant(LSB, MVT::i64), + DAG.getConstant(LSB + Width - 1, MVT::i64)); +} + + +SDValue +AArch64TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + switch (N->getOpcode()) { + default: break; + case ISD::AND: return PerformANDCombine(N, DCI); + case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI); + case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI); + case ISD::OR: return PerformORCombine(N, DCI, Subtarget); + case ISD::SRA: return PerformSRACombine(N, DCI); + } + return SDValue(); +} + +AArch64TargetLowering::ConstraintType +AArch64TargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: break; + case 'w': // An FP/SIMD vector register + return C_RegisterClass; + case 'I': // Constant that can be used with an ADD instruction + case 'J': // Constant that can be used with a SUB instruction + case 'K': // Constant that can be used with a 32-bit logical instruction + case 'L': // Constant that can be used with a 64-bit logical instruction + case 'M': // Constant that can be used as a 32-bit MOV immediate + case 'N': // Constant that can be used as a 64-bit MOV immediate + case 'Y': // Floating point constant zero + case 'Z': // Integer constant zero + 
return C_Other; + case 'Q': // A memory reference with base register and no offset + return C_Memory; + case 'S': // A symbolic address + return C_Other; + } + } + + // FIXME: Ump, Utf, Usa, Ush + // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes, whatever they may be + // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be + // Usa: An absolute symbolic address + // Ush: The high part (bits 32:12) of a pc-relative symbolic address + assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa" + && Constraint != "Ush" && "Unimplemented constraints"); + + return TargetLowering::getConstraintType(Constraint); +} + +TargetLowering::ConstraintWeight +AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info, + const char *Constraint) const { + + llvm_unreachable("Constraint weight unimplemented"); +} + +void +AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { + SDValue Result(0, 0); + + // Only length 1 constraints are C_Other. + if (Constraint.size() != 1) return; + + // Only C_Other constraints get lowered like this. That means constants for us + // so return early if there's no hope the constraint can be lowered. + + switch(Constraint[0]) { + default: break; + case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'Z': { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); + if (!C) + return; + + uint64_t CVal = C->getZExtValue(); + uint32_t Bits; + + switch (Constraint[0]) { + default: + // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J' + // is a peculiarly useless SUB constraint. 
+ llvm_unreachable("Unimplemented C_Other constraint"); + case 'I': + if (CVal <= 0xfff) + break; + return; + case 'K': + if (A64Imms::isLogicalImm(32, CVal, Bits)) + break; + return; + case 'L': + if (A64Imms::isLogicalImm(64, CVal, Bits)) + break; + return; + case 'Z': + if (CVal == 0) + break; + return; + } + + Result = DAG.getTargetConstant(CVal, Op.getValueType()); + break; + } + case 'S': { + // An absolute symbolic address or label reference. + if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) { + Result = DAG.getTargetGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), + GA->getValueType(0)); + } else if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) { + Result = DAG.getTargetBlockAddress(BA->getBlockAddress(), + BA->getValueType(0)); + } else if (const ExternalSymbolSDNode *ES + = dyn_cast<ExternalSymbolSDNode>(Op)) { + Result = DAG.getTargetExternalSymbol(ES->getSymbol(), + ES->getValueType(0)); + } else + return; + break; + } + case 'Y': + if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) { + if (CFP->isExactlyValue(0.0)) { + Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0)); + break; + } + } + return; + } + + if (Result.getNode()) { + Ops.push_back(Result); + return; + } + + // It's an unknown constraint for us. Let generic code have a go. 
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + +std::pair<unsigned, const TargetRegisterClass*> +AArch64TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + if (VT.getSizeInBits() <= 32) + return std::make_pair(0U, &AArch64::GPR32RegClass); + else if (VT == MVT::i64) + return std::make_pair(0U, &AArch64::GPR64RegClass); + break; + case 'w': + if (VT == MVT::f16) + return std::make_pair(0U, &AArch64::FPR16RegClass); + else if (VT == MVT::f32) + return std::make_pair(0U, &AArch64::FPR32RegClass); + else if (VT == MVT::f64) + return std::make_pair(0U, &AArch64::FPR64RegClass); + else if (VT.getSizeInBits() == 64) + return std::make_pair(0U, &AArch64::VPR64RegClass); + else if (VT == MVT::f128) + return std::make_pair(0U, &AArch64::FPR128RegClass); + else if (VT.getSizeInBits() == 128) + return std::make_pair(0U, &AArch64::VPR128RegClass); + break; + } + } + + // Use the default implementation in TargetLowering to convert the register + // constraint into a member of a register class. + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h new file mode 100644 index 0000000..66985c1 --- /dev/null +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -0,0 +1,247 @@ +//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that AArch64 uses to lower LLVM code into a +// selection DAG. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H
+#define LLVM_TARGET_AARCH64_ISELLOWERING_H
+
+#include "MCTargetDesc/AArch64BaseInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+
+namespace llvm {
+namespace AArch64ISD {
+  // Target-specific SelectionDAG node opcodes for AArch64.
+  enum NodeType {
+    // Start the numbering from where ISD NodeType finishes.
+    FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+    // This is a conditional branch which also notes the flag needed
+    // (eq/sgt/...). A64 puts this information on the branches rather than
+    // compares as LLVM does.
+    BR_CC,
+
+    // A node to be selected to an actual call operation: either BL or BLR in
+    // the absence of tail calls.
+    Call,
+
+    // Indicates a floating-point immediate which fits into the format required
+    // by the FMOV instructions. First (and only) operand is the 8-bit encoded
+    // value of that immediate.
+    FPMOV,
+
+    // Corresponds directly to an EXTR instruction. Operands are an LHS, an RHS
+    // and an LSB.
+    EXTR,
+
+    // Wraps a load from the GOT, which should always be performed with a 64-bit
+    // load instruction. This prevents the DAG combiner folding a truncate to
+    // form a smaller memory access.
+    GOTLoad,
+
+    // Performs a bitfield insert. Arguments are: the value being inserted into;
+    // the value being inserted; least significant bit changed; width of the
+    // field.
+    BFI,
+
+    // Simply a convenient node inserted during ISelLowering to represent
+    // procedure return. Will almost certainly be selected to "RET".
+    Ret,
+
+    /// Extracts a field of contiguous bits from the source and sign extends
+    /// them into a single register. Arguments are: source; immr; imms. Note
+    /// these are pre-encoded since DAG matching can't cope with combining LSB
+    /// and Width into these values itself.
+    SBFX,
+
+    /// This is an A64-ification of the standard LLVM SELECT_CC operation. The
+    /// main difference is that it only has the values and an A64 condition,
+    /// which will be produced by a setcc instruction.
+    SELECT_CC,
+
+    /// This serves most of the functions of the LLVM SETCC instruction, for two
+    /// purposes. First, it prevents optimisations from fiddling with the
+    /// compare after we've moved the CondCode information onto the SELECT_CC or
+    /// BR_CC instructions. Second, it gives a legal instruction for the actual
+    /// comparison.
+    ///
+    /// It keeps a record of the condition flags asked for because certain
+    /// instructions are only valid for a subset of condition codes.
+    SETCC,
+
+    // Designates a node which is a tail call: both a call and a return
+    // instruction as far as selection is concerned. It should be selected to an
+    // unconditional branch. Has the usual plethora of call operands, but: 1st
+    // is callee, 2nd is stack adjustment required immediately before branch.
+    TC_RETURN,
+
+    // Designates a call used to support the TLS descriptor ABI. The call itself
+    // will be indirect ("BLR xN") but a relocation-specifier (".tlsdesccall
+    // var") must be attached somehow during code generation. It takes two
+    // operands: the callee and the symbol to be relocated against.
+    TLSDESCCALL,
+
+    // Leaf node which will be lowered to an appropriate MRS to obtain the
+    // thread pointer: TPIDR_EL0.
+    THREAD_POINTER,
+
+    /// Extracts a field of contiguous bits from the source and zero extends
+    /// them into a single register. Arguments are: source; immr; imms. Note
+    /// these are pre-encoded since DAG matching can't cope with combining LSB
+    /// and Width into these values itself.
+    UBFX,
+
+    // Wraps an address which the ISelLowering phase has decided should be
+    // created using the small absolute memory model: i.e. adrp/add or
+    // adrp/mem-op. This exists to prevent bare TargetAddresses which may never
+    // get selected.
+    WrapperSmall
+  };
+}
+
+
+class AArch64Subtarget;
+class AArch64TargetMachine;
+
+// AArch64 implementation of the TargetLowering interface: calling-convention
+// lowering, custom operation lowering, DAG combines and inline-asm constraint
+// handling are all declared here.
+class AArch64TargetLowering : public TargetLowering {
+public:
+  explicit AArch64TargetLowering(AArch64TargetMachine &TM);
+
+  const char *getTargetNodeName(unsigned Opcode) const;
+
+  CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const;
+
+  SDValue LowerFormalArguments(SDValue Chain,
+                               CallingConv::ID CallConv, bool isVarArg,
+                               const SmallVectorImpl<ISD::InputArg> &Ins,
+                               DebugLoc dl, SelectionDAG &DAG,
+                               SmallVectorImpl<SDValue> &InVals) const;
+
+  SDValue LowerReturn(SDValue Chain,
+                      CallingConv::ID CallConv, bool isVarArg,
+                      const SmallVectorImpl<ISD::OutputArg> &Outs,
+                      const SmallVectorImpl<SDValue> &OutVals,
+                      DebugLoc dl, SelectionDAG &DAG) const;
+
+  SDValue LowerCall(CallLoweringInfo &CLI,
+                    SmallVectorImpl<SDValue> &InVals) const;
+
+  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+                          CallingConv::ID CallConv, bool IsVarArg,
+                          const SmallVectorImpl<ISD::InputArg> &Ins,
+                          DebugLoc dl, SelectionDAG &DAG,
+                          SmallVectorImpl<SDValue> &InVals) const;
+
+  void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
+                           DebugLoc DL, SDValue &Chain) const;
+
+
+  /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+  /// for tail call optimization. Targets which want to do tail call
+  /// optimization should implement this function.
+  bool IsEligibleForTailCallOptimization(SDValue Callee,
+                                    CallingConv::ID CalleeCC,
+                                    bool IsVarArg,
+                                    bool IsCalleeStructRet,
+                                    bool IsCallerStructRet,
+                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                    const SmallVectorImpl<SDValue> &OutVals,
+                                    const SmallVectorImpl<ISD::InputArg> &Ins,
+                                    SelectionDAG& DAG) const;
+
+  /// Finds the incoming stack arguments which overlap the given fixed stack
+  /// object and incorporates their load into the current chain. This prevents an
+  /// upcoming store from clobbering the stack argument before it's used.
+  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
+                              MachineFrameInfo *MFI, int ClobberedFI) const;
+
+  EVT getSetCCResultType(EVT VT) const;
+
+  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
+
+  bool IsTailCallConvention(CallingConv::ID CallCC) const;
+
+  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+  bool isLegalICmpImmediate(int64_t Val) const;
+  SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+                         SDValue &A64cc, SelectionDAG &DAG, DebugLoc &dl) const;
+
+  virtual MachineBasicBlock *
+  EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+  MachineBasicBlock *
+  emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB,
+                   unsigned Size, unsigned Opcode) const;
+
+  MachineBasicBlock *
+  emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB,
+                         unsigned Size, unsigned CmpOp,
+                         A64CC::CondCodes Cond) const;
+  MachineBasicBlock *
+  emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
+                    unsigned Size) const;
+
+  MachineBasicBlock *
+  EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+  SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
+                          RTLIB::Libcall Call) const;
+  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
+  SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL,
+                           SelectionDAG &DAG) const;
+  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
+  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+
+  virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+  /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+  /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+  /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+  /// is expanded to mul + add.
+  virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+
+  // Inline assembly constraint handling.
+  ConstraintType getConstraintType(const std::string &Constraint) const;
+
+  ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &Info,
+                                                  const char *Constraint) const;
+  void LowerAsmOperandForConstraint(SDValue Op,
+                                    std::string &Constraint,
+                                    std::vector<SDValue> &Ops,
+                                    SelectionDAG &DAG) const;
+
+  std::pair<unsigned, const TargetRegisterClass*>
+  getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+private:
+  const AArch64Subtarget *Subtarget;
+  const TargetRegisterInfo *RegInfo;
+  const InstrItineraryData *Itins;
+};
+} // namespace llvm
+
+#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
new file mode 100644
index 0000000..ce66396
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -0,0 +1,1011 @@
+//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// A64 Instruction Format Definitions.
+//
+
+// A64 is currently the only instruction set supported by the AArch64
+// architecture.
+class A64Inst<dag outs, dag ins, string asmstr, list<dag> patterns,
+              InstrItinClass itin>
+    : Instruction
+{
+  // All A64 instructions are 32-bit. This field will be filled in
+  // gradually going down the hierarchy.
+  field bits<32> Inst;
+
+  field bits<32> Unpredictable = 0;
+  // SoftFail is the generic name for this field, but we alias it so
+  // as to make it more obvious what it means in ARM-land.
+  field bits<32> SoftFail = Unpredictable;
+
+  // LLVM-level model of the AArch64/A64 distinction.
+  let Namespace = "AArch64";
+  let DecoderNamespace = "A64";
+  let Size = 4;
+
+  // Set the templated fields
+  let OutOperandList = outs;
+  let InOperandList = ins;
+  let AsmString = asmstr;
+  let Pattern = patterns;
+  let Itinerary = itin;
+}
+
+// Base class for pseudo-instructions: code-generation only, with no assembly
+// string, itinerary or encoding set here.
+class PseudoInst<dag outs, dag ins, list<dag> patterns> : Instruction
+{
+  let Namespace = "AArch64";
+
+  let OutOperandList = outs;
+  let InOperandList= ins;
+  let Pattern = patterns;
+  let isCodeGenOnly = 1;
+  let isPseudo = 1;
+}
+
+// Represents a pseudo-instruction that represents a single A64 instruction for
+// whatever reason, the eventual result will be a 32-bit real instruction.
+class A64PseudoInst<dag outs, dag ins, list<dag> patterns>
+  : PseudoInst<outs, ins, patterns>
+{
+  let Size = 4;
+}
+
+// As above, this will be a single A64 instruction, but we can actually give the
+// expansion in TableGen.
+class A64PseudoExpand<dag outs, dag ins, list<dag> patterns, dag Result>
+  : A64PseudoInst<outs, ins, patterns>,
+    PseudoInstExpansion<Result>;
+
+
+// First, some common cross-hierarchy register formats.
+
+// Instructions with a destination register Rd in bits 4-0.
+class A64InstRd<dag outs, dag ins, string asmstr,
+                list<dag> patterns, InstrItinClass itin>
+  : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+  bits<5> Rd;
+
+  let Inst{4-0} = Rd;
+}
+
+// Instructions with a transfer register Rt in bits 4-0.
+class A64InstRt<dag outs, dag ins, string asmstr,
+                list<dag> patterns, InstrItinClass itin>
+  : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+  bits<5> Rt;
+
+  let Inst{4-0} = Rt;
+}
+
+
+// Instructions taking Rd,Rn
+class A64InstRdn<dag outs, dag ins, string asmstr,
+                 list<dag> patterns, InstrItinClass itin>
+  : A64InstRd<outs, ins, asmstr, patterns, itin>
+{
+  // Inherit Rd in 4-0
+  bits<5> Rn;
+
+  let Inst{9-5} = Rn;
+}
+
+// Instructions taking Rt,Rn
+class A64InstRtn<dag outs, dag ins, string asmstr,
+                 list<dag> patterns, InstrItinClass itin>
+  : A64InstRt<outs, ins, asmstr, patterns, itin>
+{
+  // Inherit Rt in 4-0
+  bits<5> Rn;
+
+  let Inst{9-5} = Rn;
+}
+
+// Instructions taking Rt,Rt2,Rn
+class A64InstRtt2n<dag outs, dag ins, string asmstr,
+                   list<dag> patterns, InstrItinClass itin>
+  : A64InstRtn<outs, ins, asmstr, patterns, itin>
+{
+  bits<5> Rt2;
+
+  let Inst{14-10} = Rt2;
+}
+
+// Instructions taking Rd,Rn,Rm
+class A64InstRdnm<dag outs, dag ins, string asmstr,
+                  list<dag> patterns, InstrItinClass itin>
+  : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+  bits<5> Rm;
+
+  let Inst{20-16} = Rm;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Actual A64 Instruction Formats
+//
+
+// Format for Add-subtract (extended register) instructions.
+class A64I_addsubext<bit sf, bit op, bit S, bits<2> opt, bits<3> option,
+                     dag outs, dag ins, string asmstr, list<dag> patterns,
+                     InstrItinClass itin>
+    : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+  bits<3> Imm3;
+
+  let Inst{31} = sf;
+  let Inst{30} = op;
+  let Inst{29} = S;
+  let Inst{28-24} = 0b01011;
+  let Inst{23-22} = opt;
+  let Inst{21} = 0b1;
+  // Rm inherited in 20-16
+  let Inst{15-13} = option;
+  let Inst{12-10} = Imm3;
+  // Rn inherited in 9-5
+  // Rd inherited in 4-0
+}
+
+// Format for Add-subtract (immediate) instructions.
+class A64I_addsubimm<bit sf, bit op, bit S, bits<2> shift,
+                     dag outs, dag ins, string asmstr,
+                     list<dag> patterns, InstrItinClass itin>
+  : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+  bits<12> Imm12;
+
+  let Inst{31} = sf;
+  let Inst{30} = op;
+  let Inst{29} = S;
+  let Inst{28-24} = 0b10001;
+  let Inst{23-22} = shift;
+  let Inst{21-10} = Imm12;
+  // Rn inherited in 9-5
+  // Rd inherited in 4-0
+}
+
+// Format for Add-subtract (shifted register) instructions.
+class A64I_addsubshift<bit sf, bit op, bit S, bits<2> shift,
+                       dag outs, dag ins, string asmstr, list<dag> patterns,
+                       InstrItinClass itin>
+    : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+  bits<6> Imm6;
+
+  let Inst{31} = sf;
+  let Inst{30} = op;
+  let Inst{29} = S;
+  let Inst{28-24} = 0b01011;
+  let Inst{23-22} = shift;
+  let Inst{21} = 0b0;
+  // Rm inherited in 20-16
+  let Inst{15-10} = Imm6;
+  // Rn inherited in 9-5
+  // Rd inherited in 4-0
+}
+
+// Format for Add-subtract (with carry) instructions.
+class A64I_addsubcarry<bit sf, bit op, bit S, bits<6> opcode2,
+                       dag outs, dag ins, string asmstr, list<dag> patterns,
+                       InstrItinClass itin>
+    : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+  let Inst{31} = sf;
+  let Inst{30} = op;
+  let Inst{29} = S;
+  let Inst{28-21} = 0b11010000;
+  // Rm inherited in 20-16
+  let Inst{15-10} = opcode2;
+  // Rn inherited in 9-5
+  // Rd inherited in 4-0
+}
+
+
+// Format for Bitfield instructions
+class A64I_bitfield<bit sf, bits<2> opc, bit n,
+                    dag outs, dag ins, string asmstr,
+                    list<dag> patterns, InstrItinClass itin>
+  : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+  bits<6> ImmR;
+  bits<6> ImmS;
+
+  let Inst{31} = sf;
+  let Inst{30-29} = opc;
+  let Inst{28-23} = 0b100110;
+  let Inst{22} = n;
+  let Inst{21-16} = ImmR;
+  let Inst{15-10} = ImmS;
+  // Inherit Rn in 9-5
+  // Inherit Rd in 4-0
+}
+
+// Format for compare and branch (immediate) instructions.
+class A64I_cmpbr<bit sf, bit op,
+                 dag outs, dag ins, string asmstr,
+                 list<dag> patterns, InstrItinClass itin>
+  : A64InstRt<outs, ins, asmstr, patterns, itin>
+{
+  bits<19> Label;
+
+  let Inst{31} = sf;
+  let Inst{30-25} = 0b011010;
+  let Inst{24} = op;
+  let Inst{23-5} = Label;
+  // Inherit Rt in 4-0
+}
+
+// Format for conditional branch (immediate) instructions.
+class A64I_condbr<bit o1, bit o0,
+                  dag outs, dag ins, string asmstr,
+                  list<dag> patterns, InstrItinClass itin>
+  : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+  bits<19> Label;
+  bits<4> Cond;
+
+  let Inst{31-25} = 0b0101010;
+  let Inst{24} = o1;
+  let Inst{23-5} = Label;
+  let Inst{4} = o0;
+  let Inst{3-0} = Cond;
+}
+
+// Format for conditional compare (immediate) instructions.
+// No register class is inherited: bits 4-0 hold o3 and NZCVImm, so Rn is
+// declared directly here.
+class A64I_condcmpimm<bit sf, bit op, bit o2, bit o3, bit s,
+                      dag outs, dag ins, string asmstr,
+                      list<dag> patterns, InstrItinClass itin>
+  : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+  bits<5> Rn;
+  bits<5> UImm5;
+  bits<4> NZCVImm;
+  bits<4> Cond;
+
+  let Inst{31} = sf;
+  let Inst{30} = op;
+  let Inst{29} = s;
+  let Inst{28-21} = 0b11010010;
+  let Inst{20-16} = UImm5;
+  let Inst{15-12} = Cond;
+  let Inst{11} = 0b1;
+  let Inst{10} = o2;
+  let Inst{9-5} = Rn;
+  let Inst{4} = o3;
+  let Inst{3-0} = NZCVImm;
+}
+
+// Format for conditional compare (register) instructions.
+// Same layout as the immediate form above, except that bits 20-16 hold Rm and
+// bit 11 is 0.
+class A64I_condcmpreg<bit sf, bit op, bit o2, bit o3, bit s,
+                      dag outs, dag ins, string asmstr,
+                      list<dag> patterns, InstrItinClass itin>
+  : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+  bits<5> Rn;
+  bits<5> Rm;
+  bits<4> NZCVImm;
+  bits<4> Cond;
+
+
+  let Inst{31} = sf;
+  let Inst{30} = op;
+  let Inst{29} = s;
+  let Inst{28-21} = 0b11010010;
+  let Inst{20-16} = Rm;
+  let Inst{15-12} = Cond;
+  let Inst{11} = 0b0;
+  let Inst{10} = o2;
+  let Inst{9-5} = Rn;
+  let Inst{4} = o3;
+  let Inst{3-0} = NZCVImm;
+}
+
+// Format for conditional select instructions.
+class A64I_condsel<bit sf, bit op, bit s, bits<2> op2,
+                   dag outs, dag ins, string asmstr,
+                   list<dag> patterns, InstrItinClass itin>
+  : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+  bits<4> Cond;
+
+  let Inst{31} = sf;
+  let Inst{30} = op;
+  let Inst{29} = s;
+  let Inst{28-21} = 0b11010100;
+  // Inherit Rm in 20-16
+  let Inst{15-12} = Cond;
+  let Inst{11-10} = op2;
+  // Inherit Rn in 9-5
+  // Inherit Rd in 4-0
+}
+
+// Format for data processing (1 source) instructions
+// Note: unlike most formats in this file, asmstr comes *before* outs/ins in
+// the template argument list of this class and of A64I_dp_2src.
+class A64I_dp_1src<bit sf, bit S, bits<5> opcode2, bits<6> opcode,
+                   string asmstr, dag outs, dag ins,
+                   list<dag> patterns, InstrItinClass itin>
+  : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+  let Inst{31} = sf;
+  let Inst{30} = 0b1;
+  let Inst{29} = S;
+  let Inst{28-21} = 0b11010110;
+  let Inst{20-16} = opcode2;
+  let Inst{15-10} = opcode;
+  // Inherit Rn in 9-5
+  // Inherit Rd in 4-0
+}
+
+// Format for data processing (2 source) instructions
+class A64I_dp_2src<bit sf, bits<6> opcode, bit S,
+                   string asmstr, dag outs, dag ins,
+                   list<dag> patterns, InstrItinClass itin>
+  : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+  let Inst{31} = sf;
+  let Inst{30} = 0b0;
+  let Inst{29} = S;
+  let Inst{28-21} = 0b11010110;
+  // Inherit Rm in 20-16
+  let Inst{15-10} = opcode;
+  // Inherit Rn in 9-5
+  // Inherit Rd in 4-0
+}
+
+// Format for data-processing (3 source) instructions
+
+// The 6-bit opcode parameter is scattered over the encoding: opcode{5-4} in
+// bits 30-29, opcode{3-1} in bits 23-21 and opcode{0} in bit 15.
+class A64I_dp3<bit sf, bits<6> opcode,
+               dag outs, dag ins, string asmstr,
+               list<dag> patterns, InstrItinClass itin>
+  : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+  bits<5> Ra;
+
+  let Inst{31} = sf;
+  let Inst{30-29} = opcode{5-4};
+  let Inst{28-24} = 0b11011;
+  let Inst{23-21} = opcode{3-1};
+  // Inherits Rm in 20-16
+  let Inst{15} = opcode{0};
+  let Inst{14-10} = Ra;
+  // Inherits Rn in 9-5
+  // Inherits Rd in 4-0
+}
+
+// Format for exception generation instructions
+class A64I_exception<bits<3> opc, bits<3> op2, bits<2> ll,
+                     dag outs, dag ins, string asmstr,
+                     list<dag> patterns, InstrItinClass itin>
+  : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+  bits<16> UImm16;
+
+  let Inst{31-24} = 0b11010100;
+  let Inst{23-21} = opc;
+  let Inst{20-5} = UImm16;
+  let Inst{4-2} = op2;
+  let Inst{1-0} = ll;
+}
+
+// Format for extract (immediate) instructions
+// The 3-bit op parameter is split: op{2-1} in bits 30-29 and op{0} in bit 21.
+class A64I_extract<bit sf, bits<3> op, bit n,
+                   dag outs, dag ins, string asmstr,
+                   list<dag> patterns, InstrItinClass itin>
+  : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+  bits<6> LSB;
+
+  let Inst{31} = sf;
+  let Inst{30-29} = op{2-1};
+  let Inst{28-23} = 0b100111;
+  let Inst{22} = n;
+  let Inst{21} = op{0};
+  // Inherits Rm in bits 20-16
+  let Inst{15-10} = LSB;
+  // Inherits Rn in 9-5
+  // Inherits Rd in 4-0
+}
+
+// Format for floating-point compare instructions.
+class A64I_fpcmp<bit m, bit s, bits<2> type, bits<2> op, bits<5> opcode2,
+                 dag outs, dag ins, string asmstr,
+                 list<dag> patterns, InstrItinClass itin>
+  : A64Inst<outs, ins, asmstr, patterns, itin>
+{
+  bits<5> Rn;
+  bits<5> Rm;
+
+  let Inst{31} = m;
+  let Inst{30} = 0b0;
+  let Inst{29} = s;
+  let Inst{28-24} = 0b11110;
+  let Inst{23-22} = type;
+  let Inst{21} = 0b1;
+  let Inst{20-16} = Rm;
+  let Inst{15-14} = op;
+  let Inst{13-10} = 0b1000;
+  let Inst{9-5} = Rn;
+  let Inst{4-0} = opcode2;
+}
+
+// Format for floating-point conditional compare instructions.
+// NOTE(review): this class derives from A64InstRdn, which already assigns
+// Inst{4-0} = Rd and Inst{9-5} = Rn, yet it redeclares Rn and assigns bits 9-5,
+// 4 and 3-0 again below. The integer equivalents (A64I_condcmpimm and
+// A64I_condcmpreg) derive from plain A64Inst instead -- confirm whether A64Inst
+// was intended here and whether the inherited Rd field is ever used.
+class A64I_fpccmp<bit m, bit s, bits<2> type, bit op,
+                  dag outs, dag ins, string asmstr,
+                  list<dag> patterns, InstrItinClass itin>
+  : A64InstRdn<outs, ins, asmstr, patterns, itin>
+{
+  bits<5> Rn;
+  bits<5> Rm;
+  bits<4> NZCVImm;
+  bits<4> Cond;
+
+  let Inst{31} = m;
+  let Inst{30} = 0b0;
+  let Inst{29} = s;
+  let Inst{28-24} = 0b11110;
+  let Inst{23-22} = type;
+  let Inst{21} = 0b1;
+  let Inst{20-16} = Rm;
+  let Inst{15-12} = Cond;
+  let Inst{11-10} = 0b01;
+  let Inst{9-5} = Rn;
+  let Inst{4} = op;
+  let Inst{3-0} = NZCVImm;
+}
+
+// Format for floating-point conditional select instructions.
// Same layout as the other FP formats below (fixed 0b11110 in 28-24, bit 21
// set); the 4-bit condition sits in 15-12 and bits 11-10 are 0b11.
class A64I_fpcondsel<bit m, bit s, bits<2> type,
                     dag outs, dag ins, string asmstr,
                     list<dag> patterns, InstrItinClass itin>
  : A64InstRdnm<outs, ins, asmstr, patterns, itin>
{
  bits<4> Cond;

  let Inst{31} = m;
  let Inst{30} = 0b0;
  let Inst{29} = s;
  let Inst{28-24} = 0b11110;
  let Inst{23-22} = type;
  let Inst{21} = 0b1;
  // Inherit Rm in 20-16
  let Inst{15-12} = Cond;
  let Inst{11-10} = 0b11;
  // Inherit Rn in 9-5
  // Inherit Rd in 4-0
}


// Format for floating-point data-processing (1 source) instructions.
// The 6-bit opcode occupies 20-15 (where two-source formats keep Rm), and
// bits 14-10 are the fixed pattern 0b10000.
class A64I_fpdp1<bit m, bit s, bits<2> type, bits<6> opcode,
                 dag outs, dag ins, string asmstr,
                 list<dag> patterns, InstrItinClass itin>
  : A64InstRdn<outs, ins, asmstr, patterns, itin>
{
  let Inst{31} = m;
  let Inst{30} = 0b0;
  let Inst{29} = s;
  let Inst{28-24} = 0b11110;
  let Inst{23-22} = type;
  let Inst{21} = 0b1;
  let Inst{20-15} = opcode;
  let Inst{14-10} = 0b10000;
  // Inherit Rn in 9-5
  // Inherit Rd in 4-0
}

// Format for floating-point data-processing (2 sources) instructions.
// The 4-bit opcode occupies 15-12 and bits 11-10 are 0b10.
class A64I_fpdp2<bit m, bit s, bits<2> type, bits<4> opcode,
                 dag outs, dag ins, string asmstr,
                 list<dag> patterns, InstrItinClass itin>
  : A64InstRdnm<outs, ins, asmstr, patterns, itin>
{
  let Inst{31} = m;
  let Inst{30} = 0b0;
  let Inst{29} = s;
  let Inst{28-24} = 0b11110;
  let Inst{23-22} = type;
  let Inst{21} = 0b1;
  // Inherit Rm in 20-16
  let Inst{15-12} = opcode;
  let Inst{11-10} = 0b10;
  // Inherit Rn in 9-5
  // Inherit Rd in 4-0
}

// Format for floating-point data-processing (3 sources) instructions.
// Differs from the 1- and 2-source FP formats in bits 28-24 (0b11111 rather
// than 0b11110); o1 and o0 select the operation and Ra is the third source.
class A64I_fpdp3<bit m, bit s, bits<2> type, bit o1, bit o0,
                 dag outs, dag ins, string asmstr,
                 list<dag> patterns, InstrItinClass itin>
  : A64InstRdnm<outs, ins, asmstr, patterns, itin>
{
  bits<5> Ra;

  let Inst{31} = m;
  let Inst{30} = 0b0;
  let Inst{29} = s;
  let Inst{28-24} = 0b11111;
  let Inst{23-22} = type;
  let Inst{21} = o1;
  // Inherit Rm in 20-16
  let Inst{15} = o0;
  let Inst{14-10} = Ra;
  // Inherit Rn in 9-5
  // Inherit Rd in 4-0
}

// Format for floating-point <-> fixed-point conversion instructions.
// Bit 21 is 0 here (it is 1 in the integer-conversion format) and bits 15-10
// carry the 6-bit Scale operand.
class A64I_fpfixed<bit sf, bit s, bits<2> type, bits<2> mode, bits<3> opcode,
                   dag outs, dag ins, string asmstr,
                   list<dag> patterns, InstrItinClass itin>
  : A64InstRdn<outs, ins, asmstr, patterns, itin>
{
  bits<6> Scale;

  let Inst{31} = sf;
  let Inst{30} = 0b0;
  let Inst{29} = s;
  let Inst{28-24} = 0b11110;
  let Inst{23-22} = type;
  let Inst{21} = 0b0;
  let Inst{20-19} = mode;
  let Inst{18-16} = opcode;
  let Inst{15-10} = Scale;
  // Inherit Rn in 9-5
  // Inherit Rd in 4-0
}

// Format for floating-point <-> integer conversion instructions.
// Same field positions as the fixed-point format, but bit 21 is 1 and bits
// 15-10 are all zero instead of holding a scale.
class A64I_fpint<bit sf, bit s, bits<2> type, bits<2> rmode, bits<3> opcode,
                 dag outs, dag ins, string asmstr,
                 list<dag> patterns, InstrItinClass itin>
  : A64InstRdn<outs, ins, asmstr, patterns, itin>
{
  let Inst{31} = sf;
  let Inst{30} = 0b0;
  let Inst{29} = s;
  let Inst{28-24} = 0b11110;
  let Inst{23-22} = type;
  let Inst{21} = 0b1;
  let Inst{20-19} = rmode;
  let Inst{18-16} = opcode;
  let Inst{15-10} = 0b000000;
  // Inherit Rn in 9-5
  // Inherit Rd in 4-0
}


// Format for floating-point immediate instructions.
// Only Rd is inherited; bits 9-5 take the imm5 template argument and the
// 8-bit immediate operand sits in 20-13.
class A64I_fpimm<bit m, bit s, bits<2> type, bits<5> imm5,
                 dag outs, dag ins, string asmstr,
                 list<dag> patterns, InstrItinClass itin>
  : A64InstRd<outs, ins, asmstr, patterns, itin>
{
  bits<8> Imm8;

  let Inst{31} = m;
  let Inst{30} = 0b0;
  let Inst{29} = s;
  let Inst{28-24} = 0b11110;
  let Inst{23-22} = type;
  let Inst{21} = 0b1;
  let Inst{20-13} = Imm8;
  let Inst{12-10} = 0b100;
  let Inst{9-5} = imm5;
  // Inherit Rd in 4-0
}

// Format for load-register (literal) instructions.
// A 19-bit immediate fills bits 23-5; only Rt is inherited.
class A64I_LDRlit<bits<2> opc, bit v,
                  dag outs, dag ins, string asmstr,
                  list<dag> patterns, InstrItinClass itin>
  : A64InstRt<outs, ins, asmstr, patterns, itin>
{
  bits<19> Imm19;

  let Inst{31-30} = opc;
  let Inst{29-27} = 0b011;
  let Inst{26} = v;
  let Inst{25-24} = 0b00;
  let Inst{23-5} = Imm19;
  // Inherit Rt in 4-0
}

// Format for load-store exclusive instructions.
// Base variant with only Rt and Rn (from A64InstRtn). Bits 20-16 and 14-10
// are not assigned here; the derived classes below fill them with Rs / Rt2.
class A64I_LDSTex_tn<bits<2> size, bit o2, bit L, bit o1, bit o0,
                     dag outs, dag ins, string asmstr,
                     list <dag> patterns, InstrItinClass itin>
  : A64InstRtn<outs, ins, asmstr, patterns, itin>
{
  let Inst{31-30} = size;
  let Inst{29-24} = 0b001000;
  let Inst{23} = o2;
  let Inst{22} = L;
  let Inst{21} = o1;
  let Inst{15} = o0;
}

// Exclusive variant adding a second transfer register Rt2 in 14-10.
class A64I_LDSTex_tt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
                       dag outs, dag ins, string asmstr,
                       list <dag> patterns, InstrItinClass itin>:
      A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
  bits<5> Rt2;
  let Inst{14-10} = Rt2;
}

// Exclusive variant adding a register Rs in 20-16.
class A64I_LDSTex_stn<bits<2> size, bit o2, bit L, bit o1, bit o0,
                      dag outs, dag ins, string asmstr,
                      list <dag> patterns, InstrItinClass itin>:
      A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
  bits<5> Rs;
  let Inst{20-16} = Rs;
}

// Exclusive variant with both Rs (inherited from the _stn class) and Rt2.
class A64I_LDSTex_stt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
                        dag outs, dag ins, string asmstr,
                        list <dag> patterns, InstrItinClass itin>:
      A64I_LDSTex_stn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
  bits<5> Rt2;
  let Inst{14-10} = Rt2;
}

// Format for load-store register (immediate post-indexed) instructions
// The four 9-bit-immediate load/store formats share every field position
// and differ only in bits 11-10; post-indexed uses 0b01.
class A64I_LSpostind<bits<2> size, bit v, bits<2> opc,
                     dag outs, dag ins, string asmstr,
                     list<dag> patterns, InstrItinClass itin>
  : A64InstRtn<outs, ins, asmstr, patterns, itin>
{
  bits<9> SImm9;

  let Inst{31-30} = size;
  let Inst{29-27} = 0b111;
  let Inst{26} = v;
  let Inst{25-24} = 0b00;
  let Inst{23-22} = opc;
  let Inst{21} = 0b0;
  let Inst{20-12} = SImm9;
  let Inst{11-10} = 0b01;
  // Inherit Rn in 9-5
  // Inherit Rt in 4-0
}

// Format for load-store register (immediate pre-indexed) instructions
// Bits 11-10 are 0b11.
class A64I_LSpreind<bits<2> size, bit v, bits<2> opc,
                    dag outs, dag ins, string asmstr,
                    list<dag> patterns, InstrItinClass itin>
  : A64InstRtn<outs, ins, asmstr, patterns, itin>
{
  bits<9> SImm9;


  let Inst{31-30} = size;
  let Inst{29-27} = 0b111;
  let Inst{26} = v;
  let Inst{25-24} = 0b00;
  let Inst{23-22} = opc;
  let Inst{21} = 0b0;
  let Inst{20-12} = SImm9;
  let Inst{11-10} = 0b11;
  // Inherit Rn in 9-5
  // Inherit Rt in 4-0
}

// Format for load-store register (unprivileged) instructions
// Bits 11-10 are 0b10.
class A64I_LSunpriv<bits<2> size, bit v, bits<2> opc,
                    dag outs, dag ins, string asmstr,
                    list<dag> patterns, InstrItinClass itin>
  : A64InstRtn<outs, ins, asmstr, patterns, itin>
{
  bits<9> SImm9;


  let Inst{31-30} = size;
  let Inst{29-27} = 0b111;
  let Inst{26} = v;
  let Inst{25-24} = 0b00;
  let Inst{23-22} = opc;
  let Inst{21} = 0b0;
  let Inst{20-12} = SImm9;
  let Inst{11-10} = 0b10;
  // Inherit Rn in 9-5
  // Inherit Rt in 4-0
}

// Format for load-store (unscaled immediate) instructions.
// Shares the 9-bit signed immediate layout of the post-/pre-indexed and
// unprivileged formats; bits 11-10 are 0b00 here.
class A64I_LSunalimm<bits<2> size, bit v, bits<2> opc,
                     dag outs, dag ins, string asmstr,
                     list<dag> patterns, InstrItinClass itin>
  : A64InstRtn<outs, ins, asmstr, patterns, itin>
{
  bits<9> SImm9;

  let Inst{31-30} = size;
  let Inst{29-27} = 0b111;
  let Inst{26} = v;
  let Inst{25-24} = 0b00;
  let Inst{23-22} = opc;
  let Inst{21} = 0b0;
  let Inst{20-12} = SImm9;
  let Inst{11-10} = 0b00;
  // Inherit Rn in 9-5
  // Inherit Rt in 4-0
}


// Format for load-store (unsigned immediate) instructions.
// Bits 25-24 are 0b01 (0b00 in the 9-bit-immediate formats) and a 12-bit
// unsigned immediate fills 21-10.
class A64I_LSunsigimm<bits<2> size, bit v, bits<2> opc,
                      dag outs, dag ins, string asmstr,
                      list<dag> patterns, InstrItinClass itin>
  : A64InstRtn<outs, ins, asmstr, patterns, itin>
{
  bits<12> UImm12;

  let Inst{31-30} = size;
  let Inst{29-27} = 0b111;
  let Inst{26} = v;
  let Inst{25-24} = 0b01;
  let Inst{23-22} = opc;
  let Inst{21-10} = UImm12;
}

// Format for load-store register (register offset) instructions.
class A64I_LSregoff<bits<2> size, bit v, bits<2> opc, bit optionlo,
                    dag outs, dag ins, string asmstr,
                    list<dag> patterns, InstrItinClass itin>
  : A64InstRtn<outs, ins, asmstr, patterns, itin>
{
  bits<5> Rm;

  // Complex operand selection needed for these instructions, so they
  // need an "addr" field for encoding/decoding to be generated.
  // In this class that combined field is the 3-bit Ext declared below, which
  // packs the high option bits and the S bit together.
  bits<3> Ext;
  // OptionHi = Ext{2-1}
  // S = Ext{0}

  let Inst{31-30} = size;
  let Inst{29-27} = 0b111;
  let Inst{26} = v;
  let Inst{25-24} = 0b00;
  let Inst{23-22} = opc;
  let Inst{21} = 0b1;
  let Inst{20-16} = Rm;
  let Inst{15-14} = Ext{2-1};
  let Inst{13} = optionlo;
  let Inst{12} = Ext{0};
  let Inst{11-10} = 0b10;
  // Inherits Rn in 9-5
  // Inherits Rt in 4-0

  let AddedComplexity = 50;
}

// Format for Load-store register pair (offset) instructions
// The four pair formats share every field position and differ only in bits
// 25-23; the plain offset form uses 0b010.
class A64I_LSPoffset<bits<2> opc, bit v, bit l,
                     dag outs, dag ins, string asmstr,
                     list<dag> patterns, InstrItinClass itin>
  : A64InstRtt2n<outs, ins, asmstr, patterns, itin>
{
  bits<7> SImm7;

  let Inst{31-30} = opc;
  let Inst{29-27} = 0b101;
  let Inst{26} = v;
  let Inst{25-23} = 0b010;
  let Inst{22} = l;
  let Inst{21-15} = SImm7;
  // Inherit Rt2 in 14-10
  // Inherit Rn in 9-5
  // Inherit Rt in 4-0
}

// Format for Load-store register pair (post-indexed) instructions
// Bits 25-23 are 0b001.
class A64I_LSPpostind<bits<2> opc, bit v, bit l,
                      dag outs, dag ins, string asmstr,
                      list<dag> patterns, InstrItinClass itin>
  : A64InstRtt2n<outs, ins, asmstr, patterns, itin>
{
  bits<7> SImm7;

  let Inst{31-30} = opc;
  let Inst{29-27} = 0b101;
  let Inst{26} = v;
  let Inst{25-23} = 0b001;
  let Inst{22} = l;
  let Inst{21-15} = SImm7;
  // Inherit Rt2 in 14-10
  // Inherit Rn in 9-5
  // Inherit Rt in 4-0
}

// Format for Load-store register pair (pre-indexed) instructions
// Bits 25-23 are 0b011.
class A64I_LSPpreind<bits<2> opc, bit v, bit l,
                     dag outs, dag ins, string asmstr,
                     list<dag> patterns, InstrItinClass itin>
  : A64InstRtt2n<outs, ins, asmstr, patterns, itin>
{
  bits<7> SImm7;

  let Inst{31-30} = opc;
  let Inst{29-27} = 0b101;
  let Inst{26} = v;
  let Inst{25-23} = 0b011;
  let Inst{22} = l;
  let Inst{21-15} = SImm7;
  // Inherit Rt2 in 14-10
  // Inherit Rn in 9-5
  // Inherit Rt in 4-0
}

// Format for Load-store non-temporal register pair (offset) instructions
// Bits 25-23 are 0b000.
class A64I_LSPnontemp<bits<2> opc, bit v, bit l,
                      dag outs, dag ins, string asmstr,
                      list<dag> patterns, InstrItinClass itin>
  : A64InstRtt2n<outs, ins, asmstr, patterns, itin>
{
  bits<7> SImm7;

  let Inst{31-30} = opc;
  let Inst{29-27} = 0b101;
  let Inst{26} = v;
  let Inst{25-23} = 0b000;
  let Inst{22} = l;
  let Inst{21-15} = SImm7;
  // Inherit Rt2 in 14-10
  // Inherit Rn in 9-5
  // Inherit Rt in 4-0
}

// Format for Logical (immediate) instructions
class A64I_logicalimm<bit sf, bits<2> opc,
                      dag outs, dag ins, string asmstr,
                      list<dag> patterns, InstrItinClass itin>
  : A64InstRdn<outs, ins, asmstr, patterns, itin>
{
  // NOTE(review): N, ImmR and ImmS are declared here but the encoding below
  // reads only the combined Imm field; they look purely descriptive.
  bit N;
  bits<6> ImmR;
  bits<6> ImmS;

  // N, ImmR and ImmS have no separate existence in any assembly syntax (or for
  // selection), so we'll combine them into a single field here.
  bits<13> Imm;
  // N = Imm{12};
  // ImmR = Imm{11-6};
  // ImmS = Imm{5-0};

  let Inst{31} = sf;
  let Inst{30-29} = opc;
  let Inst{28-23} = 0b100100;
  let Inst{22} = Imm{12};
  let Inst{21-16} = Imm{11-6};
  let Inst{15-10} = Imm{5-0};
  // Rn inherited in 9-5
  // Rd inherited in 4-0
}

// Format for Logical (shifted register) instructions
// shift (23-22) and N (21) are template arguments; the 6-bit shift amount
// Imm6 is an operand in 15-10.
class A64I_logicalshift<bit sf, bits<2> opc, bits<2> shift, bit N,
                        dag outs, dag ins, string asmstr,
                        list<dag> patterns, InstrItinClass itin>
  : A64InstRdnm<outs, ins, asmstr, patterns, itin>
{
  bits<6> Imm6;

  let Inst{31} = sf;
  let Inst{30-29} = opc;
  let Inst{28-24} = 0b01010;
  let Inst{23-22} = shift;
  let Inst{21} = N;
  // Rm inherited
  let Inst{15-10} = Imm6;
  // Rn inherited
  // Rd inherited
}

// Format for Move wide (immediate)
// A 16-bit immediate in 20-5 plus a 2-bit shift selector in 22-21.
class A64I_movw<bit sf, bits<2> opc,
                dag outs, dag ins, string asmstr,
                list<dag> patterns, InstrItinClass itin>
  : A64InstRd<outs, ins, asmstr, patterns, itin>
{
  bits<16> UImm16;
  bits<2> Shift; // Called "hw" officially

  let Inst{31} = sf;
  let Inst{30-29} = opc;
  let Inst{28-23} = 0b100101;
  let Inst{22-21} = Shift;
  let Inst{20-5} = UImm16;
  // Inherits Rd in 4-0
}

// Format for PC-relative addressing instructions, ADR and ADRP.
// The 21-bit Label is split: its low two bits go to 30-29 and the remaining
// 19 bits to 23-5. Rd is inherited in 4-0.
class A64I_PCADR<bit op,
                 dag outs, dag ins, string asmstr,
                 list<dag> patterns, InstrItinClass itin>
  : A64InstRd<outs, ins, asmstr, patterns, itin>
{
  bits<21> Label;

  let Inst{31} = op;
  let Inst{30-29} = Label{1-0};
  let Inst{28-24} = 0b10000;
  let Inst{23-5} = Label{20-2};
}

// Format for system instructions
// Every field below bit 21 is an operand, so nothing is inherited beyond
// the plain A64Inst base.
class A64I_system<bit l,
                  dag outs, dag ins, string asmstr,
                  list<dag> patterns, InstrItinClass itin>
  : A64Inst<outs, ins, asmstr, patterns, itin>
{
  bits<2> Op0;
  bits<3> Op1;
  bits<4> CRn;
  bits<4> CRm;
  bits<3> Op2;
  bits<5> Rt;

  let Inst{31-22} = 0b1101010100;
  let Inst{21} = l;
  let Inst{20-19} = Op0;
  let Inst{18-16} = Op1;
  let Inst{15-12} = CRn;
  let Inst{11-8} = CRm;
  let Inst{7-5} = Op2;
  let Inst{4-0} = Rt;

  // These instructions can do horrible things.
  let hasSideEffects = 1;
}

// Format for unconditional branch (immediate) instructions
class A64I_Bimm<bit op,
                dag outs, dag ins, string asmstr,
                list<dag> patterns, InstrItinClass itin>
  : A64Inst<outs, ins, asmstr, patterns, itin>
{
  // Doubly special in not even sharing register fields with other
  // instructions: the only operand is the 26-bit branch label, which fills
  // the whole of bits 25-0.
  bits<26> Label;

  let Inst{31} = op;
  let Inst{30-26} = 0b00101;
  let Inst{25-0} = Label;
}

// Format for Test & branch (immediate) instructions
class A64I_TBimm<bit op,
                 dag outs, dag ins, string asmstr,
                 list<dag> patterns, InstrItinClass itin>
  : A64InstRt<outs, ins, asmstr, patterns, itin>
{
  // Besides the inherited Rt, this format has a 6-bit immediate that is split
  // across the word (its top bit in 31, the low five in 23-19) and a 14-bit
  // branch label.
  bits<6> Imm;
  bits<14> Label;

  let Inst{31} = Imm{5};
  let Inst{30-25} = 0b011011;
  let Inst{24} = op;
  let Inst{23-19} = Imm{4-0};
  let Inst{18-5} = Label;
  // Inherit Rt in 4-0
}

// Format for Unconditional branch (register) instructions, including
// RET. Shares no fields with instructions further up the hierarchy
// so top-level.
class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4,
                dag outs, dag ins, string asmstr,
                list<dag> patterns, InstrItinClass itin>
  : A64Inst<outs, ins, asmstr, patterns, itin>
{
  // Doubly special in not even sharing register fields with other
  // instructions, so we create our own Rn here.
  bits<5> Rn;

  let Inst{31-25} = 0b1101011;
  let Inst{24-21} = opc;
  let Inst{20-16} = op2;
  let Inst{15-10} = op3;
  let Inst{9-5} = Rn;
  let Inst{4-0} = op4;
}

diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
new file mode 100644
index 0000000..967960c
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -0,0 +1,805 @@
//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
+// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" + +#include <algorithm> + +#define GET_INSTRINFO_CTOR +#include "AArch64GenInstrInfo.inc" + +using namespace llvm; + +AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) + : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), + RI(*this, STI), Subtarget(STI) {} + +void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc = 0; + unsigned ZeroReg = 0; + if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) { + // E.g. ADD xDst, xsp, #0 (, lsl #0) + BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg) + .addReg(SrcReg) + .addImm(0); + return; + } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { + // E.g. ADD wDST, wsp, #0 (, lsl #0) + BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg) + .addReg(SrcReg) + .addImm(0); + return; + } else if (DestReg == AArch64::NZCV) { + assert(AArch64::GPR64RegClass.contains(SrcReg)); + // E.g. MSR NZCV, xDST + BuildMI(MBB, I, DL, get(AArch64::MSRix)) + .addImm(A64SysReg::NZCV) + .addReg(SrcReg); + } else if (SrcReg == AArch64::NZCV) { + assert(AArch64::GPR64RegClass.contains(DestReg)); + // E.g. 
MRS xDST, NZCV + BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg) + .addImm(A64SysReg::NZCV); + } else if (AArch64::GPR64RegClass.contains(DestReg)) { + assert(AArch64::GPR64RegClass.contains(SrcReg)); + Opc = AArch64::ORRxxx_lsl; + ZeroReg = AArch64::XZR; + } else if (AArch64::GPR32RegClass.contains(DestReg)) { + assert(AArch64::GPR32RegClass.contains(SrcReg)); + Opc = AArch64::ORRwww_lsl; + ZeroReg = AArch64::WZR; + } else if (AArch64::FPR32RegClass.contains(DestReg)) { + assert(AArch64::FPR32RegClass.contains(SrcReg)); + BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg) + .addReg(SrcReg); + return; + } else if (AArch64::FPR64RegClass.contains(DestReg)) { + assert(AArch64::FPR64RegClass.contains(SrcReg)); + BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg) + .addReg(SrcReg); + return; + } else if (AArch64::FPR128RegClass.contains(DestReg)) { + assert(AArch64::FPR128RegClass.contains(SrcReg)); + + // FIXME: there's no good way to do this, at least without NEON: + // + There's no single move instruction for q-registers + // + We can't create a spill slot and use normal STR/LDR because stack + // allocation has already happened + // + We can't go via X-registers with FMOV because register allocation has + // already happened. + // This may not be efficient, but at least it works. + BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP) + .addReg(SrcReg) + .addReg(AArch64::XSP) + .addImm(0x1ff & -16); + + BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg) + .addReg(AArch64::XSP, RegState::Define) + .addReg(AArch64::XSP) + .addImm(16); + return; + } else { + llvm_unreachable("Unknown register class in copyPhysReg"); + } + + // E.g. 
ORR xDst, xzr, xSrc, lsl #0 + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(ZeroReg) + .addReg(SrcReg) + .addImm(0); +} + +MachineInstr * +AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, + uint64_t Offset, const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE)) + .addFrameIndex(FrameIx).addImm(0) + .addImm(Offset) + .addMetadata(MDPtr); + return &*MIB; +} + +/// Does the Opcode represent a conditional branch that we can remove and re-add +/// at the end of a basic block? +static bool isCondBranch(unsigned Opc) { + return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx || + Opc == AArch64::CBNZw || Opc == AArch64::CBNZx || + Opc == AArch64::TBZwii || Opc == AArch64::TBZxii || + Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii; +} + +/// Takes apart a given conditional branch MachineInstr (see isCondBranch), +/// setting TBB to the destination basic block and populating the Cond vector +/// with data necessary to recreate the conditional branch at a later +/// date. First element will be the opcode, and subsequent ones define the +/// conditions being branched on in an instruction-specific manner. +static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB, + SmallVectorImpl<MachineOperand> &Cond) { + switch(I->getOpcode()) { + case AArch64::Bcc: + case AArch64::CBZw: + case AArch64::CBZx: + case AArch64::CBNZw: + case AArch64::CBNZx: + // These instructions just have one predicate operand in position 0 (either + // a condition code or a register being compared). + Cond.push_back(MachineOperand::CreateImm(I->getOpcode())); + Cond.push_back(I->getOperand(0)); + TBB = I->getOperand(1).getMBB(); + return; + case AArch64::TBZwii: + case AArch64::TBZxii: + case AArch64::TBNZwii: + case AArch64::TBNZxii: + // These have two predicate operands: a register and a bit position. 
+ Cond.push_back(MachineOperand::CreateImm(I->getOpcode())); + Cond.push_back(I->getOperand(0)); + Cond.push_back(I->getOperand(1)); + TBB = I->getOperand(2).getMBB(); + return; + default: + llvm_unreachable("Unknown conditional branch to classify"); + } +} + + +bool +AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return false; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return false; + --I; + } + if (!isUnpredicatedTerminator(I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. + unsigned LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (LastOpc == AArch64::Bimm) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + if (isCondBranch(LastOpc)) { + classifyCondBranch(LastInst, TBB, Cond); + return false; + } + return true; // Can't handle indirect branch. + } + + // Get the instruction before it if it is a terminator. + MachineInstr *SecondLastInst = I; + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + + // If AllowModify is true and the block ends with two or more unconditional + // branches, delete all but the first unconditional branch. + if (AllowModify && LastOpc == AArch64::Bimm) { + while (SecondLastOpc == AArch64::Bimm) { + LastInst->eraseFromParent(); + LastInst = SecondLastInst; + LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + // Return now the only terminator is an unconditional branch. 
+ TBB = LastInst->getOperand(0).getMBB(); + return false; + } else { + SecondLastInst = I; + SecondLastOpc = SecondLastInst->getOpcode(); + } + } + } + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) + return true; + + // If the block ends with a B and a Bcc, handle it. + if (LastOpc == AArch64::Bimm) { + if (SecondLastOpc == AArch64::Bcc) { + TBB = SecondLastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc)); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } else if (isCondBranch(SecondLastOpc)) { + classifyCondBranch(SecondLastInst, TBB, Cond); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + } + + // If the block ends with two unconditional branches, handle it. The second + // one is not executed, so remove it. + if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // Otherwise, can't handle this. 
+ return true; +} + +bool AArch64InstrInfo::ReverseBranchCondition( + SmallVectorImpl<MachineOperand> &Cond) const { + switch (Cond[0].getImm()) { + case AArch64::Bcc: { + A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm()); + CC = A64InvertCondCode(CC); + Cond[1].setImm(CC); + return false; + } + case AArch64::CBZw: + Cond[0].setImm(AArch64::CBNZw); + return false; + case AArch64::CBZx: + Cond[0].setImm(AArch64::CBNZx); + return false; + case AArch64::CBNZw: + Cond[0].setImm(AArch64::CBZw); + return false; + case AArch64::CBNZx: + Cond[0].setImm(AArch64::CBZx); + return false; + case AArch64::TBZwii: + Cond[0].setImm(AArch64::TBNZwii); + return false; + case AArch64::TBZxii: + Cond[0].setImm(AArch64::TBNZxii); + return false; + case AArch64::TBNZwii: + Cond[0].setImm(AArch64::TBZwii); + return false; + case AArch64::TBNZxii: + Cond[0].setImm(AArch64::TBZxii); + return false; + default: + llvm_unreachable("Unknown branch type"); + } +} + + +unsigned +AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { + if (FBB == 0 && Cond.empty()) { + BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB); + return 1; + } else if (FBB == 0) { + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + for (int i = 1, e = Cond.size(); i != e; ++i) + MIB.addOperand(Cond[i]); + MIB.addMBB(TBB); + return 1; + } + + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + for (int i = 1, e = Cond.size(); i != e; ++i) + MIB.addOperand(Cond[i]); + MIB.addMBB(TBB); + + BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB); + return 2; +} + +unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) return 0; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return 0; + --I; + } + if (I->getOpcode() != AArch64::Bimm && 
!isCondBranch(I->getOpcode())) + return 0; + + // Remove the branch. + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) return 1; + --I; + if (!isCondBranch(I->getOpcode())) + return 1; + + // Remove the branch. + I->eraseFromParent(); + return 2; +} + +bool +AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + case AArch64::TLSDESC_BLRx: { + MachineInstr *NewMI = + BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL)) + .addOperand(MI.getOperand(1)); + MI.setDesc(get(AArch64::BLRx)); + + llvm::finalizeBundle(MBB, NewMI, *++MBBI); + return true; + } + default: + return false; + } + + return false; +} + +void +AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, + int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBB.findDebugLoc(MBBI); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FrameIdx); + + MachineMemOperand *MMO + = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), + MachineMemOperand::MOStore, + MFI.getObjectSize(FrameIdx), + Align); + + unsigned StoreOp = 0; + if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) { + switch(RC->getSize()) { + case 4: StoreOp = AArch64::LS32_STR; break; + case 8: StoreOp = AArch64::LS64_STR; break; + default: + llvm_unreachable("Unknown size for regclass"); + } + } else { + assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) || + RC->hasType(MVT::f128)) + && "Expected integer or floating type for store"); + switch (RC->getSize()) { + case 4: StoreOp = AArch64::LSFP32_STR; break; + case 8: StoreOp = AArch64::LSFP64_STR; break; + case 16: StoreOp = AArch64::LSFP128_STR; break; + default: + 
llvm_unreachable("Unknown size for regclass"); + } + } + + MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp)); + NewMI.addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FrameIdx) + .addImm(0) + .addMemOperand(MMO); + +} + +void +AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBB.findDebugLoc(MBBI); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FrameIdx); + + MachineMemOperand *MMO + = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), + MachineMemOperand::MOLoad, + MFI.getObjectSize(FrameIdx), + Align); + + unsigned LoadOp = 0; + if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) { + switch(RC->getSize()) { + case 4: LoadOp = AArch64::LS32_LDR; break; + case 8: LoadOp = AArch64::LS64_LDR; break; + default: + llvm_unreachable("Unknown size for regclass"); + } + } else { + assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) + || RC->hasType(MVT::f128)) + && "Expected integer or floating type for store"); + switch (RC->getSize()) { + case 4: LoadOp = AArch64::LSFP32_LDR; break; + case 8: LoadOp = AArch64::LSFP64_LDR; break; + case 16: LoadOp = AArch64::LSFP128_LDR; break; + default: + llvm_unreachable("Unknown size for regclass"); + } + } + + MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg); + NewMI.addFrameIndex(FrameIdx) + .addImm(0) + .addMemOperand(MMO); +} + +unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const { + unsigned Limit = (1 << 16) - 1; + for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { + for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (!I->getOperand(i).isFI()) continue; + + // 
When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff + // is the largest offset guaranteed to fit in the immediate offset. + if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) { + Limit = std::min(Limit, 0xfffu); + break; + } + + int AccessScale, MinOffset, MaxOffset; + getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset); + Limit = std::min(Limit, static_cast<unsigned>(MaxOffset)); + + break; // At most one FI per instruction + } + } + } + + return Limit; +} +void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI, + int &AccessScale, int &MinOffset, + int &MaxOffset) const { + switch (MI.getOpcode()) { + default: llvm_unreachable("Unkown load/store kind"); + case TargetOpcode::DBG_VALUE: + AccessScale = 1; + MinOffset = INT_MIN; + MaxOffset = INT_MAX; + return; + case AArch64::LS8_LDR: case AArch64::LS8_STR: + case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR: + case AArch64::LDRSBw: + case AArch64::LDRSBx: + AccessScale = 1; + MinOffset = 0; + MaxOffset = 0xfff; + return; + case AArch64::LS16_LDR: case AArch64::LS16_STR: + case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR: + case AArch64::LDRSHw: + case AArch64::LDRSHx: + AccessScale = 2; + MinOffset = 0; + MaxOffset = 0xfff * AccessScale; + return; + case AArch64::LS32_LDR: case AArch64::LS32_STR: + case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR: + case AArch64::LDRSWx: + case AArch64::LDPSWx: + AccessScale = 4; + MinOffset = 0; + MaxOffset = 0xfff * AccessScale; + return; + case AArch64::LS64_LDR: case AArch64::LS64_STR: + case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR: + case AArch64::PRFM: + AccessScale = 8; + MinOffset = 0; + MaxOffset = 0xfff * AccessScale; + return; + case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR: + AccessScale = 16; + MinOffset = 0; + MaxOffset = 0xfff * AccessScale; + return; + case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR: + case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR: + AccessScale = 4; + MinOffset = -0x40 * 
AccessScale; + MaxOffset = 0x3f * AccessScale; + return; + case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR: + case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR: + AccessScale = 8; + MinOffset = -0x40 * AccessScale; + MaxOffset = 0x3f * AccessScale; + return; + case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR: + AccessScale = 16; + MinOffset = -0x40 * AccessScale; + MaxOffset = 0x3f * AccessScale; + return; + } +} + +unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { + const MCInstrDesc &MCID = MI.getDesc(); + const MachineBasicBlock &MBB = *MI.getParent(); + const MachineFunction &MF = *MBB.getParent(); + const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo(); + + if (MCID.getSize()) + return MCID.getSize(); + + if (MI.getOpcode() == AArch64::INLINEASM) + return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI); + + if (MI.isLabel()) + return 0; + + switch (MI.getOpcode()) { + case TargetOpcode::BUNDLE: + return getInstBundleLength(MI); + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::PROLOG_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::DBG_VALUE: + return 0; + case AArch64::CONSTPOOL_ENTRY: + return MI.getOperand(2).getImm(); + case AArch64::TLSDESCCALL: + return 0; + default: + llvm_unreachable("Unknown instruction class"); + } +} + +unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const { + unsigned Size = 0; + MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); + while (++I != E && I->isInsideBundle()) { + assert(!I->isBundle() && "No nested bundle!"); + Size += getInstSizeInBytes(*I); + } + return Size; +} + +bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const AArch64InstrInfo &TII) { + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = 
*MF.getFrameInfo(); + + MFI.getObjectOffset(FrameRegIdx); + llvm_unreachable("Unimplemented rewriteFrameIndex"); +} + +void llvm::emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + DebugLoc dl, const TargetInstrInfo &TII, + unsigned DstReg, unsigned SrcReg, unsigned ScratchReg, + int64_t NumBytes, MachineInstr::MIFlag MIFlags) { + if (NumBytes == 0 && DstReg == SrcReg) + return; + else if (abs(NumBytes) & ~0xffffff) { + // Generically, we have to materialize the offset into a temporary register + // and subtract it. There are a couple of ways this could be done, for now + // we'll go for a literal-pool load. + MachineFunction &MF = *MBB.getParent(); + MachineConstantPool *MCP = MF.getConstantPool(); + const Constant *C + = ConstantInt::get(Type::getInt64Ty(MF.getFunction()->getContext()), + abs(NumBytes)); + unsigned CPI = MCP->getConstantPoolIndex(C, 8); + + // LDR xTMP, .LITPOOL + BuildMI(MBB, MBBI, dl, TII.get(AArch64::LDRx_lit), ScratchReg) + .addConstantPoolIndex(CPI) + .setMIFlag(MIFlags); + + // ADD DST, SRC, xTMP (, lsl #0) + unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx; + BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg) + .addReg(SrcReg, RegState::Kill) + .addReg(ScratchReg, RegState::Kill) + .addImm(0) + .setMIFlag(MIFlags); + return; + } + + // Now we know that the adjustment can be done in at most two add/sub + // (immediate) instructions, which is always more efficient than a + // literal-pool load, or even a hypothetical movz/movk/add sequence + + // Decide whether we're doing addition or subtraction + unsigned LowOp, HighOp; + if (NumBytes >= 0) { + LowOp = AArch64::ADDxxi_lsl0_s; + HighOp = AArch64::ADDxxi_lsl12_s; + } else { + LowOp = AArch64::SUBxxi_lsl0_s; + HighOp = AArch64::SUBxxi_lsl12_s; + NumBytes = abs(NumBytes); + } + + // If we're here, at the very least a move needs to be produced, which just + // happens to be materializable by an ADD. 
+ if ((NumBytes & 0xfff) || NumBytes == 0) { + BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(NumBytes & 0xfff) + .setMIFlag(MIFlags); + + // Next update should use the register we've just defined. + SrcReg = DstReg; + } + + if (NumBytes & 0xfff000) { + BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(NumBytes >> 12) + .setMIFlag(MIFlags); + } +} + +void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + DebugLoc dl, const TargetInstrInfo &TII, + unsigned ScratchReg, int64_t NumBytes, + MachineInstr::MIFlag MIFlags) { + emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16, + NumBytes, MIFlags); +} + + +namespace { + struct LDTLSCleanup : public MachineFunctionPass { + static char ID; + LDTLSCleanup() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + AArch64MachineFunctionInfo* MFI = MF.getInfo<AArch64MachineFunctionInfo>(); + if (MFI->getNumLocalDynamicTLSAccesses() < 2) { + // No point folding accesses if there isn't at least two. + return false; + } + + MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>(); + return VisitNode(DT->getRootNode(), 0); + } + + // Visit the dominator subtree rooted at Node in pre-order. + // If TLSBaseAddrReg is non-null, then use that to replace any + // TLS_base_addr instructions. Otherwise, create the register + // when the first such instruction is seen, and then use it + // as we encounter more instructions. + bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) { + MachineBasicBlock *BB = Node->getBlock(); + bool Changed = false; + + // Traverse the current block. + for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; + ++I) { + switch (I->getOpcode()) { + case AArch64::TLSDESC_BLRx: + // Make sure it's a local dynamic access. 
+ if (!I->getOperand(1).isSymbol() || + strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_")) + break; + + if (TLSBaseAddrReg) + I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg); + else + I = SetRegister(I, &TLSBaseAddrReg); + Changed = true; + break; + default: + break; + } + } + + // Visit the children of this block in the dominator tree. + for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end(); + I != E; ++I) { + Changed |= VisitNode(*I, TLSBaseAddrReg); + } + + return Changed; + } + + // Replace the TLS_base_addr instruction I with a copy from + // TLSBaseAddrReg, returning the new instruction. + MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I, + unsigned TLSBaseAddrReg) { + MachineFunction *MF = I->getParent()->getParent(); + const AArch64TargetMachine *TM = + static_cast<const AArch64TargetMachine *>(&MF->getTarget()); + const AArch64InstrInfo *TII = TM->getInstrInfo(); + + // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the + // code sequence assumes the address will be. + MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(), + TII->get(TargetOpcode::COPY), + AArch64::X0) + .addReg(TLSBaseAddrReg); + + // Erase the TLS_base_addr instruction. + I->eraseFromParent(); + + return Copy; + } + + // Create a virtual register in *TLSBaseAddrReg, and populate it by + // inserting a copy instruction after I. Returns the new instruction. + MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) { + MachineFunction *MF = I->getParent()->getParent(); + const AArch64TargetMachine *TM = + static_cast<const AArch64TargetMachine *>(&MF->getTarget()); + const AArch64InstrInfo *TII = TM->getInstrInfo(); + + // Create a virtual register for the TLS base address. + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass); + + // Insert a copy from X0 to TLSBaseAddrReg for later. 
+ MachineInstr *Next = I->getNextNode(); + MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), + TII->get(TargetOpcode::COPY), + *TLSBaseAddrReg) + .addReg(AArch64::X0); + + return Copy; + } + + virtual const char *getPassName() const { + return "Local Dynamic TLS Access Clean-up"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +char LDTLSCleanup::ID = 0; +FunctionPass* +llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h new file mode 100644 index 0000000..8084f78 --- /dev/null +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -0,0 +1,110 @@ +//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_AARCH64INSTRINFO_H +#define LLVM_TARGET_AARCH64INSTRINFO_H + +#include "llvm/Target/TargetInstrInfo.h" +#include "AArch64RegisterInfo.h" + +#define GET_INSTRINFO_HEADER +#include "AArch64GenInstrInfo.inc" + +namespace llvm { + +class AArch64Subtarget; + +class AArch64InstrInfo : public AArch64GenInstrInfo { + const AArch64RegisterInfo RI; + const AArch64Subtarget &Subtarget; +public: + explicit AArch64InstrInfo(const AArch64Subtarget &TM); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. 
As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + const TargetRegisterInfo &getRegisterInfo() const { return RI; } + + const AArch64Subtarget &getSubTarget() const { return Subtarget; } + + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, + uint64_t Offset, const MDNode *MDPtr, + DebugLoc DL) const; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify = false) const; + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + unsigned RemoveBranch(MachineBasicBlock &MBB) const; + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + + bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + + /// Look through the instructions in this function and work out the largest + /// the stack frame can be while maintaining the ability to address local + /// slots with no complexities. + unsigned estimateRSStackLimit(MachineFunction &MF) const; + + /// getAddressConstraints - For loads and stores (and PRFMs) taking an + /// immediate offset, this function determines the constraints required for + /// the immediate. 
It must satisfy: + /// + MinOffset <= imm <= MaxOffset + /// + imm % AccessScale == 0 + void getAddressConstraints(const MachineInstr &MI, int &AccessScale, + int &MinOffset, int &MaxOffset) const; + + unsigned getInstSizeInBytes(const MachineInstr &MI) const; + + unsigned getInstBundleLength(const MachineInstr &MI) const; +}; + +bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const AArch64InstrInfo &TII); + + +void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + DebugLoc dl, const TargetInstrInfo &TII, + unsigned DstReg, unsigned SrcReg, unsigned ScratchReg, + int64_t NumBytes, + MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags); + +void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + DebugLoc dl, const TargetInstrInfo &TII, + unsigned ScratchReg, int64_t NumBytes, + MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags); + +} + +#endif diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td new file mode 100644 index 0000000..3c15200 --- /dev/null +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -0,0 +1,5298 @@ +include "AArch64InstrFormats.td" + +//===----------------------------------------------------------------------===// +// Target-specific ISD nodes and profiles +//===----------------------------------------------------------------------===// + +def SDT_A64ret : SDTypeProfile<0, 0, []>; +def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain, + SDNPOptInGlue]>; + +// (ins NZCV, Condition, Dest) +def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>; +def A64br_cc : SDNode<"AArch64ISD::BR_CC", SDT_A64br_cc, [SDNPHasChain]>; + +// (outs Result), (ins NZCV, IfTrue, IfFalse, Condition) +def SDT_A64select_cc : SDTypeProfile<1, 4, [SDTCisVT<1, i32>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<2, 3>]>; +def A64select_cc : SDNode<"AArch64ISD::SELECT_CC", SDT_A64select_cc>; + +// (outs NZCV), (ins LHS, RHS, Condition) 
+def SDT_A64setcc : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, + SDTCisSameAs<1, 2>]>; +def A64setcc : SDNode<"AArch64ISD::SETCC", SDT_A64setcc>; + + +// (outs GPR64), (ins) +def A64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; + +// A64 compares don't care about the cond really (they set all flags) so a +// simple binary operator is useful. +def A64cmp : PatFrag<(ops node:$lhs, node:$rhs), + (A64setcc node:$lhs, node:$rhs, cond)>; + + +// When matching a notional (CMP op1, (sub 0, op2)), we'd like to use a CMN +// instruction on the grounds that "op1 - (-op2) == op1 + op2". However, the C +// and V flags can be set differently by this operation. It comes down to +// whether "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are +// then everything is fine. If not then the optimization is wrong. Thus general +// comparisons are only valid if op2 != 0. + +// So, finally, the only LLVM-native comparisons that don't mention C and V are +// SETEQ and SETNE. They're the only ones we can safely use CMN for in the +// absence of information about op2. +def equality_cond : PatLeaf<(cond), [{ + return N->get() == ISD::SETEQ || N->get() == ISD::SETNE; +}]>; + +def A64cmn : PatFrag<(ops node:$lhs, node:$rhs), + (A64setcc node:$lhs, (sub 0, node:$rhs), equality_cond)>; + +// There are two layers of indirection here, driven by the following +// considerations. +// + TableGen does not know CodeModel or Reloc so that decision should be +// made for a variable/address at ISelLowering. 
+// + The output of ISelLowering should be selectable (hence the Wrapper, +// rather than a bare target opcode) +def SDTAArch64Wrapper : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i32>, + SDTCisPtrTy<0>]>; + +def A64WrapperSmall : SDNode<"AArch64ISD::WrapperSmall", SDTAArch64Wrapper>; + + +def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; +def A64GOTLoad : SDNode<"AArch64ISD::GOTLoad", SDTAArch64GOTLoad, + [SDNPHasChain]>; + + +// (A64BFI LHS, RHS, LSB, Width) +def SDTA64BFI : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i64>, + SDTCisVT<4, i64>]>; + +def A64Bfi : SDNode<"AArch64ISD::BFI", SDTA64BFI>; + +// (A64EXTR HiReg, LoReg, LSB) +def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, + SDTCisVT<3, i64>]>; +def A64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>; + +// (A64[SU]BFX Field, ImmR, ImmS). +// +// Note that ImmR and ImmS are already encoded for the actual instructions. The +// more natural LSB and Width mix together to form ImmR and ImmS, something +// which TableGen can't handle. +def SDTA64BFX : SDTypeProfile<1, 3, [SDTCisVT<2, i64>, SDTCisVT<3, i64>]>; +def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>; + +def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>; + +//===----------------------------------------------------------------------===// +// Call sequence pseudo-instructions +//===----------------------------------------------------------------------===// + + +def SDT_AArch64Call : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def AArch64Call : SDNode<"AArch64ISD::Call", SDT_AArch64Call, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +// The TLSDESCCALL node is a variant call which goes to an indirectly calculated +// destination but needs a relocation against a fixed symbol. 
As such it has two +// certain operands: the callee and the relocated variable. +// +// The TLS ABI only allows it to be selected to a BLR instruction (with +// appropriate relocation). +def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; + +def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall, + [SDNPInGlue, SDNPOutGlue, SDNPHasChain, SDNPVariadic]>; + + +def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>; +def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AArch64CallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; + +def SDT_AArch64CallSeqEnd : SDCallSeqEnd<[ SDTCisPtrTy<0>, SDTCisPtrTy<1> ]>; +def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + + + +// These pseudo-instructions have special semantics by virtue of being passed to +// the InstrInfo constructor. CALLSEQ_START/CALLSEQ_END are produced by +// LowerCall to (in our case) tell the back-end about stack adjustments for +// arguments passed on the stack. Here we select those markers to +// pseudo-instructions which explicitly set the stack, and finally in the +// RegisterInfo we convert them to a true stack adjustment. 
+let Defs = [XSP], Uses = [XSP] in +{ + def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt), + [(AArch64callseq_start timm:$amt)]>; + + def ADJCALLSTACKUP : PseudoInst<(outs), (ins i64imm:$amt1, i64imm:$amt2), + [(AArch64callseq_end timm:$amt1, timm:$amt2)]>; +} + +//===----------------------------------------------------------------------===// +// Atomic operation pseudo-instructions +//===----------------------------------------------------------------------===// + +let usesCustomInserter = 1, Defs = [NZCV] in { +multiclass AtomicSizes<string opname> +{ + def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), + [(set GPR32:$dst, (!cast<SDNode>(opname # "_8") GPR64:$ptr, GPR32:$incr))]>; + def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), + [(set GPR32:$dst, (!cast<SDNode>(opname # "_16") GPR64:$ptr, GPR32:$incr))]>; + def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), + [(set GPR32:$dst, (!cast<SDNode>(opname # "_32") GPR64:$ptr, GPR32:$incr))]>; + def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr), + [(set GPR64:$dst, (!cast<SDNode>(opname # "_64") GPR64:$ptr, GPR64:$incr))]>; +} +} + +defm ATOMIC_LOAD_ADD : AtomicSizes<"atomic_load_add">; +defm ATOMIC_LOAD_SUB : AtomicSizes<"atomic_load_sub">; +defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">; +defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">; +defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">; +defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">; +defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">; +defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">; +defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">; +defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">; +defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">; + +let usesCustomInserter = 1, Defs = [NZCV] in { +def ATOMIC_CMP_SWAP_I8 + : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), + [(set GPR32:$dst, + (atomic_cmp_swap_8 
GPR64:$ptr, GPR32:$old, GPR32:$new))]>; +def ATOMIC_CMP_SWAP_I16 + : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), + [(set GPR32:$dst, + (atomic_cmp_swap_16 GPR64:$ptr, GPR32:$old, GPR32:$new))]>; +def ATOMIC_CMP_SWAP_I32 + : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), + [(set GPR32:$dst, + (atomic_cmp_swap_32 GPR64:$ptr, GPR32:$old, GPR32:$new))]>; +def ATOMIC_CMP_SWAP_I64 + : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new), + [(set GPR64:$dst, + (atomic_cmp_swap_64 GPR64:$ptr, GPR64:$old, GPR64:$new))]>; +} + +//===----------------------------------------------------------------------===// +// Add-subtract (extended register) instructions +//===----------------------------------------------------------------------===// +// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP + +// The RHS of these operations is conceptually a sign/zero-extended +// register, optionally shifted left by 1-4. The extension can be a +// NOP (e.g. "sxtx" sign-extending a 64-bit register to 64-bits) but +// must be specified with one exception: + +// If one of the registers is sp/wsp then LSL is an alias for UXTW in +// 32-bit instructions and UXTX in 64-bit versions, the shift amount +// is not optional in that case (but can explicitly be 0), and the +// entire suffix can be skipped (e.g. "add sp, x3, x2"). 
+ +multiclass extend_operands<string PREFIX> +{ + def _asmoperand : AsmOperandClass + { + let Name = PREFIX; + let RenderMethod = "addRegExtendOperands"; + let PredicateMethod = "isRegExtend<A64SE::" # PREFIX # ">"; + } + + def _operand : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 4; }]> + { + let PrintMethod = "printRegExtendOperand<A64SE::" # PREFIX # ">"; + let DecoderMethod = "DecodeRegExtendOperand"; + let ParserMatchClass = !cast<AsmOperandClass>(PREFIX # "_asmoperand"); + } +} + +defm UXTB : extend_operands<"UXTB">; +defm UXTH : extend_operands<"UXTH">; +defm UXTW : extend_operands<"UXTW">; +defm UXTX : extend_operands<"UXTX">; +defm SXTB : extend_operands<"SXTB">; +defm SXTH : extend_operands<"SXTH">; +defm SXTW : extend_operands<"SXTW">; +defm SXTX : extend_operands<"SXTX">; + +def LSL_extasmoperand : AsmOperandClass +{ + let Name = "RegExtendLSL"; + let RenderMethod = "addRegExtendOperands"; +} + +def LSL_extoperand : Operand<i64> +{ + let ParserMatchClass = LSL_extasmoperand; +} + + +// The patterns for various sign-extensions are a little ugly and +// non-uniform because everything has already been promoted to the +// legal i64 and i32 types. We'll wrap the various variants up in a +// class for use later. 
+class extend_types +{ + dag uxtb; dag uxth; dag uxtw; dag uxtx; + dag sxtb; dag sxth; dag sxtw; dag sxtx; +} + +def extends_to_i64 : extend_types +{ + let uxtb = (and (anyext GPR32:$Rm), 255); + let uxth = (and (anyext GPR32:$Rm), 65535); + let uxtw = (zext GPR32:$Rm); + let uxtx = (i64 GPR64:$Rm); + + let sxtb = (sext_inreg (anyext GPR32:$Rm), i8); + let sxth = (sext_inreg (anyext GPR32:$Rm), i16); + let sxtw = (sext GPR32:$Rm); + let sxtx = (i64 GPR64:$Rm); +} + + +def extends_to_i32 : extend_types +{ + let uxtb = (and GPR32:$Rm, 255); + let uxth = (and GPR32:$Rm, 65535); + let uxtw = (i32 GPR32:$Rm); + let uxtx = (i32 GPR32:$Rm); + + let sxtb = (sext_inreg GPR32:$Rm, i8); + let sxth = (sext_inreg GPR32:$Rm, i16); + let sxtw = (i32 GPR32:$Rm); + let sxtx = (i32 GPR32:$Rm); +} + +// Now, six of the extensions supported are easy and uniform: if the source size +// is 32-bits or less, then Rm is always a 32-bit register. We'll instantiate +// those instructions in one block. + +// The uxtx/sxtx could potentially be merged in, but three facts dissuaded me: +// + It would break the naming scheme: either ADDxx_uxtx or ADDww_uxtx would +// be impossible. +// + Patterns are very different as well. +// + Passing different registers would be ugly (more fields in extend_types +// would probably be the best option). 
+multiclass addsub_exts<bit sf, bit op, bit S, string asmop, SDPatternOperator opfrag, + dag outs, extend_types exts, RegisterClass GPRsp> +{ + def w_uxtb : A64I_addsubext<sf, op, S, 0b00, 0b000, + outs, + (ins GPRsp:$Rn, GPR32:$Rm, UXTB_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag GPRsp:$Rn, (shl exts.uxtb, UXTB_operand:$Imm3))], + NoItinerary>; + def w_uxth : A64I_addsubext<sf, op, S, 0b00, 0b001, + outs, + (ins GPRsp:$Rn, GPR32:$Rm, UXTH_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag GPRsp:$Rn, (shl exts.uxth, UXTH_operand:$Imm3))], + NoItinerary>; + def w_uxtw : A64I_addsubext<sf, op, S, 0b00, 0b010, + outs, + (ins GPRsp:$Rn, GPR32:$Rm, UXTW_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag GPRsp:$Rn, (shl exts.uxtw, UXTW_operand:$Imm3))], + NoItinerary>; + + def w_sxtb : A64I_addsubext<sf, op, S, 0b00, 0b100, + outs, + (ins GPRsp:$Rn, GPR32:$Rm, SXTB_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag GPRsp:$Rn, (shl exts.sxtb, SXTB_operand:$Imm3))], + NoItinerary>; + def w_sxth : A64I_addsubext<sf, op, S, 0b00, 0b101, + outs, + (ins GPRsp:$Rn, GPR32:$Rm, SXTH_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag GPRsp:$Rn, (shl exts.sxth, SXTH_operand:$Imm3))], + NoItinerary>; + def w_sxtw : A64I_addsubext<sf, op, S, 0b00, 0b110, + outs, + (ins GPRsp:$Rn, GPR32:$Rm, SXTW_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag GPRsp:$Rn, (shl exts.sxtw, SXTW_operand:$Imm3))], + NoItinerary>; +} + +// These two could be merged in with the above, but their patterns aren't really +// necessary and the naming-scheme would necessarily break: +multiclass addsub_xxtx<bit op, bit S, string asmop, SDPatternOperator opfrag, dag outs> +{ + def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011, + outs, + (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag GPR64xsp:$Rn, (shl GPR64:$Rm, UXTX_operand:$Imm3))], + NoItinerary>; + + 
def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111, + outs, + (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [/* No Pattern: same as uxtx */], + NoItinerary>; +} + +multiclass addsub_wxtx<bit op, bit S, string asmop, dag outs> +{ + def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011, + outs, + (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [/* No pattern: probably same as uxtw */], + NoItinerary>; + + def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111, + outs, + (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [/* No Pattern: probably same as uxtw */], + NoItinerary>; +} + +class SetRD<RegisterClass RC, SDPatternOperator op> + : PatFrag<(ops node:$lhs, node:$rhs), (set RC:$Rd, (op node:$lhs, node:$rhs))>; +class SetNZCV<SDPatternOperator op> + : PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>; + +defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>, + (outs GPR64xsp:$Rd), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>, + (outs GPR64xsp:$Rd)>; +defm ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR32wsp, add>, + (outs GPR32wsp:$Rd), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b0, 0b0, "add\t$Rd, ", + (outs GPR32wsp:$Rd)>; +defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>, + (outs GPR64xsp:$Rd), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>, + (outs GPR64xsp:$Rd)>; +defm SUBww :addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR32wsp, sub>, + (outs GPR32wsp:$Rd), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b1, 0b0, "sub\t$Rd, ", + (outs GPR32wsp:$Rd)>; + +let Defs = [NZCV] in { +defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>, + (outs GPR64:$Rd), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>, + (outs 
GPR64:$Rd)>; +defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR32, addc>, + (outs GPR32:$Rd), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b0, 0b1, "adds\t$Rd, ", + (outs GPR32:$Rd)>; +defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>, + (outs GPR64:$Rd), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>, + (outs GPR64:$Rd)>; +defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR32, subc>, + (outs GPR32:$Rd), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b1, 0b1, "subs\t$Rd, ", + (outs GPR32:$Rd)>; + + +let Rd = 0b11111, isCompare = 1 in { +defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, + (outs), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, (outs)>; +defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, + (outs), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b0, 0b1, "cmn\t", (outs)>; +defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, + (outs), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, (outs)>; +defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, + (outs), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b1, 0b1, "cmp\t", (outs)>; +} +} + +// Now patterns for the operation without a shift being needed. No patterns are +// created for uxtx/sxtx since they're non-uniform and it's expected that +// add/sub (shifted register) will handle those cases anyway. 
+multiclass addsubext_noshift_patterns<string prefix, SDPatternOperator nodeop, + RegisterClass GPRsp, extend_types exts> +{ + def : Pat<(nodeop GPRsp:$Rn, exts.uxtb), + (!cast<Instruction>(prefix # "w_uxtb") GPRsp:$Rn, GPR32:$Rm, 0)>; + def : Pat<(nodeop GPRsp:$Rn, exts.uxth), + (!cast<Instruction>(prefix # "w_uxth") GPRsp:$Rn, GPR32:$Rm, 0)>; + def : Pat<(nodeop GPRsp:$Rn, exts.uxtw), + (!cast<Instruction>(prefix # "w_uxtw") GPRsp:$Rn, GPR32:$Rm, 0)>; + + def : Pat<(nodeop GPRsp:$Rn, exts.sxtb), + (!cast<Instruction>(prefix # "w_sxtb") GPRsp:$Rn, GPR32:$Rm, 0)>; + def : Pat<(nodeop GPRsp:$Rn, exts.sxth), + (!cast<Instruction>(prefix # "w_sxth") GPRsp:$Rn, GPR32:$Rm, 0)>; + def : Pat<(nodeop GPRsp:$Rn, exts.sxtw), + (!cast<Instruction>(prefix # "w_sxtw") GPRsp:$Rn, GPR32:$Rm, 0)>; +} + +defm : addsubext_noshift_patterns<"ADDxx", add, GPR64xsp, extends_to_i64>; +defm : addsubext_noshift_patterns<"ADDww", add, GPR32wsp, extends_to_i32>; +defm : addsubext_noshift_patterns<"SUBxx", sub, GPR64xsp, extends_to_i64>; +defm : addsubext_noshift_patterns<"SUBww", sub, GPR32wsp, extends_to_i32>; + +defm : addsubext_noshift_patterns<"CMNx", A64cmn, GPR64xsp, extends_to_i64>; +defm : addsubext_noshift_patterns<"CMNw", A64cmn, GPR32wsp, extends_to_i32>; +defm : addsubext_noshift_patterns<"CMPx", A64cmp, GPR64xsp, extends_to_i64>; +defm : addsubext_noshift_patterns<"CMPw", A64cmp, GPR32wsp, extends_to_i32>; + +// An extend of "lsl #imm" is valid if and only if one of Rn and Rd is +// sp/wsp. It is synonymous with uxtx/uxtw depending on the size of the +// operation. Also permitted in this case is complete omission of the argument, +// which implies "lsl #0". 
+multiclass lsl_aliases<string asmop, Instruction inst, RegisterClass GPR_Rd, + RegisterClass GPR_Rn, RegisterClass GPR_Rm> +{ + def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"), + (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>; + + def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm, $LSL"), + (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>; + +} + +defm : lsl_aliases<"add", ADDxxx_uxtx, Rxsp, GPR64xsp, GPR64>; +defm : lsl_aliases<"add", ADDxxx_uxtx, GPR64xsp, Rxsp, GPR64>; +defm : lsl_aliases<"add", ADDwww_uxtw, Rwsp, GPR32wsp, GPR32>; +defm : lsl_aliases<"add", ADDwww_uxtw, GPR32wsp, Rwsp, GPR32>; +defm : lsl_aliases<"sub", SUBxxx_uxtx, Rxsp, GPR64xsp, GPR64>; +defm : lsl_aliases<"sub", SUBxxx_uxtx, GPR64xsp, Rxsp, GPR64>; +defm : lsl_aliases<"sub", SUBwww_uxtw, Rwsp, GPR32wsp, GPR32>; +defm : lsl_aliases<"sub", SUBwww_uxtw, GPR32wsp, Rwsp, GPR32>; + +// Rd cannot be sp for flag-setting variants so only half of the aliases are +// needed. +defm : lsl_aliases<"adds", ADDSxxx_uxtx, GPR64, Rxsp, GPR64>; +defm : lsl_aliases<"adds", ADDSwww_uxtw, GPR32, Rwsp, GPR32>; +defm : lsl_aliases<"subs", SUBSxxx_uxtx, GPR64, Rxsp, GPR64>; +defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>; + +// CMP unfortunately has to be different because the instruction doesn't have a +// dest register. 
+multiclass cmp_lsl_aliases<string asmop, Instruction inst, + RegisterClass GPR_Rn, RegisterClass GPR_Rm> +{ + def : InstAlias<!strconcat(asmop, " $Rn, $Rm"), + (inst GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>; + + def : InstAlias<!strconcat(asmop, " $Rn, $Rm, $LSL"), + (inst GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>; +} + +defm : cmp_lsl_aliases<"cmp", CMPxx_uxtx, Rxsp, GPR64>; +defm : cmp_lsl_aliases<"cmp", CMPww_uxtw, Rwsp, GPR32>; +defm : cmp_lsl_aliases<"cmn", CMNxx_uxtx, Rxsp, GPR64>; +defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>; + +//===----------------------------------------------------------------------===// +// Add-subtract (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, MOV + +// These instructions accept a 12-bit unsigned immediate, optionally shifted +// left by 12 bits. Official assembly format specifies a 12 bit immediate with +// one of "", "LSL #0", "LSL #12" supplementary operands. + +// There are surprisingly few ways to make this work with TableGen, so this +// implementation has separate instructions for the "LSL #0" and "LSL #12" +// variants. + +// If the MCInst retained a single combined immediate (which could be 0x123000, +// for example) then both components (imm & shift) would have to be delegated to +// a single assembly operand. This would entail a separate operand parser +// (because the LSL would have to live in the same AArch64Operand as the +// immediate to be accessible); assembly parsing is rather complex and +// error-prone C++ code. +// +// By splitting the immediate, we can delegate handling this optional operand to +// an InstAlias. Supporting functions to generate the correct MCInst are still +// required, but these are essentially trivial and parsing can remain generic. 
+//
+// Rejected plans with rationale:
+// ------------------------------
+//
+// In an ideal world you'd have two first class immediate operands (in
+// InOperandList, specifying imm12 and shift). Unfortunately this is not
+// selectable by any means I could discover.
+//
+// An Instruction with two MCOperands hidden behind a single entry in
+// InOperandList (expanded by ComplexPatterns and MIOperandInfo) was functional,
+// but required more C++ code to handle encoding/decoding. Parsing (the intended
+// main beneficiary) ended up equally complex because of the optional nature of
+// "LSL #0".
+//
+// Attempting to circumvent the need for a custom OperandParser above by giving
+// InstAliases without the "lsl #0" failed. add/sub could be accommodated but
+// the cmp/cmn aliases didn't use the MIOperandInfo to determine how operands
+// should be parsed: there was no way to accommodate an "lsl #12".
+
+// Assembly operand classes for the two immediate forms. Both share one parser
+// method and one render method; only the class Name differs.
+let ParserMethod = "ParseImmWithLSLOperand",
+    RenderMethod = "addImmWithLSLOperands" in
+{
+  // Derived PredicateMethod fields are different for each
+  def addsubimm_lsl0_asmoperand : AsmOperandClass
+  {
+    let Name = "AddSubImmLSL0";
+  }
+
+  def addsubimm_lsl12_asmoperand : AsmOperandClass
+  {
+    let Name = "AddSubImmLSL12";
+  }
+}
+
+// Rewrites a matched immediate to (value >> 12), emitted as an i32 target
+// constant.
+def shr_12_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getSExtValue() >> 12, MVT::i32);
+}]>;
+
+// Rewrites a matched immediate to ((-value) >> 12), emitted as an i32 target
+// constant.
+def shr_12_neg_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant((-N->getSExtValue()) >> 12, MVT::i32);
+}]>;
+
+// Rewrites a matched immediate to its negation, emitted as an i32 target
+// constant.
+def neg_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(-N->getSExtValue(), MVT::i32);
+}]>;
+
+
+// addsub_imm_operands: defines four immediate operands of value type `ty`.
+//   _posimm_lsl0  : Imm >= 0 and no bits set outside 0xfff; no transform.
+//   _negimm_lsl0  : Imm < 0 and -Imm has no bits outside 0xfff; neg_XFORM.
+//   _posimm_lsl12 : Imm >= 0 and no bits set outside 0xfff000; shr_12_XFORM.
+//   _negimm_lsl12 : Imm < 0 and -Imm has no bits outside 0xfff000;
+//                   shr_12_neg_XFORM.
+// The lsl0 and lsl12 pairs differ in PrintMethod and ParserMatchClass and share
+// the same EncoderMethod.
+multiclass addsub_imm_operands<ValueType ty>
+{
+  let PrintMethod = "printAddSubImmLSL0Operand",
+      EncoderMethod = "getAddSubImmOpValue",
+      ParserMatchClass = addsubimm_lsl0_asmoperand in
+  {
+    def _posimm_lsl0 : Operand<ty>,
+        ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff) == 0; }]>;
+    def _negimm_lsl0 : Operand<ty>,
+        ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff) == 0; }],
+                neg_XFORM>;
+  }
+
+  let PrintMethod = "printAddSubImmLSL12Operand",
+      EncoderMethod = "getAddSubImmOpValue",
+      ParserMatchClass = addsubimm_lsl12_asmoperand in
+  {
+    def _posimm_lsl12 : Operand<ty>,
+        ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff000) == 0; }],
+            shr_12_XFORM>;
+
+    def _negimm_lsl12 : Operand<ty>,
+        ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff000) == 0; }],
+                shr_12_neg_XFORM>;
+  }
+}
+
+// The add operands don't need any transformation
+defm addsubimm_operand_i32 : addsub_imm_operands<i32>;
+defm addsubimm_operand_i64 : addsub_imm_operands<i64>;
+
+// addsubimm_varieties: for one (sf, op, shift) combination defines three
+// instructions:
+//   _s   : non-flag-setting form; Rd and Rn both use GPRsp; selected from
+//          (add Rn, imm_operand).
+//   _S   : flag-setting form (asm mnemonic is asmop followed by "s"); Rd uses
+//          GPR, Defs NZCV; selected from (addc Rn, imm_operand).
+//   _cmp : compare alias with no outputs; Rd is fixed to 0b11111, Defs NZCV,
+//          isCompare set; selected from (A64cmp Rn, cmp_imm_operand).
+// The selection patterns always use add/addc, whatever `op` is; the SUB
+// instantiations below pass the *_negimm operands, whose transforms negate
+// the matched value.
+// NOTE(review): `prefix` and `ZR` are accepted but not referenced in this body.
+multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
+                               string asmop, string cmpasmop,
+                               Operand imm_operand, Operand cmp_imm_operand,
+                               RegisterClass GPR, RegisterClass GPRsp,
+                               AArch64Reg ZR>
+{
+    // All registers for non-S variants allow SP
+  def _s : A64I_addsubimm<sf, op, 0b0, shift,
+                         (outs GPRsp:$Rd),
+                         (ins GPRsp:$Rn, imm_operand:$Imm12),
+                         !strconcat(asmop, "\t$Rd, $Rn, $Imm12"),
+                         [(set GPRsp:$Rd,
+                               (add GPRsp:$Rn, imm_operand:$Imm12))],
+                         NoItinerary>;
+
+
+  // S variants can read SP but would write to ZR
+  def _S : A64I_addsubimm<sf, op, 0b1, shift,
+                         (outs GPR:$Rd),
+                         (ins GPRsp:$Rn, imm_operand:$Imm12),
+                         !strconcat(asmop, "s\t$Rd, $Rn, $Imm12"),
+                         [(set GPR:$Rd, (addc GPRsp:$Rn, imm_operand:$Imm12))],
+                         NoItinerary> {
+    let Defs = [NZCV];
+  }
+
+  // Note that the pattern here for ADDS is subtle. Canonically CMP
+  // a, b becomes SUBS a, b. If b < 0 then this is equivalent to
+  // ADDS a, (-b). This is not true in general.
+  def _cmp : A64I_addsubimm<sf, op, 0b1, shift,
+                            (outs), (ins GPRsp:$Rn, imm_operand:$Imm12),
+                            !strconcat(cmpasmop, " $Rn, $Imm12"),
+                            [(set NZCV,
+                                  (A64cmp GPRsp:$Rn, cmp_imm_operand:$Imm12))],
+                            NoItinerary>
+  {
+    let Rd = 0b11111;
+    let Defs = [NZCV];
+    let isCompare = 1;
+  }
+}
+
+
+// addsubimm_shifts: instantiates addsubimm_varieties twice, once per shift
+// encoding (0b00 with the "_lsl0" operands, 0b01 with the "_lsl12" operands).
+// `operand` and `cmpoperand` are operand-name prefixes; the suffix is appended
+// and the result looked up with !cast<Operand>.
+multiclass addsubimm_shifts<string prefix, bit sf, bit op,
+           string asmop, string cmpasmop, string operand, string cmpoperand,
+           RegisterClass GPR, RegisterClass GPRsp, AArch64Reg ZR>
+{
+  defm _lsl0 : addsubimm_varieties<prefix # "_lsl0", sf, op, 0b00,
+                                   asmop, cmpasmop,
+                                   !cast<Operand>(operand # "_lsl0"),
+                                   !cast<Operand>(cmpoperand # "_lsl0"),
+                                   GPR, GPRsp, ZR>;
+
+  defm _lsl12 : addsubimm_varieties<prefix # "_lsl12", sf, op, 0b01,
+                                    asmop, cmpasmop,
+                                    !cast<Operand>(operand # "_lsl12"),
+                                    !cast<Operand>(cmpoperand # "_lsl12"),
+                                    GPR, GPRsp, ZR>;
+}
+
+// ADD takes the positive-immediate operand and its compare alias (cmn) the
+// negative one; SUB and its compare alias (cmp) are the other way round.
+defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn",
+                               "addsubimm_operand_i32_posimm",
+                               "addsubimm_operand_i32_negimm",
+                               GPR32, GPR32wsp, WZR>;
+defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn",
+                               "addsubimm_operand_i64_posimm",
+                               "addsubimm_operand_i64_negimm",
+                               GPR64, GPR64xsp, XZR>;
+defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp",
+                               "addsubimm_operand_i32_negimm",
+                               "addsubimm_operand_i32_posimm",
+                               GPR32, GPR32wsp, WZR>;
+defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp",
+                               "addsubimm_operand_i64_negimm",
+                               "addsubimm_operand_i64_posimm",
+                               GPR64, GPR64xsp, XZR>;
+
+// MOVsp: "mov $Rd, $Rn" aliases onto `addop` with an immediate of 0, one with
+// the SP class in the source position and one with it in the destination.
+// NOTE(review): the trailing 0b1 is InstAlias's third template argument;
+// presumably the emit/print flag -- confirm against Target.td.
+multiclass MOVsp<RegisterClass GPRsp, RegisterClass SP, Instruction addop>
+{
+  def _fromsp : InstAlias<"mov $Rd, $Rn",
+                          (addop GPRsp:$Rd, SP:$Rn, 0),
+                          0b1>;
+
+  def _tosp : InstAlias<"mov $Rd, $Rn",
+                        (addop SP:$Rd, GPRsp:$Rn, 0),
+                        0b1>;
+}
+
+// Recall Rxsp is a RegisterClass containing *just* xsp.
+defm MOVxx : MOVsp<GPR64xsp, Rxsp, ADDxxi_lsl0_s>;
+defm MOVww : MOVsp<GPR32wsp, Rwsp, ADDwwi_lsl0_s>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (shifted register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS
+
+//===-------------------------------
+// 1. The "shifted register" operands. Shared with logical insts.
+//===-------------------------------
+
+// shift_operands: for one shift kind (`form`, e.g. "LSL") defines an
+// AsmOperandClass and an Operand for each width:
+//   _i32 accepts immediates 0..31 and sets a custom DecoderMethod;
+//   _i64 accepts immediates 0..63 and sets no DecoderMethod.
+// `prefix` must be the name this multiclass is instantiated under, because the
+// operands find their AsmOperandClass by name through !cast.
+multiclass shift_operands<string prefix, string form>
+{
+  def _asmoperand_i32 : AsmOperandClass
+  {
+    let Name = "Shift" # form # "i32";
+    let RenderMethod = "addShiftOperands";
+    let PredicateMethod
+      = "isShift<A64SE::" # form # ", false>";
+  }
+
+  // Note that the operand type is intentionally i64 because the DAGCombiner
+  // puts these into a canonical form.
+  def _i32 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]>
+  {
+    let ParserMatchClass
+      = !cast<AsmOperandClass>(prefix # "_asmoperand_i32");
+    let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
+    let DecoderMethod = "Decode32BitShiftOperand";
+  }
+
+  def _asmoperand_i64 : AsmOperandClass
+  {
+    let Name = "Shift" # form # "i64";
+    let RenderMethod = "addShiftOperands";
+    let PredicateMethod
+      = "isShift<A64SE::" # form # ", true>";
+  }
+
+  def _i64 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]>
+  {
+    let ParserMatchClass
+      = !cast<AsmOperandClass>(prefix # "_asmoperand_i64");
+    let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
+  }
+}
+
+defm lsl_operand : shift_operands<"lsl_operand", "LSL">;
+defm lsr_operand : shift_operands<"lsr_operand", "LSR">;
+defm asr_operand : shift_operands<"asr_operand", "ASR">;
+
+// Not used for add/sub, but defined here for completeness. The "logical
+// (shifted register)" instructions *do* have an ROR variant.
+defm ror_operand : shift_operands<"ror_operand", "ROR">;
+
+//===-------------------------------
+// 2. The basic 3.5-operand ADD/SUB/ADDS/SUBS instructions.
+//===-------------------------------
+
+// addsub_shifts: for one operation and width defines
+//   _lsl / _lsr / _asr : the instruction with Rm shifted by shl / srl / sra
+//                        (shift-type field 0b00 / 0b01 / 0b10), selected from
+//                        (opfrag Rn, (<shift> Rm, Imm6));
+//   _noshift           : "<asmop> $Rd, $Rn, $Rm" alias onto _lsl with shift 0;
+//   an anonymous Pat   : plain (opfrag Rn, Rm) selected to _lsl with shift 0.
+// `sty` ("i32" or "i64") picks the width-specific shift operand by name;
+// `prefix` must match the name the multiclass is instantiated under.
+// N.b. the commutable parameter is just !N. It will be first against the wall
+// when the revolution comes.
+multiclass addsub_shifts<string prefix, bit sf, bit op, bit s, bit commutable,
+                         string asmop, SDPatternOperator opfrag, string sty,
+                         RegisterClass GPR, list<Register> defs>
+{
+  let isCommutable = commutable, Defs = defs in {
+  def _lsl : A64I_addsubshift<sf, op, s, 0b00,
+                       (outs GPR:$Rd),
+                       (ins GPR:$Rn, GPR:$Rm,
+                            !cast<Operand>("lsl_operand_" # sty):$Imm6),
+                       !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+                       [(set GPR:$Rd, (opfrag GPR:$Rn, (shl GPR:$Rm,
+                            !cast<Operand>("lsl_operand_" # sty):$Imm6))
+                       )],
+                       NoItinerary>;
+
+  def _lsr : A64I_addsubshift<sf, op, s, 0b01,
+                       (outs GPR:$Rd),
+                       (ins GPR:$Rn, GPR:$Rm,
+                            !cast<Operand>("lsr_operand_" # sty):$Imm6),
+                       !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+                       [(set GPR:$Rd, (opfrag GPR:$Rn, (srl GPR:$Rm,
+                            !cast<Operand>("lsr_operand_" # sty):$Imm6))
+                       )],
+                       NoItinerary>;
+
+  def _asr : A64I_addsubshift<sf, op, s, 0b10,
+                       (outs GPR:$Rd),
+                       (ins GPR:$Rn, GPR:$Rm,
+                            !cast<Operand>("asr_operand_" # sty):$Imm6),
+                       !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+                       [(set GPR:$Rd, (opfrag GPR:$Rn, (sra GPR:$Rm,
+                            !cast<Operand>("asr_operand_" # sty):$Imm6))
+                       )],
+                       NoItinerary>;
+  }
+
+  def _noshift
+      : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+                 (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
+                                                      GPR:$Rm, 0)>;
+
+  def : Pat<(opfrag GPR:$Rn, GPR:$Rm),
+            (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+// addsub_sizes: instantiates addsub_shifts for the 64-bit ("xxx", sf = 1,
+// GPR64) and 32-bit ("www", sf = 0, GPR32) forms.
+multiclass addsub_sizes<string prefix, bit op, bit s, bit commutable,
+                         string asmop, SDPatternOperator opfrag,
+                         list<Register> defs>
+{
+  defm xxx : addsub_shifts<prefix # "xxx", 0b1, op, s,
+                           commutable, asmop, opfrag, "i64", GPR64, defs>;
+  defm www : addsub_shifts<prefix # "www", 0b0, op, s,
+                           commutable, asmop, opfrag, "i32", GPR32, defs>;
+}
+
+
+defm ADD : addsub_sizes<"ADD", 0b0, 0b0, 0b1, "add", add, []>;
+defm SUB : addsub_sizes<"SUB", 0b1, 0b0, 0b0, "sub", sub, []>;
+
+defm ADDS : addsub_sizes<"ADDS", 0b0, 0b1, 0b1, "adds", addc, [NZCV]>;
+defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>;
+
+//===-------------------------------
+// 3. The NEG/NEGS aliases
+//===-------------------------------
+
+// neg_alias: "neg $Rd, $Rm, $Imm6" alias onto INST with ZR as the first source
+// register, plus a pattern selecting (sub 0, (shiftop Rm, Imm6)) to the same
+// instruction.
+multiclass neg_alias<Instruction INST, RegisterClass GPR,
+                     Register ZR, Operand shift_operand, SDNode shiftop>
+{
+   def : InstAlias<"neg $Rd, $Rm, $Imm6",
+                   (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
+
+   def : Pat<(sub 0, (shiftop GPR:$Rm, shift_operand:$Imm6)),
+             (INST ZR, GPR:$Rm, shift_operand:$Imm6)>;
+}
+
+// The shift-less "neg $Rd, $Rm" form is written out by hand for each width.
+defm : neg_alias<SUBwww_lsl, GPR32, WZR, lsl_operand_i32, shl>;
+defm : neg_alias<SUBwww_lsr, GPR32, WZR, lsr_operand_i32, srl>;
+defm : neg_alias<SUBwww_asr, GPR32, WZR, asr_operand_i32, sra>;
+def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+def : Pat<(sub 0, GPR32:$Rm), (SUBwww_lsl WZR, GPR32:$Rm, 0)>;
+
+defm : neg_alias<SUBxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>;
+defm : neg_alias<SUBxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>;
+defm : neg_alias<SUBxxx_asr, GPR64, XZR, asr_operand_i64, sra>;
+def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+def : Pat<(sub 0, GPR64:$Rm), (SUBxxx_lsl XZR, GPR64:$Rm, 0)>;
+
+// NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to
+// be involved.
+// negs_alias: "negs $Rd, $Rm, $Imm6" alias onto INST with ZR as the first
+// source register. Alias only; `shiftop` is accepted but not used.
+class negs_alias<Instruction INST, RegisterClass GPR,
+                 Register ZR, Operand shift_operand, SDNode shiftop>
+  : InstAlias<"negs $Rd, $Rm, $Imm6",
+              (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
+
+def : negs_alias<SUBSwww_lsl, GPR32, WZR, lsl_operand_i32, shl>;
+def : negs_alias<SUBSwww_lsr, GPR32, WZR, lsr_operand_i32, srl>;
+def : negs_alias<SUBSwww_asr, GPR32, WZR, asr_operand_i32, sra>;
+def : InstAlias<"negs $Rd, $Rm", (SUBSwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+
+def : negs_alias<SUBSxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>;
+def : negs_alias<SUBSxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>;
+def : negs_alias<SUBSxxx_asr, GPR64, XZR, asr_operand_i64, sra>;
+def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+
+//===-------------------------------
+// 4. The CMP/CMN aliases
+//===-------------------------------
+
+// cmp_shifts: compare forms of the shifted-register add/sub encodings. Each of
+// _lsl / _lsr / _asr has no outputs, fixes Rd to 0b11111, defines NZCV and is
+// selected from (opfrag Rn, (<shift> Rm, Imm6)). _noshift and the anonymous
+// Pat map the two-register form onto _lsl with a shift of 0.
+multiclass cmp_shifts<string prefix, bit sf, bit op, bit commutable,
+                      string asmop, SDPatternOperator opfrag, string sty,
+                      RegisterClass GPR>
+{
+  let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in {
+  def _lsl : A64I_addsubshift<sf, op, 0b1, 0b00,
+                       (outs),
+                       (ins GPR:$Rn, GPR:$Rm,
+                            !cast<Operand>("lsl_operand_" # sty):$Imm6),
+                       !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+                       [(set NZCV, (opfrag GPR:$Rn, (shl GPR:$Rm,
+                            !cast<Operand>("lsl_operand_" # sty):$Imm6))
+                       )],
+                       NoItinerary>;
+
+  def _lsr : A64I_addsubshift<sf, op, 0b1, 0b01,
+                       (outs),
+                       (ins GPR:$Rn, GPR:$Rm,
+                            !cast<Operand>("lsr_operand_" # sty):$Imm6),
+                       !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+                       [(set NZCV, (opfrag GPR:$Rn, (srl GPR:$Rm,
+                            !cast<Operand>("lsr_operand_" # sty):$Imm6))
+                       )],
+                       NoItinerary>;
+
+  def _asr : A64I_addsubshift<sf, op, 0b1, 0b10,
+                       (outs),
+                       (ins GPR:$Rn, GPR:$Rm,
+                            !cast<Operand>("asr_operand_" # sty):$Imm6),
+                       !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+                       [(set NZCV, (opfrag GPR:$Rn, (sra GPR:$Rm,
+                            !cast<Operand>("asr_operand_" # sty):$Imm6))
+                       )],
+                       NoItinerary>;
+  }
+
+  def _noshift
+      : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
+                 (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+  def : Pat<(opfrag GPR:$Rn, GPR:$Rm),
+            (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, "i32", GPR32>;
+defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, "i64", GPR64>;
+
+defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, "i32", GPR32>;
+defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, "i64", GPR64>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (with carry) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS
+
+// A64I_addsubcarrySizes: 32-bit (www) and 64-bit (xxx) forms of one carry
+// instruction. Both read NZCV and carry no selection pattern of their own.
+multiclass A64I_addsubcarrySizes<bit op, bit s, string asmop>
+{
+  let Uses = [NZCV] in
+  {
+    def www : A64I_addsubcarry<0b0, op, s, 0b000000,
+                               (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm),
+                               !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+                               [], NoItinerary>;
+
+    def xxx : A64I_addsubcarry<0b1, op, s, 0b000000,
+                               (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
+                               !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+                               [], NoItinerary>;
+  }
+}
+
+// Only the additions are marked commutable.
+let isCommutable = 1 in
+{
+  defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">;
+}
+
+defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">;
+
+// The flag-setting forms additionally define NZCV.
+let Defs = [NZCV] in
+{
+  let isCommutable = 1 in
+  {
+    defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">;
+  }
+
+  defm SBCS : A64I_addsubcarrySizes<0b1, 0b1, "sbcs">;
+}
+
+// ngc/ngcs are sbc/sbcs with the zero register as the first source.
+def : InstAlias<"ngc $Rd, $Rm", (SBCwww GPR32:$Rd, WZR, GPR32:$Rm)>;
+def : InstAlias<"ngc $Rd, $Rm", (SBCxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
+def : InstAlias<"ngcs $Rd, $Rm", (SBCSwww GPR32:$Rd, WZR, GPR32:$Rm)>;
+def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
+
+// Note that adde and sube can form a chain longer than two (e.g. for 256-bit
+// addition). So the flag-setting instructions are appropriate.
+def : Pat<(adde GPR32:$Rn, GPR32:$Rm), (ADCSwww GPR32:$Rn, GPR32:$Rm)>;
+def : Pat<(adde GPR64:$Rn, GPR64:$Rm), (ADCSxxx GPR64:$Rn, GPR64:$Rm)>;
+def : Pat<(sube GPR32:$Rn, GPR32:$Rm), (SBCSwww GPR32:$Rn, GPR32:$Rm)>;
+def : Pat<(sube GPR64:$Rn, GPR64:$Rm), (SBCSxxx GPR64:$Rn, GPR64:$Rm)>;
+
+//===----------------------------------------------------------------------===//
+// Bitfield
+//===----------------------------------------------------------------------===//
+// Contains: SBFM, BFM, UBFM, [SU]XT[BHW], ASR, LSR, LSL, SBFI[ZX], BFI, BFXIL,
+//     UBFIZ, UBFX
+
+// Because of the rather complicated nearly-overlapping aliases, the decoding of
+// this range of instructions is handled manually. The architectural
+// instructions are BFM, SBFM and UBFM but a disassembler should never produce
+// these.
+//
+// In the end, the best option was to use BFM instructions for decoding under
+// almost all circumstances, but to create aliasing *Instructions* for each of
+// the canonical forms and specify a completely custom decoder which would
+// substitute the correct MCInst as needed.
+//
+// This also simplifies instruction selection, parsing etc because the MCInsts
+// have a shape that's closer to their use in code.
+
+//===-------------------------------
+// 1. The architectural BFM instructions
+//===-------------------------------
+
+// Assembly operand classes for unsigned 5-bit and 6-bit immediates.
+def uimm5_asmoperand : AsmOperandClass
+{
+  let Name = "UImm5";
+  let PredicateMethod = "isUImm<5>";
+  let RenderMethod = "addImmOperands";
+}
+
+def uimm6_asmoperand : AsmOperandClass
+{
+  let Name = "UImm6";
+  let PredicateMethod = "isUImm<6>";
+  let RenderMethod = "addImmOperands";
+}
+
+// Immediate in [0, 31]; uses a custom decoder method.
+def bitfield32_imm : Operand<i64>,
+                     ImmLeaf<i64, [{ return Imm >= 0 && Imm < 32; }]>
+{
+  let ParserMatchClass = uimm5_asmoperand;
+
+  let DecoderMethod = "DecodeBitfield32ImmOperand";
+}
+
+
+// Immediate in [0, 63].
+def bitfield64_imm : Operand<i64>,
+                     ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]>
+{
+  let ParserMatchClass = uimm6_asmoperand;
+
+  // Default decoder works in 64-bit case: the 6-bit field can take any value.
+}
+
+// A64I_bitfieldSizes: 32-bit (wwii) and 64-bit (xxii) forms of one bitfield
+// move instruction. Neither has a selection pattern; both are decoded through
+// DecodeBitfieldInstruction.
+multiclass A64I_bitfieldSizes<bits<2> opc, string asmop>
+{
+  def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+                    (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
+                    !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+                    [], NoItinerary>
+  {
+    let DecoderMethod = "DecodeBitfieldInstruction";
+  }
+
+  def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+                    (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
+                    !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+                    [], NoItinerary>
+  {
+    let DecoderMethod = "DecodeBitfieldInstruction";
+  }
+}
+
+defm SBFM : A64I_bitfieldSizes<0b00, "sbfm">;
+defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">;
+
+// BFM instructions modify the destination register rather than defining it
+// completely.
+// 32-bit BFM. The extra $src input is tied to $Rd by the Constraints string,
+// so the destination's previous value is an input to the instruction.
+def BFMwwii :
+  A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+        (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
+        "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>
+{
+  let DecoderMethod = "DecodeBitfieldInstruction";
+  let Constraints = "$src = $Rd";
+}
+
+// 64-bit BFM; same tied-operand arrangement as the 32-bit form.
+def BFMxxii :
+  A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+        (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
+        "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>
+{
+  let DecoderMethod = "DecodeBitfieldInstruction";
+  let Constraints = "$src = $Rd";
+}
+
+
+//===-------------------------------
+// 2. Extend aliases to 64-bit dest
+//===-------------------------------
+
+// Unfortunately the extensions that end up as 64-bits cannot be handled by an
+// instruction alias: their syntax is (for example) "SXTB x0, w0", which needs
+// to be mapped to "SBFM x0, x0, #0, 7" (changing the class of Rn). InstAlias is
+// not capable of such a map as far as I'm aware
+
+// Note that these instructions are strictly more specific than the
+// BFM ones (in ImmR) so they can handle their own decoding.
+// A64I_bf_ext: a bitfield instruction specialised as a register extension.
+// The source is always GPR32, ImmR is fixed to 0 and ImmS to `imms`. `sf` is
+// passed as both the first and the third template argument of A64I_bitfield.
+// `pattern` is the DAG fragment whose result is assigned to $Rd.
+class A64I_bf_ext<bit sf, bits<2> opc, RegisterClass GPRDest, string asmop,
+                    bits<6> imms, dag pattern>
+  : A64I_bitfield<sf, opc, sf,
+                  (outs GPRDest:$Rd), (ins GPR32:$Rn),
+                  !strconcat(asmop, "\t$Rd, $Rn"),
+                  [(set GPRDest:$Rd, pattern)], NoItinerary>
+{
+  let ImmR = 0b000000;
+  let ImmS = imms;
+}
+
+// Signed extensions
+def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxtb", 7,
+                         (sext_inreg (anyext GPR32:$Rn), i8)>;
+def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, "sxtb", 7,
+                         (sext_inreg GPR32:$Rn, i8)>;
+def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxth", 15,
+                         (sext_inreg (anyext GPR32:$Rn), i16)>;
+def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, "sxth", 15,
+                         (sext_inreg GPR32:$Rn, i16)>;
+def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxtw", 31, (sext GPR32:$Rn)>;
+
+// Unsigned extensions
+def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, "uxtb", 7,
+                         (and GPR32:$Rn, 255)>;
+def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, "uxth", 15,
+                         (and GPR32:$Rn, 65535)>;
+
+// The 64-bit unsigned variants are not strictly architectural but recommended
+// for consistency.
+// Note these pass sf = 0 while using a GPR64 destination class.
+let isAsmParserOnly = 1 in
+{
+  def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, "uxtb", 7,
+                           (and (anyext GPR32:$Rn), 255)>;
+  def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, "uxth", 15,
+                           (and (anyext GPR32:$Rn), 65535)>;
+}
+
+// Extra patterns for when the source register is actually 64-bits
+// too. There's no architectural difference here, it's just LLVM
+// shenanigans. There's no need for equivalent zero-extension patterns
+// because they'll already be caught by logical (immediate) matching.
+def : Pat<(sext_inreg GPR64:$Rn, i8),
+          (SXTBxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>;
+def : Pat<(sext_inreg GPR64:$Rn, i16),
+          (SXTHxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>;
+def : Pat<(sext_inreg GPR64:$Rn, i32),
+          (SXTWxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>;
+
+
+//===-------------------------------
+// 3. Aliases for ASR and LSR (the simple shifts)
+//===-------------------------------
+
+// A64I_shift: immediate right shifts as bitfield instructions. ImmR is the
+// shift amount operand; ImmS is fixed to 31 (wwi) or 63 (xxi). Selected from
+// (opnode Rn, ImmR).
+// These also handle their own decoding because ImmS being set makes
+// them take precedence over BFM.
+multiclass A64I_shift<bits<2> opc, string asmop, SDNode opnode>
+{
+  def wwi : A64I_bitfield<0b0, opc, 0b0,
+                    (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR),
+                    !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
+                    [(set GPR32:$Rd, (opnode GPR32:$Rn, bitfield32_imm:$ImmR))],
+                    NoItinerary>
+  {
+    let ImmS = 31;
+  }
+
+  def xxi : A64I_bitfield<0b1, opc, 0b1,
+                    (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR),
+                    !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
+                    [(set GPR64:$Rd, (opnode GPR64:$Rn, bitfield64_imm:$ImmR))],
+                    NoItinerary>
+  {
+    let ImmS = 63;
+  }
+
+}
+
+defm ASR : A64I_shift<0b00, "asr", sra>;
+defm LSR : A64I_shift<0b10, "lsr", srl>;
+
+//===-------------------------------
+// 4. Aliases for LSL
+//===-------------------------------
+
+// Unfortunately LSL and subsequent aliases are much more complicated. We need
+// to be able to say certain output instruction fields depend in a complex
+// manner on combinations of input assembly fields.
+//
+// MIOperandInfo *might* have been able to do it, but at the cost of
+// significantly more C++ code.
+
+// N.b. contrary to usual practice these operands store the shift rather than
+// the machine bits in an MCInst. The complexity overhead of consistency
+// outweighed the benefits in this case (custom asmparser, printer and selection
+// vs custom encoder).
+// LSL shift-amount operands: the value held is the shift itself (0..31 or
+// 0..63); the named EncoderMethod converts it for encoding.
+def bitfield32_lsl_imm : Operand<i64>,
+                         ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]>
+{
+  let ParserMatchClass = uimm5_asmoperand;
+  let EncoderMethod = "getBitfield32LSLOpValue";
+}
+
+def bitfield64_lsl_imm : Operand<i64>,
+                         ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]>
+{
+  let ParserMatchClass = uimm6_asmoperand;
+  let EncoderMethod = "getBitfield64LSLOpValue";
+}
+
+// A64I_bitfield_lsl: "lsl $Rd, $Rn, $FullImm", selected from (shl Rn, FullImm).
+// The 12-bit FullImm field supplies both encoding fields: bits 5-0 go to ImmR
+// and bits 11-6 to ImmS.
+class A64I_bitfield_lsl<bit sf, RegisterClass GPR, Operand operand>
+  : A64I_bitfield<sf, 0b10, sf, (outs GPR:$Rd), (ins GPR:$Rn, operand:$FullImm),
+                  "lsl\t$Rd, $Rn, $FullImm",
+                  [(set GPR:$Rd, (shl GPR:$Rn, operand:$FullImm))],
+                  NoItinerary>
+{
+  bits<12> FullImm;
+  let ImmR = FullImm{5-0};
+  let ImmS = FullImm{11-6};
+
+  // No disassembler allowed because it would overlap with BFM which does the
+  // actual work.
+  let isAsmParserOnly = 1;
+}
+
+def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, bitfield32_lsl_imm>;
+def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, bitfield64_lsl_imm>;
+
+//===-------------------------------
+// 5. Aliases for bitfield extract instructions
+//===-------------------------------
+
+def bfx32_width_asmoperand : AsmOperandClass
+{
+  let Name = "BFX32Width";
+  let PredicateMethod = "isBitfieldWidth<32>";
+  let RenderMethod = "addBFXWidthOperands";
+}
+
+// The ImmLeaf predicate accepts every immediate.
+def bfx32_width : Operand<i64>, ImmLeaf<i64, [{ return true; }]>
+{
+  let PrintMethod = "printBFXWidthOperand";
+  let ParserMatchClass = bfx32_width_asmoperand;
+}
+
+def bfx64_width_asmoperand : AsmOperandClass
+{
+  let Name = "BFX64Width";
+  let PredicateMethod = "isBitfieldWidth<64>";
+  let RenderMethod = "addBFXWidthOperands";
+}
+
+// NOTE(review): unlike bfx32_width this operand has no ImmLeaf base; confirm
+// whether the asymmetry is intended.
+def bfx64_width : Operand<i64>
+{
+  let PrintMethod = "printBFXWidthOperand";
+  let ParserMatchClass = bfx64_width_asmoperand;
+}
+
+
+// A64I_bitfield_extract: 32-bit and 64-bit "<asmop> $Rd, $Rn, $ImmR, $ImmS"
+// forms, selected from (op Rn, imm, imm). Both are assembler-parser only.
+multiclass A64I_bitfield_extract<bits<2> opc, string asmop, SDNode op>
+{
+  def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+                       (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
+                       !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+                       [(set GPR32:$Rd, (op GPR32:$Rn, imm:$ImmR, imm:$ImmS))],
+                       NoItinerary>
+  {
+    // As above, no disassembler allowed.
+    let isAsmParserOnly = 1;
+  }
+
+  def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+                       (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
+                       !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+                       [(set GPR64:$Rd, (op GPR64:$Rn, imm:$ImmR, imm:$ImmS))],
+                       NoItinerary>
+  {
+    // As above, no disassembler allowed.
+    let isAsmParserOnly = 1;
+  }
+}
+
+defm SBFX :  A64I_bitfield_extract<0b00, "sbfx", A64Sbfx>;
+defm UBFX :  A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>;
+
+// Again, variants based on BFM modify Rd so need it as an input too.
+def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+           (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
+           "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>
+{
+  // As above, no disassembler allowed.
+  let isAsmParserOnly = 1;
+  let Constraints = "$src = $Rd";
+}
+
+def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+           (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
+           "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>
+{
+  // As above, no disassembler allowed.
+  let isAsmParserOnly = 1;
+  let Constraints = "$src = $Rd";
+}
+
+// SBFX instructions can do a 1-instruction sign-extension of boolean values.
+def : Pat<(sext_inreg GPR64:$Rn, i1), (SBFXxxii GPR64:$Rn, 0, 0)>;
+def : Pat<(sext_inreg GPR32:$Rn, i1), (SBFXwwii GPR32:$Rn, 0, 0)>;
+def : Pat<(i64 (sext_inreg (anyext GPR32:$Rn), i1)),
+          (SBFXxxii (SUBREG_TO_REG (i64 0), GPR32:$Rn, sub_32), 0, 0)>;
+
+// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could
+// use either 64-bit or 32-bit variant, but 32-bit might be more efficient.
+def : Pat<(zext GPR32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii GPR32:$Rn, 0, 31), sub_32)>;
+
+//===-------------------------------
+// 6. Aliases for bitfield insert instructions
+//===-------------------------------
+
+// "lsb" operands for the insert aliases. The predicate is a plain unsigned
+// range check; the render and print methods are specific to the bit width.
+def bfi32_lsb_asmoperand : AsmOperandClass
+{
+  let Name = "BFI32LSB";
+  let PredicateMethod = "isUImm<5>";
+  let RenderMethod = "addBFILSBOperands<32>";
+}
+
+def bfi32_lsb : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]>
+{
+  let PrintMethod = "printBFILSBOperand<32>";
+  let ParserMatchClass = bfi32_lsb_asmoperand;
+}
+
+def bfi64_lsb_asmoperand : AsmOperandClass
+{
+  let Name = "BFI64LSB";
+  let PredicateMethod = "isUImm<6>";
+  let RenderMethod = "addBFILSBOperands<64>";
+}
+
+def bfi64_lsb : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]>
+{
+  let PrintMethod = "printBFILSBOperand<64>";
+  let ParserMatchClass = bfi64_lsb_asmoperand;
+}
+
+// Width verification is performed during conversion so width operand can be
+// shared between 32/64-bit cases. Still needed for the print method though
+// because ImmR encodes "width - 1".
+// Width operands for the insert aliases: 1..32 for the 32-bit forms and 1..64
+// for the 64-bit forms. Both share the same render and print methods.
+def bfi32_width_asmoperand : AsmOperandClass
+{
+  let Name = "BFI32Width";
+  let PredicateMethod = "isBitfieldWidth<32>";
+  let RenderMethod = "addBFIWidthOperands";
+}
+
+def bfi32_width : Operand<i64>,
+                  ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 32; }]>
+{
+  let PrintMethod = "printBFIWidthOperand";
+  let ParserMatchClass = bfi32_width_asmoperand;
+}
+
+def bfi64_width_asmoperand : AsmOperandClass
+{
+  let Name = "BFI64Width";
+  let PredicateMethod = "isBitfieldWidth<64>";
+  let RenderMethod = "addBFIWidthOperands";
+}
+
+def bfi64_width : Operand<i64>,
+                  ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 64; }]>
+{
+  let PrintMethod = "printBFIWidthOperand";
+  let ParserMatchClass = bfi64_width_asmoperand;
+}
+
+// A64I_bitfield_insert: 32-bit and 64-bit "<asmop> $Rd, $Rn, $ImmR, $ImmS"
+// forms taking an lsb operand and a width operand. No selection patterns;
+// assembler-parser only.
+multiclass A64I_bitfield_insert<bits<2> opc, string asmop>
+{
+  def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+                           (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
+                           !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+                           [], NoItinerary>
+  {
+    // As above, no disassembler allowed.
+    let isAsmParserOnly = 1;
+  }
+
+  def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+                           (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
+                           !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+                           [], NoItinerary>
+  {
+    // As above, no disassembler allowed.
+    let isAsmParserOnly = 1;
+  }
+
+}
+
+defm SBFIZ :  A64I_bitfield_insert<0b00, "sbfiz">;
+defm UBFIZ :  A64I_bitfield_insert<0b10, "ubfiz">;
+
+
+// BFI keeps the destination's previous value as an input: $src is tied to $Rd.
+def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+                (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
+                "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>
+{
+  // As above, no disassembler allowed.
+  let isAsmParserOnly = 1;
+  let Constraints = "$src = $Rd";
+}
+
+def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+                (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
+                "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>
+{
+  // As above, no disassembler allowed.
+  let isAsmParserOnly = 1;
+  let Constraints = "$src = $Rd";
+}
+
+//===----------------------------------------------------------------------===//
+// Compare and branch (immediate)
+//===----------------------------------------------------------------------===//
+// Contains: CBZ, CBNZ
+
+// label_asmoperand: assembly operand class for a label, parameterised by
+// `width` and `scale`; both are pasted into the class name and into the
+// template arguments of the predicate and render methods.
+class label_asmoperand<int width, int scale> : AsmOperandClass
+{
+  let Name = "Label" # width # "_" # scale;
+  let PredicateMethod = "isLabel<" # width # "," # scale # ">";
+  let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">";
+}
+
+def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>;
+
+// All conditional immediate branches are the same really: 19 signed bits scaled
+// by the instruction-size (4).
+def bcc_target : Operand<OtherVT>
+{
+  // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
+  let ParserMatchClass = label_wid19_scal4_asmoperand;
+  let PrintMethod = "printLabelOperand<19, 4>";
+  let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_condbr>";
+  let OperandType = "OPERAND_PCREL";
+}
+
+// cmpbr_sizes: 64-bit (x) and 32-bit (w) compare-and-branch instructions,
+// marked as branches and terminators. Each is selected from a conditional
+// branch on (A64cmp Rt, 0) whose condition-code immediate satisfies SETOP.
+multiclass cmpbr_sizes<bit op, string asmop, ImmLeaf SETOP>
+{
+  let isBranch = 1, isTerminator = 1 in {
+  def x : A64I_cmpbr<0b1, op,
+                     (outs),
+                     (ins GPR64:$Rt, bcc_target:$Label),
+                     !strconcat(asmop,"\t$Rt, $Label"),
+                     [(A64br_cc (A64cmp GPR64:$Rt, 0), SETOP, bb:$Label)],
+                     NoItinerary>;
+
+  def w : A64I_cmpbr<0b0, op,
+                     (outs),
+                     (ins GPR32:$Rt, bcc_target:$Label),
+                     !strconcat(asmop,"\t$Rt, $Label"),
+                     [(A64br_cc (A64cmp GPR32:$Rt, 0), SETOP, bb:$Label)],
+                     NoItinerary>;
+  }
+}
+
+// CBZ matches the EQ condition, CBNZ the NE condition.
+defm CBZ  : cmpbr_sizes<0b0, "cbz",  ImmLeaf<i32, [{
+  return Imm == A64CC::EQ;
+}]> >;
+defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf<i32, [{
+  return Imm == A64CC::NE;
+}]> >;
+
+//===----------------------------------------------------------------------===//
+// Conditional branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: B.cc
+
+def cond_code_asmoperand : AsmOperandClass
+{
+  let Name = "CondCode";
+}
+
+// Condition-code immediate in [0, 15].
+def cond_code : Operand<i32>, ImmLeaf<i32, [{
+  return Imm >= 0 && Imm <= 15;
+}]>
+{
+  let PrintMethod = "printCondCodeOperand";
+  let ParserMatchClass = cond_code_asmoperand;
+}
+
+// Conditional branch: reads NZCV; the condition is part of the mnemonic
+// ("b.$Cond").
+def Bcc : A64I_condbr<0b0, 0b0, (outs),
+                (ins cond_code:$Cond, bcc_target:$Label),
+                "b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)],
+                NoItinerary>
+{
+  let Uses = [NZCV];
+  let isBranch = 1;
+  let isTerminator = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Conditional compare (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CCMN, CCMP
+
+def uimm4_asmoperand : AsmOperandClass
+{
+  let Name = "UImm4";
+  let PredicateMethod = "isUImm<4>";
+  let RenderMethod = "addImmOperands";
+}
+
+def uimm4 : Operand<i32>
+{
+  let ParserMatchClass = uimm4_asmoperand;
+}
+
+def uimm5 : Operand<i32>
+{
+  let ParserMatchClass = uimm5_asmoperand;
+}
+
+// The only difference between this operand and the one for instructions like
+// B.cc is that it's parsed manually. The other get parsed implicitly as part of
+// the mnemonic handling.
+def cond_code_op_asmoperand : AsmOperandClass
+{
+  let Name = "CondCodeOp";
+  let RenderMethod = "addCondCodeOperands";
+  let PredicateMethod = "isCondCode";
+  let ParserMethod = "ParseCondCodeOperand";
+}
+
+def cond_code_op : Operand<i32>
+{
+  let PrintMethod = "printCondCodeOperand";
+  let ParserMatchClass = cond_code_op_asmoperand;
+}
+
+// Conditional compare against a 5-bit immediate. No selection pattern.
+// NOTE(review): only Defs = [NZCV] is set. The instruction takes a $Cond
+// operand, so it presumably also reads NZCV; confirm whether Uses = [NZCV]
+// is required.
+class A64I_condcmpimmImpl<bit sf, bit op, RegisterClass GPR, string asmop>
+  : A64I_condcmpimm<sf, op, 0b0, 0b0, 0b1, (outs),
+                (ins GPR:$Rn, uimm5:$UImm5, uimm4:$NZCVImm, cond_code_op:$Cond),
+                !strconcat(asmop, "\t$Rn, $UImm5, $NZCVImm, $Cond"),
+                [], NoItinerary>
+{
+  let Defs = [NZCV];
+}
+
+def CCMNwi : A64I_condcmpimmImpl<0b0, 0b0, GPR32, "ccmn">;
+def CCMNxi : A64I_condcmpimmImpl<0b1, 0b0, GPR64, "ccmn">;
+def CCMPwi : A64I_condcmpimmImpl<0b0, 0b1, GPR32, "ccmp">;
+def CCMPxi : A64I_condcmpimmImpl<0b1, 0b1, GPR64, "ccmp">;
+
+//===----------------------------------------------------------------------===//
+// Conditional compare (register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CCMN, CCMP
+
+// Conditional compare against a register. No selection pattern.
+// NOTE(review): as with the immediate form, only Defs = [NZCV] is set; confirm
+// whether Uses = [NZCV] is required.
+class A64I_condcmpregImpl<bit sf, bit op, RegisterClass GPR, string asmop>
+  : A64I_condcmpreg<sf, op, 0b0, 0b0, 0b1,
+                    (outs),
+                    (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
+                    !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
+                    [], NoItinerary>
+{
+  let Defs = [NZCV];
+}
+
+def CCMNww : A64I_condcmpregImpl<0b0, 0b0, GPR32, "ccmn">;
+def CCMNxx : A64I_condcmpregImpl<0b1, 0b0, GPR64, "ccmn">;
+def CCMPww : A64I_condcmpregImpl<0b0, 0b1, GPR32, "ccmp">;
+def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">;
+
+//===----------------------------------------------------------------------===//
+// Conditional select instructions
+//===----------------------------------------------------------------------===//
+// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG
+
+// Condition code which is encoded as the inversion (semantically rather than
+// bitwise) in the instruction.
+def inv_cond_code_op_asmoperand : AsmOperandClass
+{
+  let Name = "InvCondCodeOp";
+  let RenderMethod = "addInvCondCodeOperands";
+  let PredicateMethod = "isCondCode";
+  let ParserMethod = "ParseCondCodeOperand";
+}
+
+def inv_cond_code_op : Operand<i32>
+{
+  let ParserMatchClass = inv_cond_code_op_asmoperand;
+}
+
+// Having a separate operand for the selectable use-case is debatable, but gives
+// consistency with cond_code.
+// Rewrites a matched condition code to A64InvertCondCode(code), emitted as an
+// i32 target constant.
+def inv_cond_XFORM : SDNodeXForm<imm, [{
+  A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(N->getZExtValue());
+  return CurDAG->getTargetConstant(A64InvertCondCode(CC), MVT::i32);
+}]>;
+
+// Condition-code immediate in [0, 15], transformed by inv_cond_XFORM.
+def inv_cond_code
+  : ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 15; }], inv_cond_XFORM>;
+
+
+// A64I_condselSizes: 32-bit (wwwc) and 64-bit (xxxc) forms of one conditional
+// select instruction. Both read NZCV and are selected from (select Rn, Rm),
+// where `select` is a PatFrag that supplies the NZCV and condition operands.
+multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
+                             SDPatternOperator select>
+{
+  let Uses = [NZCV] in
+  {
+    def wwwc : A64I_condsel<0b0, op, 0b0, op2,
+                            (outs GPR32:$Rd),
+                            (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond),
+                            !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
+                            [(set GPR32:$Rd, (select GPR32:$Rn, GPR32:$Rm))],
+                            NoItinerary>;
+
+
+    def xxxc : A64I_condsel<0b1, op, 0b0, op2,
+                            (outs GPR64:$Rd),
+                            (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond),
+                            !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
+                            [(set GPR64:$Rd, (select GPR64:$Rn, GPR64:$Rm))],
+                            NoItinerary>;
+  }
+}
+
+// Plain select between lhs and rhs on NZCV and an immediate condition.
+def simple_select
+  : PatFrag<(ops node:$lhs, node:$rhs),
+            (A64select_cc NZCV, node:$lhs, node:$rhs, (i32 imm:$Cond))>;
+
+// Select between lhs and (opnode rhs) on NZCV and an immediate condition.
+class complex_select<SDPatternOperator opnode>
+  : PatFrag<(ops node:$lhs, node:$rhs),
+        (A64select_cc NZCV, node:$lhs, (opnode node:$rhs), (i32 imm:$Cond))>;
+
+
+defm CSEL : A64I_condselSizes<0b0, 0b00, "csel", simple_select>;
+defm CSINC : A64I_condselSizes<0b0, 0b01, "csinc",
+                               complex_select<PatFrag<(ops node:$val),
+                                                      (add node:$val, 1)>>>;
+defm CSINV : A64I_condselSizes<0b1, 0b00, "csinv", complex_select<not>>;
+defm CSNEG : A64I_condselSizes<0b1, 0b01, "csneg", complex_select<ineg>>;
+
// Now the instruction aliases, which fit nicely into LLVM's model. Every alias
// takes its condition through inv_cond_code_op.

def : InstAlias<"cset $Rd, $Cond",
                (CSINCwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
def : InstAlias<"cset $Rd, $Cond",
                (CSINCxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
def : InstAlias<"csetm $Rd, $Cond",
                (CSINVwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
def : InstAlias<"csetm $Rd, $Cond",
                (CSINVxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
def : InstAlias<"cinc $Rd, $Rn, $Cond",
           (CSINCwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
def : InstAlias<"cinc $Rd, $Rn, $Cond",
           (CSINCxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
def : InstAlias<"cinv $Rd, $Rn, $Cond",
           (CSINVwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
def : InstAlias<"cinv $Rd, $Rn, $Cond",
           (CSINVxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
def : InstAlias<"cneg $Rd, $Rn, $Cond",
           (CSNEGwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
def : InstAlias<"cneg $Rd, $Rn, $Cond",
           (CSNEGxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;

// Finally some helper patterns.

// For CSET (a.k.a. zero-extension of icmp)
def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
          (CSINCwwwc WZR, WZR, cond_code:$Cond)>;
def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
          (CSINCwwwc WZR, WZR, inv_cond_code:$Cond)>;

def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
          (CSINCxxxc XZR, XZR, cond_code:$Cond)>;
def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
          (CSINCxxxc XZR, XZR, inv_cond_code:$Cond)>;

// For CSETM (a.k.a. sign-extension of icmp)
def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
          (CSINVwwwc WZR, WZR, cond_code:$Cond)>;
def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
          (CSINVwwwc WZR, WZR, inv_cond_code:$Cond)>;

def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
          (CSINVxxxc XZR, XZR, cond_code:$Cond)>;
def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
          (CSINVxxxc XZR, XZR, inv_cond_code:$Cond)>;

// CINC, CINV and CNEG get dealt with automatically, which leaves the issue of
// commutativity. The instructions are too complex for isCommutable to be used,
// so we have to create the patterns manually: each one matches the select with
// its operands swapped and emits the instruction with the inverted condition.

// No commutable pattern for CSEL since the commuted version is isomorphic.

// CSINC
def : Pat<(A64select_cc NZCV, (add GPR32:$Rm, 1), GPR32:$Rn,
                        inv_cond_code:$Cond),
          (CSINCwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
def : Pat<(A64select_cc NZCV, (add GPR64:$Rm, 1), GPR64:$Rn,
                        inv_cond_code:$Cond),
          (CSINCxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;

// CSINV
def : Pat<(A64select_cc NZCV, (not GPR32:$Rm), GPR32:$Rn, inv_cond_code:$Cond),
          (CSINVwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
def : Pat<(A64select_cc NZCV, (not GPR64:$Rm), GPR64:$Rn, inv_cond_code:$Cond),
          (CSINVxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;

// CSNEG
def : Pat<(A64select_cc NZCV, (ineg GPR32:$Rm), GPR32:$Rn, inv_cond_code:$Cond),
          (CSNEGwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
def : Pat<(A64select_cc NZCV, (ineg GPR64:$Rm), GPR64:$Rn, inv_cond_code:$Cond),
          (CSNEGxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;

//===----------------------------------------------------------------------===//
// Data Processing (1 source) instructions
//===----------------------------------------------------------------------===//
// Contains: RBIT, REV16, REV, REV32, CLZ, CLS.

// We define a unary operator which always fails. We will use this to
// define unary operators that cannot be matched.
// NOTE(review): no such operator is defined directly after this comment;
// confirm whether the comment is stale.

// One-source data-processing instruction of the form "OP Rd, Rn", with both
// operands drawn from the same register class.
class A64I_dp_1src_impl<bit sf, bits<6> opcode, string asmop,
                        list<dag> patterns, RegisterClass GPRrc,
                        InstrItinClass itin>
  : A64I_dp_1src<sf,
                 0,
                 0b00000,
                 opcode,
                 !strconcat(asmop, "\t$Rd, $Rn"),
                 (outs GPRrc:$Rd),
                 (ins GPRrc:$Rn),
                 patterns,
                 itin>;

// 32-bit (ww) and 64-bit (xx) variants with no selection pattern attached.
multiclass A64I_dp_1src <bits<6> opcode, string asmop> {
  let neverHasSideEffects = 1 in {
    def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>;
    def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>;
  }
}

defm RBIT : A64I_dp_1src<0b000000, "rbit">;
defm CLS  : A64I_dp_1src<0b000101, "cls">;
defm CLZ  : A64I_dp_1src<0b000100, "clz">;

// Count-leading-zeros maps directly onto CLZ.
def : Pat<(ctlz GPR32:$Rn), (CLZww GPR32:$Rn)>;
def : Pat<(ctlz GPR64:$Rn), (CLZxx GPR64:$Rn)>;
def : Pat<(ctlz_zero_undef GPR32:$Rn), (CLZww GPR32:$Rn)>;
def : Pat<(ctlz_zero_undef GPR64:$Rn), (CLZxx GPR64:$Rn)>;

// Count-trailing-zeros is a bit reversal followed by CLZ.
def : Pat<(cttz GPR32:$Rn), (CLZww (RBITww GPR32:$Rn))>;
def : Pat<(cttz GPR64:$Rn), (CLZxx (RBITxx GPR64:$Rn))>;
def : Pat<(cttz_zero_undef GPR32:$Rn), (CLZww (RBITww GPR32:$Rn))>;
def : Pat<(cttz_zero_undef GPR64:$Rn), (CLZxx (RBITxx GPR64:$Rn))>;


def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev",
                              [(set GPR32:$Rd, (bswap GPR32:$Rn))],
                              GPR32, NoItinerary>;
def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev",
                              [(set GPR64:$Rd, (bswap GPR64:$Rn))],
                              GPR64, NoItinerary>;
def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32",
                        [(set GPR64:$Rd, (bswap (rotr GPR64:$Rn, (i64 32))))],
                        GPR64, NoItinerary>;
def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16",
                        [(set GPR32:$Rd, (bswap (rotr GPR32:$Rn, (i64 16))))],
                        GPR32,
                        NoItinerary>;
def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>;

//===----------------------------------------------------------------------===//
// Data Processing (2 sources) instructions
//===----------------------------------------------------------------------===//
// Contains: UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL, LSR, ASR, ROR

// Two-source data-processing instruction of the form "OP Rd, Rn, Rm", with all
// three operands drawn from the same register class.
class dp_2src_impl<bit sf, bits<6> opcode, string asmop, list<dag> patterns,
                   RegisterClass GPRsp,
                   InstrItinClass itin>
  : A64I_dp_2src<sf,
                 opcode,
                 0,
                 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                 (outs GPRsp:$Rd),
                 (ins GPRsp:$Rn, GPRsp:$Rm),
                 patterns,
                 itin>;

// Variant whose 32-bit pattern takes its second operand as an i64
// zero-extension of a 32-bit register.
multiclass dp_2src_zext <bits<6> opcode, string asmop, SDPatternOperator op> {
  def www : dp_2src_impl<0b0,
                         opcode,
                         asmop,
                         [(set GPR32:$Rd,
                               (op GPR32:$Rn, (i64 (zext GPR32:$Rm))))],
                         GPR32,
                         NoItinerary>;
  def xxx : dp_2src_impl<0b1,
                         opcode,
                         asmop,
                         [(set GPR64:$Rd, (op GPR64:$Rn, GPR64:$Rm))],
                         GPR64,
                         NoItinerary>;
}


// Variant in which both operands of 'op' have the same width as the result.
multiclass dp_2src <bits<6> opcode, string asmop, SDPatternOperator op> {
  def www : dp_2src_impl<0b0,
                         opcode,
                         asmop,
                         [(set GPR32:$Rd, (op GPR32:$Rn, GPR32:$Rm))],
                         GPR32,
                         NoItinerary>;
  def xxx : dp_2src_impl<0b1,
                         opcode,
                         asmop,
                         [(set GPR64:$Rd, (op GPR64:$Rn, GPR64:$Rm))],
                         GPR64,
                         NoItinerary>;
}

// Here we define the data processing 2 source instructions.
defm UDIV : dp_2src<0b000010, "udiv", udiv>;
defm SDIV : dp_2src<0b000011, "sdiv", sdiv>;

defm LSLV : dp_2src_zext<0b001000, "lsl", shl>;
defm LSRV : dp_2src_zext<0b001001, "lsr", srl>;
defm ASRV : dp_2src_zext<0b001010, "asr", sra>;
defm RORV : dp_2src_zext<0b001011, "ror", rotr>;

// Extra patterns for an incoming 64-bit value for a 32-bit
// operation. Since the LLVM operations are undefined (as in C) if the
// RHS is out of range, it's perfectly permissible to discard the high
// bits of the GPR64.
// 32-bit shifts and rotates whose amount arrives in a 64-bit register: only
// the low 32 bits (sub_32) of the amount are passed to the instruction.
def : Pat<(shl GPR32:$Rn, GPR64:$Rm),
          (LSLVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
def : Pat<(srl GPR32:$Rn, GPR64:$Rm),
          (LSRVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
def : Pat<(sra GPR32:$Rn, GPR64:$Rm),
          (ASRVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
def : Pat<(rotr GPR32:$Rn, GPR64:$Rm),
          (RORVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;

// Here we define the aliases for the data processing 2 source instructions.
def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">;
def LSR_mnemonic : MnemonicAlias<"lsrv", "lsr">;
def ASR_menmonic : MnemonicAlias<"asrv", "asr">;
def ROR_menmonic : MnemonicAlias<"rorv", "ror">;

//===----------------------------------------------------------------------===//
// Data Processing (3 sources) instructions
//===----------------------------------------------------------------------===//
// Contains: MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH
//    + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL

// Four-operand multiply-accumulate form "OP Rd, Rn, Rm, Ra". The accumulator
// and source register classes are also recorded as fields (AccGPR, SrcGPR) so
// that the three-operand alias multiclass below can refer to them.
class A64I_dp3_4operand<bit sf, bits<6> opcode, RegisterClass AccReg,
                        RegisterClass SrcReg, string asmop, dag pattern>
  : A64I_dp3<sf, opcode,
             (outs AccReg:$Rd), (ins SrcReg:$Rn, SrcReg:$Rm, AccReg:$Ra),
             !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Ra"),
             [(set AccReg:$Rd, pattern)], NoItinerary> {
  RegisterClass AccGPR = AccReg;
  RegisterClass SrcGPR = SrcReg;
}

def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, GPR32, "madd",
                         (add GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm))>;
def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, GPR64, "madd",
                         (add GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm))>;

def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, GPR32, "msub",
                         (sub GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm))>;
def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, GPR64, "msub",
                         (sub GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm))>;

def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, GPR32, "smaddl",
         (add GPR64:$Ra, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>;
def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, GPR32, "smsubl",
         (sub GPR64:$Ra, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>;

def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, GPR32, "umaddl",
         (add GPR64:$Ra, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>;
def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, GPR32, "umsubl",
         (sub GPR64:$Ra, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>;

// The multiply-high instructions take no accumulator operand.
let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in {
  def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd),
                          (ins GPR64:$Rn, GPR64:$Rm),
                          "umulh\t$Rd, $Rn, $Rm",
                          [(set GPR64:$Rd, (mulhu GPR64:$Rn, GPR64:$Rm))],
                          NoItinerary>;

  def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd),
                          (ins GPR64:$Rn, GPR64:$Rm),
                          "smulh\t$Rd, $Rn, $Rm",
                          [(set GPR64:$Rd, (mulhs GPR64:$Rn, GPR64:$Rm))],
                          NoItinerary>;
}

// Three-operand form of a multiply-accumulate instruction: the accumulator is
// fixed to the zero register ZR, both for the assembly alias and for
// selecting 'pattern'.
multiclass A64I_dp3_3operand<string asmop, A64I_dp3_4operand INST,
                             Register ZR, dag pattern> {
  def : InstAlias<asmop # " $Rd, $Rn, $Rm",
                  (INST INST.AccGPR:$Rd, INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>;

  def : Pat<pattern, (INST INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>;
}

defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul GPR32:$Rn, GPR32:$Rm)>;
defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul GPR64:$Rn, GPR64:$Rm)>;

defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR,
                         (sub 0, (mul GPR32:$Rn, GPR32:$Rm))>;
defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR,
                         (sub 0, (mul GPR64:$Rn, GPR64:$Rm))>;

defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR,
                         (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm))>;
defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR,
                       (sub 0, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>;

defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR,
                         (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm))>;
defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR,
                       (sub 0, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>;


//===----------------------------------------------------------------------===//
// Exception generation
//===----------------------------------------------------------------------===//
// Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3

// Immediate operand accepted by the assembler when isUImm<16> holds.
def uimm16_asmoperand : AsmOperandClass {
  let Name = "UImm16";
  let PredicateMethod = "isUImm<16>";
  let RenderMethod = "addImmOperands";
}

def uimm16 : Operand<i32> {
  let ParserMatchClass = uimm16_asmoperand;
}

// Exception-generating instruction taking a single uimm16 operand.
//   opc, ll - encoding fields forwarded to A64I_exception.
//   asmop   - mnemonic.
class A64I_exceptImpl<bits<3> opc, bits<2> ll, string asmop>
  : A64I_exception<opc, 0b000, ll, (outs), (ins uimm16:$UImm16),
                   !strconcat(asmop, "\t$UImm16"), [], NoItinerary> {
  let isBranch = 1;
  let isTerminator = 1;
}

def SVCi : A64I_exceptImpl<0b000, 0b01, "svc">;
def HVCi : A64I_exceptImpl<0b000, 0b10, "hvc">;
def SMCi : A64I_exceptImpl<0b000, 0b11, "smc">;
def BRKi : A64I_exceptImpl<0b001, 0b00, "brk">;
def HLTi : A64I_exceptImpl<0b010, 0b00, "hlt">;

def DCPS1i : A64I_exceptImpl<0b101, 0b01, "dcps1">;
def DCPS2i : A64I_exceptImpl<0b101, 0b10, "dcps2">;
def DCPS3i : A64I_exceptImpl<0b101, 0b11, "dcps3">;

// The immediate is optional for the DCPS instructions, defaulting to 0.
// Operand-less spellings of the DCPS instructions; the immediate is 0.
def : InstAlias<"dcps1", (DCPS1i 0)>;
def : InstAlias<"dcps2", (DCPS2i 0)>;
def : InstAlias<"dcps3", (DCPS3i 0)>;

//===----------------------------------------------------------------------===//
// Extract (immediate)
//===----------------------------------------------------------------------===//
// Contains: EXTR + alias ROR

def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0,
                            (outs GPR32:$Rd),
                            (ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB),
                            "extr\t$Rd, $Rn, $Rm, $LSB",
                            [(set GPR32:$Rd,
                                  (A64Extr GPR32:$Rn, GPR32:$Rm, imm:$LSB))],
                            NoItinerary>;
def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1,
                            (outs GPR64:$Rd),
                            (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB),
                            "extr\t$Rd, $Rn, $Rm, $LSB",
                            [(set GPR64:$Rd,
                                  (A64Extr GPR64:$Rn, GPR64:$Rm, imm:$LSB))],
                            NoItinerary>;

// ROR by immediate is EXTR with the same register supplied for both sources.
def : InstAlias<"ror $Rd, $Rs, $LSB",
               (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>;
def : InstAlias<"ror $Rd, $Rs, $LSB",
               (EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>;

def : Pat<(rotr GPR32:$Rn, bitfield32_imm:$LSB),
          (EXTRwwwi GPR32:$Rn, GPR32:$Rn, bitfield32_imm:$LSB)>;
def : Pat<(rotr GPR64:$Rn, bitfield64_imm:$LSB),
          (EXTRxxxi GPR64:$Rn, GPR64:$Rn, bitfield64_imm:$LSB)>;

//===----------------------------------------------------------------------===//
// Floating-point compare instructions
//===----------------------------------------------------------------------===//
// Contains: FCMP, FCMPE

def fpzero_asmoperand : AsmOperandClass {
  let Name = "FPZero";
  let ParserMethod = "ParseFPImmOperand";
}

// Floating-point zero operands, selected via SelectFPZeroOperand.
def fpz32 : Operand<f32>,
            ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
  let ParserMatchClass = fpzero_asmoperand;
  let PrintMethod = "printFPZeroOperand";
}

def fpz64 : Operand<f64>,
            ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
  let ParserMatchClass = fpzero_asmoperand;
  let PrintMethod = "printFPZeroOperand";
}

// Defines the "fcmp" (_quiet) and "fcmpe" (_sig) forms of one comparison.
// Only the quiet form carries a selection pattern.
//   type    - floating-point type field.
//   imm     - bit placed in the opcode2 field; set for the compare-with-zero
//             variants below.
//   ins     - input operand list.
//   asmop2  - assembly text of the second operand.
//   pattern - selection pattern for the quiet form.
multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, string asmop2,
                            dag pattern> {
  def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
                          (outs), ins, !strconcat("fcmp\t$Rn, ", asmop2),
                          [pattern], NoItinerary> {
    let Defs = [NZCV];
  }

  def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0},
                        (outs), ins, !strconcat("fcmpe\t$Rn, ", asmop2),
                        [], NoItinerary> {
    let Defs = [NZCV];
  }
}

defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm), "$Rm",
                               (set NZCV, (A64cmp (f32 FPR32:$Rn), FPR32:$Rm))>;
defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm), "$Rm",
                               (set NZCV, (A64cmp (f64 FPR64:$Rn), FPR64:$Rm))>;

// What would be Rm should be written as 0, but anything is valid for
// disassembly so we can't set the bits
let PostEncoderMethod = "fixFCMPImm" in {
  defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Imm), "$Imm",
                          (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Imm))>;

  defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Imm), "$Imm",
                          (set NZCV, (A64cmp (f64 FPR64:$Rn), fpz64:$Imm))>;
}


//===----------------------------------------------------------------------===//
// Floating-point conditional compare instructions
//===----------------------------------------------------------------------===//
// Contains: FCCMP, FCCMPE

// Floating-point conditional compare "OP Rn, Rm, #nzcv, cond".
//   type  - floating-point type field.
//   op    - opcode bit distinguishing the two mnemonics.
//   FPR   - register class of both sources.
//   asmop - mnemonic.
class A64I_fpccmpImpl<bits<2> type, bit op, RegisterClass FPR, string asmop>
  : A64I_fpccmp<0b0, 0b0, type, op,
                (outs),
                (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
                !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
                [], NoItinerary> {
  // $Cond is evaluated against the current flags, so NZCV is read as well as
  // written.
  let Uses = [NZCV];
  let Defs = [NZCV];
}

def FCCMPss : A64I_fpccmpImpl<0b00, 0b0, FPR32, "fccmp">;
def FCCMPEss : A64I_fpccmpImpl<0b00, 0b1, FPR32, "fccmpe">;
def FCCMPdd : A64I_fpccmpImpl<0b01, 0b0, FPR64, "fccmp">;
def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">;

//===----------------------------------------------------------------------===//
// Floating-point conditional select instructions
//===----------------------------------------------------------------------===//
// Contains: FCSEL

// Both sizes read NZCV and are matched through simple_select.
let Uses = [NZCV] in {
  def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd),
                                 (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond),
                                 "fcsel\t$Rd, $Rn, $Rm, $Cond",
                                 [(set FPR32:$Rd,
                                       (simple_select (f32 FPR32:$Rn),
                                                      FPR32:$Rm))],
                                 NoItinerary>;


  def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd),
                                 (ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond),
                                 "fcsel\t$Rd, $Rn, $Rm, $Cond",
                                 [(set FPR64:$Rd,
                                       (simple_select (f64 FPR64:$Rn),
                                                      FPR64:$Rm))],
                                 NoItinerary>;
}

//===----------------------------------------------------------------------===//
// Floating-point data-processing (1 source)
//===----------------------------------------------------------------------===//
// Contains: FMOV, FABS, FNEG, FSQRT, FCVT, FRINT[NPMZAXI].

// Unary fragment whose predicate always returns false, so it never matches.
def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val),
                       [{ (void)N; return false; }]>;

// First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d"
// syntax. Default to no pattern because most are odd enough not to have one.
// Single-precision (ss) and double-precision (dd) variants of a one-source
// floating-point instruction. 'opnode' defaults to FPNoUnop.
multiclass A64I_fpdp1sizes<bits<6> opcode, string asmstr,
                           SDPatternOperator opnode = FPNoUnop> {
  def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode,
                      (outs FPR32:$Rd), (ins FPR32:$Rn),
                      !strconcat(asmstr, "\t$Rd, $Rn"),
                      [(set (f32 FPR32:$Rd), (opnode FPR32:$Rn))],
                      NoItinerary>;

  def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode,
                      (outs FPR64:$Rd), (ins FPR64:$Rn),
                      !strconcat(asmstr, "\t$Rd, $Rn"),
                      [(set (f64 FPR64:$Rd), (opnode FPR64:$Rn))],
                      NoItinerary>;
}

defm FMOV   : A64I_fpdp1sizes<0b000000, "fmov">;
defm FABS   : A64I_fpdp1sizes<0b000001, "fabs", fabs>;
defm FNEG   : A64I_fpdp1sizes<0b000010, "fneg", fneg>;
defm FSQRT  : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>;

defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">;
defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>;
defm FRINTM : A64I_fpdp1sizes<0b001010, "frintm", ffloor>;
defm FRINTZ : A64I_fpdp1sizes<0b001011, "frintz", ftrunc>;
defm FRINTA : A64I_fpdp1sizes<0b001100, "frinta">;
defm FRINTX : A64I_fpdp1sizes<0b001110, "frintx", frint>;
defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>;

// The FCVT instructions have different source and destination register-types,
// but the fields are uniform everywhere a D-register (say) crops up. Package
// this information in a Record.
// Bundles a register class, its value type and the two-bit type field (split
// into t1:t0) used wherever that register size is encoded.
class FCVTRegType<RegisterClass rc, bits<2> fld, ValueType vt> {
    RegisterClass Class = rc;
    ValueType VT = vt;
    bit t1 = fld{1};
    bit t0 = fld{0};
}

def FCVT16 : FCVTRegType<FPR16, 0b11, f16>;
def FCVT32 : FCVTRegType<FPR32, 0b00, f32>;
def FCVT64 : FCVTRegType<FPR64, 0b01, f64>;

// FCVT between two floating-point sizes. The source's type bits fill the type
// field; the destination's type bits form the low two bits of the opcode.
class A64I_fpdp1_fcvt<FCVTRegType DestReg, FCVTRegType SrcReg, SDNode opnode>
  : A64I_fpdp1<0b0, 0b0, {SrcReg.t1, SrcReg.t0},
               {0,0,0,1, DestReg.t1, DestReg.t0},
               (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn),
               "fcvt\t$Rd, $Rn",
               [(set (DestReg.VT DestReg.Class:$Rd),
                     (opnode (SrcReg.VT SrcReg.Class:$Rn)))], NoItinerary>;

def FCVTds : A64I_fpdp1_fcvt<FCVT64, FCVT32, fextend>;
def FCVThs : A64I_fpdp1_fcvt<FCVT16, FCVT32, fround>;
def FCVTsd : A64I_fpdp1_fcvt<FCVT32, FCVT64, fround>;
def FCVThd : A64I_fpdp1_fcvt<FCVT16, FCVT64, fround>;
def FCVTsh : A64I_fpdp1_fcvt<FCVT32, FCVT16, fextend>;
def FCVTdh : A64I_fpdp1_fcvt<FCVT64, FCVT16, fextend>;


//===----------------------------------------------------------------------===//
// Floating-point data-processing (2 sources) instructions
//===----------------------------------------------------------------------===//
// Contains: FMUL, FDIV, FADD, FSUB, FMAX, FMIN, FMAXNM, FMINNM, FNMUL

// Binary fragment whose predicate always returns false, so it never matches.
def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs),
                      [{ (void)N; return false; }]>;

// Single-precision (sss) and double-precision (ddd) variants of a two-source
// floating-point instruction matched by 'opnode'.
multiclass A64I_fpdp2sizes<bits<4> opcode, string asmstr,
                           SDPatternOperator opnode> {
  def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode,
                      (outs FPR32:$Rd),
                      (ins FPR32:$Rn, FPR32:$Rm),
                      !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
                      [(set (f32 FPR32:$Rd), (opnode FPR32:$Rn, FPR32:$Rm))],
                      NoItinerary>;

  def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode,
                      (outs FPR64:$Rd),
                      (ins FPR64:$Rn, FPR64:$Rm),
                      !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
                      [(set (f64 FPR64:$Rd), (opnode FPR64:$Rn, FPR64:$Rm))],
                      NoItinerary>;
}

let isCommutable = 1 in {
  defm FMUL   : A64I_fpdp2sizes<0b0000, "fmul", fmul>;
  defm FADD   : A64I_fpdp2sizes<0b0010, "fadd", fadd>;

  // No patterns for these.
  defm FMAX   : A64I_fpdp2sizes<0b0100, "fmax", FPNoBinop>;
  defm FMIN   : A64I_fpdp2sizes<0b0101, "fmin", FPNoBinop>;
  defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>;
  defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>;

  defm FNMUL  : A64I_fpdp2sizes<0b1000, "fnmul",
                                PatFrag<(ops node:$lhs, node:$rhs),
                                        (fneg (fmul node:$lhs, node:$rhs))> >;
}

defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>;
defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>;

//===----------------------------------------------------------------------===//
// Floating-point data-processing (3 sources) instructions
//===----------------------------------------------------------------------===//
// Contains: FMADD, FMSUB, FNMADD, FNMSUB

// Architectural semantics of the four fused multiply-add forms:
//   FMADD  : Rd =   Ra  +   Rn  * Rm
//   FMSUB  : Rd =   Ra  + (-Rn) * Rm
//   FNMADD : Rd = (-Ra) + (-Rn) * Rm
//   FNMSUB : Rd = (-Ra) +   Rn  * Rm
// The fragments below express each form as an 'fma' node with the matching
// negations. FNMADD negates both the product and the addend; FNMSUB negates
// only the addend.
def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
                    (fma (fneg node:$Rn), node:$Rm, node:$Ra)>;
def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
                     (fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>;
def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
                     (fma node:$Rn, node:$Rm, (fneg node:$Ra))>;

// Three-source floating-point instruction "OP Rd, Rn, Rm, Ra".
//   asmop   - mnemonic.
//   FPR, VT - register class and value type of all operands.
//   type    - floating-point type field.
//   o1, o0  - opcode bits selecting among the four forms.
//   fmakind - fragment (or 'fma' itself) used to match the instruction.
class A64I_fpdp3Impl<string asmop, RegisterClass FPR, ValueType VT,
                     bits<2> type, bit o1, bit o0, SDPatternOperator fmakind>
  : A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd),
               (ins FPR:$Rn, FPR:$Rm, FPR:$Ra),
               !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"),
               [(set FPR:$Rd, (fmakind (VT FPR:$Rn), FPR:$Rm, FPR:$Ra))],
               NoItinerary>;

def FMADDssss  : A64I_fpdp3Impl<"fmadd",  FPR32, f32, 0b00, 0b0, 0b0, fma>;
def FMSUBssss  : A64I_fpdp3Impl<"fmsub",  FPR32, f32, 0b00, 0b0, 0b1, fmsub>;
def FNMADDssss : A64I_fpdp3Impl<"fnmadd", FPR32, f32, 0b00, 0b1, 0b0, fnmadd>;
def FNMSUBssss : A64I_fpdp3Impl<"fnmsub", FPR32, f32, 0b00, 0b1, 0b1, fnmsub>;

def FMADDdddd  : A64I_fpdp3Impl<"fmadd",  FPR64, f64, 0b01, 0b0, 0b0, fma>;
def FMSUBdddd  : A64I_fpdp3Impl<"fmsub",  FPR64, f64, 0b01, 0b0, 0b1, fmsub>;
def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;
def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;

//===----------------------------------------------------------------------===//
// Floating-point <-> fixed-point conversion instructions
//===----------------------------------------------------------------------===//
// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF

// #1-#32 allowed, encoded as "64 - <specified imm>"
def fixedpos_asmoperand_i32 : AsmOperandClass {
  let Name = "CVTFixedPos32";
  let RenderMethod = "addCVTFixedPosOperands";
  let PredicateMethod = "isCVTFixedPos<32>";
}

// Also encoded as "64 - <specified imm>" but #1-#64 allowed.
def fixedpos_asmoperand_i64 : AsmOperandClass {
  let Name = "CVTFixedPos64";
  let RenderMethod = "addCVTFixedPosOperands";
  let PredicateMethod = "isCVTFixedPos<64>";
}

// We need the cartesian product of f32/f64 i32/i64 operands for
// conversions:
//   + Selection needs to use operands of correct floating type
//   + Assembly parsing and decoding depend on integer width
class cvtfix_i32_op<ValueType FloatVT>
  : Operand<FloatVT>,
    ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm]> {
  let ParserMatchClass = fixedpos_asmoperand_i32;
  let DecoderMethod = "DecodeCVT32FixedPosOperand";
  let PrintMethod = "printCVTFixedPosOperand";
}

class cvtfix_i64_op<ValueType FloatVT>
  : Operand<FloatVT>,
    ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm]> {
  let ParserMatchClass = fixedpos_asmoperand_i64;
  let PrintMethod = "printCVTFixedPosOperand";
}

// Because of the proliferation of weird operands, it's not really
// worth going for a multiclass here. Oh well.

// Floating-point to fixed-point conversion "OP Rd, Rn, #scale". Matched as
// the conversion 'cvtop' applied to the source multiplied by the scale.
class A64I_fptofix<bit sf, bits<2> type, bits<3> opcode,
                   RegisterClass GPR, RegisterClass FPR, Operand scale_op,
                   string asmop, SDNode cvtop>
  : A64I_fpfixed<sf, 0b0, type, 0b11, opcode,
                 (outs GPR:$Rd), (ins FPR:$Rn, scale_op:$Scale),
                 !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
                 [(set GPR:$Rd, (cvtop (fmul FPR:$Rn, scale_op:$Scale)))],
                 NoItinerary>;

def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32,
                             cvtfix_i32_op<f32>, "fcvtzs", fp_to_sint>;
def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32,
                             cvtfix_i64_op<f32>, "fcvtzs", fp_to_sint>;
def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32,
                             cvtfix_i32_op<f32>, "fcvtzu", fp_to_uint>;
def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32,
                             cvtfix_i64_op<f32>, "fcvtzu", fp_to_uint>;

def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64,
                             cvtfix_i32_op<f64>, "fcvtzs", fp_to_sint>;
def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64,
                             cvtfix_i64_op<f64>, "fcvtzs", fp_to_sint>;
def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64,
                             cvtfix_i32_op<f64>, "fcvtzu", fp_to_uint>;
def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64,
                             cvtfix_i64_op<f64>, "fcvtzu", fp_to_uint>;


// Fixed-point to floating-point conversion "OP Rd, Rn, #scale". Matched as
// the converted integer divided by the scale.
class A64I_fixtofp<bit sf, bits<2> type, bits<3> opcode,
                   RegisterClass FPR, RegisterClass GPR, Operand scale_op,
                   string asmop, SDNode cvtop>
  : A64I_fpfixed<sf, 0b0, type, 0b00, opcode,
                 (outs FPR:$Rd), (ins GPR:$Rn, scale_op:$Scale),
                 !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
                 [(set FPR:$Rd, (fdiv (cvtop GPR:$Rn), scale_op:$Scale))],
                 NoItinerary>;

def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32,
                            cvtfix_i32_op<f32>, "scvtf", sint_to_fp>;
def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64,
                            cvtfix_i64_op<f32>, "scvtf", sint_to_fp>;
def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32,
                            cvtfix_i32_op<f32>, "ucvtf", uint_to_fp>;
def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64,
                            cvtfix_i64_op<f32>, "ucvtf", uint_to_fp>;
def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32,
                            cvtfix_i32_op<f64>, "scvtf", sint_to_fp>;
def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64,
                            cvtfix_i64_op<f64>, "scvtf", sint_to_fp>;
def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32,
                            cvtfix_i32_op<f64>, "ucvtf", uint_to_fp>;
def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64,
                            cvtfix_i64_op<f64>, "ucvtf", uint_to_fp>;

//===----------------------------------------------------------------------===//
// Floating-point <-> integer conversion instructions
//===----------------------------------------------------------------------===//
// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF

// Register-to-register conversion or move "OP Rd, Rn" with no pattern
// attached; selection is handled by the separate Pat records below.
class A64I_fpintI<bit sf, bits<2> type, bits<2> rmode, bits<3> opcode,
                   RegisterClass DestPR, RegisterClass SrcPR, string asmop>
  : A64I_fpint<sf, 0b0, type, rmode, opcode, (outs DestPR:$Rd), (ins SrcPR:$Rn),
               !strconcat(asmop, "\t$Rd, $Rn"), [], NoItinerary>;

// All eight float-to-integer variants for one rounding mode: signed (S) and
// unsigned (U), 32-bit (w) and 64-bit (x) destinations, single (s) and
// double (d) sources.
multiclass A64I_fptointRM<bits<2> rmode, bit o2, string asmop> {
  def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0},
                        GPR32, FPR32, asmop # "s">;
  def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0},
                        GPR64, FPR32, asmop # "s">;
  def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1},
                        GPR32, FPR32, asmop # "u">;
  def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1},
                        GPR64, FPR32, asmop # "u">;

  def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0},
                        GPR32, FPR64, asmop # "s">;
  def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0},
                        GPR64, FPR64, asmop # "s">;
  def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1},
                        GPR32, FPR64, asmop # "u">;
  def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1},
                        GPR64, FPR64, asmop # "u">;
}

defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">;
defm FCVTP : A64I_fptointRM<0b01, 0b0, "fcvtp">;
defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">;
defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">;
defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">;

// fp_to_sint / fp_to_uint select the FCVTZ family.
def : Pat<(i32 (fp_to_sint FPR32:$Rn)), (FCVTZSws FPR32:$Rn)>;
def : Pat<(i64 (fp_to_sint FPR32:$Rn)), (FCVTZSxs FPR32:$Rn)>;
def : Pat<(i32 (fp_to_uint FPR32:$Rn)), (FCVTZUws FPR32:$Rn)>;
def : Pat<(i64 (fp_to_uint FPR32:$Rn)), (FCVTZUxs FPR32:$Rn)>;
def : Pat<(i32 (fp_to_sint (f64 FPR64:$Rn))), (FCVTZSwd FPR64:$Rn)>;
def : Pat<(i64 (fp_to_sint (f64 FPR64:$Rn))), (FCVTZSxd FPR64:$Rn)>;
def : Pat<(i32 (fp_to_uint (f64 FPR64:$Rn))), (FCVTZUwd FPR64:$Rn)>;
def : Pat<(i64 (fp_to_uint (f64 FPR64:$Rn))), (FCVTZUxd FPR64:$Rn)>;

// The four integer-to-float variants of one mnemonic. The defm name supplies
// the leading S or U of the record names.
multiclass A64I_inttofp<bit o0, string asmop> {
  def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>;
  def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>;
  def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>;
  def CVTFdx : A64I_fpintI<0b1, 0b01, 0b00, {0, 1, o0}, FPR64, GPR64, asmop>;
}

defm S : A64I_inttofp<0b0, "scvtf">;
defm U : A64I_inttofp<0b1, "ucvtf">;

def : Pat<(f32 (sint_to_fp GPR32:$Rn)), (SCVTFsw GPR32:$Rn)>;
def : Pat<(f32 (sint_to_fp GPR64:$Rn)), (SCVTFsx GPR64:$Rn)>;
def : Pat<(f64 (sint_to_fp GPR32:$Rn)), (SCVTFdw GPR32:$Rn)>;
def : Pat<(f64 (sint_to_fp GPR64:$Rn)), (SCVTFdx GPR64:$Rn)>;
def : Pat<(f32 (uint_to_fp GPR32:$Rn)), (UCVTFsw GPR32:$Rn)>;
def : Pat<(f32 (uint_to_fp GPR64:$Rn)), (UCVTFsx GPR64:$Rn)>;
def : Pat<(f64 (uint_to_fp GPR32:$Rn)), (UCVTFdw GPR32:$Rn)>;
def : Pat<(f64 (uint_to_fp GPR64:$Rn)), (UCVTFdx GPR64:$Rn)>;

// Moves between the integer and floating-point register files; these
// implement same-width bitconverts.
def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">;
def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">;
def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">;
def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">;

def : Pat<(i32 (bitconvert (f32 FPR32:$Rn))), (FMOVws FPR32:$Rn)>;
def : Pat<(f32 (bitconvert (i32 GPR32:$Rn))), (FMOVsw GPR32:$Rn)>;
def : Pat<(i64 (bitconvert (f64 FPR64:$Rn))), (FMOVxd FPR64:$Rn)>;
def : Pat<(f64 (bitconvert (i64 GPR64:$Rn))), (FMOVdx GPR64:$Rn)>;

def lane1_asmoperand : AsmOperandClass {
  let Name = "Lane1";
  let RenderMethod = "addImmOperands";
}

def lane1 : Operand<i32> {
  let ParserMatchClass = lane1_asmoperand;
  let PrintMethod = "printBareImmOperand";
}

// Moves between a 64-bit general register and a lane of a 128-bit vector
// register, written with a ".d[lane]" suffix.
let DecoderMethod = "DecodeFMOVLaneInstruction" in {
  def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110,
                          (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane),
                          "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>;

  def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111,
                          (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane),
                          "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>;
}

// The ".2d[lane]" spelling is accepted as well.
def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]",
                (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>;

def : InstAlias<"fmov $Rd.2d[$Lane], $Rn",
                (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>;

//===----------------------------------------------------------------------===//
// Floating-point immediate instructions
//===----------------------------------------------------------------------===//
// Contains: FMOV

def fpimm_asmoperand : AsmOperandClass {
  let Name = "FMOVImm";
  let ParserMethod = "ParseFPImmOperand";
}

// The MCOperand for these instructions are the encoded 8-bit values.
+def SDXF_fpimm : SDNodeXForm<fpimm, [{
+  // isFPImm hands back the encoded form through its second argument; the
+  // PatLeaf using this transform has already checked that the value is legal.
+  uint32_t EncodedImm;
+  A64Imms::isFPImm(N->getValueAPF(), EncodedImm);
+  return CurDAG->getTargetConstant(EncodedImm, MVT::i32);
+}]>;
+
+// Immediate operand of "fmov <Fd>, #imm": matches an fpimm of type FT that
+// A64Imms::isFPImm accepts, and converts it with SDXF_fpimm.
+class fmov_operand<ValueType FT>
+  : Operand<i32>,
+    PatLeaf<(FT fpimm),
+            [{ return A64Imms::isFPImm(N->getValueAPF()); }],
+            SDXF_fpimm> {
+  let ParserMatchClass = fpimm_asmoperand;
+  let PrintMethod = "printFPImmOperand";
+}
+
+def fmov32_operand : fmov_operand<f32>;
+def fmov64_operand : fmov_operand<f64>;
+
+// One FMOV (immediate) instruction: "type" selects the encoding variant, Reg/VT
+// give the destination register class and value type, ImmOpnd is the matching
+// fmov_operand instance.
+class A64I_fpimm_impl<bits<2> type, RegisterClass Reg, ValueType VT,
+                      Operand ImmOpnd>
+  : A64I_fpimm<0b0, 0b0, type, 0b00000,
+               (outs Reg:$Rd), (ins ImmOpnd:$Imm8),
+               "fmov\t$Rd, $Imm8",
+               [(set (VT Reg:$Rd), ImmOpnd:$Imm8)],
+               NoItinerary>;
+
+def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>;
+def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>;
+
+//===----------------------------------------------------------------------===//
+// Load-register (literal) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDR, LDRSW, PRFM
+
+def ldrlit_label_asmoperand : AsmOperandClass {
+  let Name = "LoadLitLabel";
+  let RenderMethod = "addLabelOperands<19, 4>";
+}
+
+// PC-relative label operand of the literal loads: a 19-bit offset from PC,
+// scaled by the instruction width (4).
+def ldrlit_label : Operand<i64> {
+  let OperandType = "OPERAND_PCREL";
+  let ParserMatchClass = ldrlit_label_asmoperand;
+  let EncoderMethod = "getLoadLitLabelOpValue";
+  let PrintMethod = "printLabelOperand<19, 4>";
+}
+
+// Various instructions take an immediate value (which can always be used),
+// where some numbers have a symbolic name to make things easier. These operands
+// and the associated functions abstract away the differences.
+// Instantiates the operand pair for an immediate that may also be written by
+// name: "defm prefetch" below yields prefetch_asmoperand (for the assembly
+// parser) and prefetch_op (for instruction operand lists). "mapper" is the C++
+// type passed to the parse/print/decode template functions.
+multiclass namedimm<string prefix, string mapper> {
+  def _asmoperand : AsmOperandClass {
+    let Name = "NamedImm" # prefix;
+    let ParserMethod = "ParseNamedImmOperand<" # mapper # ">";
+    let PredicateMethod = "isUImm";
+    let RenderMethod = "addImmOperands";
+  }
+
+  def _op : Operand<i32> {
+    let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+    let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">";
+    let PrintMethod = "printNamedImmOperand<" # mapper # ">";
+  }
+}
+
+defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">;
+
+// "ldr <Rt>, <label>": the plain literal load; opc/v pick the encoding and
+// OutReg the destination register class.
+class A64I_LDRlitSimple<bits<2> opc, bit v, RegisterClass OutReg,
+                        list<dag> patterns = []>
+  : A64I_LDRlit<opc, v,
+                (outs OutReg:$Rt), (ins ldrlit_label:$Imm19),
+                "ldr\t$Rt, $Imm19", patterns, NoItinerary>;
+
+// The pattern-less definitions are marked as loads explicitly.
+let mayLoad = 1 in def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>;
+let mayLoad = 1 in def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>;
+
+// The S and D forms carry a constant-pool load pattern.
+def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32,
+                 [(set (f32 FPR32:$Rt), (load constpool:$Imm19))]>;
+def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64,
+                 [(set (f64 FPR64:$Rt), (load constpool:$Imm19))]>;
+
+let mayLoad = 1 in def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>;
+
+def LDRSWx_lit : A64I_LDRlit<0b10, 0b0,
+                             (outs GPR64:$Rt), (ins ldrlit_label:$Imm19),
+                             "ldrsw\t$Rt, $Imm19",
+                             [], NoItinerary> {
+  let mayLoad = 1;
+}
+
+def PRFM_lit : A64I_LDRlit<0b11, 0b0,
+                           (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19),
+                           "prfm\t$Rt, $Imm19",
+                           [], NoItinerary> {
+  let mayLoad = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Load-store exclusive instructions
+//===----------------------------------------------------------------------===//
+// Contains: STXRB, STXRH, STXR, LDXRB, LDXRH, LDXR.
STXP, LDXP, STLXRB,
+//           STLXRH, STLXR, LDAXRB, LDAXRH, LDAXR, STLXP, LDAXP, STLRB,
+//           STLRH, STLR, LDARB, LDARH, LDAR
+
+// Since these instructions have the undefined register bits set to 1 in
+// their canonical form, we need a post encoder method to set those bits
+// to 1 when encoding these instructions. We do this using the
+// fixLoadStoreExclusive function. This function has template parameters:
+//
+// fixLoadStoreExclusive<int hasRs, int hasRt2>
+//
+// hasRs indicates that the instruction uses the Rs field, so we won't set
+// it to 1 (and the same for Rt2). We don't need template parameters for
+// the other register fields since Rt and Rn are always used.
+
+// This operand parses a GPR64xsp register, followed by an optional immediate
+// #0.
+def GPR64xsp0_asmoperand : AsmOperandClass
+{
+  let Name = "GPR64xsp0";
+  let PredicateMethod = "isWrappedReg";
+  let RenderMethod = "addRegOperands";
+  let ParserMethod = "ParseLSXAddressOperand";
+}
+
+def GPR64xsp0 : RegisterOperand<GPR64xsp>
+{
+  let ParserMatchClass = GPR64xsp0_asmoperand;
+}
+
+//===----------------------------------
+// Store-exclusive (releasing & normal)
+//===----------------------------------
+
+// "<asm> $Rs, $Rt, [$Rn]": Rs and Rt are encoded, Rt2 is not, hence
+// fixLoadStoreExclusive<1,0>.
+class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+                        dag ins, list<dag> pat,
+                        InstrItinClass itin> :
+       A64I_LDSTex_stn <size,
+                        opcode{2}, 0, opcode{1}, opcode{0},
+                        outs, ins,
+                        !strconcat(asm, "\t$Rs, $Rt, [$Rn]"),
+                        pat, itin> {
+  let mayStore = 1;
+  let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
+}
+
+// Byte/halfword/word/doubleword store-exclusive; the status result $Rs is an
+// output. NOTE(review): "prefix" is not referenced in the body; it is kept so
+// existing defm lines continue to work.
+multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> {
+  def _byte:  A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+                              (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+                              [], NoItinerary>;
+
+  def _hword:  A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+                               (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+                               [],NoItinerary>;
+
+  def _word:  A64I_SRexs_impl<0b10, opcode, asmstr,
+                              (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+                              [], NoItinerary>;
+
+  def _dword: A64I_SRexs_impl<0b11, opcode, asmstr,
+                              (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
+                              [], NoItinerary>;
+}
+
+defm STXR  : A64I_SRex<"stxr",  0b000, "STXR">;
+defm STLXR : A64I_SRex<"stlxr", 0b001, "STLXR">;
+
+//===----------------------------------
+// Loads
+//===----------------------------------
+
+// "<asm> $Rt, [$Rn]": neither Rs nor Rt2 is encoded, hence
+// fixLoadStoreExclusive<0,0>.
+class A64I_LRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+                        dag ins, list<dag> pat,
+                        InstrItinClass itin> :
+        A64I_LDSTex_tn <size,
+                        opcode{2}, 1, opcode{1}, opcode{0},
+                        outs, ins,
+                        !strconcat(asm, "\t$Rt, [$Rn]"),
+                        pat, itin> {
+  let mayLoad = 1;
+  let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+multiclass A64I_LRex<string asmstr, bits<3> opcode> {
+  def _byte:  A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+                            (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+                            [], NoItinerary>;
+
+  def _hword:  A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+                            (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+                            [], NoItinerary>;
+
+  def _word:  A64I_LRexs_impl<0b10, opcode, asmstr,
+                            (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+                            [], NoItinerary>;
+
+  def _dword: A64I_LRexs_impl<0b11, opcode, asmstr,
+                            (outs GPR64:$Rt), (ins GPR64xsp0:$Rn),
+                            [], NoItinerary>;
+}
+
+defm LDXR  : A64I_LRex<"ldxr",  0b000>;
+defm LDAXR : A64I_LRex<"ldaxr", 0b001>;
+defm LDAR  : A64I_LRex<"ldar",  0b101>;
+
+// Matches "base" only when the atomic node's ordering is exactly Acquire.
+class acquiring_load<PatFrag base>
+  : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+  return cast<AtomicSDNode>(N)->getOrdering() == Acquire;
+}]>;
+
+def atomic_load_acquire_8  : acquiring_load<atomic_load_8>;
+def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
+def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
+def atomic_load_acquire_64 : acquiring_load<atomic_load_64>;
+
+def : Pat<(atomic_load_acquire_8  GPR64xsp:$Rn), (LDAR_byte  GPR64xsp0:$Rn)>;
+def : Pat<(atomic_load_acquire_16 GPR64xsp:$Rn), (LDAR_hword GPR64xsp0:$Rn)>;
+def : Pat<(atomic_load_acquire_32 GPR64xsp:$Rn), (LDAR_word  GPR64xsp0:$Rn)>;
+def : Pat<(atomic_load_acquire_64 GPR64xsp:$Rn), (LDAR_dword GPR64xsp0:$Rn)>;
+
+//===----------------------------------
+// Store-release (no exclusivity)
+//===----------------------------------
+
+class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+                        dag ins, list<dag> pat,
+                        InstrItinClass itin> :
+        A64I_LDSTex_tn <size,
+                        opcode{2}, 0, opcode{1}, opcode{0},
+                        outs, ins,
+                        !strconcat(asm, "\t$Rt, [$Rn]"),
+                        pat, itin> {
+  let mayStore = 1;
+  let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+// Matches "base" only when the atomic node's ordering is exactly Release.
+class releasing_store<PatFrag base>
+  : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+  return cast<AtomicSDNode>(N)->getOrdering() == Release;
+}]>;
+
+def atomic_store_release_8  : releasing_store<atomic_store_8>;
+def atomic_store_release_16 : releasing_store<atomic_store_16>;
+def atomic_store_release_32 : releasing_store<atomic_store_32>;
+def atomic_store_release_64 : releasing_store<atomic_store_64>;
+
+// NOTE(review): "prefix" is not referenced in the body; it is kept so existing
+// defm lines continue to work.
+multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> {
+  def _byte:  A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+                            (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+                            [(atomic_store_release_8 GPR64xsp0:$Rn, GPR32:$Rt)],
+                            NoItinerary>;
+
+  def _hword:  A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+                           (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+                           [(atomic_store_release_16 GPR64xsp0:$Rn, GPR32:$Rt)],
+                           NoItinerary>;
+
+  def _word:  A64I_SLexs_impl<0b10, opcode, asmstr,
+                           (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+                           [(atomic_store_release_32 GPR64xsp0:$Rn, GPR32:$Rt)],
+                           NoItinerary>;
+
+  def _dword: A64I_SLexs_impl<0b11, opcode, asmstr,
+                           (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
+                           [(atomic_store_release_64 GPR64xsp0:$Rn, GPR64:$Rt)],
+                           NoItinerary>;
+}
+
+defm STLR  : A64I_SLex<"stlr", 0b101, "STLR">;
+
+//===----------------------------------
+// Store-exclusive pair (releasing & normal)
+//===----------------------------------
+
+// "<asm> $Rs, $Rt, $Rt2, [$Rn]": all four register fields appear in the
+// assembly string and no PostEncoderMethod is set.
+class A64I_SPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+                        dag ins, list<dag> pat,
+                        InstrItinClass itin> :
+     A64I_LDSTex_stt2n <size,
+                        opcode{2}, 0, opcode{1}, opcode{0},
+                        outs, ins,
+                        !strconcat(asm, "\t$Rs, $Rt, $Rt2, [$Rn]"),
+                        pat, itin>
+{
+  let mayStore = 1;
+}
+
+
+// The status result $Rs is written by the instruction, so it is declared in
+// "outs", matching the single-register A64I_SRex forms above. Outputs precede
+// inputs in the operand list, so the operand order (Rs, Rt, Rt2, Rn) is the
+// same as when Rs led the "ins" list.
+multiclass A64I_SPex<string asmstr, bits<3> opcode> {
+  def _word:  A64I_SPexs_impl<0b10, opcode, asmstr,
+                            (outs GPR32:$Rs),
+                            (ins GPR32:$Rt, GPR32:$Rt2, GPR64xsp0:$Rn),
+                            [], NoItinerary>;
+
+  def _dword: A64I_SPexs_impl<0b11, opcode, asmstr,
+                            (outs GPR32:$Rs),
+                            (ins GPR64:$Rt, GPR64:$Rt2, GPR64xsp0:$Rn),
+                            [], NoItinerary>;
+}
+
+defm STXP  : A64I_SPex<"stxp", 0b010>;
+defm STLXP : A64I_SPex<"stlxp", 0b011>;
+
+//===----------------------------------
+// Load-exclusive pair (acquiring & normal)
+//===----------------------------------
+
+// "<asm> $Rt, $Rt2, [$Rn]": Rt2 is encoded but Rs is not, hence
+// fixLoadStoreExclusive<0,1>.
+class A64I_LPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+                        dag ins, list<dag> pat,
+                        InstrItinClass itin> :
+      A64I_LDSTex_tt2n <size,
+                        opcode{2}, 1, opcode{1}, opcode{0},
+                        outs, ins,
+                        !strconcat(asm, "\t$Rt, $Rt2, [$Rn]"),
+                        pat, itin>{
+  let mayLoad = 1;
+  let DecoderMethod = "DecodeLoadPairExclusiveInstruction";
+  let PostEncoderMethod = "fixLoadStoreExclusive<0,1>";
+}
+
+multiclass A64I_LPex<string asmstr, bits<3> opcode> {
+  def _word:  A64I_LPexs_impl<0b10, opcode, asmstr,
+                            (outs GPR32:$Rt, GPR32:$Rt2),
+                            (ins GPR64xsp0:$Rn),
+                            [], NoItinerary>;
+
+  def _dword: A64I_LPexs_impl<0b11, opcode, asmstr,
+                            (outs GPR64:$Rt, GPR64:$Rt2),
+                            (ins GPR64xsp0:$Rn),
+                            [], NoItinerary>;
+}
+
+defm LDXP  : A64I_LPex<"ldxp", 0b010>;
+defm LDAXP : A64I_LPex<"ldaxp", 0b011>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register (unscaled immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDURB, LDURH, LDURSB, LDURSH, LDURSW, STUR, STURB, STURH and PRFUM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (register offset)
instructions +//===----------------------------------------------------------------------===// +// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register (unsigned immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register (immediate post-indexed) instructions +//===----------------------------------------------------------------------===// +// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register (immediate pre-indexed) instructions +//===----------------------------------------------------------------------===// +// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW + +// Note that patterns are much later on in a completely separate section (they +// need ADRPxi to be defined). + +//===------------------------------- +// 1. Various operands needed +//===------------------------------- + +//===------------------------------- +// 1.1 Unsigned 12-bit immediate operands +//===------------------------------- +// The addressing mode for these instructions consists of an unsigned 12-bit +// immediate which is scaled by the size of the memory access. +// +// We represent this in the MC layer by two operands: +// 1. A base register. +// 2. A 12-bit immediate: not multiplied by access size, so "LDR x0,[x0,#8]" +// would have '1' in this field. +// This means that separate functions are needed for converting representations +// which *are* aware of the intended access size. 
+
+// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
+// know the access size via some means. An isolated operand does not have this
+// information unless told from here, which means we need separate tablegen
+// Operands for each access size. This multiclass takes care of instantiating
+// the correct template functions in the rest of the backend.
+
+//===-------------------------------
+// 1.1 Unsigned 12-bit immediate operands
+//===-------------------------------
+
+// MemSize is the access size in bytes; it is spliced into the names of the C++
+// template functions used for matching, rendering, selecting, printing and
+// encoding the offset. "prefix" repeats the defm name so that the operand can
+// look up its own AsmOperandClass.
+multiclass offsets_uimm12<int MemSize, string prefix> {
+  def uimm12_asmoperand : AsmOperandClass {
+    let Name = "OffsetUImm12_" # MemSize;
+    let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">";
+    let PredicateMethod = "isOffsetUImm12<" # MemSize # ">";
+  }
+
+  // Pattern is really no more than an ImmLeaf, but predicated on MemSize which
+  // complicates things beyond TableGen's ken.
+  def uimm12
+    : Operand<i64>,
+      ComplexPattern<i64, 1, "SelectOffsetUImm12<" # MemSize # ">"> {
+    let EncoderMethod = "getOffsetUImm12OpValue<" # MemSize # ">";
+    let PrintMethod = "printOffsetUImm12Operand<" # MemSize # ">";
+
+    let ParserMatchClass = !cast<AsmOperandClass>(prefix # uimm12_asmoperand);
+  }
+}
+
+defm byte_  : offsets_uimm12<1,  "byte_">;
+defm hword_ : offsets_uimm12<2,  "hword_">;
+defm word_  : offsets_uimm12<4,  "word_">;
+defm dword_ : offsets_uimm12<8,  "dword_">;
+defm qword_ : offsets_uimm12<16, "qword_">;
+
+//===-------------------------------
+// 1.2 Signed 9-bit immediate operands
+//===-------------------------------
+
+// The MCInst is expected to store the bit-wise encoding of the value,
+// which amounts to lopping off the extended sign bits.
+def SDXF_simm9 : SDNodeXForm<imm, [{
+  // Keep only the low nine bits: the raw two's-complement field contents.
+  uint64_t FieldBits = N->getZExtValue() & 0x1ff;
+  return CurDAG->getTargetConstant(FieldBits, MVT::i32);
+}]>;
+
+def simm9_asmoperand : AsmOperandClass {
+  let Name = "SImm9";
+  let RenderMethod = "addSImmOperands<9>";
+  let PredicateMethod = "isSImm<9>";
+}
+
+// Signed 9-bit offset: accepts -256..255 and is rewritten by SDXF_simm9.
+def simm9
+  : Operand<i64>,
+    ImmLeaf<i64, [{ return Imm >= -256 && Imm <= 255; }], SDXF_simm9> {
+  let ParserMatchClass = simm9_asmoperand;
+  let PrintMethod = "printOffsetSImm9Operand";
+}
+
+
+//===-------------------------------
+// 1.3 Register offset extensions
+//===-------------------------------
+
+// The assembly-syntax for these addressing-modes is:
+//    [<Xn|SP>, <R><m> {, <extend> {<amount>}}]
+//
+// The essential semantics are:
+//     + <amount> is a shift: #<log(transfer size)> or #0
+//     + <R> can be W or X.
+//     + If <R> is W, <extend> can be UXTW or SXTW
+//     + If <R> is X, <extend> can be LSL or SXTX
+//
+// The trickiest of those constraints is that Rm can be either GPR32 or GPR64,
+// which will need separate instructions for LLVM type-consistency. We'll also
+// need separate operands, of course.
+//
+// NOTE(review): the GPR parameter is not referenced in the body; it is kept so
+// the existing instantiations remain valid.
+multiclass regexts<int MemSize, int RmSize, RegisterClass GPR,
+                   string Rm, string prefix> {
+  def regext_asmoperand : AsmOperandClass {
+    let Name = "AddrRegExtend_" # MemSize # "_" # Rm;
+    let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">";
+    let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">";
+  }
+
+  def regext : Operand<i64> {
+    let ParserMatchClass = !cast<AsmOperandClass>(prefix # regext_asmoperand);
+    let DecoderMethod = "DecodeAddrRegExtendOperand";
+
+    let PrintMethod
+      = "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">";
+  }
+}
+
+// Instantiates both register-width flavours for one access size.
+multiclass regexts_wx<int MemSize, string prefix> {
+  // Rm is an X-register if LSL or SXTX are specified as the shift.
+  defm Xm_ : regexts<MemSize, 64, GPR64, "Xm", prefix # "Xm_">;
+
+  // Rm is a W-register if UXTW or SXTW are specified as the shift.
+  defm Wm_ : regexts<MemSize, 32, GPR32, "Wm", prefix # "Wm_">;
+}
+
+defm byte_  : regexts_wx<1,  "byte_">;
+defm hword_ : regexts_wx<2,  "hword_">;
+defm word_  : regexts_wx<4,  "word_">;
+defm dword_ : regexts_wx<8,  "dword_">;
+defm qword_ : regexts_wx<16, "qword_">;
+
+
+//===------------------------------
+// 2. The instructions themselves.
+//===------------------------------
+
+// We have the following instructions to implement:
+// |                 | B     | H     | W     | X      |
+// |-----------------+-------+-------+-------+--------|
+// | unsigned str    | STRB  | STRH  | STR   | STR    |
+// | unsigned ldr    | LDRB  | LDRH  | LDR   | LDR    |
+// | signed ldr to W | LDRSB | LDRSH | -     | -      |
+// | signed ldr to X | LDRSB | LDRSH | LDRSW | (PRFM) |
+
+// This will instantiate the LDR/STR instructions you'd expect to use for an
+// unsigned datatype (first two rows above) or floating-point register, which is
+// reasonably uniform across all access sizes.
+
+
+//===------------------------------
+// 2.1 Regular instructions
+//===------------------------------
+
+// This class covers the basic unsigned or irrelevantly-signed loads and stores,
+// to general-purpose and floating-point registers.
+
+// Bundles the per-access-size operands the load/store multiclasses need: the
+// unsigned 12-bit offset and the W-/X-register extend operands, all looked up
+// by name from "prefix".
+class AddrParams<string prefix> {
+  Operand uimm12 = !cast<Operand>(prefix # "_uimm12");
+
+  Operand regextWm = !cast<Operand>(prefix # "_Wm_regext");
+  Operand regextXm = !cast<Operand>(prefix # "_Xm_regext");
+}
+
+def byte_addrparams  : AddrParams<"byte">;
+def hword_addrparams : AddrParams<"hword">;
+def word_addrparams  : AddrParams<"word">;
+def dword_addrparams : AddrParams<"dword">;
+def qword_addrparams : AddrParams<"qword">;
+
+// Every addressing form of one STR/LDR flavour. "prefix" must repeat the defm
+// name (the aliases use it to find the instruction records), asmsuffix is
+// appended to the mnemonic, GPR is the class of the transferred register and
+// params supplies the offset operands for the access size.
+multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
+                                bit high_opc, string asmsuffix,
+                                RegisterClass GPR, AddrParams params> {
+  // Unsigned immediate
+  let mayStore = 1 in
+  def _STR : A64I_LSunsigimm<size, v, {high_opc, 0b0},
+                 (outs),
+                 (ins GPR:$Rt, GPR64xsp:$Rn, params.uimm12:$UImm12),
+                 "str" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
+                 [], NoItinerary>;
+  def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]",
+                (!cast<Instruction>(prefix # "_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+  let mayLoad = 1 in
+  def _LDR : A64I_LSunsigimm<size, v, {high_opc, 0b1},
+                 (outs GPR:$Rt),
+                 (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+                 "ldr" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
+                 [], NoItinerary>;
+  def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]",
+                (!cast<Instruction>(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+  // Register offset (four of these: load/store and Wm/Xm).
+  def _Wm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b0,
+                 (outs GPR:$Rt),
+                 (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+                 "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+                 [], NoItinerary> {
+    let mayLoad = 1;
+  }
+
+  def _Xm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b1,
+                 (outs GPR:$Rt),
+                 (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+                 "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+                 [], NoItinerary> {
+    let mayLoad = 1;
+  }
+  def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]",
+        (!cast<Instruction>(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn,
+                                                          GPR64:$Rm, 2)>;
+
+  def _Wm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b0,
+                 (outs),
+                 (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+                 "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+                 [], NoItinerary> {
+    let mayStore = 1;
+  }
+
+  def _Xm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b1,
+                 (outs),
+                 (ins GPR:$Rt, GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+                 "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+                 [], NoItinerary> {
+    let mayStore = 1;
+  }
+  def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]",
+        (!cast<Instruction>(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn,
+                                                          GPR64:$Rm, 2)>;
+
+  // Unaligned immediate
+  let mayStore = 1 in
+  def _STUR : A64I_LSunalimm<size, v, {high_opc, 0b0},
+                 (outs),
+                 (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+                 "stur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+                 [], NoItinerary>;
+  def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]",
+               (!cast<Instruction>(prefix # "_STUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+  let mayLoad = 1 in
+  def _LDUR : A64I_LSunalimm<size, v, {high_opc, 0b1},
+                 (outs GPR:$Rt),
+                 (ins GPR64xsp:$Rn, simm9:$SImm9),
+                 "ldur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+                 [], NoItinerary>;
+  def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]",
+               (!cast<Instruction>(prefix # "_LDUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+  // The four write-back forms tie the updated base to $Rn and share a decoder.
+  // For the stores the decoder is only needed for unpredictability checking
+  // (FIXME).
+  let Constraints = "$Rn = $Rn_wb",
+      DecoderMethod = "DecodeSingleIndexedInstruction" in {
+    // Post-indexed
+    def _PostInd_STR : A64I_LSpostind<size, v, {high_opc, 0b0},
+                 (outs GPR64xsp:$Rn_wb),
+                 (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+                 "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
+                 [], NoItinerary> {
+      let mayStore = 1;
+    }
+
+    def _PostInd_LDR : A64I_LSpostind<size, v, {high_opc, 0b1},
+                 (outs GPR:$Rt, GPR64xsp:$Rn_wb),
+                 (ins GPR64xsp:$Rn, simm9:$SImm9),
+                 "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
+                 [], NoItinerary> {
+      let mayLoad = 1;
+    }
+
+    // Pre-indexed
+    def _PreInd_STR : A64I_LSpreind<size, v, {high_opc, 0b0},
+                 (outs GPR64xsp:$Rn_wb),
+                 (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+                 "str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
+                 [], NoItinerary> {
+      let mayStore = 1;
+    }
+
+    def _PreInd_LDR : A64I_LSpreind<size, v, {high_opc, 0b1},
+                 (outs GPR:$Rt, GPR64xsp:$Rn_wb),
+                 (ins GPR64xsp:$Rn, simm9:$SImm9),
+                 "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
+                 [], NoItinerary> {
+      let mayLoad = 1;
+    }
+  }
+
+}
+
+// STRB/LDRB: First define the instructions
+defm LS8
+  : A64I_LDRSTR_unsigned<"LS8", 0b00, 0b0, 0b0, "b", GPR32, byte_addrparams>;
+
+// STRH/LDRH
+defm LS16
+  : A64I_LDRSTR_unsigned<"LS16", 0b01, 0b0, 0b0, "h", GPR32, hword_addrparams>;
+
+
+// STR/LDR to/from a W register
+defm LS32
+  : A64I_LDRSTR_unsigned<"LS32", 0b10, 0b0, 0b0, "", GPR32, word_addrparams>;
+
+// STR/LDR to/from an X register
+defm LS64
+  : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>;
+
+// STR/LDR to/from a B register
+defm LSFP8
+  : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>;
+
+// STR/LDR to/from
an H register
+defm LSFP16
+  : A64I_LDRSTR_unsigned<"LSFP16", 0b01, 0b1, 0b0, "", FPR16, hword_addrparams>;
+
+// STR/LDR to/from an S register
+defm LSFP32
+  : A64I_LDRSTR_unsigned<"LSFP32", 0b10, 0b1, 0b0, "", FPR32, word_addrparams>;
+// STR/LDR to/from a D register
+defm LSFP64
+  : A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>;
+// STR/LDR to/from a Q register
+defm LSFP128
+  : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128, qword_addrparams>;
+
+//===------------------------------
+// 2.3 Signed loads
+//===------------------------------
+
+// Byte and half-word signed loads can both go into either an X or a W register,
+// so it's worth factoring out. Signed word loads don't fit because there is no
+// W version.
+//
+// "prefix" must repeat the defm name; the aliases use it to find the
+// instruction records. Each addressing form comes in a "w" (32-bit result) and
+// an "x" (64-bit result) variant.
+multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
+                           string prefix>
+{
+  // Unsigned offset
+  def w : A64I_LSunsigimm<size, 0b0, 0b11,
+                 (outs GPR32:$Rt),
+                 (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+                 "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
+                 [], NoItinerary>
+  {
+    let mayLoad = 1;
+  }
+  def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
+                  (!cast<Instruction>(prefix # w) GPR32:$Rt, GPR64xsp:$Rn, 0)>;
+
+  def x : A64I_LSunsigimm<size, 0b0, 0b10,
+                 (outs GPR64:$Rt),
+                 (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+                 "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
+                 [], NoItinerary>
+  {
+    let mayLoad = 1;
+  }
+  def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
+                  (!cast<Instruction>(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+  // Register offset
+  let mayLoad = 1 in
+  {
+    def w_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b0,
+                 (outs GPR32:$Rt),
+                 (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+                 "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+                 [], NoItinerary>;
+
+    def w_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b1,
+                 (outs GPR32:$Rt),
+                 (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+                 "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+                 [], NoItinerary>;
+
+    def x_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b0,
+                 (outs GPR64:$Rt),
+                 (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+                 "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+                 [], NoItinerary>;
+
+    def x_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b1,
+                 (outs GPR64:$Rt),
+                 (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+                 "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+                 [], NoItinerary>;
+  }
+  def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
+        (!cast<Instruction>(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn,
+                                                       GPR64:$Rm, 2)>;
+
+  def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
+        (!cast<Instruction>(prefix # "x_Xm_RegOffset") GPR64:$Rt, GPR64xsp:$Rn,
+                                                       GPR64:$Rm, 2)>;
+
+
+  let mayLoad = 1 in
+  {
+    // Unaligned offset
+    def w_U : A64I_LSunalimm<size, 0b0, 0b11,
+                 (outs GPR32:$Rt),
+                 (ins GPR64xsp:$Rn, simm9:$SImm9),
+                 "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+                 [], NoItinerary>;
+
+    def x_U : A64I_LSunalimm<size, 0b0, 0b10,
+                 (outs GPR64:$Rt),
+                 (ins GPR64xsp:$Rn, simm9:$SImm9),
+                 "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+                 [], NoItinerary>;
+
+
+    // Post-indexed
+    def w_PostInd : A64I_LSpostind<size, 0b0, 0b11,
+                 (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
+                 (ins GPR64xsp:$Rn, simm9:$SImm9),
+                 "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
+                 [], NoItinerary>
+    {
+      let Constraints = "$Rn = $Rn_wb";
+      let DecoderMethod = "DecodeSingleIndexedInstruction";
+    }
+
+    def x_PostInd : A64I_LSpostind<size, 0b0, 0b10,
+                 (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+                 (ins GPR64xsp:$Rn, simm9:$SImm9),
+                 "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
+                 [], NoItinerary>
+    {
+      let Constraints = "$Rn = $Rn_wb";
+      let DecoderMethod = "DecodeSingleIndexedInstruction";
+    }
+
+    // Pre-indexed
+    def w_PreInd : A64I_LSpreind<size, 0b0, 0b11,
+                 (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
+                 (ins GPR64xsp:$Rn, simm9:$SImm9),
+                 "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
+                 [], NoItinerary>
+    {
+      let Constraints = "$Rn = $Rn_wb";
+      let DecoderMethod = "DecodeSingleIndexedInstruction";
+    }
+
+    def x_PreInd : A64I_LSpreind<size, 0b0, 0b10,
+                 (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+                 (ins GPR64xsp:$Rn, simm9:$SImm9),
+                 "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
+                 [], NoItinerary>
+    {
+      let Constraints = "$Rn = $Rn_wb";
+      let DecoderMethod = "DecodeSingleIndexedInstruction";
+    }
+  } // let mayLoad = 1
+
+  // Zero-offset spelling of the unaligned forms, matching the "[$Rn]" aliases
+  // that STUR, LDUR, LDURSW and PRFUM provide. These sit outside the
+  // "let mayLoad" scope, like the other aliases in this multiclass.
+  def : InstAlias<"ldurs" # asmopcode # " $Rt, [$Rn]",
+         (!cast<Instruction>(prefix # "w_U") GPR32:$Rt, GPR64xsp:$Rn, 0)>;
+
+  def : InstAlias<"ldurs" # asmopcode # " $Rt, [$Rn]",
+         (!cast<Instruction>(prefix # "x_U") GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+}
+
+// LDRSB
+defm LDRSB : A64I_LDR_signed<0b00, "b", byte_addrparams, "LDRSB">;
+// LDRSH
+defm LDRSH : A64I_LDR_signed<0b01, "h", hword_addrparams, "LDRSH">;
+
+// LDRSW: load a 32-bit register, sign-extending to 64-bits.
+def LDRSWx
+    : A64I_LSunsigimm<0b10, 0b0, 0b10,
+                    (outs GPR64:$Rt),
+                    (ins GPR64xsp:$Rn, word_uimm12:$UImm12),
+                    "ldrsw\t$Rt, [$Rn, $UImm12]",
+                    [], NoItinerary>
+{
+  let mayLoad = 1;
+}
+def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+let mayLoad = 1 in
+{
+  def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0,
+                             (outs GPR64:$Rt),
+                             (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext),
+                             "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
+                             [], NoItinerary>;
+
+  def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1,
+                             (outs GPR64:$Rt),
+                             (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext),
+                             "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
+                             [], NoItinerary>;
+}
+def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]",
+                (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>;
+
+
+def LDURSWx
+    : A64I_LSunalimm<0b10, 0b0, 0b10,
+                    (outs GPR64:$Rt),
+                    (ins GPR64xsp:$Rn, simm9:$SImm9),
+                    "ldursw\t$Rt, [$Rn, $SImm9]",
+                    [], NoItinerary>
+{
+  let mayLoad = 1;
+}
+def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+def LDRSWx_PostInd
+    : A64I_LSpostind<0b10, 0b0, 0b10,
+                    (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+                    (ins GPR64xsp:$Rn, simm9:$SImm9),
+                    "ldrsw\t$Rt, [$Rn], $SImm9",
+                    [], NoItinerary>
+{
+  let mayLoad = 1;
+  let Constraints = "$Rn = $Rn_wb";
+  let DecoderMethod = "DecodeSingleIndexedInstruction";
+}
+
+def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10,
+                                 (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+                                 (ins GPR64xsp:$Rn, simm9:$SImm9),
+                                 "ldrsw\t$Rt, [$Rn, $SImm9]!",
+                                 [], NoItinerary>
+{
+  let mayLoad = 1;
+  let Constraints = "$Rn = $Rn_wb";
+  let DecoderMethod = "DecodeSingleIndexedInstruction";
+}
+
+//===------------------------------
+// 2.4 Prefetch operations
+//===------------------------------
+
+def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs),
+                 (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12),
+                 "prfm\t$Rt, [$Rn, $UImm12]",
+                 [], NoItinerary>
+{
+  let mayLoad = 1;
+}
+def : InstAlias<"prfm $Rt, [$Rn]",
+                (PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
+
+let mayLoad = 1 in
+{
+  def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs),
+                                        (ins prefetch_op:$Rt, GPR64xsp:$Rn,
+                                             GPR32:$Rm, dword_Wm_regext:$Ext),
+                                        "prfm\t$Rt, [$Rn, $Rm, $Ext]",
+                                        [], NoItinerary>;
+  def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs),
+                                        (ins prefetch_op:$Rt, GPR64xsp:$Rn,
+                                             GPR64:$Rm, dword_Xm_regext:$Ext),
+                                        "prfm\t$Rt, [$Rn, $Rm, $Ext]",
+                                        [], NoItinerary>;
+}
+
+def : InstAlias<"prfm $Rt, [$Rn, $Rm]",
+                (PRFM_Xm_RegOffset prefetch_op:$Rt, GPR64xsp:$Rn,
+                                   GPR64:$Rm, 2)>;
+
+
+def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs),
+                 (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+                 "prfum\t$Rt, [$Rn, $SImm9]",
+                 [], NoItinerary>
+{
+  let mayLoad = 1;
+}
+def : InstAlias<"prfum $Rt, [$Rn]",
+                (PRFUM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register (unprivileged) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDTRB, LDTRH, LDTRSB, LDTRSH, LDTRSW, STTR, STTRB and STTRH
+
+// These instructions very much mirror the "unscaled immediate" loads, but since
+// there are no floating-point variants we need to split them out into their own
+// section to avoid instantiation of "ldtr d0, [sp]" etc.
+ +multiclass A64I_LDTRSTTR<bits<2> size, string asmsuffix, RegisterClass GPR, + string prefix> +{ + def _UnPriv_STR : A64I_LSunpriv<size, 0b0, 0b00, + (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), + "sttr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> + { + let mayStore = 1; + } + + def : InstAlias<"sttr" # asmsuffix # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "_UnPriv_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + + def _UnPriv_LDR : A64I_LSunpriv<size, 0b0, 0b01, + (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldtr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> + { + let mayLoad = 1; + } + + def : InstAlias<"ldtr" # asmsuffix # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "_UnPriv_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + +} + +// STTRB/LDTRB: First define the instructions +defm LS8 : A64I_LDTRSTTR<0b00, "b", GPR32, "LS8">; + +// STTRH/LDTRH +defm LS16 : A64I_LDTRSTTR<0b01, "h", GPR32, "LS16">; + +// STTR/LDTR to/from a W register +defm LS32 : A64I_LDTRSTTR<0b10, "", GPR32, "LS32">; + +// STTR/LDTR to/from an X register +defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">; + +// Now a class for the signed instructions that can go to either 32 or 64 +// bits... 
+multiclass A64I_LDTR_signed<bits<2> size, string asmopcode, string prefix> +{ + let mayLoad = 1 in + { + def w : A64I_LSunpriv<size, 0b0, 0b11, + (outs GPR32:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary>; + + def x : A64I_LSunpriv<size, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary>; + } + + def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "w") GPR32:$Rt, GPR64xsp:$Rn, 0)>; + + def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "x") GPR64:$Rt, GPR64xsp:$Rn, 0)>; + +} + +// LDTRSB +defm LDTRSB : A64I_LDTR_signed<0b00, "b", "LDTRSB">; +// LDTRSH +defm LDTRSH : A64I_LDTR_signed<0b01, "h", "LDTRSH">; + +// And finally LDTRSW which only goes to 64 bits. +def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldtrsw\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> +{ + let mayLoad = 1; +} +def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; + +//===----------------------------------------------------------------------===// +// Load-store register pair (offset) instructions +//===----------------------------------------------------------------------===// +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register pair (post-indexed) instructions +//===----------------------------------------------------------------------===// +// Contains: STP, LDP, LDPSW +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register pair (pre-indexed) instructions +//===----------------------------------------------------------------------===// +// Contains: STP, LDP, LDPSW +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store 
non-temporal register pair (offset) instructions +//===----------------------------------------------------------------------===// +// Contains: STNP, LDNP + + +// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to +// know the access size via some means. An isolated operand does not have this +// information unless told from here, which means we need separate tablegen +// Operands for each access size. This multiclass takes care of instantiating +// the correct template functions in the rest of the backend. + +multiclass offsets_simm7<string MemSize, string prefix> +{ + // The bare signed 7-bit immediate is used in post-indexed instructions, but + // because of the scaling performed a generic "simm7" operand isn't + // appropriate here either. + def simm7_asmoperand : AsmOperandClass + { + let Name = "SImm7_Scaled" # MemSize; + let PredicateMethod = "isSImm7Scaled<" # MemSize # ">"; + let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">"; + } + + def simm7 : Operand<i64> + { + let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">"; + let ParserMatchClass = !cast<AsmOperandClass>(prefix # "simm7_asmoperand"); + } +} + +defm word_ : offsets_simm7<"4", "word_">; +defm dword_ : offsets_simm7<"8", "dword_">; +defm qword_ : offsets_simm7<"16", "qword_">; + +multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg, + Operand simm7, string prefix> +{ + def _STR : A64I_LSPoffset<opc, v, 0b0, (outs), + (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7), + "stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> + { + let mayStore = 1; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + def : InstAlias<"stp $Rt, $Rt2, [$Rn]", + (!cast<Instruction>(prefix # "_STR") SomeReg:$Rt, + SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; + + def _LDR : A64I_LSPoffset<opc, v, 0b1, + (outs SomeReg:$Rt, SomeReg:$Rt2), + (ins GPR64xsp:$Rn, simm7:$SImm7), + "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> + { + let mayLoad = 1; + let DecoderMethod = 
"DecodeLDSTPairInstruction"; + } + def : InstAlias<"ldp $Rt, $Rt2, [$Rn]", + (!cast<Instruction>(prefix # "_LDR") SomeReg:$Rt, + SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; + + def _PostInd_STR : A64I_LSPpostind<opc, v, 0b0, + (outs GPR64xsp:$Rn_wb), + (ins SomeReg:$Rt, SomeReg:$Rt2, + GPR64xsp:$Rn, + simm7:$SImm7), + "stp\t$Rt, $Rt2, [$Rn], $SImm7", + [], NoItinerary> + { + let mayStore = 1; + let Constraints = "$Rn = $Rn_wb"; + + // Decoder only needed for unpredictability checking (FIXME). + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + + def _PostInd_LDR : A64I_LSPpostind<opc, v, 0b1, + (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm7:$SImm7), + "ldp\t$Rt, $Rt2, [$Rn], $SImm7", + [], NoItinerary> + { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + + def _PreInd_STR : A64I_LSPpreind<opc, v, 0b0, (outs GPR64xsp:$Rn_wb), + (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7), + "stp\t$Rt, $Rt2, [$Rn, $SImm7]!", + [], NoItinerary> + { + let mayStore = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + + def _PreInd_LDR : A64I_LSPpreind<opc, v, 0b1, + (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm7:$SImm7), + "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!", + [], NoItinerary> + { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + + def _NonTemp_STR : A64I_LSPnontemp<opc, v, 0b0, (outs), + (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7), + "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> + { + let mayStore = 1; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + def : InstAlias<"stnp $Rt, $Rt2, [$Rn]", + (!cast<Instruction>(prefix # "_NonTemp_STR") SomeReg:$Rt, + SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; + + def _NonTemp_LDR : A64I_LSPnontemp<opc, v, 0b1, + (outs SomeReg:$Rt, SomeReg:$Rt2), + (ins GPR64xsp:$Rn, simm7:$SImm7), + "ldnp\t$Rt, 
$Rt2, [$Rn, $SImm7]", [], NoItinerary> + { + let mayLoad = 1; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + def : InstAlias<"ldnp $Rt, $Rt2, [$Rn]", + (!cast<Instruction>(prefix # "_NonTemp_LDR") SomeReg:$Rt, + SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; + +} + + +defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">; +defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">; +defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">; +defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">; +defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7, "LSFPPair128">; + + +def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1, + (outs GPR64:$Rt, GPR64:$Rt2), + (ins GPR64xsp:$Rn, word_simm7:$SImm7), + "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> +{ + let mayLoad = 1; + let DecoderMethod = "DecodeLDSTPairInstruction"; +} +def : InstAlias<"ldpsw $Rt, $Rt2, [$Rn]", + (LDPSWx GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn, 0)>; + +// The written-back base of the indexed LDPSW forms is tied to $Rn, so it uses +// the same GPR64xsp class as the other indexed pair instructions. +def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1, + (outs GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, word_simm7:$SImm7), + "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7", + [], NoItinerary> +{ + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeLDSTPairInstruction"; +} + +def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1, + (outs GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, word_simm7:$SImm7), + "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!", + [], NoItinerary> +{ + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeLDSTPairInstruction"; +} + +//===----------------------------------------------------------------------===// +// Logical (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV + +multiclass logical_imm_operands<string prefix, string note, + int size, ValueType VT> +{ + def _asmoperand : 
AsmOperandClass + { + let Name = "LogicalImm" # note # size; + let PredicateMethod = "isLogicalImm" # note # "<" # size # ">"; + let RenderMethod = "addLogicalImmOperands<" # size # ">"; + } + + def _operand + : Operand<VT>, ComplexPattern<VT, 1, "SelectLogicalImm", [imm]> + { + let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand"); + let PrintMethod = "printLogicalImmOperand<" # size # ">"; + let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">"; + } +} + +defm logical_imm32 : logical_imm_operands<"logical_imm32", "", 32, i32>; +defm logical_imm64 : logical_imm_operands<"logical_imm64", "", 64, i64>; + +// The mov versions only differ in assembly parsing, where they +// exclude values representable with either MOVZ or MOVN. +defm logical_imm32_mov + : logical_imm_operands<"logical_imm32_mov", "MOV", 32, i32>; +defm logical_imm64_mov + : logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>; + + +multiclass A64I_logimmSizes<bits<2> opc, string asmop, SDNode opnode> +{ + def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd), + (ins GPR32:$Rn, logical_imm32_operand:$Imm), + !strconcat(asmop, "\t$Rd, $Rn, $Imm"), + [(set GPR32wsp:$Rd, + (opnode GPR32:$Rn, logical_imm32_operand:$Imm))], + NoItinerary>; + + def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd), + (ins GPR64:$Rn, logical_imm64_operand:$Imm), + !strconcat(asmop, "\t$Rd, $Rn, $Imm"), + [(set GPR64xsp:$Rd, + (opnode GPR64:$Rn, logical_imm64_operand:$Imm))], + NoItinerary>; +} + +defm AND : A64I_logimmSizes<0b00, "and", and>; +defm ORR : A64I_logimmSizes<0b01, "orr", or>; +defm EOR : A64I_logimmSizes<0b10, "eor", xor>; + +let Defs = [NZCV] in +{ + def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd), + (ins GPR32:$Rn, logical_imm32_operand:$Imm), + "ands\t$Rd, $Rn, $Imm", + [], NoItinerary>; + + def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd), + (ins GPR64:$Rn, logical_imm64_operand:$Imm), + "ands\t$Rd, $Rn, $Imm", + [], NoItinerary>; +} + + +def : InstAlias<"tst 
$Rn, $Imm", + (ANDSwwi WZR, GPR32:$Rn, logical_imm32_operand:$Imm)>; +def : InstAlias<"tst $Rn, $Imm", + (ANDSxxi XZR, GPR64:$Rn, logical_imm64_operand:$Imm)>; +def : InstAlias<"mov $Rd, $Imm", + (ORRwwi GPR32wsp:$Rd, WZR, logical_imm32_mov_operand:$Imm)>; +def : InstAlias<"mov $Rd, $Imm", + (ORRxxi GPR64xsp:$Rd, XZR, logical_imm64_mov_operand:$Imm)>; + +//===----------------------------------------------------------------------===// +// Logical (shifted register) instructions +//===----------------------------------------------------------------------===// +// Contains: AND, BIC, ORR, ORN, EOR, EON, ANDS, BICS + aliases TST, MVN, MOV + +// Operand for optimizing (icmp (and LHS, RHS), 0, SomeCode). In theory "ANDS" +// behaves differently for unsigned comparisons, so we defensively only allow +// signed or n/a as the operand. In practice "unsigned greater than 0" is "not +// equal to 0" and LLVM gives us this. +def signed_cond : PatLeaf<(cond), [{ + return !isUnsignedIntSetCC(N->get()); +}]>; + + +// These instructions share their "shift" operands with add/sub (shifted +// register instructions). They are defined there. + +// N.b. the commutable parameter is just !N. It will be first against the wall +// when the revolution comes. 
+multiclass logical_shifts<string prefix, bit sf, bits<2> opc, + bit N, bit commutable, + string asmop, SDPatternOperator opfrag, string sty, + RegisterClass GPR, list<Register> defs> +{ + let isCommutable = commutable, Defs = defs in { + def _lsl : A64I_logicalshift<sf, opc, 0b00, N, + (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("lsl_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set GPR:$Rd, (opfrag GPR:$Rn, (shl GPR:$Rm, + !cast<Operand>("lsl_operand_" # sty):$Imm6)) + )], + NoItinerary>; + + def _lsr : A64I_logicalshift<sf, opc, 0b01, N, + (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("lsr_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set GPR:$Rd, (opfrag GPR:$Rn, (srl GPR:$Rm, + !cast<Operand>("lsr_operand_" # sty):$Imm6)) + )], + NoItinerary>; + + def _asr : A64I_logicalshift<sf, opc, 0b10, N, + (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("asr_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set GPR:$Rd, (opfrag GPR:$Rn, (sra GPR:$Rm, + !cast<Operand>("asr_operand_" # sty):$Imm6)) + )], + NoItinerary>; + + def _ror : A64I_logicalshift<sf, opc, 0b11, N, + (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("ror_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set GPR:$Rd, (opfrag GPR:$Rn, (rotr GPR:$Rm, + !cast<Operand>("ror_operand_" # sty):$Imm6)) + )], + NoItinerary>; + } + + def _noshift + : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"), + (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn, + GPR:$Rm, 0)>; + + def : Pat<(opfrag GPR:$Rn, GPR:$Rm), + (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; +} + +multiclass logical_sizes<string prefix, bits<2> opc, bit N, bit commutable, + string asmop, SDPatternOperator opfrag, + list<Register> defs> +{ + defm xxx : logical_shifts<prefix # "xxx", 0b1, opc, N, + commutable, asmop, opfrag, "i64", GPR64, defs>; + defm www : logical_shifts<prefix # 
"www", 0b0, opc, N, + commutable, asmop, opfrag, "i32", GPR32, defs>; +} + + +defm AND : logical_sizes<"AND", 0b00, 0b0, 0b1, "and", and, []>; +defm ORR : logical_sizes<"ORR", 0b01, 0b0, 0b1, "orr", or, []>; +defm EOR : logical_sizes<"EOR", 0b10, 0b0, 0b1, "eor", xor, []>; +defm ANDS : logical_sizes<"ANDS", 0b11, 0b0, 0b1, "ands", + PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), + [{ (void)N; return false; }]>, + [NZCV]>; + +defm BIC : logical_sizes<"BIC", 0b00, 0b1, 0b0, "bic", + PatFrag<(ops node:$lhs, node:$rhs), + (and node:$lhs, (not node:$rhs))>, []>; +defm ORN : logical_sizes<"ORN", 0b01, 0b1, 0b0, "orn", + PatFrag<(ops node:$lhs, node:$rhs), + (or node:$lhs, (not node:$rhs))>, []>; +defm EON : logical_sizes<"EON", 0b10, 0b1, 0b0, "eon", + PatFrag<(ops node:$lhs, node:$rhs), + (xor node:$lhs, (not node:$rhs))>, []>; +defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics", + PatFrag<(ops node:$lhs, node:$rhs), + (and node:$lhs, (not node:$rhs)), + [{ (void)N; return false; }]>, + [NZCV]>; + +multiclass tst_shifts<string prefix, bit sf, string sty, RegisterClass GPR> +{ + let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in { + def _lsl : A64I_logicalshift<sf, 0b11, 0b00, 0b0, + (outs), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("lsl_operand_" # sty):$Imm6), + "tst\t$Rn, $Rm, $Imm6", + [(set NZCV, (A64setcc (and GPR:$Rn, (shl GPR:$Rm, + !cast<Operand>("lsl_operand_" # sty):$Imm6)), + 0, signed_cond))], + NoItinerary>; + + + def _lsr : A64I_logicalshift<sf, 0b11, 0b01, 0b0, + (outs), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("lsr_operand_" # sty):$Imm6), + "tst\t$Rn, $Rm, $Imm6", + [(set NZCV, (A64setcc (and GPR:$Rn, (srl GPR:$Rm, + !cast<Operand>("lsr_operand_" # sty):$Imm6)), + 0, signed_cond))], + NoItinerary>; + + def _asr : A64I_logicalshift<sf, 0b11, 0b10, 0b0, + (outs), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("asr_operand_" # sty):$Imm6), + "tst\t$Rn, $Rm, $Imm6", + [(set NZCV, (A64setcc (and GPR:$Rn, (sra GPR:$Rm, + 
!cast<Operand>("asr_operand_" # sty):$Imm6)), + 0, signed_cond))], + NoItinerary>; + + def _ror : A64I_logicalshift<sf, 0b11, 0b11, 0b0, + (outs), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("ror_operand_" # sty):$Imm6), + "tst\t$Rn, $Rm, $Imm6", + [(set NZCV, (A64setcc (and GPR:$Rn, (rotr GPR:$Rm, + !cast<Operand>("ror_operand_" # sty):$Imm6)), + 0, signed_cond))], + NoItinerary>; + } + + def _noshift : InstAlias<"tst $Rn, $Rm", + (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; + + def : Pat<(A64setcc (and GPR:$Rn, GPR:$Rm), 0, signed_cond), + (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; +} + +defm TSTxx : tst_shifts<"TSTxx", 0b1, "i64", GPR64>; +defm TSTww : tst_shifts<"TSTww", 0b0, "i32", GPR32>; + + +multiclass mvn_shifts<string prefix, bit sf, string sty, RegisterClass GPR> +{ + let isCommutable = 0, Rn = 0b11111 in { + def _lsl : A64I_logicalshift<sf, 0b01, 0b00, 0b1, + (outs GPR:$Rd), + (ins GPR:$Rm, + !cast<Operand>("lsl_operand_" # sty):$Imm6), + "mvn\t$Rd, $Rm, $Imm6", + [(set GPR:$Rd, (not (shl GPR:$Rm, + !cast<Operand>("lsl_operand_" # sty):$Imm6)))], + NoItinerary>; + + + def _lsr : A64I_logicalshift<sf, 0b01, 0b01, 0b1, + (outs GPR:$Rd), + (ins GPR:$Rm, + !cast<Operand>("lsr_operand_" # sty):$Imm6), + "mvn\t$Rd, $Rm, $Imm6", + [(set GPR:$Rd, (not (srl GPR:$Rm, + !cast<Operand>("lsr_operand_" # sty):$Imm6)))], + NoItinerary>; + + def _asr : A64I_logicalshift<sf, 0b01, 0b10, 0b1, + (outs GPR:$Rd), + (ins GPR:$Rm, + !cast<Operand>("asr_operand_" # sty):$Imm6), + "mvn\t$Rd, $Rm, $Imm6", + [(set GPR:$Rd, (not (sra GPR:$Rm, + !cast<Operand>("asr_operand_" # sty):$Imm6)))], + NoItinerary>; + + // The pattern's shift amount must be the same "ror" operand as in (ins). + def _ror : A64I_logicalshift<sf, 0b01, 0b11, 0b1, + (outs GPR:$Rd), + (ins GPR:$Rm, + !cast<Operand>("ror_operand_" # sty):$Imm6), + "mvn\t$Rd, $Rm, $Imm6", + [(set GPR:$Rd, (not (rotr GPR:$Rm, + !cast<Operand>("ror_operand_" # sty):$Imm6)))], + NoItinerary>; + } + + def _noshift : InstAlias<"mvn $Rd, $Rm", + (!cast<Instruction>(prefix # 
"_lsl") GPR:$Rd, GPR:$Rm, 0)>; + + def : Pat<(not GPR:$Rm), + (!cast<Instruction>(prefix # "_lsl") GPR:$Rm, 0)>; +} + +defm MVNxx : mvn_shifts<"MVNxx", 0b1, "i64", GPR64>; +defm MVNww : mvn_shifts<"MVNww", 0b0, "i32", GPR32>; + +def MOVxx :InstAlias<"mov $Rd, $Rm", (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; +def MOVww :InstAlias<"mov $Rd, $Rm", (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; + +//===----------------------------------------------------------------------===// +// Move wide (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: MOVN, MOVZ, MOVK + MOV aliases + +// A wide variety of different relocations are needed for variants of these +// instructions, so it turns out that we need a different operand for all of +// them. +multiclass movw_operands<string prefix, string instname, int width> +{ + def _imm_asmoperand : AsmOperandClass + { + // NOTE(review): "shift" is not a parameter of this multiclass; confirm what + // TableGen pastes here before relying on the resulting class name. + let Name = instname # width # "Shifted" # shift; + let PredicateMethod = "is" # instname # width # "Imm"; + let RenderMethod = "addMoveWideImmOperands"; + + let ParserMethod = "ParseImmWithLSLOperand"; + } + + def _imm : Operand<i32> + { + let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand"); + let PrintMethod = "printMoveWideImmOperand"; + let EncoderMethod = "getMoveWideImmOpValue"; + let DecoderMethod = "DecodeMoveWideImmOperand<" # width # ">"; + + let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift); + } +} + +defm movn32 : movw_operands<"movn32", "MOVN", 32>; +defm movn64 : movw_operands<"movn64", "MOVN", 64>; +defm movz32 : movw_operands<"movz32", "MOVZ", 32>; +defm movz64 : movw_operands<"movz64", "MOVZ", 64>; +defm movk32 : movw_operands<"movk32", "MOVK", 32>; +defm movk64 : movw_operands<"movk64", "MOVK", 64>; + +multiclass A64I_movwSizes<bits<2> opc, string asmop, dag ins32bit, dag ins64bit> +{ + + def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit, + !strconcat(asmop, "\t$Rd, $FullImm"), + [], NoItinerary> + { + 
bits<18> FullImm; + let UImm16 = FullImm{15-0}; + let Shift = FullImm{17-16}; + } + + def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit, + !strconcat(asmop, "\t$Rd, $FullImm"), + [], NoItinerary> + { + bits<18> FullImm; + let UImm16 = FullImm{15-0}; + let Shift = FullImm{17-16}; + } +} + +let isMoveImm = 1, isReMaterializable = 1, + isAsCheapAsAMove = 1, neverHasSideEffects = 1 in +{ + defm MOVN : A64I_movwSizes<0b00, "movn", + (ins movn32_imm:$FullImm), + (ins movn64_imm:$FullImm)>; + + // Some relocations are able to convert between a MOVZ and a MOVN. If these + // are applied the instruction must be emitted with the corresponding bits as + // 0, which means a MOVZ needs to override that bit from the default. + let PostEncoderMethod = "fixMOVZ" in + defm MOVZ : A64I_movwSizes<0b10, "movz", + (ins movz32_imm:$FullImm), + (ins movz64_imm:$FullImm)>; +} + +let Constraints = "$src = $Rd" in +defm MOVK : A64I_movwSizes<0b11, "movk", + (ins GPR32:$src, movk32_imm:$FullImm), + (ins GPR64:$src, movk64_imm:$FullImm)>; + + +// And now the "MOV" aliases. These also need their own operands because what +// they accept is completely different to what the base instructions accept. 
+multiclass movalias_operand<string prefix, string basename, + string immpredicate, int width> +{ + def _asmoperand : AsmOperandClass + { + let Name = basename # width # "MovAlias"; + let PredicateMethod + = "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">"; + let RenderMethod + = "addMoveWideMovAliasOperands<" # width # ", " + # "A64Imms::" # immpredicate # ">"; + } + + def _movimm : Operand<i32> + { + let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand"); + + let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift); + } +} + +defm movz32 : movalias_operand<"movz32", "MOVZ", "isMOVZImm", 32>; +defm movz64 : movalias_operand<"movz64", "MOVZ", "isMOVZImm", 64>; +defm movn32 : movalias_operand<"movn32", "MOVN", "isOnlyMOVNImm", 32>; +defm movn64 : movalias_operand<"movn64", "MOVN", "isOnlyMOVNImm", 64>; + +// FIXME: these are officially canonical aliases, but TableGen is too limited to +// print them at the moment. I believe in this case an "AliasPredicate" method +// will need to be implemented to allow it, as well as the more generally +// useful handling of non-register, non-constant operands. 
+class movalias<Instruction INST, RegisterClass GPR, Operand operand> + : InstAlias<"mov $Rd, $FullImm", (INST GPR:$Rd, operand:$FullImm)>; + +def : movalias<MOVZwii, GPR32, movz32_movimm>; +def : movalias<MOVZxii, GPR64, movz64_movimm>; +def : movalias<MOVNwii, GPR32, movn32_movimm>; +def : movalias<MOVNxii, GPR64, movn64_movimm>; + +//===----------------------------------------------------------------------===// +// PC-relative addressing instructions +//===----------------------------------------------------------------------===// +// Contains: ADR, ADRP + +def adr_label : Operand<i64> { + let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_adr_prel>"; + + // This label is a 21-bit offset from PC, unscaled + let PrintMethod = "printLabelOperand<21, 1>"; + let ParserMatchClass = label_asmoperand<21, 1>; + let OperandType = "OPERAND_PCREL"; +} + +def adrp_label_asmoperand : AsmOperandClass +{ + let Name = "AdrpLabel"; + let RenderMethod = "addLabelOperands<21, 4096>"; +} + +def adrp_label : Operand<i64> +{ + let EncoderMethod = "getAdrpLabelOpValue"; + + // This label is a 21-bit offset from PC, scaled by the page-size: 4096. + let PrintMethod = "printLabelOperand<21, 4096>"; + let ParserMatchClass = adrp_label_asmoperand; + let OperandType = "OPERAND_PCREL"; +} + +let neverHasSideEffects = 1 in +{ + def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label), + "adr\t$Rd, $Label", [], NoItinerary>; + + def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label), + "adrp\t$Rd, $Label", [], NoItinerary>; +} + +//===----------------------------------------------------------------------===// +// System instructions +//===----------------------------------------------------------------------===// +// Contains: HINT, CLREX, DSB, DMB, ISB, MSR, SYS, SYSL, MRS +// + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL + +// Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values. 
+def uimm3_asmoperand : AsmOperandClass +{ + let Name = "UImm3"; + let PredicateMethod = "isUImm<3>"; + let RenderMethod = "addImmOperands"; +} + +def uimm3 : Operand<i32> +{ + let ParserMatchClass = uimm3_asmoperand; +} + +// The HINT alias can accept a simple unsigned 7-bit immediate. +def uimm7_asmoperand : AsmOperandClass +{ + let Name = "UImm7"; + let PredicateMethod = "isUImm<7>"; + let RenderMethod = "addImmOperands"; +} + +def uimm7 : Operand<i32> +{ + let ParserMatchClass = uimm7_asmoperand; +} + +// Multiclass namedimm is defined with the prefetch operands. Most of these fit +// into the NamedImmMapper scheme well: they either accept a named operand or +// any immediate under a particular value (which may be 0, implying no immediate +// is allowed). +defm dbarrier : namedimm<"dbarrier", "A64DB::DBarrierMapper">; +defm isb : namedimm<"isb", "A64ISB::ISBMapper">; +defm ic : namedimm<"ic", "A64IC::ICMapper">; +defm dc : namedimm<"dc", "A64DC::DCMapper">; +defm at : namedimm<"at", "A64AT::ATMapper">; +defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">; + +// However, MRS and MSR are more complicated for a few reasons: +// * There are ~1000 generic names S3_<op1>_<CRn>_<CRm>_<Op2> which have an +// implementation-defined effect +// * Most registers are shared, but some are read-only or write-only. +// * There is a variant of MSR which accepts the same register name (SPSel), but +// which would have a different encoding. + +// In principle these could be resolved with more complicated subclasses of +// NamedImmMapper, however that imposes an overhead on other "named +// immediates", both in concrete terms with virtual tables and in unnecessary +// abstraction. + +// The solution adopted here is to take the MRS/MSR Mappers out of the usual +// hierarchy (they're not derived from NamedImmMapper) and to add logic for +// their special situation. 
+def mrs_asmoperand : AsmOperandClass +{ + let Name = "MRS"; + let ParserMethod = "ParseSysRegOperand"; +} + +def mrs_op : Operand<i32> +{ + let ParserMatchClass = mrs_asmoperand; + let PrintMethod = "printMRSOperand"; + let DecoderMethod = "DecodeMRSOperand"; +} + +def msr_asmoperand : AsmOperandClass +{ + let Name = "MSRWithReg"; + + // Note that SPSel is valid for both this and the pstate operands, but with + // different immediate encodings. This is why these operands provide a string + // AArch64Operand rather than an immediate. The overlap is small enough that + // it could be resolved with hackery now, but who can say in future? + let ParserMethod = "ParseSysRegOperand"; +} + +def msr_op : Operand<i32> +{ + let ParserMatchClass = msr_asmoperand; + let PrintMethod = "printMSROperand"; + let DecoderMethod = "DecodeMSROperand"; +} + +def pstate_asmoperand : AsmOperandClass +{ + let Name = "MSRPState"; + // See comment above about parser. + let ParserMethod = "ParseSysRegOperand"; +} + +def pstate_op : Operand<i32> +{ + let ParserMatchClass = pstate_asmoperand; + let PrintMethod = "printNamedImmOperand<A64PState::PStateMapper>"; + let DecoderMethod = "DecodeNamedImmOperand<A64PState::PStateMapper>"; +} + +// When <CRn> is specified, an assembler should accept something like "C4", not +// the usual "#4" immediate. +def CRx_asmoperand : AsmOperandClass +{ + let Name = "CRx"; + let PredicateMethod = "isUImm<4>"; + let RenderMethod = "addImmOperands"; + let ParserMethod = "ParseCRxOperand"; +} + +def CRx : Operand<i32> +{ + let ParserMatchClass = CRx_asmoperand; + let PrintMethod = "printCRxOperand"; +} + + +// Finally, we can start defining the instructions. + +// HINT is straightforward, with a few aliases. 
+def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7", + [], NoItinerary> +{ + bits<7> UImm7; + let CRm = UImm7{6-3}; + let Op2 = UImm7{2-0}; + + let Op0 = 0b00; + let Op1 = 0b011; + let CRn = 0b0010; + let Rt = 0b11111; +} + +def : InstAlias<"nop", (HINTi 0)>; +def : InstAlias<"yield", (HINTi 1)>; +def : InstAlias<"wfe", (HINTi 2)>; +def : InstAlias<"wfi", (HINTi 3)>; +def : InstAlias<"sev", (HINTi 4)>; +def : InstAlias<"sevl", (HINTi 5)>; + +// Quite a few instructions then follow a similar pattern of fixing common +// fields in the bitpattern, we'll define a helper-class for them. +class simple_sys<bits<2> op0, bits<3> op1, bits<4> crn, bits<3> op2, + Operand operand, string asmop> + : A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"), + [], NoItinerary> +{ + let Op0 = op0; + let Op1 = op1; + let CRn = crn; + let Op2 = op2; + let Rt = 0b11111; +} + + +def CLREXi : simple_sys<0b00, 0b011, 0b0011, 0b010, uimm4, "clrex">; +def DSBi : simple_sys<0b00, 0b011, 0b0011, 0b100, dbarrier_op, "dsb">; +def DMBi : simple_sys<0b00, 0b011, 0b0011, 0b101, dbarrier_op, "dmb">; +def ISBi : simple_sys<0b00, 0b011, 0b0011, 0b110, isb_op, "isb">; + +def : InstAlias<"clrex", (CLREXi 0b1111)>; +def : InstAlias<"isb", (ISBi 0b1111)>; + +// (DMBi 0xb) is a "DMB ISH" instruction, appropriate for Linux SMP +// configurations at least. +def : Pat<(atomic_fence imm, imm), (DMBi 0xb)>; + +// Any SYS bitpattern can be represented with a complex and opaque "SYS" +// instruction. +def SYSiccix : A64I_system<0b0, (outs), + (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, + uimm3:$Op2, GPR64:$Rt), + "sys\t$Op1, $CRn, $CRm, $Op2, $Rt", + [], NoItinerary> +{ + let Op0 = 0b01; +} + +// You can skip the Xt argument whether it makes sense or not for the generic +// SYS instruction. 
+def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2", + (SYSiccix uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, XZR)>; + + +// But many SYS encodings have aliases (IC, DC, AT, TLBI) that take one named +// operand, which obviously doesn't fit into the generic operand list above; +// SYSalias splits a 14-bit $SysOp into the Op1, CRn, CRm and Op2 fields. +class SYSalias<dag ins, string asmstring> + : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> +{ + let isAsmParserOnly = 1; + + bits<14> SysOp; + let Op0 = 0b01; + let Op1 = SysOp{13-11}; + let CRn = SysOp{10-7}; + let CRm = SysOp{6-3}; + let Op2 = SysOp{2-0}; +} + +def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">; + +def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> +{ + let Rt = 0b11111; +} + +def DCix : SYSalias<(ins dc_op:$SysOp, GPR64:$Rt), "dc\t$SysOp, $Rt">; +def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">; + +def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">; + +def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> +{ + let Rt = 0b11111; +} + + +def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt), + (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2), + "sysl\t$Rt, $Op1, $CRn, $CRm, $Op2", + [], NoItinerary> +{ + let Op0 = 0b01; +} + +// The instructions themselves are rather simple for MSR and MRS. 
+def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt), + "msr\t$SysReg, $Rt", [], NoItinerary> +{ + bits<16> SysReg; + let Op0 = SysReg{15-14}; + let Op1 = SysReg{13-11}; + let CRn = SysReg{10-7}; + let CRm = SysReg{6-3}; + let Op2 = SysReg{2-0}; +} + +def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg), + "mrs\t$Rt, $SysReg", [], NoItinerary> +{ + bits<16> SysReg; + let Op0 = SysReg{15-14}; + let Op1 = SysReg{13-11}; + let CRn = SysReg{10-7}; + let CRm = SysReg{6-3}; + let Op2 = SysReg{2-0}; +} + +def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm), + "msr\t$PState, $CRm", [], NoItinerary> +{ + bits<6> PState; + + let Op0 = 0b00; + let Op1 = PState{5-3}; + let CRn = 0b0100; + let Op2 = PState{2-0}; + let Rt = 0b11111; +} + +//===----------------------------------------------------------------------===// +// Test & branch (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: TBZ, TBNZ + +// The bit to test is a simple unsigned 6-bit immediate in the X-register +// versions. +def uimm6 : Operand<i64> +{ + let ParserMatchClass = uimm6_asmoperand; +} + +def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>; + +def tbimm_target : Operand<OtherVT> +{ + let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_tstbr>"; + + // This label is a 14-bit offset from PC, scaled by the instruction-width: 4. + let PrintMethod = "printLabelOperand<14, 4>"; + let ParserMatchClass = label_wid14_scal4_asmoperand; + + let OperandType = "OPERAND_PCREL"; +} + +def A64eq : ImmLeaf<i32, [{ return Imm == A64CC::EQ; }]>; +def A64ne : ImmLeaf<i32, [{ return Imm == A64CC::NE; }]>; + +// These instructions correspond to patterns involving "and" with a power of +// two, which we need to be able to select. 
+def tstb64_pat : ComplexPattern<i64, 1, "SelectTSTBOperand<64>">; +def tstb32_pat : ComplexPattern<i32, 1, "SelectTSTBOperand<32>">; + +let isBranch = 1, isTerminator = 1 in +{ + def TBZxii : A64I_TBimm<0b0, (outs), + (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label), + "tbz\t$Rt, $Imm, $Label", + [(A64br_cc (A64cmp (and GPR64:$Rt, tstb64_pat:$Imm), 0), + A64eq, bb:$Label)], + NoItinerary>; + + def TBNZxii : A64I_TBimm<0b1, (outs), + (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label), + "tbnz\t$Rt, $Imm, $Label", + [(A64br_cc (A64cmp (and GPR64:$Rt, tstb64_pat:$Imm), 0), + A64ne, bb:$Label)], + NoItinerary>; + + + // Note, these instructions overlap with the above 64-bit patterns. This is + // intentional, "tbz x3, #1, somewhere" and "tbz w3, #1, somewhere" would both + // do the same thing and are both permitted assembly. They also both have + // sensible DAG patterns. + def TBZwii : A64I_TBimm<0b0, (outs), + (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label), + "tbz\t$Rt, $Imm, $Label", + [(A64br_cc (A64cmp (and GPR32:$Rt, tstb32_pat:$Imm), 0), + A64eq, bb:$Label)], + NoItinerary> + { + let Imm{5} = 0b0; + } + + def TBNZwii : A64I_TBimm<0b1, (outs), + (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label), + "tbnz\t$Rt, $Imm, $Label", + [(A64br_cc (A64cmp (and GPR32:$Rt, tstb32_pat:$Imm), 0), + A64ne, bb:$Label)], + NoItinerary> + { + let Imm{5} = 0b0; + } +} + +//===----------------------------------------------------------------------===// +// Unconditional branch (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: B, BL + +def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>; + +def bimm_target : Operand<OtherVT> +{ + let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_uncondbr>"; + + // This label is a 26-bit offset from PC, scaled by the instruction-width: 4. 
+ let PrintMethod = "printLabelOperand<26, 4>"; + let ParserMatchClass = label_wid26_scal4_asmoperand; + + let OperandType = "OPERAND_PCREL"; +} + +def blimm_target : Operand<i64> +{ + let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_call>"; + + // This label is a 26-bit offset from PC, scaled by the instruction-width: 4. + let PrintMethod = "printLabelOperand<26, 4>"; + let ParserMatchClass = label_wid26_scal4_asmoperand; + + let OperandType = "OPERAND_PCREL"; +} + +class A64I_BimmImpl<bit op, string asmop, list<dag> patterns, Operand lbl_type> + : A64I_Bimm<op, (outs), (ins lbl_type:$Label), + !strconcat(asmop, "\t$Label"), patterns, + NoItinerary>; + +let isBranch = 1 in { + def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> + { + let isTerminator = 1; + let isBarrier = 1; + } + + def BLimm : A64I_BimmImpl<0b1, "bl", + [(AArch64Call tglobaladdr:$Label)], blimm_target> + { + let isCall = 1; + let Defs = [X30]; + } +} + +def : Pat<(AArch64Call texternalsym:$Label), (BLimm texternalsym:$Label)>; + +//===----------------------------------------------------------------------===// +// Unconditional branch (register) instructions +//===----------------------------------------------------------------------===// +// Contains: BR, BLR, RET, ERET, DRPS. + +// Most of the notional opcode fields in the A64I_Breg format are fixed in A64 +// at the moment. +class A64I_BregImpl<bits<4> opc, + dag outs, dag ins, string asmstr, list<dag> patterns, + InstrItinClass itin = NoItinerary> + : A64I_Breg<opc, 0b11111, 0b000000, 0b00000, + outs, ins, asmstr, patterns, itin> +{ + let isBranch = 1; + let isIndirectBranch = 1; +} + +// Note that these are not marked isCall or isReturn because as far as LLVM is +// concerned they're not. "ret" is just another jump unless it has been selected +// by LLVM as the function's return. +// NOTE(review): the individual definitions that follow do set isCall (BLRx) and +// isReturn (RETx, RET, ERET); this note appears to describe only the shared +// class above -- confirm and reword. 
+ +let isBranch = 1 in { + def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn), + "br\t$Rn", [(brind GPR64:$Rn)]> + { + let isBarrier = 1; + let isTerminator = 1; + } + + def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn), + "blr\t$Rn", [(AArch64Call GPR64:$Rn)]> + { + let isBarrier = 0; + let isCall = 1; + let Defs = [X30]; + } + + def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn), + "ret\t$Rn", []> + { + let isBarrier = 1; + let isTerminator = 1; + let isReturn = 1; + } + + // Create a separate pseudo-instruction for codegen to use so that we don't + // flag x30 as used in every function. It'll be restored before the RET by the + // epilogue if it's legitimately used. + def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> + { + let isTerminator = 1; + let isBarrier = 1; + let isReturn = 1; + } + + def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> + { + let Rn = 0b11111; + let isBarrier = 1; + let isTerminator = 1; + let isReturn = 1; + } + + def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> + { + let Rn = 0b11111; + let isBarrier = 1; + } +} + +def RETAlias : InstAlias<"ret", (RETx X30)>; + + +//===----------------------------------------------------------------------===// +// Address generation patterns +//===----------------------------------------------------------------------===// + +// Primary method of address generation for the small/absolute memory model is +// an ADRP/ADD pair: +// ADRP x0, some_variable +// ADD x0, x0, #:lo12:some_variable +// +// The load/store elision of the ADD is accomplished when selecting +// addressing-modes. This just mops up the cases where that doesn't work and we +// really need an address in some register. + +// This wrapper applies a LO12 modifier to the address. Otherwise we could just +// use the same address. 
+ +class ADRP_ADD<SDNode Wrapper, SDNode addrop> + : Pat<(Wrapper addrop:$Hi, addrop:$Lo12, (i32 imm)), + (ADDxxi_lsl0_s (ADRPxi addrop:$Hi), addrop:$Lo12)>; + +def : ADRP_ADD<A64WrapperSmall, tblockaddress>; +def : ADRP_ADD<A64WrapperSmall, texternalsym>; +def : ADRP_ADD<A64WrapperSmall, tglobaladdr>; +def : ADRP_ADD<A64WrapperSmall, tglobaltlsaddr>; +def : ADRP_ADD<A64WrapperSmall, tjumptable>; + +//===----------------------------------------------------------------------===// +// GOT access patterns +//===----------------------------------------------------------------------===// + +// FIXME: Wibble +// An A64GOTLoad of a small-model wrapper is selected as an ADRP on the $Hi +// operand feeding an LS64_LDR that uses the $Lo12 operand as its offset. The +// literal 8 is matched against the wrapper's third operand, the one ADRP_ADD +// accepts as any (i32 imm) -- presumably the guaranteed alignment; confirm. + +class GOTLoadSmall<SDNode addrfrag> + : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)), + (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>; + +def : GOTLoadSmall<texternalsym>; +def : GOTLoadSmall<tglobaladdr>; +def : GOTLoadSmall<tglobaltlsaddr>; + +//===----------------------------------------------------------------------===// +// Tail call handling +//===----------------------------------------------------------------------===// + +// Pattern-carrying pseudos selected from AArch64tcret, for a direct (global +// address) destination and for a destination held in a tcGPR64 register. +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in +{ + def TC_RETURNdi + : PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff), + [(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>; + + def TC_RETURNxi + : PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), + [(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff))]>; +} + +// Pattern-less pseudos that expand to the plain Bimm / BRx instructions while +// carrying the call/return/terminator/barrier flags set by the enclosing let. +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + Uses = [XSP] in +{ + def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [], + (Bimm bimm_target:$Label)>; + + def TAIL_BRx : A64PseudoExpand<(outs), (ins tcGPR64:$Rd), [], + (BRx GPR64:$Rd)>; +} + + +// External-symbol destinations reuse the direct tail-call pseudo. +def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), + (TC_RETURNdi texternalsym:$dst, imm:$FPDiff)>; + +//===----------------------------------------------------------------------===// +// Thread local storage 
+//===----------------------------------------------------------------------===// + +// The thread pointer is read with an MRS. With the SysReg field slicing used +// by MRSxi, 0xde82 is Op0=3, Op1=3, CRn=13, CRm=0, Op2=2 (presumably TPIDR_EL0 +// -- confirm against the system register tables). +def : Pat<(A64threadpointer), (MRSxi 0xde82)>; + +// This is a pseudo-instruction representing the ".tlsdesccall" directive in +// assembly. Its effect is to insert an R_AARCH64_TLSDESC_CALL relocation at the +// current location. It should always be immediately followed by a BLR +// instruction, and is intended solely for relaxation by the linker. +def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> +{ + let hasSideEffects = 1; +} + +// Call pseudo selected from A64tlsdesc_blr; it carries the TLS variable as a +// second operand alongside the register to branch to, and defines X30. +def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var), + [(A64tlsdesc_blr GPR64:$Rn, tglobaltlsaddr:$Var)]> +{ + let isCall = 1; + let Defs = [X30]; +} + +def : Pat<(A64tlsdesc_blr GPR64:$Rn, texternalsym:$Var), + (TLSDESC_BLRx GPR64:$Rn, texternalsym:$Var)>; + +//===----------------------------------------------------------------------===// +// Bitfield patterns +//===----------------------------------------------------------------------===// + +// Turn an immediate (an lsb position, going by the name) into the 32-bit +// ImmR operand: (32 - value) mod 32. +def bfi32_lsb_to_immr : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant((32 - N->getZExtValue()) % 32, MVT::i64); +}]>; + +// Same transform for the 64-bit form: (64 - value) mod 64. +def bfi64_lsb_to_immr : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant((64 - N->getZExtValue()) % 64, MVT::i64); +}]>; + +// Turn an immediate (a field width, going by the name) into ImmS: value - 1. +def bfi_width_to_imms : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() - 1, MVT::i64); +}]>; + + +// The simpler patterns deal with cases where no AND mask is actually needed +// (either all bits are used or the low 32 bits are used). 
+let AddedComplexity = 10 in { + +def : Pat<(A64Bfi GPR64:$src, GPR64:$Rn, imm:$ImmR, imm:$ImmS), + (BFIxxii GPR64:$src, GPR64:$Rn, + (bfi64_lsb_to_immr (i64 imm:$ImmR)), + (bfi_width_to_imms (i64 imm:$ImmS)))>; + +def : Pat<(A64Bfi GPR32:$src, GPR32:$Rn, imm:$ImmR, imm:$ImmS), + (BFIwwii GPR32:$src, GPR32:$Rn, + (bfi32_lsb_to_immr (i64 imm:$ImmR)), + (bfi_width_to_imms (i64 imm:$ImmS)))>; + + +def : Pat<(and (A64Bfi GPR64:$src, GPR64:$Rn, imm:$ImmR, imm:$ImmS), + (i64 4294967295)), + (SUBREG_TO_REG (i64 0), + (BFIwwii (EXTRACT_SUBREG GPR64:$src, sub_32), + (EXTRACT_SUBREG GPR64:$Rn, sub_32), + (bfi32_lsb_to_immr (i64 imm:$ImmR)), + (bfi_width_to_imms (i64 imm:$ImmS))), + sub_32)>; + +} + +//===----------------------------------------------------------------------===// +// Constant island entries +//===----------------------------------------------------------------------===// + +// The constant island pass needs to create "instructions" in the middle of the +// instruction stream to represent its constants. + +def cpinst_operand : Operand<i32>; + +def CONSTPOOL_ENTRY : PseudoInst<(outs), (ins cpinst_operand:$instid, + cpinst_operand:$cpidx, + i32imm:$size), []> +{ + let neverHasSideEffects = 1; + let isNotDuplicable = 1; +} + +//===----------------------------------------------------------------------===// +// Miscellaneous patterns +//===----------------------------------------------------------------------===// + +// Truncation from 64 to 32-bits just involves renaming your register. +def : Pat<(i32 (trunc (i64 GPR64:$val))), (EXTRACT_SUBREG GPR64:$val, sub_32)>; + +// Similarly, extension where we don't care about the high bits is +// just a rename. +def : Pat<(i64 (anyext (i32 GPR32:$val))), + (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$val, sub_32)>; + +// SELECT instructions providing f128 types need to be handled by a +// pseudo-instruction since the eventual code will need to introduce basic +// blocks and control flow. 
+def F128CSEL : PseudoInst<(outs FPR128:$Rd), + (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond), + [(set FPR128:$Rd, (simple_select (f128 FPR128:$Rn), + FPR128:$Rm))]> +{ + let Uses = [NZCV]; + let usesCustomInserter = 1; +} + +//===----------------------------------------------------------------------===// +// Load/store patterns +//===----------------------------------------------------------------------===// + +// There are lots of patterns here, because we need to allow at least three +// parameters to vary independently. +// 1. Instruction: "ldrb w9, [sp]", "ldrh w9, [sp]", ... +// 2. LLVM source: zextloadi8, anyextloadi8, ... +// 3. Address-generation: A64Wrapper, (add BASE, OFFSET), ... +// +// The biggest problem turns out to be the address-generation variable. At the +// point of instantiation we need to produce two DAGs, one for the pattern and +// one for the instruction. Doing this at the lowest level of classes doesn't +// work. +// +// Consider the simple uimm12 addressing mode, and the desire to match both (add +// GPR64xsp:$Rn, uimm12:$Offset) and GPR64xsp:$Rn, particularly on the +// instruction side. We'd need to insert either "GPR64xsp" and "uimm12" or +// "GPR64xsp" and "0" into an unknown dag. !subst is not capable of this +// operation, and PatFrags are for selection not output. +// +// As a result, the address-generation patterns are the final +// instantiations. However, we do still need to vary the operand for the address +// further down (At the point we're deciding A64WrapperSmall, we don't know +// the memory width of the operation). + +//===------------------------------ +// 1. 
Basic infrastructural defs +//===------------------------------ + +// First, some simple classes for !foreach and !subst to use: +class Decls +{ + dag pattern; +} + +def decls : Decls; +def ALIGN; +def INST; +def OFFSET; +def SHIFT; + +// You can't use !subst on an actual immediate, but you *can* use it on an +// operand record that happens to match a single immediate. So we do. +def imm_eq0 : ImmLeaf<i64, [{ return Imm == 0; }]>; +def imm_eq1 : ImmLeaf<i64, [{ return Imm == 1; }]>; +def imm_eq2 : ImmLeaf<i64, [{ return Imm == 2; }]>; +def imm_eq3 : ImmLeaf<i64, [{ return Imm == 3; }]>; +def imm_eq4 : ImmLeaf<i64, [{ return Imm == 4; }]>; + +// If the low bits of a pointer are known to be 0 then an "or" is just as good +// as addition for computing an offset. This fragment forwards that check for +// TableGen's use. +def add_like_or : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs), +[{ + return CurDAG->isBaseWithConstantOffset(SDValue(N, 0)); +}]>; + +// Load/store (unsigned immediate) operations with relocations against global +// symbols (for lo12) are only valid if those symbols have correct alignment +// (since the immediate offset is divided by the access scale, it can't have a +// remainder). +// +// The guaranteed alignment is provided as part of the WrapperSmall +// operation, and checked against one of these. +def any_align : ImmLeaf<i32, [{ (void)Imm; return true; }]>; +def min_align2 : ImmLeaf<i32, [{ return Imm >= 2; }]>; +def min_align4 : ImmLeaf<i32, [{ return Imm >= 4; }]>; +def min_align8 : ImmLeaf<i32, [{ return Imm >= 8; }]>; +def min_align16 : ImmLeaf<i32, [{ return Imm >= 16; }]>; + +// "Normal" load/store instructions can be used on atomic operations, provided +// the ordering parameter is at most "monotonic". Anything above that needs +// special handling with acquire/release instructions. 
+class simple_load<PatFrag base> + : PatFrag<(ops node:$ptr), (base node:$ptr), [{ + return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic; +}]>; + +def atomic_load_simple_i8 : simple_load<atomic_load_8>; +def atomic_load_simple_i16 : simple_load<atomic_load_16>; +def atomic_load_simple_i32 : simple_load<atomic_load_32>; +def atomic_load_simple_i64 : simple_load<atomic_load_64>; + +class simple_store<PatFrag base> + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ + return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic; +}]>; + +def atomic_store_simple_i8 : simple_store<atomic_store_8>; +def atomic_store_simple_i16 : simple_store<atomic_store_16>; +def atomic_store_simple_i32 : simple_store<atomic_store_32>; +def atomic_store_simple_i64 : simple_store<atomic_store_64>; + +//===------------------------------ +// 2. UImm12 and SImm9 +//===------------------------------ + +// These instructions have two operands providing the address so they can be +// treated similarly for most purposes. + +//===------------------------------ +// 2.1 Base patterns covering extend/truncate semantics +//===------------------------------ + +// Atomic patterns can be shared between integer operations of all sizes, a +// quick multiclass here allows reuse. +multiclass ls_atomic_pats<Instruction LOAD, Instruction STORE, dag Base, + dag Offset, dag address, RegisterClass TPR, + ValueType sty> +{ + def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address), + (LOAD Base, Offset)>; + + def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, TPR:$Rt), + (STORE TPR:$Rt, Base, Offset)>; +} + +// Instructions accessing a memory chunk smaller than a register (or, in a +// pinch, the same size) have a characteristic set of patterns they want to +// match: extending loads and truncating stores. This class deals with the +// sign-neutral version of those patterns. +// +// It will be instantiated across multiple addressing-modes. 
+multiclass ls_small_pats<Instruction LOAD, Instruction STORE, + dag Base, dag Offset, + dag address, ValueType sty> + : ls_atomic_pats<LOAD, STORE, Base, Offset, address, GPR32, sty> +{ + def : Pat<(!cast<SDNode>(zextload # sty) address), (LOAD Base, Offset)>; + + def : Pat<(!cast<SDNode>(extload # sty) address), (LOAD Base, Offset)>; + + // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit + // register was actually set. + def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)), + (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>; + + def : Pat<(i64 (!cast<SDNode>(extload # sty) address)), + (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>; + + def : Pat<(!cast<SDNode>(truncstore # sty) GPR32:$Rt, address), + (STORE GPR32:$Rt, Base, Offset)>; + + // For truncating store from 64-bits, we have to manually tell LLVM to + // ignore the high bits of the x register. + def : Pat<(!cast<SDNode>(truncstore # sty) GPR64:$Rt, address), + (STORE (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset)>; +} + +// Next come patterns for sign-extending loads. +multiclass load_signed_pats<string T, string U, dag Base, dag Offset, + dag address, ValueType sty> +{ + def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)), + (!cast<Instruction>("LDRS" # T # "w" # U) Base, Offset)>; + + def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)), + (!cast<Instruction>("LDRS" # T # "x" # U) Base, Offset)>; + +} + +// and finally "natural-width" loads and stores come next. +multiclass ls_neutral_pats<Instruction LOAD, Instruction STORE, dag Base, + dag Offset, dag address, RegisterClass TPR, + ValueType sty> +{ + def : Pat<(sty (load address)), (LOAD Base, Offset)>; + def : Pat<(store (sty TPR:$Rt), address), (STORE TPR:$Rt, Base, Offset)>; +} + +// Integer operations also get atomic instructions to select for. 
+multiclass ls_int_neutral_pats<Instruction LOAD, Instruction STORE, dag Base, + dag Offset, dag address, RegisterClass TPR, + ValueType sty> + : ls_neutral_pats<LOAD, STORE, Base, Offset, address, TPR, sty>, + ls_atomic_pats<LOAD, STORE, Base, Offset, address, TPR, sty>; + +//===------------------------------ +// 2.2. Addressing-mode instantiations +//===------------------------------ + +multiclass uimm12_pats<dag address, dag Base, dag Offset> +{ + defm : ls_small_pats<LS8_LDR, LS8_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, byte_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, byte_uimm12, + !subst(ALIGN, any_align, decls.pattern))), + i8>; + defm : ls_small_pats<LS16_LDR, LS16_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, hword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, hword_uimm12, + !subst(ALIGN, min_align2, decls.pattern))), + i16>; + defm : ls_small_pats<LS32_LDR, LS32_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, word_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, word_uimm12, + !subst(ALIGN, min_align4, decls.pattern))), + i32>; + + defm : ls_int_neutral_pats<LS32_LDR, LS32_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, word_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, word_uimm12, + !subst(ALIGN, min_align4, decls.pattern))), + GPR32, i32>; + + defm : ls_int_neutral_pats<LS64_LDR, LS64_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, dword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, dword_uimm12, + !subst(ALIGN, min_align8, decls.pattern))), + GPR64, i64>; + + defm : ls_neutral_pats<LSFP16_LDR, LSFP16_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, hword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, hword_uimm12, + !subst(ALIGN, min_align2, decls.pattern))), + FPR16, f16>; + + 
defm : ls_neutral_pats<LSFP32_LDR, LSFP32_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, word_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, word_uimm12, + !subst(ALIGN, min_align4, decls.pattern))), + FPR32, f32>; + + defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, dword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, dword_uimm12, + !subst(ALIGN, min_align8, decls.pattern))), + FPR64, f64>; + + defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, qword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, qword_uimm12, + !subst(ALIGN, min_align16, decls.pattern))), + FPR128, f128>; + + defm : load_signed_pats<"B", "", Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, byte_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, byte_uimm12, + !subst(ALIGN, any_align, decls.pattern))), + i8>; + + defm : load_signed_pats<"H", "", Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, hword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, hword_uimm12, + !subst(ALIGN, min_align2, decls.pattern))), + i16>; + + def : Pat<(sextloadi32 !foreach(decls.pattern, address, + !subst(OFFSET, word_uimm12, + !subst(ALIGN, min_align4, decls.pattern)))), + (LDRSWx Base, !foreach(decls.pattern, Offset, + !subst(OFFSET, word_uimm12, decls.pattern)))>; +} + +// Straightforward patterns of last resort: a pointer with or without an +// appropriate offset. 
+defm : uimm12_pats<(i64 GPR64xsp:$Rn), (i64 GPR64xsp:$Rn), (i64 0)>; +defm : uimm12_pats<(add GPR64xsp:$Rn, OFFSET:$UImm12), + (i64 GPR64xsp:$Rn), (i64 OFFSET:$UImm12)>; + +// The offset could be hidden behind an "or", of course: +defm : uimm12_pats<(add_like_or GPR64xsp:$Rn, OFFSET:$UImm12), + (i64 GPR64xsp:$Rn), (i64 OFFSET:$UImm12)>; + +// Global addresses under the small-absolute model should use these +// instructions. There are ELF relocations specifically for it. +defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN), + (ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>; + +defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12, ALIGN), + (ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>; + +// External symbols that make it this far should also get standard relocations. +defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12, ALIGN), + (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>; + + +// We also want to use uimm12 instructions for local variables at the moment. +def tframeindex_XFORM : SDNodeXForm<frameindex, [{ + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + return CurDAG->getTargetFrameIndex(FI, MVT::i64); +}]>; + +defm : uimm12_pats<(i64 frameindex:$Rn), + (tframeindex_XFORM tframeindex:$Rn), (i64 0)>; + +// These can be much simpler than uimm12 because we don't need to change the +// operand type (e.g. LDURB and LDURH take the same operands). 
+multiclass simm9_pats<dag address, dag Base, dag Offset> +{ + defm : ls_small_pats<LS8_LDUR, LS8_STUR, Base, Offset, address, i8>; + defm : ls_small_pats<LS16_LDUR, LS16_STUR, Base, Offset, address, i16>; + + defm : ls_int_neutral_pats<LS32_LDUR, LS32_STUR, Base, Offset, address, + GPR32, i32>; + defm : ls_int_neutral_pats<LS64_LDUR, LS64_STUR, Base, Offset, address, + GPR64, i64>; + + defm : ls_neutral_pats<LSFP16_LDUR, LSFP16_STUR, Base, Offset, address, + FPR16, f16>; + defm : ls_neutral_pats<LSFP32_LDUR, LSFP32_STUR, Base, Offset, address, + FPR32, f32>; + defm : ls_neutral_pats<LSFP64_LDUR, LSFP64_STUR, Base, Offset, address, + FPR64, f64>; + defm : ls_neutral_pats<LSFP128_LDUR, LSFP128_STUR, Base, Offset, address, + FPR128, f128>; + + def : Pat<(i64 (zextloadi32 address)), + (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>; + + def : Pat<(truncstorei32 GPR64:$Rt, address), + (LS32_STUR (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset)>; + + defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>; + defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>; + def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>; +} + +defm : simm9_pats<(add GPR64xsp:$Rn, simm9:$SImm9), + (i64 GPR64xsp:$Rn), (SDXF_simm9 simm9:$SImm9)>; + +defm : simm9_pats<(add_like_or GPR64xsp:$Rn, simm9:$SImm9), + (i64 GPR64xsp:$Rn), (SDXF_simm9 simm9:$SImm9)>; + + +//===------------------------------ +// 3. Register offset patterns +//===------------------------------ + +// Atomic patterns can be shared between integer operations of all sizes, a +// quick multiclass here allows reuse. 
+multiclass ro_atomic_pats<Instruction LOAD, Instruction STORE, dag Base, + dag Offset, dag Extend, dag address, + RegisterClass TPR, ValueType sty> +{ + def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address), + (LOAD Base, Offset, Extend)>; + + def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, TPR:$Rt), + (STORE TPR:$Rt, Base, Offset, Extend)>; +} + +// The register offset instructions take three operands giving the address, +// and have an annoying split between instructions where Rm is 32-bit and +// 64-bit. So we need a special hierarchy to describe them. Other than that the +// same operations should be supported as for simm9 and uimm12 addressing. + +multiclass ro_small_pats<Instruction LOAD, Instruction STORE, + dag Base, dag Offset, dag Extend, + dag address, ValueType sty> + : ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, GPR32, sty> +{ + def : Pat<(!cast<SDNode>(zextload # sty) address), + (LOAD Base, Offset, Extend)>; + + def : Pat<(!cast<SDNode>(extload # sty) address), + (LOAD Base, Offset, Extend)>; + + // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit + // register was actually set. + def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)), + (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>; + + def : Pat<(i64 (!cast<SDNode>(extload # sty) address)), + (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>; + + def : Pat<(!cast<SDNode>(truncstore # sty) GPR32:$Rt, address), + (STORE GPR32:$Rt, Base, Offset, Extend)>; + + // For truncating store from 64-bits, we have to manually tell LLVM to + // ignore the high bits of the x register. + def : Pat<(!cast<SDNode>(truncstore # sty) GPR64:$Rt, address), + (STORE (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset, Extend)>; + +} + +// Next come patterns for sign-extending loads. 
+multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend, + dag address, ValueType sty> +{ + def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)), + (!cast<Instruction>("LDRS" # T # "w_" # Rm # "_RegOffset") + Base, Offset, Extend)>; + + def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)), + (!cast<Instruction>("LDRS" # T # "x_" # Rm # "_RegOffset") + Base, Offset, Extend)>; +} + +// and finally "natural-width" loads and stores come next. +multiclass ro_neutral_pats<Instruction LOAD, Instruction STORE, + dag Base, dag Offset, dag Extend, dag address, + RegisterClass TPR, ValueType sty> +{ + def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>; + def : Pat<(store (sty TPR:$Rt), address), + (STORE TPR:$Rt, Base, Offset, Extend)>; +} + +multiclass ro_int_neutral_pats<Instruction LOAD, Instruction STORE, + dag Base, dag Offset, dag Extend, dag address, + RegisterClass TPR, ValueType sty> + : ro_neutral_pats<LOAD, STORE, Base, Offset, Extend, address, TPR, sty>, + ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, TPR, sty>; + +multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset, dag Extend> +{ + defm : ro_small_pats<!cast<Instruction>("LS8_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LS8_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq0, decls.pattern)), + i8>; + defm : ro_small_pats<!cast<Instruction>("LS16_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LS16_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq1, decls.pattern)), + i16>; + defm : ro_small_pats<!cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern)), + i32>; + + defm : ro_int_neutral_pats<!cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"), + 
!cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern)), + GPR32, i32>; + + defm : ro_int_neutral_pats<!cast<Instruction>("LS64_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LS64_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq3, decls.pattern)), + GPR64, i64>; + + defm : ro_neutral_pats<!cast<Instruction>("LSFP16_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LSFP16_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq1, decls.pattern)), + FPR16, f16>; + + defm : ro_neutral_pats<!cast<Instruction>("LSFP32_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LSFP32_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern)), + FPR32, f32>; + + defm : ro_neutral_pats<!cast<Instruction>("LSFP64_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LSFP64_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq3, decls.pattern)), + FPR64, f64>; + + defm : ro_neutral_pats<!cast<Instruction>("LSFP128_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LSFP128_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq4, decls.pattern)), + FPR128, f128>; + + defm : ro_signed_pats<"B", Rm, Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq0, decls.pattern)), + i8>; + + defm : ro_signed_pats<"H", Rm, Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq1, decls.pattern)), + i16>; + + def : Pat<(sextloadi32 !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern))), + (!cast<Instruction>("LDRSWx_" # Rm # "_RegOffset") + Base, Offset, Extend)>; +} + + +// Finally we're in a position to tell LLVM exactly what addresses are 
reachable +// using register-offset instructions. Essentially a base plus a possibly +// extended, possibly shifted (by access size) offset. + +defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (sext GPR32:$Rm)), + (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 6)>; + +defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (shl (sext GPR32:$Rm), SHIFT)), + (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 7)>; + +defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (zext GPR32:$Rm)), + (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 2)>; + +defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (shl (zext GPR32:$Rm), SHIFT)), + (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 3)>; + +defm : regoff_pats<"Xm", (add GPR64xsp:$Rn, GPR64:$Rm), + (i64 GPR64xsp:$Rn), (i64 GPR64:$Rm), (i64 2)>; + +defm : regoff_pats<"Xm", (add GPR64xsp:$Rn, (shl GPR64:$Rm, SHIFT)), + (i64 GPR64xsp:$Rn), (i64 GPR64:$Rm), (i64 3)>; diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp new file mode 100644 index 0000000..0603574 --- /dev/null +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -0,0 +1,140 @@ +//===-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower AArch64 MachineInstrs to their corresponding +// MCInst records. 
+// +//===----------------------------------------------------------------------===// + +#include "AArch64AsmPrinter.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +MCOperand +AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO, + const MCSymbol *Sym) const { + const MCExpr *Expr = 0; + + Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, OutContext); + + switch (MO.getTargetFlags()) { + case AArch64II::MO_GOT: + Expr = AArch64MCExpr::CreateGOT(Expr, OutContext); + break; + case AArch64II::MO_GOT_LO12: + Expr = AArch64MCExpr::CreateGOTLo12(Expr, OutContext); + break; + case AArch64II::MO_LO12: + Expr = AArch64MCExpr::CreateLo12(Expr, OutContext); + break; + case AArch64II::MO_DTPREL_G1: + Expr = AArch64MCExpr::CreateDTPREL_G1(Expr, OutContext); + break; + case AArch64II::MO_DTPREL_G0_NC: + Expr = AArch64MCExpr::CreateDTPREL_G0_NC(Expr, OutContext); + break; + case AArch64II::MO_GOTTPREL: + Expr = AArch64MCExpr::CreateGOTTPREL(Expr, OutContext); + break; + case AArch64II::MO_GOTTPREL_LO12: + Expr = AArch64MCExpr::CreateGOTTPRELLo12(Expr, OutContext); + break; + case AArch64II::MO_TLSDESC: + Expr = AArch64MCExpr::CreateTLSDesc(Expr, OutContext); + break; + case AArch64II::MO_TLSDESC_LO12: + Expr = AArch64MCExpr::CreateTLSDescLo12(Expr, OutContext); + break; + case AArch64II::MO_TPREL_G1: + Expr = AArch64MCExpr::CreateTPREL_G1(Expr, OutContext); + break; + case AArch64II::MO_TPREL_G0_NC: + Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext); + break; + case AArch64II::MO_NO_FLAG: + // Expr is already correct + break; + default: + llvm_unreachable("Unexpected MachineOperand 
flag"); + } + + if (!MO.isJTI() && MO.getOffset()) + Expr = MCBinaryExpr::CreateAdd(Expr, + MCConstantExpr::Create(MO.getOffset(), + OutContext), + OutContext); + + return MCOperand::CreateExpr(Expr); +} + +bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO, + MCOperand &MCOp) const { + switch (MO.getType()) { + default: llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: + if (MO.isImplicit()) + return false; + assert(!MO.getSubReg() && "Subregs should be eliminated!"); + MCOp = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_BlockAddress: + MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress())); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName())); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal())); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + MO.getMBB()->getSymbol(), OutContext)); + break; + case MachineOperand::MO_JumpTableIndex: + MCOp = lowerSymbolOperand(MO, GetJTISymbol(MO.getIndex())); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCOp = lowerSymbolOperand(MO, GetCPISymbol(MO.getIndex())); + break; + case MachineOperand::MO_RegisterMask: + // Ignore call clobbers + return false; + + } + + return true; +} + +void llvm::LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, + MCInst &OutMI, + AArch64AsmPrinter &AP) { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + if (AP.lowerOperand(MO, MCOp)) + OutMI.addOperand(MCOp); + } +} diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp new file mode 
100644 index 0000000..012a4f8 --- /dev/null +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp @@ -0,0 +1,14 @@ +//===-- AArch64MachineFuctionInfo.cpp - AArch64 machine function info -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "AArch64MachineFunctionInfo.h" + +using namespace llvm; + +void AArch64MachineFunctionInfo::anchor() { } diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h new file mode 100644 index 0000000..bf5cadf --- /dev/null +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -0,0 +1,158 @@ +//=- AArch64MachineFuctionInfo.h - AArch64 machine function info -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares AArch64-specific per-machine-function information. +// +//===----------------------------------------------------------------------===// + +#ifndef AARCH64MACHINEFUNCTIONINFO_H +#define AARCH64MACHINEFUNCTIONINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// This class is derived from MachineFunctionInfo and contains private AArch64 +/// target-specific information for each MachineFunction. +class AArch64MachineFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); + + /// Number of bytes of arguments this function has on the stack. If the callee + /// is expected to restore the argument stack this should be a multiple of 16, + /// all usable during a tail call. 
+ /// + /// The alternative would forbid tail call optimisation in some cases: if we + /// want to transfer control from a function with 8-bytes of stack-argument + /// space to a function with 16-bytes then misalignment of this value would + /// make a stack adjustment necessary, which could not be undone by the + /// callee. + unsigned BytesInStackArgArea; + + /// The number of bytes to restore to deallocate space for incoming + /// arguments. Canonically 0 in the C calling convention, but non-zero when + /// callee is expected to pop the args. + unsigned ArgumentStackToRestore; + + /// If the stack needs to be adjusted on frame entry in two stages, this + /// records the size of the first adjustment just prior to storing + /// callee-saved registers. The callee-saved slots are addressed assuming + /// SP == <incoming-SP> - InitialStackAdjust. + unsigned InitialStackAdjust; + + /// Number of local-dynamic TLS accesses. + unsigned NumLocalDynamics; + + /// Keep track of the next label to be created within this function to + /// represent a cloned constant pool entry. Used by constant islands pass. + unsigned PICLabelUId; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The Frame index of the area where LowerFormalArguments puts the + /// general-purpose registers that might contain variadic parameters. + int VariadicGPRIdx; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The size of the frame object used to store the general-purpose registers + /// which might contain variadic arguments. This is the offset from + /// VariadicGPRIdx to what's stored in __gr_top. + unsigned VariadicGPRSize; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The Frame index of the area where LowerFormalArguments puts the + /// floating-point registers that might contain variadic parameters. 
+ int VariadicFPRIdx; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The size of the frame object used to store the floating-point registers + /// which might contain variadic arguments. This is the offset from + /// VariadicFPRIdx to what's stored in __vr_top. + unsigned VariadicFPRSize; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The Frame index of an object pointing just past the last known stacked + /// argument on entry to a variadic function. This goes into the __stack field + /// of the va_list type. + int VariadicStackIdx; + + /// The offset of the frame pointer from the stack pointer on function + /// entry. This is expected to be negative. + int FramePointerOffset; + +public: + AArch64MachineFunctionInfo() + : BytesInStackArgArea(0), + ArgumentStackToRestore(0), + InitialStackAdjust(0), + NumLocalDynamics(0), + PICLabelUId(0), + VariadicGPRIdx(0), + VariadicGPRSize(0), + VariadicFPRIdx(0), + VariadicFPRSize(0), + VariadicStackIdx(0), + FramePointerOffset(0) {} + + explicit AArch64MachineFunctionInfo(MachineFunction &MF) + : BytesInStackArgArea(0), + ArgumentStackToRestore(0), + InitialStackAdjust(0), + NumLocalDynamics(0), + PICLabelUId(0), + VariadicGPRIdx(0), + VariadicGPRSize(0), + VariadicFPRIdx(0), + VariadicFPRSize(0), + VariadicStackIdx(0), + FramePointerOffset(0) {} + + unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; } + void setBytesInStackArgArea (unsigned bytes) { BytesInStackArgArea = bytes;} + + unsigned getArgumentStackToRestore() const { return ArgumentStackToRestore; } + void setArgumentStackToRestore(unsigned bytes) { ArgumentStackToRestore = bytes; } + + unsigned getInitialStackAdjust() const { return InitialStackAdjust; } + void setInitialStackAdjust(unsigned bytes) { InitialStackAdjust = bytes; } + + unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } + void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } + + void initPICLabelUId(unsigned UId) { 
PICLabelUId = UId; } + unsigned getNumPICLabels() const { return PICLabelUId; } + unsigned createPICLabelUId() { return PICLabelUId++; } + + int getVariadicGPRIdx() const { return VariadicGPRIdx; } + void setVariadicGPRIdx(int Idx) { VariadicGPRIdx = Idx; } + + unsigned getVariadicGPRSize() const { return VariadicGPRSize; } + void setVariadicGPRSize(unsigned Size) { VariadicGPRSize = Size; } + + int getVariadicFPRIdx() const { return VariadicFPRIdx; } + void setVariadicFPRIdx(int Idx) { VariadicFPRIdx = Idx; } + + unsigned getVariadicFPRSize() const { return VariadicFPRSize; } + void setVariadicFPRSize(unsigned Size) { VariadicFPRSize = Size; } + + int getVariadicStackIdx() const { return VariadicStackIdx; } + void setVariadicStackIdx(int Idx) { VariadicStackIdx = Idx; } + + int getFramePointerOffset() const { return FramePointerOffset; } + void setFramePointerOffset(int Idx) { FramePointerOffset = Idx; } + +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp new file mode 100644 index 0000000..ce66504 --- /dev/null +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -0,0 +1,211 @@ +//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of the TargetRegisterInfo class. 
+// +//===----------------------------------------------------------------------===// + + +#include "AArch64RegisterInfo.h" +#include "AArch64FrameLowering.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/ADT/BitVector.h" + +#define GET_REGINFO_TARGET_DESC +#include "AArch64GenRegisterInfo.inc" + +using namespace llvm; + +AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo &tii, + const AArch64Subtarget &sti) + : AArch64GenRegisterInfo(AArch64::X30), TII(tii) { +} + +const uint16_t * +AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + return CSR_PCS_SaveList; +} + +const uint32_t* +AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID) const { + return CSR_PCS_RegMask; +} + +const uint32_t *AArch64RegisterInfo::getTLSDescCallPreservedMask() const { + return TLSDesc_RegMask; +} + +const TargetRegisterClass * +AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { + if (RC == &AArch64::FlagClassRegClass) + return &AArch64::GPR64RegClass; + + return RC; +} + + + +BitVector +AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + Reserved.set(AArch64::XSP); + Reserved.set(AArch64::WSP); + + Reserved.set(AArch64::XZR); + Reserved.set(AArch64::WZR); + + if (TFI->hasFP(MF)) { + Reserved.set(AArch64::X29); + Reserved.set(AArch64::W29); + } + + return Reserved; +} + +void +AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI, + int SPAdj, RegScavenger *RS) const { + assert(SPAdj == 0 && "Cannot deal with nonzero SPAdj yet"); + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = 
*MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const AArch64FrameLowering *TFI = + static_cast<const AArch64FrameLowering *>(MF.getTarget().getFrameLowering()); + + unsigned i = 0; + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have a FrameIndex Operand"); + } + + // In order to work out the base and offset for addressing, the FrameLowering + // code needs to know (sometimes) whether the instruction is storing/loading a + // callee-saved register, or whether it's a more generic + // operation. Fortunately the frame indices are used *only* for that purpose + // and are contiguous, so we can check here. + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + int FrameIndex = MI.getOperand(i).getIndex(); + bool IsCalleeSaveOp = FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI; + + unsigned FrameReg; + int64_t Offset; + Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj, + IsCalleeSaveOp); + + Offset += MI.getOperand(i+1).getImm(); + + // DBG_VALUE instructions have no real restrictions so they can be handled + // easily. + if (MI.isDebugValue()) { + MI.getOperand(i).ChangeToRegister(FrameReg, /*isDef=*/ false); + MI.getOperand(i+1).ChangeToImmediate(Offset); + return; + } + + int MinOffset, MaxOffset, OffsetScale; + if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s) { + MinOffset = 0; + MaxOffset = 0xfff; + OffsetScale = 1; + } else { + // Load/store of a stack object + TII.getAddressConstraints(MI, OffsetScale, MinOffset, MaxOffset); + } + + // The frame lowering has told us a base and offset it thinks we should use to + // access this variable, but it's still up to us to make sure the values are + // legal for the instruction in question. 
+ if (Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) { + unsigned BaseReg = + MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); + emitRegUpdate(MBB, MBBI, MBBI->getDebugLoc(), TII, + BaseReg, FrameReg, BaseReg, Offset); + FrameReg = BaseReg; + Offset = 0; + } + + // Negative offsets are expected if we address from FP, but for + // now this checks nothing has gone horribly wrong. + assert(Offset >= 0 && "Unexpected negative offset from SP"); + + MI.getOperand(i).ChangeToRegister(FrameReg, false, false, true); + MI.getOperand(i+1).ChangeToImmediate(Offset / OffsetScale); +} + +void +AArch64RegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + DebugLoc dl = MI->getDebugLoc(); + int Opcode = MI->getOpcode(); + bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode(); + uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0; + + if (!TFI->hasReservedCallFrame(MF)) { + unsigned Align = TFI->getStackAlignment(); + + uint64_t Amount = MI->getOperand(0).getImm(); + Amount = (Amount + Align - 1)/Align * Align; + if (!IsDestroy) Amount = -Amount; + + // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it + // doesn't have to pop anything), then the first operand will be zero too so + // this adjustment is a no-op. + if (CalleePopAmount == 0) { + // FIXME: in-function stack adjustment for calls is limited to 12-bits + // because there's no guaranteed temporary register available. Mostly call + // frames will be allocated at the start of a function so this is OK, but + // it is a limitation that needs dealing with. 
+ assert(abs(Amount) < 0xfff && "call frame too large"); + emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount); + } + } else if (CalleePopAmount != 0) { + // If the calling convention demands that the callee pops arguments from the + // stack, we want to add it back if we have a reserved call frame. + assert(CalleePopAmount < 0xfff && "call frame too large"); + emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount); + } + + MBB.erase(MI); +} + +unsigned +AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (TFI->hasFP(MF)) + return AArch64::X29; + else + return AArch64::XSP; +} + +bool +AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const AArch64FrameLowering *AFI = static_cast<const AArch64FrameLowering*>(TFI); + return AFI->useFPForAddressing(MF); +} diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h new file mode 100644 index 0000000..ea538e2 --- /dev/null +++ b/lib/Target/AArch64/AArch64RegisterInfo.h @@ -0,0 +1,79 @@ +//==- AArch64RegisterInfo.h - AArch64 Register Information Impl -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of the MRegisterInfo class. 
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TARGET_AARCH64REGISTERINFO_H
#define LLVM_TARGET_AARCH64REGISTERINFO_H

#include "llvm/Target/TargetRegisterInfo.h"

#define GET_REGINFO_HEADER
#include "AArch64GenRegisterInfo.inc"

namespace llvm {

class AArch64InstrInfo;
class AArch64Subtarget;

/// AArch64 register information, layered on the TableGen-generated
/// AArch64GenRegisterInfo.
struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
private:
  /// Instruction info consulted when rewriting frame indices and call-frame
  /// pseudo-instructions.
  const AArch64InstrInfo &TII;

public:
  AArch64RegisterInfo(const AArch64InstrInfo &tii,
                      const AArch64Subtarget &sti);

  /// Callee-saved register list; the same list is returned for every
  /// function.
  const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;

  /// Call-preserved register mask; the calling convention is not consulted.
  const uint32_t *getCallPreservedMask(CallingConv::ID) const;

  /// Register mask preserved across a TLS descriptor call.
  const uint32_t *getTLSDescCallPreservedMask() const;

  /// Registers unavailable to the allocator: SP and ZR always, plus X29/W29
  /// when the function has a frame pointer.
  BitVector getReservedRegs(const MachineFunction &MF) const;

  /// X29 when the function has a frame pointer, otherwise the stack pointer.
  unsigned getFrameRegister(const MachineFunction &MF) const;

  /// Replace an instruction's frame-index operand with a base register and
  /// fold the resolved offset into the following immediate operand.
  void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                           RegScavenger *Rs = NULL) const;

  /// Replace a call-frame setup/destroy pseudo with the SP adjustment it
  /// represents (if any) and erase it.
  void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI) const;

  /// getCrossCopyRegClass - Returns a legal register class to copy a register
  /// in the specified class to or from. Returns original class if it is
  /// possible to copy between two registers of the specified class.
  const TargetRegisterClass *
  getCrossCopyRegClass(const TargetRegisterClass *RC) const;

  /// getLargestLegalSuperClass - Returns the largest super class of RC that is
  /// legal to use in the current sub-target and has the same spill size.
  /// Here that widens the tail-call class tcGPR64 to GPR64 and leaves every
  /// other class unchanged.
  const TargetRegisterClass*
  getLargestLegalSuperClass(const TargetRegisterClass *RC) const {
    if (RC == &AArch64::tcGPR64RegClass)
      return &AArch64::GPR64RegClass;

    return RC;
  }

  /// Always true for this target.
  bool requiresRegisterScavenging(const MachineFunction &MF) const {
    return true;
  }

  /// Always true for this target.
  bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
    return true;
  }

  /// Follows the frame lowering's useFPForAddressing() decision.
  bool useFPForScavengingIndex(const MachineFunction &MF) const;
};

} // end namespace llvm

#endif // LLVM_TARGET_AARCH64REGISTERINFO_H

//===- AArch64RegisterInfo.td - AArch64 Register defs ------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Declarations that describe the AArch64 register file
//===----------------------------------------------------------------------===//

// Sub-register indices; the numeric suffix matches the width, in bits, of the
// register class each one is used to reach.
let Namespace = "AArch64" in {
def sub_128 : SubRegIndex;
def sub_64 : SubRegIndex;
def sub_32 : SubRegIndex;
def sub_16 : SubRegIndex;
def sub_8 : SubRegIndex;

// The VPR registers are handled as sub-registers of FPR equivalents, but
// they're really the same thing. We give this concept a special index.
def sub_alias : SubRegIndex;
}

// Registers are identified with 5-bit ID numbers.
// Base class for every AArch64 register: records the hardware encoding and
// places the register in the AArch64 namespace.
class AArch64Reg<bits<16> enc, string n> : Register<n> {
  let HWEncoding = enc;
  let Namespace = "AArch64";
}

// A register that additionally lists its sub-registers and the indices used
// to reach them.
class AArch64RegWithSubs<bits<16> enc, string n, list<Register> subregs = [],
                         list<SubRegIndex> inds = []>
      : AArch64Reg<enc, n> {
  let SubRegs = subregs;
  let SubRegIndices = inds;
}

//===----------------------------------------------------------------------===//
//  Integer registers: w0-w30, wzr, wsp, x0-x30, xzr, sp
//===----------------------------------------------------------------------===//

foreach Index = 0-30 in {
  def W#Index : AArch64Reg< Index, "w"#Index>, DwarfRegNum<[Index]>;
}

// WSP and WZR share hardware encoding 31; only WSP is given a DWARF number.
def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>;
def WZR : AArch64Reg<31, "wzr">;

// Could be combined with previous loop, but this way leaves w and x registers
// consecutive as LLVM register numbers, which makes for easier debugging.
foreach Index = 0-30 in {
  def X#Index : AArch64RegWithSubs<Index, "x"#Index,
                                   [!cast<Register>("W"#Index)], [sub_32]>,
                DwarfRegNum<[Index]>;
}

// As with the 32-bit forms, XSP and XZR share encoding 31 and only XSP has a
// DWARF number.
def XSP : AArch64RegWithSubs<31, "sp", [WSP], [sub_32]>, DwarfRegNum<[31]>;
def XZR : AArch64RegWithSubs<31, "xzr", [WZR], [sub_32]>;

// Most instructions treat register 31 as zero for reads and a black-hole for
// writes.

// Note that the order of registers is important for the Disassembler here:
// tablegen uses it to form MCRegisterClass::getRegister, which we assume can
// take an encoding value.
def GPR32 : RegisterClass<"AArch64", [i32], 32,
                          (add (sequence "W%u", 0, 30), WZR)> {
}

def GPR64 : RegisterClass<"AArch64", [i64], 64,
                          (add (sequence "X%u", 0, 30), XZR)> {
}

// Variants of the classes above without the zero register.
def GPR32nowzr : RegisterClass<"AArch64", [i32], 32,
                               (sequence "W%u", 0, 30)> {
}

def GPR64noxzr : RegisterClass<"AArch64", [i64], 64,
                               (sequence "X%u", 0, 30)> {
}

// For tail calls, we can't use callee-saved registers or the structure-return
// register, as they are supposed to be live across function calls and may be
// clobbered by the epilogue.
// (The class is X0-X7 and X9-X18: X8 and everything from X19 up are left out.)
def tcGPR64 : RegisterClass<"AArch64", [i64], 64,
                            (add (sequence "X%u", 0, 7),
                                 (sequence "X%u", 9, 18))> {
}


// Certain addressing-useful instructions accept sp directly. Again the order of
// registers is important to the Disassembler.
def GPR32wsp : RegisterClass<"AArch64", [i32], 32,
                             (add (sequence "W%u", 0, 30), WSP)> {
}

def GPR64xsp : RegisterClass<"AArch64", [i64], 64,
                             (add (sequence "X%u", 0, 30), XSP)> {
}

// Some aliases *only* apply to SP (e.g. MOV uses different encoding for SP and
// non-SP variants). We can't use a bare register in those patterns because
// TableGen doesn't like it, so we need a class containing just stack registers
def Rxsp : RegisterClass<"AArch64", [i64], 64,
                         (add XSP)> {
}

def Rwsp : RegisterClass<"AArch64", [i32], 32,
                         (add WSP)> {
}

//===----------------------------------------------------------------------===//
//  Scalar registers in the vector unit:
//  b0-b31, h0-h31, s0-s31, d0-d31, q0-q31
//===----------------------------------------------------------------------===//

// Each wider register contains the next narrower one as its only listed
// sub-register (Q > D > S > H > B). All five views of a given index share the
// same DWARF number, Index + 64.
foreach Index = 0-31 in {
  def B # Index : AArch64Reg< Index, "b" # Index>,
                  DwarfRegNum<[!add(Index, 64)]>;

  def H # Index : AArch64RegWithSubs<Index, "h" # Index,
                                     [!cast<Register>("B" # Index)], [sub_8]>,
                  DwarfRegNum<[!add(Index, 64)]>;

  def S # Index : AArch64RegWithSubs<Index, "s" # Index,
                                     [!cast<Register>("H" # Index)], [sub_16]>,
                  DwarfRegNum<[!add(Index, 64)]>;

  def D # Index : AArch64RegWithSubs<Index, "d" # Index,
                                     [!cast<Register>("S" # Index)], [sub_32]>,
                  DwarfRegNum<[!add(Index, 64)]>;

  def Q # Index : AArch64RegWithSubs<Index, "q" # Index,
                                     [!cast<Register>("D" # Index)], [sub_64]>,
                  DwarfRegNum<[!add(Index, 64)]>;
}


def FPR8 : RegisterClass<"AArch64", [i8], 8,
                         (sequence "B%u", 0, 31)> {
}

def FPR16 : RegisterClass<"AArch64", [f16], 16,
                          (sequence "H%u", 0, 31)> {
}

def FPR32 : RegisterClass<"AArch64", [f32], 32,
                          (sequence "S%u", 0, 31)> {
}

def FPR64 : RegisterClass<"AArch64", [f64], 64,
                          (sequence "D%u", 0, 31)> {
}

def FPR128 : RegisterClass<"AArch64", [f128], 128,
                           (sequence "Q%u", 0, 31)> {
}


//===----------------------------------------------------------------------===//
//  Vector registers:
//===----------------------------------------------------------------------===//

// NEON registers simply specify the overall vector, and it's expected that
// Instructions will individually specify the acceptable data layout. In
// principle this leaves two approaches open:
//   + An operand, giving a single ADDvvv instruction (for example). This turns
//     out to be unworkable in the assembly parser (without every Instruction
//     having a "cvt" function, at least) because the constraints can't be
//     properly enforced. It also complicates specifying patterns since each
//     instruction will accept many types.
//  + A bare token (e.g. ".2d"). This means the AsmParser has to know specific
//    details about NEON registers, but simplifies most other details.
//
// The second approach was taken.

// Each V register wraps the Q register of the same index through sub_alias
// and shares its DWARF number.
foreach Index = 0-31 in {
  def V # Index : AArch64RegWithSubs<Index, "v" # Index,
                                     [!cast<Register>("Q" # Index)],
                                     [sub_alias]>,
                  DwarfRegNum<[!add(Index, 64)]>;
}

// These two classes contain the same registers, which should be reasonably
// sensible for MC and allocation purposes, but allows them to be treated
// separately for things like stack spilling.
def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8], 64,
                          (sequence "V%u", 0, 31)>;

def VPR128 : RegisterClass<"AArch64",
                           [v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128,
                           (sequence "V%u", 0, 31)>;

// Flags register
def NZCV : Register<"nzcv">
{
  let Namespace = "AArch64";
}

// Class holding only NZCV. It is never allocated, and the negative CopyCost
// presumably marks copies as prohibitively expensive -- confirm against the
// RegisterClass documentation.
def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)>
{
  let CopyCost = -1;
  let isAllocatable = 0;
}

//===- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

// Placeholder itinerary set: all three lists are empty.
def GenericItineraries : ProcessorItineraries<[], [], []>;

//===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64SelectionDAGInfo class.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-selectiondag-info" +#include "AArch64TargetMachine.h" +#include "llvm/CodeGen/SelectionDAG.h" +using namespace llvm; + +AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const AArch64TargetMachine &TM) + : TargetSelectionDAGInfo(TM), + Subtarget(&TM.getSubtarget<AArch64Subtarget>()) { +} + +AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() { +} diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h new file mode 100644 index 0000000..8d3889e --- /dev/null +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -0,0 +1,32 @@ +//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the AArch64 subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64SELECTIONDAGINFO_H +#define LLVM_AARCH64SELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class AArch64TargetMachine; + +class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo { + const AArch64Subtarget *Subtarget; +public: + explicit AArch64SelectionDAGInfo(const AArch64TargetMachine &TM); + ~AArch64SelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp new file mode 100644 index 0000000..d17b738 --- /dev/null +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -0,0 +1,43 @@ +//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AArch64 specific subclass of TargetSubtargetInfo. 
+// +//===----------------------------------------------------------------------===// + +#include "AArch64Subtarget.h" +#include "AArch64RegisterInfo.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/ADT/SmallVector.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "AArch64GenSubtargetInfo.inc" + +using namespace llvm; + +AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS) + : AArch64GenSubtargetInfo(TT, CPU, FS) + , HasNEON(true) + , HasCrypto(true) + , TargetTriple(TT) { + + ParseSubtargetFeatures(CPU, FS); +} + +bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV, + Reloc::Model RelocM) const { + if (RelocM == Reloc::Static) + return false; + + return !GV->hasLocalLinkage() && !GV->hasHiddenVisibility(); +} diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h new file mode 100644 index 0000000..2e9205f --- /dev/null +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -0,0 +1,54 @@ +//==-- AArch64Subtarget.h - Define Subtarget for the AArch64 ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the AArch64 specific subclass of TargetSubtargetInfo. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_SUBTARGET_H
+#define LLVM_TARGET_AARCH64_SUBTARGET_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AArch64GenSubtargetInfo.inc"
+
+#include <string>
+
+namespace llvm {
+class StringRef;
+class GlobalValue;
+
+/// AArch64Subtarget - Subtarget description for AArch64, layered on top of the
+/// tblgen-generated AArch64GenSubtargetInfo.
+class AArch64Subtarget : public AArch64GenSubtargetInfo {
+protected:
+  // Feature flags (presumably: NEON / Crypto extension availability).
+  bool HasNEON;
+  bool HasCrypto;
+
+  /// TargetTriple - The processor and OS being targeted.
+  Triple TargetTriple;
+
+public:
+  /// Initializes the data members for the given triple, CPU name and feature
+  /// string.
+  AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS);
+
+  /// ParseSubtargetFeatures - Parses the features string and sets the
+  /// corresponding subtarget options. The definition is auto generated by
+  /// tblgen.
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+  /// GVIsIndirectSymbol - Whether GV is treated as an indirect symbol under
+  /// the relocation model RelocM.
+  bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+
+  /// True when the target triple uses the ELF object format.
+  bool isTargetELF() const {
+    return TargetTriple.isOSBinFormatELF();
+  }
+
+  /// True when the target triple's OS is Linux.
+  bool isTargetLinux() const {
+    return TargetTriple.getOS() == Triple::Linux;
+  }
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_AARCH64_SUBTARGET_H
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
new file mode 100644
index 0000000..68e3643
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -0,0 +1,78 @@
+//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AArch64TargetMachine constructor, the target
+// registration entry point and the AArch64 pass configuration.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+/// C entry point that registers AArch64TargetMachine for TheAArch64Target by
+/// constructing a RegisterTargetMachine helper object.
+extern "C" void LLVMInitializeAArch64Target() {
+  RegisterTargetMachine<AArch64TargetMachine> Registration(TheAArch64Target);
+}
+
+/// Forwards all arguments to LLVMTargetMachine, then builds the per-target
+/// helpers. The initializer order below matches the member declaration order
+/// in the class, so InstrInfo and FrameLowering see a constructed Subtarget.
+AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
+                                           StringRef CPU, StringRef FS,
+                                           const TargetOptions &Options,
+                                           Reloc::Model RM,
+                                           CodeModel::Model CM,
+                                           CodeGenOpt::Level OL)
+    : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+      Subtarget(TT, CPU, FS), InstrInfo(Subtarget),
+      DL("e-p:64:64-i64:64:64-i128:128:128-s0:32:32-f128:128:128-n32:64-S128"),
+      TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) {}
+
+namespace {
+/// AArch64PassConfig - Pass configuration used by the AArch64 code generator.
+class AArch64PassConfig : public TargetPassConfig {
+public:
+  AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
+      : TargetPassConfig(TM, PM) {}
+
+  /// The target machine, typed as AArch64TargetMachine.
+  AArch64TargetMachine &getAArch64TargetMachine() const {
+    return getTM<AArch64TargetMachine>();
+  }
+
+  /// The subtarget owned by the target machine.
+  const AArch64Subtarget &getAArch64Subtarget() const {
+    return *getAArch64TargetMachine().getSubtargetImpl();
+  }
+
+  virtual bool addInstSelector();
+  virtual bool addPreEmitPass();
+};
+} // namespace
+
+/// Allocates a new AArch64PassConfig bound to this target machine and PM.
+TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new AArch64PassConfig(this, PM);
+}
+
+/// Adds the bundle-unpacking pass followed by the AArch64 constant island
+/// pass; always returns true.
+bool AArch64PassConfig::addPreEmitPass() {
+  addPass(&UnpackMachineBundlesID);
+  addPass(createAArch64ConstantIslandPass());
+  return true;
+}
+
+/// Adds the AArch64 DAG instruction selector; always returns false.
+bool AArch64PassConfig::addInstSelector() {
+  AArch64TargetMachine &TM = getAArch64TargetMachine();
+  addPass(createAArch64ISelDAG(TM, getOptLevel()));
+
+  // Local-dynamic TLS accesses are only cleaned up on ELF targets, and only
+  // when some optimization is enabled.
+  const bool Optimizing = getOptLevel() != CodeGenOpt::None;
+  if (Optimizing && getAArch64Subtarget().isTargetELF())
+    addPass(createAArch64CleanupLocalDynamicTLSPass());
+
+  return false;
+}
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
new file mode 100644
index 0000000..c1f47c2
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -0,0 +1,69 @@
+//=== AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AArch64 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64TARGETMACHINE_H
+#define LLVM_AARCH64TARGETMACHINE_H
+
+#include "AArch64FrameLowering.h"
+#include "AArch64ISelLowering.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64SelectionDAGInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// AArch64TargetMachine - Owns the AArch64 subtarget, instruction info, data
+/// layout, lowering, selection-DAG info and frame lowering objects, and hands
+/// out pointers to them.
+class AArch64TargetMachine : public LLVMTargetMachine {
+  // Declaration order is also construction order: Subtarget comes first
+  // because InstrInfo and FrameLowering are constructed from it.
+  AArch64Subtarget Subtarget;
+  AArch64InstrInfo InstrInfo;
+  const DataLayout DL;
+  AArch64TargetLowering TLInfo;
+  AArch64SelectionDAGInfo TSInfo;
+  AArch64FrameLowering FrameLowering;
+
+public:
+  AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU,
+                       StringRef FS, const TargetOptions &Options,
+                       Reloc::Model RM, CodeModel::Model CM,
+                       CodeGenOpt::Level OL);
+
+  // Accessors for the owned components; none of them transfers ownership.
+  const AArch64InstrInfo *getInstrInfo() const { return &InstrInfo; }
+
+  const AArch64FrameLowering *getFrameLowering() const {
+    return &FrameLowering;
+  }
+
+  const AArch64TargetLowering *getTargetLowering() const { return &TLInfo; }
+
+  const AArch64SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+
+  const AArch64Subtarget *getSubtargetImpl() const { return &Subtarget; }
+
+  const DataLayout *getDataLayout() const { return &DL; }
+
+  /// The register info is the one embedded in InstrInfo.
+  const TargetRegisterInfo *getRegisterInfo() const {
+    return &InstrInfo.getRegisterInfo();
+  }
+
+  TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
new file mode 100644
index 0000000..3bb961a
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -0,0 +1,19 @@
+//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64TargetObjectFile.h"
+
+using namespace llvm;
+
+/// Runs the generic ELF initialization first, then InitializeELF with the
+/// UseInitArray setting taken from the target machine's options.
+void AArch64LinuxTargetObjectFile::Initialize(MCContext &Ctx,
+                                              const TargetMachine &TM) {
+  TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+  InitializeELF(TM.Options.UseInitArray);
+}
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
new file mode 100644
index 0000000..07caac1
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -0,0 +1,27 @@
+//===-- AArch64TargetObjectFile.h - AArch64 Object Info ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
+#define LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+  /// AArch64LinuxTargetObjectFile - This implementation is used for linux
+  /// AArch64.
+  class AArch64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
+  public:
+    /// Initialize - ELF initialization for AArch64 Linux. Declared public so
+    /// the override keeps the accessibility of the base-class method instead
+    /// of silently becoming private through the 'class' default.
+    virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+  };
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
new file mode 100644
index 0000000..3402634
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -0,0 +1,2025 @@
+//==- AArch64AsmParser.cpp - Parse AArch64 assembly to MCInst instructions -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "MCTargetDesc/AArch64BaseInfo.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace {
+
+class AArch64Operand;
+
+/// AArch64AsmParser - Target assembly parser for AArch64. It keeps references
+/// to the subtarget info and to the generic assembly parser it extends, and
+/// pulls in the tblgen-generated matcher declarations.
+class AArch64AsmParser : public MCTargetAsmParser {
+  MCSubtargetInfo &STI;
+  MCAsmParser &Parser;
+
+#define GET_ASSEMBLER_HEADER
+#include "AArch64GenAsmMatcher.inc"
+
+public:
+  /// Binds the parser to _STI and _Parser, initializes the parser-extension
+  /// base with _Parser and computes the available-feature set from the
+  /// subtarget's feature bits.
+  AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+    : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+    MCAsmParserExtension::Initialize(_Parser);
+
+    // Initialize the set of available features.
+    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+  }
+
+  // These are the public interface of the MCTargetAsmParser
+  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+                        SMLoc NameLoc,
+                        SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  bool ParseDirective(AsmToken DirectiveID);
+  bool ParseDirectiveTLSDescCall(SMLoc L);
+  bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                               MCStreamer &Out, unsigned &ErrorInfo,
+                               bool MatchingInlineAsm);
+
+  // The rest of the sub-parsers have more freedom over interface: they return
+  // an OperandMatchResultTy because it's less ambiguous than true/false or
+  // -1/0/1 even if it is more verbose
+  OperandMatchResultTy
+  ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+               StringRef Mnemonic);
+
+  OperandMatchResultTy ParseImmediate(const MCExpr *&ExprVal);
+
+  OperandMatchResultTy ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind);
+
+  OperandMatchResultTy
+  ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                uint32_t NumLanes);
+
+  OperandMatchResultTy
+  ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                uint32_t &NumLanes);
+
+  OperandMatchResultTy
+  ParseImmWithLSLOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  OperandMatchResultTy
+  ParseCondCodeOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  OperandMatchResultTy
+  ParseCRxOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  OperandMatchResultTy
+  ParseFPImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  /// Convenience wrapper: default-constructs a mapper of the given type and
+  /// forwards to the NamedImmMapper overload below.
+  template<typename SomeNamedImmMapper> OperandMatchResultTy
+  ParseNamedImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+    return ParseNamedImmOperand(SomeNamedImmMapper(), Operands);
+  }
+
+  OperandMatchResultTy
+  ParseNamedImmOperand(const NamedImmMapper &Mapper,
+                       SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  OperandMatchResultTy
+  ParseLSXAddressOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  OperandMatchResultTy
+  ParseShiftExtend(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  OperandMatchResultTy
+  ParseSysRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  bool validateInstruction(MCInst &Inst,
+                          const SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  /// Scan the next token (which had better be an identifier) and determine
+  /// whether it represents a general-purpose or vector register. It returns
+  /// true if an identifier was found and populates its reference arguments. It
+  /// does not consume the token.
+  bool
+  IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, StringRef &LayoutSpec,
+                   SMLoc &LayoutLoc) const;
+
+};
+
+}
+
+namespace {
+
+/// Instances of this class represent a parsed AArch64 machine operand. Kind
+/// selects which payload of the union that follows is in use.
+class AArch64Operand : public MCParsedAsmOperand {
+private:
+  enum KindTy {
+    k_ImmWithLSL,     // #uimm {, LSL #amt }
+    k_CondCode,       // eq/ne/...
+    k_FPImmediate,    // Limited-precision floating-point imm
+    k_Immediate,      // Including expressions referencing symbols
+    k_Register,
+    k_ShiftExtend,
+    k_SysReg,         // The register operand of MRS and MSR instructions
+    k_Token,          // The mnemonic; other raw tokens the auto-generated
+    k_WrappedRegister // Load/store exclusive permit a wrapped register.
+  } Kind;
+
+  // Source range covered by the operand.
+  SMLoc StartLoc, EndLoc;
+
+  // Payload of the operand. Kind says which member is in use; the accessors
+  // below assert on it before reading.
+  union {
+    struct {
+      const MCExpr *Val;
+      unsigned ShiftAmount;
+      bool ImplicitAmount;
+    } ImmWithLSL;
+
+    struct {
+      A64CC::CondCodes Code;
+    } CondCode;
+
+    struct {
+      double Val;
+    } FPImm;
+
+    struct {
+      const MCExpr *Val;
+    } Imm;
+
+    struct {
+      unsigned RegNum;
+    } Reg;
+
+    struct {
+      A64SE::ShiftExtSpecifiers ShiftType;
+      unsigned Amount;
+      bool ImplicitAmount;
+    } ShiftExtend;
+
+    struct {
+      const char *Data;
+      unsigned Length;
+    } SysReg;
+
+    struct {
+      const char *Data;
+      unsigned Length;
+    } Tok;
+  };
+
+  /// Private: operands are built through the static Create* factories, which
+  /// fill in the payload after construction.
+  AArch64Operand(KindTy K, SMLoc S, SMLoc E)
+    : MCParsedAsmOperand(), Kind(K), StartLoc(S), EndLoc(E) {}
+
+public:
+  /// Copy constructor. Copies the kind, the source range and the payload that
+  /// is live for that kind; previously nothing was copied, which left every
+  /// field of the new object uninitialized.
+  AArch64Operand(const AArch64Operand &o)
+    : MCParsedAsmOperand(), Kind(o.Kind), StartLoc(o.StartLoc),
+      EndLoc(o.EndLoc) {
+    // No default case, so that a new KindTy enumerator triggers a
+    // missing-case warning here.
+    switch (Kind) {
+    case k_ImmWithLSL:
+      ImmWithLSL = o.ImmWithLSL;
+      break;
+    case k_CondCode:
+      CondCode = o.CondCode;
+      break;
+    case k_FPImmediate:
+      FPImm = o.FPImm;
+      break;
+    case k_Immediate:
+      Imm = o.Imm;
+      break;
+    case k_Register:
+    case k_WrappedRegister:
+      Reg = o.Reg;
+      break;
+    case k_ShiftExtend:
+      ShiftExtend = o.ShiftExtend;
+      break;
+    case k_SysReg:
+      SysReg = o.SysReg;
+      break;
+    case k_Token:
+      Tok = o.Tok;
+      break;
+    }
+  }
+
+  SMLoc getStartLoc() const { return StartLoc; }
+  SMLoc getEndLoc() const { return EndLoc; }
+  void print(raw_ostream&) const;
+  void dump() const;
+
+  /// Token text; only valid for k_Token operands.
+  StringRef getToken() const {
+    assert(Kind == k_Token && "Invalid access!");
+    return StringRef(Tok.Data, Tok.Length);
+  }
+
+  /// Register number; valid for both plain and wrapped register operands.
+  unsigned getReg() const {
+    assert((Kind == k_Register || Kind == k_WrappedRegister)
+           && "Invalid access!");
+    return Reg.RegNum;
+  }
+
+  /// Immediate expression; only valid for k_Immediate operands.
+  const MCExpr *getImm() const {
+    assert(Kind == k_Immediate && "Invalid access!");
+    return Imm.Val;
+  }
+
+  /// Condition code; only valid for k_CondCode operands.
+  A64CC::CondCodes getCondCode() const {
+    assert(Kind == k_CondCode && "Invalid access!");
+    return CondCode.Code;
+  }
+
+  /// Returns true when E is anything other than a plain MCConstantExpr. In
+  /// that case Variant receives the modifier of an AArch64MCExpr, or
+  /// VK_AARCH64_None for every other expression kind. When the result is
+  /// false, Variant is left untouched.
+  static bool isNonConstantExpr(const MCExpr *E,
+                                AArch64MCExpr::VariantKind &Variant) {
+    if (const AArch64MCExpr *A64E = dyn_cast<AArch64MCExpr>(E)) {
+      Variant = A64E->getKind();
+      return true;
+    } else if (!isa<MCConstantExpr>(E)) {
+      Variant = AArch64MCExpr::VK_AARCH64_None;
+      return true;
+    }
+
+    return false;
+  }
+
+  // Simple kind predicates. Note that isReg() is false for wrapped registers
+  // and that this operand type never reports itself as a memory operand.
+  bool isCondCode() const { return Kind == k_CondCode; }
+  bool isToken() const { return Kind == k_Token; }
+  bool isReg() const { return Kind == k_Register; }
+  bool isImm() const { return Kind == k_Immediate; }
+  bool isMem() const { return false; }
+  bool isFPImm() const
{ return Kind == k_FPImmediate; }
+  bool isShiftOrExtend() const { return Kind == k_ShiftExtend; }
+  bool isSysReg() const { return Kind == k_SysReg; }
+  bool isImmWithLSL() const { return Kind == k_ImmWithLSL; }
+  bool isWrappedReg() const { return Kind == k_WrappedRegister; }
+
+  /// True for an immediate-with-shift operand whose shift amount is 0 and
+  /// whose value is either a symbolic expression carrying one of the
+  /// low-12-bit modifiers listed below, or a constant in [0, 0xfff].
+  bool isAddSubImmLSL0() const {
+    if (!isImmWithLSL()) return false;
+    if (ImmWithLSL.ShiftAmount != 0) return false;
+
+    AArch64MCExpr::VariantKind Variant;
+    if (isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+      return Variant == AArch64MCExpr::VK_AARCH64_LO12
+          || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12
+          || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC
+          || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12
+          || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC
+          || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC_LO12;
+    }
+
+    // Otherwise it should be a real immediate in range:
+    const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val);
+    return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+  }
+
+  /// Same as isAddSubImmLSL0 but for a shift amount of exactly 12; the only
+  /// symbolic modifiers accepted are DTPREL_HI12 and TPREL_HI12.
+  bool isAddSubImmLSL12() const {
+    if (!isImmWithLSL()) return false;
+    if (ImmWithLSL.ShiftAmount != 12) return false;
+
+    AArch64MCExpr::VariantKind Variant;
+    if (isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+      return Variant == AArch64MCExpr::VK_AARCH64_DTPREL_HI12
+          || Variant == AArch64MCExpr::VK_AARCH64_TPREL_HI12;
+    }
+
+    // Otherwise it should be a real immediate in range:
+    const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val);
+    return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+  }
+
+  /// True for a shift/extend operand usable with an RmSize-bit offset
+  /// register: a 32-bit register needs UXTW or SXTW, a 64-bit one needs LSL
+  /// or SXTX, and the amount must be either 0 or log2(MemSize).
+  template<unsigned MemSize, unsigned RmSize> bool isAddrRegExtend() const {
+    if (!isShiftOrExtend()) return false;
+
+    A64SE::ShiftExtSpecifiers Ext = ShiftExtend.ShiftType;
+    if (RmSize == 32 && !(Ext == A64SE::UXTW || Ext == A64SE::SXTW))
+      return false;
+
+    if (RmSize == 64 && !(Ext == A64SE::LSL || Ext == A64SE::SXTX))
+      return false;
+
+    return ShiftExtend.Amount == Log2_32(MemSize) || ShiftExtend.Amount == 0;
+  }
+
+  /// True for an immediate that is either a non-constant expression with no
+  /// modifier or a GOT/GOTTPREL/TLSDESC modifier, or a constant accepted by
+  /// isLabel<21, 4096>.
+  bool isAdrpLabel() const {
+    if (!isImm())
return false;
+
+    AArch64MCExpr::VariantKind Variant;
+    if (isNonConstantExpr(getImm(), Variant)) {
+      return Variant == AArch64MCExpr::VK_AARCH64_None
+        || Variant == AArch64MCExpr::VK_AARCH64_GOT
+        || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL
+        || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC;
+    }
+
+    return isLabel<21, 4096>();
+  }
+
+  /// True for a constant immediate in [1, RegWidth].
+  template<unsigned RegWidth> bool isBitfieldWidth() const {
+    if (!isImm()) return false;
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    if (!CE) return false;
+
+    return CE->getValue() >= 1 && CE->getValue() <= RegWidth;
+  }
+
+  /// True for a constant immediate in [1, RegWidth].
+  template<int RegWidth>
+  bool isCVTFixedPos() const {
+    if (!isImm()) return false;
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    if (!CE) return false;
+
+    return CE->getValue() >= 1 && CE->getValue() <= RegWidth;
+  }
+
+  /// True for a floating-point immediate that A64Imms::isFPImm accepts; the
+  /// encoded value it produces is discarded here.
+  bool isFMOVImm() const {
+    if (!isFPImm()) return false;
+
+    APFloat RealVal(FPImm.Val);
+    uint32_t ImmVal;
+    return A64Imms::isFPImm(RealVal, ImmVal);
+  }
+
+  /// True for a floating-point immediate equal to positive zero.
+  bool isFPZero() const {
+    if (!isFPImm()) return false;
+
+    APFloat RealVal(FPImm.Val);
+    return RealVal.isPosZero();
+  }
+
+  /// True for an immediate that is a bare symbol reference, or a constant that
+  /// is a multiple of scale and lies between -(scale * 2^(field_width-1)) and
+  /// scale * (2^(field_width-1) - 1) inclusive.
+  template<unsigned field_width, unsigned scale>
+  bool isLabel() const {
+    if (!isImm()) return false;
+
+    if (dyn_cast<MCSymbolRefExpr>(Imm.Val)) {
+      return true;
+    } else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+      int64_t Val = CE->getValue();
+      int64_t Min = - (scale * (1LL << (field_width - 1)));
+      int64_t Max = scale * ((1LL << (field_width - 1)) - 1);
+      return (Val % scale) == 0 && Val >= Min && Val <= Max;
+    }
+
+    // N.b. this disallows explicit relocation specifications via an
+    // AArch64MCExpr: any expression other than a bare symbol reference or a
+    // constant is rejected here.
+    return false;
+  }
+
+  /// True for an immediate whose constant value is exactly 1.
+  bool isLane1() const {
+    if (!isImm()) return false;
+
+    // Because it's come through custom assembly parsing, it must always be a
+    // constant expression.
+    return cast<MCConstantExpr>(getImm())->getValue() == 1;
+  }
+
+  /// True for an immediate that is either a non-constant expression with no
+  /// modifier or a GOTTPREL modifier, or a constant accepted by
+  /// isLabel<19, 4>.
+  bool isLoadLitLabel() const {
+    if (!isImm()) return false;
+
+    AArch64MCExpr::VariantKind Variant;
+    if (isNonConstantExpr(getImm(), Variant)) {
+      return Variant == AArch64MCExpr::VK_AARCH64_None
+          || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL;
+    }
+
+    return isLabel<19, 4>();
+  }
+
+  /// True for a constant immediate that A64Imms::isLogicalImm accepts for a
+  /// RegWidth-bit register; the encoding it produces is discarded here.
+  template<unsigned RegWidth> bool isLogicalImm() const {
+    if (!isImm()) return false;
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val);
+    if (!CE) return false;
+
+    uint32_t Bits;
+    return A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits);
+  }
+
+  /// True for a logical immediate (see isLogicalImm) that is neither a valid
+  /// MOVZ nor a valid MOVN immediate for RegWidth.
+  template<unsigned RegWidth> bool isLogicalImmMOV() const {
+    if (!isLogicalImm<RegWidth>()) return false;
+
+    // isLogicalImm succeeded, so the expression is known to be a constant.
+    const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val);
+
+    // The move alias for ORR is only valid if the immediate cannot be
+    // represented with a move (immediate) instruction; they take priority.
+    int UImm16, Shift;
+    return !A64Imms::isMOVZImm(RegWidth, CE->getValue(), UImm16, Shift)
+      && !A64Imms::isMOVNImm(RegWidth, CE->getValue(), UImm16, Shift);
+  }
+
+  /// True for an immediate usable as an unsigned, MemSize-scaled 12-bit
+  /// offset. Non-constant expressions are accepted without further checks;
+  /// constants must be in [0, 0xfff * MemSize] and pass the alignment test
+  /// below (which is a multiple-of-MemSize test when MemSize is a power of
+  /// two).
+  template<int MemSize>
+  bool isOffsetUImm12() const {
+    if (!isImm()) return false;
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+
+    // Assume they know what they're doing for now if they've given us a
+    // non-constant expression. In principle we could check for ridiculous
+    // things that can't possibly work or relocations that would almost
+    // certainly break resulting code.
+    if (!CE)
+      return true;
+
+    int64_t Val = CE->getValue();
+
+    // Must be a multiple of the access size in bytes.
+    if ((Val & (MemSize - 1)) != 0) return false;
+
+    // Must be 12-bit unsigned
+    return Val >= 0 && Val <= 0xfff * MemSize;
+  }
+
+  /// True for a shift/extend operand of kind SHKind whose amount is at most
+  /// 63 (is64Bit) or at most 31 (otherwise).
+  template<A64SE::ShiftExtSpecifiers SHKind, bool is64Bit>
+  bool isShift() const {
+    if (!isShiftOrExtend()) return false;
+
+    if (ShiftExtend.ShiftType != SHKind)
+      return false;
+
+    return is64Bit ?
ShiftExtend.Amount <= 63 : ShiftExtend.Amount <= 31;
+  }
+
+  // The isMOV{N,Z,K}{32,64}Imm predicates below all delegate to
+  // isMoveWideImm; they differ only in the register width passed and in the
+  // table of relocation modifiers they permit on a symbolic operand.
+
+  /// MOVN, 32-bit register.
+  bool isMOVN32Imm() const {
+    static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+      AArch64MCExpr::VK_AARCH64_SABS_G0,
+      AArch64MCExpr::VK_AARCH64_SABS_G1,
+      AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+      AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+      AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+      AArch64MCExpr::VK_AARCH64_TPREL_G1,
+      AArch64MCExpr::VK_AARCH64_TPREL_G0,
+    };
+    unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+    return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+  }
+
+  /// MOVN, 64-bit register.
+  bool isMOVN64Imm() const {
+    static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+      AArch64MCExpr::VK_AARCH64_SABS_G0,
+      AArch64MCExpr::VK_AARCH64_SABS_G1,
+      AArch64MCExpr::VK_AARCH64_SABS_G2,
+      AArch64MCExpr::VK_AARCH64_DTPREL_G2,
+      AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+      AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+      AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+      AArch64MCExpr::VK_AARCH64_TPREL_G2,
+      AArch64MCExpr::VK_AARCH64_TPREL_G1,
+      AArch64MCExpr::VK_AARCH64_TPREL_G0,
+    };
+    unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+    return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+  }
+
+
+  /// MOVZ, 32-bit register.
+  bool isMOVZ32Imm() const {
+    static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+      AArch64MCExpr::VK_AARCH64_ABS_G0,
+      AArch64MCExpr::VK_AARCH64_ABS_G1,
+      AArch64MCExpr::VK_AARCH64_SABS_G0,
+      AArch64MCExpr::VK_AARCH64_SABS_G1,
+      AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+      AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+      AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+      AArch64MCExpr::VK_AARCH64_TPREL_G1,
+      AArch64MCExpr::VK_AARCH64_TPREL_G0,
+    };
+    unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+    return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+  }
+
+  /// MOVZ, 64-bit register.
+  bool isMOVZ64Imm() const {
+    static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+      AArch64MCExpr::VK_AARCH64_ABS_G0,
+      AArch64MCExpr::VK_AARCH64_ABS_G1,
+      AArch64MCExpr::VK_AARCH64_ABS_G2,
+      AArch64MCExpr::VK_AARCH64_ABS_G3,
+      AArch64MCExpr::VK_AARCH64_SABS_G0,
+      AArch64MCExpr::VK_AARCH64_SABS_G1,
+      AArch64MCExpr::VK_AARCH64_SABS_G2,
+      AArch64MCExpr::VK_AARCH64_DTPREL_G2,
+      AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+      AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+      AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+      AArch64MCExpr::VK_AARCH64_TPREL_G2,
+      AArch64MCExpr::VK_AARCH64_TPREL_G1,
+      AArch64MCExpr::VK_AARCH64_TPREL_G0,
+    };
+    unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+    return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+  }
+
+  /// MOVK, 32-bit register.
+  bool isMOVK32Imm() const {
+    static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+      AArch64MCExpr::VK_AARCH64_ABS_G0_NC,
+      AArch64MCExpr::VK_AARCH64_ABS_G1_NC,
+      AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC,
+      AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC,
+      AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC,
+      AArch64MCExpr::VK_AARCH64_TPREL_G1_NC,
+      AArch64MCExpr::VK_AARCH64_TPREL_G0_NC,
+    };
+    unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+    return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+  }
+
+  /// MOVK, 64-bit register.
+  bool isMOVK64Imm() const {
+    static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+      AArch64MCExpr::VK_AARCH64_ABS_G0_NC,
+      AArch64MCExpr::VK_AARCH64_ABS_G1_NC,
+      AArch64MCExpr::VK_AARCH64_ABS_G2_NC,
+      AArch64MCExpr::VK_AARCH64_ABS_G3,
+      AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC,
+      AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC,
+      AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC,
+      AArch64MCExpr::VK_AARCH64_TPREL_G1_NC,
+      AArch64MCExpr::VK_AARCH64_TPREL_G0_NC,
+    };
+    unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+    return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+  }
+
+  /// Shared check for the move-wide predicates above. The operand must be an
+  /// immediate-with-shift whose shift amount is a multiple of 16 and smaller
+  /// than RegWidth. A symbolic value must carry one of the NumModifiers
+  /// entries of PermittedModifiers and must not have an explicit shift; a
+  /// constant value must be in [0, 0xffff].
+  bool isMoveWideImm(unsigned RegWidth,
+                     AArch64MCExpr::VariantKind *PermittedModifiers,
+                     unsigned NumModifiers) const {
+    if (!isImmWithLSL()) return false;
+
+    if (ImmWithLSL.ShiftAmount % 16 != 0) return false;
+    if (ImmWithLSL.ShiftAmount >= RegWidth) return false;
+
+    AArch64MCExpr::VariantKind Modifier;
+    if (isNonConstantExpr(ImmWithLSL.Val, Modifier)) {
+      // E.g. "#:abs_g0:sym, lsl #16" makes no sense.
+      if (!ImmWithLSL.ImplicitAmount) return false;
+
+      for (unsigned i = 0; i < NumModifiers; ++i)
+        if (PermittedModifiers[i] == Modifier) return true;
+
+      return false;
+    }
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmWithLSL.Val);
+    return CE && CE->getValue() >= 0 && CE->getValue() <= 0xffff;
+  }
+
+  /// True for a constant immediate accepted by isValidImm for RegWidth. For
+  /// RegWidth == 32 the upper 32 bits must be all zero or all one, and only
+  /// the low 32 bits are handed to isValidImm.
+  template<int RegWidth, bool (*isValidImm)(int, uint64_t, int&, int&)>
+  bool isMoveWideMovAlias() const {
+    if (!isImm()) return false;
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    if (!CE) return false;
+
+    int UImm16, Shift;
+    uint64_t Value = CE->getValue();
+
+    // If this is a 32-bit instruction then all bits above 32 should be the
+    // same: either of these is fine because signed/unsigned values should be
+    // permitted.
+    if (RegWidth == 32) {
+      if ((Value >> 32) != 0 && (Value >> 32) != 0xffffffff)
+        return false;
+
+      Value &= 0xffffffffULL;
+    }
+
+    return isValidImm(RegWidth, Value, UImm16, Shift);
+  }
+
+  /// True for a system-register operand whose name A64SysReg::MSRMapper
+  /// recognises; the mapped value itself is not used.
+  bool isMSRWithReg() const {
+    if (!isSysReg()) return false;
+
+    bool IsKnownRegister;
+    StringRef Name(SysReg.Data, SysReg.Length);
+    A64SysReg::MSRMapper().fromString(Name, IsKnownRegister);
+
+    return IsKnownRegister;
+  }
+
+  /// True for a system-register operand whose name A64PState::PStateMapper
+  /// recognises; the mapped value itself is not used.
+  bool isMSRPState() const {
+    if (!isSysReg()) return false;
+
+    bool IsKnownRegister;
+    StringRef Name(SysReg.Data, SysReg.Length);
+    A64PState::PStateMapper().fromString(Name, IsKnownRegister);
+
+    return IsKnownRegister;
+  }
+
+  /// True for a system-register operand whose name A64SysReg::MRSMapper
+  /// recognises; the mapped value itself is not used.
+  bool isMRS() const {
+    if (!isSysReg()) return false;
+
+    // Accept only names known to the MRS mapper.
+    bool IsKnownRegister;
+    StringRef Name(SysReg.Data, SysReg.Length);
+    A64SysReg::MRSMapper().fromString(Name, IsKnownRegister);
+
+    return IsKnownRegister;
+  }
+
+  /// True for a constant immediate in [0, 31].
+  bool isPRFM() const {
+    if (!isImm()) return false;
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+
+    if (!CE)
+      return false;
+
+    return CE->getValue() >= 0 && CE->getValue() <= 31;
+  }
+
template<A64SE::ShiftExtSpecifiers SHKind> bool isRegExtend() const { + if (!isShiftOrExtend()) return false; + + if (ShiftExtend.ShiftType != SHKind) + return false; + + return ShiftExtend.Amount <= 4; + } + + bool isRegExtendLSL() const { + if (!isShiftOrExtend()) return false; + + if (ShiftExtend.ShiftType != A64SE::LSL) + return false; + + return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4; + } + + template<int MemSize> bool isSImm7Scaled() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + + int64_t Val = CE->getValue(); + if (Val % MemSize != 0) return false; + + Val /= MemSize; + + return Val >= -64 && Val < 64; + } + + template<int BitWidth> + bool isSImm() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + + return CE->getValue() >= -(1LL << (BitWidth - 1)) + && CE->getValue() < (1LL << (BitWidth - 1)); + } + + template<int bitWidth> + bool isUImm() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + + return CE->getValue() >= 0 && CE->getValue() < (1LL << bitWidth); + } + + bool isUImm() const { + if (!isImm()) return false; + + return isa<MCConstantExpr>(getImm()); + } + + static AArch64Operand *CreateImmWithLSL(const MCExpr *Val, + unsigned ShiftAmount, + bool ImplicitAmount, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E); + Op->ImmWithLSL.Val = Val; + Op->ImmWithLSL.ShiftAmount = ShiftAmount; + Op->ImmWithLSL.ImplicitAmount = ImplicitAmount; + return Op; + } + + static AArch64Operand *CreateCondCode(A64CC::CondCodes Code, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_CondCode, S, E); + Op->CondCode.Code = Code; + return Op; + } + + static AArch64Operand *CreateFPImm(double Val, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new 
AArch64Operand(k_FPImmediate, S, E); + Op->FPImm.Val = Val; + return Op; + } + + static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_Immediate, S, E); + Op->Imm.Val = Val; + return Op; + } + + static AArch64Operand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_Register, S, E); + Op->Reg.RegNum = RegNum; + return Op; + } + + static AArch64Operand *CreateWrappedReg(unsigned RegNum, SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_WrappedRegister, S, E); + Op->Reg.RegNum = RegNum; + return Op; + } + + static AArch64Operand *CreateShiftExtend(A64SE::ShiftExtSpecifiers ShiftTyp, + unsigned Amount, + bool ImplicitAmount, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, S, E); + Op->ShiftExtend.ShiftType = ShiftTyp; + Op->ShiftExtend.Amount = Amount; + Op->ShiftExtend.ImplicitAmount = ImplicitAmount; + return Op; + } + + static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S) { + AArch64Operand *Op = new AArch64Operand(k_SysReg, S, S); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + return Op; + } + + static AArch64Operand *CreateToken(StringRef Str, SMLoc S) { + AArch64Operand *Op = new AArch64Operand(k_Token, S, S); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + return Op; + } + + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. 
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + + template<unsigned RegWidth> + void addBFILSBOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + unsigned EncodedVal = (RegWidth - CE->getValue()) % RegWidth; + Inst.addOperand(MCOperand::CreateImm(EncodedVal)); + } + + void addBFIWidthOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1)); + } + + void addBFXWidthOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + uint64_t LSB = Inst.getOperand(Inst.getNumOperands()-1).getImm(); + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + + Inst.addOperand(MCOperand::CreateImm(LSB + CE->getValue() - 1)); + } + + void addCondCodeOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getCondCode())); + } + + void addCVTFixedPosOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(64 - CE->getValue())); + } + + void addFMOVImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + APFloat RealVal(FPImm.Val); + uint32_t ImmVal; + A64Imms::isFPImm(RealVal, ImmVal); + + Inst.addOperand(MCOperand::CreateImm(ImmVal)); + } + + void addFPZeroOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + Inst.addOperand(MCOperand::CreateImm(0)); + } + + void addInvCondCodeOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number 
of operands!"); + unsigned Encoded = A64InvertCondCode(getCondCode()); + Inst.addOperand(MCOperand::CreateImm(Encoded)); + } + + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getReg())); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + template<int MemSize> + void addSImm7ScaledOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + uint64_t Val = CE->getValue() / MemSize; + Inst.addOperand(MCOperand::CreateImm(Val & 0x7f)); + } + + template<int BitWidth> + void addSImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + uint64_t Val = CE->getValue(); + Inst.addOperand(MCOperand::CreateImm(Val & ((1ULL << BitWidth) - 1))); + } + + void addImmWithLSLOperands(MCInst &Inst, unsigned N) const { + assert (N == 1 && "Invalid number of operands!"); + + addExpr(Inst, ImmWithLSL.Val); + } + + template<unsigned field_width, unsigned scale> + void addLabelOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val); + + if (!CE) { + addExpr(Inst, Imm.Val); + return; + } + + int64_t Val = CE->getValue(); + assert(Val % scale == 0 && "Unaligned immediate in instruction"); + Val /= scale; + + Inst.addOperand(MCOperand::CreateImm(Val & ((1LL << field_width) - 1))); + } + + template<int MemSize> + void addOffsetUImm12Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) { + Inst.addOperand(MCOperand::CreateImm(CE->getValue() / MemSize)); + } else { + 
Inst.addOperand(MCOperand::CreateExpr(getImm())); + } + } + + template<unsigned RegWidth> + void addLogicalImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val); + + uint32_t Bits; + A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits); + + Inst.addOperand(MCOperand::CreateImm(Bits)); + } + + void addMRSOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + bool Valid; + StringRef Name(SysReg.Data, SysReg.Length); + uint32_t Bits = A64SysReg::MRSMapper().fromString(Name, Valid); + + Inst.addOperand(MCOperand::CreateImm(Bits)); + } + + void addMSRWithRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + bool Valid; + StringRef Name(SysReg.Data, SysReg.Length); + uint32_t Bits = A64SysReg::MSRMapper().fromString(Name, Valid); + + Inst.addOperand(MCOperand::CreateImm(Bits)); + } + + void addMSRPStateOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + bool Valid; + StringRef Name(SysReg.Data, SysReg.Length); + uint32_t Bits = A64PState::PStateMapper().fromString(Name, Valid); + + Inst.addOperand(MCOperand::CreateImm(Bits)); + } + + void addMoveWideImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + + addExpr(Inst, ImmWithLSL.Val); + + AArch64MCExpr::VariantKind Variant; + if (!isNonConstantExpr(ImmWithLSL.Val, Variant)) { + Inst.addOperand(MCOperand::CreateImm(ImmWithLSL.ShiftAmount / 16)); + return; + } + + // We know it's relocated + switch (Variant) { + case AArch64MCExpr::VK_AARCH64_ABS_G0: + case AArch64MCExpr::VK_AARCH64_ABS_G0_NC: + case AArch64MCExpr::VK_AARCH64_SABS_G0: + case AArch64MCExpr::VK_AARCH64_DTPREL_G0: + case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC: + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC: + case AArch64MCExpr::VK_AARCH64_TPREL_G0: + case 
AArch64MCExpr::VK_AARCH64_TPREL_G0_NC: + Inst.addOperand(MCOperand::CreateImm(0)); + break; + case AArch64MCExpr::VK_AARCH64_ABS_G1: + case AArch64MCExpr::VK_AARCH64_ABS_G1_NC: + case AArch64MCExpr::VK_AARCH64_SABS_G1: + case AArch64MCExpr::VK_AARCH64_DTPREL_G1: + case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC: + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: + case AArch64MCExpr::VK_AARCH64_TPREL_G1: + case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC: + Inst.addOperand(MCOperand::CreateImm(1)); + break; + case AArch64MCExpr::VK_AARCH64_ABS_G2: + case AArch64MCExpr::VK_AARCH64_ABS_G2_NC: + case AArch64MCExpr::VK_AARCH64_SABS_G2: + case AArch64MCExpr::VK_AARCH64_DTPREL_G2: + case AArch64MCExpr::VK_AARCH64_TPREL_G2: + Inst.addOperand(MCOperand::CreateImm(2)); + break; + case AArch64MCExpr::VK_AARCH64_ABS_G3: + Inst.addOperand(MCOperand::CreateImm(3)); + break; + default: llvm_unreachable("Inappropriate move wide relocation"); + } + } + + template<int RegWidth, bool isValidImm(int, uint64_t, int&, int&)> + void addMoveWideMovAliasOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + int UImm16, Shift; + + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + uint64_t Value = CE->getValue(); + + if (RegWidth == 32) { + Value &= 0xffffffffULL; + } + + bool Valid = isValidImm(RegWidth, Value, UImm16, Shift); + (void)Valid; + assert(Valid && "Invalid immediates should have been weeded out by now"); + + Inst.addOperand(MCOperand::CreateImm(UImm16)); + Inst.addOperand(MCOperand::CreateImm(Shift)); + } + + void addPRFMOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + assert(CE->getValue() >= 0 && CE->getValue() <= 31 + && "PRFM operand should be 5-bits"); + + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + } + + // For Add-sub (extended register) operands. 
+ void addRegExtendOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount)); + } + + // For the extend in load-store (register offset) instructions. + template<unsigned MemSize> + void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const { + addAddrRegExtendOperands(Inst, N, MemSize); + } + + void addAddrRegExtendOperands(MCInst &Inst, unsigned N, + unsigned MemSize) const { + assert(N == 1 && "Invalid number of operands!"); + + // First bit of Option is set in instruction classes, the high two bits are + // as follows: + unsigned OptionHi = 0; + switch (ShiftExtend.ShiftType) { + case A64SE::UXTW: + case A64SE::LSL: + OptionHi = 1; + break; + case A64SE::SXTW: + case A64SE::SXTX: + OptionHi = 3; + break; + default: + llvm_unreachable("Invalid extend type for register offset"); + } + + unsigned S = 0; + if (MemSize == 1 && !ShiftExtend.ImplicitAmount) + S = 1; + else if (MemSize != 1 && ShiftExtend.Amount != 0) + S = 1; + + Inst.addOperand(MCOperand::CreateImm((OptionHi << 1) | S)); + } + void addShiftOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount)); + } +}; + +} // end anonymous namespace. + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + StringRef Mnemonic) { + + // See if the operand has a custom parser + OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + + // It could either succeed, fail or just not care. + if (ResTy != MatchOperand_NoMatch) + return ResTy; + + switch (getLexer().getKind()) { + default: + Error(Parser.getTok().getLoc(), "unexpected token in operand"); + return MatchOperand_ParseFail; + case AsmToken::Identifier: { + // It might be in the LSL/UXTB family ... 
+ OperandMatchResultTy GotShift = ParseShiftExtend(Operands); + + // We can only continue if no tokens were eaten. + if (GotShift != MatchOperand_NoMatch) + return GotShift; + + // ... or it might be a register ... + uint32_t NumLanes = 0; + OperandMatchResultTy GotReg = ParseRegister(Operands, NumLanes); + assert(GotReg != MatchOperand_ParseFail + && "register parsing shouldn't partially succeed"); + + if (GotReg == MatchOperand_Success) { + if (Parser.getTok().is(AsmToken::LBrac)) + return ParseNEONLane(Operands, NumLanes); + else + return MatchOperand_Success; + } + + // ... or it might be a symbolish thing + } + // Fall through + case AsmToken::LParen: // E.g. (strcmp-4) + case AsmToken::Integer: // 1f, 2b labels + case AsmToken::String: // quoted labels + case AsmToken::Dot: // . is Current location + case AsmToken::Dollar: // $ is PC + case AsmToken::Colon: { + SMLoc StartLoc = Parser.getTok().getLoc(); + SMLoc EndLoc; + const MCExpr *ImmVal = 0; + + if (ParseImmediate(ImmVal) != MatchOperand_Success) + return MatchOperand_ParseFail; + + EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc)); + return MatchOperand_Success; + } + case AsmToken::Hash: { // Immediates + SMLoc StartLoc = Parser.getTok().getLoc(); + SMLoc EndLoc; + const MCExpr *ImmVal = 0; + Parser.Lex(); + + if (ParseImmediate(ImmVal) != MatchOperand_Success) + return MatchOperand_ParseFail; + + EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc)); + return MatchOperand_Success; + } + case AsmToken::LBrac: { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateToken("[", Loc)); + Parser.Lex(); // Eat '[' + + // There's no comma after a '[', so we can parse the next operand + // immediately. 
+ return ParseOperand(Operands, Mnemonic); + } + // The following will likely be useful later, but not in very early cases + case AsmToken::LCurly: // Weird SIMD lists + llvm_unreachable("Don't know how to deal with '{' in operand"); + return MatchOperand_ParseFail; + } +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseImmediate(const MCExpr *&ExprVal) { + if (getLexer().is(AsmToken::Colon)) { + AArch64MCExpr::VariantKind RefKind; + + OperandMatchResultTy ResTy = ParseRelocPrefix(RefKind); + if (ResTy != MatchOperand_Success) + return ResTy; + + const MCExpr *SubExprVal; + if (getParser().ParseExpression(SubExprVal)) + return MatchOperand_ParseFail; + + ExprVal = AArch64MCExpr::Create(RefKind, SubExprVal, getContext()); + return MatchOperand_Success; + } + + // No weird AArch64MCExpr prefix + return getParser().ParseExpression(ExprVal) + ? MatchOperand_ParseFail : MatchOperand_Success; +} + +// A lane attached to a NEON register. "[N]", which should yield three tokens: +// '[', N, ']'. A hash is not allowed to precede the immediate here. 
+AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + uint32_t NumLanes) { + SMLoc Loc = Parser.getTok().getLoc(); + + assert(Parser.getTok().is(AsmToken::LBrac) && "inappropriate operand"); + Operands.push_back(AArch64Operand::CreateToken("[", Loc)); + Parser.Lex(); // Eat '[' + + if (Parser.getTok().isNot(AsmToken::Integer)) { + Error(Parser.getTok().getLoc(), "expected lane number"); + return MatchOperand_ParseFail; + } + + if (Parser.getTok().getIntVal() >= NumLanes) { + Error(Parser.getTok().getLoc(), "lane number incompatible with layout"); + return MatchOperand_ParseFail; + } + + const MCExpr *Lane = MCConstantExpr::Create(Parser.getTok().getIntVal(), + getContext()); + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat actual lane + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateImm(Lane, S, E)); + + + if (Parser.getTok().isNot(AsmToken::RBrac)) { + Error(Parser.getTok().getLoc(), "expected ']' after lane"); + return MatchOperand_ParseFail; + } + + Operands.push_back(AArch64Operand::CreateToken("]", Loc)); + Parser.Lex(); // Eat ']' + + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind) { + assert(getLexer().is(AsmToken::Colon) && "expected a ':'"); + Parser.Lex(); + + if (getLexer().isNot(AsmToken::Identifier)) { + Error(Parser.getTok().getLoc(), + "expected relocation specifier in operand after ':'"); + return MatchOperand_ParseFail; + } + + StringRef lowerCase = Parser.getTok().getIdentifier().lower(); + RefKind = StringSwitch<AArch64MCExpr::VariantKind>(lowerCase) + .Case("got", AArch64MCExpr::VK_AARCH64_GOT) + .Case("got_lo12", AArch64MCExpr::VK_AARCH64_GOT_LO12) + .Case("lo12", AArch64MCExpr::VK_AARCH64_LO12) + .Case("abs_g0", AArch64MCExpr::VK_AARCH64_ABS_G0) + .Case("abs_g0_nc", AArch64MCExpr::VK_AARCH64_ABS_G0_NC) + .Case("abs_g1", 
AArch64MCExpr::VK_AARCH64_ABS_G1) + .Case("abs_g1_nc", AArch64MCExpr::VK_AARCH64_ABS_G1_NC) + .Case("abs_g2", AArch64MCExpr::VK_AARCH64_ABS_G2) + .Case("abs_g2_nc", AArch64MCExpr::VK_AARCH64_ABS_G2_NC) + .Case("abs_g3", AArch64MCExpr::VK_AARCH64_ABS_G3) + .Case("abs_g0_s", AArch64MCExpr::VK_AARCH64_SABS_G0) + .Case("abs_g1_s", AArch64MCExpr::VK_AARCH64_SABS_G1) + .Case("abs_g2_s", AArch64MCExpr::VK_AARCH64_SABS_G2) + .Case("dtprel_g2", AArch64MCExpr::VK_AARCH64_DTPREL_G2) + .Case("dtprel_g1", AArch64MCExpr::VK_AARCH64_DTPREL_G1) + .Case("dtprel_g1_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC) + .Case("dtprel_g0", AArch64MCExpr::VK_AARCH64_DTPREL_G0) + .Case("dtprel_g0_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC) + .Case("dtprel_hi12", AArch64MCExpr::VK_AARCH64_DTPREL_HI12) + .Case("dtprel_lo12", AArch64MCExpr::VK_AARCH64_DTPREL_LO12) + .Case("dtprel_lo12_nc", AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC) + .Case("gottprel_g1", AArch64MCExpr::VK_AARCH64_GOTTPREL_G1) + .Case("gottprel_g0_nc", AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC) + .Case("gottprel", AArch64MCExpr::VK_AARCH64_GOTTPREL) + .Case("gottprel_lo12", AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12) + .Case("tprel_g2", AArch64MCExpr::VK_AARCH64_TPREL_G2) + .Case("tprel_g1", AArch64MCExpr::VK_AARCH64_TPREL_G1) + .Case("tprel_g1_nc", AArch64MCExpr::VK_AARCH64_TPREL_G1_NC) + .Case("tprel_g0", AArch64MCExpr::VK_AARCH64_TPREL_G0) + .Case("tprel_g0_nc", AArch64MCExpr::VK_AARCH64_TPREL_G0_NC) + .Case("tprel_hi12", AArch64MCExpr::VK_AARCH64_TPREL_HI12) + .Case("tprel_lo12", AArch64MCExpr::VK_AARCH64_TPREL_LO12) + .Case("tprel_lo12_nc", AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC) + .Case("tlsdesc", AArch64MCExpr::VK_AARCH64_TLSDESC) + .Case("tlsdesc_lo12", AArch64MCExpr::VK_AARCH64_TLSDESC_LO12) + .Default(AArch64MCExpr::VK_AARCH64_None); + + if (RefKind == AArch64MCExpr::VK_AARCH64_None) { + Error(Parser.getTok().getLoc(), + "expected relocation specifier in operand after ':'"); + return MatchOperand_ParseFail; + } + 
Parser.Lex(); // Eat identifier + + if (getLexer().isNot(AsmToken::Colon)) { + Error(Parser.getTok().getLoc(), + "expected ':' after relocation specifier"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseImmWithLSLOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // FIXME?: I want to live in a world where immediates must start with + // #. Please don't dash my hopes (well, do if you have a good reason). + if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat '#' + + const MCExpr *Imm; + if (ParseImmediate(Imm) != MatchOperand_Success) + return MatchOperand_ParseFail; + else if (Parser.getTok().isNot(AsmToken::Comma)) { + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, 0, true, S, E)); + return MatchOperand_Success; + } + + // Eat ',' + Parser.Lex(); + + // The optional operand must be "lsl #N" where N is non-negative. 
+ if (Parser.getTok().is(AsmToken::Identifier) + && Parser.getTok().getIdentifier().lower() == "lsl") { + Parser.Lex(); + + if (Parser.getTok().is(AsmToken::Hash)) { + Parser.Lex(); + + if (Parser.getTok().isNot(AsmToken::Integer)) { + Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate"); + return MatchOperand_ParseFail; + } + } + } + + int64_t ShiftAmount = Parser.getTok().getIntVal(); + + if (ShiftAmount < 0) { + Error(Parser.getTok().getLoc(), "positive shift amount required"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat the number + + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, ShiftAmount, + false, S, E)); + return MatchOperand_Success; +} + + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseCondCodeOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + if (Parser.getTok().isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; + + StringRef Tok = Parser.getTok().getIdentifier(); + A64CC::CondCodes CondCode = A64StringToCondCode(Tok); + + if (CondCode == A64CC::Invalid) + return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat condition code + SMLoc E = Parser.getTok().getLoc(); + + Operands.push_back(AArch64Operand::CreateCondCode(CondCode, S, E)); + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseCRxOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + if (Parser.getTok().isNot(AsmToken::Identifier)) { + Error(S, "Expected cN operand where 0 <= N <= 15"); + return MatchOperand_ParseFail; + } + + StringRef Tok = Parser.getTok().getIdentifier().lower(); + if (Tok[0] != 'c') { + Error(S, "Expected cN operand where 0 <= N <= 15"); + return MatchOperand_ParseFail; + } + + uint32_t CRNum; + bool BadNum = Tok.drop_front().getAsInteger(10, CRNum); + if (BadNum || CRNum > 15) { + Error(S, "Expected cN operand where 0 <= N <= 
15"); + return MatchOperand_ParseFail; + } + + const MCExpr *CRImm = MCConstantExpr::Create(CRNum, getContext()); + + Parser.Lex(); + SMLoc E = Parser.getTok().getLoc(); + + Operands.push_back(AArch64Operand::CreateImm(CRImm, S, E)); + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseFPImmOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + + // FIXME?: I want to live in a world where immediates must start with + // #. Please don't dash my hopes (well, do if you have a good reason). + if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat '#' + + bool Negative = false; + if (Parser.getTok().is(AsmToken::Minus)) { + Negative = true; + Parser.Lex(); // Eat '-' + } else if (Parser.getTok().is(AsmToken::Plus)) { + Parser.Lex(); // Eat '+' + } + + if (Parser.getTok().isNot(AsmToken::Real)) { + Error(S, "Expected floating-point immediate"); + return MatchOperand_ParseFail; + } + + APFloat RealVal(APFloat::IEEEdouble, Parser.getTok().getString()); + if (Negative) RealVal.changeSign(); + double DblVal = RealVal.convertToDouble(); + + Parser.Lex(); // Eat real number + SMLoc E = Parser.getTok().getLoc(); + + Operands.push_back(AArch64Operand::CreateFPImm(DblVal, S, E)); + return MatchOperand_Success; +} + + +// Automatically generated +static unsigned MatchRegisterName(StringRef Name); + +bool +AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, + StringRef &Layout, + SMLoc &LayoutLoc) const { + const AsmToken &Tok = Parser.getTok(); + + if (Tok.isNot(AsmToken::Identifier)) + return false; + + std::string LowerReg = Tok.getString().lower(); + size_t DotPos = LowerReg.find('.'); + + RegNum = MatchRegisterName(LowerReg.substr(0, DotPos)); + if (RegNum == AArch64::NoRegister) { + RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos)) + .Case("ip0", AArch64::X16) + .Case("ip1", AArch64::X17) + .Case("fp", 
AArch64::X29) + .Case("lr", AArch64::X30) + .Default(AArch64::NoRegister); + } + if (RegNum == AArch64::NoRegister) + return false; + + SMLoc S = Tok.getLoc(); + RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos); + + if (DotPos == StringRef::npos) { + Layout = StringRef(); + } else { + // Everything afterwards needs to be a literal token, expected to be + // '.2d','.b' etc for vector registers. + + // This StringSwitch validates the input and (perhaps more importantly) + // gives us a permanent string to use in the token (a pointer into LowerReg + // would go out of scope when we return). + LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1); + Layout = LowerReg.substr(DotPos, StringRef::npos); + Layout = StringSwitch<const char *>(Layout) + .Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d") + .Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s") + .Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h") + .Case(".b", ".b").Case(".8b", ".8b").Case(".16b", ".16b") + .Default(""); + + if (Layout.size() == 0) { + // Malformed register + return false; + } + } + + return true; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + uint32_t &NumLanes) { + unsigned RegNum; + StringRef Layout; + SMLoc RegEndLoc, LayoutLoc; + SMLoc S = Parser.getTok().getLoc(); + + if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc)) + return MatchOperand_NoMatch; + + Operands.push_back(AArch64Operand::CreateReg(RegNum, S, RegEndLoc)); + + if (Layout.size() != 0) { + unsigned long long TmpLanes = 0; + llvm::getAsUnsignedInteger(Layout.substr(1), 10, TmpLanes); + if (TmpLanes != 0) { + NumLanes = TmpLanes; + } else { + // If the number of lanes isn't specified explicitly, a valid instruction + // will have an element specifier and be capable of acting on the entire + // vector register. 
+ switch (Layout.back()) { + default: llvm_unreachable("Invalid layout specifier"); + case 'b': NumLanes = 16; break; + case 'h': NumLanes = 8; break; + case 's': NumLanes = 4; break; + case 'd': NumLanes = 2; break; + } + } + + Operands.push_back(AArch64Operand::CreateToken(Layout, LayoutLoc)); + } + + Parser.Lex(); + return MatchOperand_Success; +} + +bool +AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { + // This callback is used for things like DWARF frame directives in + // assembly. They don't care about things like NEON layouts or lanes, they + // just want to be able to produce the DWARF register number. + StringRef LayoutSpec; + SMLoc RegEndLoc, LayoutLoc; + StartLoc = Parser.getTok().getLoc(); + + if (!IdentifyRegister(RegNo, RegEndLoc, LayoutSpec, LayoutLoc)) + return true; + + Parser.Lex(); + EndLoc = Parser.getTok().getLoc(); + + return false; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseNamedImmOperand(const NamedImmMapper &Mapper, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Since these operands occur in very limited circumstances, without + // alternatives, we actually signal an error if there is no match. If relaxing + // this, beware of unintended consequences: an immediate will be accepted + // during matching, no matter how it gets into the AArch64Operand. + const AsmToken &Tok = Parser.getTok(); + SMLoc S = Tok.getLoc(); + + if (Tok.is(AsmToken::Identifier)) { + bool ValidName; + uint32_t Code = Mapper.fromString(Tok.getString().lower(), ValidName); + + if (!ValidName) { + Error(S, "operand specifier not recognised"); + return MatchOperand_ParseFail; + } + + Parser.Lex(); // We're done with the identifier. 
Eat it + + SMLoc E = Parser.getTok().getLoc(); + const MCExpr *Imm = MCConstantExpr::Create(Code, getContext()); + Operands.push_back(AArch64Operand::CreateImm(Imm, S, E)); + return MatchOperand_Success; + } else if (Tok.is(AsmToken::Hash)) { + Parser.Lex(); + + const MCExpr *ImmVal; + if (ParseImmediate(ImmVal) != MatchOperand_Success) + return MatchOperand_ParseFail; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal); + if (!CE || CE->getValue() < 0 || !Mapper.validImm(CE->getValue())) { + Error(S, "Invalid immediate for instruction"); + return MatchOperand_ParseFail; + } + + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E)); + return MatchOperand_Success; + } + + Error(S, "unexpected operand for instruction"); + return MatchOperand_ParseFail; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseSysRegOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + const AsmToken &Tok = Parser.getTok(); + + // Any MSR/MRS operand will be an identifier, and we want to store it as some + // kind of string: SPSel is valid for two different forms of MSR with two + // different encodings. There's no collision at the moment, but the potential + // is there. + if (!Tok.is(AsmToken::Identifier)) { + return MatchOperand_NoMatch; + } + + SMLoc S = Tok.getLoc(); + Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), S)); + Parser.Lex(); // Eat identifier + + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseLSXAddressOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + + unsigned RegNum; + SMLoc RegEndLoc, LayoutLoc; + StringRef Layout; + if(!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc) + || !AArch64MCRegisterClasses[AArch64::GPR64xspRegClassID].contains(RegNum) + || Layout.size() != 0) { + // Check Layout.size because we don't want to let "x3.4s" or similar + // through. 
+ return MatchOperand_NoMatch; + } + Parser.Lex(); // Eat register + + if (Parser.getTok().is(AsmToken::RBrac)) { + // We're done + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E)); + return MatchOperand_Success; + } + + // Otherwise, only ", #0" is valid + + if (Parser.getTok().isNot(AsmToken::Comma)) { + Error(Parser.getTok().getLoc(), "expected ',' or ']' after register"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat ',' + + if (Parser.getTok().isNot(AsmToken::Hash)) { + Error(Parser.getTok().getLoc(), "expected '#0'"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat '#' + + if (Parser.getTok().isNot(AsmToken::Integer) + || Parser.getTok().getIntVal() != 0 ) { + Error(Parser.getTok().getLoc(), "expected '#0'"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat '0' + + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E)); + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseShiftExtend( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + StringRef IDVal = Parser.getTok().getIdentifier(); + std::string LowerID = IDVal.lower(); + + A64SE::ShiftExtSpecifiers Spec = + StringSwitch<A64SE::ShiftExtSpecifiers>(LowerID) + .Case("lsl", A64SE::LSL) + .Case("lsr", A64SE::LSR) + .Case("asr", A64SE::ASR) + .Case("ror", A64SE::ROR) + .Case("uxtb", A64SE::UXTB) + .Case("uxth", A64SE::UXTH) + .Case("uxtw", A64SE::UXTW) + .Case("uxtx", A64SE::UXTX) + .Case("sxtb", A64SE::SXTB) + .Case("sxth", A64SE::SXTH) + .Case("sxtw", A64SE::SXTW) + .Case("sxtx", A64SE::SXTX) + .Default(A64SE::Invalid); + + if (Spec == A64SE::Invalid) + return MatchOperand_NoMatch; + + // Eat the shift + SMLoc S, E; + S = Parser.getTok().getLoc(); + Parser.Lex(); + + if (Spec != A64SE::LSL && Spec != A64SE::LSR && + Spec != A64SE::ASR && Spec != A64SE::ROR) { + // The shift amount can be omitted for the extending 
versions, but not real + // shifts: + // add x0, x0, x0, uxtb + // is valid, and equivalent to + // add x0, x0, x0, uxtb #0 + + if (Parser.getTok().is(AsmToken::Comma) || + Parser.getTok().is(AsmToken::EndOfStatement) || + Parser.getTok().is(AsmToken::RBrac)) { + Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, 0, true, S, E)); + return MatchOperand_Success; + } + } + + // Eat # at beginning of immediate + if (!Parser.getTok().is(AsmToken::Hash)) { + Error(Parser.getTok().getLoc(), + "expected #imm after shift specifier"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + + // Make sure we do actually have a number + if (!Parser.getTok().is(AsmToken::Integer)) { + Error(Parser.getTok().getLoc(), + "expected integer shift amount"); + return MatchOperand_ParseFail; + } + unsigned Amount = Parser.getTok().getIntVal(); + Parser.Lex(); + E = Parser.getTok().getLoc(); + + Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, Amount, false, S, E)); + + return MatchOperand_Success; +} + +// FIXME: We would really like to be able to tablegen'erate this. +bool AArch64AsmParser:: +validateInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + switch (Inst.getOpcode()) { + case AArch64::BFIwwii: + case AArch64::BFIxxii: + case AArch64::SBFIZwwii: + case AArch64::SBFIZxxii: + case AArch64::UBFIZwwii: + case AArch64::UBFIZxxii: { + unsigned ImmOps = Inst.getNumOperands() - 2; + int64_t ImmR = Inst.getOperand(ImmOps).getImm(); + int64_t ImmS = Inst.getOperand(ImmOps+1).getImm(); + + if (ImmR == 0) { + // Bitfield inserts are preferred disassembly if ImmS < ImmR. However, + // there is this one case where insert is valid syntax but the bfx + // disassembly should be used: e.g. "sbfiz w0, w0, #0, #1". 
+ return false; + } else if (ImmS >= ImmR) { + return Error(Operands[4]->getStartLoc(), + "requested insert overflows register"); + } + return false; + } + case AArch64::BFXILwwii: + case AArch64::BFXILxxii: + case AArch64::SBFXwwii: + case AArch64::SBFXxxii: + case AArch64::UBFXwwii: + case AArch64::UBFXxxii: { + unsigned ImmOps = Inst.getNumOperands() - 2; + int64_t ImmR = Inst.getOperand(ImmOps).getImm(); + int64_t ImmS = Inst.getOperand(ImmOps+1).getImm(); + int64_t RegWidth = 0; + switch (Inst.getOpcode()) { + case AArch64::SBFXxxii: case AArch64::UBFXxxii: case AArch64::BFXILxxii: + RegWidth = 64; + break; + case AArch64::SBFXwwii: case AArch64::UBFXwwii: case AArch64::BFXILwwii: + RegWidth = 32; + break; + } + + if (ImmS >= RegWidth || ImmS < ImmR) { + return Error(Operands[4]->getStartLoc(), + "requested extract overflows register"); + } + return false; + } + case AArch64::ICix: { + int64_t ImmVal = Inst.getOperand(0).getImm(); + A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal); + if (!A64IC::NeedsRegister(ICOp)) { + return Error(Operands[1]->getStartLoc(), + "specified IC op does not use a register"); + } + return false; + } + case AArch64::ICi: { + int64_t ImmVal = Inst.getOperand(0).getImm(); + A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal); + if (A64IC::NeedsRegister(ICOp)) { + return Error(Operands[1]->getStartLoc(), + "specified IC op requires a register"); + } + return false; + } + case AArch64::TLBIix: { + int64_t ImmVal = Inst.getOperand(0).getImm(); + A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal); + if (!A64TLBI::NeedsRegister(TLBIOp)) { + return Error(Operands[1]->getStartLoc(), + "specified TLBI op does not use a register"); + } + return false; + } + case AArch64::TLBIi: { + int64_t ImmVal = Inst.getOperand(0).getImm(); + A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal); + if (A64TLBI::NeedsRegister(TLBIOp)) { + return Error(Operands[1]->getStartLoc(), + "specified TLBI op 
requires a register"); + } + return false; + } + } + + return false; +} + + +// Parses the instruction *together with* all operands, appending each parsed +// operand to the "Operands" list +bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + size_t CondCodePos = Name.find('.'); + + StringRef Mnemonic = Name.substr(0, CondCodePos); + Operands.push_back(AArch64Operand::CreateToken(Mnemonic, NameLoc)); + + if (CondCodePos != StringRef::npos) { + // We have a condition code + SMLoc S = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 1); + StringRef CondStr = Name.substr(CondCodePos + 1, StringRef::npos); + A64CC::CondCodes Code; + + Code = A64StringToCondCode(CondStr); + + if (Code == A64CC::Invalid) { + Error(S, "invalid condition code"); + Parser.EatToEndOfStatement(); + return true; + } + + SMLoc DotL = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos); + + Operands.push_back(AArch64Operand::CreateToken(".", DotL)); + SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 3); + Operands.push_back(AArch64Operand::CreateCondCode(Code, S, E)); + } + + // Now we parse the operands of this instruction + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (ParseOperand(Operands, Mnemonic)) { + Parser.EatToEndOfStatement(); + return true; + } + + while (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); // Eat the comma. + + // Parse and remember the operand. + if (ParseOperand(Operands, Mnemonic)) { + Parser.EatToEndOfStatement(); + return true; + } + + + // After successfully parsing some operands there are two special cases to + // consider (i.e. notional operands not separated by commas). Both are due + // to memory specifiers: + // + An RBrac will end an address for load/store/prefetch + // + An '!' will indicate a pre-indexed operation. 
+ // + // It's someone else's responsibility to make sure these tokens are sane + // in the given context! + if (Parser.getTok().is(AsmToken::RBrac)) { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateToken("]", Loc)); + Parser.Lex(); + } + + if (Parser.getTok().is(AsmToken::Exclaim)) { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateToken("!", Loc)); + Parser.Lex(); + } + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, ""); + } + + // Eat the EndOfStatement + Parser.Lex(); + + return false; +} + +bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getIdentifier(); + if (IDVal == ".hword") + return ParseDirectiveWord(2, DirectiveID.getLoc()); + else if (IDVal == ".word") + return ParseDirectiveWord(4, DirectiveID.getLoc()); + else if (IDVal == ".xword") + return ParseDirectiveWord(8, DirectiveID.getLoc()); + else if (IDVal == ".tlsdesccall") + return ParseDirectiveTLSDescCall(DirectiveID.getLoc()); + + return true; +} + +/// parseDirectiveWord +/// ::= .word [ expression (, expression)* ] +bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + const MCExpr *Value; + if (getParser().ParseExpression(Value)) + return true; + + getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + // FIXME: Improve diagnostic. 
+ if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + } + } + + Parser.Lex(); + return false; +} + +// parseDirectiveTLSDescCall: +// ::= .tlsdesccall symbol +bool AArch64AsmParser::ParseDirectiveTLSDescCall(SMLoc L) { + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return Error(L, "expected symbol after directive"); + + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext()); + + MCInst Inst; + Inst.setOpcode(AArch64::TLSDESCCALL); + Inst.addOperand(MCOperand::CreateExpr(Expr)); + + getParser().getStreamer().EmitInstruction(Inst); + return false; +} + + +bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + unsigned MatchResult; + MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, + MatchingInlineAsm); + switch (MatchResult) { + default: break; + case Match_Success: + if (validateInstruction(Inst, Operands)) + return true; + + Out.EmitInstruction(Inst); + return false; + case Match_MissingFeature: + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + case Match_MnemonicFail: + return Error(IDLoc, "invalid instruction"); + } + + llvm_unreachable("Implement any new match types added!"); + return true; +} + +void AArch64Operand::print(raw_ostream &OS) const { + switch (Kind) { + case k_CondCode: + OS << "<CondCode: " << CondCode.Code << ">"; + break; + case 
k_FPImmediate: + OS << "<fpimm: " << FPImm.Val << ">"; + break; + case k_ImmWithLSL: + OS << "<immwithlsl: imm=" << ImmWithLSL.Val + << ", shift=" << ImmWithLSL.ShiftAmount << ">"; + break; + case k_Immediate: + getImm()->print(OS); + break; + case k_Register: + OS << "<register " << getReg() << '>'; + break; + case k_Token: + OS << '\'' << getToken() << '\''; + break; + case k_ShiftExtend: + OS << "<shift: type=" << ShiftExtend.ShiftType + << ", amount=" << ShiftExtend.Amount << ">"; + break; + case k_SysReg: { + StringRef Name(SysReg.Data, SysReg.Length); + OS << "<sysreg: " << Name << '>'; + break; + } + default: + llvm_unreachable("No idea how to print this kind of operand"); + break; + } +} + +void AArch64Operand::dump() const { + print(errs()); +} + + +/// Force static initialization. +extern "C" void LLVMInitializeAArch64AsmParser() { + RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64Target); +} + +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION +#include "AArch64GenAsmMatcher.inc" diff --git a/lib/Target/AArch64/AsmParser/CMakeLists.txt b/lib/Target/AArch64/AsmParser/CMakeLists.txt new file mode 100644 index 0000000..a018a0a --- /dev/null +++ b/lib/Target/AArch64/AsmParser/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMAArch64AsmParser + AArch64AsmParser.cpp + ) + +add_dependencies(LLVMAArch64AsmParser AArch64CommonTableGen) diff --git a/lib/Target/AArch64/AsmParser/LLVMBuild.txt b/lib/Target/AArch64/AsmParser/LLVMBuild.txt new file mode 100644 index 0000000..bd1fcaf --- /dev/null +++ b/lib/Target/AArch64/AsmParser/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/AArch64/AsmParser/LLVMBuild.txt -------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AArch64AsmParser +parent = AArch64 +required_libraries = AArch64Desc AArch64Info MC MCParser Support +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/AsmParser/Makefile b/lib/Target/AArch64/AsmParser/Makefile new file mode 100644 index 0000000..56c9ef5 --- /dev/null +++ b/lib/Target/AArch64/AsmParser/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/AArch64/AsmParser/Makefile ---------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMAArch64AsmParser + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt new file mode 100644 index 0000000..a89861f --- /dev/null +++ b/lib/Target/AArch64/CMakeLists.txt @@ -0,0 +1,35 @@ +set(LLVM_TARGET_DEFINITIONS AArch64.td) + +tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv) +tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler) +tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info) +tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter) +tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering) +tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel) +tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget) +add_public_tablegen_target(AArch64CommonTableGen) + +add_llvm_target(AArch64CodeGen + AArch64AsmPrinter.cpp + AArch64ConstantIslandPass.cpp + AArch64FrameLowering.cpp + AArch64ISelDAGToDAG.cpp + AArch64ISelLowering.cpp + AArch64InstrInfo.cpp + AArch64MachineFunctionInfo.cpp + AArch64MCInstLower.cpp + AArch64RegisterInfo.cpp + AArch64SelectionDAGInfo.cpp + AArch64Subtarget.cpp + AArch64TargetMachine.cpp + AArch64TargetObjectFile.cpp + ) + +add_subdirectory(AsmParser) +add_subdirectory(Disassembler) +add_subdirectory(InstPrinter) +add_subdirectory(MCTargetDesc) +add_subdirectory(TargetInfo) diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp new file mode 100644 index 0000000..e98285b --- /dev/null +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -0,0 +1,791 @@ +//===- AArch64Disassembler.cpp - Disassembler for AArch64/Thumb ISA -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-disassembler" + +#include "AArch64.h" +#include "AArch64RegisterInfo.h" +#include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { +/// AArch64 disassembler for all AArch64 platforms. +class AArch64Disassembler : public MCDisassembler { + const MCRegisterInfo *RegInfo; +public: + /// Initializes the disassembler. + /// + AArch64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info) + : MCDisassembler(STI), RegInfo(Info) { + } + + ~AArch64Disassembler() { + } + + /// See MCDisassembler. + DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject &region, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const; + + /// Register info supplied at construction; the operand decoders in this file + /// use it to turn a (register class, index) pair into a register number. + const MCRegisterInfo *getRegInfo() const { return RegInfo; } +}; + +} + +// Forward-declarations used in the auto-generated files.
+static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus +DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus +DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, + unsigned OptionHiS, + uint64_t Address, + const void *Decoder); + + +static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst, + unsigned Imm6Bits, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst, + unsigned Imm6Bits, + uint64_t Address, + const void *Decoder); + +template<int RegWidth> +static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, + unsigned FullImm, + uint64_t Address, + const void *Decoder); + +template<int RegWidth> +static DecodeStatus 
DecodeLogicalImmOperand(llvm::MCInst &Inst, + unsigned Bits, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst, + unsigned ShiftAmount, + uint64_t Address, + const void *Decoder); + +static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst, + unsigned ShiftAmount, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + +template<typename SomeNamedImmMapper> +static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &InstMapper, + llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + + +static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + + +static bool Check(DecodeStatus &Out, DecodeStatus In); + +#include "AArch64GenDisassemblerTables.inc" +#include "AArch64GenInstrInfo.inc" + +static bool Check(DecodeStatus &Out, DecodeStatus In) { + switch (In) { + case MCDisassembler::Success: + // Out stays the same. 
+ return true; + case MCDisassembler::SoftFail: + Out = In; + return true; + case MCDisassembler::Fail: + Out = In; + return false; + } + llvm_unreachable("Invalid DecodeStatus!"); +} + +DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + CommentStream = &cs; + + uint8_t bytes[4]; + + // We want to read exactly 4 bytes of data. A failed read reports Size = 0. + if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) { + Size = 0; + return MCDisassembler::Fail; + } + + // Encoded as a little-endian 32-bit word in the stream: bytes[0] is the + // least significant byte. + uint32_t insn = (bytes[3] << 24) | + (bytes[2] << 16) | + (bytes[1] << 8) | + (bytes[0] << 0); + + // Calling the auto-generated decoder function. Any result other than Fail + // (i.e. Success or SoftFail) is returned as a 4-byte instruction. + DecodeStatus result = decodeInstruction(DecoderTableA6432, MI, insn, Address, + this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + // Hard failure: discard any partially built operands and consume nothing. + MI.clear(); + Size = 0; + return MCDisassembler::Fail; +} + +/// Returns the register at index RegNo of register class RC, looked up through +/// the register info of the AArch64Disassembler that D points to. +static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { + const AArch64Disassembler *Dis = static_cast<const AArch64Disassembler*>(D); + return Dis->getRegInfo()->getRegClass(RC).getRegister(RegNo); +} + +/// Appends the GPR64 register selected by RegNo to Inst; values above 31 are +/// rejected with Fail. +static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR64RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +/// Same as above, but the register comes from the GPR64xsp class. +static DecodeStatus +DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR64xspRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, +
uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR32RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR32wspRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR8RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR16RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + + +static DecodeStatus +DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR32RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR64RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + + +static DecodeStatus 
+DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR128RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + + +static DecodeStatus +DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::VPR64RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, + unsigned OptionHiS, + uint64_t Address, + const void *Decoder) { + // Option{1} must be 1. OptionHiS is made up of {Option{2}, Option{1}, + // S}. Hence we want to check bit 1. + if (!(OptionHiS & 2)) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(OptionHiS)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst, + unsigned Imm6Bits, + uint64_t Address, + const void *Decoder) { + // In the 32-bit variant, bit 6 must be zero. I.e. the immediate must be + // between 0 and 31. + if (Imm6Bits > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Imm6Bits)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst, + unsigned Imm6Bits, + uint64_t Address, + const void *Decoder) { + // 1 <= Imm <= 32. Encoded as 64 - Imm so: 63 >= Encoded >= 32. 
+ if (Imm6Bits < 32) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Imm6Bits)); + return MCDisassembler::Success; +} + + +template<int RegWidth> +static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, + unsigned FullImm, + uint64_t Address, + const void *Decoder) { + unsigned Imm16 = FullImm & 0xffff; + unsigned Shift = FullImm >> 16; + + if (RegWidth == 32 && Shift > 1) return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Imm16)); + Inst.addOperand(MCOperand::CreateImm(Shift)); + return MCDisassembler::Success; +} + +template<int RegWidth> +static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst, + unsigned Bits, + uint64_t Address, + const void *Decoder) { + uint64_t Imm; + if (!A64Imms::isLogicalImmBits(RegWidth, Bits, Imm)) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Bits)); + return MCDisassembler::Success; +} + + +static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst, + unsigned ShiftAmount, + uint64_t Address, + const void *Decoder) { + // Only values 0-4 are valid for this 3-bit field + if (ShiftAmount > 4) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); + return MCDisassembler::Success; +} + +static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst, + unsigned ShiftAmount, + uint64_t Address, + const void *Decoder) { + // Only values below 32 are valid for a 32-bit register + if (ShiftAmount > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + unsigned Rd = fieldFromInstruction(Insn, 0, 5); + unsigned Rn = fieldFromInstruction(Insn, 5, 5); + unsigned ImmS = fieldFromInstruction(Insn, 10, 6); + unsigned ImmR = fieldFromInstruction(Insn, 16, 6); + unsigned SF = fieldFromInstruction(Insn, 31, 1); + + // Undef for 0b11 
just in case it occurs. Don't want the compiler to optimise + // out assertions that it thinks should never be hit. + enum OpcTypes { SBFM = 0, BFM, UBFM, Undef } Opc; + Opc = (OpcTypes)fieldFromInstruction(Insn, 29, 2); + + if (!SF) { + // ImmR and ImmS must be between 0 and 31 for 32-bit instructions. + if (ImmR > 31 || ImmS > 31) + return MCDisassembler::Fail; + } + + if (SF) { + DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); + // BFM MCInsts use Rd as a source too. + if (Opc == BFM) DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); + DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder); + } else { + DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder); + // BFM MCInsts use Rd as a source too. + if (Opc == BFM) DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder); + DecodeGPR32RegisterClass(Inst, Rn, Address, Decoder); + } + + // ASR and LSR have more specific patterns so they won't get here: + assert(!(ImmS == 31 && !SF && Opc != BFM) && "shift should have used auto decode"); + assert(!(ImmS == 63 && SF && Opc != BFM) && "shift should have used auto decode"); + + // Extension instructions similarly: + if (Opc == SBFM && ImmR == 0) { + assert((ImmS != 7 && ImmS != 15) && "extension got here"); + assert((ImmS != 31 || SF == 0) && "extension got here"); + } else if (Opc == UBFM && ImmR == 0) { + assert((SF != 0 || (ImmS != 7 && ImmS != 15)) && "extension got here"); + } + + if (Opc == UBFM) { + // It might be a LSL instruction, which actually takes the shift amount + // itself as an MCInst operand. + if (SF && (ImmS + 1) % 64 == ImmR) { + Inst.setOpcode(AArch64::LSLxxi); + Inst.addOperand(MCOperand::CreateImm(63 - ImmS)); + return MCDisassembler::Success; + } else if (!SF && (ImmS + 1) % 32 == ImmR) { + Inst.setOpcode(AArch64::LSLwwi); + Inst.addOperand(MCOperand::CreateImm(31 - ImmS)); + return MCDisassembler::Success; + } + } + + // Otherwise it's definitely either an extract or an insert depending on which + // of ImmR or ImmS is larger. 
+ unsigned ExtractOp, InsertOp; + switch (Opc) { + default: llvm_unreachable("unexpected instruction trying to decode bitfield"); + case SBFM: + ExtractOp = SF ? AArch64::SBFXxxii : AArch64::SBFXwwii; + InsertOp = SF ? AArch64::SBFIZxxii : AArch64::SBFIZwwii; + break; + case BFM: + ExtractOp = SF ? AArch64::BFXILxxii : AArch64::BFXILwwii; + InsertOp = SF ? AArch64::BFIxxii : AArch64::BFIwwii; + break; + case UBFM: + ExtractOp = SF ? AArch64::UBFXxxii : AArch64::UBFXwwii; + InsertOp = SF ? AArch64::UBFIZxxii : AArch64::UBFIZwwii; + break; + } + + // Otherwise it's a boring insert or extract + Inst.addOperand(MCOperand::CreateImm(ImmR)); + Inst.addOperand(MCOperand::CreateImm(ImmS)); + + + if (ImmS < ImmR) + Inst.setOpcode(InsertOp); + else + Inst.setOpcode(ExtractOp); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + // This decoder exists to add the dummy Lane operand to the MCInst, which must + // be 1 in assembly but has no other real manifestation. 
+ unsigned Rd = fieldFromInstruction(Insn, 0, 5); + unsigned Rn = fieldFromInstruction(Insn, 5, 5); + unsigned IsToVec = fieldFromInstruction(Insn, 16, 1); + + if (IsToVec) { + DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder); + DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder); + } else { + DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); + DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder); + } + + // Add the lane + Inst.addOperand(MCOperand::CreateImm(1)); + + return MCDisassembler::Success; +} + + +/// Decodes a load/store-pair instruction word. Extracts Rt, Rn, Rt2, the 7-bit +/// immediate and the L, V, Opc and bit-23 ("Indexed") fields, then appends the +/// register and immediate operands to Inst. Returns SoftFail when a load names +/// the same register twice or when a write-back would hit a transfer register, +/// and Fail when Opc is 3. +static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + DecodeStatus Result = MCDisassembler::Success; + unsigned Rt = fieldFromInstruction(Insn, 0, 5); + unsigned Rn = fieldFromInstruction(Insn, 5, 5); + unsigned Rt2 = fieldFromInstruction(Insn, 10, 5); + unsigned SImm7 = fieldFromInstruction(Insn, 15, 7); + unsigned L = fieldFromInstruction(Insn, 22, 1); + unsigned V = fieldFromInstruction(Insn, 26, 1); + unsigned Opc = fieldFromInstruction(Insn, 30, 2); + + // Not an official name, but it turns out that bit 23 distinguishes indexed + // from non-indexed operations. + unsigned Indexed = fieldFromInstruction(Insn, 23, 1); + + if (Indexed && L == 0) { + // The MCInst for an indexed store has an out operand and 4 ins: + // Rn_wb, Rt, Rt2, Rn, Imm + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + } + + // You shouldn't load to the same register twice in an instruction... + if (L && Rt == Rt2) + Result = MCDisassembler::SoftFail; + + // ... or do any operation that writes-back to a transfer register. But note + // that "stp xzr, xzr, [sp], #4" is fine because xzr and sp are different. + if (Indexed && V == 0 && Rn != 31 && (Rt == Rn || Rt2 == Rn)) + Result = MCDisassembler::SoftFail; + + // Exactly how we decode the MCInst's registers depends on the Opc and V + // fields of the instruction.
These also obviously determine the size of the + // operation so we can fill in that information while we're at it. + if (V) { + // The instruction operates on the FP/SIMD registers + switch (Opc) { + default: return MCDisassembler::Fail; + case 0: + DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder); + DecodeFPR32RegisterClass(Inst, Rt2, Address, Decoder); + break; + case 1: + DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); + DecodeFPR64RegisterClass(Inst, Rt2, Address, Decoder); + break; + case 2: + DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); + DecodeFPR128RegisterClass(Inst, Rt2, Address, Decoder); + break; + } + } else { + switch (Opc) { + default: return MCDisassembler::Fail; + case 0: + DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder); + DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder); + break; + case 1: + assert(L && "unexpected \"store signed\" attempt"); + DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); + DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder); + break; + case 2: + DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); + DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder); + break; + } + } + + if (Indexed && L == 1) { + // The MCInst for an indexed load has 3 out operands and 2 ins: + // Rt, Rt2, Rn_wb, Rn, Imm + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + } + + + // Base register and immediate come last for every form. + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + Inst.addOperand(MCOperand::CreateImm(SImm7)); + + return Result; +} + +static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst, + uint32_t Val, + uint64_t Address, + const void *Decoder) { + unsigned Rt = fieldFromInstruction(Val, 0, 5); + unsigned Rn = fieldFromInstruction(Val, 5, 5); + unsigned Rt2 = fieldFromInstruction(Val, 10, 5); + unsigned MemSize = fieldFromInstruction(Val, 30, 2); + + DecodeStatus S = MCDisassembler::Success; + if (Rt == Rt2) S = MCDisassembler::SoftFail; + + switch (MemSize) { + case 2: + if (!Check(S,
DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + break; + case 3: + if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + llvm_unreachable("Invalid MemSize in DecodeLoadPairExclusiveInstruction"); + } + + if (!Check(S, DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +template<typename SomeNamedImmMapper> +static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder) { + SomeNamedImmMapper Mapper; + bool ValidNamed; + Mapper.toString(Val, ValidNamed); + if (ValidNamed || Mapper.validImm(Val)) { + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; + } + + return MCDisassembler::Fail; +} + +static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &Mapper, + llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder) { + bool ValidNamed; + Mapper.toString(Val, ValidNamed); + + Inst.addOperand(MCOperand::CreateImm(Val)); + + return ValidNamed ? 
MCDisassembler::Success : MCDisassembler::Fail;
+}
+
+// Decode a system-register operand using the MRS mapper.
+static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
+                                     unsigned Val,
+                                     uint64_t Address,
+                                     const void *Decoder) {
+  return DecodeSysRegOperand(A64SysReg::MRSMapper(), Inst, Val, Address,
+                             Decoder);
+}
+
+// Decode a system-register operand using the MSR mapper.
+static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
+                                     unsigned Val,
+                                     uint64_t Address,
+                                     const void *Decoder) {
+  return DecodeSysRegOperand(A64SysReg::MSRMapper(), Inst, Val, Address,
+                             Decoder);
+}
+
+// Decode a single-register indexed load/store instruction word into Inst.
+// The write-back base register is added before Rt for stores and after Rt for
+// loads; the base register Rn and the raw 9-bit immediate field always come
+// last. Returns SoftFail when a general-purpose transfer register equals the
+// base register (unless that register number is 31), Success otherwise.
+static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
+                                                   unsigned Insn,
+                                                   uint64_t Address,
+                                                   const void *Decoder) {
+  unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+  unsigned Imm9 = fieldFromInstruction(Insn, 12, 9);
+
+  unsigned Opc = fieldFromInstruction(Insn, 22, 2);
+  unsigned V = fieldFromInstruction(Insn, 26, 1);
+  unsigned Size = fieldFromInstruction(Insn, 30, 2);
+
+  if (Opc == 0 || (V == 1 && Opc == 2)) {
+    // It's a store, the MCInst gets: Rn_wb, Rt, Rn, Imm
+    DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+  }
+
+  // Pick the register class of Rt from the V, Opc and Size fields.
+  if (V == 0 && (Opc == 2 || Size == 3)) {
+    DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+  } else if (V == 0) {
+    DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
+  } else if (V == 1 && (Opc & 2)) {
+    DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+  } else {
+    switch (Size) {
+    case 0:
+      DecodeFPR8RegisterClass(Inst, Rt, Address, Decoder);
+      break;
+    case 1:
+      DecodeFPR16RegisterClass(Inst, Rt, Address, Decoder);
+      break;
+    case 2:
+      DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
+      break;
+    case 3:
+      DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+      break;
+    }
+  }
+
+  if (Opc != 0 && (V != 1 || Opc != 2)) {
+    // It's a load, the MCInst gets: Rt, Rn_wb, Rn, Imm
+    DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+  }
+
+  DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+
+  Inst.addOperand(MCOperand::CreateImm(Imm9));
+
+  // N.b. The official documentation says unpredictable if Rt == Rn, but this
+  // takes place at the architectural rather than encoding level:
+  //
+  // "STR xzr, [sp], #4" is perfectly valid.
+  if (V == 0 && Rt == Rn && Rn != 31)
+    return MCDisassembler::SoftFail;
+  else
+    return MCDisassembler::Success;
+}
+
+// Factory handed to the target registry: builds an AArch64Disassembler from
+// the subtarget info and a freshly created register info.
+static MCDisassembler *createAArch64Disassembler(const Target &T,
+                                                 const MCSubtargetInfo &STI) {
+  return new AArch64Disassembler(STI, T.createMCRegInfo(""));
+}
+
+// Registers the disassembler factory for TheAArch64Target.
+extern "C" void LLVMInitializeAArch64Disassembler() {
+  TargetRegistry::RegisterMCDisassembler(TheAArch64Target,
+                                         createAArch64Disassembler);
+}
+
+
diff --git a/lib/Target/AArch64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt
new file mode 100644
index 0000000..d4bd163
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64Disassembler
+  AArch64Disassembler.cpp
+  )
+
+add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
new file mode 100644
index 0000000..123eb3e
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/Disassembler/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AArch64Disassembler +parent = AArch64 +required_libraries = AArch64CodeGen AArch64Desc AArch64Info MC Support +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/Disassembler/Makefile b/lib/Target/AArch64/Disassembler/Makefile new file mode 100644 index 0000000..5c86120 --- /dev/null +++ b/lib/Target/AArch64/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/AArch64/Disassembler/Makefile ------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMAArch64Disassembler + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp new file mode 100644 index 0000000..909810f --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -0,0 +1,408 @@ +//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an AArch64 MCInst to a .s file. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "AArch64InstPrinter.h" +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define GET_INSTRUCTION_NAME +#define PRINT_ALIAS_INSTR +#include "AArch64GenAsmWriter.inc" + +static int64_t unpackSignedImm(int BitWidth, uint64_t Value) { + assert(!(Value & ~((1ULL << BitWidth)-1)) && "immediate not n-bit"); + if (Value & (1ULL << (BitWidth - 1))) + return static_cast<int64_t>(Value) - (1LL << BitWidth); + else + return Value; +} + +AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) : + MCInstPrinter(MAI, MII, MRI) { + // Initialize the set of available features. + setAvailableFeatures(STI.getFeatureBits()); +} + +void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << getRegisterName(RegNo); +} + +void +AArch64InstPrinter::printOffsetSImm9Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &O) { + const MCOperand &MOImm = MI->getOperand(OpNum); + int32_t Imm = unpackSignedImm(9, MOImm.getImm()); + + O << '#' << Imm; +} + +void +AArch64InstPrinter::printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, unsigned MemSize, + unsigned RmSize) { + unsigned ExtImm = MI->getOperand(OpNum).getImm(); + unsigned OptionHi = ExtImm >> 1; + unsigned S = ExtImm & 1; + bool IsLSL = OptionHi == 1 && RmSize == 64; + + const char *Ext; + switch (OptionHi) { + case 1: + Ext = (RmSize == 32) ? "uxtw" : "lsl"; + break; + case 3: + Ext = (RmSize == 32) ? 
"sxtw" : "sxtx"; + break; + default: + llvm_unreachable("Incorrect Option on load/store (reg offset)"); + } + O << Ext; + + if (S) { + unsigned ShiftAmt = Log2_32(MemSize); + O << " #" << ShiftAmt; + } else if (IsLSL) { + O << " #0"; + } +} + +void +AArch64InstPrinter::printAddSubImmLSL0Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &O) { + const MCOperand &Imm12Op = MI->getOperand(OpNum); + + if (Imm12Op.isImm()) { + int64_t Imm12 = Imm12Op.getImm(); + assert(Imm12 >= 0 && "Invalid immediate for add/sub imm"); + O << "#" << Imm12; + } else { + assert(Imm12Op.isExpr() && "Unexpected shift operand type"); + O << "#" << *Imm12Op.getExpr(); + } +} + +void +AArch64InstPrinter::printAddSubImmLSL12Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + + printAddSubImmLSL0Operand(MI, OpNum, O); + + O << ", lsl #12"; +} + +void +AArch64InstPrinter::printBareImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + O << MO.getImm(); +} + +template<unsigned RegWidth> void +AArch64InstPrinter::printBFILSBOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &ImmROp = MI->getOperand(OpNum); + unsigned LSB = ImmROp.getImm() == 0 ? 
0 : RegWidth - ImmROp.getImm(); + + O << '#' << LSB; +} + +void AArch64InstPrinter::printBFIWidthOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &ImmSOp = MI->getOperand(OpNum); + unsigned Width = ImmSOp.getImm() + 1; + + O << '#' << Width; +} + +void +AArch64InstPrinter::printBFXWidthOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &ImmSOp = MI->getOperand(OpNum); + const MCOperand &ImmROp = MI->getOperand(OpNum - 1); + + unsigned ImmR = ImmROp.getImm(); + unsigned ImmS = ImmSOp.getImm(); + + assert(ImmS >= ImmR && "Invalid ImmR, ImmS combination for bitfield extract"); + + O << '#' << (ImmS - ImmR + 1); +} + +void +AArch64InstPrinter::printCRxOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &CRx = MI->getOperand(OpNum); + + O << 'c' << CRx.getImm(); +} + + +void +AArch64InstPrinter::printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &ScaleOp = MI->getOperand(OpNum); + + O << '#' << (64 - ScaleOp.getImm()); +} + + +void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &o) { + const MCOperand &MOImm8 = MI->getOperand(OpNum); + + assert(MOImm8.isImm() + && "Immediate operand required for floating-point immediate inst"); + + uint32_t Imm8 = MOImm8.getImm(); + uint32_t Fraction = Imm8 & 0xf; + uint32_t Exponent = (Imm8 >> 4) & 0x7; + uint32_t Negative = (Imm8 >> 7) & 0x1; + + float Val = 1.0f + Fraction / 16.0f; + + // That is: + // 000 -> 2^1, 001 -> 2^2, 010 -> 2^3, 011 -> 2^4, + // 100 -> 2^-3, 101 -> 2^-2, 110 -> 2^-1, 111 -> 2^0 + if (Exponent & 0x4) { + Val /= 1 << (7 - Exponent); + } else { + Val *= 1 << (Exponent + 1); + } + + Val = Negative ? 
-Val : Val; + + o << '#' << format("%.8f", Val); +} + +void AArch64InstPrinter::printFPZeroOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &o) { + o << "#0.0"; +} + +void +AArch64InstPrinter::printCondCodeOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + O << A64CondCodeToString(static_cast<A64CC::CondCodes>(MO.getImm())); +} + +template <unsigned field_width, unsigned scale> void +AArch64InstPrinter::printLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + if (!MO.isImm()) { + printOperand(MI, OpNum, O); + return; + } + + // The immediate of LDR (lit) instructions is a signed 19-bit immediate, which + // is multiplied by 4 (because all A64 instructions are 32-bits wide). + uint64_t UImm = MO.getImm(); + uint64_t Sign = UImm & (1LL << (field_width - 1)); + int64_t SImm = scale * ((UImm & ~Sign) - Sign); + + O << "#" << SImm; +} + +template<unsigned RegWidth> void +AArch64InstPrinter::printLogicalImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + uint64_t Val; + A64Imms::isLogicalImmBits(RegWidth, MO.getImm(), Val); + O << "#0x"; + O.write_hex(Val); +} + +void +AArch64InstPrinter::printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, int MemSize) { + const MCOperand &MOImm = MI->getOperand(OpNum); + + if (MOImm.isImm()) { + uint32_t Imm = MOImm.getImm() * MemSize; + + O << "#" << Imm; + } else { + O << "#" << *MOImm.getExpr(); + } +} + +void +AArch64InstPrinter::printShiftOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, + A64SE::ShiftExtSpecifiers Shift) { + const MCOperand &MO = MI->getOperand(OpNum); + + // LSL #0 is not printed + if (Shift == A64SE::LSL && MO.isImm() && MO.getImm() == 0) + return; + + switch (Shift) { + case A64SE::LSL: O << "lsl"; break; + case A64SE::LSR: O << "lsr"; break; + case A64SE::ASR: O << "asr"; break; + 
case A64SE::ROR: O << "ror"; break; + default: llvm_unreachable("Invalid shift specifier in logical instruction"); + } + + O << " #" << MO.getImm(); +} + +void +AArch64InstPrinter::printMoveWideImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &UImm16MO = MI->getOperand(OpNum); + const MCOperand &ShiftMO = MI->getOperand(OpNum + 1); + + if (UImm16MO.isImm()) { + O << '#' << UImm16MO.getImm(); + + if (ShiftMO.getImm() != 0) + O << ", lsl #" << (ShiftMO.getImm() * 16); + + return; + } + + O << "#" << *UImm16MO.getExpr(); +} + +void AArch64InstPrinter::printNamedImmOperand(const NamedImmMapper &Mapper, + const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + bool ValidName; + const MCOperand &MO = MI->getOperand(OpNum); + StringRef Name = Mapper.toString(MO.getImm(), ValidName); + + if (ValidName) + O << Name; + else + O << '#' << MO.getImm(); +} + +void +AArch64InstPrinter::printSysRegOperand(const A64SysReg::SysRegMapper &Mapper, + const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + bool ValidName; + std::string Name = Mapper.toString(MO.getImm(), ValidName); + if (ValidName) { + O << Name; + return; + } +} + + +void AArch64InstPrinter::printRegExtendOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O, + A64SE::ShiftExtSpecifiers Ext) { + // FIXME: In principle TableGen should be able to detect this itself far more + // easily. We will only accumulate more of these hacks. 
+ unsigned Reg0 = MI->getOperand(0).getReg(); + unsigned Reg1 = MI->getOperand(1).getReg(); + + if (isStackReg(Reg0) || isStackReg(Reg1)) { + A64SE::ShiftExtSpecifiers LSLEquiv; + + if (Reg0 == AArch64::XSP || Reg1 == AArch64::XSP) + LSLEquiv = A64SE::UXTX; + else + LSLEquiv = A64SE::UXTW; + + if (Ext == LSLEquiv) { + O << "lsl #" << MI->getOperand(OpNum).getImm(); + return; + } + } + + switch (Ext) { + case A64SE::UXTB: O << "uxtb"; break; + case A64SE::UXTH: O << "uxth"; break; + case A64SE::UXTW: O << "uxtw"; break; + case A64SE::UXTX: O << "uxtx"; break; + case A64SE::SXTB: O << "sxtb"; break; + case A64SE::SXTH: O << "sxth"; break; + case A64SE::SXTW: O << "sxtw"; break; + case A64SE::SXTX: O << "sxtx"; break; + default: llvm_unreachable("Unexpected shift type for printing"); + } + + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.getImm() != 0) + O << " #" << MO.getImm(); +} + +template<int MemScale> void +AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MOImm = MI->getOperand(OpNum); + int32_t Imm = unpackSignedImm(7, MOImm.getImm()); + + O << "#" << (Imm * MemScale); +} + +void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + O << getRegisterName(Reg); + } else if (Op.isImm()) { + O << '#' << Op.getImm(); + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + // If a symbolic branch target was added as a constant expression then print + // that address in hex. + const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); + int64_t Address; + if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { + O << "0x"; + O.write_hex(Address); + } + else { + // Otherwise, just print the expression. 
+ O << *Op.getExpr(); + } + } +} + + +void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + if (MI->getOpcode() == AArch64::TLSDESCCALL) { + // This is a special assembler directive which applies an + // R_AARCH64_TLSDESC_CALL to the following (BLR) instruction. It has a fixed + // form outside the normal TableGenerated scheme. + O << "\t.tlsdesccall " << *MI->getOperand(0).getExpr(); + } else if (!printAliasInstr(MI, O)) + printInstruction(MI, O); + + printAnnotation(O, Annot); +} diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h new file mode 100644 index 0000000..1890082 --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -0,0 +1,171 @@ +//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an AArch64 MCInst to a .s file. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64INSTPRINTER_H +#define LLVM_AARCH64INSTPRINTER_H + +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSubtargetInfo.h" + +namespace llvm { + +class MCOperand; + +class AArch64InstPrinter : public MCInstPrinter { +public: + AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + + // Autogenerated by tblgen + void printInstruction(const MCInst *MI, raw_ostream &O); + bool printAliasInstr(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + static const char *getInstructionName(unsigned Opcode); + + void printRegName(raw_ostream &O, unsigned RegNum) const; + + template<unsigned MemSize, unsigned RmSize> + void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printAddrRegExtendOperand(MI, OpNum, O, MemSize, RmSize); + } + + + void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, unsigned MemSize, + unsigned RmSize); + + void printAddSubImmLSL0Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &O); + void printAddSubImmLSL12Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &O); + + void printBareImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + template<unsigned RegWidth> + void printBFILSBOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printBFIWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printBFXWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + + void printCondCodeOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printCRxOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream 
&o); + + void printFPZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o); + + template<int MemScale> + void printOffsetUImm12Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &o) { + printOffsetUImm12Operand(MI, OpNum, o, MemScale); + } + + void printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &o, int MemScale); + + template<unsigned field_width, unsigned scale> + void printLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + template<unsigned RegWidth> + void printLogicalImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + template<typename SomeNamedImmMapper> + void printNamedImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printNamedImmOperand(SomeNamedImmMapper(), MI, OpNum, O); + } + + void printNamedImmOperand(const NamedImmMapper &Mapper, + const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printSysRegOperand(const A64SysReg::SysRegMapper &Mapper, + const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printMRSOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printSysRegOperand(A64SysReg::MRSMapper(), MI, OpNum, O); + } + + void printMSROperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printSysRegOperand(A64SysReg::MSRMapper(), MI, OpNum, O); + } + + void printShiftOperand(const char *name, const MCInst *MI, + unsigned OpIdx, raw_ostream &O); + + void printLSLOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + void printLSROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printShiftOperand("lsr", MI, OpNum, O); + } + void printASROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printShiftOperand("asr", MI, OpNum, O); + } + void printROROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printShiftOperand("ror", MI, OpNum, O); + } + + template<A64SE::ShiftExtSpecifiers Shift> + void printShiftOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printShiftOperand(MI, OpNum, 
O, Shift); + } + + void printShiftOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, A64SE::ShiftExtSpecifiers Sh); + + + void printMoveWideImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + template<int MemSize> void + printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + void printOffsetSImm9Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printPRFMOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + template<A64SE::ShiftExtSpecifiers EXT> + void printRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printRegExtendOperand(MI, OpNum, O, EXT); + } + + void printRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, A64SE::ShiftExtSpecifiers Ext); + + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + + bool isStackReg(unsigned RegNo) { + return RegNo == AArch64::XSP || RegNo == AArch64::WSP; + } + + +}; + +} + +#endif diff --git a/lib/Target/AArch64/InstPrinter/CMakeLists.txt b/lib/Target/AArch64/InstPrinter/CMakeLists.txt new file mode 100644 index 0000000..d4b980a --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/CMakeLists.txt @@ -0,0 +1,8 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMAArch64AsmPrinter + AArch64InstPrinter.cpp + ) + +add_dependencies(LLVMAArch64AsmPrinter AArch64CommonTableGen) + diff --git a/lib/Target/AArch64/InstPrinter/LLVMBuild.txt b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt new file mode 100644 index 0000000..40fdc55 --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/AArch64/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AArch64AsmPrinter +parent = AArch64 +required_libraries = MC Support +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/InstPrinter/Makefile b/lib/Target/AArch64/InstPrinter/Makefile new file mode 100644 index 0000000..1c36a8d --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/AArch64/InstPrinter/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMAArch64AsmPrinter + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt new file mode 100644 index 0000000..09c7448 --- /dev/null +++ b/lib/Target/AArch64/LLVMBuild.txt @@ -0,0 +1,36 @@ +;===- ./lib/Target/AArch64/LLVMBuild.txt -----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo + +[component_0] +type = TargetGroup +name = AArch64 +parent = Target +has_asmparser = 1 +has_asmprinter = 1 +has_disassembler = 1 +;has_jit = 1 + +[component_1] +type = Library +name = AArch64CodeGen +parent = AArch64 +required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AsmPrinter CodeGen Core MC SelectionDAG Support Target +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp new file mode 100644 index 0000000..1c09369 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -0,0 +1,580 @@ +//===-- AArch64AsmBackend.cpp - AArch64 Assembler Backend -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { +class AArch64AsmBackend : public MCAsmBackend { + const MCSubtargetInfo* STI; +public: + AArch64AsmBackend(const Target &T, const StringRef TT) + : MCAsmBackend(), + STI(AArch64_MC::createAArch64MCSubtargetInfo(TT, "", "")) + {} + + + ~AArch64AsmBackend() { + delete STI; + } + + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const; + + virtual void processFixupValue(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + MCValue &Target, uint64_t &Value, + bool &IsResolved); +}; +} // end anonymous namespace + +void AArch64AsmBackend::processFixupValue(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFixup &Fixup, + const MCFragment *DF, + MCValue &Target, uint64_t &Value, + bool &IsResolved) { + // The ADRP instruction adds some multiple of 0x1000 to the current PC & + // ~0xfff. This means that the required offset to reach a symbol can vary by + // up to one step depending on where the ADRP is in memory. For example: + // + // ADRP x0, there + // there: + // + // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and + // we'll need that as an offset. At any other address "there" will be in the + // same page as the ADRP and the instruction should encode 0x0. Assuming the + // section isn't 0x1000-aligned, we therefore need to delegate this decision + // to the linker -- a relocation! 
+ if ((uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_page || + (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_got_page || + (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_gottprel_page || + (uint32_t)Fixup.getKind() == AArch64::fixup_a64_tlsdesc_adr_page) + IsResolved = false; +} + + +static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value); + +namespace { + +class ELFAArch64AsmBackend : public AArch64AsmBackend { +public: + uint8_t OSABI; + ELFAArch64AsmBackend(const Target &T, const StringRef TT, + uint8_t _OSABI) + : AArch64AsmBackend(T, TT), OSABI(_OSABI) { } + + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const; + + unsigned int getNumFixupKinds() const { + return AArch64::NumTargetFixupKinds; + } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = { +// This table *must* be in the order that the fixup_* kinds are defined in +// AArch64FixupKinds.h. 
+// +// Name Offset (bits) Size (bits) Flags + { "fixup_a64_ld_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_adr_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_adr_prel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_add_lo12", 0, 32, 0 }, + { "fixup_a64_ldst8_lo12", 0, 32, 0 }, + { "fixup_a64_ldst16_lo12", 0, 32, 0 }, + { "fixup_a64_ldst32_lo12", 0, 32, 0 }, + { "fixup_a64_ldst64_lo12", 0, 32, 0 }, + { "fixup_a64_ldst128_lo12", 0, 32, 0 }, + { "fixup_a64_tstbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_condbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_uncondbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_call", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_movw_uabs_g0", 0, 32, 0 }, + { "fixup_a64_movw_uabs_g0_nc", 0, 32, 0 }, + { "fixup_a64_movw_uabs_g1", 0, 32, 0 }, + { "fixup_a64_movw_uabs_g1_nc", 0, 32, 0 }, + { "fixup_a64_movw_uabs_g2", 0, 32, 0 }, + { "fixup_a64_movw_uabs_g2_nc", 0, 32, 0 }, + { "fixup_a64_movw_uabs_g3", 0, 32, 0 }, + { "fixup_a64_movw_sabs_g0", 0, 32, 0 }, + { "fixup_a64_movw_sabs_g1", 0, 32, 0 }, + { "fixup_a64_movw_sabs_g2", 0, 32, 0 }, + { "fixup_a64_adr_prel_got_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_ld64_got_lo12_nc", 0, 32, 0 }, + { "fixup_a64_movw_dtprel_g2", 0, 32, 0 }, + { "fixup_a64_movw_dtprel_g1", 0, 32, 0 }, + { "fixup_a64_movw_dtprel_g1_nc", 0, 32, 0 }, + { "fixup_a64_movw_dtprel_g0", 0, 32, 0 }, + { "fixup_a64_movw_dtprel_g0_nc", 0, 32, 0 }, + { "fixup_a64_add_dtprel_hi12", 0, 32, 0 }, + { "fixup_a64_add_dtprel_lo12", 0, 32, 0 }, + { "fixup_a64_add_dtprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ldst8_dtprel_lo12", 0, 32, 0 }, + { "fixup_a64_ldst8_dtprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ldst16_dtprel_lo12", 0, 32, 0 }, + { "fixup_a64_ldst16_dtprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ldst32_dtprel_lo12", 0, 32, 0 }, + { "fixup_a64_ldst32_dtprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ldst64_dtprel_lo12", 0, 32, 0 }, + { 
"fixup_a64_ldst64_dtprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_movw_gottprel_g1", 0, 32, 0 }, + { "fixup_a64_movw_gottprel_g0_nc", 0, 32, 0 }, + { "fixup_a64_adr_gottprel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_ld64_gottprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ld_gottprel_prel19", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_movw_tprel_g2", 0, 32, 0 }, + { "fixup_a64_movw_tprel_g1", 0, 32, 0 }, + { "fixup_a64_movw_tprel_g1_nc", 0, 32, 0 }, + { "fixup_a64_movw_tprel_g0", 0, 32, 0 }, + { "fixup_a64_movw_tprel_g0_nc", 0, 32, 0 }, + { "fixup_a64_add_tprel_hi12", 0, 32, 0 }, + { "fixup_a64_add_tprel_lo12", 0, 32, 0 }, + { "fixup_a64_add_tprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ldst8_tprel_lo12", 0, 32, 0 }, + { "fixup_a64_ldst8_tprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ldst16_tprel_lo12", 0, 32, 0 }, + { "fixup_a64_ldst16_tprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ldst32_tprel_lo12", 0, 32, 0 }, + { "fixup_a64_ldst32_tprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_ldst64_tprel_lo12", 0, 32, 0 }, + { "fixup_a64_ldst64_tprel_lo12_nc", 0, 32, 0 }, + { "fixup_a64_tlsdesc_adr_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_a64_tlsdesc_ld64_lo12_nc", 0, 32, 0 }, + { "fixup_a64_tlsdesc_add_lo12_nc", 0, 32, 0 }, + { "fixup_a64_tlsdesc_call", 0, 0, 0 } + }; + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; + } + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const { + unsigned NumBytes = getFixupKindInfo(Fixup.getKind()).TargetSize / 8; + Value = adjustFixupValue(Fixup.getKind(), Value); + if (!Value) return; // Doesn't change encoding. 
+ + unsigned Offset = Fixup.getOffset(); + assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + + // For each byte of the fragment that the fixup touches, mask in the bits + // from the fixup value. + for (unsigned i = 0; i != NumBytes; ++i) { + Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); + } + } + + bool mayNeedRelaxation(const MCInst&) const { + return false; + } + + void relaxInstruction(const MCInst&, llvm::MCInst&) const { + llvm_unreachable("Cannot relax instructions"); + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return createAArch64ELFObjectWriter(OS, OSABI); + } +}; + +} // end anonymous namespace + +bool +ELFAArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const { + // Correct for now. With all instructions 32-bit only very low-level + // considerations could make you select something which may fail. + return false; +} + + +bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { + // Can't emit NOP with size not multiple of 32-bits + if (Count % 4 != 0) + return false; + + uint64_t NumNops = Count / 4; + for (uint64_t i = 0; i != NumNops; ++i) + OW->Write32(0xd503201f); + + return true; +} + +static unsigned ADRImmBits(unsigned Value) { + unsigned lo2 = Value & 0x3; + unsigned hi19 = (Value & 0x1fffff) >> 2; + + return (hi19 << 5) | (lo2 << 29); +} + +static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { + switch (Kind) { + default: + llvm_unreachable("Unknown fixup kind!"); + case FK_Data_2: + assert((int64_t)Value >= -32768 && + (int64_t)Value <= 65536 && + "Out of range ABS16 fixup"); + return Value; + case FK_Data_4: + assert((int64_t)Value >= -(1LL << 31) && + (int64_t)Value <= (1LL << 32) - 1 && + "Out of range ABS32 fixup"); + return Value; + case FK_Data_8: + return Value; + + case AArch64::fixup_a64_ld_gottprel_prel19: + // R_AARCH64_LD_GOTTPREL_PREL19: Set a 
load-literal immediate to bits 1F + // FFFC of G(TPREL(S+A)) - P; check -2^20 <= X < 2^20. + case AArch64::fixup_a64_ld_prel: + // R_AARCH64_LD_PREL_LO19: Sets a load-literal (immediate) value to bits + // 1F FFFC of S+A-P, checking that -2^20 <= S+A-P < 2^20. + assert((int64_t)Value >= -(1LL << 20) && + (int64_t)Value < (1LL << 20) && "Out of range LDR (lit) fixup"); + return (Value & 0x1ffffc) << 3; + + case AArch64::fixup_a64_adr_prel: + // R_AARCH64_ADR_PREL_LO21: Sets an ADR immediate value to bits 1F FFFF of + // the result of S+A-P, checking that -2^20 <= S+A-P < 2^20. + assert((int64_t)Value >= -(1LL << 20) && + (int64_t)Value < (1LL << 20) && "Out of range ADR fixup"); + return ADRImmBits(Value & 0x1fffff); + + case AArch64::fixup_a64_adr_prel_page: + // R_AARCH64_ADR_PREL_PG_HI21: Sets an ADRP immediate value to bits 1 FFFF + // F000 of the result of the operation, checking that -2^32 <= result < + // 2^32. + assert((int64_t)Value >= -(1LL << 32) && + (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup"); + return ADRImmBits((Value & 0x1fffff000ULL) >> 12); + + case AArch64::fixup_a64_add_dtprel_hi12: + // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits + // FF F000 of DTPREL(S+A), check 0 <= X < 2^24. + case AArch64::fixup_a64_add_tprel_hi12: + // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits + // FF F000 of TPREL(S+A), check 0 <= X < 2^24. + assert((int64_t)Value >= 0 && + (int64_t)Value < (1LL << 24) && "Out of range ADD fixup"); + return (Value & 0xfff000) >> 2; + + case AArch64::fixup_a64_add_dtprel_lo12: + // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits + // FFF of DTPREL(S+A), check 0 <= X < 2^12. + case AArch64::fixup_a64_add_tprel_lo12: + // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits + // FFF of TPREL(S+A), check 0 <= X < 2^12. + assert((int64_t)Value >= 0 && + (int64_t)Value < (1LL << 12) && "Out of range ADD fixup"); + // ... 
fallthrough to no-checking versions ... + case AArch64::fixup_a64_add_dtprel_lo12_nc: + // R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC: Set an ADD immediate field to bits + // FFF of DTPREL(S+A) with no overflow check. + case AArch64::fixup_a64_add_tprel_lo12_nc: + // R_AARCH64_TLSLD_ADD_TPREL_LO12_NC: Set an ADD immediate field to bits + // FFF of TPREL(S+A) with no overflow check. + case AArch64::fixup_a64_tlsdesc_add_lo12_nc: + // R_AARCH64_TLSDESC_ADD_LO12_NC: Set an ADD immediate field to bits + // FFF of G(TLSDESC(S+A)), with no overflow check. + case AArch64::fixup_a64_add_lo12: + // R_AARCH64_ADD_ABS_LO12_NC: Sets an ADD immediate value to bits FFF of + // S+A, with no overflow check. + return (Value & 0xfff) << 10; + + case AArch64::fixup_a64_ldst8_dtprel_lo12: + // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF + // of DTPREL(S+A), check 0 <= X < 2^12. + case AArch64::fixup_a64_ldst8_tprel_lo12: + // R_AARCH64_TLSLE_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF + // of DTPREL(S+A), check 0 <= X < 2^12. + assert((int64_t) Value >= 0 && + (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); + // ... fallthrough to no-checking versions ... + case AArch64::fixup_a64_ldst8_dtprel_lo12_nc: + // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF + // of DTPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst8_tprel_lo12_nc: + // R_AARCH64_TLSLD_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF + // of TPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst8_lo12: + // R_AARCH64_LDST8_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFF + // of S+A, with no overflow check. + return (Value & 0xfff) << 10; + + case AArch64::fixup_a64_ldst16_dtprel_lo12: + // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE + // of DTPREL(S+A), check 0 <= X < 2^12. 
+ case AArch64::fixup_a64_ldst16_tprel_lo12: + // R_AARCH64_TLSLE_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE + // of DTPREL(S+A), check 0 <= X < 2^12. + assert((int64_t) Value >= 0 && + (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); + // ... fallthrough to no-checking versions ... + case AArch64::fixup_a64_ldst16_dtprel_lo12_nc: + // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE + // of DTPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst16_tprel_lo12_nc: + // R_AARCH64_TLSLD_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE + // of TPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst16_lo12: + // R_AARCH64_LDST16_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFE + // of S+A, with no overflow check. + return (Value & 0xffe) << 9; + + case AArch64::fixup_a64_ldst32_dtprel_lo12: + // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC + // of DTPREL(S+A), check 0 <= X < 2^12. + case AArch64::fixup_a64_ldst32_tprel_lo12: + // R_AARCH64_TLSLE_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC + // of DTPREL(S+A), check 0 <= X < 2^12. + assert((int64_t) Value >= 0 && + (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); + // ... fallthrough to no-checking versions ... + case AArch64::fixup_a64_ldst32_dtprel_lo12_nc: + // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC + // of DTPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst32_tprel_lo12_nc: + // R_AARCH64_TLSLD_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC + // of TPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst32_lo12: + // R_AARCH64_LDST32_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFC + // of S+A, with no overflow check. 
+ return (Value & 0xffc) << 8; + + case AArch64::fixup_a64_ldst64_dtprel_lo12: + // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8 + // of DTPREL(S+A), check 0 <= X < 2^12. + case AArch64::fixup_a64_ldst64_tprel_lo12: + // R_AARCH64_TLSLE_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8 + // of DTPREL(S+A), check 0 <= X < 2^12. + assert((int64_t) Value >= 0 && + (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); + // ... fallthrough to no-checking versions ... + case AArch64::fixup_a64_ldst64_dtprel_lo12_nc: + // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8 + // of DTPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst64_tprel_lo12_nc: + // R_AARCH64_TLSLD_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8 + // of TPREL(S+A), with no overflow check. + case AArch64::fixup_a64_ldst64_lo12: + // R_AARCH64_LDST64_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF8 + // of S+A, with no overflow check. + return (Value & 0xff8) << 7; + + case AArch64::fixup_a64_ldst128_lo12: + // R_AARCH64_LDST128_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF0 + // of S+A, with no overflow check. + return (Value & 0xff0) << 6; + + case AArch64::fixup_a64_movw_uabs_g0: + // R_AARCH64_MOVW_UABS_G0: Sets a MOVZ immediate field to bits FFFF of S+A + // with a check that S+A < 2^16 + assert(Value <= 0xffff && "Out of range move wide fixup"); + return (Value & 0xffff) << 5; + + case AArch64::fixup_a64_movw_dtprel_g0_nc: + // R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC: Sets a MOVK immediate field to bits + // FFFF of DTPREL(S+A) with no overflow check. + case AArch64::fixup_a64_movw_gottprel_g0_nc: + // R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: Sets a MOVK immediate field to bits + // FFFF of G(TPREL(S+A)) - GOT with no overflow check. + case AArch64::fixup_a64_movw_tprel_g0_nc: + // R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: Sets a MOVK immediate field to bits + // FFFF of TPREL(S+A) with no overflow check. 
+ case AArch64::fixup_a64_movw_uabs_g0_nc: + // R_AARCH64_MOVW_UABS_G0_NC: Sets a MOVK immediate field to bits FFFF of + // S+A with no overflow check. + return (Value & 0xffff) << 5; + + case AArch64::fixup_a64_movw_uabs_g1: + // R_AARCH64_MOVW_UABS_G1: Sets a MOVZ immediate field to bits FFFF0000 of + // S+A with a check that S+A < 2^32 + assert(Value <= 0xffffffffull && "Out of range move wide fixup"); + return ((Value >> 16) & 0xffff) << 5; + + case AArch64::fixup_a64_movw_dtprel_g1_nc: + // R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC: Set a MOVK immediate field + // to bits FFFF0000 of DTPREL(S+A), with no overflow check. + case AArch64::fixup_a64_movw_tprel_g1_nc: + // R_AARCH64_TLSLD_MOVW_TPREL_G1_NC: Set a MOVK immediate field + // to bits FFFF0000 of TPREL(S+A), with no overflow check. + case AArch64::fixup_a64_movw_uabs_g1_nc: + // R_AARCH64_MOVW_UABS_G1_NC: Sets a MOVK immediate field to bits + // FFFF0000 of S+A with no overflow check. + return ((Value >> 16) & 0xffff) << 5; + + case AArch64::fixup_a64_movw_uabs_g2: + // R_AARCH64_MOVW_UABS_G2: Sets a MOVZ immediate field to bits FFFF 0000 + // 0000 of S+A with a check that S+A < 2^48 + assert(Value <= 0xffffffffffffull && "Out of range move wide fixup"); + return ((Value >> 32) & 0xffff) << 5; + + case AArch64::fixup_a64_movw_uabs_g2_nc: + // R_AARCH64_MOVW_UABS_G2: Sets a MOVK immediate field to bits FFFF 0000 + // 0000 of S+A with no overflow check. + return ((Value >> 32) & 0xffff) << 5; + + case AArch64::fixup_a64_movw_uabs_g3: + // R_AARCH64_MOVW_UABS_G3: Sets a MOVZ immediate field to bits FFFF 0000 + // 0000 0000 of S+A (no overflow check needed) + return ((Value >> 48) & 0xffff) << 5; + + case AArch64::fixup_a64_movw_dtprel_g0: + // R_AARCH64_TLSLD_MOVW_DTPREL_G0: Set a MOV[NZ] immediate field + // to bits FFFF of DTPREL(S+A). + case AArch64::fixup_a64_movw_tprel_g0: + // R_AARCH64_TLSLE_MOVW_TPREL_G0: Set a MOV[NZ] immediate field to + // bits FFFF of TPREL(S+A). 
+ case AArch64::fixup_a64_movw_sabs_g0: { + // R_AARCH64_MOVW_SABS_G0: Sets MOV[NZ] immediate field using bits FFFF of + // S+A (see notes below); check -2^16 <= S+A < 2^16. (notes say that we + // should convert between MOVN and MOVZ to achieve our goals). + int64_t Signed = Value; + assert(Signed >= -(1LL << 16) && Signed < (1LL << 16) + && "Out of range move wide fixup"); + if (Signed >= 0) { + Value = (Value & 0xffff) << 5; + // Bit 30 converts the MOVN encoding into a MOVZ + Value |= 1 << 30; + } else { + // MCCodeEmitter should have encoded a MOVN, which is fine. + Value = (~Value & 0xffff) << 5; + } + return Value; + } + + case AArch64::fixup_a64_movw_dtprel_g1: + // R_AARCH64_TLSLD_MOVW_DTPREL_G1: Set a MOV[NZ] immediate field + // to bits FFFF0000 of DTPREL(S+A). + case AArch64::fixup_a64_movw_gottprel_g1: + // R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: Set a MOV[NZ] immediate field + // to bits FFFF0000 of G(TPREL(S+A)) - GOT. + case AArch64::fixup_a64_movw_tprel_g1: + // R_AARCH64_TLSLE_MOVW_TPREL_G1: Set a MOV[NZ] immediate field to + // bits FFFF0000 of TPREL(S+A). + case AArch64::fixup_a64_movw_sabs_g1: { + // R_AARCH64_MOVW_SABS_G1: Sets MOV[NZ] immediate field using bits FFFF 0000 + // of S+A (see notes below); check -2^32 <= S+A < 2^32. (notes say that we + // should convert between MOVN and MOVZ to achieve our goals). + int64_t Signed = Value; + assert(Signed >= -(1LL << 32) && Signed < (1LL << 32) + && "Out of range move wide fixup"); + if (Signed >= 0) { + Value = ((Value >> 16) & 0xffff) << 5; + // Bit 30 converts the MOVN encoding into a MOVZ + Value |= 1 << 30; + } else { + Value = ((~Value >> 16) & 0xffff) << 5; + } + return Value; + } + + case AArch64::fixup_a64_movw_dtprel_g2: + // R_AARCH64_TLSLD_MOVW_DTPREL_G2: Set a MOV[NZ] immediate field + // to bits FFFF 0000 0000 of DTPREL(S+A). + case AArch64::fixup_a64_movw_tprel_g2: + // R_AARCH64_TLSLE_MOVW_TPREL_G2: Set a MOV[NZ] immediate field to + // bits FFFF 0000 0000 of TPREL(S+A). 
+ case AArch64::fixup_a64_movw_sabs_g2: { + // R_AARCH64_MOVW_SABS_G2: Sets MOV[NZ] immediate field using bits FFFF 0000 + // 0000 of S+A (see notes below); check -2^48 <= S+A < 2^48. (notes say that + // we should convert between MOVN and MOVZ to achieve our goals). + int64_t Signed = Value; + assert(Signed >= -(1LL << 48) && Signed < (1LL << 48) + && "Out of range move wide fixup"); + if (Signed >= 0) { + Value = ((Value >> 32) & 0xffff) << 5; + // Bit 30 converts the MOVN encoding into a MOVZ + Value |= 1 << 30; + } else { + Value = ((~Value >> 32) & 0xffff) << 5; + } + return Value; + } + + case AArch64::fixup_a64_tstbr: + // R_AARCH64_TSTBR14: Sets the immediate field of a TBZ/TBNZ instruction to + // bits FFFC of S+A-P, checking -2^15 <= S+A-P < 2^15. + assert((int64_t)Value >= -(1LL << 15) && + (int64_t)Value < (1LL << 15) && "Out of range TBZ/TBNZ fixup"); + return (Value & 0xfffc) << (5 - 2); + + case AArch64::fixup_a64_condbr: + // R_AARCH64_CONDBR19: Sets the immediate field of a conditional branch + // instruction to bits 1FFFFC of S+A-P, checking -2^20 <= S+A-P < 2^20. + assert((int64_t)Value >= -(1LL << 20) && + (int64_t)Value < (1LL << 20) && "Out of range B.cond fixup"); + return (Value & 0x1ffffc) << (5 - 2); + + case AArch64::fixup_a64_uncondbr: + // R_AARCH64_JUMP26 same as below (except to a linker, possibly). + case AArch64::fixup_a64_call: + // R_AARCH64_CALL26: Sets a CALL immediate field to bits FFFFFFC of S+A-P, + // checking that -2^27 <= S+A-P < 2^27. + assert((int64_t)Value >= -(1LL << 27) && + (int64_t)Value < (1LL << 27) && "Out of range branch fixup"); + return (Value & 0xffffffc) >> 2; + + case AArch64::fixup_a64_adr_gottprel_page: + // R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: Set an ADRP immediate field to bits + // 1FFFFF000 of Page(G(TPREL(S+A))) - Page(P); check -2^32 <= X < 2^32. 
+ case AArch64::fixup_a64_tlsdesc_adr_page: + // R_AARCH64_TLSDESC_ADR_PAGE: Set an ADRP immediate field to bits 1FFFFF000 + // of Page(G(TLSDESC(S+A))) - Page(P); check -2^32 <= X < 2^32. + case AArch64::fixup_a64_adr_prel_got_page: + // R_AARCH64_ADR_GOT_PAGE: Sets the immediate value of an ADRP to bits + // 1FFFFF000 of the operation, checking that -2^32 < Page(G(S))-Page(GOT) < + // 2^32. + assert((int64_t)Value >= -(1LL << 32) && + (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup"); + return ADRImmBits((Value & 0x1fffff000) >> 12); + + case AArch64::fixup_a64_ld64_gottprel_lo12_nc: + // R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: Set an LD offset field to bits FF8 + // of X, with no overflow check. Check that X & 7 == 0. + case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc: + // R_AARCH64_TLSDESC_LD64_LO12_NC: Set an LD offset field to bits FF8 of + // G(TLSDESC(S+A)), with no overflow check. Check that X & 7 == 0. + case AArch64::fixup_a64_ld64_got_lo12_nc: + // R_AARCH64_LD64_GOT_LO12_NC: Sets the LD/ST immediate field to bits FF8 of + // G(S) with no overflow check. Check X & 7 == 0 + assert(((int64_t)Value & 7) == 0 && "Misaligned fixup"); + return (Value & 0xff8) << 7; + + case AArch64::fixup_a64_tlsdesc_call: + // R_AARCH64_TLSDESC_CALL: For relaxation only. + return 0; + } +} + +MCAsmBackend * +llvm::createAArch64AsmBackend(const Target &T, StringRef TT, StringRef CPU) { + Triple TheTriple(TT); + + return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS()); +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h new file mode 100644 index 0000000..b71eb0d --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h @@ -0,0 +1,779 @@ +//===-- AArch64BaseInfo.h - Top level definitions for AArch64- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the AArch64 target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64_BASEINFO_H +#define LLVM_AARCH64_BASEINFO_H + +#include "AArch64MCTargetDesc.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +// // Enums corresponding to AArch64 condition codes +namespace A64CC { + // The CondCodes constants map directly to the 4-bit encoding of the + // condition field for predicated instructions. + enum CondCodes { // Meaning (integer) Meaning (floating-point) + EQ = 0, // Equal Equal + NE, // Not equal Not equal, or unordered + HS, // Unsigned higher or same >, ==, or unordered + LO, // Unsigned lower or same Less than + MI, // Minus, negative Less than + PL, // Plus, positive or zero >, ==, or unordered + VS, // Overflow Unordered + VC, // No overflow Ordered + HI, // Unsigned higher Greater than, or unordered + LS, // Unsigned lower or same Less than or equal + GE, // Greater than or equal Greater than or equal + LT, // Less than Less than, or unordered + GT, // Signed greater than Greater than + LE, // Signed less than or equal <, ==, or unordered + AL, // Always (unconditional) Always (unconditional) + NV, // Always (unconditional) Always (unconditional) + // Note the NV exists purely to disassemble 0b1111. Execution + // is "always". 
+ Invalid + }; + +} // namespace A64CC + +inline static const char *A64CondCodeToString(A64CC::CondCodes CC) { + switch (CC) { + default: llvm_unreachable("Unknown condition code"); + case A64CC::EQ: return "eq"; + case A64CC::NE: return "ne"; + case A64CC::HS: return "hs"; + case A64CC::LO: return "lo"; + case A64CC::MI: return "mi"; + case A64CC::PL: return "pl"; + case A64CC::VS: return "vs"; + case A64CC::VC: return "vc"; + case A64CC::HI: return "hi"; + case A64CC::LS: return "ls"; + case A64CC::GE: return "ge"; + case A64CC::LT: return "lt"; + case A64CC::GT: return "gt"; + case A64CC::LE: return "le"; + case A64CC::AL: return "al"; + case A64CC::NV: return "nv"; + } +} + +inline static A64CC::CondCodes A64StringToCondCode(StringRef CondStr) { + return StringSwitch<A64CC::CondCodes>(CondStr.lower()) + .Case("eq", A64CC::EQ) + .Case("ne", A64CC::NE) + .Case("ne", A64CC::NE) + .Case("hs", A64CC::HS) + .Case("cs", A64CC::HS) + .Case("lo", A64CC::LO) + .Case("cc", A64CC::LO) + .Case("mi", A64CC::MI) + .Case("pl", A64CC::PL) + .Case("vs", A64CC::VS) + .Case("vc", A64CC::VC) + .Case("hi", A64CC::HI) + .Case("ls", A64CC::LS) + .Case("ge", A64CC::GE) + .Case("lt", A64CC::LT) + .Case("gt", A64CC::GT) + .Case("le", A64CC::LE) + .Case("al", A64CC::AL) + .Case("nv", A64CC::NV) + .Default(A64CC::Invalid); +} + +inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) { + // It turns out that the condition codes have been designed so that in order + // to reverse the intent of the condition you only have to invert the low bit: + + return static_cast<A64CC::CondCodes>(static_cast<unsigned>(CC) ^ 0x1); +} + +/// Instances of this class can perform bidirectional mapping from random +/// identifier strings to operand encodings. For example "MSR" takes a named +/// system-register which must be encoded somehow and decoded for printing. This +/// central location means that the information for those transformations is not +/// duplicated and remains in sync. 
+/// +/// FIXME: currently the algorithm is a completely unoptimised linear +/// search. Obviously this could be improved, but we would probably want to work +/// out just how often these instructions are emitted before working on it. It +/// might even be optimal to just reorder the tables for the common instructions +/// rather than changing the algorithm. +struct NamedImmMapper { + struct Mapping { + const char *Name; + uint32_t Value; + }; + + template<int N> + NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm) + : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {} + + StringRef toString(uint32_t Value, bool &Valid) const; + uint32_t fromString(StringRef Name, bool &Valid) const; + + /// Many of the instructions allow an alternative assembly form consisting of + /// a simple immediate. Currently the only valid forms are ranges [0, N) where + /// N being 0 indicates no immediate syntax-form is allowed. + bool validImm(uint32_t Value) const; +protected: + const Mapping *Pairs; + size_t NumPairs; + uint32_t TooBigImm; +}; + +namespace A64AT { + enum ATValues { + Invalid = -1, // Op0 Op1 CRn CRm Op2 + S1E1R = 0x43c0, // 01 000 0111 1000 000 + S1E2R = 0x63c0, // 01 100 0111 1000 000 + S1E3R = 0x73c0, // 01 110 0111 1000 000 + S1E1W = 0x43c1, // 01 000 0111 1000 001 + S1E2W = 0x63c1, // 01 100 0111 1000 001 + S1E3W = 0x73c1, // 01 110 0111 1000 001 + S1E0R = 0x43c2, // 01 000 0111 1000 010 + S1E0W = 0x43c3, // 01 000 0111 1000 011 + S12E1R = 0x63c4, // 01 100 0111 1000 100 + S12E1W = 0x63c5, // 01 100 0111 1000 101 + S12E0R = 0x63c6, // 01 100 0111 1000 110 + S12E0W = 0x63c7 // 01 100 0111 1000 111 + }; + + struct ATMapper : NamedImmMapper { + const static Mapping ATPairs[]; + + ATMapper(); + }; + +} +namespace A64DB { + enum DBValues { + Invalid = -1, + OSHLD = 0x1, + OSHST = 0x2, + OSH = 0x3, + NSHLD = 0x5, + NSHST = 0x6, + NSH = 0x7, + ISHLD = 0x9, + ISHST = 0xa, + ISH = 0xb, + LD = 0xd, + ST = 0xe, + SY = 0xf + }; + + struct DBarrierMapper : 
NamedImmMapper { + const static Mapping DBarrierPairs[]; + + DBarrierMapper(); + }; +} + +namespace A64DC { + enum DCValues { + Invalid = -1, // Op1 CRn CRm Op2 + ZVA = 0x5ba1, // 01 011 0111 0100 001 + IVAC = 0x43b1, // 01 000 0111 0110 001 + ISW = 0x43b2, // 01 000 0111 0110 010 + CVAC = 0x5bd1, // 01 011 0111 1010 001 + CSW = 0x43d2, // 01 000 0111 1010 010 + CVAU = 0x5bd9, // 01 011 0111 1011 001 + CIVAC = 0x5bf1, // 01 011 0111 1110 001 + CISW = 0x43f2 // 01 000 0111 1110 010 + }; + + struct DCMapper : NamedImmMapper { + const static Mapping DCPairs[]; + + DCMapper(); + }; + +} + +namespace A64IC { + enum ICValues { + Invalid = -1, // Op1 CRn CRm Op2 + IALLUIS = 0x0388, // 000 0111 0001 000 + IALLU = 0x03a8, // 000 0111 0101 000 + IVAU = 0x1ba9 // 011 0111 0101 001 + }; + + + struct ICMapper : NamedImmMapper { + const static Mapping ICPairs[]; + + ICMapper(); + }; + + static inline bool NeedsRegister(ICValues Val) { + return Val == IVAU; + } +} + +namespace A64ISB { + enum ISBValues { + Invalid = -1, + SY = 0xf + }; + struct ISBMapper : NamedImmMapper { + const static Mapping ISBPairs[]; + + ISBMapper(); + }; +} + +namespace A64PRFM { + enum PRFMValues { + Invalid = -1, + PLDL1KEEP = 0x00, + PLDL1STRM = 0x01, + PLDL2KEEP = 0x02, + PLDL2STRM = 0x03, + PLDL3KEEP = 0x04, + PLDL3STRM = 0x05, + PSTL1KEEP = 0x10, + PSTL1STRM = 0x11, + PSTL2KEEP = 0x12, + PSTL2STRM = 0x13, + PSTL3KEEP = 0x14, + PSTL3STRM = 0x15 + }; + + struct PRFMMapper : NamedImmMapper { + const static Mapping PRFMPairs[]; + + PRFMMapper(); + }; +} + +namespace A64PState { + enum PStateValues { + Invalid = -1, + SPSel = 0x05, + DAIFSet = 0x1e, + DAIFClr = 0x1f + }; + + struct PStateMapper : NamedImmMapper { + const static Mapping PStatePairs[]; + + PStateMapper(); + }; + +} + +namespace A64SE { + enum ShiftExtSpecifiers { + Invalid = -1, + LSL, + LSR, + ASR, + ROR, + + UXTB, + UXTH, + UXTW, + UXTX, + + SXTB, + SXTH, + SXTW, + SXTX + }; +} + +namespace A64SysReg { + enum SysRegROValues { + 
MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000 + DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000 + MDRAR_EL1 = 0x8080, // 10 000 0001 0000 000 + OSLSR_EL1 = 0x808c, // 10 000 0001 0001 100 + DBGAUTHSTATUS_EL1 = 0x83f6, // 10 000 0111 1110 110 + PMCEID0_EL0 = 0xdce6, // 11 011 1001 1100 110 + PMCEID1_EL0 = 0xdce7, // 11 011 1001 1100 111 + MIDR_EL1 = 0xc000, // 11 000 0000 0000 000 + CCSIDR_EL1 = 0xc800, // 11 001 0000 0000 000 + CLIDR_EL1 = 0xc801, // 11 001 0000 0000 001 + CTR_EL0 = 0xd801, // 11 011 0000 0000 001 + MPIDR_EL1 = 0xc005, // 11 000 0000 0000 101 + REVIDR_EL1 = 0xc006, // 11 000 0000 0000 110 + AIDR_EL1 = 0xc807, // 11 001 0000 0000 111 + DCZID_EL0 = 0xd807, // 11 011 0000 0000 111 + ID_PFR0_EL1 = 0xc008, // 11 000 0000 0001 000 + ID_PFR1_EL1 = 0xc009, // 11 000 0000 0001 001 + ID_DFR0_EL1 = 0xc00a, // 11 000 0000 0001 010 + ID_AFR0_EL1 = 0xc00b, // 11 000 0000 0001 011 + ID_MMFR0_EL1 = 0xc00c, // 11 000 0000 0001 100 + ID_MMFR1_EL1 = 0xc00d, // 11 000 0000 0001 101 + ID_MMFR2_EL1 = 0xc00e, // 11 000 0000 0001 110 + ID_MMFR3_EL1 = 0xc00f, // 11 000 0000 0001 111 + ID_ISAR0_EL1 = 0xc010, // 11 000 0000 0010 000 + ID_ISAR1_EL1 = 0xc011, // 11 000 0000 0010 001 + ID_ISAR2_EL1 = 0xc012, // 11 000 0000 0010 010 + ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011 + ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100 + ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101 + ID_AA64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000 + ID_AA64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001 + ID_AA64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000 + ID_AA64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001 + ID_AA64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100 + ID_AA64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101 + ID_AA64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000 + ID_AA64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001 + ID_AA64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000 + ID_AA64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001 + MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000 + MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001 
+ MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010 + RVBAR_EL1 = 0xc601, // 11 000 1100 0000 001 + RVBAR_EL2 = 0xe601, // 11 100 1100 0000 001 + RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001 + ISR_EL1 = 0xc608, // 11 000 1100 0001 000 + CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001 + CNTVCT_EL0 = 0xdf02 // 11 011 1110 0000 010 + }; + + enum SysRegWOValues { + DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000 + OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100 + PMSWINC_EL0 = 0xdce4 // 11 011 1001 1100 100 + }; + + enum SysRegValues { + Invalid = -1, // Op0 Op1 CRn CRm Op2 + OSDTRRX_EL1 = 0x8002, // 10 000 0000 0000 010 + OSDTRTX_EL1 = 0x801a, // 10 000 0000 0011 010 + TEECR32_EL1 = 0x9000, // 10 010 0000 0000 000 + MDCCINT_EL1 = 0x8010, // 10 000 0000 0010 000 + MDSCR_EL1 = 0x8012, // 10 000 0000 0010 010 + DBGDTR_EL0 = 0x9820, // 10 011 0000 0100 000 + OSECCR_EL1 = 0x8032, // 10 000 0000 0110 010 + DBGVCR32_EL2 = 0xa038, // 10 100 0000 0111 000 + DBGBVR0_EL1 = 0x8004, // 10 000 0000 0000 100 + DBGBVR1_EL1 = 0x800c, // 10 000 0000 0001 100 + DBGBVR2_EL1 = 0x8014, // 10 000 0000 0010 100 + DBGBVR3_EL1 = 0x801c, // 10 000 0000 0011 100 + DBGBVR4_EL1 = 0x8024, // 10 000 0000 0100 100 + DBGBVR5_EL1 = 0x802c, // 10 000 0000 0101 100 + DBGBVR6_EL1 = 0x8034, // 10 000 0000 0110 100 + DBGBVR7_EL1 = 0x803c, // 10 000 0000 0111 100 + DBGBVR8_EL1 = 0x8044, // 10 000 0000 1000 100 + DBGBVR9_EL1 = 0x804c, // 10 000 0000 1001 100 + DBGBVR10_EL1 = 0x8054, // 10 000 0000 1010 100 + DBGBVR11_EL1 = 0x805c, // 10 000 0000 1011 100 + DBGBVR12_EL1 = 0x8064, // 10 000 0000 1100 100 + DBGBVR13_EL1 = 0x806c, // 10 000 0000 1101 100 + DBGBVR14_EL1 = 0x8074, // 10 000 0000 1110 100 + DBGBVR15_EL1 = 0x807c, // 10 000 0000 1111 100 + DBGBCR0_EL1 = 0x8005, // 10 000 0000 0000 101 + DBGBCR1_EL1 = 0x800d, // 10 000 0000 0001 101 + DBGBCR2_EL1 = 0x8015, // 10 000 0000 0010 101 + DBGBCR3_EL1 = 0x801d, // 10 000 0000 0011 101 + DBGBCR4_EL1 = 0x8025, // 10 000 0000 0100 101 + DBGBCR5_EL1 = 0x802d, // 10 000 0000 
0101 101 + DBGBCR6_EL1 = 0x8035, // 10 000 0000 0110 101 + DBGBCR7_EL1 = 0x803d, // 10 000 0000 0111 101 + DBGBCR8_EL1 = 0x8045, // 10 000 0000 1000 101 + DBGBCR9_EL1 = 0x804d, // 10 000 0000 1001 101 + DBGBCR10_EL1 = 0x8055, // 10 000 0000 1010 101 + DBGBCR11_EL1 = 0x805d, // 10 000 0000 1011 101 + DBGBCR12_EL1 = 0x8065, // 10 000 0000 1100 101 + DBGBCR13_EL1 = 0x806d, // 10 000 0000 1101 101 + DBGBCR14_EL1 = 0x8075, // 10 000 0000 1110 101 + DBGBCR15_EL1 = 0x807d, // 10 000 0000 1111 101 + DBGWVR0_EL1 = 0x8006, // 10 000 0000 0000 110 + DBGWVR1_EL1 = 0x800e, // 10 000 0000 0001 110 + DBGWVR2_EL1 = 0x8016, // 10 000 0000 0010 110 + DBGWVR3_EL1 = 0x801e, // 10 000 0000 0011 110 + DBGWVR4_EL1 = 0x8026, // 10 000 0000 0100 110 + DBGWVR5_EL1 = 0x802e, // 10 000 0000 0101 110 + DBGWVR6_EL1 = 0x8036, // 10 000 0000 0110 110 + DBGWVR7_EL1 = 0x803e, // 10 000 0000 0111 110 + DBGWVR8_EL1 = 0x8046, // 10 000 0000 1000 110 + DBGWVR9_EL1 = 0x804e, // 10 000 0000 1001 110 + DBGWVR10_EL1 = 0x8056, // 10 000 0000 1010 110 + DBGWVR11_EL1 = 0x805e, // 10 000 0000 1011 110 + DBGWVR12_EL1 = 0x8066, // 10 000 0000 1100 110 + DBGWVR13_EL1 = 0x806e, // 10 000 0000 1101 110 + DBGWVR14_EL1 = 0x8076, // 10 000 0000 1110 110 + DBGWVR15_EL1 = 0x807e, // 10 000 0000 1111 110 + DBGWCR0_EL1 = 0x8007, // 10 000 0000 0000 111 + DBGWCR1_EL1 = 0x800f, // 10 000 0000 0001 111 + DBGWCR2_EL1 = 0x8017, // 10 000 0000 0010 111 + DBGWCR3_EL1 = 0x801f, // 10 000 0000 0011 111 + DBGWCR4_EL1 = 0x8027, // 10 000 0000 0100 111 + DBGWCR5_EL1 = 0x802f, // 10 000 0000 0101 111 + DBGWCR6_EL1 = 0x8037, // 10 000 0000 0110 111 + DBGWCR7_EL1 = 0x803f, // 10 000 0000 0111 111 + DBGWCR8_EL1 = 0x8047, // 10 000 0000 1000 111 + DBGWCR9_EL1 = 0x804f, // 10 000 0000 1001 111 + DBGWCR10_EL1 = 0x8057, // 10 000 0000 1010 111 + DBGWCR11_EL1 = 0x805f, // 10 000 0000 1011 111 + DBGWCR12_EL1 = 0x8067, // 10 000 0000 1100 111 + DBGWCR13_EL1 = 0x806f, // 10 000 0000 1101 111 + DBGWCR14_EL1 = 0x8077, // 10 000 0000 1110 111 + 
DBGWCR15_EL1 = 0x807f, // 10 000 0000 1111 111 + TEEHBR32_EL1 = 0x9080, // 10 010 0001 0000 000 + OSDLR_EL1 = 0x809c, // 10 000 0001 0011 100 + DBGPRCR_EL1 = 0x80a4, // 10 000 0001 0100 100 + DBGCLAIMSET_EL1 = 0x83c6, // 10 000 0111 1000 110 + DBGCLAIMCLR_EL1 = 0x83ce, // 10 000 0111 1001 110 + CSSELR_EL1 = 0xd000, // 11 010 0000 0000 000 + VPIDR_EL2 = 0xe000, // 11 100 0000 0000 000 + VMPIDR_EL2 = 0xe005, // 11 100 0000 0000 101 + CPACR_EL1 = 0xc082, // 11 000 0001 0000 010 + SCTLR_EL1 = 0xc080, // 11 000 0001 0000 000 + SCTLR_EL2 = 0xe080, // 11 100 0001 0000 000 + SCTLR_EL3 = 0xf080, // 11 110 0001 0000 000 + ACTLR_EL1 = 0xc081, // 11 000 0001 0000 001 + ACTLR_EL2 = 0xe081, // 11 100 0001 0000 001 + ACTLR_EL3 = 0xf081, // 11 110 0001 0000 001 + HCR_EL2 = 0xe088, // 11 100 0001 0001 000 + SCR_EL3 = 0xf088, // 11 110 0001 0001 000 + MDCR_EL2 = 0xe089, // 11 100 0001 0001 001 + SDER32_EL3 = 0xf089, // 11 110 0001 0001 001 + CPTR_EL2 = 0xe08a, // 11 100 0001 0001 010 + CPTR_EL3 = 0xf08a, // 11 110 0001 0001 010 + HSTR_EL2 = 0xe08b, // 11 100 0001 0001 011 + HACR_EL2 = 0xe08f, // 11 100 0001 0001 111 + MDCR_EL3 = 0xf099, // 11 110 0001 0011 001 + TTBR0_EL1 = 0xc100, // 11 000 0010 0000 000 + TTBR0_EL2 = 0xe100, // 11 100 0010 0000 000 + TTBR0_EL3 = 0xf100, // 11 110 0010 0000 000 + TTBR1_EL1 = 0xc101, // 11 000 0010 0000 001 + TCR_EL1 = 0xc102, // 11 000 0010 0000 010 + TCR_EL2 = 0xe102, // 11 100 0010 0000 010 + TCR_EL3 = 0xf102, // 11 110 0010 0000 010 + VTTBR_EL2 = 0xe108, // 11 100 0010 0001 000 + VTCR_EL2 = 0xe10a, // 11 100 0010 0001 010 + DACR32_EL2 = 0xe180, // 11 100 0011 0000 000 + SPSR_EL1 = 0xc200, // 11 000 0100 0000 000 + SPSR_EL2 = 0xe200, // 11 100 0100 0000 000 + SPSR_EL3 = 0xf200, // 11 110 0100 0000 000 + ELR_EL1 = 0xc201, // 11 000 0100 0000 001 + ELR_EL2 = 0xe201, // 11 100 0100 0000 001 + ELR_EL3 = 0xf201, // 11 110 0100 0000 001 + SP_EL0 = 0xc208, // 11 000 0100 0001 000 + SP_EL1 = 0xe208, // 11 100 0100 0001 000 + SP_EL2 = 0xf208, // 11 110 
0100 0001 000 + SPSel = 0xc210, // 11 000 0100 0010 000 + NZCV = 0xda10, // 11 011 0100 0010 000 + DAIF = 0xda11, // 11 011 0100 0010 001 + CurrentEL = 0xc212, // 11 000 0100 0010 010 + SPSR_irq = 0xe218, // 11 100 0100 0011 000 + SPSR_abt = 0xe219, // 11 100 0100 0011 001 + SPSR_und = 0xe21a, // 11 100 0100 0011 010 + SPSR_fiq = 0xe21b, // 11 100 0100 0011 011 + FPCR = 0xda20, // 11 011 0100 0100 000 + FPSR = 0xda21, // 11 011 0100 0100 001 + DSPSR_EL0 = 0xda28, // 11 011 0100 0101 000 + DLR_EL0 = 0xda29, // 11 011 0100 0101 001 + IFSR32_EL2 = 0xe281, // 11 100 0101 0000 001 + AFSR0_EL1 = 0xc288, // 11 000 0101 0001 000 + AFSR0_EL2 = 0xe288, // 11 100 0101 0001 000 + AFSR0_EL3 = 0xf288, // 11 110 0101 0001 000 + AFSR1_EL1 = 0xc289, // 11 000 0101 0001 001 + AFSR1_EL2 = 0xe289, // 11 100 0101 0001 001 + AFSR1_EL3 = 0xf289, // 11 110 0101 0001 001 + ESR_EL1 = 0xc290, // 11 000 0101 0010 000 + ESR_EL2 = 0xe290, // 11 100 0101 0010 000 + ESR_EL3 = 0xf290, // 11 110 0101 0010 000 + FPEXC32_EL2 = 0xe298, // 11 100 0101 0011 000 + FAR_EL1 = 0xc300, // 11 000 0110 0000 000 + FAR_EL2 = 0xe300, // 11 100 0110 0000 000 + FAR_EL3 = 0xf300, // 11 110 0110 0000 000 + HPFAR_EL2 = 0xe304, // 11 100 0110 0000 100 + PAR_EL1 = 0xc3a0, // 11 000 0111 0100 000 + PMCR_EL0 = 0xdce0, // 11 011 1001 1100 000 + PMCNTENSET_EL0 = 0xdce1, // 11 011 1001 1100 001 + PMCNTENCLR_EL0 = 0xdce2, // 11 011 1001 1100 010 + PMOVSCLR_EL0 = 0xdce3, // 11 011 1001 1100 011 + PMSELR_EL0 = 0xdce5, // 11 011 1001 1100 101 + PMCCNTR_EL0 = 0xdce8, // 11 011 1001 1101 000 + PMXEVTYPER_EL0 = 0xdce9, // 11 011 1001 1101 001 + PMXEVCNTR_EL0 = 0xdcea, // 11 011 1001 1101 010 + PMUSERENR_EL0 = 0xdcf0, // 11 011 1001 1110 000 + PMINTENSET_EL1 = 0xc4f1, // 11 000 1001 1110 001 + PMINTENCLR_EL1 = 0xc4f2, // 11 000 1001 1110 010 + PMOVSSET_EL0 = 0xdcf3, // 11 011 1001 1110 011 + MAIR_EL1 = 0xc510, // 11 000 1010 0010 000 + MAIR_EL2 = 0xe510, // 11 100 1010 0010 000 + MAIR_EL3 = 0xf510, // 11 110 1010 0010 000 + 
AMAIR_EL1 = 0xc518, // 11 000 1010 0011 000 + AMAIR_EL2 = 0xe518, // 11 100 1010 0011 000 + AMAIR_EL3 = 0xf518, // 11 110 1010 0011 000 + VBAR_EL1 = 0xc600, // 11 000 1100 0000 000 + VBAR_EL2 = 0xe600, // 11 100 1100 0000 000 + VBAR_EL3 = 0xf600, // 11 110 1100 0000 000 + RMR_EL1 = 0xc602, // 11 000 1100 0000 010 + RMR_EL2 = 0xe602, // 11 100 1100 0000 010 + RMR_EL3 = 0xf602, // 11 110 1100 0000 010 + CONTEXTIDR_EL1 = 0xc681, // 11 000 1101 0000 001 + TPIDR_EL0 = 0xde82, // 11 011 1101 0000 010 + TPIDR_EL2 = 0xe682, // 11 100 1101 0000 010 + TPIDR_EL3 = 0xf682, // 11 110 1101 0000 010 + TPIDRRO_EL0 = 0xde83, // 11 011 1101 0000 011 + TPIDR_EL1 = 0xc684, // 11 000 1101 0000 100 + CNTFRQ_EL0 = 0xdf00, // 11 011 1110 0000 000 + CNTVOFF_EL2 = 0xe703, // 11 100 1110 0000 011 + CNTKCTL_EL1 = 0xc708, // 11 000 1110 0001 000 + CNTHCTL_EL2 = 0xe708, // 11 100 1110 0001 000 + CNTP_TVAL_EL0 = 0xdf10, // 11 011 1110 0010 000 + CNTHP_TVAL_EL2 = 0xe710, // 11 100 1110 0010 000 + CNTPS_TVAL_EL1 = 0xff10, // 11 111 1110 0010 000 + CNTP_CTL_EL0 = 0xdf11, // 11 011 1110 0010 001 + CNTHP_CTL_EL2 = 0xe711, // 11 100 1110 0010 001 + CNTPS_CTL_EL1 = 0xff11, // 11 111 1110 0010 001 + CNTP_CVAL_EL0 = 0xdf12, // 11 011 1110 0010 010 + CNTHP_CVAL_EL2 = 0xe712, // 11 100 1110 0010 010 + CNTPS_CVAL_EL1 = 0xff12, // 11 111 1110 0010 010 + CNTV_TVAL_EL0 = 0xdf18, // 11 011 1110 0011 000 + CNTV_CTL_EL0 = 0xdf19, // 11 011 1110 0011 001 + CNTV_CVAL_EL0 = 0xdf1a, // 11 011 1110 0011 010 + PMEVCNTR0_EL0 = 0xdf40, // 11 011 1110 1000 000 + PMEVCNTR1_EL0 = 0xdf41, // 11 011 1110 1000 001 + PMEVCNTR2_EL0 = 0xdf42, // 11 011 1110 1000 010 + PMEVCNTR3_EL0 = 0xdf43, // 11 011 1110 1000 011 + PMEVCNTR4_EL0 = 0xdf44, // 11 011 1110 1000 100 + PMEVCNTR5_EL0 = 0xdf45, // 11 011 1110 1000 101 + PMEVCNTR6_EL0 = 0xdf46, // 11 011 1110 1000 110 + PMEVCNTR7_EL0 = 0xdf47, // 11 011 1110 1000 111 + PMEVCNTR8_EL0 = 0xdf48, // 11 011 1110 1001 000 + PMEVCNTR9_EL0 = 0xdf49, // 11 011 1110 1001 001 + PMEVCNTR10_EL0 = 
0xdf4a, // 11 011 1110 1001 010 + PMEVCNTR11_EL0 = 0xdf4b, // 11 011 1110 1001 011 + PMEVCNTR12_EL0 = 0xdf4c, // 11 011 1110 1001 100 + PMEVCNTR13_EL0 = 0xdf4d, // 11 011 1110 1001 101 + PMEVCNTR14_EL0 = 0xdf4e, // 11 011 1110 1001 110 + PMEVCNTR15_EL0 = 0xdf4f, // 11 011 1110 1001 111 + PMEVCNTR16_EL0 = 0xdf50, // 11 011 1110 1010 000 + PMEVCNTR17_EL0 = 0xdf51, // 11 011 1110 1010 001 + PMEVCNTR18_EL0 = 0xdf52, // 11 011 1110 1010 010 + PMEVCNTR19_EL0 = 0xdf53, // 11 011 1110 1010 011 + PMEVCNTR20_EL0 = 0xdf54, // 11 011 1110 1010 100 + PMEVCNTR21_EL0 = 0xdf55, // 11 011 1110 1010 101 + PMEVCNTR22_EL0 = 0xdf56, // 11 011 1110 1010 110 + PMEVCNTR23_EL0 = 0xdf57, // 11 011 1110 1010 111 + PMEVCNTR24_EL0 = 0xdf58, // 11 011 1110 1011 000 + PMEVCNTR25_EL0 = 0xdf59, // 11 011 1110 1011 001 + PMEVCNTR26_EL0 = 0xdf5a, // 11 011 1110 1011 010 + PMEVCNTR27_EL0 = 0xdf5b, // 11 011 1110 1011 011 + PMEVCNTR28_EL0 = 0xdf5c, // 11 011 1110 1011 100 + PMEVCNTR29_EL0 = 0xdf5d, // 11 011 1110 1011 101 + PMEVCNTR30_EL0 = 0xdf5e, // 11 011 1110 1011 110 + PMCCFILTR_EL0 = 0xdf7f, // 11 011 1110 1111 111 + PMEVTYPER0_EL0 = 0xdf60, // 11 011 1110 1100 000 + PMEVTYPER1_EL0 = 0xdf61, // 11 011 1110 1100 001 + PMEVTYPER2_EL0 = 0xdf62, // 11 011 1110 1100 010 + PMEVTYPER3_EL0 = 0xdf63, // 11 011 1110 1100 011 + PMEVTYPER4_EL0 = 0xdf64, // 11 011 1110 1100 100 + PMEVTYPER5_EL0 = 0xdf65, // 11 011 1110 1100 101 + PMEVTYPER6_EL0 = 0xdf66, // 11 011 1110 1100 110 + PMEVTYPER7_EL0 = 0xdf67, // 11 011 1110 1100 111 + PMEVTYPER8_EL0 = 0xdf68, // 11 011 1110 1101 000 + PMEVTYPER9_EL0 = 0xdf69, // 11 011 1110 1101 001 + PMEVTYPER10_EL0 = 0xdf6a, // 11 011 1110 1101 010 + PMEVTYPER11_EL0 = 0xdf6b, // 11 011 1110 1101 011 + PMEVTYPER12_EL0 = 0xdf6c, // 11 011 1110 1101 100 + PMEVTYPER13_EL0 = 0xdf6d, // 11 011 1110 1101 101 + PMEVTYPER14_EL0 = 0xdf6e, // 11 011 1110 1101 110 + PMEVTYPER15_EL0 = 0xdf6f, // 11 011 1110 1101 111 + PMEVTYPER16_EL0 = 0xdf70, // 11 011 1110 1110 000 + PMEVTYPER17_EL0 = 
0xdf71, // 11 011 1110 1110 001 + PMEVTYPER18_EL0 = 0xdf72, // 11 011 1110 1110 010 + PMEVTYPER19_EL0 = 0xdf73, // 11 011 1110 1110 011 + PMEVTYPER20_EL0 = 0xdf74, // 11 011 1110 1110 100 + PMEVTYPER21_EL0 = 0xdf75, // 11 011 1110 1110 101 + PMEVTYPER22_EL0 = 0xdf76, // 11 011 1110 1110 110 + PMEVTYPER23_EL0 = 0xdf77, // 11 011 1110 1110 111 + PMEVTYPER24_EL0 = 0xdf78, // 11 011 1110 1111 000 + PMEVTYPER25_EL0 = 0xdf79, // 11 011 1110 1111 001 + PMEVTYPER26_EL0 = 0xdf7a, // 11 011 1110 1111 010 + PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011 + PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100 + PMEVTYPER29_EL0 = 0xdf7d, // 11 011 1110 1111 101 + PMEVTYPER30_EL0 = 0xdf7e // 11 011 1110 1111 110 + }; + + // Note that these do not inherit from NamedImmMapper. This class is + // sufficiently different in its behaviour that I don't believe it's worth + // burdening the common NamedImmMapper with abstractions only needed in + // this one case. + struct SysRegMapper { + static const NamedImmMapper::Mapping SysRegPairs[]; + + const NamedImmMapper::Mapping *InstPairs; + size_t NumInstPairs; + + SysRegMapper() {} + uint32_t fromString(StringRef Name, bool &Valid) const; + std::string toString(uint32_t Bits, bool &Valid) const; + }; + + struct MSRMapper : SysRegMapper { + static const NamedImmMapper::Mapping MSRPairs[]; + MSRMapper(); + }; + + struct MRSMapper : SysRegMapper { + static const NamedImmMapper::Mapping MRSPairs[]; + MRSMapper(); + }; + + uint32_t ParseGenericRegister(StringRef Name, bool &Valid); +} + +namespace A64TLBI { + enum TLBIValues { + Invalid = -1, // Op0 Op1 CRn CRm Op2 + IPAS2E1IS = 0x6401, // 01 100 1000 0000 001 + IPAS2LE1IS = 0x6405, // 01 100 1000 0000 101 + VMALLE1IS = 0x4418, // 01 000 1000 0011 000 + ALLE2IS = 0x6418, // 01 100 1000 0011 000 + ALLE3IS = 0x7418, // 01 110 1000 0011 000 + VAE1IS = 0x4419, // 01 000 1000 0011 001 + VAE2IS = 0x6419, // 01 100 1000 0011 001 + VAE3IS = 0x7419, // 01 110 1000 0011 001 + ASIDE1IS = 0x441a, // 01 000 
1000 0011 010 + VAAE1IS = 0x441b, // 01 000 1000 0011 011 + ALLE1IS = 0x641c, // 01 100 1000 0011 100 + VALE1IS = 0x441d, // 01 000 1000 0011 101 + VALE2IS = 0x641d, // 01 100 1000 0011 101 + VALE3IS = 0x741d, // 01 110 1000 0011 101 + VMALLS12E1IS = 0x641e, // 01 100 1000 0011 110 + VAALE1IS = 0x441f, // 01 000 1000 0011 111 + IPAS2E1 = 0x6421, // 01 100 1000 0100 001 + IPAS2LE1 = 0x6425, // 01 100 1000 0100 101 + VMALLE1 = 0x4438, // 01 000 1000 0111 000 + ALLE2 = 0x6438, // 01 100 1000 0111 000 + ALLE3 = 0x7438, // 01 110 1000 0111 000 + VAE1 = 0x4439, // 01 000 1000 0111 001 + VAE2 = 0x6439, // 01 100 1000 0111 001 + VAE3 = 0x7439, // 01 110 1000 0111 001 + ASIDE1 = 0x443a, // 01 000 1000 0111 010 + VAAE1 = 0x443b, // 01 000 1000 0111 011 + ALLE1 = 0x643c, // 01 100 1000 0111 100 + VALE1 = 0x443d, // 01 000 1000 0111 101 + VALE2 = 0x643d, // 01 100 1000 0111 101 + VALE3 = 0x743d, // 01 110 1000 0111 101 + VMALLS12E1 = 0x643e, // 01 100 1000 0111 110 + VAALE1 = 0x443f // 01 000 1000 0111 111 + }; + + struct TLBIMapper : NamedImmMapper { + const static Mapping TLBIPairs[]; + + TLBIMapper(); + }; + + static inline bool NeedsRegister(TLBIValues Val) { + switch (Val) { + case VMALLE1IS: + case ALLE2IS: + case ALLE3IS: + case ALLE1IS: + case VMALLS12E1IS: + case VMALLE1: + case ALLE2: + case ALLE3: + case ALLE1: + case VMALLS12E1: + return false; + default: + return true; + } + } +} + +namespace AArch64II { + + enum TOF { + //===--------------------------------------------------------------===// + // AArch64 Specific MachineOperand flags. + + MO_NO_FLAG, + + // MO_GOT - Represents a relocation referring to the GOT entry of a given + // symbol. Used in adrp. + MO_GOT, + + // MO_GOT_LO12 - Represents a relocation referring to the low 12 bits of the + // GOT entry of a given symbol. Used in ldr only. + MO_GOT_LO12, + + // MO_DTPREL_* - Represents a relocation referring to the offset from a + // module's dynamic thread pointer. 
Used in the local-dynamic TLS access + // model. + MO_DTPREL_G1, + MO_DTPREL_G0_NC, + + // MO_GOTTPREL_* - Represents a relocation referring to a GOT entry + // providing the offset of a variable from the thread-pointer. Used in + // initial-exec TLS model where this offset is assigned in the static thread + // block and thus known by the dynamic linker. + MO_GOTTPREL, + MO_GOTTPREL_LO12, + + // MO_TLSDESC_* - Represents a relocation referring to a GOT entry providing + // a TLS descriptor chosen by the dynamic linker. Used for the + // general-dynamic and local-dynamic TLS access models where very little is + // known at link-time. + MO_TLSDESC, + MO_TLSDESC_LO12, + + // MO_TPREL_* - Represents a relocation referring to the offset of a + // variable from the thread pointer itself. Used in the local-exec TLS + // access model. + MO_TPREL_G1, + MO_TPREL_G0_NC, + + // MO_LO12 - On a symbol operand, this represents a relocation containing + // lower 12 bits of the address. Used in add/sub/ldr/str. + MO_LO12 + }; +} + +class APFloat; + +namespace A64Imms { + bool isFPImm(const APFloat &Val, uint32_t &Imm8Bits); + + inline bool isFPImm(const APFloat &Val) { + uint32_t Imm8; + return isFPImm(Val, Imm8); + } + + bool isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits); + bool isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm); + + bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); + bool isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); + + // We sometimes want to know whether the immediate is representable with a + // MOVN but *not* with a MOVZ (because that would take priority). 
+ bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); + +} + +} // end namespace llvm; + +#endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp new file mode 100644 index 0000000..476b94e --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -0,0 +1,287 @@ +//===-- AArch64ELFObjectWriter.cpp - AArch64 ELF Writer -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +namespace { +class AArch64ELFObjectWriter : public MCELFObjectTargetWriter { +public: + AArch64ELFObjectWriter(uint8_t OSABI); + + virtual ~AArch64ELFObjectWriter(); + +protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const; +private: +}; +} + +AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64, + /*HasRelocationAddend*/ true) +{} + +AArch64ELFObjectWriter::~AArch64ELFObjectWriter() +{} + +unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + unsigned Type; + if (IsPCRel) { + switch ((unsigned)Fixup.getKind()) { + default: + llvm_unreachable("Unimplemented fixup -> relocation"); + case FK_Data_8: + return ELF::R_AARCH64_PREL64; + case FK_Data_4: + return ELF::R_AARCH64_PREL32; + case FK_Data_2: + return ELF::R_AARCH64_PREL16; + case 
AArch64::fixup_a64_ld_prel: + Type = ELF::R_AARCH64_LD_PREL_LO19; + break; + case AArch64::fixup_a64_adr_prel: + Type = ELF::R_AARCH64_ADR_PREL_LO21; + break; + case AArch64::fixup_a64_adr_prel_page: + Type = ELF::R_AARCH64_ADR_PREL_PG_HI21; + break; + case AArch64::fixup_a64_adr_prel_got_page: + Type = ELF::R_AARCH64_ADR_GOT_PAGE; + break; + case AArch64::fixup_a64_tstbr: + Type = ELF::R_AARCH64_TSTBR14; + break; + case AArch64::fixup_a64_condbr: + Type = ELF::R_AARCH64_CONDBR19; + break; + case AArch64::fixup_a64_uncondbr: + Type = ELF::R_AARCH64_JUMP26; + break; + case AArch64::fixup_a64_call: + Type = ELF::R_AARCH64_CALL26; + break; + case AArch64::fixup_a64_adr_gottprel_page: + Type = ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21; + break; + case AArch64::fixup_a64_ld_gottprel_prel19: + Type = ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19; + break; + case AArch64::fixup_a64_tlsdesc_adr_page: + Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE; + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: + llvm_unreachable("Unimplemented fixup -> relocation"); + case FK_Data_8: + return ELF::R_AARCH64_ABS64; + case FK_Data_4: + return ELF::R_AARCH64_ABS32; + case FK_Data_2: + return ELF::R_AARCH64_ABS16; + case AArch64::fixup_a64_add_lo12: + Type = ELF::R_AARCH64_ADD_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ld64_got_lo12_nc: + Type = ELF::R_AARCH64_LD64_GOT_LO12_NC; + break; + case AArch64::fixup_a64_ldst8_lo12: + Type = ELF::R_AARCH64_LDST8_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ldst16_lo12: + Type = ELF::R_AARCH64_LDST16_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ldst32_lo12: + Type = ELF::R_AARCH64_LDST32_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ldst64_lo12: + Type = ELF::R_AARCH64_LDST64_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ldst128_lo12: + Type = ELF::R_AARCH64_LDST128_ABS_LO12_NC; + break; + case AArch64::fixup_a64_movw_uabs_g0: + Type = ELF::R_AARCH64_MOVW_UABS_G0; + break; + case AArch64::fixup_a64_movw_uabs_g0_nc: + Type = 
ELF::R_AARCH64_MOVW_UABS_G0_NC; + break; + case AArch64::fixup_a64_movw_uabs_g1: + Type = ELF::R_AARCH64_MOVW_UABS_G1; + break; + case AArch64::fixup_a64_movw_uabs_g1_nc: + Type = ELF::R_AARCH64_MOVW_UABS_G1_NC; + break; + case AArch64::fixup_a64_movw_uabs_g2: + Type = ELF::R_AARCH64_MOVW_UABS_G2; + break; + case AArch64::fixup_a64_movw_uabs_g2_nc: + Type = ELF::R_AARCH64_MOVW_UABS_G2_NC; + break; + case AArch64::fixup_a64_movw_uabs_g3: + Type = ELF::R_AARCH64_MOVW_UABS_G3; + break; + case AArch64::fixup_a64_movw_sabs_g0: + Type = ELF::R_AARCH64_MOVW_SABS_G0; + break; + case AArch64::fixup_a64_movw_sabs_g1: + Type = ELF::R_AARCH64_MOVW_SABS_G1; + break; + case AArch64::fixup_a64_movw_sabs_g2: + Type = ELF::R_AARCH64_MOVW_SABS_G2; + break; + + // TLS Local-dynamic block + case AArch64::fixup_a64_movw_dtprel_g2: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2; + break; + case AArch64::fixup_a64_movw_dtprel_g1: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1; + break; + case AArch64::fixup_a64_movw_dtprel_g1_nc: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC; + break; + case AArch64::fixup_a64_movw_dtprel_g0: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0; + break; + case AArch64::fixup_a64_movw_dtprel_g0_nc: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC; + break; + case AArch64::fixup_a64_add_dtprel_hi12: + Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12; + break; + case AArch64::fixup_a64_add_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12; + break; + case AArch64::fixup_a64_add_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst8_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12; + break; + case AArch64::fixup_a64_ldst8_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst16_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12; + break; + case AArch64::fixup_a64_ldst16_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC; 
+ break; + case AArch64::fixup_a64_ldst32_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12; + break; + case AArch64::fixup_a64_ldst32_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst64_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12; + break; + case AArch64::fixup_a64_ldst64_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC; + break; + + // TLS initial-exec block + case AArch64::fixup_a64_movw_gottprel_g1: + Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1; + break; + case AArch64::fixup_a64_movw_gottprel_g0_nc: + Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC; + break; + case AArch64::fixup_a64_ld64_gottprel_lo12_nc: + Type = ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; + break; + + // TLS local-exec block + case AArch64::fixup_a64_movw_tprel_g2: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2; + break; + case AArch64::fixup_a64_movw_tprel_g1: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1; + break; + case AArch64::fixup_a64_movw_tprel_g1_nc: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC; + break; + case AArch64::fixup_a64_movw_tprel_g0: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0; + break; + case AArch64::fixup_a64_movw_tprel_g0_nc: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC; + break; + case AArch64::fixup_a64_add_tprel_hi12: + Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12; + break; + case AArch64::fixup_a64_add_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12; + break; + case AArch64::fixup_a64_add_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst8_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12; + break; + case AArch64::fixup_a64_ldst8_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst16_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12; + break; + case AArch64::fixup_a64_ldst16_tprel_lo12_nc: + Type = 
ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst32_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12; + break; + case AArch64::fixup_a64_ldst32_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst64_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12; + break; + case AArch64::fixup_a64_ldst64_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; + break; + + // TLS general-dynamic block + case AArch64::fixup_a64_tlsdesc_adr_page: + Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE; + break; + case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc: + Type = ELF::R_AARCH64_TLSDESC_LD64_LO12_NC; + break; + case AArch64::fixup_a64_tlsdesc_add_lo12_nc: + Type = ELF::R_AARCH64_TLSDESC_ADD_LO12_NC; + break; + case AArch64::fixup_a64_tlsdesc_call: + Type = ELF::R_AARCH64_TLSDESC_CALL; + break; + } + } + + return Type; +} + +MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp new file mode 100644 index 0000000..b83577a --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -0,0 +1,160 @@ +//===- lib/MC/AArch64ELFStreamer.cpp - ELF Object Output for AArch64 ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file assembles .s files and emits AArch64 ELF .o object files. Different +// from generic ELF streamer in emitting mapping symbols ($x and $d) to delimit +// regions of data and code. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELF.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +/// Extend the generic ELFStreamer class so that it can emit mapping symbols at +/// the appropriate points in the object files. These symbols are defined in the +/// AArch64 ELF ABI: +/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf +/// +/// In brief: $x or $d should be emitted at the start of each contiguous region +/// of A64 code or data in a section. In practice, this emission does not rely +/// on explicit assembler directives but on inherent properties of the +/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an +/// instruction). +/// +/// As a result this system is orthogonal to the DataRegion infrastructure used +/// by MachO. Beware! 
+class AArch64ELFStreamer : public MCELFStreamer { +public: + AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCELFStreamer(Context, TAB, OS, Emitter), + MappingSymbolCounter(0), LastEMS(EMS_None) { + } + + ~AArch64ELFStreamer() {} + + virtual void ChangeSection(const MCSection *Section) { + // We have to keep track of the mapping symbol state of any sections we + // use. Each one should start off as EMS_None, which is provided as the + // default constructor by DenseMap::lookup. + LastMappingSymbols[getPreviousSection()] = LastEMS; + LastEMS = LastMappingSymbols.lookup(Section); + + MCELFStreamer::ChangeSection(Section); + } + + /// This function is the one used to emit instruction data into the ELF + /// streamer. We override it to add the appropriate mapping symbol if + /// necessary. + virtual void EmitInstruction(const MCInst& Inst) { + EmitA64MappingSymbol(); + MCELFStreamer::EmitInstruction(Inst); + } + + /// This is one of the functions used to emit data into an ELF section, so the + /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) + /// if necessary. + virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { + EmitDataMappingSymbol(); + MCELFStreamer::EmitBytes(Data, AddrSpace); + } + + /// This is one of the functions used to emit data into an ELF section, so the + /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) + /// if necessary. 
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + EmitDataMappingSymbol(); + MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace); + } + +private: + enum ElfMappingSymbol { + EMS_None, + EMS_A64, + EMS_Data + }; + + void EmitDataMappingSymbol() { + if (LastEMS == EMS_Data) return; + EmitMappingSymbol("$d"); + LastEMS = EMS_Data; + } + + void EmitA64MappingSymbol() { + if (LastEMS == EMS_A64) return; + EmitMappingSymbol("$x"); + LastEMS = EMS_A64; + } + + void EmitMappingSymbol(StringRef Name) { + MCSymbol *Start = getContext().CreateTempSymbol(); + EmitLabel(Start); + + MCSymbol *Symbol = + getContext().GetOrCreateSymbol(Name + "." + + Twine(MappingSymbolCounter++)); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + MCELF::SetType(SD, ELF::STT_NOTYPE); + MCELF::SetBinding(SD, ELF::STB_LOCAL); + SD.setExternal(false); + Symbol->setSection(*getCurrentSection()); + + const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); + Symbol->setVariableValue(Value); + } + + int64_t MappingSymbolCounter; + + DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols; + ElfMappingSymbol LastEMS; + + /// @} +}; +} + +namespace llvm { + MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack) { + AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + if (NoExecStack) + S->getAssembler().setNoExecStack(true); + return S; + } +} + + diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h new file mode 100644 index 0000000..5a89ca5 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h @@ -0,0 +1,27 @@ +//===-- AArch64ELFStreamer.h - ELF Streamer for AArch64 ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is 
distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF streamer information for the AArch64 backend. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64_ELF_STREAMER_H +#define LLVM_AARCH64_ELF_STREAMER_H + +#include "llvm/MC/MCELFStreamer.h" + +namespace llvm { + + MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack); +} + +#endif // LLVM_AARCH64_ELF_STREAMER_H diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h new file mode 100644 index 0000000..15e0886 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h @@ -0,0 +1,108 @@ +//=- AArch64/AArch64FixupKinds.h - AArch64 Specific Fixup Entries -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64_AARCH64FIXUPKINDS_H +#define LLVM_AARCH64_AARCH64FIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { + namespace AArch64 { + enum Fixups { + fixup_a64_ld_prel = FirstTargetFixupKind, + fixup_a64_adr_prel, + fixup_a64_adr_prel_page, + + fixup_a64_add_lo12, + + fixup_a64_ldst8_lo12, + fixup_a64_ldst16_lo12, + fixup_a64_ldst32_lo12, + fixup_a64_ldst64_lo12, + fixup_a64_ldst128_lo12, + + fixup_a64_tstbr, + fixup_a64_condbr, + fixup_a64_uncondbr, + fixup_a64_call, + + fixup_a64_movw_uabs_g0, + fixup_a64_movw_uabs_g0_nc, + fixup_a64_movw_uabs_g1, + fixup_a64_movw_uabs_g1_nc, + fixup_a64_movw_uabs_g2, + fixup_a64_movw_uabs_g2_nc, + fixup_a64_movw_uabs_g3, + + fixup_a64_movw_sabs_g0, + fixup_a64_movw_sabs_g1, + fixup_a64_movw_sabs_g2, + + fixup_a64_adr_prel_got_page, + fixup_a64_ld64_got_lo12_nc, + + // Produce offsets relative to the module's dynamic TLS area. + fixup_a64_movw_dtprel_g2, + fixup_a64_movw_dtprel_g1, + fixup_a64_movw_dtprel_g1_nc, + fixup_a64_movw_dtprel_g0, + fixup_a64_movw_dtprel_g0_nc, + fixup_a64_add_dtprel_hi12, + fixup_a64_add_dtprel_lo12, + fixup_a64_add_dtprel_lo12_nc, + fixup_a64_ldst8_dtprel_lo12, + fixup_a64_ldst8_dtprel_lo12_nc, + fixup_a64_ldst16_dtprel_lo12, + fixup_a64_ldst16_dtprel_lo12_nc, + fixup_a64_ldst32_dtprel_lo12, + fixup_a64_ldst32_dtprel_lo12_nc, + fixup_a64_ldst64_dtprel_lo12, + fixup_a64_ldst64_dtprel_lo12_nc, + + // Produce the GOT entry containing a variable's address in TLS's + // initial-exec mode. + fixup_a64_movw_gottprel_g1, + fixup_a64_movw_gottprel_g0_nc, + fixup_a64_adr_gottprel_page, + fixup_a64_ld64_gottprel_lo12_nc, + fixup_a64_ld_gottprel_prel19, + + // Produce offsets relative to the thread pointer: TPIDR_EL0. 
+ fixup_a64_movw_tprel_g2, + fixup_a64_movw_tprel_g1, + fixup_a64_movw_tprel_g1_nc, + fixup_a64_movw_tprel_g0, + fixup_a64_movw_tprel_g0_nc, + fixup_a64_add_tprel_hi12, + fixup_a64_add_tprel_lo12, + fixup_a64_add_tprel_lo12_nc, + fixup_a64_ldst8_tprel_lo12, + fixup_a64_ldst8_tprel_lo12_nc, + fixup_a64_ldst16_tprel_lo12, + fixup_a64_ldst16_tprel_lo12_nc, + fixup_a64_ldst32_tprel_lo12, + fixup_a64_ldst32_tprel_lo12_nc, + fixup_a64_ldst64_tprel_lo12, + fixup_a64_ldst64_tprel_lo12_nc, + + // Produce the special fixups used by the general-dynamic TLS model. + fixup_a64_tlsdesc_adr_page, + fixup_a64_tlsdesc_ld64_lo12_nc, + fixup_a64_tlsdesc_add_lo12_nc, + fixup_a64_tlsdesc_call, + + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind + }; + } +} + +#endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp new file mode 100644 index 0000000..8ec8cbf --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -0,0 +1,41 @@ +//===-- AArch64MCAsmInfo.cpp - AArch64 asm properties ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the AArch64MCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "AArch64MCAsmInfo.h" + +using namespace llvm; + +AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo() { + PointerSize = 8; + + // ".comm align is in bytes but .align is pow-2." 
+ AlignmentIsInBytes = false; + + CommentString = "//"; + PrivateGlobalPrefix = ".L"; + Code32Directive = ".code\t32"; + + Data16bitsDirective = "\t.hword\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = "\t.xword\t"; + + UseDataRegionDirectives = true; + + WeakRefDirective = "\t.weak\t"; + + HasLEB128 = true; + SupportsDebugInformation = true; + + // Exception handling + ExceptionsType = ExceptionHandling::DwarfCFI; +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h new file mode 100644 index 0000000..a20bc47 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -0,0 +1,27 @@ +//==-- AArch64MCAsmInfo.h - AArch64 asm properties -------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the AArch64ELFMCAsmInfo struct. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64TARGETASMINFO_H +#define LLVM_AARCH64TARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + + struct AArch64ELFMCAsmInfo : public MCAsmInfo { + explicit AArch64ELFMCAsmInfo(); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp new file mode 100644 index 0000000..f2bbd85 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -0,0 +1,517 @@ +//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code =// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the AArch64MCCodeEmitter class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mccodeemitter" +#include "MCTargetDesc/AArch64BaseInfo.h" +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +class AArch64MCCodeEmitter : public MCCodeEmitter { + AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT + const MCInstrInfo &MCII; + const MCSubtargetInfo &STI; + MCContext &Ctx; + +public: + AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, + MCContext &ctx) + : MCII(mcii), STI(sti), Ctx(ctx) { + } + + ~AArch64MCCodeEmitter() {} + + unsigned getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + unsigned getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + template<int MemSize> + unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + return getOffsetUImm12OpValue(MI, OpIdx, Fixups, MemSize); + } + + unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups, + int MemSize) const; + + unsigned getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + + // Labels are handled mostly the same way: 
a symbol is needed, and + // just gets some fixup attached. + template<AArch64::Fixups fixupDesired> + unsigned getLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + unsigned getLoadLitLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + + unsigned getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + + unsigned getAddressWithFixup(const MCOperand &MO, + unsigned FixupKind, + SmallVectorImpl<MCFixup> &Fixups) const; + + + // getBinaryCodeForInstr - TableGen'erated function for getting the + // binary encoding for an instruction. + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getMachineOpValue - Return binary encoding of a register or immediate + /// operand; any other operand kind is unsupported here (llvm_unreachable). + unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const; + + + void EmitByte(unsigned char C, raw_ostream &OS) const { + OS << (char)C; + } + + void EmitInstruction(uint32_t Val, raw_ostream &OS) const { + // Output the constant in little endian byte order. 
+ for (unsigned i = 0; i != 4; ++i) { + EmitByte(Val & 0xff, OS); + Val >>= 8; + } + } + + + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const; + + unsigned fixFCMPImm(const MCInst &MI, unsigned EncodedValue) const; + + template<int hasRs, int hasRt2> unsigned + fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue) const; + + unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue) const; + + unsigned fixMulHigh(const MCInst &MI, unsigned EncodedValue) const; + + +}; + +} // end anonymous namespace + +unsigned AArch64MCCodeEmitter::getAddressWithFixup(const MCOperand &MO, + unsigned FixupKind, + SmallVectorImpl<MCFixup> &Fixups) const { + if (!MO.isExpr()) { + // This can occur for manually decoded or constructed MCInsts, but neither + // the assembly-parser nor instruction selection will currently produce an + // MCInst that's not a symbol reference. + assert(MO.isImm() && "Unexpected address requested"); + return MO.getImm(); + } + + const MCExpr *Expr = MO.getExpr(); + MCFixupKind Kind = MCFixupKind(FixupKind); + Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + + return 0; +} + +unsigned AArch64MCCodeEmitter:: +getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups, + int MemSize) const { + const MCOperand &ImmOp = MI.getOperand(OpIdx); + if (ImmOp.isImm()) + return ImmOp.getImm(); + + assert(ImmOp.isExpr() && "Unexpected operand type"); + const AArch64MCExpr *Expr = cast<AArch64MCExpr>(ImmOp.getExpr()); + unsigned FixupKind; + + + switch (Expr->getKind()) { + default: llvm_unreachable("Unexpected operand modifier"); + case AArch64MCExpr::VK_AARCH64_LO12: { + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12, + AArch64::fixup_a64_ldst16_lo12, + AArch64::fixup_a64_ldst32_lo12, + AArch64::fixup_a64_ldst64_lo12, + AArch64::fixup_a64_ldst128_lo12 }; + assert(MemSize <= 16 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; 
+ } + case AArch64MCExpr::VK_AARCH64_GOT_LO12: + assert(MemSize == 8 && "Invalid fixup for operation"); + FixupKind = AArch64::fixup_a64_ld64_got_lo12_nc; + break; + case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: { + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12, + AArch64::fixup_a64_ldst16_dtprel_lo12, + AArch64::fixup_a64_ldst32_dtprel_lo12, + AArch64::fixup_a64_ldst64_dtprel_lo12 }; + assert(MemSize <= 8 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: { + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12_nc, + AArch64::fixup_a64_ldst16_dtprel_lo12_nc, + AArch64::fixup_a64_ldst32_dtprel_lo12_nc, + AArch64::fixup_a64_ldst64_dtprel_lo12_nc }; + assert(MemSize <= 8 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12: + assert(MemSize == 8 && "Invalid fixup for operation"); + FixupKind = AArch64::fixup_a64_ld64_gottprel_lo12_nc; + break; + case AArch64MCExpr::VK_AARCH64_TPREL_LO12:{ + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12, + AArch64::fixup_a64_ldst16_tprel_lo12, + AArch64::fixup_a64_ldst32_tprel_lo12, + AArch64::fixup_a64_ldst64_tprel_lo12 }; + assert(MemSize <= 8 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: { + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12_nc, + AArch64::fixup_a64_ldst16_tprel_lo12_nc, + AArch64::fixup_a64_ldst32_tprel_lo12_nc, + AArch64::fixup_a64_ldst64_tprel_lo12_nc }; + assert(MemSize <= 8 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12: + assert(MemSize == 8 && "Invalid fixup for operation"); + FixupKind = AArch64::fixup_a64_tlsdesc_ld64_lo12_nc; + break; + } + + return getAddressWithFixup(ImmOp, 
FixupKind, Fixups); +} + +unsigned +AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = MI.getOperand(OpIdx); + if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + + assert(MO.isExpr()); + + unsigned FixupKind = 0; + switch(cast<AArch64MCExpr>(MO.getExpr())->getKind()) { + default: llvm_unreachable("Invalid expression modifier"); + case AArch64MCExpr::VK_AARCH64_LO12: + FixupKind = AArch64::fixup_a64_add_lo12; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_HI12: + FixupKind = AArch64::fixup_a64_add_dtprel_hi12; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: + FixupKind = AArch64::fixup_a64_add_dtprel_lo12; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: + FixupKind = AArch64::fixup_a64_add_dtprel_lo12_nc; break; + case AArch64MCExpr::VK_AARCH64_TPREL_HI12: + FixupKind = AArch64::fixup_a64_add_tprel_hi12; break; + case AArch64MCExpr::VK_AARCH64_TPREL_LO12: + FixupKind = AArch64::fixup_a64_add_tprel_lo12; break; + case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: + FixupKind = AArch64::fixup_a64_add_tprel_lo12_nc; break; + case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12: + FixupKind = AArch64::fixup_a64_tlsdesc_add_lo12_nc; break; + } + + return getAddressWithFixup(MO, FixupKind, Fixups); +} + +unsigned +AArch64MCCodeEmitter::getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + + const MCOperand &MO = MI.getOperand(OpIdx); + if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + + assert(MO.isExpr()); + + unsigned Modifier = AArch64MCExpr::VK_AARCH64_None; + if (const AArch64MCExpr *Expr = dyn_cast<AArch64MCExpr>(MO.getExpr())) + Modifier = Expr->getKind(); + + unsigned FixupKind = 0; + switch(Modifier) { + case AArch64MCExpr::VK_AARCH64_None: + FixupKind = AArch64::fixup_a64_adr_prel_page; + break; + case AArch64MCExpr::VK_AARCH64_GOT: + FixupKind = AArch64::fixup_a64_adr_prel_got_page; + break; + case 
AArch64MCExpr::VK_AARCH64_GOTTPREL: + FixupKind = AArch64::fixup_a64_adr_gottprel_page; + break; + case AArch64MCExpr::VK_AARCH64_TLSDESC: + FixupKind = AArch64::fixup_a64_tlsdesc_adr_page; + break; + default: + llvm_unreachable("Unknown symbol reference kind for ADRP instruction"); + } + + return getAddressWithFixup(MO, FixupKind, Fixups); +} + +unsigned +AArch64MCCodeEmitter::getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Only immediate expected for shift"); + + return ((32 - MO.getImm()) & 0x1f) | (31 - MO.getImm()) << 6; +} + +unsigned +AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Only immediate expected for shift"); + + return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6; +} + + +template<AArch64::Fixups fixupDesired> unsigned +AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI, + unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = MI.getOperand(OpIdx); + + if (MO.isExpr()) + return getAddressWithFixup(MO, fixupDesired, Fixups); + + assert(MO.isImm()); + return MO.getImm(); +} + +unsigned +AArch64MCCodeEmitter::getLoadLitLabelOpValue(const MCInst &MI, + unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = MI.getOperand(OpIdx); + + if (MO.isImm()) + return MO.getImm(); + + assert(MO.isExpr()); + + unsigned FixupKind; + if (isa<AArch64MCExpr>(MO.getExpr())) { // Type is known here, so cast<>. + assert(cast<AArch64MCExpr>(MO.getExpr())->getKind() + == AArch64MCExpr::VK_AARCH64_GOTTPREL + && "Invalid symbol modifier for literal load"); + FixupKind = AArch64::fixup_a64_ld_gottprel_prel19; + } else { + FixupKind = AArch64::fixup_a64_ld_prel; + } + + return getAddressWithFixup(MO, FixupKind, Fixups); +} + + +unsigned 
+AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, + const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const { + if (MO.isReg()) { + return Ctx.getRegisterInfo().getEncodingValue(MO.getReg()); + } else if (MO.isImm()) { + return static_cast<unsigned>(MO.getImm()); + } + + llvm_unreachable("Unable to encode MCOperand!"); + return 0; +} + +unsigned +AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &UImm16MO = MI.getOperand(OpIdx); + const MCOperand &ShiftMO = MI.getOperand(OpIdx + 1); + + unsigned Result = static_cast<unsigned>(ShiftMO.getImm()) << 16; + + if (UImm16MO.isImm()) { + Result |= UImm16MO.getImm(); + return Result; + } + + const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr()); + AArch64::Fixups requestedFixup; + switch (A64E->getKind()) { + default: llvm_unreachable("unexpected expression modifier"); + case AArch64MCExpr::VK_AARCH64_ABS_G0: + requestedFixup = AArch64::fixup_a64_movw_uabs_g0; break; + case AArch64MCExpr::VK_AARCH64_ABS_G0_NC: + requestedFixup = AArch64::fixup_a64_movw_uabs_g0_nc; break; + case AArch64MCExpr::VK_AARCH64_ABS_G1: + requestedFixup = AArch64::fixup_a64_movw_uabs_g1; break; + case AArch64MCExpr::VK_AARCH64_ABS_G1_NC: + requestedFixup = AArch64::fixup_a64_movw_uabs_g1_nc; break; + case AArch64MCExpr::VK_AARCH64_ABS_G2: + requestedFixup = AArch64::fixup_a64_movw_uabs_g2; break; + case AArch64MCExpr::VK_AARCH64_ABS_G2_NC: + requestedFixup = AArch64::fixup_a64_movw_uabs_g2_nc; break; + case AArch64MCExpr::VK_AARCH64_ABS_G3: + requestedFixup = AArch64::fixup_a64_movw_uabs_g3; break; + case AArch64MCExpr::VK_AARCH64_SABS_G0: + requestedFixup = AArch64::fixup_a64_movw_sabs_g0; break; + case AArch64MCExpr::VK_AARCH64_SABS_G1: + requestedFixup = AArch64::fixup_a64_movw_sabs_g1; break; + case AArch64MCExpr::VK_AARCH64_SABS_G2: + requestedFixup = AArch64::fixup_a64_movw_sabs_g2; break; + case 
AArch64MCExpr::VK_AARCH64_DTPREL_G2: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g2; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G1: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g1; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g1_nc; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G0: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g0; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g0_nc; break; + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: + requestedFixup = AArch64::fixup_a64_movw_gottprel_g1; break; + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC: + requestedFixup = AArch64::fixup_a64_movw_gottprel_g0_nc; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G2: + requestedFixup = AArch64::fixup_a64_movw_tprel_g2; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G1: + requestedFixup = AArch64::fixup_a64_movw_tprel_g1; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC: + requestedFixup = AArch64::fixup_a64_movw_tprel_g1_nc; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G0: + requestedFixup = AArch64::fixup_a64_movw_tprel_g0; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC: + requestedFixup = AArch64::fixup_a64_movw_tprel_g0_nc; break; + } + + return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups); +} + +unsigned AArch64MCCodeEmitter::fixFCMPImm(const MCInst &MI, + unsigned EncodedValue) const { + // For FCMP[E] Rn, #0.0, the Rm field has a canonical representation + // with 0s, but is architecturally ignored + EncodedValue &= ~0x1f0000u; + + return EncodedValue; +} + +template<int hasRs, int hasRt2> unsigned +AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI, + unsigned EncodedValue) const { + if (!hasRs) EncodedValue |= 0x001F0000; + if (!hasRt2) EncodedValue |= 0x00007C00; + + return EncodedValue; +} + +unsigned +AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue) const { + // If one of the 
signed fixup kinds is applied to a MOVZ instruction, the + // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's + // job to ensure that any bits possibly affected by this are 0. This means we + // must zero out bit 30 (essentially emitting a MOVN). + MCOperand UImm16MO = MI.getOperand(1); + + // Nothing to do if there's no fixup. + if (UImm16MO.isImm()) + return EncodedValue; + + const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr()); + switch (A64E->getKind()) { + case AArch64MCExpr::VK_AARCH64_SABS_G0: + case AArch64MCExpr::VK_AARCH64_SABS_G1: + case AArch64MCExpr::VK_AARCH64_SABS_G2: + case AArch64MCExpr::VK_AARCH64_DTPREL_G2: + case AArch64MCExpr::VK_AARCH64_DTPREL_G1: + case AArch64MCExpr::VK_AARCH64_DTPREL_G0: + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: + case AArch64MCExpr::VK_AARCH64_TPREL_G2: + case AArch64MCExpr::VK_AARCH64_TPREL_G1: + case AArch64MCExpr::VK_AARCH64_TPREL_G0: + return EncodedValue & ~(1u << 30); + default: + // Nothing to do for an unsigned fixup. + return EncodedValue; + } + + llvm_unreachable("Should have returned by now"); +} + +unsigned +AArch64MCCodeEmitter::fixMulHigh(const MCInst &MI, + unsigned EncodedValue) const { + // The Ra field of SMULH and UMULH is unused: it should be assembled as 31 + // (i.e. all bits 1) but is ignored by the processor. + EncodedValue |= 0x1f << 10; + return EncodedValue; +} + +MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new AArch64MCCodeEmitter(MCII, STI, Ctx); +} + +void AArch64MCCodeEmitter:: +EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + if (MI.getOpcode() == AArch64::TLSDESCCALL) { + // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the + // following (BLR) instruction. It doesn't emit any code itself so it + // doesn't go through the normal TableGenerated channels. 
+ MCFixupKind Fixup = MCFixupKind(AArch64::fixup_a64_tlsdesc_call); + const MCExpr *Expr; + Expr = AArch64MCExpr::CreateTLSDesc(MI.getOperand(0).getExpr(), Ctx); + Fixups.push_back(MCFixup::Create(0, Expr, Fixup)); + return; + } + + uint32_t Binary = getBinaryCodeForInstr(MI, Fixups); + + EmitInstruction(Binary, OS); +} + + +#include "AArch64GenMCCodeEmitter.inc" diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp new file mode 100644 index 0000000..e86e04a --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -0,0 +1,173 @@ +//===-- AArch64MCExpr.cpp - AArch64 specific MC expression classes --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "aarch64mcexpr" +#include "AArch64MCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELF.h" +#include "llvm/Object/ELF.h" + +using namespace llvm; + +const AArch64MCExpr* +AArch64MCExpr::Create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx) { + return new (Ctx) AArch64MCExpr(Kind, Expr); +} + +void AArch64MCExpr::PrintImpl(raw_ostream &OS) const { + switch (Kind) { + default: llvm_unreachable("Invalid kind!"); + case VK_AARCH64_GOT: OS << ":got:"; break; + case VK_AARCH64_GOT_LO12: OS << ":got_lo12:"; break; + case VK_AARCH64_LO12: OS << ":lo12:"; break; + case VK_AARCH64_ABS_G0: OS << ":abs_g0:"; break; + case VK_AARCH64_ABS_G0_NC: OS << ":abs_g0_nc:"; break; + case VK_AARCH64_ABS_G1: OS << ":abs_g1:"; break; + case VK_AARCH64_ABS_G1_NC: OS << ":abs_g1_nc:"; break; + case VK_AARCH64_ABS_G2: OS << ":abs_g2:"; break; + case VK_AARCH64_ABS_G2_NC: OS << ":abs_g2_nc:"; break; + case VK_AARCH64_ABS_G3: OS << ":abs_g3:"; break; + case VK_AARCH64_SABS_G0: OS << 
":abs_g0_s:"; break; + case VK_AARCH64_SABS_G1: OS << ":abs_g1_s:"; break; + case VK_AARCH64_SABS_G2: OS << ":abs_g2_s:"; break; + case VK_AARCH64_DTPREL_G2: OS << ":dtprel_g2:"; break; + case VK_AARCH64_DTPREL_G1: OS << ":dtprel_g1:"; break; + case VK_AARCH64_DTPREL_G1_NC: OS << ":dtprel_g1_nc:"; break; + case VK_AARCH64_DTPREL_G0: OS << ":dtprel_g0:"; break; + case VK_AARCH64_DTPREL_G0_NC: OS << ":dtprel_g0_nc:"; break; + case VK_AARCH64_DTPREL_HI12: OS << ":dtprel_hi12:"; break; + case VK_AARCH64_DTPREL_LO12: OS << ":dtprel_lo12:"; break; + case VK_AARCH64_DTPREL_LO12_NC: OS << ":dtprel_lo12_nc:"; break; + case VK_AARCH64_GOTTPREL_G1: OS << ":gottprel_g1:"; break; + case VK_AARCH64_GOTTPREL_G0_NC: OS << ":gottprel_g0_nc:"; break; + case VK_AARCH64_GOTTPREL: OS << ":gottprel:"; break; + case VK_AARCH64_GOTTPREL_LO12: OS << ":gottprel_lo12:"; break; + case VK_AARCH64_TPREL_G2: OS << ":tprel_g2:"; break; + case VK_AARCH64_TPREL_G1: OS << ":tprel_g1:"; break; + case VK_AARCH64_TPREL_G1_NC: OS << ":tprel_g1_nc:"; break; + case VK_AARCH64_TPREL_G0: OS << ":tprel_g0:"; break; + case VK_AARCH64_TPREL_G0_NC: OS << ":tprel_g0_nc:"; break; + case VK_AARCH64_TPREL_HI12: OS << ":tprel_hi12:"; break; + case VK_AARCH64_TPREL_LO12: OS << ":tprel_lo12:"; break; + case VK_AARCH64_TPREL_LO12_NC: OS << ":tprel_lo12_nc:"; break; + case VK_AARCH64_TLSDESC: OS << ":tlsdesc:"; break; + case VK_AARCH64_TLSDESC_LO12: OS << ":tlsdesc_lo12:"; break; + + } + + const MCExpr *Expr = getSubExpr(); + if (Expr->getKind() != MCExpr::SymbolRef) + OS << '('; + Expr->print(OS); + if (Expr->getKind() != MCExpr::SymbolRef) + OS << ')'; +} + +bool +AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const { + return getSubExpr()->EvaluateAsRelocatable(Res, *Layout); +} + +static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { + switch (Expr->getKind()) { + case MCExpr::Target: + llvm_unreachable("Can't handle nested target expression"); + break; 
+ case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr); + fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); + fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); + break; + } + + case MCExpr::SymbolRef: { + // We're known to be under a TLS fixup, so any symbol should be + // modified. There should be only one. + const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr); + MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol()); + MCELF::SetType(SD, ELF::STT_TLS); + break; + } + + case MCExpr::Unary: + fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm); + break; + } +} + +void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { + switch (getKind()) { + default: + return; + case VK_AARCH64_DTPREL_G2: + case VK_AARCH64_DTPREL_G1: + case VK_AARCH64_DTPREL_G1_NC: + case VK_AARCH64_DTPREL_G0: + case VK_AARCH64_DTPREL_G0_NC: + case VK_AARCH64_DTPREL_HI12: + case VK_AARCH64_DTPREL_LO12: + case VK_AARCH64_DTPREL_LO12_NC: + case VK_AARCH64_GOTTPREL_G1: + case VK_AARCH64_GOTTPREL_G0_NC: + case VK_AARCH64_GOTTPREL: + case VK_AARCH64_GOTTPREL_LO12: + case VK_AARCH64_TPREL_G2: + case VK_AARCH64_TPREL_G1: + case VK_AARCH64_TPREL_G1_NC: + case VK_AARCH64_TPREL_G0: + case VK_AARCH64_TPREL_G0_NC: + case VK_AARCH64_TPREL_HI12: + case VK_AARCH64_TPREL_LO12: + case VK_AARCH64_TPREL_LO12_NC: + case VK_AARCH64_TLSDESC: + case VK_AARCH64_TLSDESC_LO12: + break; + } + + fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); +} + +// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps +// that method should be made public? +// FIXME: really do above: now that two backends are using it. 
+static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) { + switch (Value->getKind()) { + case MCExpr::Target: + llvm_unreachable("Can't handle nested target expr!"); + break; + + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); + AddValueSymbolsImpl(BE->getLHS(), Asm); + AddValueSymbolsImpl(BE->getRHS(), Asm); + break; + } + + case MCExpr::SymbolRef: + Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); + break; + + case MCExpr::Unary: + AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm); + break; + } +} + +void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const { + AddValueSymbolsImpl(getSubExpr(), Asm); +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h new file mode 100644 index 0000000..20adc0c --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -0,0 +1,161 @@ +//==- AArch64MCExpr.h - AArch64 specific MC expression classes --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64MCEXPR_H +#define LLVM_AARCH64MCEXPR_H + +#include "llvm/MC/MCExpr.h" + +namespace llvm { + +class AArch64MCExpr : public MCTargetExpr { +public: + enum VariantKind { + VK_AARCH64_None, + VK_AARCH64_GOT, // :got: modifier in assembly + VK_AARCH64_GOT_LO12, // :got_lo12: + VK_AARCH64_LO12, // :lo12: + + VK_AARCH64_ABS_G0, // :abs_g0: + VK_AARCH64_ABS_G0_NC, // :abs_g0_nc: + VK_AARCH64_ABS_G1, + VK_AARCH64_ABS_G1_NC, + VK_AARCH64_ABS_G2, + VK_AARCH64_ABS_G2_NC, + VK_AARCH64_ABS_G3, + + VK_AARCH64_SABS_G0, // :abs_g0_s: + VK_AARCH64_SABS_G1, + VK_AARCH64_SABS_G2, + + VK_AARCH64_DTPREL_G2, // :dtprel_g2: + VK_AARCH64_DTPREL_G1, + VK_AARCH64_DTPREL_G1_NC, + VK_AARCH64_DTPREL_G0, + VK_AARCH64_DTPREL_G0_NC, + VK_AARCH64_DTPREL_HI12, + VK_AARCH64_DTPREL_LO12, + VK_AARCH64_DTPREL_LO12_NC, + + VK_AARCH64_GOTTPREL_G1, // :gottprel_g1: + VK_AARCH64_GOTTPREL_G0_NC, + VK_AARCH64_GOTTPREL, + VK_AARCH64_GOTTPREL_LO12, + + VK_AARCH64_TPREL_G2, // :tprel_g2: + VK_AARCH64_TPREL_G1, + VK_AARCH64_TPREL_G1_NC, + VK_AARCH64_TPREL_G0, + VK_AARCH64_TPREL_G0_NC, + VK_AARCH64_TPREL_HI12, + VK_AARCH64_TPREL_LO12, + VK_AARCH64_TPREL_LO12_NC, + + VK_AARCH64_TLSDESC, // :tlsdesc: + VK_AARCH64_TLSDESC_LO12 + }; + +private: + const VariantKind Kind; + const MCExpr *Expr; + + explicit AArch64MCExpr(VariantKind _Kind, const MCExpr *_Expr) + : Kind(_Kind), Expr(_Expr) {} + +public: + /// @name Construction + /// @{ + + static const AArch64MCExpr *Create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx); + + static const AArch64MCExpr *CreateLo12(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_AARCH64_LO12, Expr, Ctx); + } + + static const AArch64MCExpr *CreateGOT(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_AARCH64_GOT, Expr, Ctx); + } + + static const AArch64MCExpr *CreateGOTLo12(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_AARCH64_GOT_LO12, Expr, Ctx); + } + 
+ + static const AArch64MCExpr *CreateDTPREL_G1(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_DTPREL_G1, Expr, Ctx); + } + + static const AArch64MCExpr *CreateDTPREL_G0_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_DTPREL_G0_NC, Expr, Ctx); + } + + static const AArch64MCExpr *CreateGOTTPREL(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_GOTTPREL, Expr, Ctx); + } + + static const AArch64MCExpr *CreateGOTTPRELLo12(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_GOTTPREL_LO12, Expr, Ctx); + } + + static const AArch64MCExpr *CreateTLSDesc(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_TLSDESC, Expr, Ctx); + } + + static const AArch64MCExpr *CreateTLSDescLo12(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_TLSDESC_LO12, Expr, Ctx); + } + + static const AArch64MCExpr *CreateTPREL_G1(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_TPREL_G1, Expr, Ctx); + } + + static const AArch64MCExpr *CreateTPREL_G0_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx); + } + + /// @} + /// @name Accessors + /// @{ + + /// getKind - Get the kind of this expression. + VariantKind getKind() const { return Kind; } + + /// getSubExpr - Get the child of this expression. 
+ const MCExpr *getSubExpr() const { return Expr; } + + /// @} + + void PrintImpl(raw_ostream &OS) const; + bool EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const; + void AddValueSymbols(MCAssembler *) const; + const MCSection *FindAssociatedSection() const { + return getSubExpr()->FindAssociatedSection(); + } + + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const; + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } + + static bool classof(const AArch64MCExpr *) { return true; } + +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp new file mode 100644 index 0000000..0d2855f --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -0,0 +1,991 @@ +//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides AArch64 specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "AArch64MCTargetDesc.h" +#include "AArch64BaseInfo.h" +#include "AArch64ELFStreamer.h" +#include "AArch64MCAsmInfo.h" +#include "InstPrinter/AArch64InstPrinter.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/ErrorHandling.h" + +#define GET_REGINFO_MC_DESC +#include "AArch64GenRegisterInfo.inc" + +#define GET_INSTRINFO_MC_DESC +#include "AArch64GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "AArch64GenSubtargetInfo.inc" + +using namespace llvm; + +StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const { + for (unsigned i = 0; i < NumPairs; ++i) { + if (Pairs[i].Value == Value) { + Valid = true; + return Pairs[i].Name; + } + } + + Valid = false; + return StringRef(); +} + +uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const { + std::string LowerCaseName = Name.lower(); + for (unsigned i = 0; i < NumPairs; ++i) { + if (Pairs[i].Name == LowerCaseName) { + Valid = true; + return Pairs[i].Value; + } + } + + Valid = false; + return -1; +} + +bool NamedImmMapper::validImm(uint32_t Value) const { + return Value < TooBigImm; +} + +const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = { + {"s1e1r", S1E1R}, + {"s1e2r", S1E2R}, + {"s1e3r", S1E3R}, + {"s1e1w", S1E1W}, + {"s1e2w", S1E2W}, + {"s1e3w", S1E3W}, + {"s1e0r", S1E0R}, + {"s1e0w", S1E0W}, + {"s12e1r", S12E1R}, + {"s12e1w", S12E1W}, + {"s12e0r", S12E0R}, + {"s12e0w", S12E0W}, +}; + +A64AT::ATMapper::ATMapper() + : NamedImmMapper(ATPairs, 0) {} + +const NamedImmMapper::Mapping 
A64DB::DBarrierMapper::DBarrierPairs[] = { + {"oshld", OSHLD}, + {"oshst", OSHST}, + {"osh", OSH}, + {"nshld", NSHLD}, + {"nshst", NSHST}, + {"nsh", NSH}, + {"ishld", ISHLD}, + {"ishst", ISHST}, + {"ish", ISH}, + {"ld", LD}, + {"st", ST}, + {"sy", SY} +}; + +A64DB::DBarrierMapper::DBarrierMapper() + : NamedImmMapper(DBarrierPairs, 16u) {} + +const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = { + {"zva", ZVA}, + {"ivac", IVAC}, + {"isw", ISW}, + {"cvac", CVAC}, + {"csw", CSW}, + {"cvau", CVAU}, + {"civac", CIVAC}, + {"cisw", CISW} +}; + +A64DC::DCMapper::DCMapper() + : NamedImmMapper(DCPairs, 0) {} + +const NamedImmMapper::Mapping A64IC::ICMapper::ICPairs[] = { + {"ialluis", IALLUIS}, + {"iallu", IALLU}, + {"ivau", IVAU} +}; + +A64IC::ICMapper::ICMapper() + : NamedImmMapper(ICPairs, 0) {} + +const NamedImmMapper::Mapping A64ISB::ISBMapper::ISBPairs[] = { + {"sy", SY}, +}; + +A64ISB::ISBMapper::ISBMapper() + : NamedImmMapper(ISBPairs, 16) {} + +const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = { + {"pldl1keep", PLDL1KEEP}, + {"pldl1strm", PLDL1STRM}, + {"pldl2keep", PLDL2KEEP}, + {"pldl2strm", PLDL2STRM}, + {"pldl3keep", PLDL3KEEP}, + {"pldl3strm", PLDL3STRM}, + {"pstl1keep", PSTL1KEEP}, + {"pstl1strm", PSTL1STRM}, + {"pstl2keep", PSTL2KEEP}, + {"pstl2strm", PSTL2STRM}, + {"pstl3keep", PSTL3KEEP}, + {"pstl3strm", PSTL3STRM} +}; + +A64PRFM::PRFMMapper::PRFMMapper() + : NamedImmMapper(PRFMPairs, 32) {} + +const NamedImmMapper::Mapping A64PState::PStateMapper::PStatePairs[] = { + {"spsel", SPSel}, + {"daifset", DAIFSet}, + {"daifclr", DAIFClr} +}; + +A64PState::PStateMapper::PStateMapper() + : NamedImmMapper(PStatePairs, 0) {} + +const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = { + {"mdccsr_el0", MDCCSR_EL0}, + {"dbgdtrrx_el0", DBGDTRRX_EL0}, + {"mdrar_el1", MDRAR_EL1}, + {"oslsr_el1", OSLSR_EL1}, + {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1}, + {"pmceid0_el0", PMCEID0_EL0}, + {"pmceid1_el0", PMCEID1_EL0}, + {"midr_el1", MIDR_EL1}, + 
{"ccsidr_el1", CCSIDR_EL1}, + {"clidr_el1", CLIDR_EL1}, + {"ctr_el0", CTR_EL0}, + {"mpidr_el1", MPIDR_EL1}, + {"revidr_el1", REVIDR_EL1}, + {"aidr_el1", AIDR_EL1}, + {"dczid_el0", DCZID_EL0}, + {"id_pfr0_el1", ID_PFR0_EL1}, + {"id_pfr1_el1", ID_PFR1_EL1}, + {"id_dfr0_el1", ID_DFR0_EL1}, + {"id_afr0_el1", ID_AFR0_EL1}, + {"id_mmfr0_el1", ID_MMFR0_EL1}, + {"id_mmfr1_el1", ID_MMFR1_EL1}, + {"id_mmfr2_el1", ID_MMFR2_EL1}, + {"id_mmfr3_el1", ID_MMFR3_EL1}, + {"id_isar0_el1", ID_ISAR0_EL1}, + {"id_isar1_el1", ID_ISAR1_EL1}, + {"id_isar2_el1", ID_ISAR2_EL1}, + {"id_isar3_el1", ID_ISAR3_EL1}, + {"id_isar4_el1", ID_ISAR4_EL1}, + {"id_isar5_el1", ID_ISAR5_EL1}, + {"id_aa64pfr0_el1", ID_AA64PFR0_EL1}, + {"id_aa64pfr1_el1", ID_AA64PFR1_EL1}, + {"id_aa64dfr0_el1", ID_AA64DFR0_EL1}, + {"id_aa64dfr1_el1", ID_AA64DFR1_EL1}, + {"id_aa64afr0_el1", ID_AA64AFR0_EL1}, + {"id_aa64afr1_el1", ID_AA64AFR1_EL1}, + {"id_aa64isar0_el1", ID_AA64ISAR0_EL1}, + {"id_aa64isar1_el1", ID_AA64ISAR1_EL1}, + {"id_aa64mmfr0_el1", ID_AA64MMFR0_EL1}, + {"id_aa64mmfr1_el1", ID_AA64MMFR1_EL1}, + {"mvfr0_el1", MVFR0_EL1}, + {"mvfr1_el1", MVFR1_EL1}, + {"mvfr2_el1", MVFR2_EL1}, + {"rvbar_el1", RVBAR_EL1}, + {"rvbar_el2", RVBAR_EL2}, + {"rvbar_el3", RVBAR_EL3}, + {"isr_el1", ISR_EL1}, + {"cntpct_el0", CNTPCT_EL0}, + {"cntvct_el0", CNTVCT_EL0} +}; + +A64SysReg::MRSMapper::MRSMapper() { + InstPairs = &MRSPairs[0]; + NumInstPairs = llvm::array_lengthof(MRSPairs); +} + +const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = { + {"dbgdtrtx_el0", DBGDTRTX_EL0}, + {"oslar_el1", OSLAR_EL1}, + {"pmswinc_el0", PMSWINC_EL0} +}; + +A64SysReg::MSRMapper::MSRMapper() { + InstPairs = &MSRPairs[0]; + NumInstPairs = llvm::array_lengthof(MSRPairs); +} + + +const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = { + {"osdtrrx_el1", OSDTRRX_EL1}, + {"osdtrtx_el1", OSDTRTX_EL1}, + {"teecr32_el1", TEECR32_EL1}, + {"mdccint_el1", MDCCINT_EL1}, + {"mdscr_el1", MDSCR_EL1}, + {"dbgdtr_el0", DBGDTR_EL0}, + 
{"oseccr_el1", OSECCR_EL1}, + {"dbgvcr32_el2", DBGVCR32_EL2}, + {"dbgbvr0_el1", DBGBVR0_EL1}, + {"dbgbvr1_el1", DBGBVR1_EL1}, + {"dbgbvr2_el1", DBGBVR2_EL1}, + {"dbgbvr3_el1", DBGBVR3_EL1}, + {"dbgbvr4_el1", DBGBVR4_EL1}, + {"dbgbvr5_el1", DBGBVR5_EL1}, + {"dbgbvr6_el1", DBGBVR6_EL1}, + {"dbgbvr7_el1", DBGBVR7_EL1}, + {"dbgbvr8_el1", DBGBVR8_EL1}, + {"dbgbvr9_el1", DBGBVR9_EL1}, + {"dbgbvr10_el1", DBGBVR10_EL1}, + {"dbgbvr11_el1", DBGBVR11_EL1}, + {"dbgbvr12_el1", DBGBVR12_EL1}, + {"dbgbvr13_el1", DBGBVR13_EL1}, + {"dbgbvr14_el1", DBGBVR14_EL1}, + {"dbgbvr15_el1", DBGBVR15_EL1}, + {"dbgbcr0_el1", DBGBCR0_EL1}, + {"dbgbcr1_el1", DBGBCR1_EL1}, + {"dbgbcr2_el1", DBGBCR2_EL1}, + {"dbgbcr3_el1", DBGBCR3_EL1}, + {"dbgbcr4_el1", DBGBCR4_EL1}, + {"dbgbcr5_el1", DBGBCR5_EL1}, + {"dbgbcr6_el1", DBGBCR6_EL1}, + {"dbgbcr7_el1", DBGBCR7_EL1}, + {"dbgbcr8_el1", DBGBCR8_EL1}, + {"dbgbcr9_el1", DBGBCR9_EL1}, + {"dbgbcr10_el1", DBGBCR10_EL1}, + {"dbgbcr11_el1", DBGBCR11_EL1}, + {"dbgbcr12_el1", DBGBCR12_EL1}, + {"dbgbcr13_el1", DBGBCR13_EL1}, + {"dbgbcr14_el1", DBGBCR14_EL1}, + {"dbgbcr15_el1", DBGBCR15_EL1}, + {"dbgwvr0_el1", DBGWVR0_EL1}, + {"dbgwvr1_el1", DBGWVR1_EL1}, + {"dbgwvr2_el1", DBGWVR2_EL1}, + {"dbgwvr3_el1", DBGWVR3_EL1}, + {"dbgwvr4_el1", DBGWVR4_EL1}, + {"dbgwvr5_el1", DBGWVR5_EL1}, + {"dbgwvr6_el1", DBGWVR6_EL1}, + {"dbgwvr7_el1", DBGWVR7_EL1}, + {"dbgwvr8_el1", DBGWVR8_EL1}, + {"dbgwvr9_el1", DBGWVR9_EL1}, + {"dbgwvr10_el1", DBGWVR10_EL1}, + {"dbgwvr11_el1", DBGWVR11_EL1}, + {"dbgwvr12_el1", DBGWVR12_EL1}, + {"dbgwvr13_el1", DBGWVR13_EL1}, + {"dbgwvr14_el1", DBGWVR14_EL1}, + {"dbgwvr15_el1", DBGWVR15_EL1}, + {"dbgwcr0_el1", DBGWCR0_EL1}, + {"dbgwcr1_el1", DBGWCR1_EL1}, + {"dbgwcr2_el1", DBGWCR2_EL1}, + {"dbgwcr3_el1", DBGWCR3_EL1}, + {"dbgwcr4_el1", DBGWCR4_EL1}, + {"dbgwcr5_el1", DBGWCR5_EL1}, + {"dbgwcr6_el1", DBGWCR6_EL1}, + {"dbgwcr7_el1", DBGWCR7_EL1}, + {"dbgwcr8_el1", DBGWCR8_EL1}, + {"dbgwcr9_el1", DBGWCR9_EL1}, + {"dbgwcr10_el1", DBGWCR10_EL1}, + 
{"dbgwcr11_el1", DBGWCR11_EL1}, + {"dbgwcr12_el1", DBGWCR12_EL1}, + {"dbgwcr13_el1", DBGWCR13_EL1}, + {"dbgwcr14_el1", DBGWCR14_EL1}, + {"dbgwcr15_el1", DBGWCR15_EL1}, + {"teehbr32_el1", TEEHBR32_EL1}, + {"osdlr_el1", OSDLR_EL1}, + {"dbgprcr_el1", DBGPRCR_EL1}, + {"dbgclaimset_el1", DBGCLAIMSET_EL1}, + {"dbgclaimclr_el1", DBGCLAIMCLR_EL1}, + {"csselr_el1", CSSELR_EL1}, + {"vpidr_el2", VPIDR_EL2}, + {"vmpidr_el2", VMPIDR_EL2}, + {"sctlr_el1", SCTLR_EL1}, + {"sctlr_el2", SCTLR_EL2}, + {"sctlr_el3", SCTLR_EL3}, + {"actlr_el1", ACTLR_EL1}, + {"actlr_el2", ACTLR_EL2}, + {"actlr_el3", ACTLR_EL3}, + {"cpacr_el1", CPACR_EL1}, + {"hcr_el2", HCR_EL2}, + {"scr_el3", SCR_EL3}, + {"mdcr_el2", MDCR_EL2}, + {"sder32_el3", SDER32_EL3}, + {"cptr_el2", CPTR_EL2}, + {"cptr_el3", CPTR_EL3}, + {"hstr_el2", HSTR_EL2}, + {"hacr_el2", HACR_EL2}, + {"mdcr_el3", MDCR_EL3}, + {"ttbr0_el1", TTBR0_EL1}, + {"ttbr0_el2", TTBR0_EL2}, + {"ttbr0_el3", TTBR0_EL3}, + {"ttbr1_el1", TTBR1_EL1}, + {"tcr_el1", TCR_EL1}, + {"tcr_el2", TCR_EL2}, + {"tcr_el3", TCR_EL3}, + {"vttbr_el2", VTTBR_EL2}, + {"vtcr_el2", VTCR_EL2}, + {"dacr32_el2", DACR32_EL2}, + {"spsr_el1", SPSR_EL1}, + {"spsr_el2", SPSR_EL2}, + {"spsr_el3", SPSR_EL3}, + {"elr_el1", ELR_EL1}, + {"elr_el2", ELR_EL2}, + {"elr_el3", ELR_EL3}, + {"sp_el0", SP_EL0}, + {"sp_el1", SP_EL1}, + {"sp_el2", SP_EL2}, + {"spsel", SPSel}, + {"nzcv", NZCV}, + {"daif", DAIF}, + {"currentel", CurrentEL}, + {"spsr_irq", SPSR_irq}, + {"spsr_abt", SPSR_abt}, + {"spsr_und", SPSR_und}, + {"spsr_fiq", SPSR_fiq}, + {"fpcr", FPCR}, + {"fpsr", FPSR}, + {"dspsr_el0", DSPSR_EL0}, + {"dlr_el0", DLR_EL0}, + {"ifsr32_el2", IFSR32_EL2}, + {"afsr0_el1", AFSR0_EL1}, + {"afsr0_el2", AFSR0_EL2}, + {"afsr0_el3", AFSR0_EL3}, + {"afsr1_el1", AFSR1_EL1}, + {"afsr1_el2", AFSR1_EL2}, + {"afsr1_el3", AFSR1_EL3}, + {"esr_el1", ESR_EL1}, + {"esr_el2", ESR_EL2}, + {"esr_el3", ESR_EL3}, + {"fpexc32_el2", FPEXC32_EL2}, + {"far_el1", FAR_EL1}, + {"far_el2", FAR_EL2}, + {"far_el3", FAR_EL3}, + 
{"hpfar_el2", HPFAR_EL2}, + {"par_el1", PAR_EL1}, + {"pmcr_el0", PMCR_EL0}, + {"pmcntenset_el0", PMCNTENSET_EL0}, + {"pmcntenclr_el0", PMCNTENCLR_EL0}, + {"pmovsclr_el0", PMOVSCLR_EL0}, + {"pmselr_el0", PMSELR_EL0}, + {"pmccntr_el0", PMCCNTR_EL0}, + {"pmxevtyper_el0", PMXEVTYPER_EL0}, + {"pmxevcntr_el0", PMXEVCNTR_EL0}, + {"pmuserenr_el0", PMUSERENR_EL0}, + {"pmintenset_el1", PMINTENSET_EL1}, + {"pmintenclr_el1", PMINTENCLR_EL1}, + {"pmovsset_el0", PMOVSSET_EL0}, + {"mair_el1", MAIR_EL1}, + {"mair_el2", MAIR_EL2}, + {"mair_el3", MAIR_EL3}, + {"amair_el1", AMAIR_EL1}, + {"amair_el2", AMAIR_EL2}, + {"amair_el3", AMAIR_EL3}, + {"vbar_el1", VBAR_EL1}, + {"vbar_el2", VBAR_EL2}, + {"vbar_el3", VBAR_EL3}, + {"rmr_el1", RMR_EL1}, + {"rmr_el2", RMR_EL2}, + {"rmr_el3", RMR_EL3}, + {"contextidr_el1", CONTEXTIDR_EL1}, + {"tpidr_el0", TPIDR_EL0}, + {"tpidr_el2", TPIDR_EL2}, + {"tpidr_el3", TPIDR_EL3}, + {"tpidrro_el0", TPIDRRO_EL0}, + {"tpidr_el1", TPIDR_EL1}, + {"cntfrq_el0", CNTFRQ_EL0}, + {"cntvoff_el2", CNTVOFF_EL2}, + {"cntkctl_el1", CNTKCTL_EL1}, + {"cnthctl_el2", CNTHCTL_EL2}, + {"cntp_tval_el0", CNTP_TVAL_EL0}, + {"cnthp_tval_el2", CNTHP_TVAL_EL2}, + {"cntps_tval_el1", CNTPS_TVAL_EL1}, + {"cntp_ctl_el0", CNTP_CTL_EL0}, + {"cnthp_ctl_el2", CNTHP_CTL_EL2}, + {"cntps_ctl_el1", CNTPS_CTL_EL1}, + {"cntp_cval_el0", CNTP_CVAL_EL0}, + {"cnthp_cval_el2", CNTHP_CVAL_EL2}, + {"cntps_cval_el1", CNTPS_CVAL_EL1}, + {"cntv_tval_el0", CNTV_TVAL_EL0}, + {"cntv_ctl_el0", CNTV_CTL_EL0}, + {"cntv_cval_el0", CNTV_CVAL_EL0}, + {"pmevcntr0_el0", PMEVCNTR0_EL0}, + {"pmevcntr1_el0", PMEVCNTR1_EL0}, + {"pmevcntr2_el0", PMEVCNTR2_EL0}, + {"pmevcntr3_el0", PMEVCNTR3_EL0}, + {"pmevcntr4_el0", PMEVCNTR4_EL0}, + {"pmevcntr5_el0", PMEVCNTR5_EL0}, + {"pmevcntr6_el0", PMEVCNTR6_EL0}, + {"pmevcntr7_el0", PMEVCNTR7_EL0}, + {"pmevcntr8_el0", PMEVCNTR8_EL0}, + {"pmevcntr9_el0", PMEVCNTR9_EL0}, + {"pmevcntr10_el0", PMEVCNTR10_EL0}, + {"pmevcntr11_el0", PMEVCNTR11_EL0}, + {"pmevcntr12_el0", PMEVCNTR12_EL0}, + 
{"pmevcntr13_el0", PMEVCNTR13_EL0}, + {"pmevcntr14_el0", PMEVCNTR14_EL0}, + {"pmevcntr15_el0", PMEVCNTR15_EL0}, + {"pmevcntr16_el0", PMEVCNTR16_EL0}, + {"pmevcntr17_el0", PMEVCNTR17_EL0}, + {"pmevcntr18_el0", PMEVCNTR18_EL0}, + {"pmevcntr19_el0", PMEVCNTR19_EL0}, + {"pmevcntr20_el0", PMEVCNTR20_EL0}, + {"pmevcntr21_el0", PMEVCNTR21_EL0}, + {"pmevcntr22_el0", PMEVCNTR22_EL0}, + {"pmevcntr23_el0", PMEVCNTR23_EL0}, + {"pmevcntr24_el0", PMEVCNTR24_EL0}, + {"pmevcntr25_el0", PMEVCNTR25_EL0}, + {"pmevcntr26_el0", PMEVCNTR26_EL0}, + {"pmevcntr27_el0", PMEVCNTR27_EL0}, + {"pmevcntr28_el0", PMEVCNTR28_EL0}, + {"pmevcntr29_el0", PMEVCNTR29_EL0}, + {"pmevcntr30_el0", PMEVCNTR30_EL0}, + {"pmccfiltr_el0", PMCCFILTR_EL0}, + {"pmevtyper0_el0", PMEVTYPER0_EL0}, + {"pmevtyper1_el0", PMEVTYPER1_EL0}, + {"pmevtyper2_el0", PMEVTYPER2_EL0}, + {"pmevtyper3_el0", PMEVTYPER3_EL0}, + {"pmevtyper4_el0", PMEVTYPER4_EL0}, + {"pmevtyper5_el0", PMEVTYPER5_EL0}, + {"pmevtyper6_el0", PMEVTYPER6_EL0}, + {"pmevtyper7_el0", PMEVTYPER7_EL0}, + {"pmevtyper8_el0", PMEVTYPER8_EL0}, + {"pmevtyper9_el0", PMEVTYPER9_EL0}, + {"pmevtyper10_el0", PMEVTYPER10_EL0}, + {"pmevtyper11_el0", PMEVTYPER11_EL0}, + {"pmevtyper12_el0", PMEVTYPER12_EL0}, + {"pmevtyper13_el0", PMEVTYPER13_EL0}, + {"pmevtyper14_el0", PMEVTYPER14_EL0}, + {"pmevtyper15_el0", PMEVTYPER15_EL0}, + {"pmevtyper16_el0", PMEVTYPER16_EL0}, + {"pmevtyper17_el0", PMEVTYPER17_EL0}, + {"pmevtyper18_el0", PMEVTYPER18_EL0}, + {"pmevtyper19_el0", PMEVTYPER19_EL0}, + {"pmevtyper20_el0", PMEVTYPER20_EL0}, + {"pmevtyper21_el0", PMEVTYPER21_EL0}, + {"pmevtyper22_el0", PMEVTYPER22_EL0}, + {"pmevtyper23_el0", PMEVTYPER23_EL0}, + {"pmevtyper24_el0", PMEVTYPER24_EL0}, + {"pmevtyper25_el0", PMEVTYPER25_EL0}, + {"pmevtyper26_el0", PMEVTYPER26_EL0}, + {"pmevtyper27_el0", PMEVTYPER27_EL0}, + {"pmevtyper28_el0", PMEVTYPER28_EL0}, + {"pmevtyper29_el0", PMEVTYPER29_EL0}, + {"pmevtyper30_el0", PMEVTYPER30_EL0}, +}; + +uint32_t 
+A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { + // First search the registers shared by all + std::string NameLower = Name.lower(); + for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { + if (SysRegPairs[i].Name == NameLower) { + Valid = true; + return SysRegPairs[i].Value; + } + } + + // Now try the instruction-specific registers (either read-only or + // write-only). + for (unsigned i = 0; i < NumInstPairs; ++i) { + if (InstPairs[i].Name == NameLower) { + Valid = true; + return InstPairs[i].Value; + } + } + + // Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name, where the bits + // are: 11 xxx 1x11 xxxx xxx + Regex GenericRegPattern("^s3_([0-7])_c(1[15])_c([0-9]|1[0-5])_([0-7])$"); + + SmallVector<StringRef, 4> Ops; + if (!GenericRegPattern.match(NameLower, &Ops)) { + Valid = false; + return -1; + } + + uint32_t Op0 = 3, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0; + uint32_t Bits; + Ops[1].getAsInteger(10, Op1); + Ops[2].getAsInteger(10, CRn); + Ops[3].getAsInteger(10, CRm); + Ops[4].getAsInteger(10, Op2); + Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2; + + Valid = true; + return Bits; +} + +std::string +A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { + for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { + if (SysRegPairs[i].Value == Bits) { + Valid = true; + return SysRegPairs[i].Name; + } + } + + for (unsigned i = 0; i < NumInstPairs; ++i) { + if (InstPairs[i].Value == Bits) { + Valid = true; + return InstPairs[i].Name; + } + } + + uint32_t Op0 = (Bits >> 14) & 0x3; + uint32_t Op1 = (Bits >> 11) & 0x7; + uint32_t CRn = (Bits >> 7) & 0xf; + uint32_t CRm = (Bits >> 3) & 0xf; + uint32_t Op2 = Bits & 0x7; + + // Only combinations matching: 11 xxx 1x11 xxxx xxx are valid for a generic + // name. 
+ if (Op0 != 3 || (CRn != 11 && CRn != 15)) { + Valid = false; + return ""; + } + + assert(Op0 == 3 && (CRn == 11 || CRn == 15) && "Invalid generic sysreg"); + + Valid = true; + return "s3_" + utostr(Op1) + "_c" + utostr(CRn) + + "_c" + utostr(CRm) + "_" + utostr(Op2); +} + +const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = { + {"ipas2e1is", IPAS2E1IS}, + {"ipas2le1is", IPAS2LE1IS}, + {"vmalle1is", VMALLE1IS}, + {"alle2is", ALLE2IS}, + {"alle3is", ALLE3IS}, + {"vae1is", VAE1IS}, + {"vae2is", VAE2IS}, + {"vae3is", VAE3IS}, + {"aside1is", ASIDE1IS}, + {"vaae1is", VAAE1IS}, + {"alle1is", ALLE1IS}, + {"vale1is", VALE1IS}, + {"vale2is", VALE2IS}, + {"vale3is", VALE3IS}, + {"vmalls12e1is", VMALLS12E1IS}, + {"vaale1is", VAALE1IS}, + {"ipas2e1", IPAS2E1}, + {"ipas2le1", IPAS2LE1}, + {"vmalle1", VMALLE1}, + {"alle2", ALLE2}, + {"alle3", ALLE3}, + {"vae1", VAE1}, + {"vae2", VAE2}, + {"vae3", VAE3}, + {"aside1", ASIDE1}, + {"vaae1", VAAE1}, + {"alle1", ALLE1}, + {"vale1", VALE1}, + {"vale2", VALE2}, + {"vale3", VALE3}, + {"vmalls12e1", VMALLS12E1}, + {"vaale1", VAALE1} +}; + +A64TLBI::TLBIMapper::TLBIMapper() + : NamedImmMapper(TLBIPairs, 0) {} + +bool A64Imms::isFPImm(const APFloat &Val, uint32_t &Imm8Bits) { + const fltSemantics &Sem = Val.getSemantics(); + unsigned FracBits = APFloat::semanticsPrecision(Sem) - 1; + + uint32_t ExpMask; + switch (FracBits) { + case 10: // IEEE half-precision + ExpMask = 0x1f; + break; + case 23: // IEEE single-precision + ExpMask = 0xff; + break; + case 52: // IEEE double-precision + ExpMask = 0x7ff; + break; + case 112: // IEEE quad-precision + // No immediates are valid for double precision. 
+ return false; + default: + llvm_unreachable("Only half, single and double precision supported"); + } + + uint32_t ExpStart = FracBits; + uint64_t FracMask = (1ULL << FracBits) - 1; + + uint32_t Sign = Val.isNegative(); + + uint64_t Bits= Val.bitcastToAPInt().getLimitedValue(); + uint64_t Fraction = Bits & FracMask; + int32_t Exponent = ((Bits >> ExpStart) & ExpMask); + Exponent -= ExpMask >> 1; + + // S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 5):imm8<5:0>:Zeros(19) + // D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 8):imm8<5:0>:Zeros(48) + // This translates to: only 4 bits of fraction; -3 <= exp <= 4. + uint64_t A64FracStart = FracBits - 4; + uint64_t A64FracMask = 0xf; + + // Are there too many fraction bits? + if (Fraction & ~(A64FracMask << A64FracStart)) + return false; + + if (Exponent < -3 || Exponent > 4) + return false; + + uint32_t PackedFraction = (Fraction >> A64FracStart) & A64FracMask; + uint32_t PackedExp = (Exponent + 7) & 0x7; + + Imm8Bits = (Sign << 7) | (PackedExp << 4) | PackedFraction; + return true; +} + +// Encoding of the immediate for logical (immediate) instructions: +// +// | N | imms | immr | size | R | S | +// |---+--------+--------+------+--------------+--------------| +// | 1 | ssssss | rrrrrr | 64 | UInt(rrrrrr) | UInt(ssssss) | +// | 0 | 0sssss | xrrrrr | 32 | UInt(rrrrr) | UInt(sssss) | +// | 0 | 10ssss | xxrrrr | 16 | UInt(rrrr) | UInt(ssss) | +// | 0 | 110sss | xxxrrr | 8 | UInt(rrr) | UInt(sss) | +// | 0 | 1110ss | xxxxrr | 4 | UInt(rr) | UInt(ss) | +// | 0 | 11110s | xxxxxr | 2 | UInt(r) | UInt(s) | +// | 0 | 11111x | - | | UNALLOCATED | | +// +// Columns 'R', 'S' and 'size' specify a "bitmask immediate" of size bits in +// which the lower S+1 bits are ones and the remaining bits are zero, then +// rotated right by R bits, which is then replicated across the datapath. +// +// + Values of 'N', 'imms' and 'immr' which do not match the above table are +// RESERVED. 
+// + If all 's' bits in the imms field are set then the instruction is +// RESERVED. +// + The 'x' bits in the 'immr' field are IGNORED. + +bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) { + int RepeatWidth; + int Rotation = 0; + int Num1s = 0; + + // Because there are S+1 ones in the replicated mask, an immediate of all + // zeros is not allowed. Filtering it here is probably more efficient. + if (Imm == 0) return false; + + for (RepeatWidth = RegWidth; RepeatWidth > 1; RepeatWidth /= 2) { + uint64_t RepeatMask = RepeatWidth == 64 ? -1 : (1ULL << RepeatWidth) - 1; + uint64_t ReplicatedMask = Imm & RepeatMask; + + if (ReplicatedMask == 0) continue; + + // First we have to make sure the mask is actually repeated in each slot for + // this width-specifier. + bool IsReplicatedMask = true; + for (unsigned i = RepeatWidth; i < RegWidth; i += RepeatWidth) { + if (((Imm >> i) & RepeatMask) != ReplicatedMask) { + IsReplicatedMask = false; + break; + } + } + if (!IsReplicatedMask) continue; + + // Now we have to work out the amount of rotation needed. The first part of + // this calculation is actually independent of RepeatWidth, but the complex + // case will depend on it. + Rotation = CountTrailingZeros_64(Imm); + if (Rotation == 0) { + // There were no leading zeros, which means it's either in place or there + // are 1s at each end (e.g. 0x8003 needs rotating). + Rotation = RegWidth == 64 ? CountLeadingOnes_64(Imm) + : CountLeadingOnes_32(Imm); + Rotation = RepeatWidth - Rotation; + } + + uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation) + | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask); + // Of course, they may not actually be ones, so we have to check that: + if (!isMask_64(ReplicatedOnes)) + continue; + + Num1s = CountTrailingOnes_64(ReplicatedOnes); + + // We know we've got an almost valid encoding (certainly, if this is invalid + // no other parameters would work). 
+ break; + } + + // The encodings which would produce all 1s are RESERVED. + if (RepeatWidth == 1 || Num1s == RepeatWidth) return false; + + uint32_t N = RepeatWidth == 64; + uint32_t ImmR = RepeatWidth - Rotation; + uint32_t ImmS = Num1s - 1; + + switch (RepeatWidth) { + default: break; // No action required for other valid rotations. + case 16: ImmS |= 0x20; break; // 10ssss + case 8: ImmS |= 0x30; break; // 110sss + case 4: ImmS |= 0x38; break; // 1110ss + case 2: ImmS |= 0x3c; break; // 11110s + } + + Bits = ImmS | (ImmR << 6) | (N << 12); + + return true; +} + + +bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm) { + uint32_t N = Bits >> 12; + uint32_t ImmR = (Bits >> 6) & 0x3f; + uint32_t ImmS = Bits & 0x3f; + + // N=1 encodes a 64-bit replication and is invalid for the 32-bit + // instructions. + if (RegWidth == 32 && N != 0) return false; + + int Width = 0; + if (N == 1) + Width = 64; + else if ((ImmS & 0x20) == 0) + Width = 32; + else if ((ImmS & 0x10) == 0) + Width = 16; + else if ((ImmS & 0x08) == 0) + Width = 8; + else if ((ImmS & 0x04) == 0) + Width = 4; + else if ((ImmS & 0x02) == 0) + Width = 2; + else { + // ImmS is 0b11111x: UNALLOCATED + return false; + } + + int Num1s = (ImmS & (Width - 1)) + 1; + + // All encodings which would map to -1 (signed) are RESERVED. + if (Num1s == Width) return false; + + int Rotation = (ImmR & (Width - 1)); + uint64_t Mask = (1ULL << Num1s) - 1; + uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1; + Mask = (Mask >> Rotation) + | ((Mask << (Width - Rotation)) & WidthMask); + + Imm = 0; + for (unsigned i = 0; i < RegWidth / Width; ++i) { + Imm |= Mask; + Mask <<= Width; + } + + return true; +} + +bool A64Imms::isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { + // If high bits are set then a 32-bit MOVZ can't possibly work. 
+ if (RegWidth == 32 && (Value & ~0xffffffffULL)) + return false; + + for (int i = 0; i < RegWidth; i += 16) { + // If the value is 0 when we mask out all the bits that could be set with + // the current LSL value then it's representable. + if ((Value & ~(0xffffULL << i)) == 0) { + Shift = i / 16; + UImm16 = (Value >> i) & 0xffff; + return true; + } + } + return false; +} + +bool A64Imms::isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { + // MOVN is defined to set its register to NOT(LSL(imm16, shift)). + + // We have to be a little careful about a 32-bit register: 0xffff_1234 *is* + // representable, but ~0xffff_1234 == 0xffff_ffff_0000_edcb which is not + // a valid input for isMOVZImm. + if (RegWidth == 32 && (Value & ~0xffffffffULL)) + return false; + + uint64_t MOVZEquivalent = RegWidth == 32 ? ~Value & 0xffffffff : ~Value; + + return isMOVZImm(RegWidth, MOVZEquivalent, UImm16, Shift); +} + +bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value, + int &UImm16, int &Shift) { + if (isMOVZImm(RegWidth, Value, UImm16, Shift)) + return false; + + return isMOVNImm(RegWidth, Value, UImm16, Shift); +} + +MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT, + StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitAArch64MCSubtargetInfo(X, TT, CPU, ""); + return X; +} + + +static MCInstrInfo *createAArch64MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitAArch64MCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitAArch64MCRegisterInfo(X, AArch64::X30); + return X; +} + +static MCAsmInfo *createAArch64MCAsmInfo(const Target &T, StringRef TT) { + Triple TheTriple(TT); + + MCAsmInfo *MAI = new AArch64ELFMCAsmInfo(); + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(AArch64::XSP, 0); + MAI->addInitialFrameState(0, Dst, Src); + + return MAI; +} + +static MCCodeGenInfo 
*createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) { + // On ELF platforms the default static relocation model has a smart enough + // linker to cope with referencing external symbols defined in a shared + // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. + RM = Reloc::Static; + } + + if (CM == CodeModel::Default) + CM = CodeModel::Small; + + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, + bool NoExecStack) { + Triple TheTriple(TT); + + return createAArch64ELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); +} + + +static MCInstPrinter *createAArch64MCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return new AArch64InstPrinter(MAI, MII, MRI, STI); + return 0; +} + +namespace { + +class AArch64MCInstrAnalysis : public MCInstrAnalysis { +public: + AArch64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} + + virtual bool isUnconditionalBranch(const MCInst &Inst) const { + if (Inst.getOpcode() == AArch64::Bcc + && Inst.getOperand(0).getImm() == A64CC::AL) + return true; + return MCInstrAnalysis::isUnconditionalBranch(Inst); + } + + virtual bool isConditionalBranch(const MCInst &Inst) const { + if (Inst.getOpcode() == AArch64::Bcc + && Inst.getOperand(0).getImm() == A64CC::AL) + return false; + return MCInstrAnalysis::isConditionalBranch(Inst); + } + + uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size) const { + unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0; + // FIXME: We only handle PCRel branches for now. 
+ if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType + != MCOI::OPERAND_PCREL) + return -1ULL; + + int64_t Imm = Inst.getOperand(LblOperand).getImm(); + + return Addr + Imm; + } +}; + +} + +static MCInstrAnalysis *createAArch64MCInstrAnalysis(const MCInstrInfo *Info) { + return new AArch64MCInstrAnalysis(Info); +} + + + +extern "C" void LLVMInitializeAArch64TargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn A(TheAArch64Target, createAArch64MCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheAArch64Target, + createAArch64MCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheAArch64Target, + createAArch64MCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheAArch64Target, + createAArch64MCRegisterInfo); + + // Register the MC subtarget info. + using AArch64_MC::createAArch64MCSubtargetInfo; + TargetRegistry::RegisterMCSubtargetInfo(TheAArch64Target, + createAArch64MCSubtargetInfo); + + // Register the MC instruction analyzer. + TargetRegistry::RegisterMCInstrAnalysis(TheAArch64Target, + createAArch64MCInstrAnalysis); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(TheAArch64Target, + createAArch64MCCodeEmitter); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(TheAArch64Target, + createAArch64AsmBackend); + + // Register the object streamer. + TargetRegistry::RegisterMCObjectStreamer(TheAArch64Target, + createMCStreamer); + + // Register the MCInstPrinter. 
+ TargetRegistry::RegisterMCInstPrinter(TheAArch64Target, + createAArch64MCInstPrinter); +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h new file mode 100644 index 0000000..3849fe3 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -0,0 +1,65 @@ +//===-- AArch64MCTargetDesc.h - AArch64 Target Descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides AArch64 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64MCTARGETDESC_H +#define LLVM_AARCH64MCTARGETDESC_H + +#include "llvm/Support/DataTypes.h" + +namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class StringRef; +class Target; +class raw_ostream; + +extern Target TheAArch64Target; + +namespace AArch64_MC { + MCSubtargetInfo *createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS); +} + +MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, + uint8_t OSABI); + +MCAsmBackend *createAArch64AsmBackend(const Target &T, StringRef TT, + StringRef CPU); + +} // End llvm namespace + +// Defines symbolic names for AArch64 registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "AArch64GenRegisterInfo.inc" + +// Defines symbolic names for the AArch64 instructions. 
+// +#define GET_INSTRINFO_ENUM +#include "AArch64GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "AArch64GenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt new file mode 100644 index 0000000..44c66a2 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,13 @@ +add_llvm_library(LLVMAArch64Desc + AArch64AsmBackend.cpp + AArch64ELFObjectWriter.cpp + AArch64ELFStreamer.cpp + AArch64MCAsmInfo.cpp + AArch64MCCodeEmitter.cpp + AArch64MCExpr.cpp + AArch64MCTargetDesc.cpp + ) +add_dependencies(LLVMAArch64Desc AArch64CommonTableGen) + +# Hack: we need to include 'main' target directory to grab private headers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..5a2f467 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AArch64Desc +parent = AArch64 +required_libraries = AArch64AsmPrinter MC Support +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/MCTargetDesc/Makefile b/lib/Target/AArch64/MCTargetDesc/Makefile new file mode 100644 index 0000000..5779ac5 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/AArch64/MCTargetDesc/Makefile ------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMAArch64Desc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/Makefile b/lib/Target/AArch64/Makefile new file mode 100644 index 0000000..b2ca278 --- /dev/null +++ b/lib/Target/AArch64/Makefile @@ -0,0 +1,30 @@ +##===- lib/Target/AArch64/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMAArch64CodeGen +TARGET = AArch64 + +# Make sure that tblgen is run, first thing. 
+BUILT_SOURCES = AArch64GenAsmMatcher.inc \ + AArch64GenAsmWriter.inc \ + AArch64GenCallingConv.inc \ + AArch64GenDAGISel.inc \ + AArch64GenDisassemblerTables.inc \ + AArch64GenInstrInfo.inc \ + AArch64GenMCCodeEmitter.inc \ + AArch64GenMCPseudoLowering.inc \ + AArch64GenRegisterInfo.inc \ + AArch64GenSubtargetInfo.inc + +DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc + +include $(LEVEL)/Makefile.common + + diff --git a/lib/Target/AArch64/README.txt b/lib/Target/AArch64/README.txt new file mode 100644 index 0000000..601990f --- /dev/null +++ b/lib/Target/AArch64/README.txt @@ -0,0 +1,2 @@ +This file will contain changes that need to be made before AArch64 can become an +officially supported target. Currently a placeholder. diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp new file mode 100644 index 0000000..fa07d49 --- /dev/null +++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp @@ -0,0 +1,20 @@ +//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheAArch64Target; + +extern "C" void LLVMInitializeAArch64TargetInfo() { + RegisterTarget<Triple::aarch64> + X(TheAArch64Target, "aarch64", "AArch64"); +} diff --git a/lib/Target/AArch64/TargetInfo/CMakeLists.txt b/lib/Target/AArch64/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000..e236eed --- /dev/null +++ b/lib/Target/AArch64/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) + +add_llvm_library(LLVMAArch64Info + AArch64TargetInfo.cpp + ) + +add_dependencies(LLVMAArch64Info AArch64CommonTableGen) diff --git a/lib/Target/AArch64/TargetInfo/LLVMBuild.txt b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..5b003f0 --- /dev/null +++ b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/AArch64/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AArch64Info +parent = AArch64 +required_libraries = MC Support Target +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/TargetInfo/Makefile b/lib/Target/AArch64/TargetInfo/Makefile new file mode 100644 index 0000000..9dc9aa4 --- /dev/null +++ b/lib/Target/AArch64/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/AArch64/TargetInfo/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMAArch64Info + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index b404e6c..cd4067a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -64,6 +64,9 @@ public: return getSubExpr()->FindAssociatedSection(); } + // There are no TLS ARMMCExprs at the moment. + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} + static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; } diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index f3a9c1c..c06e8bc 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore +subdirectories = AArch64 ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore ; This is a special group whose required libraries are extended (by llvm-build) ; with the best execution engine (the native JIT, if available, or the |