Diffstat (limited to 'lib/Target')
370 files changed, 40649 insertions, 11923 deletions
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h new file mode 100644 index 0000000..4de4faa --- /dev/null +++ b/lib/Target/AArch64/AArch64.h @@ -0,0 +1,42 @@ +//==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// AArch64 back-end. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_AARCH64_H +#define LLVM_TARGET_AARCH64_H + +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class AArch64AsmPrinter; +class FunctionPass; +class AArch64TargetMachine; +class MachineInstr; +class MCInst; + +FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM, + CodeGenOpt::Level OptLevel); + +FunctionPass *createAArch64CleanupLocalDynamicTLSPass(); + +FunctionPass *createAArch64BranchFixupPass(); + +void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + AArch64AsmPrinter &AP); + + +} + +#endif diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td new file mode 100644 index 0000000..e17052b --- /dev/null +++ b/lib/Target/AArch64/AArch64.td @@ -0,0 +1,70 @@ +//===- AArch64.td - Describe the AArch64 Target Machine -------*- tblgen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the top level entry point for the AArch64 target. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// AArch64 Subtarget features. 
+// + +def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", + "Enable Advanced SIMD instructions">; + +def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable cryptographic instructions">; + +//===----------------------------------------------------------------------===// +// AArch64 Processors +// + +include "AArch64Schedule.td" + +def : Processor<"generic", GenericItineraries, [FeatureNEON, FeatureCrypto]>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "AArch64RegisterInfo.td" + +include "AArch64CallingConv.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "AArch64InstrInfo.td" + +def AArch64InstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// Assembly printer +//===----------------------------------------------------------------------===// + +def A64InstPrinter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def AArch64 : Target { + let InstructionSet = AArch64InstrInfo; + let AssemblyWriters = [A64InstPrinter]; +} diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp new file mode 100644 index 0000000..47ebb82 --- /dev/null +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -0,0 +1,347 @@ +//===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format AArch64 assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "AArch64AsmPrinter.h" +#include "InstPrinter/AArch64InstPrinter.h" +#include "llvm/DebugInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +MachineLocation +AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { + // See emitFrameIndexDebugValue in InstrInfo for where this instruction is + // expected to be created. + assert(MI->getNumOperands() == 4 && MI->getOperand(0).isReg() + && MI->getOperand(1).isImm() && "unexpected custom DBG_VALUE"); + return MachineLocation(MI->getOperand(0).getReg(), + MI->getOperand(1).getImm()); +} + +/// Try to print a floating-point register as if it belonged to a specified +/// register-class. For example the inline asm operand modifier "b" requires its +/// argument to be printed as "bN". 
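+/// For instance (illustrative only): an operand carrying S5, printed against +/// FPR8, walks the alias set (B5, H5, S5, D5, Q5) and emits "b5"; a return +/// value of true means no suitable alias was found and the caller must cope.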
+static bool printModifiedFPRAsmOperand(const MachineOperand &MO, + const TargetRegisterInfo *TRI, + const TargetRegisterClass &RegClass, + raw_ostream &O) { + if (!MO.isReg()) + return true; + + for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { + if (RegClass.contains(*AR)) { + O << AArch64InstPrinter::getRegisterName(*AR); + return false; + } + } + return true; +} + +/// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR +/// with the obvious type and an immediate 0 as either wzr or xzr. +static bool printModifiedGPRAsmOperand(const MachineOperand &MO, + const TargetRegisterInfo *TRI, + const TargetRegisterClass &RegClass, + raw_ostream &O) { + char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x'; + + if (MO.isImm() && MO.getImm() == 0) { + O << Prefix << "zr"; + return false; + } else if (MO.isReg()) { + if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) { + O << (Prefix == 'x' ? "sp" : "wsp"); + return false; + } + + for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { + if (RegClass.contains(*AR)) { + O << AArch64InstPrinter::getRegisterName(*AR); + return false; + } + } + } + + return true; +} + +bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO, + bool PrintImmediatePrefix, + StringRef Suffix, raw_ostream &O) { + StringRef Name; + StringRef Modifier; + switch (MO.getType()) { + default: + llvm_unreachable("Unexpected operand for symbolic address constraint"); + case MachineOperand::MO_GlobalAddress: + Name = Mang->getSymbol(MO.getGlobal())->getName(); + + // Global variables may be accessed either via a GOT or in various fun and + // interesting TLS-model specific ways. Set the prefix modifier as + // appropriate here. + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) { + Reloc::Model RelocM = TM.getRelocationModel(); + if (GV->isThreadLocal()) { + switch (TM.getTLSModel(GV)) { + case TLSModel::GeneralDynamic: + Modifier = "tlsdesc"; + break; + case TLSModel::LocalDynamic: + Modifier = "dtprel"; + break; + case TLSModel::InitialExec: + Modifier = "gottprel"; + break; + case TLSModel::LocalExec: + Modifier = "tprel"; + break; + } + } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) { + Modifier = "got"; + } + } + break; + case MachineOperand::MO_BlockAddress: + Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName(); + break; + case MachineOperand::MO_ExternalSymbol: + Name = MO.getSymbolName(); + break; + case MachineOperand::MO_ConstantPoolIndex: + Name = GetCPISymbol(MO.getIndex())->getName(); + break; + } + + // Some instructions (notably ADRP) don't take the # prefix for + // immediates. Only print it if asked to. + if (PrintImmediatePrefix) + O << '#'; + + // Only need the joining "_" if both the modifier and the suffix are + // non-null. This little block simply takes care of the four possible + // combinations involved there.
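+ // For example: no modifier or suffix gives "sym"; a suffix alone gives + // ":lo12:sym"; a modifier alone gives ":got:sym"; both give ":got_lo12:sym".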
+ if (Modifier == "" && Suffix == "") + O << Name; + else if (Modifier == "" && Suffix != "") + O << ":" << Suffix << ':' << Name; + else if (Modifier != "" && Suffix == "") + O << ":" << Modifier << ':' << Name; + else + O << ":" << Modifier << '_' << Suffix << ':' << Name; + + return false; +} + +bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + if (!ExtraCode || !ExtraCode[0]) { + // There's actually no operand modifier, which leads to a slightly eclectic + // set of behaviour which we have to handle here. + const MachineOperand &MO = MI->getOperand(OpNum); + switch (MO.getType()) { + default: + llvm_unreachable("Unexpected operand for inline assembly"); + case MachineOperand::MO_Register: + // GCC prints the unmodified operand of a 'w' constraint as the vector + // register. Technically, we could allocate the argument as a VPR128, but + // that leads to extremely dodgy copies being generated to get the data + // there. + if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O)) + O << AArch64InstPrinter::getRegisterName(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + O << '#' << MO.getImm(); + break; + case MachineOperand::MO_FPImmediate: + assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected"); + O << "#0.0"; + break; + case MachineOperand::MO_BlockAddress: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + return printSymbolicAddress(MO, false, "", O); + } + return false; + } + + // We have a real modifier to handle. + switch(ExtraCode[0]) { + default: + // See if this is a generic operand + return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O); + case 'c': // Don't print "#" before an immediate operand. + if (!MI->getOperand(OpNum).isImm()) + return true; + O << MI->getOperand(OpNum).getImm(); + return false; + case 'w': + // Output 32-bit general register operand, constant zero as wzr, or stack + // pointer as wsp. Ignored when used with other operand types. + return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::GPR32RegClass, O); + case 'x': + // Output 64-bit general register operand, constant zero as xzr, or stack + // pointer as sp. Ignored when used with other operand types. + return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::GPR64RegClass, O); + case 'H': + // Output the higher-numbered register of a 64-bit general register pair + case 'Q': + // Output the least significant register of a 64-bit general register pair + case 'R': + // Output the most significant register of a 64-bit general register pair + + // FIXME note: these three operand modifiers will require, to some extent, + // adding a paired GPR64 register class. Initial investigation suggests that + // assertions are hit unless it has a type and is made legal for that type + // in ISelLowering. After that step is made, the number of modifications + // needed explodes (operation legality, calling conventions, stores, reg + // copies ...). + llvm_unreachable("FIXME: Unimplemented register pairs"); + case 'b': + // Output 8-bit FP/SIMD scalar register operand, prefixed with b. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR8RegClass, O); + case 'h': + // Output 16-bit FP/SIMD scalar register operand, prefixed with h.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR16RegClass, O); + case 's': + // Output 32-bit FP/SIMD scalar register operand, prefixed with s. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR32RegClass, O); + case 'd': + // Output 64-bit FP/SIMD scalar register operand, prefixed with d. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR64RegClass, O); + case 'q': + // Output 128-bit FP/SIMD scalar register operand, prefixed with q. + return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::FPR128RegClass, O); + case 'A': + // Output symbolic address with appropriate relocation modifier (also + // suitable for ADRP). + return printSymbolicAddress(MI->getOperand(OpNum), false, "", O); + case 'L': + // Output bits 11:0 of symbolic address with appropriate :lo12: relocation + // modifier. + return printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O); + case 'G': + // Output bits 23:12 of symbolic address with appropriate :hi12: relocation + // modifier (currently only for TLS local exec). + return printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O); + } + + +} + +bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNum, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + // Currently both the memory constraints (m and Q) behave the same and amount + // to the address as a single register. In future, we may allow "m" to provide + // both a base and an offset. + const MachineOperand &MO = MI->getOperand(OpNum); + assert(MO.isReg() && "unexpected inline assembly memory operand"); + O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']'; + return false; +} + +void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, + raw_ostream &OS) { + unsigned NOps = MI->getNumOperands(); + assert(NOps == 4); + OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; the DI classes etc. do not take const operands for some + // reason. + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + OS << V.getName(); + OS << " <- "; + // Frame address. Currently handles register +- offset only. + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + OS << '[' << AArch64InstPrinter::getRegisterName(MI->getOperand(0).getReg()); + OS << '+' << MI->getOperand(1).getImm(); + OS << ']'; + OS << "+" << MI->getOperand(NOps - 2).getImm(); +} + + +#include "AArch64GenMCPseudoLowering.inc" + +void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { + // Do any auto-generated pseudo lowerings. + if (emitPseudoExpansionLowering(OutStreamer, MI)) + return; + + switch (MI->getOpcode()) { + case AArch64::DBG_VALUE: { + if (isVerbose() && OutStreamer.hasRawTextSupport()) { + SmallString<128> TmpStr; + raw_svector_ostream OS(TmpStr); + PrintDebugValueComment(MI, OS); + OutStreamer.EmitRawText(StringRef(OS.str())); + } + return; + } + } + + MCInst TmpInst; + LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this); + OutStreamer.EmitInstruction(TmpInst); +} + +void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { + if (Subtarget->isTargetELF()) { + const TargetLoweringObjectFileELF &TLOFELF = + static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); + + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables.
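+ // Each stub lowers to a label followed by a pointer-sized data directive, + // roughly (symbol names illustrative): .Lfoo$stub: .xword foo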
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); + const DataLayout *TD = TM.getDataLayout(); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + OutStreamer.EmitLabel(Stubs[i].first); + OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), + TD->getPointerSize(0), 0); + } + Stubs.clear(); + } + } +} + +bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) { + return AsmPrinter::runOnMachineFunction(MF); +} + +// Force static initialization. +extern "C" void LLVMInitializeAArch64AsmPrinter() { + RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64Target); +} + diff --git a/lib/Target/AArch64/AArch64AsmPrinter.h b/lib/Target/AArch64/AArch64AsmPrinter.h new file mode 100644 index 0000000..af0c9fe --- /dev/null +++ b/lib/Target/AArch64/AArch64AsmPrinter.h @@ -0,0 +1,80 @@ +// AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the AArch64 assembly printer class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64ASMPRINTER_H +#define LLVM_AARCH64ASMPRINTER_H + +#include "AArch64.h" +#include "AArch64TargetMachine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +class MCOperand; + +class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter { + + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can + /// make the right decision when printing asm code for different targets. + const AArch64Subtarget *Subtarget; + + // emitPseudoExpansionLowering - tblgen'erated. + bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, + const MachineInstr *MI); + + public: + explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) { + Subtarget = &TM.getSubtarget<AArch64Subtarget>(); + } + + bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const; + + MCOperand lowerSymbolOperand(const MachineOperand &MO, + const MCSymbol *Sym) const; + + void EmitInstruction(const MachineInstr *MI); + void EmitEndOfAsmFile(Module &M); + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + + void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); + + /// printSymbolicAddress - Given some kind of reasonably bare symbolic + /// reference, print out the appropriate asm string to represent it. If + /// appropriate, a relocation-specifier will be produced, composed of a + /// general class derived from the MO parameter and an instruction-specific + /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is + /// given. 
+ bool printSymbolicAddress(const MachineOperand &MO, + bool PrintImmediatePrefix, + StringRef Suffix, raw_ostream &O); + + MachineLocation getDebugValueLocation(const MachineInstr *MI) const; + + virtual const char *getPassName() const { + return "AArch64 Assembly Printer"; + } + + virtual bool runOnMachineFunction(MachineFunction &MF); +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/AArch64/AArch64BranchFixupPass.cpp b/lib/Target/AArch64/AArch64BranchFixupPass.cpp new file mode 100644 index 0000000..71233ba --- /dev/null +++ b/lib/Target/AArch64/AArch64BranchFixupPass.cpp @@ -0,0 +1,600 @@ +//===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that fixes AArch64 branches which have ended up out +// of range for their immediate operands. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "aarch64-branch-fixup" +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumSplit, "Number of uncond branches inserted"); +STATISTIC(NumCBrFixed, "Number of cond branches fixed"); + +/// Return the worst case padding that could result from unknown offset bits. +/// This does not include alignment padding caused by known offset bits. +/// +/// @param LogAlign log2(alignment) +/// @param KnownBits Number of known low offset bits. +static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { + if (KnownBits < LogAlign) + return (1u << LogAlign) - (1u << KnownBits); + return 0; +} + +namespace { + /// Due to limited PC-relative displacements, conditional branches to distant + /// blocks may need converting into an unconditional equivalent. For example: + /// tbz w1, #0, far_away + /// becomes + /// tbnz w1, #0, skip + /// b far_away + /// skip: + class AArch64BranchFixup : public MachineFunctionPass { + /// Information about the offset and size of a single basic block. + struct BasicBlockInfo { + /// Distance from the beginning of the function to the beginning of this + /// basic block. + /// + /// Offsets are computed assuming worst case padding before an aligned + /// block. This means that subtracting basic block offsets always gives a + /// conservative estimate of the real distance which may be smaller. + /// + /// Because worst case padding is used, the computed offset of an aligned + /// block may not actually be aligned. + unsigned Offset; + + /// Size of the basic block in bytes. If the block contains inline + /// assembly, this is a worst case estimate. + /// + /// The size does not include any alignment padding whether from the + /// beginning of the block, or from an aligned jump table at the end. + unsigned Size; + + /// The number of low bits in Offset that are known to be exact. The + /// remaining bits of Offset are an upper bound. + uint8_t KnownBits; + + /// When non-zero, the block contains instructions (inline asm) of unknown + /// size. 
The real size may be smaller than Size bytes by a multiple of 1 + /// << Unalign. + uint8_t Unalign; + + BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {} + + /// Compute the number of known offset bits internally to this block. + /// This number should be used to predict worst case padding when + /// splitting the block. + unsigned internalKnownBits() const { + unsigned Bits = Unalign ? Unalign : KnownBits; + // If the block size isn't a multiple of the known bits, assume the + // worst case padding. + if (Size & ((1u << Bits) - 1)) + Bits = CountTrailingZeros_32(Size); + return Bits; + } + + /// Compute the offset immediately following this block. If LogAlign is + /// specified, return the offset the successor block will get if it has + /// this alignment. + unsigned postOffset(unsigned LogAlign = 0) const { + unsigned PO = Offset + Size; + if (!LogAlign) + return PO; + // Add alignment padding from the terminator. + return PO + UnknownPadding(LogAlign, internalKnownBits()); + } + + /// Compute the number of known low bits of postOffset. If this block + /// contains inline asm, the number of known bits drops to the + /// instruction alignment. An aligned terminator may increase the number + /// of known bits. + /// If LogAlign is given, also consider the alignment of the next block. + unsigned postKnownBits(unsigned LogAlign = 0) const { + return std::max(LogAlign, internalKnownBits()); + } + }; + + std::vector<BasicBlockInfo> BBInfo; + + /// One per immediate branch, keeping the machine instruction pointer, + /// conditional or unconditional, the max displacement, and (if IsCond is + /// true) the corresponding inverted branch opcode. + struct ImmBranch { + MachineInstr *MI; + unsigned OffsetBits : 31; + bool IsCond : 1; + ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond) + : MI(mi), OffsetBits(offsetbits), IsCond(cond) {} + }; + + /// Keep track of all the immediate branch instructions. + /// + std::vector<ImmBranch> ImmBranches; + + MachineFunction *MF; + const AArch64InstrInfo *TII; + public: + static char ID; + AArch64BranchFixup() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "AArch64 branch fixup pass"; + } + + private: + void initializeFunctionInfo(); + MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); + void adjustBBOffsetsAfter(MachineBasicBlock *BB); + bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, + unsigned OffsetBits); + bool fixupImmediateBr(ImmBranch &Br); + bool fixupConditionalBr(ImmBranch &Br); + + void computeBlockSize(MachineBasicBlock *MBB); + unsigned getOffsetOf(MachineInstr *MI) const; + void dumpBBs(); + void verify(); + }; + char AArch64BranchFixup::ID = 0; +} + +/// check BBOffsets +void AArch64BranchFixup::verify() { +#ifndef NDEBUG + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + unsigned MBBId = MBB->getNumber(); + assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); + } +#endif +} + +/// print block size and offset information - debugging +void AArch64BranchFixup::dumpBBs() { + DEBUG({ + for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) { + const BasicBlockInfo &BBI = BBInfo[J]; + dbgs() << format("%08x BB#%u\t", BBI.Offset, J) + << " kb=" << unsigned(BBI.KnownBits) + << " ua=" << unsigned(BBI.Unalign) + << format(" size=%#x\n", BBInfo[J].Size); + } + }); +} + +/// Returns an instance of the branch fixup pass.
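+/// (Sketch of the expected use, from the target's pass setup, e.g. an +/// addPreEmitPass override: addPass(createAArch64BranchFixupPass());)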
+FunctionPass *llvm::createAArch64BranchFixupPass() { + return new AArch64BranchFixup(); +} + +bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + DEBUG(dbgs() << "***** AArch64BranchFixup ******"); + TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo(); + + // This pass invalidates liveness information when it splits basic blocks. + MF->getRegInfo().invalidateLiveness(); + + // Renumber all of the machine basic blocks in the function, guaranteeing that + // the numbers agree with the position of the block in the function. + MF->RenumberBlocks(); + + // Do the initial scan of the function, building up information about the + // sizes of each block and location of each immediate branch. + initializeFunctionInfo(); + + // Iteratively fix up branches until there is no change. + unsigned NoBRIters = 0; + bool MadeChange = false; + while (true) { + DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n'); + bool BRChange = false; + for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) + BRChange |= fixupImmediateBr(ImmBranches[i]); + if (BRChange && ++NoBRIters > 30) + report_fatal_error("Branch Fix Up pass failed to converge!"); + DEBUG(dumpBBs()); + + if (!BRChange) + break; + MadeChange = true; + } + + // After a while, this might be made debug-only, but it is not expensive. + verify(); + + DEBUG(dbgs() << '\n'; dumpBBs()); + + BBInfo.clear(); + ImmBranches.clear(); + + return MadeChange; +} + +/// Return true if the specified basic block can fallthrough into the block +/// immediately after it. +static bool BBHasFallthrough(MachineBasicBlock *MBB) { + // Get the next machine basic block in the function. + MachineFunction::iterator MBBI = MBB; + // Can't fall off end of function. + if (llvm::next(MBBI) == MBB->getParent()->end()) + return false; + + MachineBasicBlock *NextBB = llvm::next(MBBI); + for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) + if (*I == NextBB) + return true; + + return false; +} + +/// Do the initial scan of the function, building up information about the sizes +/// of each block, and each immediate branch. +void AArch64BranchFixup::initializeFunctionInfo() { + BBInfo.clear(); + BBInfo.resize(MF->getNumBlockIDs()); + + // First thing, compute the size of all basic blocks, and see if the function + // has any inline assembly in it. If so, we have to be conservative about + // alignment assumptions, as we don't know for sure the size of any + // instructions in the inline assembly. + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) + computeBlockSize(I); + + // The known bits of the entry block offset are determined by the function + // alignment. + BBInfo.front().KnownBits = MF->getAlignment(); + + // Compute block offsets and known bits. + adjustBBOffsetsAfter(MF->begin()); + + // Now go back through the instructions and build up our data structures. + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock &MBB = *MBBI; + + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + if (I->isDebugValue()) + continue; + + int Opc = I->getOpcode(); + if (I->isBranch()) { + bool IsCond = false; + + // The offsets encoded in instructions here scale by the instruction + // size (4 bytes), effectively increasing their range by 2 bits. 
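+ // Worked example: Bcc encodes a signed 19-bit word offset, so as a byte + // displacement it reaches +/- 2^(19+2-1) = +/- 1MiB; likewise TBZ/TBNZ get + // 14+2 bits (+/- 32KiB) and the unconditional B 26+2 bits (+/- 128MiB).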
+ unsigned Bits = 0; + switch (Opc) { + default: + continue; // Ignore other JT branches + case AArch64::TBZxii: + case AArch64::TBZwii: + case AArch64::TBNZxii: + case AArch64::TBNZwii: + IsCond = true; + Bits = 14 + 2; + break; + case AArch64::Bcc: + case AArch64::CBZx: + case AArch64::CBZw: + case AArch64::CBNZx: + case AArch64::CBNZw: + IsCond = true; + Bits = 19 + 2; + break; + case AArch64::Bimm: + Bits = 26 + 2; + break; + } + + // Record this immediate branch. + ImmBranches.push_back(ImmBranch(I, Bits, IsCond)); + } + } + } +} + +/// Compute the size and some alignment information for MBB. This function +/// updates BBInfo directly. +void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) { + BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; + BBI.Size = 0; + BBI.Unalign = 0; + + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { + BBI.Size += TII->getInstSizeInBytes(*I); + // For inline asm, GetInstSizeInBytes returns a conservative estimate. + // The actual size may be smaller, but still a multiple of the instr size. + if (I->isInlineAsm()) + BBI.Unalign = 2; + } +} + +/// Return the current offset of the specified machine instruction from the +/// start of the function. This offset changes as stuff is moved around inside +/// the function. +unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const { + MachineBasicBlock *MBB = MI->getParent(); + + // The offset is composed of two things: the sum of the sizes of all MBB's + // before this instruction's block, and the offset from the start of the block + // it is in. + unsigned Offset = BBInfo[MBB->getNumber()].Offset; + + // Sum instructions before MI in MBB. + for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { + assert(I != MBB->end() && "Didn't find MI in its own basic block?"); + Offset += TII->getInstSizeInBytes(*I); + } + return Offset; +} + +/// Split the basic block containing MI into two blocks, which are joined by +/// an unconditional branch. Update data structures and renumber blocks to +/// account for this change and returns the newly created block. +MachineBasicBlock * +AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) { + MachineBasicBlock *OrigBB = MI->getParent(); + + // Create a new MBB for the code after the OrigBB. + MachineBasicBlock *NewBB = + MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); + MachineFunction::iterator MBBI = OrigBB; ++MBBI; + MF->insert(MBBI, NewBB); + + // Splice the instructions starting with MI over to NewBB. + NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); + + // Add an unconditional branch from OrigBB to NewBB. + // Note the new unconditional branch is not being recorded. + // There doesn't seem to be meaningful DebugInfo available; this doesn't + // correspond to anything in the source. + BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB); + ++NumSplit; + + // Update the CFG. All succs of OrigBB are now succs of NewBB. + NewBB->transferSuccessors(OrigBB); + + // OrigBB branches to NewBB. + OrigBB->addSuccessor(NewBB); + + // Update internal data structures to account for the newly inserted MBB. + MF->RenumberBlocks(NewBB); + + // Insert an entry into BBInfo to align it properly with the (newly + // renumbered) block numbers. + BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); + + // Figure out how large the OrigBB is. As the first half of the original + // block, it cannot contain a tablejump. The size includes + // the new jump we added. 
(It should be possible to do this without + // recounting everything, but it's very confusing, and this is rarely + // executed.) + computeBlockSize(OrigBB); + + // Figure out how large the NewBB is. As the second half of the original + // block, it may contain a tablejump. + computeBlockSize(NewBB); + + // All BBOffsets following these blocks must be modified. + adjustBBOffsetsAfter(OrigBB); + + return NewBB; +} + +void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) { + unsigned BBNum = BB->getNumber(); + for (unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) { + // Get the offset and known bits at the end of the layout predecessor. + // Include the alignment of the current block. + unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment(); + unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); + unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); + + // This is where block i begins. Stop if the offset is already correct, + // and we have updated 2 blocks. This is the maximum number of blocks + // changed before calling this function. + if (i > BBNum + 2 && + BBInfo[i].Offset == Offset && + BBInfo[i].KnownBits == KnownBits) + break; + + BBInfo[i].Offset = Offset; + BBInfo[i].KnownBits = KnownBits; + } +} + +/// Returns true if the distance between the specified MI and the specified BB +/// can fit in MI's displacement field. +bool AArch64BranchFixup::isBBInRange(MachineInstr *MI, + MachineBasicBlock *DestBB, + unsigned OffsetBits) { + int64_t BrOffset = getOffsetOf(MI); + int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset; + + DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() + << " from BB#" << MI->getParent()->getNumber() + << " bits available=" << OffsetBits + << " from " << getOffsetOf(MI) << " to " << DestOffset + << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); + + return isIntN(OffsetBits, DestOffset - BrOffset); +} + +/// Fix up an immediate branch whose destination is too far away to fit in its +/// displacement field. +bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) { + MachineInstr *MI = Br.MI; + MachineBasicBlock *DestBB = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (MI->getOperand(i).isMBB()) { + DestBB = MI->getOperand(i).getMBB(); + break; + } + } + assert(DestBB && "Branch with no destination BB?"); + + // Check to see if the DestBB is already in-range. + if (isBBInRange(MI, DestBB, Br.OffsetBits)) + return false; + + assert(Br.IsCond && "Only conditional branches should need fixup"); + return fixupConditionalBr(Br); +} + +/// Fix up a conditional branch whose destination is too far away to fit in its +/// displacement field. It is converted to an inverse conditional branch + an +/// unconditional branch to the destination. +bool +AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) { + MachineInstr *MI = Br.MI; + MachineBasicBlock *MBB = MI->getParent(); + unsigned CondBrMBBOperand = 0; + + // The general idea is to add an unconditional branch to the destination and + // invert the conditional branch to jump over it. Complications occur around + // fallthrough and unreachable ends to the block. + // b.lt L1 + // => + // b.ge L2 + // b L1 + // L2: + + // First we invert the conditional branch, by creating a replacement if + // necessary. This if statement contains all the special handling of different + // branch types.
+ if (MI->getOpcode() == AArch64::Bcc) { + // The basic block is operand number 1 for Bcc + CondBrMBBOperand = 1; + + A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm(); + CC = A64InvertCondCode(CC); + MI->getOperand(0).setImm(CC); + } else { + MachineInstrBuilder InvertedMI; + int InvertedOpcode; + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown branch type"); + case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break; + case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break; + case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break; + case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break; + case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break; + case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break; + case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break; + case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break; + } + + InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode)); + for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) { + InvertedMI.addOperand(MI->getOperand(i)); + if (MI->getOperand(i).isMBB()) + CondBrMBBOperand = i; + } + + MI->eraseFromParent(); + MI = Br.MI = InvertedMI; + } + + // If the branch is at the end of its MBB and that has a fall-through block, + // direct the updated conditional branch to the fall-through + // block. Otherwise, split the MBB before the next instruction. + MachineInstr *BMI = &MBB->back(); + bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); + + ++NumCBrFixed; + if (BMI != MI) { + if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && + BMI->getOpcode() == AArch64::Bimm) { + // Last MI in the BB is an unconditional branch. We can swap destinations: + // b.eq L1 (temporarily b.ne L1 after first change) + // b L2 + // => + // b.ne L2 + // b L1 + MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); + if (isBBInRange(MI, NewDest, Br.OffsetBits)) { + DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " + << *BMI); + MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB(); + BMI->getOperand(0).setMBB(DestBB); + MI->getOperand(CondBrMBBOperand).setMBB(NewDest); + return true; + } + } + } + + if (NeedSplit) { + MachineBasicBlock::iterator MBBI = MI; ++MBBI; + splitBlockBeforeInstr(MBBI); + // No need for the branch to the next block. We're adding an unconditional + // branch to the destination. + int delta = TII->getInstSizeInBytes(MBB->back()); + BBInfo[MBB->getNumber()].Size -= delta; + MBB->back().eraseFromParent(); + // BBInfo[SplitBB].Offset is wrong temporarily, fixed below + } + + // After splitting and removing the unconditional branch from the original BB, + // the structure is now: + // oldbb: + // [things] + // b.invertedCC L1 + // splitbb/fallthroughbb: + // [old b L2/real continuation] + // + // We now have to change the conditional branch to point to splitbb and add an + // unconditional branch after it to L1, giving the final structure: + // oldbb: + // [things] + // b.invertedCC splitbb + // b L1 + // splitbb/fallthroughbb: + // [old b L2/real continuation] + MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); + + DEBUG(dbgs() << " Insert B to BB#" + << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber() + << " also invert condition and change dest. to BB#" + << NextBB->getNumber() << "\n"); + + // Insert a new unconditional branch and fixup the destination of the + // conditional one. 
Also update the ImmBranch and add a new entry + // for the new branch. + BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm)) + .addMBB(MI->getOperand(CondBrMBBOperand).getMBB()); + MI->getOperand(CondBrMBBOperand).setMBB(NextBB); + + BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back()); + + // 26 bits written down in Bimm, specifying a multiple of 4. + unsigned OffsetBits = 26 + 2; + ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false)); + + adjustBBOffsetsAfter(MBB); + return true; +} diff --git a/lib/Target/AArch64/AArch64CallingConv.td b/lib/Target/AArch64/AArch64CallingConv.td new file mode 100644 index 0000000..b880d83 --- /dev/null +++ b/lib/Target/AArch64/AArch64CallingConv.td @@ -0,0 +1,196 @@ +//==-- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tblgen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for the AArch64 architecture. +//===----------------------------------------------------------------------===// + + +// The AArch64 Procedure Call Standard is unfortunately specified at a slightly +// higher level of abstraction than LLVM's target interface presents. In +// particular, it refers (like other ABIs, in fact) directly to +// structs. However, generic LLVM code takes the liberty of lowering structure +// arguments to the component fields before we see them. +// +// As a result, the obvious direct map from LLVM IR to PCS concepts can't be +// implemented, so the goals of this calling convention are, in decreasing +// priority order: +// 1. Expose *some* way to express the concepts required to implement the +// generic PCS from a front-end. +// 2. Provide a sane ABI for pure LLVM. +// 3. Follow the generic PCS as closely as is naturally possible. +// +// The suggested front-end implementation of PCS features is: +// * Integer, float and vector arguments of all sizes which end up in +// registers are passed and returned via the natural LLVM type. +// * Structure arguments with size <= 16 bytes are passed and returned in +// registers as similar integer or composite types. For example: +// [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed). +// * HFAs in registers follow rules similar to small structs: appropriate +// composite types. +// * Structure arguments with size > 16 bytes are passed via a pointer, +// handled completely by the front-end. +// * Structure return values > 16 bytes via an sret pointer argument. +// * Other stack-based arguments (not large structs) are passed using byval +// pointers. Padding arguments are added beforehand to guarantee a large +// struct doesn't later use integer registers. +// +// N.b. this means that it is the front-end's responsibility (if it cares about +// PCS compliance) to check whether enough registers are available for an +// argument when deciding how to pass it. + +class CCIfAlign<int Align, CCAction A>: + CCIf<"ArgFlags.getOrigAlign() == " # Align, A>; + +def CC_A64_APCS : CallingConv<[ + // SRet is an LLVM-specific concept, so it takes precedence over general ABI + // concerns. However, this rule will be used by C/C++ frontends to implement + // structure return. + CCIfSRet<CCAssignToReg<[X8]>>, + + // Put ByVal arguments directly on the stack. The minimum size and alignment + // of a slot are 64 bits.
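+ // (For example, a 4-byte byval argument still consumes a full 8-byte, + // 8-byte-aligned stack slot under the minimums below.)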
+ CCIfByVal<CCPassByVal<8, 8>>, + + // Canonicalise the various types that live in different floating-point + // registers. This makes sense because the PCS does not distinguish Short + // Vectors and Floating-point types. + CCIfType<[v2i8], CCBitConvertToType<f16>>, + CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>, + CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCBitConvertToType<f128>>, + + // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision + // Floating-point or Short Vector Type and the NSRN is less than 8, then the + // argument is allocated to the least significant bits of register + // v[NSRN]. The NSRN is incremented by one. The argument has now been + // allocated." + CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated + // SIMD and Floating-point registers (NSRN - number of elements < 8), then the + // argument is allocated to SIMD and Floating-point registers (with one + // register per element of the HFA). The NSRN is incremented by the number of + // registers used. The argument has now been allocated." + // + // N.b. As above, this rule is the responsibility of the front-end. + + // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of + // the argument is rounded up to the nearest multiple of 8 bytes." + // + // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short + // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural + // Alignment of the Argument's type." + // + // It is expected that these will be satisfied by adding dummy arguments to + // the prototype. + + // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point + // type then the size of the argument is set to 8 bytes. The effect is as if + // the argument had been copied to the least significant bits of a 64-bit + // register and the remaining bits filled with unspecified values." + CCIfType<[f16, f32], CCPromoteToType<f64>>, + + // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad- + // precision Floating-point or Short Vector Type, then the argument is copied + // to memory at the adjusted NSAA. The NSAA is incremented by the size of the + // argument. The argument has now been allocated." + CCIfType<[f64], CCAssignToStack<8, 8>>, + CCIfType<[f128], CCAssignToStack<16, 16>>, + + // PCS: "C.7: If the argument is an Integral Type, the size of the argument is + // less than or equal to 8 bytes and the NGRN is less than 8, the argument is + // copied to the least significant bits of x[NGRN]. The NGRN is incremented by + // one. The argument has now been allocated." + + // First we implement C.8 and C.9 (128-bit types get even registers). i128 is + // represented as two i64s, the first one being split. If we delayed this + // operation C.8 would never be reached. + CCIfType<[i64], + CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], [X0, X1, X3, X5]>>>, + + // Note: the promotion also implements C.14. 
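+ // (For example, an i8 argument travels as the low byte of an x-register or + // of an 8-byte stack slot, with the remaining bits unspecified.)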
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, + + // And now the real implementation of C.7 + CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>, + + // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded + // up to the next even number." + // + // "C.9: If the argument is an Integral Type, the size of the argument is + // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN] + // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the + // memory representation of the argument. The NGRN is incremented by two. The + // argument has now been allocated." + // + // Subtlety here: what if alignment is 16 but it is not an integral type? All + // floating-point types have been allocated already, which leaves composite + // types: this is why a front-end may need to produce i128 for a struct <= 16 + // bytes. + + // PCS: "C.10 If the argument is a Composite Type and the size in double-words + // of the argument is not more than 8 minus NGRN, then the argument is copied + // into consecutive general-purpose registers, starting at x[NGRN]. The + // argument is passed as though it had been loaded into the registers from a + // double-word aligned address with an appropriate sequence of LDR + // instructions loading consecutive registers from memory (the contents of any + // unused parts of the registers are unspecified by this standard). The NGRN + // is incremented by the number of registers used. The argument has now been + // allocated." + // + // Another one that's the responsibility of the front-end (sigh). + + // PCS: "C.11: The NGRN is set to 8." + CCCustom<"CC_AArch64NoMoreRegs">, + + // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural + // Alignment of the argument's type." + // + // PCS: "C.13: If the argument is a composite type then the argument is copied + // to memory at the adjusted NSAA. The NSAA is incremented by the size of the + // argument. The argument has now been allocated." + // + // Note that the effect of this corresponds to a memcpy rather than register + // stores so that the struct ends up correctly addressable at the adjusted + // NSAA. + + // PCS: "C.14: If the size of the argument is less than 8 bytes then the size + // of the argument is set to 8 bytes. The effect is as if the argument was + // copied to the least significant bits of a 64-bit register and the remaining + // bits filled with unspecified values." + // + // Integer types were widened above. Floating-point and composite types have + // already been allocated completely. Nothing to do. + + // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA + // is incremented by the size of the argument. The argument has now been + // allocated." + CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>, + CCIfType<[i64], CCAssignToStack<8, 8>> + +]>; + +// According to the PCS, X19-X30 are callee-saved; however, only the low 64 bits +// of vector registers (8-15) are callee-saved. The order here is picked up +// by PrologEpilogInserter.cpp to allocate stack slots, starting from top of +// stack upon entry. This gives the customary layout of x30 at [sp-8], x29 at +// [sp-16], ... +def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19), + (sequence "D%u", 15, 8))>; + + +// TLS descriptor calls are extremely restricted in their changes, to allow +// optimisations in the (hopefully) more common fast path where no real action +// is needed.
They actually have to preserve all registers, except for the +// unavoidable X30 and the return register X0. +def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1), + (sequence "Q%u", 31, 0))>; diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp new file mode 100644 index 0000000..cca6d12 --- /dev/null +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -0,0 +1,686 @@ +//===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64FrameLowering.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64InstrInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +void AArch64FrameLowering::splitSPAdjustments(uint64_t Total, + uint64_t &Initial, + uint64_t &Residual) const { + // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP + // instructions have a 7-bit signed immediate scaled by 8, giving a reach of + // 0x1f8, but stack adjustment should always be a multiple of 16. + if (Total <= 0x1f0) { + Initial = Total; + Residual = 0; + } else { + Initial = 0x1f0; + Residual = Total - Initial; + } +} + +void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { + AArch64MachineFunctionInfo *FuncInfo = + MF.getInfo<AArch64MachineFunctionInfo>(); + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + MachineModuleInfo &MMI = MF.getMMI(); + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + bool NeedsFrameMoves = MMI.hasDebugInfo() + || MF.getFunction()->needsUnwindTableEntry(); + + uint64_t NumInitialBytes, NumResidualBytes; + + // Currently we expect the stack to be laid out by + // sub sp, sp, #initial + // stp x29, x30, [sp, #offset] + // ... + // str xxx, [sp, #offset] + // sub sp, sp, #rest (possibly via extra instructions). + if (MFI->getCalleeSavedInfo().size()) { + // If there are callee-saved registers, we want to store them efficiently as + // a block, and virtual base assignment happens too early to do it for us so + // we adjust the stack in two phases: first just for callee-saved fiddling, + // then to allocate the rest of the frame. + splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes); + } else { + // If there aren't any callee-saved registers, two-phase adjustment is + // inefficient. 
It's more efficient to adjust with NumInitialBytes too + // because when we're in a "callee pops argument space" situation, that pop + // must be tacked onto Initial for correctness. + NumInitialBytes = MFI->getStackSize(); + NumResidualBytes = 0; + } + + // Tell everyone else how much adjustment we're expecting them to use. In + // particular, if an adjustment is required for a tail call, the epilogue + // could have a different view of things. + FuncInfo->setInitialStackAdjust(NumInitialBytes); + + emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes, + MachineInstr::FrameSetup); + + if (NeedsFrameMoves && NumInitialBytes) { + // We emit this update even if the CFA is set from a frame pointer later so + // that the CFA is valid in the interim. + MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) + .addSym(SPLabel); + + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(AArch64::XSP, NumInitialBytes); + Moves.push_back(MachineMove(SPLabel, Dst, Src)); + } + + // Otherwise we need to set the frame pointer and/or add a second stack + // adjustment. + + bool FPNeedsSetting = hasFP(MF); + for (; MBBI != MBB.end(); ++MBBI) { + // Note that this search makes strong assumptions about the operation used + // to store the frame-pointer: it must be "STP x29, x30, ...". This could + // change in future, but until then there's no point in implementing + // more generic, untestable cases. + if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR + && MBBI->getOperand(0).getReg() == AArch64::X29) { + int64_t X29FrameIdx = MBBI->getOperand(2).getIndex(); + FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx)); + + ++MBBI; + emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP, + AArch64::X29, + NumInitialBytes + MFI->getObjectOffset(X29FrameIdx), + MachineInstr::FrameSetup); + + // The offset adjustment used when emitting debugging locations relative + // to whatever frame base is set. AArch64 uses the default frame base (FP + // or SP) and this adjusts the calculations to be correct. + MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx) + - MFI->getStackSize()); + + if (NeedsFrameMoves) { + MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) + .addSym(FPLabel); + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(AArch64::X29, -MFI->getObjectOffset(X29FrameIdx)); + Moves.push_back(MachineMove(FPLabel, Dst, Src)); + } + + FPNeedsSetting = false; + } + + if (!MBBI->getFlag(MachineInstr::FrameSetup)) + break; + } + + assert(!FPNeedsSetting && "Frame pointer couldn't be set"); + + emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes, + MachineInstr::FrameSetup); + + // Now we emit the rest of the frame setup information, if necessary: we've + // already noted the FP and initial SP moves so we're left with the prologue's + // final SP update and callee-saved register locations. + if (!NeedsFrameMoves) + return; + + // Reuse the label if appropriate, so create it in this outer scope.
+ MCSymbol *CSLabel = 0; + + // The rest of the stack adjustment + if (!hasFP(MF) && NumResidualBytes) { + CSLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) + .addSym(CSLabel); + + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(AArch64::XSP, NumResidualBytes + NumInitialBytes); + Moves.push_back(MachineMove(CSLabel, Dst, Src)); + } + + // And any callee-saved registers (it's fine to leave them to the end here, + // because the old values are still valid at this point). + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + if (CSI.size()) { + if (!CSLabel) { + CSLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) + .addSym(CSLabel); + } + + for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), + E = CSI.end(); I != E; ++I) { + MachineLocation Dst(MachineLocation::VirtualFP, + MFI->getObjectOffset(I->getFrameIdx())); + MachineLocation Src(I->getReg()); + Moves.push_back(MachineMove(CSLabel, Dst, Src)); + } + } +} + +void +AArch64FrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + AArch64MachineFunctionInfo *FuncInfo = + MF.getInfo<AArch64MachineFunctionInfo>(); + + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + DebugLoc DL = MBBI->getDebugLoc(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned RetOpcode = MBBI->getOpcode(); + + // Initial and residual are named for consistency with the prologue. Note that + // in the epilogue, the residual adjustment is executed first. + uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust(); + uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes; + uint64_t ArgumentPopSize = 0; + if (RetOpcode == AArch64::TC_RETURNdi || + RetOpcode == AArch64::TC_RETURNxi) { + MachineOperand &JumpTarget = MBBI->getOperand(0); + MachineOperand &StackAdjust = MBBI->getOperand(1); + + MachineInstrBuilder MIB; + if (RetOpcode == AArch64::TC_RETURNdi) { + MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm)); + if (JumpTarget.isGlobal()) { + MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + } else { + assert(JumpTarget.isSymbol() && "unexpected tail call destination"); + MIB.addExternalSymbol(JumpTarget.getSymbolName(), + JumpTarget.getTargetFlags()); + } + } else { + assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg() + && "Unexpected tail call"); + + MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx)); + MIB.addReg(JumpTarget.getReg(), RegState::Kill); + } + + // Add the extra operands onto the new tail call instruction even though + // they're not used directly (so that liveness is tracked properly etc). + for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) + MIB->addOperand(MBBI->getOperand(i)); + + + // Delete the pseudo instruction TC_RETURN. + MachineInstr *NewMI = prior(MBBI); + MBB.erase(MBBI); + MBBI = NewMI; + + // For a tail-call in a callee-pops-arguments environment, some or all of + // the stack may actually be in use for the call's arguments; this is + // calculated during LowerCall and consumed here... + ArgumentPopSize = StackAdjust.getImm(); + } else { + // ... otherwise the amount to pop is *all* of the argument space, + // conveniently stored in the MachineFunctionInfo by + // LowerFormalArguments. This will, of course, be zero for the C calling + // convention.
+ ArgumentPopSize = FuncInfo->getArgumentStackToRestore(); + } + + assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0 + && "refusing to adjust stack by misaligned amt"); + + // We may need to address callee-saved registers differently, so find out the + // bound on the frame indices. + const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + // The "residual" stack update comes first from this direction and guarantees + // that SP is NumInitialBytes below its value on function entry, either by a + // direct update or restoring it from the frame pointer. + if (NumInitialBytes + ArgumentPopSize != 0) { + emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, + NumInitialBytes + ArgumentPopSize); + --MBBI; + } + + + // MBBI now points to the instruction just past the last callee-saved + // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp" + // otherwise). + + // Now we need to find out where to put the bulk of the stack adjustment + MachineBasicBlock::iterator FirstEpilogue = MBBI; + while (MBBI != MBB.begin()) { + --MBBI; + + unsigned FrameOp; + for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) { + if (MBBI->getOperand(FrameOp).isFI()) + break; + } + + // If this instruction doesn't have a frame index we've reached the end of + // the callee-save restoration. + if (FrameOp == MBBI->getNumOperands()) + break; + + // Likewise if it *is* a local reference, but not to a callee-saved object. + int FrameIdx = MBBI->getOperand(FrameOp).getIndex(); + if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI) + break; + + FirstEpilogue = MBBI; + } + + if (MF.getFrameInfo()->hasVarSizedObjects()) { + int64_t StaticFrameBase; + StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset()); + emitRegUpdate(MBB, FirstEpilogue, DL, TII, + AArch64::XSP, AArch64::X29, AArch64::NoRegister, + StaticFrameBase); + } else { + emitSPUpdate(MBB, FirstEpilogue, DL,TII, AArch64::X16, NumResidualBytes); + } +} + +int64_t +AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF, + int FrameIndex, + unsigned &FrameReg, + int SPAdj, + bool IsCalleeSaveOp) const { + AArch64MachineFunctionInfo *FuncInfo = + MF.getInfo<AArch64MachineFunctionInfo>(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex); + + assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0) + && "callee-saved register in unexpected place"); + + // If the frame for this function is particularly large, we adjust the stack + // in two phases which means the callee-save related operations see a + // different (intermediate) stack size. + int64_t FrameRegPos; + if (IsCalleeSaveOp) { + FrameReg = AArch64::XSP; + FrameRegPos = -static_cast<int64_t>(FuncInfo->getInitialStackAdjust()); + } else if (useFPForAddressing(MF)) { + // Have to use the frame pointer since we have no idea where SP is. + FrameReg = AArch64::X29; + FrameRegPos = FuncInfo->getFramePointerOffset(); + } else { + FrameReg = AArch64::XSP; + FrameRegPos = -static_cast<int64_t>(MFI->getStackSize()) + SPAdj; + } + + return TopOfFrameOffset - FrameRegPos; +} + +/// Estimate and return the size of the frame. +static unsigned estimateStackSize(MachineFunction &MF) { + // FIXME: Make generic? Really consider after upstreaming. This code is now + // shared between PEI, ARM *and* here. 
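+  // As a worked example of the per-object round-up used below:
+  // (Offset + Align - 1) / Align * Align bumps Offset to the next multiple
+  // of Align, so an Offset of 20 with an Align of 16 becomes 32.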
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+  unsigned MaxAlign = MFI->getMaxAlignment();
+  int Offset = 0;
+
+  // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+  // It really should be refactored to share code. Until then, changes
+  // should keep in mind that there's tight coupling between the two.
+
+  for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+    int FixedOff = -MFI->getObjectOffset(i);
+    if (FixedOff > Offset) Offset = FixedOff;
+  }
+  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+    if (MFI->isDeadObjectIndex(i))
+      continue;
+    Offset += MFI->getObjectSize(i);
+    unsigned Align = MFI->getObjectAlignment(i);
+    // Adjust to alignment boundary
+    Offset = (Offset + Align - 1) / Align * Align;
+
+    MaxAlign = std::max(Align, MaxAlign);
+  }
+
+  if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
+    Offset += MFI->getMaxCallFrameSize();
+
+  // Round up the size to a multiple of the alignment. If the function has
+  // any calls or alloca's, align to the target's StackAlignment value to
+  // ensure that the callee's frame or the alloca data is suitably aligned;
+  // otherwise, for leaf functions, align to the TransientStackAlignment
+  // value.
+  unsigned StackAlign;
+  if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+      (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
+    StackAlign = TFI->getStackAlignment();
+  else
+    StackAlign = TFI->getTransientStackAlignment();
+
+  // If the frame pointer is eliminated, all frame offsets will be relative to
+  // SP not FP. Align to MaxAlign so this works.
+  StackAlign = std::max(StackAlign, MaxAlign);
+  unsigned AlignMask = StackAlign - 1;
+  Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+  return (unsigned)Offset;
+}
+
+void
+AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                       RegScavenger *RS) const {
+  const AArch64RegisterInfo *RegInfo =
+    static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo());
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const AArch64InstrInfo &TII =
+    *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+
+  if (hasFP(MF)) {
+    MF.getRegInfo().setPhysRegUsed(AArch64::X29);
+    MF.getRegInfo().setPhysRegUsed(AArch64::X30);
+  }
+
+  // If addressing of local variables is going to be more complicated than
+  // shoving a base register and an offset into the instruction, then we may
+  // well need to scavenge registers. We should either specifically add a
+  // callee-save register for this purpose or allocate an extra spill slot.
+
+  bool BigStack =
+    (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+    || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
+    || (MFI->adjustsStack() && !hasReservedCallFrame(MF));
+
+  if (!BigStack)
+    return;
+
+  // We certainly need some slack space for the scavenger, preferably an extra
+  // register.
+  const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+  uint16_t ExtraReg = AArch64::NoRegister;
+
+  for (unsigned i = 0; CSRegs[i]; ++i) {
+    if (AArch64::GPR64RegClass.contains(CSRegs[i]) &&
+        !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) {
+      ExtraReg = CSRegs[i];
+      break;
+    }
+  }
+
+  if (ExtraReg != 0) {
+    MF.getRegInfo().setPhysRegUsed(ExtraReg);
+  } else {
+    // Create a stack slot for scavenging purposes.
+    // PrologEpilogInserter helpfully places it near either SP or FP for us,
+    // avoiding infinite regress during scavenging.
+    const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
+    RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+                                                       RC->getAlignment(),
+                                                       false));
+  }
+}

+bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB,
+                                                  unsigned Reg) const {
+  // If @llvm.returnaddress is called then it will refer to X30 by some means;
+  // the prologue store does not kill the register.
+  if (Reg == AArch64::X30) {
+    if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken()
+        && MBB.getParent()->getRegInfo().isLiveIn(Reg))
+      return false;
+  }
+
+  // In all other cases, physical registers are dead after they've been saved
+  // but live at the beginning of the prologue block.
+  MBB.addLiveIn(Reg);
+  return true;
+}
+
+void
+AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator MBBI,
+                                      const std::vector<CalleeSavedInfo> &CSI,
+                                      const TargetRegisterInfo *TRI,
+                                      LoadStoreMethod PossClasses[],
+                                      unsigned NumClasses) const {
+  DebugLoc DL = MBB.findDebugLoc(MBBI);
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  // A certain amount of implicit contract is present here. The actual stack
+  // offsets haven't been allocated officially yet, so for strictly correct
+  // code we rely on the fact that the elements of CSI are allocated in order
+  // starting at SP, purely as dictated by size and alignment. In practice
+  // since this function handles the only accesses to those slots it's not
+  // quite so important.
+  //
+  // We have also ordered the Callee-saved register list in AArch64CallingConv
+  // so that the above scheme puts registers in order: in particular we want
+  // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2).
+  for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+
+    // First we need to find out which register class the register belongs to
+    // so that we can use the correct load/store instructions.
+    unsigned ClassIdx;
+    for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) {
+      if (PossClasses[ClassIdx].RegClass->contains(Reg))
+        break;
+    }
+    assert(ClassIdx != NumClasses
+           && "Asked to store register in unexpected class");
+    const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass;
+
+    // Now we need to decide whether it's possible to emit a paired
+    // instruction: for this we want the next register to be in the same
+    // class.
+    MachineInstrBuilder NewMI;
+    bool Pair = false;
+    if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) {
+      Pair = true;
+      unsigned StLow = 0, StHigh = 0;
+      if (isPrologue) {
+        // Most of these registers will be live-in to the MBB and killed by
+        // our store, though there are exceptions (see determinePrologueDeath).
+        StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg()));
+        StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
+      } else {
+        StLow = RegState::Define;
+        StHigh = RegState::Define;
+      }
+
+      NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode))
+                .addReg(CSI[i+1].getReg(), StLow)
+                .addReg(CSI[i].getReg(), StHigh);
+
+      // If it's a paired op, we've consumed two registers.
+      ++i;
+    } else {
+      unsigned State;
+      if (isPrologue) {
+        State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
+      } else {
+        State = RegState::Define;
+      }
+
+      NewMI = BuildMI(MBB, MBBI, DL,
+                      TII.get(PossClasses[ClassIdx].SingleOpcode))
+                .addReg(CSI[i].getReg(), State);
+    }
+
+    // Note that the FrameIdx refers to the second register in a pair: it will
+    // be allocated the smaller numeric address and so is the one an LDP/STP
+    // address must use.
+    int FrameIdx = CSI[i].getFrameIdx();
+    MachineMemOperand::MemOperandFlags Flags;
+    Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
+    MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+                              Flags,
+                              Pair ? TheClass.getSize() * 2 : TheClass.getSize(),
+                              MFI.getObjectAlignment(FrameIdx));
+
+    NewMI.addFrameIndex(FrameIdx)
+      .addImm(0)          // address-register offset
+      .addMemOperand(MMO);
+
+    if (isPrologue)
+      NewMI.setMIFlags(MachineInstr::FrameSetup);
+
+    // For aesthetic reasons, during an epilogue we want to emit complementary
+    // operations to the prologue, but in the opposite order. So we still
+    // iterate through the CalleeSavedInfo list in order, but we put the
+    // instructions successively earlier in the MBB.
+    if (!isPrologue)
+      --MBBI;
+  }
+}
+
+bool
+AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MBBI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  static LoadStoreMethod PossibleClasses[] = {
+    {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR},
+    {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR},
+  };
+  unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
+
+  emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI,
+                  PossibleClasses, NumClasses);
+
+  return true;
+}
+
+bool
+AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MBBI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  static LoadStoreMethod PossibleClasses[] = {
+    {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR},
+    {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR},
+  };
+  unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
+
+  emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI,
+                  PossibleClasses, NumClasses);
+
+  return true;
+}
+
+bool
+AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+
+  // This is a decision of ABI compliance. The AArch64 PCS gives various
+  // options for conformance, and even at the most stringent level more or
+  // less permits elimination for leaf functions because there's no loss of
+  // functionality (for debugging etc.).
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls()) + return true; + + // The following are hard-limits: incorrect code will be generated if we try + // to omit the frame. + return (RI->needsStackRealignment(MF) || + MFI->hasVarSizedObjects() || + MFI->isFrameAddressTaken()); +} + +bool +AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const { + return MF.getFrameInfo()->hasVarSizedObjects(); +} + +bool +AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Of the various reasons for having a frame pointer, it's actually only + // variable-sized objects that prevent reservation of a call frame. + return !(hasFP(MF) && MFI->hasVarSizedObjects()); +} + +void +AArch64FrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + const AArch64InstrInfo &TII = + *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo()); + DebugLoc dl = MI->getDebugLoc(); + int Opcode = MI->getOpcode(); + bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode(); + uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0; + + if (!hasReservedCallFrame(MF)) { + unsigned Align = getStackAlignment(); + + int64_t Amount = MI->getOperand(0).getImm(); + Amount = RoundUpToAlignment(Amount, Align); + if (!IsDestroy) Amount = -Amount; + + // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it + // doesn't have to pop anything), then the first operand will be zero too so + // this adjustment is a no-op. + if (CalleePopAmount == 0) { + // FIXME: in-function stack adjustment for calls is limited to 12-bits + // because there's no guaranteed temporary register available. Mostly call + // frames will be allocated at the start of a function so this is OK, but + // it is a limitation that needs dealing with. + assert(Amount > -0xfff && Amount < 0xfff && "call frame too large"); + emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount); + } + } else if (CalleePopAmount != 0) { + // If the calling convention demands that the callee pops arguments from the + // stack, we want to add it back if we have a reserved call frame. + assert(CalleePopAmount < 0xfff && "call frame too large"); + emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount); + } + + MBB.erase(MI); +} diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h new file mode 100644 index 0000000..45ea0ec --- /dev/null +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -0,0 +1,108 @@ +//==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the AArch64-specific parts of the TargetFrameLowering +// class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64_FRAMEINFO_H +#define LLVM_AARCH64_FRAMEINFO_H + +#include "AArch64Subtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { +class AArch64Subtarget; + +class AArch64FrameLowering : public TargetFrameLowering { +private: + // In order to unify the spilling and restoring of callee-saved registers into + // emitFrameMemOps, we need to be able to specify which instructions to use + // for the relevant memory operations on each register class. An array of the + // following struct is populated and passed in to achieve this. + struct LoadStoreMethod { + const TargetRegisterClass *RegClass; // E.g. GPR64RegClass + + // The preferred instruction. + unsigned PairOpcode; // E.g. LSPair64_STR + + // Sometimes only a single register can be handled at once. + unsigned SingleOpcode; // E.g. LS64_STR + }; +protected: + const AArch64Subtarget &STI; + +public: + explicit AArch64FrameLowering(const AArch64Subtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16), + STI(sti) { + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + virtual void emitPrologue(MachineFunction &MF) const; + virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + /// Decides how much stack adjustment to perform in each phase of the prologue + /// and epilogue. + void splitSPAdjustments(uint64_t Total, uint64_t &Initial, + uint64_t &Residual) const; + + int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex, + unsigned &FrameReg, int SPAdj, + bool IsCalleeSaveOp) const; + + virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; + + virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + + /// If the register is X30 (i.e. LR) and the return address is used in the + /// function then the callee-save store doesn't actually kill the register, + /// otherwise it does. + bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const; + + /// This function emits the loads or stores required during prologue and + /// epilogue as efficiently as possible. + /// + /// The operations involved in setting up and tearing down the frame are + /// similar enough to warrant a shared function, particularly as discrepancies + /// between the two would be disastrous. 
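+  ///
+  /// For example (register choice purely illustrative), saving
+  /// {x29, x30, x21, x22} in a prologue should produce paired stores along
+  /// the lines of:
+  ///   stp x29, x30, [sp, #0]
+  ///   stp x21, x22, [sp, #16]
+  /// while the epilogue issues the matching ldp instructions in reverse
+  /// order.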
+  void emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
+                       MachineBasicBlock::iterator MI,
+                       const std::vector<CalleeSavedInfo> &CSI,
+                       const TargetRegisterInfo *TRI,
+                       LoadStoreMethod PossibleClasses[],
+                       unsigned NumClasses) const;
+
+  virtual bool hasFP(const MachineFunction &MF) const;
+
+  virtual bool useFPForAddressing(const MachineFunction &MF) const;
+
+  /// On AArch64 the call frame can be reserved unless the function uses
+  /// variable-sized stack objects; those are the only thing that prevents
+  /// reservation.
+  virtual bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
new file mode 100644
index 0000000..46b8221
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -0,0 +1,415 @@
+//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the AArch64 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-isel"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===--------------------------------------------------------------------===//
+/// AArch64 specific code to select AArch64 machine instructions for
+/// SelectionDAG operations.
+///
+namespace {
+
+class AArch64DAGToDAGISel : public SelectionDAGISel {
+  AArch64TargetMachine &TM;
+  const AArch64InstrInfo *TII;
+
+  /// Keep a pointer to the AArch64Subtarget around so that we can
+  /// make the right decision when generating code for different targets.
+  const AArch64Subtarget *Subtarget;
+
+public:
+  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
+                               CodeGenOpt::Level OptLevel)
+    : SelectionDAGISel(tm, OptLevel), TM(tm),
+      TII(static_cast<const AArch64InstrInfo*>(TM.getInstrInfo())),
+      Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
+  }
+
+  virtual const char *getPassName() const {
+    return "AArch64 Instruction Selection";
+  }
+
+  // Include the pieces autogenerated from the target description.
+#include "AArch64GenDAGISel.inc" + + template<unsigned MemSize> + bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) { + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN || CN->getZExtValue() % MemSize != 0 + || CN->getZExtValue() / MemSize > 0xfff) + return false; + + UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64); + return true; + } + + template<unsigned RegWidth> + bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { + return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); + } + + bool SelectFPZeroOperand(SDValue N, SDValue &Dummy); + + bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, + unsigned RegWidth); + + bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); + + bool SelectLogicalImm(SDValue N, SDValue &Imm); + + template<unsigned RegWidth> + bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) { + return SelectTSTBOperand(N, FixedPos, RegWidth); + } + + bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth); + + SDNode *TrySelectToMoveImm(SDNode *N); + SDNode *LowerToFPLitPool(SDNode *Node); + SDNode *SelectToLitPool(SDNode *N); + + SDNode* Select(SDNode*); +private: +}; +} + +bool +AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, + unsigned RegWidth) { + const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N); + if (!CN) return false; + + // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits + // is between 1 and 32 for a destination w-register, or 1 and 64 for an + // x-register. + // + // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we + // want THIS_NODE to be 2^fbits. This is much easier to deal with using + // integers. + bool IsExact; + + // fbits is between 1 and 64 in the worst-case, which means the fmul + // could have 2^64 as an actual operand. Need 65 bits of precision. + APSInt IntVal(65, true); + CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); + + // N.b. isPowerOf2 also checks for > 0. + if (!IsExact || !IntVal.isPowerOf2()) return false; + unsigned FBits = IntVal.logBase2(); + + // Checks above should have guaranteed that we haven't lost information in + // finding FBits, but it must still be in range. + if (FBits == 0 || FBits > RegWidth) return false; + + FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32); + return true; +} + +bool +AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps) { + switch (ConstraintCode) { + default: llvm_unreachable("Unrecognised AArch64 memory constraint"); + case 'm': + // FIXME: more freedom is actually permitted for 'm'. We can go + // hunting for a base and an offset if we want. Of course, since + // we don't really know how the operand is going to be used we're + // probably restricted to the load/store pair's simm7 as an offset + // range anyway. + case 'Q': + OutOps.push_back(Op); + } + + return false; +} + +bool +AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) { + ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N); + if (!Imm || !Imm->getValueAPF().isPosZero()) + return false; + + // Doesn't actually carry any information, but keeps TableGen quiet. 
+  Dummy = CurDAG->getTargetConstant(0, MVT::i32);
+  return true;
+}
+
+bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
+  uint32_t Bits;
+  uint32_t RegWidth = N.getValueType().getSizeInBits();
+
+  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+  if (!CN) return false;
+
+  if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
+    return false;
+
+  Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
+  return true;
+}
+
+SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
+  SDNode *ResNode;
+  DebugLoc dl = Node->getDebugLoc();
+  EVT DestType = Node->getValueType(0);
+  unsigned DestWidth = DestType.getSizeInBits();
+
+  unsigned MOVOpcode;
+  EVT MOVType;
+  int UImm16, Shift;
+  uint32_t LogicalBits;
+
+  uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
+  if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
+    MOVType = DestType;
+    MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
+  } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
+    MOVType = DestType;
+    MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
+  } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
+    // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we
+    // can use a 32-bit instruction: "movn w0, 0xedcb" (the 32-bit result is
+    // implicitly zero-extended to 64 bits).
+    MOVType = MVT::i32;
+    MOVOpcode = AArch64::MOVNwii;
+  } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) {
+    MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
+    uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;
+
+    return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
+                              CurDAG->getRegister(ZR, DestType),
+                              CurDAG->getTargetConstant(LogicalBits, MVT::i32));
+  } else {
+    // Can't handle it in one instruction. There's scope for permitting two
+    // (or more) instructions, but that'll need more thought.
+    return NULL;
+  }
+
+  ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
+                                CurDAG->getTargetConstant(UImm16, MVT::i32),
+                                CurDAG->getTargetConstant(Shift, MVT::i32));
+
+  if (MOVType != DestType) {
+    ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
+                          MVT::i64, MVT::i32, MVT::Other,
+                          CurDAG->getTargetConstant(0, MVT::i64),
+                          SDValue(ResNode, 0),
+                          CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
+  }
+
+  return ResNode;
+}
+
+SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
+  DebugLoc DL = Node->getDebugLoc();
+  uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
+  int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
+  EVT DestType = Node->getValueType(0);
+  EVT PtrVT = TLI.getPointerTy();
+
+  // Since we may end up loading a 64-bit constant from a 32-bit entry the
+  // constant in the pool may have a different type to the eventual node.
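+  // For example (illustrative values): 0x00000000_ffff0000 fits an i32 pool
+  // entry zero-extended on load, 0xffffffff_80000000 fits a sign-extended
+  // i32 entry, while an arbitrary 64-bit pattern needs a full i64 entry.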
+ ISD::LoadExtType Extension; + EVT MemType; + + assert((DestType == MVT::i64 || DestType == MVT::i32) + && "Only expect integer constants at the moment"); + + if (DestType == MVT::i32) { + Extension = ISD::NON_EXTLOAD; + MemType = MVT::i32; + } else if (UnsignedVal <= UINT32_MAX) { + Extension = ISD::ZEXTLOAD; + MemType = MVT::i32; + } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) { + Extension = ISD::SEXTLOAD; + MemType = MVT::i32; + } else { + Extension = ISD::NON_EXTLOAD; + MemType = MVT::i64; + } + + Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(), + MemType.getSizeInBits()), + UnsignedVal); + SDValue PoolAddr; + unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(CV->getType()); + PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, + AArch64II::MO_NO_FLAG), + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, + AArch64II::MO_LO12), + CurDAG->getConstant(Alignment, MVT::i32)); + + return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(), + PoolAddr, + MachinePointerInfo::getConstantPool(), MemType, + /* isVolatile = */ false, + /* isNonTemporal = */ false, + Alignment).getNode(); +} + +SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) { + DebugLoc DL = Node->getDebugLoc(); + const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue(); + EVT PtrVT = TLI.getPointerTy(); + EVT DestType = Node->getValueType(0); + + unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(FV->getType()); + SDValue PoolAddr; + + assert(TM.getCodeModel() == CodeModel::Small && + "Only small code model supported"); + PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0, + AArch64II::MO_NO_FLAG), + CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0, + AArch64II::MO_LO12), + CurDAG->getConstant(Alignment, MVT::i32)); + + return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr, + MachinePointerInfo::getConstantPool(), + /* isVolatile = */ false, + /* isNonTemporal = */ false, + /* isInvariant = */ true, + Alignment).getNode(); +} + +bool +AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos, + unsigned RegWidth) { + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) return false; + + uint64_t Val = CN->getZExtValue(); + + if (!isPowerOf2_64(Val)) return false; + + unsigned TestedBit = Log2_64(Val); + // Checks above should have guaranteed that we haven't lost information in + // finding TestedBit, but it must still be in range. + if (TestedBit >= RegWidth) return false; + + FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64); + return true; +} + +SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { + // Dump information about the Node being selected + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n"); + + if (Node->isMachineOpcode()) { + DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); + return NULL; + } + + switch (Node->getOpcode()) { + case ISD::FrameIndex: { + int FI = cast<FrameIndexSDNode>(Node)->getIndex(); + EVT PtrTy = TLI.getPointerTy(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy); + return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy, + TFI, CurDAG->getTargetConstant(0, PtrTy)); + } + case ISD::ConstantPool: { + // Constant pools are fine, just create a Target entry. 
+    ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node);
+    const Constant *C = CN->getConstVal();
+    SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0));
+
+    ReplaceUses(SDValue(Node, 0), CP);
+    return NULL;
+  }
+  case ISD::Constant: {
+    SDNode *ResNode = 0;
+    if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
+      // XZR and WZR are probably even better than an actual move: most of the
+      // time they can be folded into another instruction with *no* cost.
+
+      EVT Ty = Node->getValueType(0);
+      assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
+      uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
+      ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+                                       Node->getDebugLoc(),
+                                       Register, Ty).getNode();
+    }
+
+    // Next best option is a move-immediate, see if we can do that.
+    if (!ResNode) {
+      ResNode = TrySelectToMoveImm(Node);
+    }
+
+    if (ResNode)
+      return ResNode;
+
+    // If even that fails we fall back to a lit-pool entry at the moment.
+    // Future tuning may change this to a sequence of MOVZ/MOVN/MOVK
+    // instructions.
+    ResNode = SelectToLitPool(Node);
+    assert(ResNode && "We need *some* way to materialise a constant");
+
+    // We want to continue selection at this point since the litpool access
+    // we generated uses generic nodes for simplicity.
+    ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+    Node = ResNode;
+    break;
+  }
+  case ISD::ConstantFP: {
+    if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
+      // FMOV will take care of it from TableGen
+      break;
+    }
+
+    SDNode *ResNode = LowerToFPLitPool(Node);
+    ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+
+    // We want to continue selection at this point since the litpool access
+    // we generated uses generic nodes for simplicity.
+    Node = ResNode;
+    break;
+  }
+  default:
+    break; // Let generic code handle it
+  }
+
+  SDNode *ResNode = SelectCode(Node);
+
+  DEBUG(dbgs() << "=> ";
+        if (ResNode == NULL || ResNode == Node)
+          Node->dump(CurDAG);
+        else
+          ResNode->dump(CurDAG);
+        dbgs() << "\n");
+
+  return ResNode;
+}
+
+/// This pass converts a legalized DAG into an AArch64-specific DAG, ready for
+/// instruction scheduling.
+FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM,
+                                         CodeGenOpt::Level OptLevel) {
+  return new AArch64DAGToDAGISel(TM, OptLevel);
+}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
new file mode 100644
index 0000000..cea7f91
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -0,0 +1,2976 @@
+//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AArch64 uses to lower LLVM code into a
+// selection DAG.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "aarch64-isel" +#include "AArch64.h" +#include "AArch64ISelLowering.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64TargetMachine.h" +#include "AArch64TargetObjectFile.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/CallingConv.h" + +using namespace llvm; + +static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) { + const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>(); + + if (Subtarget->isTargetLinux()) + return new AArch64LinuxTargetObjectFile(); + if (Subtarget->isTargetELF()) + return new TargetLoweringObjectFileELF(); + llvm_unreachable("unknown subtarget type"); +} + + +AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) + : TargetLowering(TM, createTLOF(TM)), + Subtarget(&TM.getSubtarget<AArch64Subtarget>()), + RegInfo(TM.getRegisterInfo()), + Itins(TM.getInstrItineraryData()) { + + // SIMD compares set the entire lane's bits to 1 + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + + // Scalar register <-> type mapping + addRegisterClass(MVT::i32, &AArch64::GPR32RegClass); + addRegisterClass(MVT::i64, &AArch64::GPR64RegClass); + addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); + addRegisterClass(MVT::f32, &AArch64::FPR32RegClass); + addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); + addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); + + computeRegisterProperties(); + + // Some atomic operations can be folded into load-acquire or store-release + // instructions on AArch64. It's marginally simpler to let LLVM expand + // everything out to a barrier and then recombine the (few) barriers we can. + setInsertFencesForAtomic(true); + setTargetDAGCombine(ISD::ATOMIC_FENCE); + setTargetDAGCombine(ISD::ATOMIC_STORE); + + // We combine OR nodes for bitfield and NEON BSL operations. + setTargetDAGCombine(ISD::OR); + + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::SRA); + + // AArch64 does not have i1 loads, or much of anything for i1 really. + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + + setStackPointerRegisterToSaveRestore(AArch64::XSP); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + + // We'll lower globals to wrappers for selection. + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); + + // A64 instructions have the comparison predicate attached to the user of the + // result, but having a separate comparison is valuable for matching. 
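+  // For example, an i32 BR_CC with condition SETEQ is expected to become:
+  //   cmp w0, w1
+  //   b.eq dest
+  // rather than materialising the i1 comparison result in a register first.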
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom); + setOperationAction(ISD::BR_CC, MVT::i64, Custom); + setOperationAction(ISD::BR_CC, MVT::f32, Custom); + setOperationAction(ISD::BR_CC, MVT::f64, Custom); + + setOperationAction(ISD::SELECT, MVT::i32, Custom); + setOperationAction(ISD::SELECT, MVT::i64, Custom); + setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::SELECT, MVT::f64, Custom); + + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + + setOperationAction(ISD::BRCOND, MVT::Other, Custom); + + setOperationAction(ISD::SETCC, MVT::i32, Custom); + setOperationAction(ISD::SETCC, MVT::i64, Custom); + setOperationAction(ISD::SETCC, MVT::f32, Custom); + setOperationAction(ISD::SETCC, MVT::f64, Custom); + + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::JumpTable, MVT::i64, Custom); + + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VACOPY, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + + setOperationAction(ISD::BlockAddress, MVT::i64, Custom); + + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Expand); + + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + + // Legal floating-point operations. + setOperationAction(ISD::FABS, MVT::f32, Legal); + setOperationAction(ISD::FABS, MVT::f64, Legal); + + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + + setOperationAction(ISD::FNEG, MVT::f32, Legal); + setOperationAction(ISD::FNEG, MVT::f64, Legal); + + setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); + + setOperationAction(ISD::FSQRT, MVT::f32, Legal); + setOperationAction(ISD::FSQRT, MVT::f64, Legal); + + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + setOperationAction(ISD::ConstantFP, MVT::f128, Legal); + + // Illegal floating-point operations. 
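+  // (Most of these expand to runtime library calls: an f64 FSIN, for
+  // instance, ends up as a call to sin() from libm. FCOPYSIGN is instead
+  // expanded inline into integer bit manipulation.)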
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + + setOperationAction(ISD::FEXP, MVT::f32, Expand); + setOperationAction(ISD::FEXP, MVT::f64, Expand); + + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + + setOperationAction(ISD::FLOG, MVT::f32, Expand); + setOperationAction(ISD::FLOG, MVT::f64, Expand); + + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + + setOperationAction(ISD::FPOWI, MVT::f32, Expand); + setOperationAction(ISD::FPOWI, MVT::f64, Expand); + + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + + + // Virtually no operation on f128 is legal, but LLVM can't expand them when + // there's a valid register class, so we need custom operations in most cases. + setOperationAction(ISD::FABS, MVT::f128, Expand); + setOperationAction(ISD::FADD, MVT::f128, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); + setOperationAction(ISD::FCOS, MVT::f128, Expand); + setOperationAction(ISD::FDIV, MVT::f128, Custom); + setOperationAction(ISD::FMA, MVT::f128, Expand); + setOperationAction(ISD::FMUL, MVT::f128, Custom); + setOperationAction(ISD::FNEG, MVT::f128, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f128, Expand); + setOperationAction(ISD::FPOW, MVT::f128, Expand); + setOperationAction(ISD::FREM, MVT::f128, Expand); + setOperationAction(ISD::FRINT, MVT::f128, Expand); + setOperationAction(ISD::FSIN, MVT::f128, Expand); + setOperationAction(ISD::FSQRT, MVT::f128, Expand); + setOperationAction(ISD::FSUB, MVT::f128, Custom); + setOperationAction(ISD::FTRUNC, MVT::f128, Expand); + setOperationAction(ISD::SETCC, MVT::f128, Custom); + setOperationAction(ISD::BR_CC, MVT::f128, Custom); + setOperationAction(ISD::SELECT, MVT::f128, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); + + // Lowering for many of the conversions is actually specified by the non-f128 + // type. The LowerXXX function will be trivial when f128 isn't involved. 
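+  // (Where f128 is involved these become library calls: an FP_TO_SINT from
+  // f128 to i32, for instance, should lower to a call to __fixtfsi.)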
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); + setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); + setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); + + // This prevents LLVM trying to compress double constants into a floating + // constant-pool entry and trying to load from there. It's of doubtful benefit + // for A64: we'd need LDR followed by FCVT, I believe. + setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); + + setTruncStoreAction(MVT::f128, MVT::f64, Expand); + setTruncStoreAction(MVT::f128, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + + setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); + + setExceptionPointerRegister(AArch64::X0); + setExceptionSelectorRegister(AArch64::X1); +} + +EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const { + // It's reasonably important that this value matches the "natural" legal + // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself + // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64). + if (!VT.isVector()) return MVT::i32; + return VT.changeVectorElementTypeToInteger(); +} + +static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc, + unsigned &strOpc) { + switch (Size) { + default: llvm_unreachable("unsupported size for atomic binary op!"); + case 1: + ldrOpc = AArch64::LDXR_byte; + strOpc = AArch64::STXR_byte; + break; + case 2: + ldrOpc = AArch64::LDXR_hword; + strOpc = AArch64::STXR_hword; + break; + case 4: + ldrOpc = AArch64::LDXR_word; + strOpc = AArch64::STXR_word; + break; + case 8: + ldrOpc = AArch64::LDXR_dword; + strOpc = AArch64::STXR_dword; + break; + } +} + +MachineBasicBlock * +AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, + unsigned Size, + unsigned BinOpcode) const { + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
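+  // With BinOpcode == 0 no arithmetic is emitted at all: the incoming value
+  // is simply stored back exclusively, which is exactly a swap.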
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *MF = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned incr = MI->getOperand(2).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + + unsigned ldrOpc, strOpc; + getExclusiveOperation(Size, ldrOpc, strOpc); + + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + const TargetRegisterClass *TRC + = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; + unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); + + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // loopMBB: + // ldxr dest, ptr + // <binop> scratch, dest, incr + // stxr stxr_status, scratch, ptr + // cmp stxr_status, #0 + // b.ne loopMBB + // fallthrough --> exitMBB + BB = loopMBB; + BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + if (BinOpcode) { + // All arithmetic operations we'll be creating are designed to take an extra + // shift or extend operand, which we can conveniently set to zero. + + // Operand order needs to go the other way for NAND. + if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl) + BuildMI(BB, dl, TII->get(BinOpcode), scratch) + .addReg(incr).addReg(dest).addImm(0); + else + BuildMI(BB, dl, TII->get(BinOpcode), scratch) + .addReg(dest).addReg(incr).addImm(0); + } + + // From the stxr, the register is GPR32; from the cmp it's GPR32wsp + unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); + + BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr); + BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp)) + .addReg(stxr_status).addImm(0); + BuildMI(BB, dl, TII->get(AArch64::Bcc)) + .addImm(A64CC::NE).addMBB(loopMBB); + + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + + MI->eraseFromParent(); // The instruction is gone now. 
+ + return BB; +} + +MachineBasicBlock * +AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size, + unsigned CmpOp, + A64CC::CondCodes Cond) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *MF = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned incr = MI->getOperand(2).getReg(); + unsigned oldval = dest; + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + const TargetRegisterClass *TRC, *TRCsp; + if (Size == 8) { + TRC = &AArch64::GPR64RegClass; + TRCsp = &AArch64::GPR64xspRegClass; + } else { + TRC = &AArch64::GPR32RegClass; + TRCsp = &AArch64::GPR32wspRegClass; + } + + unsigned ldrOpc, strOpc; + getExclusiveOperation(Size, ldrOpc, strOpc); + + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + unsigned scratch = MRI.createVirtualRegister(TRC); + MRI.constrainRegClass(scratch, TRCsp); + + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // loopMBB: + // ldxr dest, ptr + // cmp incr, dest (, sign extend if necessary) + // csel scratch, dest, incr, cond + // stxr stxr_status, scratch, ptr + // cmp stxr_status, #0 + // b.ne loopMBB + // fallthrough --> exitMBB + BB = loopMBB; + BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + + // Build compare and cmov instructions. + MRI.constrainRegClass(incr, TRCsp); + BuildMI(BB, dl, TII->get(CmpOp)) + .addReg(incr).addReg(oldval).addImm(0); + + BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc), + scratch) + .addReg(oldval).addReg(incr).addImm(Cond); + + unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); + + BuildMI(BB, dl, TII->get(strOpc), stxr_status) + .addReg(scratch).addReg(ptr); + BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp)) + .addReg(stxr_status).addImm(0); + BuildMI(BB, dl, TII->get(AArch64::Bcc)) + .addImm(A64CC::NE).addMBB(loopMBB); + + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + + MI->eraseFromParent(); // The instruction is gone now. + + return BB; +} + +MachineBasicBlock * +AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size) const { + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned oldval = MI->getOperand(2).getReg(); + unsigned newval = MI->getOperand(3).getReg(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + const TargetRegisterClass *TRCsp; + TRCsp = Size == 8 ? 
&AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
+
+  unsigned ldrOpc, strOpc;
+  getExclusiveOperation(Size, ldrOpc, strOpc);
+
+  MachineFunction *MF = BB->getParent();
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = BB;
+  ++It; // insert the new blocks after the current block
+
+  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MF->insert(It, loop1MBB);
+  MF->insert(It, loop2MBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  // thisMBB:
+  // ...
+  // fallthrough --> loop1MBB
+  BB->addSuccessor(loop1MBB);
+
+  // loop1MBB:
+  //   ldxr dest, [ptr]
+  //   cmp dest, oldval
+  //   b.ne exitMBB
+  BB = loop1MBB;
+  BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+
+  unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl;
+  MRI.constrainRegClass(dest, TRCsp);
+  BuildMI(BB, dl, TII->get(CmpOp))
+    .addReg(dest).addReg(oldval).addImm(0);
+  BuildMI(BB, dl, TII->get(AArch64::Bcc))
+    .addImm(A64CC::NE).addMBB(exitMBB);
+  BB->addSuccessor(loop2MBB);
+  BB->addSuccessor(exitMBB);
+
+  // loop2MBB:
+  //   stxr stxr_status, newval, [ptr]
+  //   cmp stxr_status, #0
+  //   b.ne loop1MBB
+  BB = loop2MBB;
+  unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+  MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+
+  BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr);
+  BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
+    .addReg(stxr_status).addImm(0);
+  BuildMI(BB, dl, TII->get(AArch64::Bcc))
+    .addImm(A64CC::NE).addMBB(loop1MBB);
+  BB->addSuccessor(loop1MBB);
+  BB->addSuccessor(exitMBB);
+
+  // exitMBB:
+  // ...
+  BB = exitMBB;
+
+  MI->eraseFromParent(); // The instruction is gone now.
+
+  return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
+                                    MachineBasicBlock *MBB) const {
+  // We materialise the F128CSEL pseudo-instruction using conditional branches
+  // and loads, giving an instruction sequence like:
+  //   str q0, [sp]
+  //   b.ne IfTrue
+  //   b Finish
+  // IfTrue:
+  //   str q1, [sp]
+  // Finish:
+  //   ldr q0, [sp]
+  //
+  // Using virtual registers would probably not be beneficial since COPY
+  // instructions are expensive for f128 (there's no actual instruction to
+  // implement them).
+  //
+  // An alternative would be to do an integer-CSEL on some address. E.g.:
+  //   mov x0, sp
+  //   add x1, sp, #16
+  //   str q0, [x0]
+  //   str q1, [x1]
+  //   csel x0, x0, x1, ne
+  //   ldr q0, [x0]
+  //
+  // It's unclear which approach is actually optimal.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineFunction *MF = MBB->getParent(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + DebugLoc DL = MI->getDebugLoc(); + MachineFunction::iterator It = MBB; + ++It; + + unsigned DestReg = MI->getOperand(0).getReg(); + unsigned IfTrueReg = MI->getOperand(1).getReg(); + unsigned IfFalseReg = MI->getOperand(2).getReg(); + unsigned CondCode = MI->getOperand(3).getImm(); + bool NZCVKilled = MI->getOperand(4).isKill(); + + MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, TrueBB); + MF->insert(It, EndBB); + + // Transfer rest of current basic-block to EndBB + EndBB->splice(EndBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), + MBB->end()); + EndBB->transferSuccessorsAndUpdatePHIs(MBB); + + // We need somewhere to store the f128 value needed. + int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16); + + // [... start of incoming MBB ...] + // str qIFFALSE, [sp] + // b.cc IfTrue + // b Done + BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR)) + .addReg(IfFalseReg) + .addFrameIndex(ScratchFI) + .addImm(0); + BuildMI(MBB, DL, TII->get(AArch64::Bcc)) + .addImm(CondCode) + .addMBB(TrueBB); + BuildMI(MBB, DL, TII->get(AArch64::Bimm)) + .addMBB(EndBB); + MBB->addSuccessor(TrueBB); + MBB->addSuccessor(EndBB); + + // IfTrue: + // str qIFTRUE, [sp] + BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR)) + .addReg(IfTrueReg) + .addFrameIndex(ScratchFI) + .addImm(0); + + // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the + // blocks. + TrueBB->addSuccessor(EndBB); + + // Done: + // ldr qDEST, [sp] + // [... rest of incoming MBB ...] + if (!NZCVKilled) + EndBB->addLiveIn(AArch64::NZCV); + MachineInstr *StartOfEnd = EndBB->begin(); + BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg) + .addFrameIndex(ScratchFI) + .addImm(0); + + MI->eraseFromParent(); + return EndBB; +} + +MachineBasicBlock * +AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const { + switch (MI->getOpcode()) { + default: llvm_unreachable("Unhandled instruction with custom inserter"); + case AArch64::F128CSEL: + return EmitF128CSEL(MI, MBB); + case AArch64::ATOMIC_LOAD_ADD_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl); + case AArch64::ATOMIC_LOAD_ADD_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl); + case AArch64::ATOMIC_LOAD_ADD_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl); + case AArch64::ATOMIC_LOAD_ADD_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl); + + case AArch64::ATOMIC_LOAD_SUB_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl); + case AArch64::ATOMIC_LOAD_SUB_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl); + case AArch64::ATOMIC_LOAD_SUB_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl); + case AArch64::ATOMIC_LOAD_SUB_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl); + + case AArch64::ATOMIC_LOAD_AND_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl); + case AArch64::ATOMIC_LOAD_AND_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl); + case AArch64::ATOMIC_LOAD_AND_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl); + case AArch64::ATOMIC_LOAD_AND_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl); + + case AArch64::ATOMIC_LOAD_OR_I8: + return emitAtomicBinary(MI, MBB, 
1, AArch64::ORRwww_lsl); + case AArch64::ATOMIC_LOAD_OR_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl); + case AArch64::ATOMIC_LOAD_OR_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl); + case AArch64::ATOMIC_LOAD_OR_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl); + + case AArch64::ATOMIC_LOAD_XOR_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl); + case AArch64::ATOMIC_LOAD_XOR_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl); + case AArch64::ATOMIC_LOAD_XOR_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl); + case AArch64::ATOMIC_LOAD_XOR_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl); + + case AArch64::ATOMIC_LOAD_NAND_I8: + return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl); + case AArch64::ATOMIC_LOAD_NAND_I16: + return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl); + case AArch64::ATOMIC_LOAD_NAND_I32: + return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl); + case AArch64::ATOMIC_LOAD_NAND_I64: + return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl); + + case AArch64::ATOMIC_LOAD_MIN_I8: + return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT); + case AArch64::ATOMIC_LOAD_MIN_I16: + return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT); + case AArch64::ATOMIC_LOAD_MIN_I32: + return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT); + case AArch64::ATOMIC_LOAD_MIN_I64: + return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT); + + case AArch64::ATOMIC_LOAD_MAX_I8: + return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT); + case AArch64::ATOMIC_LOAD_MAX_I16: + return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT); + case AArch64::ATOMIC_LOAD_MAX_I32: + return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT); + case AArch64::ATOMIC_LOAD_MAX_I64: + return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT); + + case AArch64::ATOMIC_LOAD_UMIN_I8: + return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI); + case AArch64::ATOMIC_LOAD_UMIN_I16: + return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI); + case AArch64::ATOMIC_LOAD_UMIN_I32: + return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI); + case AArch64::ATOMIC_LOAD_UMIN_I64: + return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI); + + case AArch64::ATOMIC_LOAD_UMAX_I8: + return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO); + case AArch64::ATOMIC_LOAD_UMAX_I16: + return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO); + case AArch64::ATOMIC_LOAD_UMAX_I32: + return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO); + case AArch64::ATOMIC_LOAD_UMAX_I64: + return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO); + + case AArch64::ATOMIC_SWAP_I8: + return emitAtomicBinary(MI, MBB, 1, 0); + case AArch64::ATOMIC_SWAP_I16: + return emitAtomicBinary(MI, MBB, 2, 0); + case AArch64::ATOMIC_SWAP_I32: + return emitAtomicBinary(MI, MBB, 4, 0); + case AArch64::ATOMIC_SWAP_I64: + return emitAtomicBinary(MI, MBB, 8, 0); + + case AArch64::ATOMIC_CMP_SWAP_I8: + return emitAtomicCmpSwap(MI, MBB, 1); + case AArch64::ATOMIC_CMP_SWAP_I16: + return emitAtomicCmpSwap(MI, MBB, 2); + case AArch64::ATOMIC_CMP_SWAP_I32: + return emitAtomicCmpSwap(MI, MBB, 4); + case AArch64::ATOMIC_CMP_SWAP_I64: + return emitAtomicCmpSwap(MI, MBB, 8); + } +} + + +const 
char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC"; + case AArch64ISD::Call: return "AArch64ISD::Call"; + case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV"; + case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad"; + case AArch64ISD::BFI: return "AArch64ISD::BFI"; + case AArch64ISD::EXTR: return "AArch64ISD::EXTR"; + case AArch64ISD::Ret: return "AArch64ISD::Ret"; + case AArch64ISD::SBFX: return "AArch64ISD::SBFX"; + case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC"; + case AArch64ISD::SETCC: return "AArch64ISD::SETCC"; + case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; + case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; + case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL"; + case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall"; + + default: return NULL; + } +} + +static const uint16_t AArch64FPRArgRegs[] = { + AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, + AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7 +}; +static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs); + +static const uint16_t AArch64ArgRegs[] = { + AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, + AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7 +}; +static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs); + +static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + // Mark all remaining general purpose registers as allocated. We don't + // backtrack: if (for example) an i128 gets put on the stack, no subsequent + // i64 will go in registers (C.11). + for (unsigned i = 0; i < NumArgRegs; ++i) + State.AllocateReg(AArch64ArgRegs[i]); + + return false; +} + +#include "AArch64GenCallingConv.inc" + +CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { + + switch(CC) { + default: llvm_unreachable("Unsupported calling convention"); + case CallingConv::Fast: + case CallingConv::C: + return CC_A64_APCS; + } +} + +void +AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc DL, SDValue &Chain) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo<AArch64MachineFunctionInfo>(); + + SmallVector<SDValue, 8> MemOps; + + unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs, + NumArgRegs); + unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs, + NumFPRArgRegs); + + unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR); + int GPRIdx = 0; + if (GPRSaveSize != 0) { + GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false); + + SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy()); + + for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) { + unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass); + SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); + SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, + MachinePointerInfo::getStack(i * 8), + false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, + DAG.getConstant(8, getPointerTy())); + } + } + + unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR); + int FPRIdx = 0; + if (FPRSaveSize != 0) { + FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false); + + SDValue FIN = DAG.getFrameIndex(FPRIdx, 
getPointerTy()); + + for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { + unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i], + &AArch64::FPR128RegClass); + SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); + SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, + MachinePointerInfo::getStack(i * 16), + false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, + DAG.getConstant(16, getPointerTy())); + } + } + + int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true); + + FuncInfo->setVariadicStackIdx(StackIdx); + FuncInfo->setVariadicGPRIdx(GPRIdx); + FuncInfo->setVariadicGPRSize(GPRSaveSize); + FuncInfo->setVariadicFPRIdx(FPRIdx); + FuncInfo->setVariadicFPRSize(FPRSaveSize); + + if (!MemOps.empty()) { + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], + MemOps.size()); + } +} + + +SDValue +AArch64TargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo<AArch64MachineFunctionInfo>(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv)); + + SmallVector<SDValue, 16> ArgValues; + + SDValue ArgValue; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + + if (Flags.isByVal()) { + // Byval is used for small structs and HFAs in the PCS, but the system + // should work in a non-compliant manner for larger structs. 
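The handler that follows rounds a byval argument's size up to a whole number of 8-byte stack slots before creating its fixed frame object. A minimal, compilable restatement of that arithmetic ("Packet" is an invented example type, not part of the backend):

#include <cassert>

struct Packet { char tag; int len; char body[7]; }; // invented; 16 bytes after padding

constexpr unsigned slotsForByVal(unsigned SizeInBytes) {
  return (SizeInBytes + 7) / 8; // same rounding as (Size + 7) / 8 below
}

int main() {
  assert(slotsForByVal(1) == 1);   // even a 1-byte struct takes a full slot
  assert(slotsForByVal(12) == 2);  // 12 bytes -> two 8-byte slots
  assert(slotsForByVal(sizeof(Packet)) * 8 >= sizeof(Packet));
  return 0;
}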
+ EVT PtrTy = getPointerTy(); + int Size = Flags.getByValSize(); + unsigned NumRegs = (Size + 7) / 8; + + unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs, + VA.getLocMemOffset(), + false); + SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy); + InVals.push_back(FrameIdxN); + + continue; + } else if (VA.isRegLoc()) { + MVT RegVT = VA.getLocVT(); + const TargetRegisterClass *RC = getRegClassFor(RegVT); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + + ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); + } else { // VA.isRegLoc() + assert(VA.isMemLoc()); + + int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, + VA.getLocMemOffset(), true); + + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, false, 0); + + + } + + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + ArgValue = DAG.getNode(ISD::BITCAST,dl, VA.getValVT(), ArgValue); + break; + case CCValAssign::SExt: + case CCValAssign::ZExt: + case CCValAssign::AExt: { + unsigned DestSize = VA.getValVT().getSizeInBits(); + unsigned DestSubReg; + + switch (DestSize) { + case 8: DestSubReg = AArch64::sub_8; break; + case 16: DestSubReg = AArch64::sub_16; break; + case 32: DestSubReg = AArch64::sub_32; break; + case 64: DestSubReg = AArch64::sub_64; break; + default: llvm_unreachable("Unexpected argument promotion"); + } + + ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, + VA.getValVT(), ArgValue, + DAG.getTargetConstant(DestSubReg, MVT::i32)), + 0); + break; + } + } + + InVals.push_back(ArgValue); + } + + if (isVarArg) + SaveVarArgRegisters(CCInfo, DAG, dl, Chain); + + unsigned StackArgSize = CCInfo.getNextStackOffset(); + if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) { + // This is a non-standard ABI so by fiat I say we're allowed to make full + // use of the stack area to be popped, which must be aligned to 16 bytes in + // any case: + StackArgSize = RoundUpToAlignment(StackArgSize, 16); + + // If we're expected to restore the stack (e.g. fastcc) then we'll be adding + // a multiple of 16. + FuncInfo->setArgumentStackToRestore(StackArgSize); + + // This realignment carries over to the available bytes below. Our own + // callers will guarantee the space is free by giving an aligned value to + // CALLSEQ_START. + } + // Even if we're not expected to free up the space, it's useful to know how + // much is there while considering tail calls (because we can reuse it). + FuncInfo->setBytesInStackArgArea(StackArgSize); + + return Chain; +} + +SDValue +AArch64TargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, SelectionDAG &DAG) const { + // CCValAssign - represent the assignment of the return value to a location. + SmallVector<CCValAssign, 16> RVLocs; + + // CCState - Info about the registers and stack slots. + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + + // Analyze outgoing return values. 
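Stepping back for a moment: the RoundUpToAlignment(StackArgSize, 16) call above is a plain power-of-two round-up, shown here standalone so the callee-popped sizes are easy to sanity-check (illustrative only, not the in-tree helper):

#include <cassert>
#include <cstdint>

constexpr std::uint64_t roundUpTo16(std::uint64_t Bytes) {
  return (Bytes + 15) & ~std::uint64_t(15);
}

int main() {
  assert(roundUpTo16(0) == 0);
  assert(roundUpTo16(8) == 16);   // one 8-byte slot still pops 16 bytes
  assert(roundUpTo16(24) == 32);
  return 0;
}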
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv)); + + SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); + + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + // PCS: "If the type, T, of the result of a function is such that + // void func(T arg) would require that arg be passed as a value in a + // register (or set of registers) according to the rules in 5.4, then the + // result is returned in the same registers as would be used for such an + // argument. + // + // Otherwise, the caller shall reserve a block of memory of sufficient + // size and alignment to hold the result. The address of the memory block + // shall be passed as an additional argument to the function in x8." + // + // This is implemented in two places. The register-return values are dealt + // with here, more complex returns are passed as an sret parameter, which + // means we don't have to worry about it during actual return. + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Only register-returns should be created by PCS"); + + + SDValue Arg = OutVals[i]; + + // There's no convenient note in the ABI about this as there is for normal + // arguments, but it says return values are passed in the same registers as + // an argument would be. I believe that includes the comments about + // unspecified higher bits, putting the burden of widening on the *caller* + // for return values. + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + case CCValAssign::ZExt: + case CCValAssign::AExt: + // Floating-point values should only be extended when they're going into + // memory, which can't happen here so an integer extend is acceptable. + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); + break; + } + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. 
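The PCS rule quoted above splits returns into two classes; in source terms the distinction looks like this (type names invented). "Small" would be passed in registers, so it is returned in them and handled by the loop here; "Big" would not, so the caller passes an sret buffer address in x8 and this function has nothing to do:

struct Small { long a, b; };  // expected back in x0/x1
struct Big { long a[5]; };    // expected via the x8 sret pointer

Small makeSmall() { Small s = {1, 2}; return s; }
Big makeBig() { Big b = {{1, 2, 3, 4, 5}}; return b; }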
+ if (Flag.getNode()) + RetOps.push_back(Flag); + + return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other, + &RetOps[0], RetOps.size()); +} + +SDValue +AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; + SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; + SmallVector<SDValue, 32> &OutVals = CLI.OutVals; + SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &IsTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool IsVarArg = CLI.IsVarArg; + + MachineFunction &MF = DAG.getMachineFunction(); + AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo<AArch64MachineFunctionInfo>(); + bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; + bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet(); + bool IsSibCall = false; + + if (IsTailCall) { + IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, + IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), + Outs, OutVals, Ins, DAG); + + // A sibling call is one where we're under the usual C ABI and not planning + // to change that but can still do a tail call: + if (!TailCallOpt && IsTailCall) + IsSibCall = true; + } + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv)); + + // On AArch64 (and all other architectures I'm aware of) the most this has to + // do is adjust the stack pointer. + unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16); + if (IsSibCall) { + // Since we're not changing the ABI to make this a tail call, the memory + // operands are already available in the caller's incoming argument space. + NumBytes = 0; + } + + // FPDiff is the byte offset of the call's argument area from the callee's. + // Stores to callee stack arguments will be placed in FixedStackSlots offset + // by this amount for a tail call. In a sibling call it must be 0 because the + // caller will deallocate the entire stack and the callee still expects its + // arguments to begin at SP+0. Completely unused for non-tail calls. + int FPDiff = 0; + + if (IsTailCall && !IsSibCall) { + unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); + + // FPDiff will be negative if this tail call requires more space than we + // would automatically have in our incoming argument space. Positive if we + // can actually shrink the stack. + FPDiff = NumReusableBytes - NumBytes; + + // The stack pointer must be 16-byte aligned at all times it's used for a + // memory operation, which in practice means at *all* times and in + // particular across call boundaries. Therefore our own arguments started at + // a 16-byte aligned SP and the delta applied for the tail call should + // satisfy the same constraint. 
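A standalone restatement of that FPDiff bookkeeping, with invented byte counts; since both areas are 16-byte multiples, the delta is too, which is exactly what the assertion that follows checks:

#include <cassert>

int main() {
  int NumReusableBytes = 32; // incoming argument area we already own
  int NumBytes = 48;         // argument area this tail call needs
  int FPDiff = NumReusableBytes - NumBytes;
  assert(FPDiff % 16 == 0);  // mirrors the in-tree assertion
  // FPDiff == -16 here: the callee needs 16 bytes more than we can reuse,
  // so its stack arguments sit 16 bytes below where our own began.
  return 0;
}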
+ assert(FPDiff % 16 == 0 && "unaligned stack on tail call"); + } + + if (!IsSibCall) + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + + SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP, + getPointerTy()); + + SmallVector<SDValue, 8> MemOpChains; + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + SDValue Arg = OutVals[i]; + + // Callee does the actual widening, so all extensions just use an implicit + // definition of the rest of the Loc. Aesthetically, this would be nicer as + // an ANY_EXTEND, but that isn't valid for floating-point types and this + // alternative works on integer types too. + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + case CCValAssign::ZExt: + case CCValAssign::AExt: { + unsigned SrcSize = VA.getValVT().getSizeInBits(); + unsigned SrcSubReg; + + switch (SrcSize) { + case 8: SrcSubReg = AArch64::sub_8; break; + case 16: SrcSubReg = AArch64::sub_16; break; + case 32: SrcSubReg = AArch64::sub_32; break; + case 64: SrcSubReg = AArch64::sub_64; break; + default: llvm_unreachable("Unexpected argument promotion"); + } + + Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, + VA.getLocVT(), + DAG.getUNDEF(VA.getLocVT()), + Arg, + DAG.getTargetConstant(SrcSubReg, MVT::i32)), + 0); + + break; + } + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); + break; + } + + if (VA.isRegLoc()) { + // A normal register (sub-) argument. For now we just note it down because + // we want to copy things into registers as late as possible to avoid + // register-pressure (and possibly worse). + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + continue; + } + + assert(VA.isMemLoc() && "unexpected argument location"); + + SDValue DstAddr; + MachinePointerInfo DstInfo; + if (IsTailCall) { + uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() : + VA.getLocVT().getSizeInBits(); + OpSize = (OpSize + 7) / 8; + int32_t Offset = VA.getLocMemOffset() + FPDiff; + int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); + + DstAddr = DAG.getFrameIndex(FI, getPointerTy()); + DstInfo = MachinePointerInfo::getFixedStack(FI); + + // Make sure any stack arguments overlapping with where we're storing are + // loaded before this eventual operation. Otherwise they'll be clobbered. + Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI); + } else { + SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()); + + DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset()); + } + + if (Flags.isByVal()) { + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64); + SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode, + Flags.getByValAlign(), + /*isVolatile = */ false, + /*alwaysInline = */ false, + DstInfo, MachinePointerInfo(0)); + MemOpChains.push_back(Cpy); + } else { + // Normal stack argument, put it where it's needed. + SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo, + false, false, 0); + MemOpChains.push_back(Store); + } + } + + // The loads and stores generated above shouldn't clash with each + // other. Combining them with this TokenFactor notes that fact for the rest of + // the backend. 
+ if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Most of the rest of the instructions need to be glued together; we don't + // want assignments to actual registers used by a call to be rearranged by a + // well-meaning scheduler. + SDValue InFlag; + + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // The linker is responsible for inserting veneers when necessary to put a + // function call destination in range, so we don't need to bother with a + // wrapper here. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); + } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + const char *Sym = S->getSymbol(); + Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy()); + } + + // We don't usually want to end the call-sequence here because we would tidy + // the frame up *after* the call; however, in the ABI-changing tail-call case + // we've carefully laid out the parameters so that when sp is reset they'll be + // in the correct location. + if (IsTailCall && !IsSibCall) { + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + InFlag = Chain.getValue(1); + } + + // We produce the following DAG scheme for the actual call instruction: + // (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag?) + // + // Most of these operands exist only to keep the values live as far as LLVM + // is concerned. It's expected to be selected as simply "bl callee" (for a + // direct, non-tail call). + std::vector<SDValue> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + if (IsTailCall) { + // Each tail call may have to adjust the stack by a different amount, so + // this information must travel along with the operation for eventual + // consumption by emitEpilogue. + Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32)); + } + + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + + // Add a register mask operand representing the call-preserved registers. This + // is used later in codegen to constrain register-allocation. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // If we needed glue, put it in as the last argument. + if (InFlag.getNode()) + Ops.push_back(InFlag); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + if (IsTailCall) { + return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + } + + Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + // Now we can reclaim the stack; we may as well do it before working out where + // our return value is. + if (!IsSibCall) { + uint64_t CalleePopBytes + = DoesCalleeRestoreStack(CallConv, TailCallOpt) ?
NumBytes : 0; + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(CalleePopBytes, true), + InFlag); + InFlag = Chain.getValue(1); + } + + return LowerCallResult(Chain, InFlag, CallConv, + IsVarArg, Ins, dl, DAG, InVals); +} + +SDValue +AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv)); + + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign VA = RVLocs[i]; + + // Return values that are too big to fit into registers should use an sret + // pointer, so this can be a lot simpler than the main argument code. + assert(VA.isRegLoc() && "Memory locations not expected for call return"); + + SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), + InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); + break; + case CCValAssign::ZExt: + case CCValAssign::SExt: + case CCValAssign::AExt: + // Floating-point arguments only get extended/truncated if they're going + // in memory, so using the integer operation is acceptable here. + Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); + break; + } + + InVals.push_back(Val); + } + + return Chain; +} + +bool +AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool IsVarArg, + bool IsCalleeStructRet, + bool IsCallerStructRet, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const { + + // For CallingConv::C this function knows whether the ABI needs + // changing. That's not true for other conventions so they will have to opt in + // manually. + if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C) + return false; + + const MachineFunction &MF = DAG.getMachineFunction(); + const Function *CallerF = MF.getFunction(); + CallingConv::ID CallerCC = CallerF->getCallingConv(); + bool CCMatch = CallerCC == CalleeCC; + + // Byval parameters hand the function a pointer directly into the stack area + // we want to reuse during a tail call. Working around this *is* possible (see + // X86) but less efficient and uglier in LowerCall. + for (Function::const_arg_iterator i = CallerF->arg_begin(), + e = CallerF->arg_end(); i != e; ++i) + if (i->hasByValAttr()) + return false; + + if (getTargetMachine().Options.GuaranteedTailCallOpt) { + if (IsTailCallConvention(CalleeCC) && CCMatch) + return true; + return false; + } + + // Now we search for cases where we can use a tail call without changing the + // ABI. Sibcall is used in some places (particularly gcc) to refer to this + // concept. + + // I want anyone implementing a new calling convention to think long and hard + // about this assert. 
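In source terms, the distinctions drawn so far look like this (all names invented; both callers use the plain C convention). The first caller can become a sibling call; the second is rejected by the byval scan above, because its own aggregate parameter points into the very stack area a tail call would reuse:

struct Chunk { char bytes[64]; };

static int target(int x) { return x; }
static int consume(Chunk c) { return c.bytes[0]; }

int goodCaller(int x) { return target(x + 1); } // eligible: lowers to "b target"
int badCaller(Chunk c) { return consume(c); }   // not eligible: the caller has a
                                                // byval-style aggregate argument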
+ assert((!IsVarArg || CalleeCC == CallingConv::C) + && "Unexpected variadic calling convention"); + + if (IsVarArg && !Outs.empty()) { + // At least two cases here: if caller is fastcc then we can't have any + // memory arguments (we'd be expected to clean up the stack afterwards). If + // caller is C then we could potentially use its argument area. + + // FIXME: for now we take the most conservative of these in both cases: + // disallow all variadic memory operands. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) + if (!ArgLocs[i].isRegLoc()) + return false; + } + + // If the calling conventions do not match, then we'd better make sure the + // results are returned in the same way as what the caller expects. + if (!CCMatch) { + SmallVector<CCValAssign, 16> RVLocs1; + CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), + getTargetMachine(), RVLocs1, *DAG.getContext()); + CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC)); + + SmallVector<CCValAssign, 16> RVLocs2; + CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), + getTargetMachine(), RVLocs2, *DAG.getContext()); + CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC)); + + if (RVLocs1.size() != RVLocs2.size()) + return false; + for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { + if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) + return false; + if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) + return false; + if (RVLocs1[i].isRegLoc()) { + if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) + return false; + } else { + if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) + return false; + } + } + } + + // Nothing more to check if the callee is taking no arguments + if (Outs.empty()) + return true; + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); + + const AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo<AArch64MachineFunctionInfo>(); + + // If the stack arguments for this call would fit into our own save area then + // the call can be made tail. + return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea(); +} + +bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, + bool TailCallOpt) const { + return CallCC == CallingConv::Fast && TailCallOpt; +} + +bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const { + return CallCC == CallingConv::Fast; +} + +SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, + SelectionDAG &DAG, + MachineFrameInfo *MFI, + int ClobberedFI) const { + SmallVector<SDValue, 8> ArgChains; + int64_t FirstByte = MFI->getObjectOffset(ClobberedFI); + int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1; + + // Include the original chain at the beginning of the list. When this is + // used by target LowerCall hooks, this helps legalize find the + // CALLSEQ_BEGIN node. 
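The interval test used in the loop below reads more easily in isolation; a compilable sketch with a couple of sanity checks (not the in-tree code):

#include <cassert>
#include <cstdint>

// True when [InFirstByte, InLastByte] shares at least one byte with the
// clobbered range [FirstByte, LastByte]; the same condition as below.
static bool overlaps(std::int64_t FirstByte, std::int64_t LastByte,
                     std::int64_t InFirstByte, std::int64_t InLastByte) {
  return (InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
         (FirstByte <= InFirstByte && InFirstByte <= LastByte);
}

int main() {
  assert(overlaps(8, 15, 0, 8));    // shares byte 8
  assert(overlaps(8, 15, 10, 11));  // fully contained
  assert(!overlaps(8, 15, 16, 23)); // adjacent but disjoint
  return 0;
}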
+ ArgChains.push_back(Chain); + + // Add a chain value for each incoming-argument load that overlaps the area + // we're about to clobber. + for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(), + UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U) + if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) + if (FI->getIndex() < 0) { + int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex()); + int64_t InLastByte = InFirstByte; + InLastByte += MFI->getObjectSize(FI->getIndex()) - 1; + + if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) || + (FirstByte <= InFirstByte && InFirstByte <= LastByte)) + ArgChains.push_back(SDValue(L, 1)); + } + + // Build a tokenfactor for all the chains. + return DAG.getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other, + &ArgChains[0], ArgChains.size()); +} + +static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) { + switch (CC) { + case ISD::SETEQ: return A64CC::EQ; + case ISD::SETGT: return A64CC::GT; + case ISD::SETGE: return A64CC::GE; + case ISD::SETLT: return A64CC::LT; + case ISD::SETLE: return A64CC::LE; + case ISD::SETNE: return A64CC::NE; + case ISD::SETUGT: return A64CC::HI; + case ISD::SETUGE: return A64CC::HS; + case ISD::SETULT: return A64CC::LO; + case ISD::SETULE: return A64CC::LS; + default: llvm_unreachable("Unexpected condition code"); + } +} + +bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const { + // icmp is implemented using adds/subs immediate, which take an unsigned + // 12-bit immediate, optionally shifted left by 12 bits. + + // The check is symmetric in sign because we can use either adds or subs. + if (Val < 0) + Val = -Val; + + return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0; +} + +SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS, + ISD::CondCode CC, SDValue &A64cc, + SelectionDAG &DAG, DebugLoc &dl) const { + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { + int64_t C = 0; + EVT VT = RHSC->getValueType(0); + bool knownInvalid = false; + + // I'm not convinced the rest of LLVM handles these edge cases properly, but + // we can at least get it right. + if (isSignedIntSetCC(CC)) { + C = RHSC->getSExtValue(); + } else if (RHSC->getZExtValue() > INT64_MAX) { + // A 64-bit constant not representable by a signed 64-bit integer is far + // too big to fit into a SUBS immediate anyway. + knownInvalid = true; + } else { + C = RHSC->getZExtValue(); + } + + if (!knownInvalid && !isLegalICmpImmediate(C)) { + // The constant doesn't fit, so try adjusting it by one. + switch (CC) { + default: break; + case ISD::SETLT: + case ISD::SETGE: + if (isLegalICmpImmediate(C-1)) { + CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; + RHS = DAG.getConstant(C-1, VT); + } + break; + case ISD::SETULT: + case ISD::SETUGE: + if (isLegalICmpImmediate(C-1)) { + CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; + RHS = DAG.getConstant(C-1, VT); + } + break; + case ISD::SETLE: + case ISD::SETGT: + if (isLegalICmpImmediate(C+1)) { + CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; + RHS = DAG.getConstant(C+1, VT); + } + break; + case ISD::SETULE: + case ISD::SETUGT: + if (isLegalICmpImmediate(C+1)) { + CC = (CC == ISD::SETULE) ?
ISD::SETULT : ISD::SETUGE; + RHS = DAG.getConstant(C+1, VT); + } + break; + } + } + } + + A64CC::CondCodes CondCode = IntCCToA64CC(CC); + A64cc = DAG.getConstant(CondCode, MVT::i32); + return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, + DAG.getCondCode(CC)); +} + +static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC, + A64CC::CondCodes &Alternative) { + A64CC::CondCodes CondCode = A64CC::Invalid; + Alternative = A64CC::Invalid; + + switch (CC) { + default: llvm_unreachable("Unknown FP condition!"); + case ISD::SETEQ: + case ISD::SETOEQ: CondCode = A64CC::EQ; break; + case ISD::SETGT: + case ISD::SETOGT: CondCode = A64CC::GT; break; + case ISD::SETGE: + case ISD::SETOGE: CondCode = A64CC::GE; break; + case ISD::SETOLT: CondCode = A64CC::MI; break; + case ISD::SETOLE: CondCode = A64CC::LS; break; + case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break; + case ISD::SETO: CondCode = A64CC::VC; break; + case ISD::SETUO: CondCode = A64CC::VS; break; + case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break; + case ISD::SETUGT: CondCode = A64CC::HI; break; + case ISD::SETUGE: CondCode = A64CC::PL; break; + case ISD::SETLT: + case ISD::SETULT: CondCode = A64CC::LT; break; + case ISD::SETLE: + case ISD::SETULE: CondCode = A64CC::LE; break; + case ISD::SETNE: + case ISD::SETUNE: CondCode = A64CC::NE; break; + } + return CondCode; +} + +SDValue +AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT PtrVT = getPointerTy(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + + assert(getTargetMachine().getCodeModel() == CodeModel::Small + && "Only small code model supported at the moment"); + + // The most efficient code is PC-relative anyway for the small memory model, + // so we don't need to worry about relocation model. + return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + DAG.getTargetBlockAddress(BA, PtrVT, 0, + AArch64II::MO_NO_FLAG), + DAG.getTargetBlockAddress(BA, PtrVT, 0, + AArch64II::MO_LO12), + DAG.getConstant(/*Alignment=*/ 4, MVT::i32)); +} + + +// (BRCOND chain, val, dest) +SDValue +AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue Chain = Op.getOperand(0); + SDValue TheBit = Op.getOperand(1); + SDValue DestBB = Op.getOperand(2); + + // AArch64 BooleanContents is the default UndefinedBooleanContent, which means + // that as the consumer we are responsible for ignoring rubbish in higher + // bits. + TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit, + DAG.getConstant(1, MVT::i32)); + + SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit, + DAG.getConstant(0, TheBit.getValueType()), + DAG.getCondCode(ISD::SETNE)); + + return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain, + A64CMP, DAG.getConstant(A64CC::NE, MVT::i32), + DestBB); +} + +// (BR_CC chain, condcode, lhs, rhs, dest) +SDValue +AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue Chain = Op.getOperand(0); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue DestBB = Op.getOperand(4); + + if (LHS.getValueType() == MVT::f128) { + // f128 comparisons are lowered to runtime calls by a routine which sets + // LHS, RHS and CC appropriately for the rest of this function to continue. 
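To make those runtime calls concrete (hedged): on AArch64, long double is this f128 type, so a source-level comparison becomes a libcall whose integer result is then compared against zero, exactly the fix-up described below. The symbol named here is the usual libgcc/compiler-rt one; the actual choice comes from the RTLIB tables, not from this file:

// Expected shape: there is no f128 compare instruction, so this becomes
// roughly "bl __lttf2" followed by an integer compare against zero.
int quadLess(long double a, long double b) {
  return a < b;
}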
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + + // If softenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (RHS.getNode() == 0) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + + if (LHS.getValueType().isInteger()) { + SDValue A64cc; + + // Integers are handled in a separate function because the combinations of + // immediates and tests can get hairy and we may want to fiddle things. + SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); + + return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, + Chain, CmpOp, A64cc, DestBB); + } + + // Note that some LLVM floating-point CondCodes can't be lowered to a single + // conditional branch, hence FPCCToA64CC can set a second test, where either + // passing is sufficient. + A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; + CondCode = FPCCToA64CC(CC, Alternative); + SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); + SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, + DAG.getCondCode(CC)); + SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, + Chain, SetCC, A64cc, DestBB); + + if (Alternative != A64CC::Invalid) { + A64cc = DAG.getConstant(Alternative, MVT::i32); + A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, + A64BR_CC, SetCC, A64cc, DestBB); + + } + + return A64BR_CC; +} + +SDValue +AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG, + RTLIB::Libcall Call) const { + ArgListTy Args; + ArgListEntry Entry; + for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { + EVT ArgVT = Op.getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy; + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy()); + + Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext()); + + // By default, the input chain to this libcall is the entry node of the + // function. If the libcall is going to be emitted as a tail call then + // isUsedByReturnOnly will change it to the right chain if the return + // node which is being folded has a non-entry input chain. + SDValue InChain = DAG.getEntryNode(); + + // isTailCall may be true since the callee does not reference caller stack + // frame. Check if it's in the right position. + SDValue TCChain = InChain; + bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain); + if (isTailCall) + InChain = TCChain; + + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, false, false, false, false, + 0, getLibcallCallingConv(Call), isTailCall, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, Op->getDebugLoc()); + std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); + + if (!CallInfo.second.getNode()) + // It's a tailcall, return the chain (which is the DAG root). 
+ return DAG.getRoot(); + + return CallInfo.first; +} + +SDValue +AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + if (Op.getOperand(0).getValueType() != MVT::f128) { + // It's legal except when f128 is involved + return Op; + } + + RTLIB::Libcall LC; + LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); + + SDValue SrcVal = Op.getOperand(0); + return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, + /*isSigned*/ false, Op.getDebugLoc()); +} + +SDValue +AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); + + RTLIB::Libcall LC; + LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); + + return LowerF128ToCall(Op, DAG, LC); +} + +SDValue +AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, + bool IsSigned) const { + if (Op.getOperand(0).getValueType() != MVT::f128) { + // It's legal except when f128 is involved + return Op; + } + + RTLIB::Libcall LC; + if (IsSigned) + LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType()); + else + LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); + + return LowerF128ToCall(Op, DAG, LC); +} + +SDValue +AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, + SelectionDAG &DAG) const { + // TableGen doesn't have easy access to the CodeModel or RelocationModel, so + // we make that distinction here. + + // We support the small memory model for now. + assert(getTargetMachine().getCodeModel() == CodeModel::Small); + + EVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op); + const GlobalValue *GV = GN->getGlobal(); + unsigned Alignment = GV->getAlignment(); + Reloc::Model RelocM = getTargetMachine().getRelocationModel(); + + if (GV->isWeakForLinker() && RelocM == Reloc::Static) { + // Weak symbols can't use ADRP/ADD pair since they should evaluate to + // zero when undefined. In PIC mode the GOT can take care of this, but in + // absolute mode we use a constant pool load. + SDValue PoolAddr; + PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, + DAG.getTargetConstantPool(GV, PtrVT, 0, 0, + AArch64II::MO_NO_FLAG), + DAG.getTargetConstantPool(GV, PtrVT, 0, 0, + AArch64II::MO_LO12), + DAG.getConstant(8, MVT::i32)); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr, + MachinePointerInfo::getConstantPool(), + /*isVolatile=*/ false, /*isNonTemporal=*/ true, + /*isInvariant=*/ true, 8); + } + + if (Alignment == 0) { + const PointerType *GVPtrTy = cast<PointerType>(GV->getType()); + if (GVPtrTy->getElementType()->isSized()) { + Alignment + = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType()); + } else { + // Be conservative if we can't guess, not that it really matters: + // functions and labels aren't valid for loads, and the methods used to + // actually calculate an address work with any alignment. + Alignment = 1; + } + } + + unsigned char HiFixup, LoFixup; + bool UseGOT = Subtarget->GVIsIndirectSymbol(GV, RelocM); + + if (UseGOT) { + HiFixup = AArch64II::MO_GOT; + LoFixup = AArch64II::MO_GOT_LO12; + Alignment = 8; + } else { + HiFixup = AArch64II::MO_NO_FLAG; + LoFixup = AArch64II::MO_LO12; + } + + // AArch64's small model demands the following sequence: + // ADRP x0, somewhere + // ADD x0, x0, #:lo12:somewhere ; (or LDR directly). 
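As an illustration of that sequence ("counter" is an invented symbol; the assembly is the expected shape, not captured compiler output):

int counter; // invented global

// Small-code-model access for the load below:
//   adrp x8, counter               // 4KB page containing the symbol
//   ldr  w0, [x8, #:lo12:counter]  // page offset folded into the load
// For a GOT-indirect symbol the :got:/:got_lo12: fixups above are used
// instead, and the address is first loaded from the GOT entry.
int readCounter() { return counter; }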
+ SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + HiFixup), + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + LoFixup), + DAG.getConstant(Alignment, MVT::i32)); + + if (UseGOT) { + GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(), + GlobalRef); + } + + if (GN->getOffset() != 0) + return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef, + DAG.getConstant(GN->getOffset(), PtrVT)); + + return GlobalRef; +} + +SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr, + SDValue DescAddr, + DebugLoc DL, + SelectionDAG &DAG) const { + EVT PtrVT = getPointerTy(); + + // The function we need to call is simply the first entry in the GOT for this + // descriptor, load it in preparation. + SDValue Func, Chain; + Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(), + DescAddr); + + // The function takes only one argument: the address of the descriptor itself + // in X0. + SDValue Glue; + Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue); + Glue = Chain.getValue(1); + + // Finally, there's a special calling-convention which means that the lookup + // must preserve all registers (except X0, obviously). + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const AArch64RegisterInfo *A64RI + = static_cast<const AArch64RegisterInfo *>(TRI); + const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask(); + + // We're now ready to populate the argument list, as with a normal call: + std::vector<SDValue> Ops; + Ops.push_back(Chain); + Ops.push_back(Func); + Ops.push_back(SymAddr); + Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT)); + Ops.push_back(DAG.getRegisterMask(Mask)); + Ops.push_back(Glue); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0], + Ops.size()); + Glue = Chain.getValue(1); + + // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it + // back to the generic handling code. 
+ return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue); +} + +SDValue +AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetELF() && + "TLS not implemented for non-ELF targets"); + const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); + + TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); + + SDValue TPOff; + EVT PtrVT = getPointerTy(); + DebugLoc DL = Op.getDebugLoc(); + const GlobalValue *GV = GA->getGlobal(); + + SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT); + + if (Model == TLSModel::InitialExec) { + TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + AArch64II::MO_GOTTPREL), + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + AArch64II::MO_GOTTPREL_LO12), + DAG.getConstant(8, MVT::i32)); + TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(), + TPOff); + } else if (Model == TLSModel::LocalExec) { + SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, + AArch64II::MO_TPREL_G1); + SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, + AArch64II::MO_TPREL_G0_NC); + + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar, + DAG.getTargetConstant(0, MVT::i32)), 0); + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, + TPOff, LoVar, + DAG.getTargetConstant(0, MVT::i32)), 0); + } else if (Model == TLSModel::GeneralDynamic) { + // Accesses used in this sequence go via the TLS descriptor which lives in + // the GOT. Prepare an address we can use to handle this. + SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + AArch64II::MO_TLSDESC); + SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + AArch64II::MO_TLSDESC_LO12); + SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + HiDesc, LoDesc, + DAG.getConstant(8, MVT::i32)); + SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0); + + TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG); + } else if (Model == TLSModel::LocalDynamic) { + // Local-dynamic accesses proceed in two phases. A general-dynamic TLS + // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate + // the beginning of the module's TLS region, followed by a DTPREL offset + // calculation. + + // These accesses will need deduplicating if there's more than one. 
+ AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction() + .getInfo<AArch64MachineFunctionInfo>(); + MFI->incNumLocalDynamicTLSAccesses(); + + + // Get the location of _TLS_MODULE_BASE_: + SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, + AArch64II::MO_TLSDESC); + SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, + AArch64II::MO_TLSDESC_LO12); + SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + HiDesc, LoDesc, + DAG.getConstant(8, MVT::i32)); + SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT); + + ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG); + + // Get the variable's offset from _TLS_MODULE_BASE_ + SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, + AArch64II::MO_DTPREL_G1); + SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, + AArch64II::MO_DTPREL_G0_NC); + + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar, + DAG.getTargetConstant(0, MVT::i32)), 0); + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, + TPOff, LoVar, + DAG.getTargetConstant(0, MVT::i32)), 0); + } else + llvm_unreachable("Unsupported TLS access model"); + + + return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff); +} + +SDValue +AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, + bool IsSigned) const { + if (Op.getValueType() != MVT::f128) { + // Legal for everything except f128. + return Op; + } + + RTLIB::Libcall LC; + if (IsSigned) + LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); + else + LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); + + return LowerF128ToCall(Op, DAG, LC); +} + + +SDValue +AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + DebugLoc dl = JT->getDebugLoc(); + + // When compiling PIC, jump tables get put in the code section so a static + // relocation-style is acceptable for both cases. + return DAG.getNode(AArch64ISD::WrapperSmall, dl, getPointerTy(), + DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()), + DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(), + AArch64II::MO_LO12), + DAG.getConstant(1, MVT::i32)); +} + +// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode) +SDValue +AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue IfTrue = Op.getOperand(2); + SDValue IfFalse = Op.getOperand(3); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + + if (LHS.getValueType() == MVT::f128) { + // f128 comparisons are lowered to libcalls, but slot in nicely here + // afterwards. + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + + // If softenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (RHS.getNode() == 0) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + + if (LHS.getValueType().isInteger()) { + SDValue A64cc; + + // Integers are handled in a separate function because the combinations of + // immediates and tests can get hairy and we may want to fiddle things. 
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); + + return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), + CmpOp, IfTrue, IfFalse, A64cc); + } + + // Note that some LLVM floating-point CondCodes can't be lowered to a single + // conditional branch, hence FPCCToA64CC can set a second test, where either + // passing is sufficient. + A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; + CondCode = FPCCToA64CC(CC, Alternative); + SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); + SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, + DAG.getCondCode(CC)); + SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, + Op.getValueType(), + SetCC, IfTrue, IfFalse, A64cc); + + if (Alternative != A64CC::Invalid) { + A64cc = DAG.getConstant(Alternative, MVT::i32); + A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), + SetCC, IfTrue, A64SELECT_CC, A64cc); + + } + + return A64SELECT_CC; +} + +// (SELECT testbit, iftrue, iffalse) +SDValue +AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue TheBit = Op.getOperand(0); + SDValue IfTrue = Op.getOperand(1); + SDValue IfFalse = Op.getOperand(2); + + // AArch64 BooleanContents is the default UndefinedBooleanContent, which means + // that as the consumer we are responsible for ignoring rubbish in higher + // bits. + TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit, + DAG.getConstant(1, MVT::i32)); + SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit, + DAG.getConstant(0, TheBit.getValueType()), + DAG.getCondCode(ISD::SETNE)); + + return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), + A64CMP, IfTrue, IfFalse, + DAG.getConstant(A64CC::NE, MVT::i32)); +} + +// (SETCC lhs, rhs, condcode) +SDValue +AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + EVT VT = Op.getValueType(); + + if (LHS.getValueType() == MVT::f128) { + // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS + // for the rest of the function (some i32 or i64 values). + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + + // If softenSetCCOperands returned a scalar, use it. + if (RHS.getNode() == 0) { + assert(LHS.getValueType() == Op.getValueType() && + "Unexpected setcc expansion!"); + return LHS; + } + } + + if (LHS.getValueType().isInteger()) { + SDValue A64cc; + + // Integers are handled in a separate function because the combinations of + // immediates and tests can get hairy and we may want to fiddle things. + SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); + + return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, + CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT), + A64cc); + } + + // Note that some LLVM floating-point CondCodes can't be lowered to a single + // conditional branch, hence FPCCToA64CC can set a second test, where either + // passing is sufficient. 
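To see why a second test can be needed, take SETUEQ ("unordered or equal"): FPCCToA64CC maps it to EQ with VS as the alternative, and the nested SELECT_CC below is expected to end up as two conditional selects (illustrative assembly, invented register choices):

//   fcmp s0, s1
//   csel w8, w_true, w_false, eq // first test: compared equal
//   csel w0, w_true, w8, vs      // alternative: unordered (V flag set)
int selectUEQ(float a, float b, int t, int f) {
  // Conceptually "a UEQ b ? t : f": equal, or at least one NaN operand.
  return (a == b || a != a || b != b) ? t : f;
}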
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; + CondCode = FPCCToA64CC(CC, Alternative); + SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); + SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, + DAG.getCondCode(CC)); + SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, + CmpOp, DAG.getConstant(1, VT), + DAG.getConstant(0, VT), A64cc); + + if (Alternative != A64CC::Invalid) { + A64cc = DAG.getConstant(Alternative, MVT::i32); + A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, + DAG.getConstant(1, VT), A64SELECT_CC, A64cc); + } + + return A64SELECT_CC; +} + +SDValue +AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { + const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); + const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); + + // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes + // rather than just 8. + return DAG.getMemcpy(Op.getOperand(0), Op.getDebugLoc(), + Op.getOperand(1), Op.getOperand(2), + DAG.getConstant(32, MVT::i32), 8, false, false, + MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); +} + +SDValue +AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { + // The layout of the va_list struct is specified in the AArch64 Procedure Call + // Standard, section B.3. + MachineFunction &MF = DAG.getMachineFunction(); + AArch64MachineFunctionInfo *FuncInfo + = MF.getInfo<AArch64MachineFunctionInfo>(); + DebugLoc DL = Op.getDebugLoc(); + + SDValue Chain = Op.getOperand(0); + SDValue VAList = Op.getOperand(1); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + SmallVector<SDValue, 4> MemOps; + + // void *__stack at offset 0 + SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(), + getPointerTy()); + MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList, + MachinePointerInfo(SV), false, false, 0)); + + // void *__gr_top at offset 8 + int GPRSize = FuncInfo->getVariadicGPRSize(); + if (GPRSize > 0) { + SDValue GRTop, GRTopAddr; + + GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, + DAG.getConstant(8, getPointerTy())); + + GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy()); + GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop, + DAG.getConstant(GPRSize, getPointerTy())); + + MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr, + MachinePointerInfo(SV, 8), + false, false, 0)); + } + + // void *__vr_top at offset 16 + int FPRSize = FuncInfo->getVariadicFPRSize(); + if (FPRSize > 0) { + SDValue VRTop, VRTopAddr; + VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, + DAG.getConstant(16, getPointerTy())); + + VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy()); + VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop, + DAG.getConstant(FPRSize, getPointerTy())); + + MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr, + MachinePointerInfo(SV, 16), + false, false, 0)); + } + + // int __gr_offs at offset 24 + SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, + DAG.getConstant(24, getPointerTy())); + MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32), + GROffsAddr, MachinePointerInfo(SV, 24), + false, false, 0)); + + // int __vr_offs at offset 28 + SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, + DAG.getConstant(28, getPointerTy())); + MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32), + VROffsAddr,
+                                false, false, 0));
+
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
+                     MemOps.size());
+}
+
+SDValue
+AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+  switch (Op.getOpcode()) {
+  default: llvm_unreachable("Don't know how to custom lower this!");
+  case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128);
+  case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128);
+  case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128);
+  case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128);
+  case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true);
+  case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false);
+  case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true);
+  case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false);
+  case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
+  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+
+  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+  case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG);
+  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+  case ISD::SELECT: return LowerSELECT(Op, DAG);
+  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+  case ISD::SETCC: return LowerSETCC(Op, DAG);
+  case ISD::VACOPY: return LowerVACOPY(Op, DAG);
+  case ISD::VASTART: return LowerVASTART(Op, DAG);
+  }
+
+  return SDValue();
+}
+
+static SDValue PerformANDCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+
+  SelectionDAG &DAG = DCI.DAG;
+  DebugLoc DL = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+
+  // We're looking for an SRL/AND pair which forms a UBFX: the AND supplies the
+  // low mask (i.e. the field width) and the SRL the least significant bit.
+
+  if (VT != MVT::i32 && VT != MVT::i64)
+    return SDValue();
+
+  if (!isa<ConstantSDNode>(N->getOperand(1)))
+    return SDValue();
+
+  uint64_t TruncMask = N->getConstantOperandVal(1);
+  if (!isMask_64(TruncMask))
+    return SDValue();
+
+  uint64_t Width = CountPopulation_64(TruncMask);
+  SDValue Shift = N->getOperand(0);
+
+  if (Shift.getOpcode() != ISD::SRL)
+    return SDValue();
+
+  if (!isa<ConstantSDNode>(Shift->getOperand(1)))
+    return SDValue();
+  uint64_t LSB = Shift->getConstantOperandVal(1);
+
+  if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
+    return SDValue();
+
+  return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0),
+                     DAG.getConstant(LSB, MVT::i64),
+                     DAG.getConstant(LSB + Width - 1, MVT::i64));
+}
+
+static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode,
+                                   TargetLowering::DAGCombinerInfo &DCI) {
+  // An atomic operation followed by an acquiring atomic fence can be reduced to
+  // an acquiring load. The atomic operation provides a convenient pointer to
+  // load from. If the original operation was a load anyway we can actually
+  // combine the two operations into an acquiring load.
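+  // For example, "%val = atomicrmw add i32* %ptr, i32 1 monotonic" followed
+  // by "fence acquire" keeps the atomicrmw but replaces the fence with a
+  // load-acquire of %ptr whose result is discarded; if the first operation
+  // was itself an atomic load, the two fold into a single load-acquire.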
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue AtomicOp = FenceNode->getOperand(0);
+  AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp);
+
+  // A fence on its own can't be optimised
+  if (!AtomicNode)
+    return SDValue();
+
+  AtomicOrdering FenceOrder
+    = static_cast<AtomicOrdering>(FenceNode->getConstantOperandVal(1));
+  SynchronizationScope FenceScope
+    = static_cast<SynchronizationScope>(FenceNode->getConstantOperandVal(2));
+
+  if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope())
+    return SDValue();
+
+  // If the original operation was an ATOMIC_LOAD then we'll be replacing it,
+  // so the chain we use should be its input, otherwise we'll put our store
+  // after it so we use its output chain.
+  SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ?
+    AtomicNode->getChain() : AtomicOp;
+
+  // We have an acquire fence with a handy atomic operation nearby, we can
+  // convert the fence into a load-acquire, discarding the result.
+  DebugLoc DL = FenceNode->getDebugLoc();
+  SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(),
+                             AtomicNode->getValueType(0),
+                             Chain,                  // Chain
+                             AtomicOp.getOperand(1), // Pointer
+                             AtomicNode->getMemOperand(), Acquire,
+                             FenceScope);
+
+  if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD)
+    DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode());
+
+  return Op.getValue(1);
+}
+
+static SDValue PerformATOMIC_STORECombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  // A releasing atomic fence followed by an atomic store can be combined into
+  // a single store operation.
+  SelectionDAG &DAG = DCI.DAG;
+  AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N);
+  SDValue FenceOp = AtomicNode->getOperand(0);
+
+  if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE)
+    return SDValue();
+
+  AtomicOrdering FenceOrder
+    = static_cast<AtomicOrdering>(FenceOp->getConstantOperandVal(1));
+  SynchronizationScope FenceScope
+    = static_cast<SynchronizationScope>(FenceOp->getConstantOperandVal(2));
+
+  if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope())
+    return SDValue();
+
+  DebugLoc DL = AtomicNode->getDebugLoc();
+  return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(),
+                       FenceOp.getOperand(0),      // Chain
+                       AtomicNode->getOperand(1),  // Pointer
+                       AtomicNode->getOperand(2),  // Value
+                       AtomicNode->getMemOperand(), Release,
+                       FenceScope);
+}
+
+/// For a true bitfield insert, the bits getting into that contiguous mask
+/// should come from the low part of an existing value: they must be formed
+/// from a compatible SHL operation (unless they're already low). This
+/// function checks that condition and returns the least-significant bit
+/// that's intended. If the operation is not a field preparation, -1 is
+/// returned.
+static int32_t getLSBForBFI(SelectionDAG &DAG, DebugLoc DL, EVT VT,
+                            SDValue &MaskedVal, uint64_t Mask) {
+  if (!isShiftedMask_64(Mask))
+    return -1;
+
+  // Now we need to alter MaskedVal so that it is an appropriate input for a
+  // BFI instruction. BFI will do a left-shift by LSB before applying the mask
+  // we've spotted, so in general we should pre-emptively "undo" that by
+  // making sure the incoming bits have had a right-shift applied to them.
+  //
+  // This right shift, however, will combine with existing left/right shifts.
+  // In the simplest case of a completely straight bitfield operation, it will
+  // be expected to completely cancel out with an existing SHL. More
+  // complicated cases (e.g. bitfield to bitfield copy) may still need a real
+  // shift before the BFI.
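+  //
+  // For example, Mask == 0xff00 gives LSB == 8: if MaskedVal is (shl X, 8)
+  // the required right-shift cancels it and X is used directly, while an
+  // unshifted MaskedVal picks up an explicit (srl MaskedVal, 8) first.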
+ + uint64_t LSB = CountTrailingZeros_64(Mask); + int64_t ShiftRightRequired = LSB; + if (MaskedVal.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(MaskedVal.getOperand(1))) { + ShiftRightRequired -= MaskedVal.getConstantOperandVal(1); + MaskedVal = MaskedVal.getOperand(0); + } else if (MaskedVal.getOpcode() == ISD::SRL && + isa<ConstantSDNode>(MaskedVal.getOperand(1))) { + ShiftRightRequired += MaskedVal.getConstantOperandVal(1); + MaskedVal = MaskedVal.getOperand(0); + } + + if (ShiftRightRequired > 0) + MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal, + DAG.getConstant(ShiftRightRequired, MVT::i64)); + else if (ShiftRightRequired < 0) { + // We could actually end up with a residual left shift, for example with + // "struc.bitfield = val << 1". + MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal, + DAG.getConstant(-ShiftRightRequired, MVT::i64)); + } + + return LSB; +} + +/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by +/// a mask and an extension. Returns true if a BFI was found and provides +/// information on its surroundings. +static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask, + bool &Extended) { + Extended = false; + if (N.getOpcode() == ISD::ZERO_EXTEND) { + Extended = true; + N = N.getOperand(0); + } + + if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { + Mask = N->getConstantOperandVal(1); + N = N.getOperand(0); + } else { + // Mask is the whole width. + Mask = -1ULL >> (64 - N.getValueType().getSizeInBits()); + } + + if (N.getOpcode() == AArch64ISD::BFI) { + BFI = N; + return true; + } + + return false; +} + +/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which +/// is roughly equivalent to (and (BFI ...), mask). This form is used because it +/// can often be further combined with a larger mask. Ultimately, we want mask +/// to be 2^32-1 or 2^64-1 so the AND can be skipped. +static SDValue tryCombineToBFI(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + assert(N->getOpcode() == ISD::OR && "Unexpected root"); + + // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or + // abandon the effort. + SDValue LHS = N->getOperand(0); + if (LHS.getOpcode() != ISD::AND) + return SDValue(); + + uint64_t LHSMask; + if (isa<ConstantSDNode>(LHS.getOperand(1))) + LHSMask = LHS->getConstantOperandVal(1); + else + return SDValue(); + + // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask + // is or abandon the effort. + SDValue RHS = N->getOperand(1); + if (RHS.getOpcode() != ISD::AND) + return SDValue(); + + uint64_t RHSMask; + if (isa<ConstantSDNode>(RHS.getOperand(1))) + RHSMask = RHS->getConstantOperandVal(1); + else + return SDValue(); + + // Can't do anything if the masks are incompatible. + if (LHSMask & RHSMask) + return SDValue(); + + // Now we need one of the masks to be a contiguous field. Without loss of + // generality that should be the RHS one. + SDValue Bitfield = LHS.getOperand(0); + if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) { + // We know that LHS is a candidate new value, and RHS isn't already a better + // one. + std::swap(LHS, RHS); + std::swap(LHSMask, RHSMask); + } + + // We've done our best to put the right operands in the right places, all we + // can do now is check whether a BFI exists. 
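+  // For example, on i32, (or (and X, 0xffff0000), (and Y, 0xffff)) can become
+  // (BFI Y, (srl X, 16), 16, 16): the two masks partition the register, so
+  // the trivial-mask check below skips the final AND.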
+ Bitfield = RHS.getOperand(0); + int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask); + if (LSB == -1) + return SDValue(); + + uint32_t Width = CountPopulation_64(RHSMask); + assert(Width && "Expected non-zero bitfield width"); + + SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT, + LHS.getOperand(0), Bitfield, + DAG.getConstant(LSB, MVT::i64), + DAG.getConstant(Width, MVT::i64)); + + // Mask is trivial + if ((LHSMask | RHSMask) == (-1ULL >> (64 - VT.getSizeInBits()))) + return BFI; + + return DAG.getNode(ISD::AND, DL, VT, BFI, + DAG.getConstant(LHSMask | RHSMask, VT)); +} + +/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its +/// original input. This is surprisingly common because SROA splits things up +/// into i8 chunks, so the originally detected MaskedBFI may actually only act +/// on the low (say) byte of a word. This is then orred into the rest of the +/// word afterwards. +/// +/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)). +/// +/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the +/// MaskedBFI. We can also deal with a certain amount of extend/truncate being +/// involved. +static SDValue tryCombineToLargerBFI(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + // First job is to hunt for a MaskedBFI on either the left or right. Swap + // operands if it's actually on the right. + SDValue BFI; + SDValue PossExtraMask; + uint64_t ExistingMask = 0; + bool Extended = false; + if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended)) + PossExtraMask = N->getOperand(1); + else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended)) + PossExtraMask = N->getOperand(0); + else + return SDValue(); + + // We can only combine a BFI with another compatible mask. + if (PossExtraMask.getOpcode() != ISD::AND || + !isa<ConstantSDNode>(PossExtraMask.getOperand(1))) + return SDValue(); + + uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1); + + // Masks must be compatible. + if (ExtraMask & ExistingMask) + return SDValue(); + + SDValue OldBFIVal = BFI.getOperand(0); + SDValue NewBFIVal = BFI.getOperand(1); + if (Extended) { + // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be + // 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments + // need to be made compatible. + assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32 + && "Invalid types for BFI"); + OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal); + NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal); + } + + // We need the MaskedBFI to be combined with a mask of the *same* value. + if (PossExtraMask.getOperand(0) != OldBFIVal) + return SDValue(); + + BFI = DAG.getNode(AArch64ISD::BFI, DL, VT, + OldBFIVal, NewBFIVal, + BFI.getOperand(2), BFI.getOperand(3)); + + // If the masking is trivial, we don't need to create it. + if ((ExtraMask | ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits()))) + return BFI; + + return DAG.getNode(ISD::AND, DL, VT, BFI, + DAG.getConstant(ExtraMask | ExistingMask, VT)); +} + +/// An EXTR instruction is made up of two shifts, ORed together. This helper +/// searches for and classifies those shifts. 
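+/// For example, (or (shl A, #16), (srl B, #48)) on i64 is such a pair: A
+/// supplies the high 48 bits of the result and B the low 16.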
+static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, + bool &FromHi) { + if (N.getOpcode() == ISD::SHL) + FromHi = false; + else if (N.getOpcode() == ISD::SRL) + FromHi = true; + else + return false; + + if (!isa<ConstantSDNode>(N.getOperand(1))) + return false; + + ShiftAmount = N->getConstantOperandVal(1); + Src = N->getOperand(0); + return true; +} + +/// EXTR instruction extracts a contiguous chunk of bits from two existing +/// registers viewed as a high/low pair. This function looks for the pattern: +/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an +/// EXTR. Can't quite be done in TableGen because the two immediates aren't +/// independent. +static SDValue tryCombineToEXTR(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + assert(N->getOpcode() == ISD::OR && "Unexpected root"); + + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + SDValue LHS; + uint32_t ShiftLHS = 0; + bool LHSFromHi = 0; + if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi)) + return SDValue(); + + SDValue RHS; + uint32_t ShiftRHS = 0; + bool RHSFromHi = 0; + if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi)) + return SDValue(); + + // If they're both trying to come from the high part of the register, they're + // not really an EXTR. + if (LHSFromHi == RHSFromHi) + return SDValue(); + + if (ShiftLHS + ShiftRHS != VT.getSizeInBits()) + return SDValue(); + + if (LHSFromHi) { + std::swap(LHS, RHS); + std::swap(ShiftLHS, ShiftRHS); + } + + return DAG.getNode(AArch64ISD::EXTR, DL, VT, + LHS, RHS, + DAG.getConstant(ShiftRHS, MVT::i64)); +} + +/// Target-specific dag combine xforms for ISD::OR +static SDValue PerformORCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + + if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + // Attempt to recognise bitfield-insert operations. + SDValue Res = tryCombineToBFI(N, DCI, Subtarget); + if (Res.getNode()) + return Res; + + // Attempt to combine an existing MaskedBFI operation into one with a larger + // mask. + Res = tryCombineToLargerBFI(N, DCI, Subtarget); + if (Res.getNode()) + return Res; + + Res = tryCombineToEXTR(N, DCI); + if (Res.getNode()) + return Res; + + return SDValue(); +} + +/// Target-specific dag combine xforms for ISD::SRA +static SDValue PerformSRACombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + // We're looking for an SRA/SHL pair which form an SBFX. 
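+  // For example, (sra (shl X, #24), #24) on i32 sign-extends bits 7:0 of X,
+  // which becomes an SBFX with immr == 0 and imms == 7.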
+ + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + if (!isa<ConstantSDNode>(N->getOperand(1))) + return SDValue(); + + uint64_t ExtraSignBits = N->getConstantOperandVal(1); + SDValue Shift = N->getOperand(0); + + if (Shift.getOpcode() != ISD::SHL) + return SDValue(); + + if (!isa<ConstantSDNode>(Shift->getOperand(1))) + return SDValue(); + + uint64_t BitsOnLeft = Shift->getConstantOperandVal(1); + uint64_t Width = VT.getSizeInBits() - ExtraSignBits; + uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft; + + if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits()) + return SDValue(); + + return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0), + DAG.getConstant(LSB, MVT::i64), + DAG.getConstant(LSB + Width - 1, MVT::i64)); +} + + +SDValue +AArch64TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + switch (N->getOpcode()) { + default: break; + case ISD::AND: return PerformANDCombine(N, DCI); + case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI); + case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI); + case ISD::OR: return PerformORCombine(N, DCI, Subtarget); + case ISD::SRA: return PerformSRACombine(N, DCI); + } + return SDValue(); +} + +AArch64TargetLowering::ConstraintType +AArch64TargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: break; + case 'w': // An FP/SIMD vector register + return C_RegisterClass; + case 'I': // Constant that can be used with an ADD instruction + case 'J': // Constant that can be used with a SUB instruction + case 'K': // Constant that can be used with a 32-bit logical instruction + case 'L': // Constant that can be used with a 64-bit logical instruction + case 'M': // Constant that can be used as a 32-bit MOV immediate + case 'N': // Constant that can be used as a 64-bit MOV immediate + case 'Y': // Floating point constant zero + case 'Z': // Integer constant zero + return C_Other; + case 'Q': // A memory reference with base register and no offset + return C_Memory; + case 'S': // A symbolic address + return C_Other; + } + } + + // FIXME: Ump, Utf, Usa, Ush + // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes, + // whatever they may be + // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be + // Usa: An absolute symbolic address + // Ush: The high part (bits 32:12) of a pc-relative symbolic address + assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa" + && Constraint != "Ush" && "Unimplemented constraints"); + + return TargetLowering::getConstraintType(Constraint); +} + +TargetLowering::ConstraintWeight +AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info, + const char *Constraint) const { + + llvm_unreachable("Constraint weight unimplemented"); +} + +void +AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { + SDValue Result(0, 0); + + // Only length 1 constraints are C_Other. + if (Constraint.size() != 1) return; + + // Only C_Other constraints get lowered like this. That means constants for us + // so return early if there's no hope the constraint can be lowered. 
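+  // A typical use is GCC-style inline assembly such as
+  //   asm("add %0, %1, %2" : "=r"(res) : "r"(a), "I"(4095));
+  // where the "I" operand must fit ADD's unshifted 12-bit immediate range.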
+ + switch(Constraint[0]) { + default: break; + case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'Z': { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); + if (!C) + return; + + uint64_t CVal = C->getZExtValue(); + uint32_t Bits; + + switch (Constraint[0]) { + default: + // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J' + // is a peculiarly useless SUB constraint. + llvm_unreachable("Unimplemented C_Other constraint"); + case 'I': + if (CVal <= 0xfff) + break; + return; + case 'K': + if (A64Imms::isLogicalImm(32, CVal, Bits)) + break; + return; + case 'L': + if (A64Imms::isLogicalImm(64, CVal, Bits)) + break; + return; + case 'Z': + if (CVal == 0) + break; + return; + } + + Result = DAG.getTargetConstant(CVal, Op.getValueType()); + break; + } + case 'S': { + // An absolute symbolic address or label reference. + if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) { + Result = DAG.getTargetGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), + GA->getValueType(0)); + } else if (const BlockAddressSDNode *BA + = dyn_cast<BlockAddressSDNode>(Op)) { + Result = DAG.getTargetBlockAddress(BA->getBlockAddress(), + BA->getValueType(0)); + } else if (const ExternalSymbolSDNode *ES + = dyn_cast<ExternalSymbolSDNode>(Op)) { + Result = DAG.getTargetExternalSymbol(ES->getSymbol(), + ES->getValueType(0)); + } else + return; + break; + } + case 'Y': + if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) { + if (CFP->isExactlyValue(0.0)) { + Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0)); + break; + } + } + return; + } + + if (Result.getNode()) { + Ops.push_back(Result); + return; + } + + // It's an unknown constraint for us. Let generic code have a go. + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + +std::pair<unsigned, const TargetRegisterClass*> +AArch64TargetLowering::getRegForInlineAsmConstraint( + const std::string &Constraint, + EVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + if (VT.getSizeInBits() <= 32) + return std::make_pair(0U, &AArch64::GPR32RegClass); + else if (VT == MVT::i64) + return std::make_pair(0U, &AArch64::GPR64RegClass); + break; + case 'w': + if (VT == MVT::f16) + return std::make_pair(0U, &AArch64::FPR16RegClass); + else if (VT == MVT::f32) + return std::make_pair(0U, &AArch64::FPR32RegClass); + else if (VT == MVT::f64) + return std::make_pair(0U, &AArch64::FPR64RegClass); + else if (VT.getSizeInBits() == 64) + return std::make_pair(0U, &AArch64::VPR64RegClass); + else if (VT == MVT::f128) + return std::make_pair(0U, &AArch64::FPR128RegClass); + else if (VT.getSizeInBits() == 128) + return std::make_pair(0U, &AArch64::VPR128RegClass); + break; + } + } + + // Use the default implementation in TargetLowering to convert the register + // constraint into a member of a register class. + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h new file mode 100644 index 0000000..4960d28 --- /dev/null +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -0,0 +1,247 @@ +//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that AArch64 uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H +#define LLVM_TARGET_AARCH64_ISELLOWERING_H + +#include "Utils/AArch64BaseInfo.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" + + +namespace llvm { +namespace AArch64ISD { + enum NodeType { + // Start the numbering from where ISD NodeType finishes. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + // This is a conditional branch which also notes the flag needed + // (eq/sgt/...). A64 puts this information on the branches rather than + // compares as LLVM does. + BR_CC, + + // A node to be selected to an actual call operation: either BL or BLR in + // the absence of tail calls. + Call, + + // Indicates a floating-point immediate which fits into the format required + // by the FMOV instructions. First (and only) operand is the 8-bit encoded + // value of that immediate. + FPMOV, + + // Corresponds directly to an EXTR instruction. Operands are an LHS an RHS + // and an LSB. + EXTR, + + // Wraps a load from the GOT, which should always be performed with a 64-bit + // load instruction. This prevents the DAG combiner folding a truncate to + // form a smaller memory access. + GOTLoad, + + // Performs a bitfield insert. Arguments are: the value being inserted into; + // the value being inserted; least significant bit changed; width of the + // field. + BFI, + + // Simply a convenient node inserted during ISelLowering to represent + // procedure return. Will almost certainly be selected to "RET". + Ret, + + /// Extracts a field of contiguous bits from the source and sign extends + /// them into a single register. Arguments are: source; immr; imms. Note + /// these are pre-encoded since DAG matching can't cope with combining LSB + /// and Width into these values itself. + SBFX, + + /// This is an A64-ification of the standard LLVM SELECT_CC operation. The + /// main difference is that it only has the values and an A64 condition, + /// which will be produced by a setcc instruction. + SELECT_CC, + + /// This serves most of the functions of the LLVM SETCC instruction, for two + /// purposes. First, it prevents optimisations from fiddling with the + /// compare after we've moved the CondCode information onto the SELECT_CC or + /// BR_CC instructions. Second, it gives a legal instruction for the actual + /// comparison. + /// + /// It keeps a record of the condition flags asked for because certain + /// instructions are only valid for a subset of condition codes. + SETCC, + + // Designates a node which is a tail call: both a call and a return + // instruction as far as selction is concerned. It should be selected to an + // unconditional branch. Has the usual plethora of call operands, but: 1st + // is callee, 2nd is stack adjustment required immediately before branch. + TC_RETURN, + + // Designates a call used to support the TLS descriptor ABI. The call itself + // will be indirect ("BLR xN") but a relocation-specifier (".tlsdesccall + // var") must be attached somehow during code generation. It takes two + // operands: the callee and the symbol to be relocated against. + TLSDESCCALL, + + // Leaf node which will be lowered to an appropriate MRS to obtain the + // thread pointer: TPIDR_EL0. 
+ THREAD_POINTER, + + /// Extracts a field of contiguous bits from the source and zero extends + /// them into a single register. Arguments are: source; immr; imms. Note + /// these are pre-encoded since DAG matching can't cope with combining LSB + /// and Width into these values itself. + UBFX, + + // Wraps an address which the ISelLowering phase has decided should be + // created using the small absolute memory model: i.e. adrp/add or + // adrp/mem-op. This exists to prevent bare TargetAddresses which may never + // get selected. + WrapperSmall + }; +} + + +class AArch64Subtarget; +class AArch64TargetMachine; + +class AArch64TargetLowering : public TargetLowering { +public: + explicit AArch64TargetLowering(AArch64TargetMachine &TM); + + const char *getTargetNodeName(unsigned Opcode) const; + + CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const; + + SDValue LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + SDValue LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, SelectionDAG &DAG) const; + + SDValue LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const; + + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc DL, SDValue &Chain) const; + + + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. + bool IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool IsVarArg, + bool IsCalleeStructRet, + bool IsCallerStructRet, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const; + + /// Finds the incoming stack arguments which overlap the given fixed stack + /// object and incorporates their load into the current chain. This prevents + /// an upcoming store from clobbering the stack argument before it's used. 
+ SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, + MachineFrameInfo *MFI, int ClobberedFI) const; + + EVT getSetCCResultType(EVT VT) const; + + bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const; + + bool IsTailCallConvention(CallingConv::ID CallCC) const; + + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + + bool isLegalICmpImmediate(int64_t Val) const; + SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, + SDValue &A64cc, SelectionDAG &DAG, DebugLoc &dl) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + MachineBasicBlock * + emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, + unsigned Size, unsigned Opcode) const; + + MachineBasicBlock * + emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB, + unsigned Size, unsigned CmpOp, + A64CC::CondCodes Cond) const; + MachineBasicBlock * + emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, + unsigned Size) const; + + MachineBasicBlock * + EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const; + + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG, + RTLIB::Libcall Call) const; + SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; + SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL, + SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than + /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to + /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd + /// is expanded to mul + add. 
+ virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; } + + ConstraintType getConstraintType(const std::string &Constraint) const; + + ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &Info, + const char *Constraint) const; + void LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const; + + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; +private: + const AArch64Subtarget *Subtarget; + const TargetRegisterInfo *RegInfo; + const InstrItineraryData *Itins; +}; +} // namespace llvm + +#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td new file mode 100644 index 0000000..cb93471 --- /dev/null +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -0,0 +1,961 @@ +//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This file describes AArch64 instruction formats, down to the level of the +// instruction's overall class. +// ===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// A64 Instruction Format Definitions. +//===----------------------------------------------------------------------===// + +// A64 is currently the only instruction set supported by the AArch64 +// architecture. +class A64Inst<dag outs, dag ins, string asmstr, list<dag> patterns, + InstrItinClass itin> + : Instruction { + // All A64 instructions are 32-bit. This field will be filled in + // gradually going down the hierarchy. + field bits<32> Inst; + + field bits<32> Unpredictable = 0; + // SoftFail is the generic name for this field, but we alias it so + // as to make it more obvious what it means in ARM-land. + field bits<32> SoftFail = Unpredictable; + + // LLVM-level model of the AArch64/A64 distinction. + let Namespace = "AArch64"; + let DecoderNamespace = "A64"; + let Size = 4; + + // Set the templated fields + let OutOperandList = outs; + let InOperandList = ins; + let AsmString = asmstr; + let Pattern = patterns; + let Itinerary = itin; +} + +class PseudoInst<dag outs, dag ins, list<dag> patterns> : Instruction { + let Namespace = "AArch64"; + + let OutOperandList = outs; + let InOperandList= ins; + let Pattern = patterns; + let isCodeGenOnly = 1; + let isPseudo = 1; +} + +// Represents a pseudo-instruction that represents a single A64 instruction for +// whatever reason, the eventual result will be a 32-bit real instruction. +class A64PseudoInst<dag outs, dag ins, list<dag> patterns> + : PseudoInst<outs, ins, patterns> { + let Size = 4; +} + +// As above, this will be a single A64 instruction, but we can actually give the +// expansion in TableGen. +class A64PseudoExpand<dag outs, dag ins, list<dag> patterns, dag Result> + : A64PseudoInst<outs, ins, patterns>, + PseudoInstExpansion<Result>; + + +// First, some common cross-hierarchy register formats. 
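+// A64 register operands live in fixed fields: Rd or Rt in bits 4-0, Rn in
+// bits 9-5 and Rm in bits 20-16. The classes below factor those out so each
+// format only has to describe its remaining opcode bits.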
+ +class A64InstRd<dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + bits<5> Rd; + + let Inst{4-0} = Rd; +} + +class A64InstRt<dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + bits<5> Rt; + + let Inst{4-0} = Rt; +} + + +class A64InstRdn<dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRd<outs, ins, asmstr, patterns, itin> { + // Inherit rdt + bits<5> Rn; + + let Inst{9-5} = Rn; +} + +class A64InstRtn<dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRt<outs, ins, asmstr, patterns, itin> { + // Inherit rdt + bits<5> Rn; + + let Inst{9-5} = Rn; +} + +// Instructions taking Rt,Rt2,Rn +class A64InstRtt2n<dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtn<outs, ins, asmstr, patterns, itin> { + bits<5> Rt2; + + let Inst{14-10} = Rt2; +} + +class A64InstRdnm<dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + bits<5> Rm; + + let Inst{20-16} = Rm; +} + +//===----------------------------------------------------------------------===// +// +// Actual A64 Instruction Formats +// + +// Format for Add-subtract (extended register) instructions. +class A64I_addsubext<bit sf, bit op, bit S, bits<2> opt, bits<3> option, + dag outs, dag ins, string asmstr, list<dag> patterns, + InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + bits<3> Imm3; + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = S; + let Inst{28-24} = 0b01011; + let Inst{23-22} = opt; + let Inst{21} = 0b1; + // Rm inherited in 20-16 + let Inst{15-13} = option; + let Inst{12-10} = Imm3; + // Rn inherited in 9-5 + // Rd inherited in 4-0 +} + +// Format for Add-subtract (immediate) instructions. +class A64I_addsubimm<bit sf, bit op, bit S, bits<2> shift, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + bits<12> Imm12; + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = S; + let Inst{28-24} = 0b10001; + let Inst{23-22} = shift; + let Inst{21-10} = Imm12; +} + +// Format for Add-subtract (shifted register) instructions. +class A64I_addsubshift<bit sf, bit op, bit S, bits<2> shift, + dag outs, dag ins, string asmstr, list<dag> patterns, + InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + bits<6> Imm6; + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = S; + let Inst{28-24} = 0b01011; + let Inst{23-22} = shift; + let Inst{21} = 0b0; + // Rm inherited in 20-16 + let Inst{15-10} = Imm6; + // Rn inherited in 9-5 + // Rd inherited in 4-0 +} + +// Format for Add-subtract (with carry) instructions. 
+class A64I_addsubcarry<bit sf, bit op, bit S, bits<6> opcode2, + dag outs, dag ins, string asmstr, list<dag> patterns, + InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = S; + let Inst{28-21} = 0b11010000; + // Rm inherited in 20-16 + let Inst{15-10} = opcode2; + // Rn inherited in 9-5 + // Rd inherited in 4-0 +} + + +// Format for Bitfield instructions +class A64I_bitfield<bit sf, bits<2> opc, bit n, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + bits<6> ImmR; + bits<6> ImmS; + + let Inst{31} = sf; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100110; + let Inst{22} = n; + let Inst{21-16} = ImmR; + let Inst{15-10} = ImmS; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for compare and branch (immediate) instructions. +class A64I_cmpbr<bit sf, bit op, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRt<outs, ins, asmstr, patterns, itin> { + bits<19> Label; + + let Inst{31} = sf; + let Inst{30-25} = 0b011010; + let Inst{24} = op; + let Inst{23-5} = Label; + // Inherit Rt in 4-0 +} + +// Format for conditional branch (immediate) instructions. +class A64I_condbr<bit o1, bit o0, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + bits<19> Label; + bits<4> Cond; + + let Inst{31-25} = 0b0101010; + let Inst{24} = o1; + let Inst{23-5} = Label; + let Inst{4} = o0; + let Inst{3-0} = Cond; +} + +// Format for conditional compare (immediate) instructions. +class A64I_condcmpimm<bit sf, bit op, bit o2, bit o3, bit s, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + bits<5> Rn; + bits<5> UImm5; + bits<4> NZCVImm; + bits<4> Cond; + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = s; + let Inst{28-21} = 0b11010010; + let Inst{20-16} = UImm5; + let Inst{15-12} = Cond; + let Inst{11} = 0b1; + let Inst{10} = o2; + let Inst{9-5} = Rn; + let Inst{4} = o3; + let Inst{3-0} = NZCVImm; +} + +// Format for conditional compare (register) instructions. +class A64I_condcmpreg<bit sf, bit op, bit o2, bit o3, bit s, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + bits<5> Rn; + bits<5> Rm; + bits<4> NZCVImm; + bits<4> Cond; + + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = s; + let Inst{28-21} = 0b11010010; + let Inst{20-16} = Rm; + let Inst{15-12} = Cond; + let Inst{11} = 0b0; + let Inst{10} = o2; + let Inst{9-5} = Rn; + let Inst{4} = o3; + let Inst{3-0} = NZCVImm; +} + +// Format for conditional select instructions. 
+class A64I_condsel<bit sf, bit op, bit s, bits<2> op2, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + bits<4> Cond; + + let Inst{31} = sf; + let Inst{30} = op; + let Inst{29} = s; + let Inst{28-21} = 0b11010100; + // Inherit Rm in 20-16 + let Inst{15-12} = Cond; + let Inst{11-10} = op2; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for data processing (1 source) instructions +class A64I_dp_1src<bit sf, bit S, bits<5> opcode2, bits<6> opcode, + string asmstr, dag outs, dag ins, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + let Inst{31} = sf; + let Inst{30} = 0b1; + let Inst{29} = S; + let Inst{28-21} = 0b11010110; + let Inst{20-16} = opcode2; + let Inst{15-10} = opcode; +} + +// Format for data processing (2 source) instructions +class A64I_dp_2src<bit sf, bits<6> opcode, bit S, + string asmstr, dag outs, dag ins, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + let Inst{31} = sf; + let Inst{30} = 0b0; + let Inst{29} = S; + let Inst{28-21} = 0b11010110; + let Inst{15-10} = opcode; +} + +// Format for data-processing (3 source) instructions + +class A64I_dp3<bit sf, bits<6> opcode, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + bits<5> Ra; + + let Inst{31} = sf; + let Inst{30-29} = opcode{5-4}; + let Inst{28-24} = 0b11011; + let Inst{23-21} = opcode{3-1}; + // Inherits Rm in 20-16 + let Inst{15} = opcode{0}; + let Inst{14-10} = Ra; + // Inherits Rn in 9-5 + // Inherits Rd in 4-0 +} + +// Format for exception generation instructions +class A64I_exception<bits<3> opc, bits<3> op2, bits<2> ll, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + bits<16> UImm16; + + let Inst{31-24} = 0b11010100; + let Inst{23-21} = opc; + let Inst{20-5} = UImm16; + let Inst{4-2} = op2; + let Inst{1-0} = ll; +} + +// Format for extract (immediate) instructions +class A64I_extract<bit sf, bits<3> op, bit n, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + bits<6> LSB; + + let Inst{31} = sf; + let Inst{30-29} = op{2-1}; + let Inst{28-23} = 0b100111; + let Inst{22} = n; + let Inst{21} = op{0}; + // Inherits Rm in bits 20-16 + let Inst{15-10} = LSB; + // Inherits Rn in 9-5 + // Inherits Rd in 4-0 +} + +// Format for floating-point compare instructions. +class A64I_fpcmp<bit m, bit s, bits<2> type, bits<2> op, bits<5> opcode2, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + bits<5> Rn; + bits<5> Rm; + + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-14} = op; + let Inst{13-10} = 0b1000; + let Inst{9-5} = Rn; + let Inst{4-0} = opcode2; +} + +// Format for floating-point conditional compare instructions. 
+class A64I_fpccmp<bit m, bit s, bits<2> type, bit op, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + bits<5> Rn; + bits<5> Rm; + bits<4> NZCVImm; + bits<4> Cond; + + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-12} = Cond; + let Inst{11-10} = 0b01; + let Inst{9-5} = Rn; + let Inst{4} = op; + let Inst{3-0} = NZCVImm; +} + +// Format for floating-point conditional select instructions. +class A64I_fpcondsel<bit m, bit s, bits<2> type, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + bits<4> Cond; + + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + // Inherit Rm in 20-16 + let Inst{15-12} = Cond; + let Inst{11-10} = 0b11; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + + +// Format for floating-point data-processing (1 source) instructions. +class A64I_fpdp1<bit m, bit s, bits<2> type, bits<6> opcode, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + let Inst{20-15} = opcode; + let Inst{14-10} = 0b10000; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for floating-point data-processing (2 sources) instructions. +class A64I_fpdp2<bit m, bit s, bits<2> type, bits<4> opcode, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + // Inherit Rm in 20-16 + let Inst{15-12} = opcode; + let Inst{11-10} = 0b10; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for floating-point data-processing (3 sources) instructions. +class A64I_fpdp3<bit m, bit s, bits<2> type, bit o1, bit o0, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + bits<5> Ra; + + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11111; + let Inst{23-22} = type; + let Inst{21} = o1; + // Inherit Rm in 20-16 + let Inst{15} = o0; + let Inst{14-10} = Ra; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for floating-point <-> fixed-point conversion instructions. +class A64I_fpfixed<bit sf, bit s, bits<2> type, bits<2> mode, bits<3> opcode, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + bits<6> Scale; + + let Inst{31} = sf; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b0; + let Inst{20-19} = mode; + let Inst{18-16} = opcode; + let Inst{15-10} = Scale; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format for floating-point <-> integer conversion instructions. 
+class A64I_fpint<bit sf, bit s, bits<2> type, bits<2> rmode, bits<3> opcode, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + let Inst{31} = sf; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = 0b000000; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + + +// Format for floating-point immediate instructions. +class A64I_fpimm<bit m, bit s, bits<2> type, bits<5> imm5, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRd<outs, ins, asmstr, patterns, itin> { + bits<8> Imm8; + + let Inst{31} = m; + let Inst{30} = 0b0; + let Inst{29} = s; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0b1; + let Inst{20-13} = Imm8; + let Inst{12-10} = 0b100; + let Inst{9-5} = imm5; + // Inherit Rd in 4-0 +} + +// Format for load-register (literal) instructions. +class A64I_LDRlit<bits<2> opc, bit v, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRt<outs, ins, asmstr, patterns, itin> { + bits<19> Imm19; + + let Inst{31-30} = opc; + let Inst{29-27} = 0b011; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-5} = Imm19; + // Inherit Rt in 4-0 +} + +// Format for load-store exclusive instructions. +class A64I_LDSTex_tn<bits<2> size, bit o2, bit L, bit o1, bit o0, + dag outs, dag ins, string asmstr, + list <dag> patterns, InstrItinClass itin> + : A64InstRtn<outs, ins, asmstr, patterns, itin> { + let Inst{31-30} = size; + let Inst{29-24} = 0b001000; + let Inst{23} = o2; + let Inst{22} = L; + let Inst{21} = o1; + let Inst{15} = o0; +} + +class A64I_LDSTex_tt2n<bits<2> size, bit o2, bit L, bit o1, bit o0, + dag outs, dag ins, string asmstr, + list <dag> patterns, InstrItinClass itin>: + A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{ + bits<5> Rt2; + let Inst{14-10} = Rt2; +} + +class A64I_LDSTex_stn<bits<2> size, bit o2, bit L, bit o1, bit o0, + dag outs, dag ins, string asmstr, + list <dag> patterns, InstrItinClass itin>: + A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{ + bits<5> Rs; + let Inst{20-16} = Rs; +} + +class A64I_LDSTex_stt2n<bits<2> size, bit o2, bit L, bit o1, bit o0, + dag outs, dag ins, string asmstr, + list <dag> patterns, InstrItinClass itin>: + A64I_LDSTex_stn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{ + bits<5> Rt2; + let Inst{14-10} = Rt2; +} + +// Format for load-store register (immediate post-indexed) instructions +class A64I_LSpostind<bits<2> size, bit v, bits<2> opc, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtn<outs, ins, asmstr, patterns, itin> { + bits<9> SImm9; + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b0; + let Inst{20-12} = SImm9; + let Inst{11-10} = 0b01; + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for load-store register (immediate pre-indexed) instructions +class A64I_LSpreind<bits<2> size, bit v, bits<2> opc, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtn<outs, ins, asmstr, patterns, itin> { + bits<9> SImm9; + + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b0; + let 
Inst{20-12} = SImm9; + let Inst{11-10} = 0b11; + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for load-store register (unprivileged) instructions +class A64I_LSunpriv<bits<2> size, bit v, bits<2> opc, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtn<outs, ins, asmstr, patterns, itin> { + bits<9> SImm9; + + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b0; + let Inst{20-12} = SImm9; + let Inst{11-10} = 0b10; + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for load-store (unscaled immediate) instructions. +class A64I_LSunalimm<bits<2> size, bit v, bits<2> opc, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtn<outs, ins, asmstr, patterns, itin> { + bits<9> SImm9; + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b0; + let Inst{20-12} = SImm9; + let Inst{11-10} = 0b00; + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + + +// Format for load-store (unsigned immediate) instructions. +class A64I_LSunsigimm<bits<2> size, bit v, bits<2> opc, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtn<outs, ins, asmstr, patterns, itin> { + bits<12> UImm12; + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b01; + let Inst{23-22} = opc; + let Inst{21-10} = UImm12; +} + +// Format for load-store register (register offset) instructions. +class A64I_LSregoff<bits<2> size, bit v, bits<2> opc, bit optionlo, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtn<outs, ins, asmstr, patterns, itin> { + bits<5> Rm; + + // Complex operand selection needed for these instructions, so they + // need an "addr" field for encoding/decoding to be generated. 
+ bits<3> Ext; + // OptionHi = Ext{2-1} + // S = Ext{0} + + let Inst{31-30} = size; + let Inst{29-27} = 0b111; + let Inst{26} = v; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-14} = Ext{2-1}; + let Inst{13} = optionlo; + let Inst{12} = Ext{0}; + let Inst{11-10} = 0b10; + // Inherits Rn in 9-5 + // Inherits Rt in 4-0 + + let AddedComplexity = 50; +} + +// Format for Load-store register pair (offset) instructions +class A64I_LSPoffset<bits<2> opc, bit v, bit l, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtt2n<outs, ins, asmstr, patterns, itin> { + bits<7> SImm7; + + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = v; + let Inst{25-23} = 0b010; + let Inst{22} = l; + let Inst{21-15} = SImm7; + // Inherit Rt2 in 14-10 + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for Load-store register pair (post-indexed) instructions +class A64I_LSPpostind<bits<2> opc, bit v, bit l, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtt2n<outs, ins, asmstr, patterns, itin> { + bits<7> SImm7; + + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = v; + let Inst{25-23} = 0b001; + let Inst{22} = l; + let Inst{21-15} = SImm7; + // Inherit Rt2 in 14-10 + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for Load-store register pair (pre-indexed) instructions +class A64I_LSPpreind<bits<2> opc, bit v, bit l, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtt2n<outs, ins, asmstr, patterns, itin> { + bits<7> SImm7; + + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = v; + let Inst{25-23} = 0b011; + let Inst{22} = l; + let Inst{21-15} = SImm7; + // Inherit Rt2 in 14-10 + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for Load-store non-temporal register pair (offset) instructions +class A64I_LSPnontemp<bits<2> opc, bit v, bit l, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRtt2n<outs, ins, asmstr, patterns, itin> { + bits<7> SImm7; + + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = v; + let Inst{25-23} = 0b000; + let Inst{22} = l; + let Inst{21-15} = SImm7; + // Inherit Rt2 in 14-10 + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format for Logical (immediate) instructions +class A64I_logicalimm<bit sf, bits<2> opc, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdn<outs, ins, asmstr, patterns, itin> { + bit N; + bits<6> ImmR; + bits<6> ImmS; + + // N, ImmR and ImmS have no separate existence in any assembly syntax (or for + // selection), so we'll combine them into a single field here. 
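+  // Together they hold the standard A64 "bitmask immediate" encoding: N
+  // selects 64-bit element patterns, ImmS encodes the element size together
+  // with the length of the run of ones, and ImmR the rotation applied to it.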
+ bits<13> Imm; + // N = Imm{12}; + // ImmR = Imm{11-6}; + // ImmS = Imm{5-0}; + + let Inst{31} = sf; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100100; + let Inst{22} = Imm{12}; + let Inst{21-16} = Imm{11-6}; + let Inst{15-10} = Imm{5-0}; + // Rn inherited in 9-5 + // Rd inherited in 4-0 +} + +// Format for Logical (shifted register) instructions +class A64I_logicalshift<bit sf, bits<2> opc, bits<2> shift, bit N, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> { + bits<6> Imm6; + + let Inst{31} = sf; + let Inst{30-29} = opc; + let Inst{28-24} = 0b01010; + let Inst{23-22} = shift; + let Inst{21} = N; + // Rm inherited + let Inst{15-10} = Imm6; + // Rn inherited + // Rd inherited +} + +// Format for Move wide (immediate) +class A64I_movw<bit sf, bits<2> opc, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRd<outs, ins, asmstr, patterns, itin> { + bits<16> UImm16; + bits<2> Shift; // Called "hw" officially + + let Inst{31} = sf; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100101; + let Inst{22-21} = Shift; + let Inst{20-5} = UImm16; + // Inherits Rd in 4-0 +} + +// Format for PC-relative addressing instructions, ADR and ADRP. +class A64I_PCADR<bit op, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRd<outs, ins, asmstr, patterns, itin> { + bits<21> Label; + + let Inst{31} = op; + let Inst{30-29} = Label{1-0}; + let Inst{28-24} = 0b10000; + let Inst{23-5} = Label{20-2}; +} + +// Format for system instructions +class A64I_system<bit l, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + bits<2> Op0; + bits<3> Op1; + bits<4> CRn; + bits<4> CRm; + bits<3> Op2; + bits<5> Rt; + + let Inst{31-22} = 0b1101010100; + let Inst{21} = l; + let Inst{20-19} = Op0; + let Inst{18-16} = Op1; + let Inst{15-12} = CRn; + let Inst{11-8} = CRm; + let Inst{7-5} = Op2; + let Inst{4-0} = Rt; + + // These instructions can do horrible things. + let hasSideEffects = 1; +} + +// Format for unconditional branch (immediate) instructions +class A64I_Bimm<bit op, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + // Doubly special in not even sharing register fields with other + // instructions, so we create our own Rn here. + bits<26> Label; + + let Inst{31} = op; + let Inst{30-26} = 0b00101; + let Inst{25-0} = Label; +} + +// Format for Test & branch (immediate) instructions +class A64I_TBimm<bit op, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRt<outs, ins, asmstr, patterns, itin> { + // Doubly special in not even sharing register fields with other + // instructions, so we create our own Rn here. + bits<6> Imm; + bits<14> Label; + + let Inst{31} = Imm{5}; + let Inst{30-25} = 0b011011; + let Inst{24} = op; + let Inst{23-19} = Imm{4-0}; + let Inst{18-5} = Label; + // Inherit Rt in 4-0 +} + +// Format for Unconditional branch (register) instructions, including +// RET. Shares no fields with instructions further up the hierarchy +// so top-level. +class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64Inst<outs, ins, asmstr, patterns, itin> { + // Doubly special in not even sharing register fields with other + // instructions, so we create our own Rn here. 
+ bits<5> Rn; + + let Inst{31-25} = 0b1101011; + let Inst{24-21} = opc; + let Inst{20-16} = op2; + let Inst{15-10} = op3; + let Inst{9-5} = Rn; + let Inst{4-0} = op4; +} + diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp new file mode 100644 index 0000000..7b93463 --- /dev/null +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -0,0 +1,822 @@ +//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" + +#include <algorithm> + +#define GET_INSTRINFO_CTOR +#include "AArch64GenInstrInfo.inc" + +using namespace llvm; + +AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) + : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), + RI(*this, STI), Subtarget(STI) {} + +void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc = 0; + unsigned ZeroReg = 0; + if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) { + // E.g. ADD xDst, xsp, #0 (, lsl #0) + BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg) + .addReg(SrcReg) + .addImm(0); + return; + } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { + // E.g. ADD wDST, wsp, #0 (, lsl #0) + BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg) + .addReg(SrcReg) + .addImm(0); + return; + } else if (DestReg == AArch64::NZCV) { + assert(AArch64::GPR64RegClass.contains(SrcReg)); + // E.g. MSR NZCV, xDST + BuildMI(MBB, I, DL, get(AArch64::MSRix)) + .addImm(A64SysReg::NZCV) + .addReg(SrcReg); + } else if (SrcReg == AArch64::NZCV) { + assert(AArch64::GPR64RegClass.contains(DestReg)); + // E.g. 
MRS xDST, NZCV + BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg) + .addImm(A64SysReg::NZCV); + } else if (AArch64::GPR64RegClass.contains(DestReg)) { + assert(AArch64::GPR64RegClass.contains(SrcReg)); + Opc = AArch64::ORRxxx_lsl; + ZeroReg = AArch64::XZR; + } else if (AArch64::GPR32RegClass.contains(DestReg)) { + assert(AArch64::GPR32RegClass.contains(SrcReg)); + Opc = AArch64::ORRwww_lsl; + ZeroReg = AArch64::WZR; + } else if (AArch64::FPR32RegClass.contains(DestReg)) { + assert(AArch64::FPR32RegClass.contains(SrcReg)); + BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg) + .addReg(SrcReg); + return; + } else if (AArch64::FPR64RegClass.contains(DestReg)) { + assert(AArch64::FPR64RegClass.contains(SrcReg)); + BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg) + .addReg(SrcReg); + return; + } else if (AArch64::FPR128RegClass.contains(DestReg)) { + assert(AArch64::FPR128RegClass.contains(SrcReg)); + + // FIXME: there's no good way to do this, at least without NEON: + // + There's no single move instruction for q-registers + // + We can't create a spill slot and use normal STR/LDR because stack + // allocation has already happened + // + We can't go via X-registers with FMOV because register allocation has + // already happened. + // This may not be efficient, but at least it works. + BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP) + .addReg(SrcReg) + .addReg(AArch64::XSP) + .addImm(0x1ff & -16); + + BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg) + .addReg(AArch64::XSP, RegState::Define) + .addReg(AArch64::XSP) + .addImm(16); + return; + } else { + llvm_unreachable("Unknown register class in copyPhysReg"); + } + + // E.g. ORR xDst, xzr, xSrc, lsl #0 + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(ZeroReg) + .addReg(SrcReg) + .addImm(0); +} + +MachineInstr * +AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, + uint64_t Offset, const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE)) + .addFrameIndex(FrameIx).addImm(0) + .addImm(Offset) + .addMetadata(MDPtr); + return &*MIB; +} + +/// Does the Opcode represent a conditional branch that we can remove and re-add +/// at the end of a basic block? +static bool isCondBranch(unsigned Opc) { + return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx || + Opc == AArch64::CBNZw || Opc == AArch64::CBNZx || + Opc == AArch64::TBZwii || Opc == AArch64::TBZxii || + Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii; +} + +/// Takes apart a given conditional branch MachineInstr (see isCondBranch), +/// setting TBB to the destination basic block and populating the Cond vector +/// with data necessary to recreate the conditional branch at a later +/// date. First element will be the opcode, and subsequent ones define the +/// conditions being branched on in an instruction-specific manner. +static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB, + SmallVectorImpl<MachineOperand> &Cond) { + switch(I->getOpcode()) { + case AArch64::Bcc: + case AArch64::CBZw: + case AArch64::CBZx: + case AArch64::CBNZw: + case AArch64::CBNZx: + // These instructions just have one predicate operand in position 0 (either + // a condition code or a register being compared). 
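+    // For example, "cbz w0, <target>" is recorded as Cond = { CBZw opcode,
+    // w0 } with TBB = <target>.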
+    Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+    Cond.push_back(I->getOperand(0));
+    TBB = I->getOperand(1).getMBB();
+    return;
+  case AArch64::TBZwii:
+  case AArch64::TBZxii:
+  case AArch64::TBNZwii:
+  case AArch64::TBNZxii:
+    // These have two predicate operands: a register and a bit position.
+    Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+    Cond.push_back(I->getOperand(0));
+    Cond.push_back(I->getOperand(1));
+    TBB = I->getOperand(2).getMBB();
+    return;
+  default:
+    llvm_unreachable("Unknown conditional branch to classify");
+  }
+}
+
+
+bool
+AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                                MachineBasicBlock *&FBB,
+                                SmallVectorImpl<MachineOperand> &Cond,
+                                bool AllowModify) const {
+  // If the block has no terminators, it just falls into the block after it.
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin())
+    return false;
+  --I;
+  while (I->isDebugValue()) {
+    if (I == MBB.begin())
+      return false;
+    --I;
+  }
+  if (!isUnpredicatedTerminator(I))
+    return false;
+
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = I;
+
+  // If there is only one terminator instruction, process it.
+  unsigned LastOpc = LastInst->getOpcode();
+  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+    if (LastOpc == AArch64::Bimm) {
+      TBB = LastInst->getOperand(0).getMBB();
+      return false;
+    }
+    if (isCondBranch(LastOpc)) {
+      classifyCondBranch(LastInst, TBB, Cond);
+      return false;
+    }
+    return true; // Can't handle indirect branch.
+  }
+
+  // Get the instruction before it if it is a terminator.
+  MachineInstr *SecondLastInst = I;
+  unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+  // If AllowModify is true and the block ends with two or more unconditional
+  // branches, delete all but the first unconditional branch.
+  if (AllowModify && LastOpc == AArch64::Bimm) {
+    while (SecondLastOpc == AArch64::Bimm) {
+      LastInst->eraseFromParent();
+      LastInst = SecondLastInst;
+      LastOpc = LastInst->getOpcode();
+      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+        // Return now; the only terminator is an unconditional branch.
+        TBB = LastInst->getOperand(0).getMBB();
+        return false;
+      } else {
+        SecondLastInst = I;
+        SecondLastOpc = SecondLastInst->getOpcode();
+      }
+    }
+  }
+
+  // If there are three terminators, we don't know what sort of block this is.
+  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+    return true;
+
+  // If the block ends with a B and a Bcc, handle it.
+  if (LastOpc == AArch64::Bimm) {
+    if (SecondLastOpc == AArch64::Bcc) {
+      TBB = SecondLastInst->getOperand(1).getMBB();
+      Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
+      Cond.push_back(SecondLastInst->getOperand(0));
+      FBB = LastInst->getOperand(0).getMBB();
+      return false;
+    } else if (isCondBranch(SecondLastOpc)) {
+      classifyCondBranch(SecondLastInst, TBB, Cond);
+      FBB = LastInst->getOperand(0).getMBB();
+      return false;
+    }
+  }
+
+  // If the block ends with two unconditional branches, handle it. The second
+  // one is not executed, so remove it.
+  if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    I = LastInst;
+    if (AllowModify)
+      I->eraseFromParent();
+    return false;
+  }
+
+  // Otherwise, can't handle this.
+ return true; +} + +bool AArch64InstrInfo::ReverseBranchCondition( + SmallVectorImpl<MachineOperand> &Cond) const { + switch (Cond[0].getImm()) { + case AArch64::Bcc: { + A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm()); + CC = A64InvertCondCode(CC); + Cond[1].setImm(CC); + return false; + } + case AArch64::CBZw: + Cond[0].setImm(AArch64::CBNZw); + return false; + case AArch64::CBZx: + Cond[0].setImm(AArch64::CBNZx); + return false; + case AArch64::CBNZw: + Cond[0].setImm(AArch64::CBZw); + return false; + case AArch64::CBNZx: + Cond[0].setImm(AArch64::CBZx); + return false; + case AArch64::TBZwii: + Cond[0].setImm(AArch64::TBNZwii); + return false; + case AArch64::TBZxii: + Cond[0].setImm(AArch64::TBNZxii); + return false; + case AArch64::TBNZwii: + Cond[0].setImm(AArch64::TBZwii); + return false; + case AArch64::TBNZxii: + Cond[0].setImm(AArch64::TBZxii); + return false; + default: + llvm_unreachable("Unknown branch type"); + } +} + + +unsigned +AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { + if (FBB == 0 && Cond.empty()) { + BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB); + return 1; + } else if (FBB == 0) { + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + for (int i = 1, e = Cond.size(); i != e; ++i) + MIB.addOperand(Cond[i]); + MIB.addMBB(TBB); + return 1; + } + + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + for (int i = 1, e = Cond.size(); i != e; ++i) + MIB.addOperand(Cond[i]); + MIB.addMBB(TBB); + + BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB); + return 2; +} + +unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) return 0; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return 0; + --I; + } + if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode())) + return 0; + + // Remove the branch. + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) return 1; + --I; + if (!isCondBranch(I->getOpcode())) + return 1; + + // Remove the branch. 
+  I->eraseFromParent();
+  return 2;
+}
+
+bool
+AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const {
+  MachineInstr &MI = *MBBI;
+  MachineBasicBlock &MBB = *MI.getParent();
+
+  unsigned Opcode = MI.getOpcode();
+  switch (Opcode) {
+  case AArch64::TLSDESC_BLRx: {
+    MachineInstr *NewMI =
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL))
+        .addOperand(MI.getOperand(1));
+    MI.setDesc(get(AArch64::BLRx));
+
+    llvm::finalizeBundle(MBB, NewMI, *++MBBI);
+    return true;
+  }
+  default:
+    return false;
+  }
+
+  return false;
+}
+
+void
+AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator MBBI,
+                                      unsigned SrcReg, bool isKill,
+                                      int FrameIdx,
+                                      const TargetRegisterClass *RC,
+                                      const TargetRegisterInfo *TRI) const {
+  DebugLoc DL = MBB.findDebugLoc(MBBI);
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  unsigned Align = MFI.getObjectAlignment(FrameIdx);
+
+  MachineMemOperand *MMO
+    = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+                              MachineMemOperand::MOStore,
+                              MFI.getObjectSize(FrameIdx),
+                              Align);
+
+  unsigned StoreOp = 0;
+  if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
+    switch(RC->getSize()) {
+    case 4: StoreOp = AArch64::LS32_STR; break;
+    case 8: StoreOp = AArch64::LS64_STR; break;
+    default:
+      llvm_unreachable("Unknown size for regclass");
+    }
+  } else {
+    assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
+            RC->hasType(MVT::f128))
+           && "Expected integer or floating type for store");
+    switch (RC->getSize()) {
+    case 4: StoreOp = AArch64::LSFP32_STR; break;
+    case 8: StoreOp = AArch64::LSFP64_STR; break;
+    case 16: StoreOp = AArch64::LSFP128_STR; break;
+    default:
+      llvm_unreachable("Unknown size for regclass");
+    }
+  }
+
+  MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
+  NewMI.addReg(SrcReg, getKillRegState(isKill))
+    .addFrameIndex(FrameIdx)
+    .addImm(0)
+    .addMemOperand(MMO);
+}
+
+void
+AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MBBI,
+                                       unsigned DestReg, int FrameIdx,
+                                       const TargetRegisterClass *RC,
+                                       const TargetRegisterInfo *TRI) const {
+  DebugLoc DL = MBB.findDebugLoc(MBBI);
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  unsigned Align = MFI.getObjectAlignment(FrameIdx);
+
+  MachineMemOperand *MMO
+    = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+                              MachineMemOperand::MOLoad,
+                              MFI.getObjectSize(FrameIdx),
+                              Align);
+
+  unsigned LoadOp = 0;
+  if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
+    switch(RC->getSize()) {
+    case 4: LoadOp = AArch64::LS32_LDR; break;
+    case 8: LoadOp = AArch64::LS64_LDR; break;
+    default:
+      llvm_unreachable("Unknown size for regclass");
+    }
+  } else {
+    assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64)
+            || RC->hasType(MVT::f128))
+           && "Expected integer or floating type for load");
+    switch (RC->getSize()) {
+    case 4: LoadOp = AArch64::LSFP32_LDR; break;
+    case 8: LoadOp = AArch64::LSFP64_LDR; break;
+    case 16: LoadOp = AArch64::LSFP128_LDR; break;
+    default:
+      llvm_unreachable("Unknown size for regclass");
+    }
+  }
+
+  MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
+  NewMI.addFrameIndex(FrameIdx)
+    .addImm(0)
+    .addMemOperand(MMO);
+}
+
+unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const {
+  unsigned Limit = (1 << 16) - 1;
+  for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) {
+    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+         I != E; ++I) {
+      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+        if (!I->getOperand(i).isFI()) continue;
+
+        // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff
+        // is the largest offset guaranteed to fit in the immediate offset.
+        if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) {
+          Limit = std::min(Limit, 0xfffu);
+          break;
+        }
+
+        int AccessScale, MinOffset, MaxOffset;
+        getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset);
+        Limit = std::min(Limit, static_cast<unsigned>(MaxOffset));
+
+        break; // At most one FI per instruction
+      }
+    }
+  }
+
+  return Limit;
+}
+
+void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
+                                             int &AccessScale, int &MinOffset,
+                                             int &MaxOffset) const {
+  switch (MI.getOpcode()) {
+  default: llvm_unreachable("Unknown load/store kind");
+  case TargetOpcode::DBG_VALUE:
+    AccessScale = 1;
+    MinOffset = INT_MIN;
+    MaxOffset = INT_MAX;
+    return;
+  case AArch64::LS8_LDR: case AArch64::LS8_STR:
+  case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
+  case AArch64::LDRSBw:
+  case AArch64::LDRSBx:
+    AccessScale = 1;
+    MinOffset = 0;
+    MaxOffset = 0xfff;
+    return;
+  case AArch64::LS16_LDR: case AArch64::LS16_STR:
+  case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
+  case AArch64::LDRSHw:
+  case AArch64::LDRSHx:
+    AccessScale = 2;
+    MinOffset = 0;
+    MaxOffset = 0xfff * AccessScale;
+    return;
+  case AArch64::LS32_LDR: case AArch64::LS32_STR:
+  case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
+  case AArch64::LDRSWx:
+  case AArch64::LDPSWx:
+    AccessScale = 4;
+    MinOffset = 0;
+    MaxOffset = 0xfff * AccessScale;
+    return;
+  case AArch64::LS64_LDR: case AArch64::LS64_STR:
+  case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
+  case AArch64::PRFM:
+    AccessScale = 8;
+    MinOffset = 0;
+    MaxOffset = 0xfff * AccessScale;
+    return;
+  case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
+    AccessScale = 16;
+    MinOffset = 0;
+    MaxOffset = 0xfff * AccessScale;
+    return;
+  case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
+  case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
+    AccessScale = 4;
+    MinOffset = -0x40 * AccessScale;
+    MaxOffset = 0x3f * AccessScale;
+    return;
+  case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
+  case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
+    AccessScale = 8;
+    MinOffset = -0x40 * AccessScale;
+    MaxOffset = 0x3f * AccessScale;
+    return;
+  case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
+    AccessScale = 16;
+    MinOffset = -0x40 * AccessScale;
+    MaxOffset = 0x3f * AccessScale;
+    return;
+  }
+}
+
+unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+  const MCInstrDesc &MCID = MI.getDesc();
+  const MachineBasicBlock &MBB = *MI.getParent();
+  const MachineFunction &MF = *MBB.getParent();
+  const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo();
+
+  if (MCID.getSize())
+    return MCID.getSize();
+
+  if (MI.getOpcode() == AArch64::INLINEASM)
+    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);
+
+  if (MI.isLabel())
+    return 0;
+
+  switch (MI.getOpcode()) {
+  case TargetOpcode::BUNDLE:
+    return getInstBundleLength(MI);
+  case TargetOpcode::IMPLICIT_DEF:
+  case TargetOpcode::KILL:
+  case TargetOpcode::PROLOG_LABEL:
+  case TargetOpcode::EH_LABEL:
+  case TargetOpcode::DBG_VALUE:
+    return 0;
+  case AArch64::TLSDESCCALL:
+    return 0;
+  default:
+    llvm_unreachable("Unknown instruction class");
+  }
+}
+
+unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const
{ + unsigned Size = 0; + MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); + while (++I != E && I->isInsideBundle()) { + assert(!I->isBundle() && "No nested bundle!"); + Size += getInstSizeInBytes(*I); + } + return Size; +} + +bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const AArch64InstrInfo &TII) { + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + MFI.getObjectOffset(FrameRegIdx); + llvm_unreachable("Unimplemented rewriteFrameIndex"); +} + +void llvm::emitRegUpdate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc dl, const TargetInstrInfo &TII, + unsigned DstReg, unsigned SrcReg, unsigned ScratchReg, + int64_t NumBytes, MachineInstr::MIFlag MIFlags) { + if (NumBytes == 0 && DstReg == SrcReg) + return; + else if (abs(NumBytes) & ~0xffffff) { + // Generically, we have to materialize the offset into a temporary register + // and subtract it. There are a couple of ways this could be done, for now + // we'll use a movz/movk or movn/movk sequence. + uint64_t Bits = static_cast<uint64_t>(abs(NumBytes)); + BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg) + .addImm(0xffff & Bits).addImm(0) + .setMIFlags(MIFlags); + + Bits >>= 16; + if (Bits & 0xffff) { + BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg) + .addReg(ScratchReg) + .addImm(0xffff & Bits).addImm(1) + .setMIFlags(MIFlags); + } + + Bits >>= 16; + if (Bits & 0xffff) { + BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg) + .addReg(ScratchReg) + .addImm(0xffff & Bits).addImm(2) + .setMIFlags(MIFlags); + } + + Bits >>= 16; + if (Bits & 0xffff) { + BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg) + .addReg(ScratchReg) + .addImm(0xffff & Bits).addImm(3) + .setMIFlags(MIFlags); + } + + // ADD DST, SRC, xTMP (, lsl #0) + unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx; + BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg) + .addReg(SrcReg, RegState::Kill) + .addReg(ScratchReg, RegState::Kill) + .addImm(0) + .setMIFlag(MIFlags); + return; + } + + // Now we know that the adjustment can be done in at most two add/sub + // (immediate) instructions, which is always more efficient than a + // literal-pool load, or even a hypothetical movz/movk/add sequence + + // Decide whether we're doing addition or subtraction + unsigned LowOp, HighOp; + if (NumBytes >= 0) { + LowOp = AArch64::ADDxxi_lsl0_s; + HighOp = AArch64::ADDxxi_lsl12_s; + } else { + LowOp = AArch64::SUBxxi_lsl0_s; + HighOp = AArch64::SUBxxi_lsl12_s; + NumBytes = abs(NumBytes); + } + + // If we're here, at the very least a move needs to be produced, which just + // happens to be materializable by an ADD. + if ((NumBytes & 0xfff) || NumBytes == 0) { + BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(NumBytes & 0xfff) + .setMIFlag(MIFlags); + + // Next update should use the register we've just defined. 
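+    // (Worked example: NumBytes == 0x1234 emits "add Dst, Src, #0x234" here,
+    // followed by "add Dst, Dst, #0x1, lsl #12" from the block below.)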
+    SrcReg = DstReg;
+  }
+
+  if (NumBytes & 0xfff000) {
+    BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg)
+      .addReg(SrcReg, RegState::Kill)
+      .addImm(NumBytes >> 12)
+      .setMIFlag(MIFlags);
+  }
+}
+
+void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                        DebugLoc dl, const TargetInstrInfo &TII,
+                        unsigned ScratchReg, int64_t NumBytes,
+                        MachineInstr::MIFlag MIFlags) {
+  emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16,
+                NumBytes, MIFlags);
+}
+
+
+namespace {
+  struct LDTLSCleanup : public MachineFunctionPass {
+    static char ID;
+    LDTLSCleanup() : MachineFunctionPass(ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF) {
+      AArch64MachineFunctionInfo* MFI
+        = MF.getInfo<AArch64MachineFunctionInfo>();
+      if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+        // No point folding accesses if there aren't at least two.
+        return false;
+      }
+
+      MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+      return VisitNode(DT->getRootNode(), 0);
+    }
+
+    // Visit the dominator subtree rooted at Node in pre-order.
+    // If TLSBaseAddrReg is non-null, then use that to replace any
+    // TLS_base_addr instructions. Otherwise, create the register
+    // when the first such instruction is seen, and then use it
+    // as we encounter more instructions.
+    bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
+      MachineBasicBlock *BB = Node->getBlock();
+      bool Changed = false;
+
+      // Traverse the current block.
+      for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+           ++I) {
+        switch (I->getOpcode()) {
+        case AArch64::TLSDESC_BLRx:
+          // Make sure it's a local dynamic access.
+          if (!I->getOperand(1).isSymbol() ||
+              strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+            break;
+
+          if (TLSBaseAddrReg)
+            I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
+          else
+            I = SetRegister(I, &TLSBaseAddrReg);
+          Changed = true;
+          break;
+        default:
+          break;
+        }
+      }
+
+      // Visit the children of this block in the dominator tree.
+      for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
+           I != E; ++I) {
+        Changed |= VisitNode(*I, TLSBaseAddrReg);
+      }
+
+      return Changed;
+    }
+
+    // Replace the TLS_base_addr instruction I with a copy from
+    // TLSBaseAddrReg, returning the new instruction.
+    MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
+                                         unsigned TLSBaseAddrReg) {
+      MachineFunction *MF = I->getParent()->getParent();
+      const AArch64TargetMachine *TM =
+        static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+      const AArch64InstrInfo *TII = TM->getInstrInfo();
+
+      // Insert a copy from TLSBaseAddrReg to x0, which is where the rest of
+      // the code sequence assumes the address will be.
+      MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+                                   TII->get(TargetOpcode::COPY),
+                                   AArch64::X0)
+                             .addReg(TLSBaseAddrReg);
+
+      // Erase the TLS_base_addr instruction.
+      I->eraseFromParent();
+
+      return Copy;
+    }
+
+    // Create a virtual register in *TLSBaseAddrReg, and populate it by
+    // inserting a copy instruction after I. Returns the new instruction.
+    MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
+      MachineFunction *MF = I->getParent()->getParent();
+      const AArch64TargetMachine *TM =
+        static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+      const AArch64InstrInfo *TII = TM->getInstrInfo();
+
+      // Create a virtual register for the TLS base address.
+ MachineRegisterInfo &RegInfo = MF->getRegInfo(); + *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass); + + // Insert a copy from X0 to TLSBaseAddrReg for later. + MachineInstr *Next = I->getNextNode(); + MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), + TII->get(TargetOpcode::COPY), + *TLSBaseAddrReg) + .addReg(AArch64::X0); + + return Copy; + } + + virtual const char *getPassName() const { + return "Local Dynamic TLS Access Clean-up"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +char LDTLSCleanup::ID = 0; +FunctionPass* +llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h new file mode 100644 index 0000000..22a2ab4 --- /dev/null +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -0,0 +1,112 @@ +//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_AARCH64INSTRINFO_H +#define LLVM_TARGET_AARCH64INSTRINFO_H + +#include "llvm/Target/TargetInstrInfo.h" +#include "AArch64RegisterInfo.h" + +#define GET_INSTRINFO_HEADER +#include "AArch64GenInstrInfo.inc" + +namespace llvm { + +class AArch64Subtarget; + +class AArch64InstrInfo : public AArch64GenInstrInfo { + const AArch64RegisterInfo RI; + const AArch64Subtarget &Subtarget; +public: + explicit AArch64InstrInfo(const AArch64Subtarget &TM); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). 
+  ///
+  const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+  const AArch64Subtarget &getSubTarget() const { return Subtarget; }
+
+  void copyPhysReg(MachineBasicBlock &MBB,
+                   MachineBasicBlock::iterator I, DebugLoc DL,
+                   unsigned DestReg, unsigned SrcReg,
+                   bool KillSrc) const;
+
+  MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+                                         uint64_t Offset, const MDNode *MDPtr,
+                                         DebugLoc DL) const;
+
+  void storeRegToStackSlot(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MI,
+                           unsigned SrcReg, bool isKill, int FrameIndex,
+                           const TargetRegisterClass *RC,
+                           const TargetRegisterInfo *TRI) const;
+  void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MBBI,
+                            unsigned DestReg, int FrameIdx,
+                            const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI) const;
+
+  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                     MachineBasicBlock *&FBB,
+                     SmallVectorImpl<MachineOperand> &Cond,
+                     bool AllowModify = false) const;
+  unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                        MachineBasicBlock *FBB,
+                        const SmallVectorImpl<MachineOperand> &Cond,
+                        DebugLoc DL) const;
+  unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+  bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+  /// Look through the instructions in this function and work out the largest
+  /// size the stack frame can be while maintaining the ability to address
+  /// local slots with no complexities.
+  unsigned estimateRSStackLimit(MachineFunction &MF) const;
+
+  /// getAddressConstraints - For loads and stores (and PRFMs) taking an
+  /// immediate offset, this function determines the constraints required for
+  /// the immediate. It must satisfy:
+  ///   + MinOffset <= imm <= MaxOffset
+  ///   + imm % AccessScale == 0
+  void getAddressConstraints(const MachineInstr &MI, int &AccessScale,
+                             int &MinOffset, int &MaxOffset) const;
+
+  unsigned getInstSizeInBytes(const MachineInstr &MI) const;
+
+  unsigned getInstBundleLength(const MachineInstr &MI) const;
+};
+
+bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+                          unsigned FrameReg, int &Offset,
+                          const AArch64InstrInfo &TII);
+
+void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                   DebugLoc dl, const TargetInstrInfo &TII,
+                   unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
+                   int64_t NumBytes,
+                   MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
+
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                  DebugLoc dl, const TargetInstrInfo &TII,
+                  unsigned ScratchReg, int64_t NumBytes,
+                  MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
new file mode 100644
index 0000000..562a7f6
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -0,0 +1,5109 @@
+//===----- AArch64InstrInfo.td - AArch64 Instruction Info ----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the AArch64 scalar instructions in TableGen format.
+// +//===----------------------------------------------------------------------===// + +include "AArch64InstrFormats.td" + +//===----------------------------------------------------------------------===// +// Target-specific ISD nodes and profiles +//===----------------------------------------------------------------------===// + +def SDT_A64ret : SDTypeProfile<0, 0, []>; +def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain, + SDNPOptInGlue, + SDNPVariadic]>; + +// (ins NZCV, Condition, Dest) +def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>; +def A64br_cc : SDNode<"AArch64ISD::BR_CC", SDT_A64br_cc, [SDNPHasChain]>; + +// (outs Result), (ins NZCV, IfTrue, IfFalse, Condition) +def SDT_A64select_cc : SDTypeProfile<1, 4, [SDTCisVT<1, i32>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<2, 3>]>; +def A64select_cc : SDNode<"AArch64ISD::SELECT_CC", SDT_A64select_cc>; + +// (outs NZCV), (ins LHS, RHS, Condition) +def SDT_A64setcc : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, + SDTCisSameAs<1, 2>]>; +def A64setcc : SDNode<"AArch64ISD::SETCC", SDT_A64setcc>; + + +// (outs GPR64), (ins) +def A64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; + +// A64 compares don't care about the cond really (they set all flags) so a +// simple binary operator is useful. +def A64cmp : PatFrag<(ops node:$lhs, node:$rhs), + (A64setcc node:$lhs, node:$rhs, cond)>; + + +// When matching a notional (CMP op1, (sub 0, op2)), we'd like to use a CMN +// instruction on the grounds that "op1 - (-op2) == op1 + op2". However, the C +// and V flags can be set differently by this operation. It comes down to +// whether "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are +// then everything is fine. If not then the optimization is wrong. Thus general +// comparisons are only valid if op2 != 0. + +// So, finally, the only LLVM-native comparisons that don't mention C and V are +// SETEQ and SETNE. They're the only ones we can safely use CMN for in the +// absence of information about op2. +def equality_cond : PatLeaf<(cond), [{ + return N->get() == ISD::SETEQ || N->get() == ISD::SETNE; +}]>; + +def A64cmn : PatFrag<(ops node:$lhs, node:$rhs), + (A64setcc node:$lhs, (sub 0, node:$rhs), equality_cond)>; + +// There are two layers of indirection here, driven by the following +// considerations. +// + TableGen does not know CodeModel or Reloc so that decision should be +// made for a variable/address at ISelLowering. +// + The output of ISelLowering should be selectable (hence the Wrapper, +// rather than a bare target opcode) +def SDTAArch64Wrapper : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i32>, + SDTCisPtrTy<0>]>; + +def A64WrapperSmall : SDNode<"AArch64ISD::WrapperSmall", SDTAArch64Wrapper>; + + +def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; +def A64GOTLoad : SDNode<"AArch64ISD::GOTLoad", SDTAArch64GOTLoad, + [SDNPHasChain]>; + + +// (A64BFI LHS, RHS, LSB, Width) +def SDTA64BFI : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i64>, + SDTCisVT<4, i64>]>; + +def A64Bfi : SDNode<"AArch64ISD::BFI", SDTA64BFI>; + +// (A64EXTR HiReg, LoReg, LSB) +def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, + SDTCisVT<3, i64>]>; +def A64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>; + +// (A64[SU]BFX Field, ImmR, ImmS). +// +// Note that ImmR and ImmS are already encoded for the actual instructions. 
The
+// more natural LSB and Width mix together to form ImmR and ImmS, something
+// which TableGen can't handle.
+def SDTA64BFX : SDTypeProfile<1, 3, [SDTCisVT<2, i64>, SDTCisVT<3, i64>]>;
+def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>;
+
+def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>;
+
+//===----------------------------------------------------------------------===//
+// Call sequence pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+
+def SDT_AArch64Call : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def AArch64Call : SDNode<"AArch64ISD::Call", SDT_AArch64Call,
+                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call,
+                          [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// The TLSDESCCALL node is a variant call which goes to an indirectly
+// calculated destination but needs a relocation against a fixed symbol. As
+// such it has two certain operands: the callee and the relocated variable.
+//
+// The TLS ABI only allows it to be selected to a BLR instruction (with
+// appropriate relocation).
+def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+
+def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall,
+                            [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
+                             SDNPVariadic]>;
+
+
+def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>;
+def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AArch64CallSeqStart,
+                                  [SDNPHasChain, SDNPOutGlue]>;
+
+def SDT_AArch64CallSeqEnd : SDCallSeqEnd<[ SDTCisPtrTy<0>, SDTCisPtrTy<1> ]>;
+def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd,
+                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+
+
+// These pseudo-instructions have special semantics by virtue of being passed
+// to the InstrInfo constructor. CALLSEQ_START/CALLSEQ_END are produced by
+// LowerCall to (in our case) tell the back-end about stack adjustments for
+// arguments passed on the stack. Here we select those markers to
+// pseudo-instructions which explicitly set the stack, and finally in the
+// RegisterInfo we convert them to a true stack adjustment.
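+//
+// As a rough illustration (the exact printed sequence depends on the frame
+// lowering), a call needing 16 bytes of argument stack proceeds:
+//   CALLSEQ_START(16)         from LowerCall
+//   -> ADJCALLSTACKDOWN 16    selected via the pseudo below
+//   -> sub xsp, xsp, #16      once the pseudo is eliminated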
+let Defs = [XSP], Uses = [XSP] in { + def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt), + [(AArch64callseq_start timm:$amt)]>; + + def ADJCALLSTACKUP : PseudoInst<(outs), (ins i64imm:$amt1, i64imm:$amt2), + [(AArch64callseq_end timm:$amt1, timm:$amt2)]>; +} + +//===----------------------------------------------------------------------===// +// Atomic operation pseudo-instructions +//===----------------------------------------------------------------------===// + +let usesCustomInserter = 1, Defs = [NZCV] in { +multiclass AtomicSizes<string opname> { + def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), + [(set GPR32:$dst, (!cast<SDNode>(opname # "_8") GPR64:$ptr, GPR32:$incr))]>; + def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), + [(set GPR32:$dst, (!cast<SDNode>(opname # "_16") GPR64:$ptr, GPR32:$incr))]>; + def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), + [(set GPR32:$dst, (!cast<SDNode>(opname # "_32") GPR64:$ptr, GPR32:$incr))]>; + def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr), + [(set GPR64:$dst, (!cast<SDNode>(opname # "_64") GPR64:$ptr, GPR64:$incr))]>; +} +} + +defm ATOMIC_LOAD_ADD : AtomicSizes<"atomic_load_add">; +defm ATOMIC_LOAD_SUB : AtomicSizes<"atomic_load_sub">; +defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">; +defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">; +defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">; +defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">; +defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">; +defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">; +defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">; +defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">; +defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">; + +let usesCustomInserter = 1, Defs = [NZCV] in { +def ATOMIC_CMP_SWAP_I8 + : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), + [(set GPR32:$dst, + (atomic_cmp_swap_8 GPR64:$ptr, GPR32:$old, GPR32:$new))]>; +def ATOMIC_CMP_SWAP_I16 + : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), + [(set GPR32:$dst, + (atomic_cmp_swap_16 GPR64:$ptr, GPR32:$old, GPR32:$new))]>; +def ATOMIC_CMP_SWAP_I32 + : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), + [(set GPR32:$dst, + (atomic_cmp_swap_32 GPR64:$ptr, GPR32:$old, GPR32:$new))]>; +def ATOMIC_CMP_SWAP_I64 + : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new), + [(set GPR64:$dst, + (atomic_cmp_swap_64 GPR64:$ptr, GPR64:$old, GPR64:$new))]>; +} + +//===----------------------------------------------------------------------===// +// Add-subtract (extended register) instructions +//===----------------------------------------------------------------------===// +// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP + +// The RHS of these operations is conceptually a sign/zero-extended +// register, optionally shifted left by 1-4. The extension can be a +// NOP (e.g. "sxtx" sign-extending a 64-bit register to 64-bits) but +// must be specified with one exception: + +// If one of the registers is sp/wsp then LSL is an alias for UXTW in +// 32-bit instructions and UXTX in 64-bit versions, the shift amount +// is not optional in that case (but can explicitly be 0), and the +// entire suffix can be skipped (e.g. "add sp, x3, x2"). 
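+//
+// A few concrete forms, for illustration:
+//   add x0, x1, w2, sxth #2   // no sp involved: the extend is spelled out
+//   add sp, x3, x2, lsl #3    // sp involved: "lsl" here acts as "uxtx"
+//   sub sp, sp, w4, uxtw      // shift amount omitted, so it is 0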
+ +multiclass extend_operands<string PREFIX, string Diag> { + def _asmoperand : AsmOperandClass { + let Name = PREFIX; + let RenderMethod = "addRegExtendOperands"; + let PredicateMethod = "isRegExtend<A64SE::" # PREFIX # ">"; + let DiagnosticType = "AddSubRegExtend" # Diag; + } + + def _operand : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 4; }]> { + let PrintMethod = "printRegExtendOperand<A64SE::" # PREFIX # ">"; + let DecoderMethod = "DecodeRegExtendOperand"; + let ParserMatchClass = !cast<AsmOperandClass>(PREFIX # "_asmoperand"); + } +} + +defm UXTB : extend_operands<"UXTB", "Small">; +defm UXTH : extend_operands<"UXTH", "Small">; +defm UXTW : extend_operands<"UXTW", "Small">; +defm UXTX : extend_operands<"UXTX", "Large">; +defm SXTB : extend_operands<"SXTB", "Small">; +defm SXTH : extend_operands<"SXTH", "Small">; +defm SXTW : extend_operands<"SXTW", "Small">; +defm SXTX : extend_operands<"SXTX", "Large">; + +def LSL_extasmoperand : AsmOperandClass { + let Name = "RegExtendLSL"; + let RenderMethod = "addRegExtendOperands"; + let DiagnosticType = "AddSubRegExtendLarge"; +} + +def LSL_extoperand : Operand<i64> { + let ParserMatchClass = LSL_extasmoperand; +} + + +// The patterns for various sign-extensions are a little ugly and +// non-uniform because everything has already been promoted to the +// legal i64 and i32 types. We'll wrap the various variants up in a +// class for use later. +class extend_types { + dag uxtb; dag uxth; dag uxtw; dag uxtx; + dag sxtb; dag sxth; dag sxtw; dag sxtx; +} + +def extends_to_i64 : extend_types { + let uxtb = (and (anyext GPR32:$Rm), 255); + let uxth = (and (anyext GPR32:$Rm), 65535); + let uxtw = (zext GPR32:$Rm); + let uxtx = (i64 GPR64:$Rm); + + let sxtb = (sext_inreg (anyext GPR32:$Rm), i8); + let sxth = (sext_inreg (anyext GPR32:$Rm), i16); + let sxtw = (sext GPR32:$Rm); + let sxtx = (i64 GPR64:$Rm); +} + + +def extends_to_i32 : extend_types { + let uxtb = (and GPR32:$Rm, 255); + let uxth = (and GPR32:$Rm, 65535); + let uxtw = (i32 GPR32:$Rm); + let uxtx = (i32 GPR32:$Rm); + + let sxtb = (sext_inreg GPR32:$Rm, i8); + let sxth = (sext_inreg GPR32:$Rm, i16); + let sxtw = (i32 GPR32:$Rm); + let sxtx = (i32 GPR32:$Rm); +} + +// Now, six of the extensions supported are easy and uniform: if the source size +// is 32-bits or less, then Rm is always a 32-bit register. We'll instantiate +// those instructions in one block. + +// The uxtx/sxtx could potentially be merged in, but three facts dissuaded me: +// + It would break the naming scheme: either ADDxx_uxtx or ADDww_uxtx would +// be impossible. +// + Patterns are very different as well. +// + Passing different registers would be ugly (more fields in extend_types +// would probably be the best option). 
+multiclass addsub_exts<bit sf, bit op, bit S, string asmop, + SDPatternOperator opfrag, + dag outs, extend_types exts, RegisterClass GPRsp> { + def w_uxtb : A64I_addsubext<sf, op, S, 0b00, 0b000, + outs, + (ins GPRsp:$Rn, GPR32:$Rm, UXTB_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag GPRsp:$Rn, (shl exts.uxtb, UXTB_operand:$Imm3))], + NoItinerary>; + def w_uxth : A64I_addsubext<sf, op, S, 0b00, 0b001, + outs, + (ins GPRsp:$Rn, GPR32:$Rm, UXTH_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag GPRsp:$Rn, (shl exts.uxth, UXTH_operand:$Imm3))], + NoItinerary>; + def w_uxtw : A64I_addsubext<sf, op, S, 0b00, 0b010, + outs, + (ins GPRsp:$Rn, GPR32:$Rm, UXTW_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag GPRsp:$Rn, (shl exts.uxtw, UXTW_operand:$Imm3))], + NoItinerary>; + + def w_sxtb : A64I_addsubext<sf, op, S, 0b00, 0b100, + outs, + (ins GPRsp:$Rn, GPR32:$Rm, SXTB_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag GPRsp:$Rn, (shl exts.sxtb, SXTB_operand:$Imm3))], + NoItinerary>; + def w_sxth : A64I_addsubext<sf, op, S, 0b00, 0b101, + outs, + (ins GPRsp:$Rn, GPR32:$Rm, SXTH_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag GPRsp:$Rn, (shl exts.sxth, SXTH_operand:$Imm3))], + NoItinerary>; + def w_sxtw : A64I_addsubext<sf, op, S, 0b00, 0b110, + outs, + (ins GPRsp:$Rn, GPR32:$Rm, SXTW_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag GPRsp:$Rn, (shl exts.sxtw, SXTW_operand:$Imm3))], + NoItinerary>; +} + +// These two could be merge in with the above, but their patterns aren't really +// necessary and the naming-scheme would necessarily break: +multiclass addsub_xxtx<bit op, bit S, string asmop, SDPatternOperator opfrag, + dag outs> { + def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011, + outs, + (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [(opfrag GPR64xsp:$Rn, (shl GPR64:$Rm, UXTX_operand:$Imm3))], + NoItinerary>; + + def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111, + outs, + (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [/* No Pattern: same as uxtx */], + NoItinerary>; +} + +multiclass addsub_wxtx<bit op, bit S, string asmop, dag outs> { + def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011, + outs, + (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [/* No pattern: probably same as uxtw */], + NoItinerary>; + + def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111, + outs, + (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3), + !strconcat(asmop, "$Rn, $Rm, $Imm3"), + [/* No Pattern: probably same as uxtw */], + NoItinerary>; +} + +class SetRD<RegisterClass RC, SDPatternOperator op> + : PatFrag<(ops node:$lhs, node:$rhs), (set RC:$Rd, (op node:$lhs, node:$rhs))>; +class SetNZCV<SDPatternOperator op> + : PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>; + +defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>, + (outs GPR64xsp:$Rd), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>, + (outs GPR64xsp:$Rd)>; +defm ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR32wsp, add>, + (outs GPR32wsp:$Rd), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b0, 0b0, "add\t$Rd, ", + (outs GPR32wsp:$Rd)>; +defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>, + (outs GPR64xsp:$Rd), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, 
sub>, + (outs GPR64xsp:$Rd)>; +defm SUBww :addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR32wsp, sub>, + (outs GPR32wsp:$Rd), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b1, 0b0, "sub\t$Rd, ", + (outs GPR32wsp:$Rd)>; + +let Defs = [NZCV] in { +defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>, + (outs GPR64:$Rd), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>, + (outs GPR64:$Rd)>; +defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR32, addc>, + (outs GPR32:$Rd), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b0, 0b1, "adds\t$Rd, ", + (outs GPR32:$Rd)>; +defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>, + (outs GPR64:$Rd), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>, + (outs GPR64:$Rd)>; +defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR32, subc>, + (outs GPR32:$Rd), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b1, 0b1, "subs\t$Rd, ", + (outs GPR32:$Rd)>; + + +let Rd = 0b11111, isCompare = 1 in { +defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, + (outs), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, (outs)>; +defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, + (outs), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b0, 0b1, "cmn\t", (outs)>; +defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, + (outs), extends_to_i64, GPR64xsp>, + addsub_xxtx< 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, (outs)>; +defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, + (outs), extends_to_i32, GPR32wsp>, + addsub_wxtx< 0b1, 0b1, "cmp\t", (outs)>; +} +} + +// Now patterns for the operation without a shift being needed. No patterns are +// created for uxtx/sxtx since they're non-uniform and it's expected that +// add/sub (shifted register) will handle those cases anyway. +multiclass addsubext_noshift_patterns<string prefix, SDPatternOperator nodeop, + RegisterClass GPRsp, extend_types exts> { + def : Pat<(nodeop GPRsp:$Rn, exts.uxtb), + (!cast<Instruction>(prefix # "w_uxtb") GPRsp:$Rn, GPR32:$Rm, 0)>; + def : Pat<(nodeop GPRsp:$Rn, exts.uxth), + (!cast<Instruction>(prefix # "w_uxth") GPRsp:$Rn, GPR32:$Rm, 0)>; + def : Pat<(nodeop GPRsp:$Rn, exts.uxtw), + (!cast<Instruction>(prefix # "w_uxtw") GPRsp:$Rn, GPR32:$Rm, 0)>; + + def : Pat<(nodeop GPRsp:$Rn, exts.sxtb), + (!cast<Instruction>(prefix # "w_sxtb") GPRsp:$Rn, GPR32:$Rm, 0)>; + def : Pat<(nodeop GPRsp:$Rn, exts.sxth), + (!cast<Instruction>(prefix # "w_sxth") GPRsp:$Rn, GPR32:$Rm, 0)>; + def : Pat<(nodeop GPRsp:$Rn, exts.sxtw), + (!cast<Instruction>(prefix # "w_sxtw") GPRsp:$Rn, GPR32:$Rm, 0)>; +} + +defm : addsubext_noshift_patterns<"ADDxx", add, GPR64xsp, extends_to_i64>; +defm : addsubext_noshift_patterns<"ADDww", add, GPR32wsp, extends_to_i32>; +defm : addsubext_noshift_patterns<"SUBxx", sub, GPR64xsp, extends_to_i64>; +defm : addsubext_noshift_patterns<"SUBww", sub, GPR32wsp, extends_to_i32>; + +defm : addsubext_noshift_patterns<"CMNx", A64cmn, GPR64xsp, extends_to_i64>; +defm : addsubext_noshift_patterns<"CMNw", A64cmn, GPR32wsp, extends_to_i32>; +defm : addsubext_noshift_patterns<"CMPx", A64cmp, GPR64xsp, extends_to_i64>; +defm : addsubext_noshift_patterns<"CMPw", A64cmp, GPR32wsp, extends_to_i32>; + +// An extend of "lsl #imm" is valid if and only if one of Rn and Rd is +// sp/wsp. It is synonymous with uxtx/uxtw depending on the size of the +// operation. 
Also permitted in this case is complete omission of the argument,
+// which implies "lsl #0".
+multiclass lsl_aliases<string asmop, Instruction inst, RegisterClass GPR_Rd,
+                       RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
+  def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+                  (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
+
+  def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm, $LSL"),
+                  (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
+}
+
+defm : lsl_aliases<"add", ADDxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
+defm : lsl_aliases<"add", ADDxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
+defm : lsl_aliases<"add", ADDwww_uxtw, Rwsp, GPR32wsp, GPR32>;
+defm : lsl_aliases<"add", ADDwww_uxtw, GPR32wsp, Rwsp, GPR32>;
+defm : lsl_aliases<"sub", SUBxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
+defm : lsl_aliases<"sub", SUBxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
+defm : lsl_aliases<"sub", SUBwww_uxtw, Rwsp, GPR32wsp, GPR32>;
+defm : lsl_aliases<"sub", SUBwww_uxtw, GPR32wsp, Rwsp, GPR32>;
+
+// Rd cannot be sp for flag-setting variants so only half of the aliases are
+// needed.
+defm : lsl_aliases<"adds", ADDSxxx_uxtx, GPR64, Rxsp, GPR64>;
+defm : lsl_aliases<"adds", ADDSwww_uxtw, GPR32, Rwsp, GPR32>;
+defm : lsl_aliases<"subs", SUBSxxx_uxtx, GPR64, Rxsp, GPR64>;
+defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>;
+
+// CMP unfortunately has to be different because the instruction doesn't have
+// a dest register.
+multiclass cmp_lsl_aliases<string asmop, Instruction inst,
+                           RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
+  def : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
+                  (inst GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
+
+  def : InstAlias<!strconcat(asmop, " $Rn, $Rm, $LSL"),
+                  (inst GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
+}
+
+defm : cmp_lsl_aliases<"cmp", CMPxx_uxtx, Rxsp, GPR64>;
+defm : cmp_lsl_aliases<"cmp", CMPww_uxtw, Rwsp, GPR32>;
+defm : cmp_lsl_aliases<"cmn", CMNxx_uxtx, Rxsp, GPR64>;
+defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, MOV
+
+// These instructions accept a 12-bit unsigned immediate, optionally shifted
+// left by 12 bits. Official assembly format specifies a 12 bit immediate with
+// one of "", "LSL #0", "LSL #12" supplementary operands.
+
+// There are surprisingly few ways to make this work with TableGen, so this
+// implementation has separate instructions for the "LSL #0" and "LSL #12"
+// variants.
+
+// If the MCInst retained a single combined immediate (which could be 0x123000,
+// for example) then both components (imm & shift) would have to be delegated
+// to a single assembly operand. This would entail a separate operand parser
+// (because the LSL would have to live in the same AArch64Operand as the
+// immediate to be accessible); assembly parsing is rather complex and
+// error-prone C++ code.
+//
+// By splitting the immediate, we can delegate handling this optional operand
+// to an InstAlias. Supporting functions to generate the correct MCInst are
+// still required, but these are essentially trivial and parsing can remain
+// generic.
+//
+// Rejected plans with rationale:
+// ------------------------------
+//
+// In an ideal world you'd have two first class immediate operands (in
+// InOperandList, specifying imm12 and shift). Unfortunately this is not
+// selectable by any means I could discover.
+// +// An Instruction with two MCOperands hidden behind a single entry in +// InOperandList (expanded by ComplexPatterns and MIOperandInfo) was functional, +// but required more C++ code to handle encoding/decoding. Parsing (the intended +// main beneficiary) ended up equally complex because of the optional nature of +// "LSL #0". +// +// Attempting to circumvent the need for a custom OperandParser above by giving +// InstAliases without the "lsl #0" failed. add/sub could be accommodated but +// the cmp/cmn aliases didn't use the MIOperandInfo to determine how operands +// should be parsed: there was no way to accommodate an "lsl #12". + +let ParserMethod = "ParseImmWithLSLOperand", + RenderMethod = "addImmWithLSLOperands" in { + // Derived PredicateMethod fields are different for each + def addsubimm_lsl0_asmoperand : AsmOperandClass { + let Name = "AddSubImmLSL0"; + // If an error is reported against this operand, instruction could also be a + // register variant. + let DiagnosticType = "AddSubSecondSource"; + } + + def addsubimm_lsl12_asmoperand : AsmOperandClass { + let Name = "AddSubImmLSL12"; + let DiagnosticType = "AddSubSecondSource"; + } +} + +def shr_12_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getSExtValue() >> 12, MVT::i32); +}]>; + +def shr_12_neg_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant((-N->getSExtValue()) >> 12, MVT::i32); +}]>; + +def neg_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(-N->getSExtValue(), MVT::i32); +}]>; + + +multiclass addsub_imm_operands<ValueType ty> { + let PrintMethod = "printAddSubImmLSL0Operand", + EncoderMethod = "getAddSubImmOpValue", + ParserMatchClass = addsubimm_lsl0_asmoperand in { + def _posimm_lsl0 : Operand<ty>, + ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff) == 0; }]>; + def _negimm_lsl0 : Operand<ty>, + ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff) == 0; }], + neg_XFORM>; + } + + let PrintMethod = "printAddSubImmLSL12Operand", + EncoderMethod = "getAddSubImmOpValue", + ParserMatchClass = addsubimm_lsl12_asmoperand in { + def _posimm_lsl12 : Operand<ty>, + ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff000) == 0; }], + shr_12_XFORM>; + + def _negimm_lsl12 : Operand<ty>, + ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff000) == 0; }], + shr_12_neg_XFORM>; + } +} + +// The add operands don't need any transformation +defm addsubimm_operand_i32 : addsub_imm_operands<i32>; +defm addsubimm_operand_i64 : addsub_imm_operands<i64>; + +multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift, + string asmop, string cmpasmop, + Operand imm_operand, Operand cmp_imm_operand, + RegisterClass GPR, RegisterClass GPRsp, + AArch64Reg ZR> { + // All registers for non-S variants allow SP + def _s : A64I_addsubimm<sf, op, 0b0, shift, + (outs GPRsp:$Rd), + (ins GPRsp:$Rn, imm_operand:$Imm12), + !strconcat(asmop, "\t$Rd, $Rn, $Imm12"), + [(set GPRsp:$Rd, + (add GPRsp:$Rn, imm_operand:$Imm12))], + NoItinerary>; + + + // S variants can read SP but would write to ZR + def _S : A64I_addsubimm<sf, op, 0b1, shift, + (outs GPR:$Rd), + (ins GPRsp:$Rn, imm_operand:$Imm12), + !strconcat(asmop, "s\t$Rd, $Rn, $Imm12"), + [(set GPR:$Rd, (addc GPRsp:$Rn, imm_operand:$Imm12))], + NoItinerary> { + let Defs = [NZCV]; + } + + // Note that the pattern here for ADDS is subtle. Canonically CMP + // a, b becomes SUBS a, b. If b < 0 then this is equivalent to + // ADDS a, (-b). This is not true in general. 
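+  // (For instance, comparing 0 with 0: "subs" computes 0 - 0 and sets C = 1,
+  // while "adds" computes 0 + 0 and sets C = 0; Z and N agree but C does not.
+  // The negimm leaves above require Imm < 0, so the 0 case never reaches the
+  // converted pattern.)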
+  def _cmp : A64I_addsubimm<sf, op, 0b1, shift,
+                            (outs), (ins GPRsp:$Rn, imm_operand:$Imm12),
+                            !strconcat(cmpasmop, " $Rn, $Imm12"),
+                            [(set NZCV,
+                                  (A64cmp GPRsp:$Rn, cmp_imm_operand:$Imm12))],
+                            NoItinerary> {
+    let Rd = 0b11111;
+    let Defs = [NZCV];
+    let isCompare = 1;
+  }
+}
+
+
+multiclass addsubimm_shifts<string prefix, bit sf, bit op,
+                            string asmop, string cmpasmop, string operand,
+                            string cmpoperand, RegisterClass GPR,
+                            RegisterClass GPRsp, AArch64Reg ZR> {
+  defm _lsl0 : addsubimm_varieties<prefix # "_lsl0", sf, op, 0b00,
+                                   asmop, cmpasmop,
+                                   !cast<Operand>(operand # "_lsl0"),
+                                   !cast<Operand>(cmpoperand # "_lsl0"),
+                                   GPR, GPRsp, ZR>;
+
+  defm _lsl12 : addsubimm_varieties<prefix # "_lsl12", sf, op, 0b01,
+                                    asmop, cmpasmop,
+                                    !cast<Operand>(operand # "_lsl12"),
+                                    !cast<Operand>(cmpoperand # "_lsl12"),
+                                    GPR, GPRsp, ZR>;
+}
+
+defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn",
+                               "addsubimm_operand_i32_posimm",
+                               "addsubimm_operand_i32_negimm",
+                               GPR32, GPR32wsp, WZR>;
+defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn",
+                               "addsubimm_operand_i64_posimm",
+                               "addsubimm_operand_i64_negimm",
+                               GPR64, GPR64xsp, XZR>;
+defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp",
+                               "addsubimm_operand_i32_negimm",
+                               "addsubimm_operand_i32_posimm",
+                               GPR32, GPR32wsp, WZR>;
+defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp",
+                               "addsubimm_operand_i64_negimm",
+                               "addsubimm_operand_i64_posimm",
+                               GPR64, GPR64xsp, XZR>;
+
+multiclass MOVsp<RegisterClass GPRsp, RegisterClass SP, Instruction addop> {
+  def _fromsp : InstAlias<"mov $Rd, $Rn",
+                          (addop GPRsp:$Rd, SP:$Rn, 0),
+                          0b1>;
+
+  def _tosp : InstAlias<"mov $Rd, $Rn",
+                        (addop SP:$Rd, GPRsp:$Rn, 0),
+                        0b1>;
+}
+
+// Recall Rxsp is a RegisterClass containing *just* xsp.
+defm MOVxx : MOVsp<GPR64xsp, Rxsp, ADDxxi_lsl0_s>;
+defm MOVww : MOVsp<GPR32wsp, Rwsp, ADDwwi_lsl0_s>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (shifted register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS
+
+//===-------------------------------
+// 1. The "shifted register" operands. Shared with logical insts.
+//===-------------------------------
+
+multiclass shift_operands<string prefix, string form> {
+  def _asmoperand_i32 : AsmOperandClass {
+    let Name = "Shift" # form # "i32";
+    let RenderMethod = "addShiftOperands";
+    let PredicateMethod = "isShift<A64SE::" # form # ", false>";
+    let DiagnosticType = "AddSubRegShift32";
+  }
+
+  // Note that the operand type is intentionally i64 because the DAGCombiner
+  // puts these into a canonical form.
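+  // (E.g. a shift of an i32 value still arrives as (shl GPR32:$Rm, (i64 3)),
+  // which is why the ImmLeaf below is defined on i64 even for the i32 form.)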
+  def _i32 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
+    let ParserMatchClass
+          = !cast<AsmOperandClass>(prefix # "_asmoperand_i32");
+    let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
+    let DecoderMethod = "Decode32BitShiftOperand";
+  }
+
+  def _asmoperand_i64 : AsmOperandClass {
+    let Name = "Shift" # form # "i64";
+    let RenderMethod = "addShiftOperands";
+    let PredicateMethod = "isShift<A64SE::" # form # ", true>";
+    let DiagnosticType = "AddSubRegShift64";
+  }
+
+  def _i64 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
+    let ParserMatchClass
+          = !cast<AsmOperandClass>(prefix # "_asmoperand_i64");
+    let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
+  }
+}
+
+defm lsl_operand : shift_operands<"lsl_operand", "LSL">;
+defm lsr_operand : shift_operands<"lsr_operand", "LSR">;
+defm asr_operand : shift_operands<"asr_operand", "ASR">;
+
+// Not used for add/sub, but defined here for completeness. The "logical
+// (shifted register)" instructions *do* have an ROR variant.
+defm ror_operand : shift_operands<"ror_operand", "ROR">;
+
+//===-------------------------------
+// 2. The basic 3.5-operand ADD/SUB/ADDS/SUBS instructions.
+//===-------------------------------
+
+// N.b. the commutable parameter is just !N. It will be first against the wall
+// when the revolution comes.
+multiclass addsub_shifts<string prefix, bit sf, bit op, bit s, bit commutable,
+                         string asmop, SDPatternOperator opfrag, string sty,
+                         RegisterClass GPR, list<Register> defs> {
+  let isCommutable = commutable, Defs = defs in {
+  def _lsl : A64I_addsubshift<sf, op, s, 0b00,
+                       (outs GPR:$Rd),
+                       (ins GPR:$Rn, GPR:$Rm,
+                            !cast<Operand>("lsl_operand_" # sty):$Imm6),
+                       !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+                       [(set GPR:$Rd, (opfrag GPR:$Rn, (shl GPR:$Rm,
+                            !cast<Operand>("lsl_operand_" # sty):$Imm6))
+                       )],
+                       NoItinerary>;
+
+  def _lsr : A64I_addsubshift<sf, op, s, 0b01,
+                       (outs GPR:$Rd),
+                       (ins GPR:$Rn, GPR:$Rm,
+                            !cast<Operand>("lsr_operand_" # sty):$Imm6),
+                       !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+                       [(set GPR:$Rd, (opfrag GPR:$Rn, (srl GPR:$Rm,
+                            !cast<Operand>("lsr_operand_" # sty):$Imm6))
+                       )],
+                       NoItinerary>;
+
+  def _asr : A64I_addsubshift<sf, op, s, 0b10,
+                       (outs GPR:$Rd),
+                       (ins GPR:$Rn, GPR:$Rm,
+                            !cast<Operand>("asr_operand_" # sty):$Imm6),
+                       !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+                       [(set GPR:$Rd, (opfrag GPR:$Rn, (sra GPR:$Rm,
+                            !cast<Operand>("asr_operand_" # sty):$Imm6))
+                       )],
+                       NoItinerary>;
+  }
+
+  def _noshift
+      : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+                  (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
+                                                       GPR:$Rm, 0)>;
+
+  def : Pat<(opfrag GPR:$Rn, GPR:$Rm),
+            (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+multiclass addsub_sizes<string prefix, bit op, bit s, bit commutable,
+                        string asmop, SDPatternOperator opfrag,
+                        list<Register> defs> {
+  defm xxx : addsub_shifts<prefix # "xxx", 0b1, op, s,
+                           commutable, asmop, opfrag, "i64", GPR64, defs>;
+  defm www : addsub_shifts<prefix # "www", 0b0, op, s,
+                           commutable, asmop, opfrag, "i32", GPR32, defs>;
+}
+
+
+defm ADD : addsub_sizes<"ADD", 0b0, 0b0, 0b1, "add", add, []>;
+defm SUB : addsub_sizes<"SUB", 0b1, 0b0, 0b0, "sub", sub, []>;
+
+defm ADDS : addsub_sizes<"ADDS", 0b0, 0b1, 0b1, "adds", addc, [NZCV]>;
+defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>;
+
+//===-------------------------------
+// 3. The NEG/NEGS aliases
+//===-------------------------------
+
+multiclass neg_alias<Instruction INST, RegisterClass GPR,
+                     Register ZR, Operand shift_operand, SDNode shiftop> {
+  def : InstAlias<"neg $Rd, $Rm, $Imm6",
+                  (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
+
+  def : Pat<(sub 0, (shiftop GPR:$Rm, shift_operand:$Imm6)),
+            (INST ZR, GPR:$Rm, shift_operand:$Imm6)>;
+}
+
+defm : neg_alias<SUBwww_lsl, GPR32, WZR, lsl_operand_i32, shl>;
+defm : neg_alias<SUBwww_lsr, GPR32, WZR, lsr_operand_i32, srl>;
+defm : neg_alias<SUBwww_asr, GPR32, WZR, asr_operand_i32, sra>;
+def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+def : Pat<(sub 0, GPR32:$Rm), (SUBwww_lsl WZR, GPR32:$Rm, 0)>;
+
+defm : neg_alias<SUBxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>;
+defm : neg_alias<SUBxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>;
+defm : neg_alias<SUBxxx_asr, GPR64, XZR, asr_operand_i64, sra>;
+def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+def : Pat<(sub 0, GPR64:$Rm), (SUBxxx_lsl XZR, GPR64:$Rm, 0)>;
+
+// NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to
+// be involved.
+class negs_alias<Instruction INST, RegisterClass GPR,
+                 Register ZR, Operand shift_operand, SDNode shiftop>
+  : InstAlias<"negs $Rd, $Rm, $Imm6",
+              (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
+
+def : negs_alias<SUBSwww_lsl, GPR32, WZR, lsl_operand_i32, shl>;
+def : negs_alias<SUBSwww_lsr, GPR32, WZR, lsr_operand_i32, srl>;
+def : negs_alias<SUBSwww_asr, GPR32, WZR, asr_operand_i32, sra>;
+def : InstAlias<"negs $Rd, $Rm", (SUBSwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+
+def : negs_alias<SUBSxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>;
+def : negs_alias<SUBSxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>;
+def : negs_alias<SUBSxxx_asr, GPR64, XZR, asr_operand_i64, sra>;
+def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+
+//===-------------------------------
+// 4. The CMP/CMN aliases
+//===-------------------------------
+
+multiclass cmp_shifts<string prefix, bit sf, bit op, bit commutable,
+                      string asmop, SDPatternOperator opfrag, string sty,
+                      RegisterClass GPR> {
+  let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in {
+  def _lsl : A64I_addsubshift<sf, op, 0b1, 0b00,
+                       (outs),
+                       (ins GPR:$Rn, GPR:$Rm,
+                            !cast<Operand>("lsl_operand_" # sty):$Imm6),
+                       !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+                       [(set NZCV, (opfrag GPR:$Rn, (shl GPR:$Rm,
+                            !cast<Operand>("lsl_operand_" # sty):$Imm6))
+                       )],
+                       NoItinerary>;
+
+  def _lsr : A64I_addsubshift<sf, op, 0b1, 0b01,
+                       (outs),
+                       (ins GPR:$Rn, GPR:$Rm,
+                            !cast<Operand>("lsr_operand_" # sty):$Imm6),
+                       !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+                       [(set NZCV, (opfrag GPR:$Rn, (srl GPR:$Rm,
+                            !cast<Operand>("lsr_operand_" # sty):$Imm6))
+                       )],
+                       NoItinerary>;
+
+  def _asr : A64I_addsubshift<sf, op, 0b1, 0b10,
+                       (outs),
+                       (ins GPR:$Rn, GPR:$Rm,
+                            !cast<Operand>("asr_operand_" # sty):$Imm6),
+                       !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+                       [(set NZCV, (opfrag GPR:$Rn, (sra GPR:$Rm,
+                            !cast<Operand>("asr_operand_" # sty):$Imm6))
+                       )],
+                       NoItinerary>;
+  }
+
+  def _noshift
+      : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
+                  (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+  def : Pat<(opfrag GPR:$Rn, GPR:$Rm),
+            (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, "i32", GPR32>;
+defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, "i64", GPR64>;
+
+defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, "i32", GPR32>;
+defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, "i64", GPR64>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (with carry) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS
+
+multiclass A64I_addsubcarrySizes<bit op, bit s, string asmop> {
+  let Uses = [NZCV] in {
+    def www : A64I_addsubcarry<0b0, op, s, 0b000000,
+                               (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm),
+                               !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+                               [], NoItinerary>;
+
+    def xxx : A64I_addsubcarry<0b1, op, s, 0b000000,
+                               (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
+                               !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+                               [], NoItinerary>;
+  }
+}
+
+let isCommutable = 1 in {
+  defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">;
+}
+
+defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">;
+
+let Defs = [NZCV] in {
+  let isCommutable = 1 in {
+    defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">;
+  }
+
+  defm SBCS : A64I_addsubcarrySizes<0b1, 0b1, "sbcs">;
+}
+
+def : InstAlias<"ngc $Rd, $Rm", (SBCwww GPR32:$Rd, WZR, GPR32:$Rm)>;
+def : InstAlias<"ngc $Rd, $Rm", (SBCxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
+def : InstAlias<"ngcs $Rd, $Rm", (SBCSwww GPR32:$Rd, WZR, GPR32:$Rm)>;
+def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
+
+// Note that adde and sube can form a chain longer than two (e.g. for 256-bit
+// addition). So the flag-setting instructions are appropriate.
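+// For example, adding the 128-bit value [x3:x2] to [x1:x0] selects to
+// "adds x0, x0, x2" followed by "adcs x1, x1, x3"; wider additions simply
+// extend the ADCS chain.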
+def : Pat<(adde GPR32:$Rn, GPR32:$Rm), (ADCSwww GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(adde GPR64:$Rn, GPR64:$Rm), (ADCSxxx GPR64:$Rn, GPR64:$Rm)>; +def : Pat<(sube GPR32:$Rn, GPR32:$Rm), (SBCSwww GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(sube GPR64:$Rn, GPR64:$Rm), (SBCSxxx GPR64:$Rn, GPR64:$Rm)>; + +//===----------------------------------------------------------------------===// +// Bitfield +//===----------------------------------------------------------------------===// +// Contains: SBFM, BFM, UBFM, [SU]XT[BHW], ASR, LSR, LSL, SBFI[ZX], BFI, BFXIL, +// UBFIZ, UBFX + +// Because of the rather complicated nearly-overlapping aliases, the decoding of +// this range of instructions is handled manually. The architectural +// instructions are BFM, SBFM and UBFM but a disassembler should never produce +// these. +// +// In the end, the best option was to use BFM instructions for decoding under +// almost all circumstances, but to create aliasing *Instructions* for each of +// the canonical forms and specify a completely custom decoder which would +// substitute the correct MCInst as needed. +// +// This also simplifies instruction selection, parsing etc because the MCInsts +// have a shape that's closer to their use in code. + +//===------------------------------- +// 1. The architectural BFM instructions +//===------------------------------- + +def uimm5_asmoperand : AsmOperandClass { + let Name = "UImm5"; + let PredicateMethod = "isUImm<5>"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "UImm5"; +} + +def uimm6_asmoperand : AsmOperandClass { + let Name = "UImm6"; + let PredicateMethod = "isUImm<6>"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "UImm6"; +} + +def bitfield32_imm : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 0 && Imm < 32; }]> { + let ParserMatchClass = uimm5_asmoperand; + + let DecoderMethod = "DecodeBitfield32ImmOperand"; +} + + +def bitfield64_imm : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> { + let ParserMatchClass = uimm6_asmoperand; + + // Default decoder works in 64-bit case: the 6-bit field can take any value. +} + +multiclass A64I_bitfieldSizes<bits<2> opc, string asmop> { + def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), + (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [], NoItinerary> { + let DecoderMethod = "DecodeBitfieldInstruction"; + } + + def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), + (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [], NoItinerary> { + let DecoderMethod = "DecodeBitfieldInstruction"; + } +} + +defm SBFM : A64I_bitfieldSizes<0b00, "sbfm">; +defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">; + +// BFM instructions modify the destination register rather than defining it +// completely. +def BFMwwii : + A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), + (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), + "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + let DecoderMethod = "DecodeBitfieldInstruction"; + let Constraints = "$src = $Rd"; +} + +def BFMxxii : + A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), + (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), + "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + let DecoderMethod = "DecodeBitfieldInstruction"; + let Constraints = "$src = $Rd"; +} + + +//===------------------------------- +// 2. 
Extend aliases to 64-bit dest
+//===-------------------------------
+
+// Unfortunately the extensions that end up as 64-bits cannot be handled by an
+// instruction alias: their syntax is (for example) "SXTB x0, w0", which needs
+// to be mapped to "SBFM x0, x0, #0, 7" (changing the class of Rn). InstAlias
+// is not capable of such a map as far as I'm aware.
+
+// Note that these instructions are strictly more specific than the
+// BFM ones (in ImmR) so they can handle their own decoding.
+class A64I_bf_ext<bit sf, bits<2> opc, RegisterClass GPRDest, string asmop,
+                  bits<6> imms, dag pattern>
+  : A64I_bitfield<sf, opc, sf,
+                  (outs GPRDest:$Rd), (ins GPR32:$Rn),
+                  !strconcat(asmop, "\t$Rd, $Rn"),
+                  [(set GPRDest:$Rd, pattern)], NoItinerary> {
+  let ImmR = 0b000000;
+  let ImmS = imms;
+}
+
+// Signed extensions
+def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxtb", 7,
+                         (sext_inreg (anyext GPR32:$Rn), i8)>;
+def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, "sxtb", 7,
+                         (sext_inreg GPR32:$Rn, i8)>;
+def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxth", 15,
+                         (sext_inreg (anyext GPR32:$Rn), i16)>;
+def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, "sxth", 15,
+                         (sext_inreg GPR32:$Rn, i16)>;
+def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxtw", 31, (sext GPR32:$Rn)>;
+
+// Unsigned extensions
+def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, "uxtb", 7,
+                         (and GPR32:$Rn, 255)>;
+def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, "uxth", 15,
+                         (and GPR32:$Rn, 65535)>;
+
+// The 64-bit unsigned variants are not strictly architectural but recommended
+// for consistency.
+let isAsmParserOnly = 1 in {
+  def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, "uxtb", 7,
+                           (and (anyext GPR32:$Rn), 255)>;
+  def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, "uxth", 15,
+                           (and (anyext GPR32:$Rn), 65535)>;
+}
+
+// Extra patterns for when the source register is actually 64-bits
+// too. There's no architectural difference here, it's just LLVM
+// shenanigans. There's no need for equivalent zero-extension patterns
+// because they'll already be caught by logical (immediate) matching.
+def : Pat<(sext_inreg GPR64:$Rn, i8),
+          (SXTBxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>;
+def : Pat<(sext_inreg GPR64:$Rn, i16),
+          (SXTHxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>;
+def : Pat<(sext_inreg GPR64:$Rn, i32),
+          (SXTWxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>;
+
+
+//===-------------------------------
+// 3. Aliases for ASR and LSR (the simple shifts)
+//===-------------------------------
+
+// These also handle their own decoding because ImmS being set makes
+// them take precedence over BFM.
+multiclass A64I_shift<bits<2> opc, string asmop, SDNode opnode> {
+  def wwi : A64I_bitfield<0b0, opc, 0b0,
+                    (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR),
+                    !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
+                    [(set GPR32:$Rd, (opnode GPR32:$Rn, bitfield32_imm:$ImmR))],
+                    NoItinerary> {
+    let ImmS = 31;
+  }
+
+  def xxi : A64I_bitfield<0b1, opc, 0b1,
+                    (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR),
+                    !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
+                    [(set GPR64:$Rd, (opnode GPR64:$Rn, bitfield64_imm:$ImmR))],
+                    NoItinerary> {
+    let ImmS = 63;
+  }
+
+}
+
+defm ASR : A64I_shift<0b00, "asr", sra>;
+defm LSR : A64I_shift<0b10, "lsr", srl>;
+
+//===-------------------------------
+// 4. Aliases for LSL
+//===-------------------------------
+
+// Unfortunately LSL and subsequent aliases are much more complicated. We need
+// to be able to say certain output instruction fields depend in a complex
+// manner on combinations of input assembly fields.
+// +// MIOperandInfo *might* have been able to do it, but at the cost of +// significantly more C++ code. + +// N.b. contrary to usual practice these operands store the shift rather than +// the machine bits in an MCInst. The complexity overhead of consistency +// outweighed the benefits in this case (custom asmparser, printer and selection +// vs custom encoder). +def bitfield32_lsl_imm : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> { + let ParserMatchClass = uimm5_asmoperand; + let EncoderMethod = "getBitfield32LSLOpValue"; +} + +def bitfield64_lsl_imm : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> { + let ParserMatchClass = uimm6_asmoperand; + let EncoderMethod = "getBitfield64LSLOpValue"; +} + +class A64I_bitfield_lsl<bit sf, RegisterClass GPR, Operand operand> + : A64I_bitfield<sf, 0b10, sf, (outs GPR:$Rd), (ins GPR:$Rn, operand:$FullImm), + "lsl\t$Rd, $Rn, $FullImm", + [(set GPR:$Rd, (shl GPR:$Rn, operand:$FullImm))], + NoItinerary> { + bits<12> FullImm; + let ImmR = FullImm{5-0}; + let ImmS = FullImm{11-6}; + + // No disassembler allowed because it would overlap with BFM which does the + // actual work. + let isAsmParserOnly = 1; +} + +def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, bitfield32_lsl_imm>; +def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, bitfield64_lsl_imm>; + +//===------------------------------- +// 5. Aliases for bitfield extract instructions +//===------------------------------- + +def bfx32_width_asmoperand : AsmOperandClass { + let Name = "BFX32Width"; + let PredicateMethod = "isBitfieldWidth<32>"; + let RenderMethod = "addBFXWidthOperands"; + let DiagnosticType = "Width32"; +} + +def bfx32_width : Operand<i64>, ImmLeaf<i64, [{ return true; }]> { + let PrintMethod = "printBFXWidthOperand"; + let ParserMatchClass = bfx32_width_asmoperand; +} + +def bfx64_width_asmoperand : AsmOperandClass { + let Name = "BFX64Width"; + let PredicateMethod = "isBitfieldWidth<64>"; + let RenderMethod = "addBFXWidthOperands"; + let DiagnosticType = "Width64"; +} + +def bfx64_width : Operand<i64> { + let PrintMethod = "printBFXWidthOperand"; + let ParserMatchClass = bfx64_width_asmoperand; +} + + +multiclass A64I_bitfield_extract<bits<2> opc, string asmop, SDNode op> { + def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), + (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [(set GPR32:$Rd, (op GPR32:$Rn, imm:$ImmR, imm:$ImmS))], + NoItinerary> { + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + } + + def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), + (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [(set GPR64:$Rd, (op GPR64:$Rn, imm:$ImmR, imm:$ImmS))], + NoItinerary> { + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + } +} + +defm SBFX : A64I_bitfield_extract<0b00, "sbfx", A64Sbfx>; +defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>; + +// Again, variants based on BFM modify Rd so need it as an input too. +def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), + (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), + "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + // As above, no disassembler allowed. 
+ let isAsmParserOnly = 1; + let Constraints = "$src = $Rd"; +} + +def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), + (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), + "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + let Constraints = "$src = $Rd"; +} + +// SBFX instructions can do a 1-instruction sign-extension of boolean values. +def : Pat<(sext_inreg GPR64:$Rn, i1), (SBFXxxii GPR64:$Rn, 0, 0)>; +def : Pat<(sext_inreg GPR32:$Rn, i1), (SBFXwwii GPR32:$Rn, 0, 0)>; +def : Pat<(i64 (sext_inreg (anyext GPR32:$Rn), i1)), + (SBFXxxii (SUBREG_TO_REG (i64 0), GPR32:$Rn, sub_32), 0, 0)>; + +// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could +// use either 64-bit or 32-bit variant, but 32-bit might be more efficient. +def : Pat<(zext GPR32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii GPR32:$Rn, 0, 31), + sub_32)>; + +//===------------------------------- +// 6. Aliases for bitfield insert instructions +//===------------------------------- + +def bfi32_lsb_asmoperand : AsmOperandClass { + let Name = "BFI32LSB"; + let PredicateMethod = "isUImm<5>"; + let RenderMethod = "addBFILSBOperands<32>"; + let DiagnosticType = "UImm5"; +} + +def bfi32_lsb : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> { + let PrintMethod = "printBFILSBOperand<32>"; + let ParserMatchClass = bfi32_lsb_asmoperand; +} + +def bfi64_lsb_asmoperand : AsmOperandClass { + let Name = "BFI64LSB"; + let PredicateMethod = "isUImm<6>"; + let RenderMethod = "addBFILSBOperands<64>"; + let DiagnosticType = "UImm6"; +} + +def bfi64_lsb : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> { + let PrintMethod = "printBFILSBOperand<64>"; + let ParserMatchClass = bfi64_lsb_asmoperand; +} + +// Width verification is performed during conversion so width operand can be +// shared between 32/64-bit cases. Still needed for the print method though +// because ImmR encodes "width - 1". +def bfi32_width_asmoperand : AsmOperandClass { + let Name = "BFI32Width"; + let PredicateMethod = "isBitfieldWidth<32>"; + let RenderMethod = "addBFIWidthOperands"; + let DiagnosticType = "Width32"; +} + +def bfi32_width : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 32; }]> { + let PrintMethod = "printBFIWidthOperand"; + let ParserMatchClass = bfi32_width_asmoperand; +} + +def bfi64_width_asmoperand : AsmOperandClass { + let Name = "BFI64Width"; + let PredicateMethod = "isBitfieldWidth<64>"; + let RenderMethod = "addBFIWidthOperands"; + let DiagnosticType = "Width64"; +} + +def bfi64_width : Operand<i64>, + ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 64; }]> { + let PrintMethod = "printBFIWidthOperand"; + let ParserMatchClass = bfi64_width_asmoperand; +} + +multiclass A64I_bitfield_insert<bits<2> opc, string asmop> { + def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), + (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [], NoItinerary> { + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + } + + def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), + (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), + !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), + [], NoItinerary> { + // As above, no disassembler allowed. 
+ let isAsmParserOnly = 1; + } +} + +defm SBFIZ : A64I_bitfield_insert<0b00, "sbfiz">; +defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">; + + +def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), + (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), + "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + let Constraints = "$src = $Rd"; +} + +def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), + (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), + "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> { + // As above, no disassembler allowed. + let isAsmParserOnly = 1; + let Constraints = "$src = $Rd"; +} + +//===----------------------------------------------------------------------===// +// Compare and branch (immediate) +//===----------------------------------------------------------------------===// +// Contains: CBZ, CBNZ + +class label_asmoperand<int width, int scale> : AsmOperandClass { + let Name = "Label" # width # "_" # scale; + let PredicateMethod = "isLabel<" # width # "," # scale # ">"; + let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">"; + let DiagnosticType = "Label"; +} + +def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>; + +// All conditional immediate branches are the same really: 19 signed bits scaled +// by the instruction-size (4). +def bcc_target : Operand<OtherVT> { + // This label is a 19-bit offset from PC, scaled by the instruction-width: 4. + let ParserMatchClass = label_wid19_scal4_asmoperand; + let PrintMethod = "printLabelOperand<19, 4>"; + let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_condbr>"; + let OperandType = "OPERAND_PCREL"; +} + +multiclass cmpbr_sizes<bit op, string asmop, ImmLeaf SETOP> { + let isBranch = 1, isTerminator = 1 in { + def x : A64I_cmpbr<0b1, op, + (outs), + (ins GPR64:$Rt, bcc_target:$Label), + !strconcat(asmop,"\t$Rt, $Label"), + [(A64br_cc (A64cmp GPR64:$Rt, 0), SETOP, bb:$Label)], + NoItinerary>; + + def w : A64I_cmpbr<0b0, op, + (outs), + (ins GPR32:$Rt, bcc_target:$Label), + !strconcat(asmop,"\t$Rt, $Label"), + [(A64br_cc (A64cmp GPR32:$Rt, 0), SETOP, bb:$Label)], + NoItinerary>; + } +} + +defm CBZ : cmpbr_sizes<0b0, "cbz", ImmLeaf<i32, [{ + return Imm == A64CC::EQ; +}]> >; +defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf<i32, [{ + return Imm == A64CC::NE; +}]> >; + +//===----------------------------------------------------------------------===// +// Conditional branch (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: B.cc + +def cond_code_asmoperand : AsmOperandClass { + let Name = "CondCode"; + let DiagnosticType = "CondCode"; +} + +def cond_code : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 0 && Imm <= 15; +}]> { + let PrintMethod = "printCondCodeOperand"; + let ParserMatchClass = cond_code_asmoperand; +} + +def Bcc : A64I_condbr<0b0, 0b0, (outs), + (ins cond_code:$Cond, bcc_target:$Label), + "b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)], + NoItinerary> { + let Uses = [NZCV]; + let isBranch = 1; + let isTerminator = 1; +} + +//===----------------------------------------------------------------------===// +// Conditional compare (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: CCMN, CCMP + +def uimm4_asmoperand : AsmOperandClass { + let Name = "UImm4"; + let PredicateMethod = "isUImm<4>"; + let RenderMethod = "addImmOperands"; + let 
DiagnosticType = "UImm4";
+}
+
+def uimm4 : Operand<i32> {
+  let ParserMatchClass = uimm4_asmoperand;
+}
+
+def uimm5 : Operand<i32> {
+  let ParserMatchClass = uimm5_asmoperand;
+}
+
+// The only difference between this operand and the one for instructions like
+// B.cc is that it's parsed manually. The others get parsed implicitly as part
+// of the mnemonic handling.
+def cond_code_op_asmoperand : AsmOperandClass {
+  let Name = "CondCodeOp";
+  let RenderMethod = "addCondCodeOperands";
+  let PredicateMethod = "isCondCode";
+  let ParserMethod = "ParseCondCodeOperand";
+  let DiagnosticType = "CondCode";
+}
+
+def cond_code_op : Operand<i32> {
+  let PrintMethod = "printCondCodeOperand";
+  let ParserMatchClass = cond_code_op_asmoperand;
+}
+
+class A64I_condcmpimmImpl<bit sf, bit op, RegisterClass GPR, string asmop>
+  : A64I_condcmpimm<sf, op, 0b0, 0b0, 0b1, (outs),
+                (ins GPR:$Rn, uimm5:$UImm5, uimm4:$NZCVImm, cond_code_op:$Cond),
+                !strconcat(asmop, "\t$Rn, $UImm5, $NZCVImm, $Cond"),
+                [], NoItinerary> {
+  let Defs = [NZCV];
+}
+
+def CCMNwi : A64I_condcmpimmImpl<0b0, 0b0, GPR32, "ccmn">;
+def CCMNxi : A64I_condcmpimmImpl<0b1, 0b0, GPR64, "ccmn">;
+def CCMPwi : A64I_condcmpimmImpl<0b0, 0b1, GPR32, "ccmp">;
+def CCMPxi : A64I_condcmpimmImpl<0b1, 0b1, GPR64, "ccmp">;
+
+//===----------------------------------------------------------------------===//
+// Conditional compare (register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CCMN, CCMP
+
+class A64I_condcmpregImpl<bit sf, bit op, RegisterClass GPR, string asmop>
+  : A64I_condcmpreg<sf, op, 0b0, 0b0, 0b1,
+                    (outs),
+                    (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
+                    !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
+                    [], NoItinerary> {
+  let Defs = [NZCV];
+}
+
+def CCMNww : A64I_condcmpregImpl<0b0, 0b0, GPR32, "ccmn">;
+def CCMNxx : A64I_condcmpregImpl<0b1, 0b0, GPR64, "ccmn">;
+def CCMPww : A64I_condcmpregImpl<0b0, 0b1, GPR32, "ccmp">;
+def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">;
+
+//===----------------------------------------------------------------------===//
+// Conditional select instructions
+//===----------------------------------------------------------------------===//
+// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG
+
+// Condition code which is encoded as the inversion (semantically rather than
+// bitwise) in the instruction.
+def inv_cond_code_op_asmoperand : AsmOperandClass {
+  let Name = "InvCondCodeOp";
+  let RenderMethod = "addInvCondCodeOperands";
+  let PredicateMethod = "isCondCode";
+  let ParserMethod = "ParseCondCodeOperand";
+  let DiagnosticType = "CondCode";
+}
+
+def inv_cond_code_op : Operand<i32> {
+  let ParserMatchClass = inv_cond_code_op_asmoperand;
+}
+
+// Having a separate operand for the selectable use-case is debatable, but gives
+// consistency with cond_code.
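+// For example, "cinc w0, w1, eq" below is an alias for
+// "csinc w0, w1, w1, ne": the parser reads "eq" and
+// addInvCondCodeOperands stores the semantically inverted code.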
+def inv_cond_XFORM : SDNodeXForm<imm, [{ + A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(N->getZExtValue()); + return CurDAG->getTargetConstant(A64InvertCondCode(CC), MVT::i32); +}]>; + +def inv_cond_code + : ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 15; }], inv_cond_XFORM>; + + +multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop, + SDPatternOperator select> { + let Uses = [NZCV] in { + def wwwc : A64I_condsel<0b0, op, 0b0, op2, + (outs GPR32:$Rd), + (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"), + [(set GPR32:$Rd, (select GPR32:$Rn, GPR32:$Rm))], + NoItinerary>; + + + def xxxc : A64I_condsel<0b1, op, 0b0, op2, + (outs GPR64:$Rd), + (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"), + [(set GPR64:$Rd, (select GPR64:$Rn, GPR64:$Rm))], + NoItinerary>; + } +} + +def simple_select + : PatFrag<(ops node:$lhs, node:$rhs), + (A64select_cc NZCV, node:$lhs, node:$rhs, (i32 imm:$Cond))>; + +class complex_select<SDPatternOperator opnode> + : PatFrag<(ops node:$lhs, node:$rhs), + (A64select_cc NZCV, node:$lhs, (opnode node:$rhs), (i32 imm:$Cond))>; + + +defm CSEL : A64I_condselSizes<0b0, 0b00, "csel", simple_select>; +defm CSINC : A64I_condselSizes<0b0, 0b01, "csinc", + complex_select<PatFrag<(ops node:$val), + (add node:$val, 1)>>>; +defm CSINV : A64I_condselSizes<0b1, 0b00, "csinv", complex_select<not>>; +defm CSNEG : A64I_condselSizes<0b1, 0b01, "csneg", complex_select<ineg>>; + +// Now the instruction aliases, which fit nicely into LLVM's model: + +def : InstAlias<"cset $Rd, $Cond", + (CSINCwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>; +def : InstAlias<"cset $Rd, $Cond", + (CSINCxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>; +def : InstAlias<"csetm $Rd, $Cond", + (CSINVwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>; +def : InstAlias<"csetm $Rd, $Cond", + (CSINVxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>; +def : InstAlias<"cinc $Rd, $Rn, $Cond", + (CSINCwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>; +def : InstAlias<"cinc $Rd, $Rn, $Cond", + (CSINCxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>; +def : InstAlias<"cinv $Rd, $Rn, $Cond", + (CSINVwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>; +def : InstAlias<"cinv $Rd, $Rn, $Cond", + (CSINVxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>; +def : InstAlias<"cneg $Rd, $Rn, $Cond", + (CSNEGwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>; +def : InstAlias<"cneg $Rd, $Rn, $Cond", + (CSNEGxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>; + +// Finally some helper patterns. + +// For CSET (a.k.a. zero-extension of icmp) +def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond), + (CSINCwwwc WZR, WZR, cond_code:$Cond)>; +def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond), + (CSINCwwwc WZR, WZR, inv_cond_code:$Cond)>; + +def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond), + (CSINCxxxc XZR, XZR, cond_code:$Cond)>; +def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond), + (CSINCxxxc XZR, XZR, inv_cond_code:$Cond)>; + +// For CSETM (a.k.a. 
sign-extension of icmp)
+def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
+          (CSINVwwwc WZR, WZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
+          (CSINVwwwc WZR, WZR, inv_cond_code:$Cond)>;
+
+def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
+          (CSINVxxxc XZR, XZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
+          (CSINVxxxc XZR, XZR, inv_cond_code:$Cond)>;
+
+// CINC, CINV and CNEG get dealt with automatically, which leaves the issue of
+// commutativity. The instructions are too complex for isCommutable to be used,
+// so we have to create the patterns manually:
+
+// No commutable pattern for CSEL since the commuted version is isomorphic.
+
+// CSINC
+def : Pat<(A64select_cc NZCV, (add GPR32:$Rm, 1), GPR32:$Rn,
+          inv_cond_code:$Cond),
+          (CSINCwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, (add GPR64:$Rm, 1), GPR64:$Rn,
+          inv_cond_code:$Cond),
+          (CSINCxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;
+
+// CSINV
+def : Pat<(A64select_cc NZCV, (not GPR32:$Rm), GPR32:$Rn, inv_cond_code:$Cond),
+          (CSINVwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, (not GPR64:$Rm), GPR64:$Rn, inv_cond_code:$Cond),
+          (CSINVxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;
+
+// CSNEG
+def : Pat<(A64select_cc NZCV, (ineg GPR32:$Rm), GPR32:$Rn, inv_cond_code:$Cond),
+          (CSNEGwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, (ineg GPR64:$Rm), GPR64:$Rn, inv_cond_code:$Cond),
+          (CSNEGxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (1 source) instructions
+//===----------------------------------------------------------------------===//
+// Contains: RBIT, REV16, REV, REV32, CLZ, CLS.
+
+// We define a unary operator which always fails. We will use this to
+// define unary operators that cannot be matched.
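+// (Concretely, a PatFrag like FPNoUnop later in this file, whose predicate is
+// simply "return false;", can never match; instructions defined with it as
+// their default pattern are assembly/disassembly-only.)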
+ +class A64I_dp_1src_impl<bit sf, bits<6> opcode, string asmop, + list<dag> patterns, RegisterClass GPRrc, + InstrItinClass itin>: + A64I_dp_1src<sf, + 0, + 0b00000, + opcode, + !strconcat(asmop, "\t$Rd, $Rn"), + (outs GPRrc:$Rd), + (ins GPRrc:$Rn), + patterns, + itin>; + +multiclass A64I_dp_1src <bits<6> opcode, string asmop> { + let hasSideEffects = 0 in { + def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>; + def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>; + } +} + +defm RBIT : A64I_dp_1src<0b000000, "rbit">; +defm CLS : A64I_dp_1src<0b000101, "cls">; +defm CLZ : A64I_dp_1src<0b000100, "clz">; + +def : Pat<(ctlz GPR32:$Rn), (CLZww GPR32:$Rn)>; +def : Pat<(ctlz GPR64:$Rn), (CLZxx GPR64:$Rn)>; +def : Pat<(ctlz_zero_undef GPR32:$Rn), (CLZww GPR32:$Rn)>; +def : Pat<(ctlz_zero_undef GPR64:$Rn), (CLZxx GPR64:$Rn)>; + +def : Pat<(cttz GPR32:$Rn), (CLZww (RBITww GPR32:$Rn))>; +def : Pat<(cttz GPR64:$Rn), (CLZxx (RBITxx GPR64:$Rn))>; +def : Pat<(cttz_zero_undef GPR32:$Rn), (CLZww (RBITww GPR32:$Rn))>; +def : Pat<(cttz_zero_undef GPR64:$Rn), (CLZxx (RBITxx GPR64:$Rn))>; + + +def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev", + [(set GPR32:$Rd, (bswap GPR32:$Rn))], + GPR32, NoItinerary>; +def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev", + [(set GPR64:$Rd, (bswap GPR64:$Rn))], + GPR64, NoItinerary>; +def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32", + [(set GPR64:$Rd, (bswap (rotr GPR64:$Rn, (i64 32))))], + GPR64, NoItinerary>; +def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16", + [(set GPR32:$Rd, (bswap (rotr GPR32:$Rn, (i64 16))))], + GPR32, + NoItinerary>; +def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>; + +//===----------------------------------------------------------------------===// +// Data Processing (2 sources) instructions +//===----------------------------------------------------------------------===// +// Contains: CRC32C?[BHWX], UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL, +// LSR, ASR, ROR + + +class dp_2src_impl<bit sf, bits<6> opcode, string asmop, list<dag> patterns, + RegisterClass GPRsp, + InstrItinClass itin>: + A64I_dp_2src<sf, + opcode, + 0, + !strconcat(asmop, "\t$Rd, $Rn, $Rm"), + (outs GPRsp:$Rd), + (ins GPRsp:$Rn, GPRsp:$Rm), + patterns, + itin>; + +multiclass dp_2src_crc<bit c, string asmop> { + def B_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 0}, + !strconcat(asmop, "b"), [], GPR32, NoItinerary>; + def H_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 1}, + !strconcat(asmop, "h"), [], GPR32, NoItinerary>; + def W_www : dp_2src_impl<0b0, {0, 1, 0, c, 1, 0}, + !strconcat(asmop, "w"), [], GPR32, NoItinerary>; + def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0, + !strconcat(asmop, "x\t$Rd, $Rn, $Rm"), + (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [], + NoItinerary>; +} + +multiclass dp_2src_zext <bits<6> opcode, string asmop, SDPatternOperator op> { + def www : dp_2src_impl<0b0, + opcode, + asmop, + [(set GPR32:$Rd, + (op GPR32:$Rn, (i64 (zext GPR32:$Rm))))], + GPR32, + NoItinerary>; + def xxx : dp_2src_impl<0b1, + opcode, + asmop, + [(set GPR64:$Rd, (op GPR64:$Rn, GPR64:$Rm))], + GPR64, + NoItinerary>; +} + + +multiclass dp_2src <bits<6> opcode, string asmop, SDPatternOperator op> { + def www : dp_2src_impl<0b0, + opcode, + asmop, + [(set GPR32:$Rd, (op GPR32:$Rn, GPR32:$Rm))], + GPR32, + NoItinerary>; + def xxx : dp_2src_impl<0b1, + opcode, + asmop, + [(set GPR64:$Rd, (op GPR64:$Rn, GPR64:$Rm))], + GPR64, + NoItinerary>; +} + +// Here we define the data processing 2 source 
instructions.
+defm CRC32  : dp_2src_crc<0b0, "crc32">;
+defm CRC32C : dp_2src_crc<0b1, "crc32c">;
+
+defm UDIV : dp_2src<0b000010, "udiv", udiv>;
+defm SDIV : dp_2src<0b000011, "sdiv", sdiv>;
+
+defm LSLV : dp_2src_zext<0b001000, "lsl", shl>;
+defm LSRV : dp_2src_zext<0b001001, "lsr", srl>;
+defm ASRV : dp_2src_zext<0b001010, "asr", sra>;
+defm RORV : dp_2src_zext<0b001011, "ror", rotr>;
+
+// Extra patterns for an incoming 64-bit value for a 32-bit
+// operation. Since the LLVM operations are undefined (as in C) if the
+// RHS is out of range, it's perfectly permissible to discard the high
+// bits of the GPR64.
+def : Pat<(shl GPR32:$Rn, GPR64:$Rm),
+          (LSLVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
+def : Pat<(srl GPR32:$Rn, GPR64:$Rm),
+          (LSRVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
+def : Pat<(sra GPR32:$Rn, GPR64:$Rm),
+          (ASRVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
+def : Pat<(rotr GPR32:$Rn, GPR64:$Rm),
+          (RORVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
+
+// Here we define the aliases for the data processing 2 source instructions.
+def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">;
+def LSR_mnemonic : MnemonicAlias<"lsrv", "lsr">;
+def ASR_mnemonic : MnemonicAlias<"asrv", "asr">;
+def ROR_mnemonic : MnemonicAlias<"rorv", "ror">;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (3 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH
+//    + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL
+
+class A64I_dp3_4operand<bit sf, bits<6> opcode, RegisterClass AccReg,
+                        RegisterClass SrcReg, string asmop, dag pattern>
+  : A64I_dp3<sf, opcode,
+             (outs AccReg:$Rd), (ins SrcReg:$Rn, SrcReg:$Rm, AccReg:$Ra),
+             !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Ra"),
+             [(set AccReg:$Rd, pattern)], NoItinerary> {
+  RegisterClass AccGPR = AccReg;
+  RegisterClass SrcGPR = SrcReg;
+}
+
+def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, GPR32, "madd",
+                                 (add GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm))>;
+def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, GPR64, "madd",
+                                 (add GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm))>;
+
+def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, GPR32, "msub",
+                                 (sub GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm))>;
+def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, GPR64, "msub",
+                                 (sub GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm))>;
+
+def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, GPR32, "smaddl",
+               (add GPR64:$Ra, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>;
+def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, GPR32, "smsubl",
+               (sub GPR64:$Ra, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>;
+
+def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, GPR32, "umaddl",
+               (add GPR64:$Ra, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>;
+def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, GPR32, "umsubl",
+               (sub GPR64:$Ra, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>;
+
+let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in {
+  def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd),
+                          (ins GPR64:$Rn, GPR64:$Rm),
+                          "umulh\t$Rd, $Rn, $Rm",
+                          [(set GPR64:$Rd, (mulhu GPR64:$Rn, GPR64:$Rm))],
+                          NoItinerary>;
+
+  def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd),
+                          (ins GPR64:$Rn, GPR64:$Rm),
+                          "smulh\t$Rd, $Rn, $Rm",
+                          [(set GPR64:$Rd, (mulhs GPR64:$Rn, GPR64:$Rm))],
+                          NoItinerary>;
+}
+
+multiclass A64I_dp3_3operand<string asmop, 
A64I_dp3_4operand INST, + Register ZR, dag pattern> { + def : InstAlias<asmop # " $Rd, $Rn, $Rm", + (INST INST.AccGPR:$Rd, INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>; + + def : Pat<pattern, (INST INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>; +} + +defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul GPR32:$Rn, GPR32:$Rm)>; +defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul GPR64:$Rn, GPR64:$Rm)>; + +defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR, + (sub 0, (mul GPR32:$Rn, GPR32:$Rm))>; +defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR, + (sub 0, (mul GPR64:$Rn, GPR64:$Rm))>; + +defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR, + (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm))>; +defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR, + (sub 0, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>; + +defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR, + (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm))>; +defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR, + (sub 0, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>; + + +//===----------------------------------------------------------------------===// +// Exception generation +//===----------------------------------------------------------------------===// +// Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3 + +def uimm16_asmoperand : AsmOperandClass { + let Name = "UImm16"; + let PredicateMethod = "isUImm<16>"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "UImm16"; +} + +def uimm16 : Operand<i32> { + let ParserMatchClass = uimm16_asmoperand; +} + +class A64I_exceptImpl<bits<3> opc, bits<2> ll, string asmop> + : A64I_exception<opc, 0b000, ll, (outs), (ins uimm16:$UImm16), + !strconcat(asmop, "\t$UImm16"), [], NoItinerary> { + let isBranch = 1; + let isTerminator = 1; +} + +def SVCi : A64I_exceptImpl<0b000, 0b01, "svc">; +def HVCi : A64I_exceptImpl<0b000, 0b10, "hvc">; +def SMCi : A64I_exceptImpl<0b000, 0b11, "smc">; +def BRKi : A64I_exceptImpl<0b001, 0b00, "brk">; +def HLTi : A64I_exceptImpl<0b010, 0b00, "hlt">; + +def DCPS1i : A64I_exceptImpl<0b101, 0b01, "dcps1">; +def DCPS2i : A64I_exceptImpl<0b101, 0b10, "dcps2">; +def DCPS3i : A64I_exceptImpl<0b101, 0b11, "dcps3">; + +// The immediate is optional for the DCPS instructions, defaulting to 0. 
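+// For example, a bare "dcps1" is parsed and encoded exactly as "dcps1 #0".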
+def : InstAlias<"dcps1", (DCPS1i 0)>; +def : InstAlias<"dcps2", (DCPS2i 0)>; +def : InstAlias<"dcps3", (DCPS3i 0)>; + +//===----------------------------------------------------------------------===// +// Extract (immediate) +//===----------------------------------------------------------------------===// +// Contains: EXTR + alias ROR + +def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0, + (outs GPR32:$Rd), + (ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB), + "extr\t$Rd, $Rn, $Rm, $LSB", + [(set GPR32:$Rd, + (A64Extr GPR32:$Rn, GPR32:$Rm, imm:$LSB))], + NoItinerary>; +def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1, + (outs GPR64:$Rd), + (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB), + "extr\t$Rd, $Rn, $Rm, $LSB", + [(set GPR64:$Rd, + (A64Extr GPR64:$Rn, GPR64:$Rm, imm:$LSB))], + NoItinerary>; + +def : InstAlias<"ror $Rd, $Rs, $LSB", + (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>; +def : InstAlias<"ror $Rd, $Rs, $LSB", + (EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>; + +def : Pat<(rotr GPR32:$Rn, bitfield32_imm:$LSB), + (EXTRwwwi GPR32:$Rn, GPR32:$Rn, bitfield32_imm:$LSB)>; +def : Pat<(rotr GPR64:$Rn, bitfield64_imm:$LSB), + (EXTRxxxi GPR64:$Rn, GPR64:$Rn, bitfield64_imm:$LSB)>; + +//===----------------------------------------------------------------------===// +// Floating-point compare instructions +//===----------------------------------------------------------------------===// +// Contains: FCMP, FCMPE + +def fpzero_asmoperand : AsmOperandClass { + let Name = "FPZero"; + let ParserMethod = "ParseFPImmOperand"; + let DiagnosticType = "FPZero"; +} + +def fpz32 : Operand<f32>, + ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> { + let ParserMatchClass = fpzero_asmoperand; + let PrintMethod = "printFPZeroOperand"; +} + +def fpz64 : Operand<f64>, + ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> { + let ParserMatchClass = fpzero_asmoperand; + let PrintMethod = "printFPZeroOperand"; +} + +multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, string asmop2, + dag pattern> { + def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0}, + (outs), ins, !strconcat("fcmp\t$Rn, ", asmop2), + [pattern], NoItinerary> { + let Defs = [NZCV]; + } + + def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0}, + (outs), ins, !strconcat("fcmpe\t$Rn, ", asmop2), + [], NoItinerary> { + let Defs = [NZCV]; + } +} + +defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm), "$Rm", + (set NZCV, (A64cmp (f32 FPR32:$Rn), FPR32:$Rm))>; +defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm), "$Rm", + (set NZCV, (A64cmp (f64 FPR64:$Rn), FPR64:$Rm))>; + +// What would be Rm should be written as 0, but anything is valid for +// disassembly so we can't set the bits +let PostEncoderMethod = "fixFCMPImm" in { + defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Imm), "$Imm", + (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Imm))>; + + defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Imm), "$Imm", + (set NZCV, (A64cmp (f64 FPR64:$Rn), fpz64:$Imm))>; +} + + +//===----------------------------------------------------------------------===// +// Floating-point conditional compare instructions +//===----------------------------------------------------------------------===// +// Contains: FCCMP, FCCMPE + +class A64I_fpccmpImpl<bits<2> type, bit op, RegisterClass FPR, string asmop> + : A64I_fpccmp<0b0, 0b0, type, op, + (outs), + (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond), + 
!strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
+                [], NoItinerary> {
+  let Defs = [NZCV];
+}
+
+def FCCMPss : A64I_fpccmpImpl<0b00, 0b0, FPR32, "fccmp">;
+def FCCMPEss : A64I_fpccmpImpl<0b00, 0b1, FPR32, "fccmpe">;
+def FCCMPdd : A64I_fpccmpImpl<0b01, 0b0, FPR64, "fccmp">;
+def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">;
+
+//===----------------------------------------------------------------------===//
+// Floating-point conditional select instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCSEL
+
+let Uses = [NZCV] in {
+  def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd),
+                                 (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond),
+                                 "fcsel\t$Rd, $Rn, $Rm, $Cond",
+                                 [(set FPR32:$Rd,
+                                       (simple_select (f32 FPR32:$Rn),
+                                                      FPR32:$Rm))],
+                                 NoItinerary>;
+
+
+  def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd),
+                                 (ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond),
+                                 "fcsel\t$Rd, $Rn, $Rm, $Cond",
+                                 [(set FPR64:$Rd,
+                                       (simple_select (f64 FPR64:$Rn),
+                                                      FPR64:$Rm))],
+                                 NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (1 source)
+//===----------------------------------------------------------------------===//
+// Contains: FMOV, FABS, FNEG, FSQRT, FCVT, FRINT[NPMZAXI].
+
+def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val),
+                       [{ (void)N; return false; }]>;
+
+// First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d"
+// syntax. Default to no pattern because most are odd enough not to have one.
+multiclass A64I_fpdp1sizes<bits<6> opcode, string asmstr,
+                           SDPatternOperator opnode = FPNoUnop> {
+  def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn),
+                      !strconcat(asmstr, "\t$Rd, $Rn"),
+                      [(set (f32 FPR32:$Rd), (opnode FPR32:$Rn))],
+                      NoItinerary>;
+
+  def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn),
+                      !strconcat(asmstr, "\t$Rd, $Rn"),
+                      [(set (f64 FPR64:$Rd), (opnode FPR64:$Rn))],
+                      NoItinerary>;
+}
+
+defm FMOV   : A64I_fpdp1sizes<0b000000, "fmov">;
+defm FABS   : A64I_fpdp1sizes<0b000001, "fabs", fabs>;
+defm FNEG   : A64I_fpdp1sizes<0b000010, "fneg", fneg>;
+defm FSQRT  : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>;
+
+defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">;
+defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>;
+defm FRINTM : A64I_fpdp1sizes<0b001010, "frintm", ffloor>;
+defm FRINTZ : A64I_fpdp1sizes<0b001011, "frintz", ftrunc>;
+defm FRINTA : A64I_fpdp1sizes<0b001100, "frinta">;
+defm FRINTX : A64I_fpdp1sizes<0b001110, "frintx", frint>;
+defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>;
+
+// The FCVT instructions have different source and destination register-types,
+// but the fields are uniform everywhere a D-register (say) crops up. Package
+// this information in a Record.
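+// For example, FCVTds below combines SrcReg = FCVT32 with DestReg = FCVT64,
+// yielding type = 0b00 (from the source) and opcode = 0b000101 (the last two
+// bits from the destination) for an "fcvt d0, s1"-style conversion.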
+class FCVTRegType<RegisterClass rc, bits<2> fld, ValueType vt> { + RegisterClass Class = rc; + ValueType VT = vt; + bit t1 = fld{1}; + bit t0 = fld{0}; +} + +def FCVT16 : FCVTRegType<FPR16, 0b11, f16>; +def FCVT32 : FCVTRegType<FPR32, 0b00, f32>; +def FCVT64 : FCVTRegType<FPR64, 0b01, f64>; + +class A64I_fpdp1_fcvt<FCVTRegType DestReg, FCVTRegType SrcReg, SDNode opnode> + : A64I_fpdp1<0b0, 0b0, {SrcReg.t1, SrcReg.t0}, + {0,0,0,1, DestReg.t1, DestReg.t0}, + (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn), + "fcvt\t$Rd, $Rn", + [(set (DestReg.VT DestReg.Class:$Rd), + (opnode (SrcReg.VT SrcReg.Class:$Rn)))], NoItinerary>; + +def FCVTds : A64I_fpdp1_fcvt<FCVT64, FCVT32, fextend>; +def FCVThs : A64I_fpdp1_fcvt<FCVT16, FCVT32, fround>; +def FCVTsd : A64I_fpdp1_fcvt<FCVT32, FCVT64, fround>; +def FCVThd : A64I_fpdp1_fcvt<FCVT16, FCVT64, fround>; +def FCVTsh : A64I_fpdp1_fcvt<FCVT32, FCVT16, fextend>; +def FCVTdh : A64I_fpdp1_fcvt<FCVT64, FCVT16, fextend>; + + +//===----------------------------------------------------------------------===// +// Floating-point data-processing (2 sources) instructions +//===----------------------------------------------------------------------===// +// Contains: FMUL, FDIV, FADD, FSUB, FMAX, FMIN, FMAXNM, FMINNM, FNMUL + +def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs), + [{ (void)N; return false; }]>; + +multiclass A64I_fpdp2sizes<bits<4> opcode, string asmstr, + SDPatternOperator opnode> { + def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode, + (outs FPR32:$Rd), + (ins FPR32:$Rn, FPR32:$Rm), + !strconcat(asmstr, "\t$Rd, $Rn, $Rm"), + [(set (f32 FPR32:$Rd), (opnode FPR32:$Rn, FPR32:$Rm))], + NoItinerary>; + + def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode, + (outs FPR64:$Rd), + (ins FPR64:$Rn, FPR64:$Rm), + !strconcat(asmstr, "\t$Rd, $Rn, $Rm"), + [(set (f64 FPR64:$Rd), (opnode FPR64:$Rn, FPR64:$Rm))], + NoItinerary>; +} + +let isCommutable = 1 in { + defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>; + defm FADD : A64I_fpdp2sizes<0b0010, "fadd", fadd>; + + // No patterns for these. 
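+  // (FPNoBinop's predicate always fails, so these defaulted patterns can
+  // never match and the instructions remain assembly/disassembly-only here.)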
+  defm FMAX : A64I_fpdp2sizes<0b0100, "fmax", FPNoBinop>;
+  defm FMIN : A64I_fpdp2sizes<0b0101, "fmin", FPNoBinop>;
+  defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>;
+  defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>;
+
+  defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul",
+                               PatFrag<(ops node:$lhs, node:$rhs),
+                                       (fneg (fmul node:$lhs, node:$rhs))> >;
+}
+
+defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>;
+defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (3 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMADD, FMSUB, FNMADD, FNMSUB
+
+def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+                    (fma (fneg node:$Rn), node:$Rm, node:$Ra)>;
+def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+                     (fma node:$Rn, node:$Rm, (fneg node:$Ra))>;
+def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+                     (fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>;
+
+class A64I_fpdp3Impl<string asmop, RegisterClass FPR, ValueType VT,
+                     bits<2> type, bit o1, bit o0, SDPatternOperator fmakind>
+  : A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd),
+               (ins FPR:$Rn, FPR:$Rm, FPR:$Ra),
+               !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"),
+               [(set FPR:$Rd, (fmakind (VT FPR:$Rn), FPR:$Rm, FPR:$Ra))],
+               NoItinerary>;
+
+def FMADDssss  : A64I_fpdp3Impl<"fmadd",  FPR32, f32, 0b00, 0b0, 0b0, fma>;
+def FMSUBssss  : A64I_fpdp3Impl<"fmsub",  FPR32, f32, 0b00, 0b0, 0b1, fmsub>;
+def FNMADDssss : A64I_fpdp3Impl<"fnmadd", FPR32, f32, 0b00, 0b1, 0b0, fnmadd>;
+def FNMSUBssss : A64I_fpdp3Impl<"fnmsub", FPR32, f32, 0b00, 0b1, 0b1, fnmsub>;
+
+def FMADDdddd  : A64I_fpdp3Impl<"fmadd",  FPR64, f64, 0b01, 0b0, 0b0, fma>;
+def FMSUBdddd  : A64I_fpdp3Impl<"fmsub",  FPR64, f64, 0b01, 0b0, 0b1, fmsub>;
+def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;
+def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point <-> fixed-point conversion instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
+
+// #1-#32 allowed, encoded as "64 - <specified imm>"
+def fixedpos_asmoperand_i32 : AsmOperandClass {
+  let Name = "CVTFixedPos32";
+  let RenderMethod = "addCVTFixedPosOperands";
+  let PredicateMethod = "isCVTFixedPos<32>";
+  let DiagnosticType = "CVTFixedPos32";
+}
+
+// Also encoded as "64 - <specified imm>" but #1-#64 allowed.
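+// For example, a scale of #16 is stored in the instruction's scale field as
+// 64 - 16 == 48.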
+def fixedpos_asmoperand_i64 : AsmOperandClass { + let Name = "CVTFixedPos64"; + let RenderMethod = "addCVTFixedPosOperands"; + let PredicateMethod = "isCVTFixedPos<64>"; + let DiagnosticType = "CVTFixedPos64"; +} + +// We need the cartesian product of f32/f64 i32/i64 operands for +// conversions: +// + Selection needs to use operands of correct floating type +// + Assembly parsing and decoding depend on integer width +class cvtfix_i32_op<ValueType FloatVT> + : Operand<FloatVT>, + ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm]> { + let ParserMatchClass = fixedpos_asmoperand_i32; + let DecoderMethod = "DecodeCVT32FixedPosOperand"; + let PrintMethod = "printCVTFixedPosOperand"; +} + +class cvtfix_i64_op<ValueType FloatVT> + : Operand<FloatVT>, + ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm]> { + let ParserMatchClass = fixedpos_asmoperand_i64; + let PrintMethod = "printCVTFixedPosOperand"; +} + +// Because of the proliferation of weird operands, it's not really +// worth going for a multiclass here. Oh well. + +class A64I_fptofix<bit sf, bits<2> type, bits<3> opcode, + RegisterClass GPR, RegisterClass FPR, Operand scale_op, + string asmop, SDNode cvtop> + : A64I_fpfixed<sf, 0b0, type, 0b11, opcode, + (outs GPR:$Rd), (ins FPR:$Rn, scale_op:$Scale), + !strconcat(asmop, "\t$Rd, $Rn, $Scale"), + [(set GPR:$Rd, (cvtop (fmul FPR:$Rn, scale_op:$Scale)))], + NoItinerary>; + +def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32, + cvtfix_i32_op<f32>, "fcvtzs", fp_to_sint>; +def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32, + cvtfix_i64_op<f32>, "fcvtzs", fp_to_sint>; +def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32, + cvtfix_i32_op<f32>, "fcvtzu", fp_to_uint>; +def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32, + cvtfix_i64_op<f32>, "fcvtzu", fp_to_uint>; + +def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64, + cvtfix_i32_op<f64>, "fcvtzs", fp_to_sint>; +def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64, + cvtfix_i64_op<f64>, "fcvtzs", fp_to_sint>; +def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64, + cvtfix_i32_op<f64>, "fcvtzu", fp_to_uint>; +def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64, + cvtfix_i64_op<f64>, "fcvtzu", fp_to_uint>; + + +class A64I_fixtofp<bit sf, bits<2> type, bits<3> opcode, + RegisterClass FPR, RegisterClass GPR, Operand scale_op, + string asmop, SDNode cvtop> + : A64I_fpfixed<sf, 0b0, type, 0b00, opcode, + (outs FPR:$Rd), (ins GPR:$Rn, scale_op:$Scale), + !strconcat(asmop, "\t$Rd, $Rn, $Scale"), + [(set FPR:$Rd, (fdiv (cvtop GPR:$Rn), scale_op:$Scale))], + NoItinerary>; + +def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32, + cvtfix_i32_op<f32>, "scvtf", sint_to_fp>; +def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64, + cvtfix_i64_op<f32>, "scvtf", sint_to_fp>; +def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32, + cvtfix_i32_op<f32>, "ucvtf", uint_to_fp>; +def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64, + cvtfix_i64_op<f32>, "ucvtf", uint_to_fp>; +def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32, + cvtfix_i32_op<f64>, "scvtf", sint_to_fp>; +def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64, + cvtfix_i64_op<f64>, "scvtf", sint_to_fp>; +def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32, + cvtfix_i32_op<f64>, "ucvtf", uint_to_fp>; +def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64, + cvtfix_i64_op<f64>, "ucvtf", uint_to_fp>; + 
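+// As a usage example, "fcvtzs w0, s0, #16" computes w0 = (i32)(s0 * 2^16)
+// (a Q16.16 result), which is why the patterns above are written as
+// fp_to_sint/fp_to_uint of (fmul $Rn, $Scale), with the scale operand
+// matching the floating-point constant 2^16.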
+//===----------------------------------------------------------------------===// +// Floating-point <-> integer conversion instructions +//===----------------------------------------------------------------------===// +// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF + +class A64I_fpintI<bit sf, bits<2> type, bits<2> rmode, bits<3> opcode, + RegisterClass DestPR, RegisterClass SrcPR, string asmop> + : A64I_fpint<sf, 0b0, type, rmode, opcode, (outs DestPR:$Rd), (ins SrcPR:$Rn), + !strconcat(asmop, "\t$Rd, $Rn"), [], NoItinerary>; + +multiclass A64I_fptointRM<bits<2> rmode, bit o2, string asmop> { + def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0}, + GPR32, FPR32, asmop # "s">; + def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0}, + GPR64, FPR32, asmop # "s">; + def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1}, + GPR32, FPR32, asmop # "u">; + def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1}, + GPR64, FPR32, asmop # "u">; + + def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0}, + GPR32, FPR64, asmop # "s">; + def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0}, + GPR64, FPR64, asmop # "s">; + def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1}, + GPR32, FPR64, asmop # "u">; + def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1}, + GPR64, FPR64, asmop # "u">; +} + +defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">; +defm FCVTP : A64I_fptointRM<0b01, 0b0, "fcvtp">; +defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">; +defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">; +defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">; + +def : Pat<(i32 (fp_to_sint FPR32:$Rn)), (FCVTZSws FPR32:$Rn)>; +def : Pat<(i64 (fp_to_sint FPR32:$Rn)), (FCVTZSxs FPR32:$Rn)>; +def : Pat<(i32 (fp_to_uint FPR32:$Rn)), (FCVTZUws FPR32:$Rn)>; +def : Pat<(i64 (fp_to_uint FPR32:$Rn)), (FCVTZUxs FPR32:$Rn)>; +def : Pat<(i32 (fp_to_sint (f64 FPR64:$Rn))), (FCVTZSwd FPR64:$Rn)>; +def : Pat<(i64 (fp_to_sint (f64 FPR64:$Rn))), (FCVTZSxd FPR64:$Rn)>; +def : Pat<(i32 (fp_to_uint (f64 FPR64:$Rn))), (FCVTZUwd FPR64:$Rn)>; +def : Pat<(i64 (fp_to_uint (f64 FPR64:$Rn))), (FCVTZUxd FPR64:$Rn)>; + +multiclass A64I_inttofp<bit o0, string asmop> { + def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>; + def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>; + def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>; + def CVTFdx : A64I_fpintI<0b1, 0b01, 0b00, {0, 1, o0}, FPR64, GPR64, asmop>; +} + +defm S : A64I_inttofp<0b0, "scvtf">; +defm U : A64I_inttofp<0b1, "ucvtf">; + +def : Pat<(f32 (sint_to_fp GPR32:$Rn)), (SCVTFsw GPR32:$Rn)>; +def : Pat<(f32 (sint_to_fp GPR64:$Rn)), (SCVTFsx GPR64:$Rn)>; +def : Pat<(f64 (sint_to_fp GPR32:$Rn)), (SCVTFdw GPR32:$Rn)>; +def : Pat<(f64 (sint_to_fp GPR64:$Rn)), (SCVTFdx GPR64:$Rn)>; +def : Pat<(f32 (uint_to_fp GPR32:$Rn)), (UCVTFsw GPR32:$Rn)>; +def : Pat<(f32 (uint_to_fp GPR64:$Rn)), (UCVTFsx GPR64:$Rn)>; +def : Pat<(f64 (uint_to_fp GPR32:$Rn)), (UCVTFdw GPR32:$Rn)>; +def : Pat<(f64 (uint_to_fp GPR64:$Rn)), (UCVTFdx GPR64:$Rn)>; + +def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">; +def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">; +def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">; +def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">; + +def : Pat<(i32 (bitconvert (f32 FPR32:$Rn))), (FMOVws FPR32:$Rn)>; +def : Pat<(f32 (bitconvert (i32 GPR32:$Rn))), (FMOVsw GPR32:$Rn)>; +def : Pat<(i64 (bitconvert (f64 FPR64:$Rn))), (FMOVxd FPR64:$Rn)>; +def : Pat<(f64 (bitconvert (i64 
GPR64:$Rn))), (FMOVdx GPR64:$Rn)>;
+
+def lane1_asmoperand : AsmOperandClass {
+  let Name = "Lane1";
+  let RenderMethod = "addImmOperands";
+  let DiagnosticType = "Lane1";
+}
+
+def lane1 : Operand<i32> {
+  let ParserMatchClass = lane1_asmoperand;
+  let PrintMethod = "printBareImmOperand";
+}
+
+let DecoderMethod = "DecodeFMOVLaneInstruction" in {
+  def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110,
+                          (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane),
+                          "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>;
+
+  def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111,
+                          (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane),
+                          "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>;
+}
+
+def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]",
+                (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>;
+
+def : InstAlias<"fmov $Rd.2d[$Lane], $Rn",
+                (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point immediate instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMOV
+
+def fpimm_asmoperand : AsmOperandClass {
+  let Name = "FMOVImm";
+  let ParserMethod = "ParseFPImmOperand";
+  let DiagnosticType = "FPImm";
+}
+
+// The MCOperand for these instructions is the encoded 8-bit value.
+def SDXF_fpimm : SDNodeXForm<fpimm, [{
+  uint32_t Imm8;
+  A64Imms::isFPImm(N->getValueAPF(), Imm8);
+  return CurDAG->getTargetConstant(Imm8, MVT::i32);
+}]>;
+
+class fmov_operand<ValueType FT>
+  : Operand<i32>,
+    PatLeaf<(FT fpimm), [{ return A64Imms::isFPImm(N->getValueAPF()); }],
+            SDXF_fpimm> {
+  let PrintMethod = "printFPImmOperand";
+  let ParserMatchClass = fpimm_asmoperand;
+}
+
+def fmov32_operand : fmov_operand<f32>;
+def fmov64_operand : fmov_operand<f64>;
+
+class A64I_fpimm_impl<bits<2> type, RegisterClass Reg, ValueType VT,
+                      Operand fmov_operand>
+  : A64I_fpimm<0b0, 0b0, type, 0b00000,
+               (outs Reg:$Rd),
+               (ins fmov_operand:$Imm8),
+               "fmov\t$Rd, $Imm8",
+               [(set (VT Reg:$Rd), fmov_operand:$Imm8)],
+               NoItinerary>;
+
+def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>;
+def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>;
+
+//===----------------------------------------------------------------------===//
+// Load-register (literal) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDR, LDRSW, PRFM
+
+def ldrlit_label_asmoperand : AsmOperandClass {
+  let Name = "LoadLitLabel";
+  let RenderMethod = "addLabelOperands<19, 4>";
+  let DiagnosticType = "Label";
+}
+
+def ldrlit_label : Operand<i64> {
+  let EncoderMethod = "getLoadLitLabelOpValue";
+
+  // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
+  let PrintMethod = "printLabelOperand<19, 4>";
+  let ParserMatchClass = ldrlit_label_asmoperand;
+  let OperandType = "OPERAND_PCREL";
+}
+
+// Various instructions take an immediate value (a raw number is always
+// accepted), but some values also have a symbolic name to make things easier.
+// These operands and the associated functions abstract away the differences.
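+// For example, with the prefetch operand defined below, "prfm pldl1keep, [x0]"
+// and "prfm #0, [x0]" denote the same instruction. A mapper is essentially a
+// name/value table; a minimal sketch (illustrative only, the real
+// A64PRFM::PRFMMapper lives in the target's C++ utilities):
+//
+//   struct NamedImm { const char *Name; uint32_t Value; };
+//   static const NamedImm PRFMValues[] = { {"pldl1keep", 0}, /* ... */ };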
+multiclass namedimm<string prefix, string mapper> {
+  def _asmoperand : AsmOperandClass {
+    let Name = "NamedImm" # prefix;
+    let PredicateMethod = "isUImm";
+    let RenderMethod = "addImmOperands";
+    let ParserMethod = "ParseNamedImmOperand<" # mapper # ">";
+    let DiagnosticType = "NamedImm_" # prefix;
+  }
+
+  def _op : Operand<i32> {
+    let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+    let PrintMethod = "printNamedImmOperand<" # mapper # ">";
+    let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">";
+  }
+}
+
+defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">;
+
+class A64I_LDRlitSimple<bits<2> opc, bit v, RegisterClass OutReg,
+                        list<dag> patterns = []>
+  : A64I_LDRlit<opc, v, (outs OutReg:$Rt), (ins ldrlit_label:$Imm19),
+                "ldr\t$Rt, $Imm19", patterns, NoItinerary>;
+
+let mayLoad = 1 in {
+  def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>;
+  def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>;
+}
+
+def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>;
+def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>;
+
+let mayLoad = 1 in {
+  def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>;
+
+  def LDRSWx_lit : A64I_LDRlit<0b10, 0b0,
+                               (outs GPR64:$Rt),
+                               (ins ldrlit_label:$Imm19),
+                               "ldrsw\t$Rt, $Imm19",
+                               [], NoItinerary>;
+
+  def PRFM_lit : A64I_LDRlit<0b11, 0b0,
+                             (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19),
+                             "prfm\t$Rt, $Imm19",
+                             [], NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// Load-store exclusive instructions
+//===----------------------------------------------------------------------===//
+// Contains: STXRB, STXRH, STXR, LDXRB, LDXRH, LDXR, STXP, LDXP, STLXRB,
+//           STLXRH, STLXR, LDAXRB, LDAXRH, LDAXR, STLXP, LDAXP, STLRB,
+//           STLRH, STLR, LDARB, LDARH, LDAR
+
+// Since these instructions have the undefined register bits set to 1 in
+// their canonical form, we need a post encoder method to set those bits
+// to 1 when encoding these instructions. We do this using the
+// fixLoadStoreExclusive function. This function has template parameters:
+//
+// fixLoadStoreExclusive<int hasRs, int hasRt2>
+//
+// hasRs indicates that the instruction uses the Rs field, so we won't set
+// it to 1 (and the same for Rt2). We don't need template parameters for
+// the other register fields since Rt and Rn are always used.
+
+// This operand parses a GPR64xsp register, followed by an optional immediate
+// #0.
+def GPR64xsp0_asmoperand : AsmOperandClass { + let Name = "GPR64xsp0"; + let PredicateMethod = "isWrappedReg"; + let RenderMethod = "addRegOperands"; + let ParserMethod = "ParseLSXAddressOperand"; + // Diagnostics are provided by ParserMethod +} + +def GPR64xsp0 : RegisterOperand<GPR64xsp> { + let ParserMatchClass = GPR64xsp0_asmoperand; +} + +//===---------------------------------- +// Store-exclusive (releasing & normal) +//===---------------------------------- + +class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, + dag ins, list<dag> pat, + InstrItinClass itin> : + A64I_LDSTex_stn <size, + opcode{2}, 0, opcode{1}, opcode{0}, + outs, ins, + !strconcat(asm, "\t$Rs, $Rt, [$Rn]"), + pat, itin> { + let mayStore = 1; + let PostEncoderMethod = "fixLoadStoreExclusive<1,0>"; +} + +multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> { + def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"), + (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), + [], NoItinerary>; + + def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"), + (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), + [],NoItinerary>; + + def _word: A64I_SRexs_impl<0b10, opcode, asmstr, + (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), + [], NoItinerary>; + + def _dword: A64I_SRexs_impl<0b11, opcode, asmstr, + (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn), + [], NoItinerary>; +} + +defm STXR : A64I_SRex<"stxr", 0b000, "STXR">; +defm STLXR : A64I_SRex<"stlxr", 0b001, "STLXR">; + +//===---------------------------------- +// Loads +//===---------------------------------- + +class A64I_LRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, + dag ins, list<dag> pat, + InstrItinClass itin> : + A64I_LDSTex_tn <size, + opcode{2}, 1, opcode{1}, opcode{0}, + outs, ins, + !strconcat(asm, "\t$Rt, [$Rn]"), + pat, itin> { + let mayLoad = 1; + let PostEncoderMethod = "fixLoadStoreExclusive<0,0>"; +} + +multiclass A64I_LRex<string asmstr, bits<3> opcode> { + def _byte: A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"), + (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), + [], NoItinerary>; + + def _hword: A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"), + (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), + [], NoItinerary>; + + def _word: A64I_LRexs_impl<0b10, opcode, asmstr, + (outs GPR32:$Rt), (ins GPR64xsp0:$Rn), + [], NoItinerary>; + + def _dword: A64I_LRexs_impl<0b11, opcode, asmstr, + (outs GPR64:$Rt), (ins GPR64xsp0:$Rn), + [], NoItinerary>; +} + +defm LDXR : A64I_LRex<"ldxr", 0b000>; +defm LDAXR : A64I_LRex<"ldaxr", 0b001>; +defm LDAR : A64I_LRex<"ldar", 0b101>; + +class acquiring_load<PatFrag base> + : PatFrag<(ops node:$ptr), (base node:$ptr), [{ + return cast<AtomicSDNode>(N)->getOrdering() == Acquire; +}]>; + +def atomic_load_acquire_8 : acquiring_load<atomic_load_8>; +def atomic_load_acquire_16 : acquiring_load<atomic_load_16>; +def atomic_load_acquire_32 : acquiring_load<atomic_load_32>; +def atomic_load_acquire_64 : acquiring_load<atomic_load_64>; + +def : Pat<(atomic_load_acquire_8 GPR64xsp:$Rn), (LDAR_byte GPR64xsp0:$Rn)>; +def : Pat<(atomic_load_acquire_16 GPR64xsp:$Rn), (LDAR_hword GPR64xsp0:$Rn)>; +def : Pat<(atomic_load_acquire_32 GPR64xsp:$Rn), (LDAR_word GPR64xsp0:$Rn)>; +def : Pat<(atomic_load_acquire_64 GPR64xsp:$Rn), (LDAR_dword GPR64xsp0:$Rn)>; + +//===---------------------------------- +// Store-release (no exclusivity) +//===---------------------------------- + +class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, + dag ins, list<dag> pat, + 
                      InstrItinClass itin> :
+  A64I_LDSTex_tn <size,
+                  opcode{2}, 0, opcode{1}, opcode{0},
+                  outs, ins,
+                  !strconcat(asm, "\t$Rt, [$Rn]"),
+                  pat, itin> {
+  let mayStore = 1;
+  let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+class releasing_store<PatFrag base>
+  : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+  return cast<AtomicSDNode>(N)->getOrdering() == Release;
+}]>;
+
+def atomic_store_release_8 : releasing_store<atomic_store_8>;
+def atomic_store_release_16 : releasing_store<atomic_store_16>;
+def atomic_store_release_32 : releasing_store<atomic_store_32>;
+def atomic_store_release_64 : releasing_store<atomic_store_64>;
+
+multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> {
+  def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+                             (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+                             [(atomic_store_release_8 GPR64xsp0:$Rn, GPR32:$Rt)],
+                             NoItinerary>;
+
+  def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+                              (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+                              [(atomic_store_release_16 GPR64xsp0:$Rn, GPR32:$Rt)],
+                              NoItinerary>;
+
+  def _word: A64I_SLexs_impl<0b10, opcode, asmstr,
+                             (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+                             [(atomic_store_release_32 GPR64xsp0:$Rn, GPR32:$Rt)],
+                             NoItinerary>;
+
+  def _dword: A64I_SLexs_impl<0b11, opcode, asmstr,
+                              (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
+                              [(atomic_store_release_64 GPR64xsp0:$Rn, GPR64:$Rt)],
+                              NoItinerary>;
+}
+
+defm STLR : A64I_SLex<"stlr", 0b101, "STLR">;
+
+//===----------------------------------
+// Store-exclusive pair (releasing & normal)
+//===----------------------------------
+
+class A64I_SPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+                      dag ins, list<dag> pat,
+                      InstrItinClass itin> :
+  A64I_LDSTex_stt2n <size,
+                     opcode{2}, 0, opcode{1}, opcode{0},
+                     outs, ins,
+                     !strconcat(asm, "\t$Rs, $Rt, $Rt2, [$Rn]"),
+                     pat, itin> {
+  let mayStore = 1;
+}
+
+multiclass A64I_SPex<string asmstr, bits<3> opcode> {
+  def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs),
+                             (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2,
+                                  GPR64xsp0:$Rn),
+                             [], NoItinerary>;
+
+  def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs),
+                              (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2,
+                                   GPR64xsp0:$Rn),
+                              [], NoItinerary>;
+}
+
+defm STXP : A64I_SPex<"stxp", 0b010>;
+defm STLXP : A64I_SPex<"stlxp", 0b011>;
+
+//===----------------------------------
+// Load-exclusive pair (acquiring & normal)
+//===----------------------------------
+
+class A64I_LPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+                      dag ins, list<dag> pat,
+                      InstrItinClass itin> :
+  A64I_LDSTex_tt2n <size,
+                    opcode{2}, 1, opcode{1}, opcode{0},
+                    outs, ins,
+                    !strconcat(asm, "\t$Rt, $Rt2, [$Rn]"),
+                    pat, itin> {
+  let mayLoad = 1;
+  let DecoderMethod = "DecodeLoadPairExclusiveInstruction";
+  let PostEncoderMethod = "fixLoadStoreExclusive<0,1>";
+}
+
+multiclass A64I_LPex<string asmstr, bits<3> opcode> {
+  def _word: A64I_LPexs_impl<0b10, opcode, asmstr,
+                             (outs GPR32:$Rt, GPR32:$Rt2),
+                             (ins GPR64xsp0:$Rn),
+                             [], NoItinerary>;
+
+  def _dword: A64I_LPexs_impl<0b11, opcode, asmstr,
+                              (outs GPR64:$Rt, GPR64:$Rt2),
+                              (ins GPR64xsp0:$Rn),
+                              [], NoItinerary>;
+}
+
+defm LDXP : A64I_LPex<"ldxp", 0b010>;
+defm LDAXP : A64I_LPex<"ldaxp", 0b011>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register (unscaled immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDURB, LDURH, LDURSB, LDURSH, LDURSW, STUR, STURB, STURH and
PRFUM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (register offset) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (unsigned immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (immediate post-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (immediate pre-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
+
+// Note that patterns are much later on in a completely separate section (they
+// need ADRPxi to be defined).
+
+//===-------------------------------
+// 1. Various operands needed
+//===-------------------------------
+
+//===-------------------------------
+// 1.1 Unsigned 12-bit immediate operands
+//===-------------------------------
+// The addressing mode for these instructions consists of an unsigned 12-bit
+// immediate which is scaled by the size of the memory access.
+//
+// We represent this in the MC layer by two operands:
+//     1. A base register.
+//     2. A 12-bit immediate: not multiplied by access size, so "LDR x0,[x0,#8]"
+//        would have '1' in this field.
+// This means that separate functions are needed for converting representations
+// which *are* aware of the intended access size.
+
+// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
+// know the access size via some means. An isolated operand does not have this
+// information unless told from here, which means we need separate tablegen
+// Operands for each access size. This multiclass takes care of instantiating
+// the correct template functions in the rest of the backend.
+
+multiclass offsets_uimm12<int MemSize, string prefix> {
+  def uimm12_asmoperand : AsmOperandClass {
+    let Name = "OffsetUImm12_" # MemSize;
+    let PredicateMethod = "isOffsetUImm12<" # MemSize # ">";
+    let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">";
+    let DiagnosticType = "LoadStoreUImm12_" # MemSize;
+  }
+
+  // Pattern is really no more than an ImmLeaf, but predicated on MemSize which
+  // complicates things beyond TableGen's ken.
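+  // As a concrete example of the scaling: for a 4-byte access, "ldr w0,
+  // [x1, #20]" stores 20/4 = 5 in the MCInst, and #22 would be rejected as
+  // not a multiple of the access size. A minimal sketch of the check behind
+  // a PredicateMethod like isOffsetUImm12<MemSize> (illustrative only; the
+  // real AsmParser code may differ):
+  //
+  //   template <int MemSize> static bool isOffsetUImm12(int64_t Bytes) {
+  //     return Bytes >= 0 && Bytes % MemSize == 0 && Bytes / MemSize < 0x1000;
+  //   }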
+  def uimm12 : Operand<i64>,
+               ComplexPattern<i64, 1, "SelectOffsetUImm12<" # MemSize # ">"> {
+    let ParserMatchClass
+      = !cast<AsmOperandClass>(prefix # uimm12_asmoperand);
+
+    let PrintMethod = "printOffsetUImm12Operand<" # MemSize # ">";
+    let EncoderMethod = "getOffsetUImm12OpValue<" # MemSize # ">";
+  }
+}
+
+defm byte_  : offsets_uimm12<1, "byte_">;
+defm hword_ : offsets_uimm12<2, "hword_">;
+defm word_  : offsets_uimm12<4, "word_">;
+defm dword_ : offsets_uimm12<8, "dword_">;
+defm qword_ : offsets_uimm12<16, "qword_">;
+
+//===-------------------------------
+// 1.2 Signed 9-bit immediate operands
+//===-------------------------------
+
+// The MCInst is expected to store the bit-wise encoding of the value,
+// which amounts to lopping off the extended sign bits.
+def SDXF_simm9 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue() & 0x1ff, MVT::i32);
+}]>;
+
+def simm9_asmoperand : AsmOperandClass {
+  let Name = "SImm9";
+  let PredicateMethod = "isSImm<9>";
+  let RenderMethod = "addSImmOperands<9>";
+  let DiagnosticType = "LoadStoreSImm9";
+}
+
+def simm9 : Operand<i64>,
+            ImmLeaf<i64, [{ return Imm >= -0x100 && Imm <= 0xff; }],
+                    SDXF_simm9> {
+  let PrintMethod = "printOffsetSImm9Operand";
+  let ParserMatchClass = simm9_asmoperand;
+}
+
+//===-------------------------------
+// 1.3 Register offset extensions
+//===-------------------------------
+
+// The assembly-syntax for these addressing-modes is:
+//    [<Xn|SP>, <R><m> {, <extend> {<amount>}}]
+//
+// The essential semantics are:
+//     + <amount> is a shift: #<log(transfer size)> or #0
+//     + <R> can be W or X.
+//     + If <R> is W, <extend> can be UXTW or SXTW
+//     + If <R> is X, <extend> can be LSL or SXTX
+//
+// The trickiest of those constraints is that Rm can be either GPR32 or GPR64,
+// which will need separate instructions for LLVM type-consistency. We'll also
+// need separate operands, of course.
+multiclass regexts<int MemSize, int RmSize, RegisterClass GPR,
+                   string Rm, string prefix> {
+  def regext_asmoperand : AsmOperandClass {
+    let Name = "AddrRegExtend_" # MemSize # "_" # Rm;
+    let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">";
+    let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">";
+    let DiagnosticType = "LoadStoreExtend" # RmSize # "_" # MemSize;
+  }
+
+  def regext : Operand<i64> {
+    let PrintMethod
+      = "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">";
+
+    let DecoderMethod = "DecodeAddrRegExtendOperand";
+    let ParserMatchClass
+      = !cast<AsmOperandClass>(prefix # regext_asmoperand);
+  }
+}
+
+multiclass regexts_wx<int MemSize, string prefix> {
+  // Rm is an X-register if LSL or SXTX are specified as the shift.
+  defm Xm_ : regexts<MemSize, 64, GPR64, "Xm", prefix # "Xm_">;
+
+  // Rm is a W-register if UXTW or SXTW are specified as the shift.
+  defm Wm_ : regexts<MemSize, 32, GPR32, "Wm", prefix # "Wm_">;
+}
+
+defm byte_  : regexts_wx<1, "byte_">;
+defm hword_ : regexts_wx<2, "hword_">;
+defm word_  : regexts_wx<4, "word_">;
+defm dword_ : regexts_wx<8, "dword_">;
+defm qword_ : regexts_wx<16, "qword_">;
+
+//===------------------------------
+// 2. The instructions themselves.
+//===------------------------------ + +// We have the following instructions to implement: +// | | B | H | W | X | +// |-----------------+-------+-------+-------+--------| +// | unsigned str | STRB | STRH | STR | STR | +// | unsigned ldr | LDRB | LDRH | LDR | LDR | +// | signed ldr to W | LDRSB | LDRSH | - | - | +// | signed ldr to X | LDRSB | LDRSH | LDRSW | (PRFM) | + +// This will instantiate the LDR/STR instructions you'd expect to use for an +// unsigned datatype (first two rows above) or floating-point register, which is +// reasonably uniform across all access sizes. + + +//===------------------------------ +// 2.1 Regular instructions +//===------------------------------ + +// This class covers the basic unsigned or irrelevantly-signed loads and stores, +// to general-purpose and floating-point registers. + +class AddrParams<string prefix> { + Operand uimm12 = !cast<Operand>(prefix # "_uimm12"); + + Operand regextWm = !cast<Operand>(prefix # "_Wm_regext"); + Operand regextXm = !cast<Operand>(prefix # "_Xm_regext"); +} + +def byte_addrparams : AddrParams<"byte">; +def hword_addrparams : AddrParams<"hword">; +def word_addrparams : AddrParams<"word">; +def dword_addrparams : AddrParams<"dword">; +def qword_addrparams : AddrParams<"qword">; + +multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v, + bit high_opc, string asmsuffix, + RegisterClass GPR, AddrParams params> { + // Unsigned immediate + def _STR : A64I_LSunsigimm<size, v, {high_opc, 0b0}, + (outs), (ins GPR:$Rt, GPR64xsp:$Rn, params.uimm12:$UImm12), + "str" # asmsuffix # "\t$Rt, [$Rn, $UImm12]", + [], NoItinerary> { + let mayStore = 1; + } + def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + + def _LDR : A64I_LSunsigimm<size, v, {high_opc, 0b1}, + (outs GPR:$Rt), (ins GPR64xsp:$Rn, params.uimm12:$UImm12), + "ldr" # asmsuffix # "\t$Rt, [$Rn, $UImm12]", + [], NoItinerary> { + let mayLoad = 1; + } + def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + + // Register offset (four of these: load/store and Wm/Xm). 
+ let mayLoad = 1 in { + def _Wm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b0, + (outs GPR:$Rt), + (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext), + "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + + def _Xm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b1, + (outs GPR:$Rt), + (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext), + "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + } + def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]", + (!cast<Instruction>(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, 2)>; + + let mayStore = 1 in { + def _Wm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b0, + (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm, + params.regextWm:$Ext), + "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + + def _Xm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b1, + (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR64:$Rm, + params.regextXm:$Ext), + "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + } + def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]", + (!cast<Instruction>(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, 2)>; + + // Unaligned immediate + def _STUR : A64I_LSunalimm<size, v, {high_opc, 0b0}, + (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), + "stur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> { + let mayStore = 1; + } + def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "_STUR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + + def _LDUR : A64I_LSunalimm<size, v, {high_opc, 0b1}, + (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> { + let mayLoad = 1; + } + def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "_LDUR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + + // Post-indexed + def _PostInd_STR : A64I_LSpostind<size, v, {high_opc, 0b0}, + (outs GPR64xsp:$Rn_wb), + (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), + "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9", + [], NoItinerary> { + let Constraints = "$Rn = $Rn_wb"; + let mayStore = 1; + + // Decoder only needed for unpredictability checking (FIXME). + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + def _PostInd_LDR : A64I_LSpostind<size, v, {high_opc, 0b1}, + (outs GPR:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9", + [], NoItinerary> { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + // Pre-indexed + def _PreInd_STR : A64I_LSpreind<size, v, {high_opc, 0b0}, + (outs GPR64xsp:$Rn_wb), + (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), + "str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!", + [], NoItinerary> { + let Constraints = "$Rn = $Rn_wb"; + let mayStore = 1; + + // Decoder only needed for unpredictability checking (FIXME). 
+ let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + def _PreInd_LDR : A64I_LSpreind<size, v, {high_opc, 0b1}, + (outs GPR:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!", + [], NoItinerary> { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + +} + +// STRB/LDRB: First define the instructions +defm LS8 + : A64I_LDRSTR_unsigned<"LS8", 0b00, 0b0, 0b0, "b", GPR32, byte_addrparams>; + +// STRH/LDRH +defm LS16 + : A64I_LDRSTR_unsigned<"LS16", 0b01, 0b0, 0b0, "h", GPR32, hword_addrparams>; + + +// STR/LDR to/from a W register +defm LS32 + : A64I_LDRSTR_unsigned<"LS32", 0b10, 0b0, 0b0, "", GPR32, word_addrparams>; + +// STR/LDR to/from an X register +defm LS64 + : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>; + +// STR/LDR to/from a B register +defm LSFP8 + : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>; + +// STR/LDR to/from an H register +defm LSFP16 + : A64I_LDRSTR_unsigned<"LSFP16", 0b01, 0b1, 0b0, "", FPR16, hword_addrparams>; + +// STR/LDR to/from an S register +defm LSFP32 + : A64I_LDRSTR_unsigned<"LSFP32", 0b10, 0b1, 0b0, "", FPR32, word_addrparams>; +// STR/LDR to/from a D register +defm LSFP64 + : A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>; +// STR/LDR to/from a Q register +defm LSFP128 + : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128, + qword_addrparams>; + +//===------------------------------ +// 2.3 Signed loads +//===------------------------------ + +// Byte and half-word signed loads can both go into either an X or a W register, +// so it's worth factoring out. Signed word loads don't fit because there is no +// W version. 
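+// For example, "ldrsb w0, [x1]" and "ldrsb x0, [x1]" are both valid, while
+// the word-sized equivalent only exists as "ldrsw x0, [x1]"; the multiclass
+// below therefore produces a "w" and an "x" def for each asmopcode.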
+multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params, + string prefix> { + // Unsigned offset + def w : A64I_LSunsigimm<size, 0b0, 0b11, + (outs GPR32:$Rt), + (ins GPR64xsp:$Rn, params.uimm12:$UImm12), + "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]", + [], NoItinerary> { + let mayLoad = 1; + } + def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # w) GPR32:$Rt, GPR64xsp:$Rn, 0)>; + + def x : A64I_LSunsigimm<size, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, params.uimm12:$UImm12), + "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]", + [], NoItinerary> { + let mayLoad = 1; + } + def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>; + + // Register offset + let mayLoad = 1 in { + def w_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b0, + (outs GPR32:$Rt), + (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext), + "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + + def w_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b1, + (outs GPR32:$Rt), + (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext), + "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + + def x_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b0, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext), + "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + + def x_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b1, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext), + "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + } + def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]", + (!cast<Instruction>(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, 2)>; + + def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]", + (!cast<Instruction>(prefix # "x_Xm_RegOffset") GPR64:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, 2)>; + + + let mayLoad = 1 in { + // Unaligned offset + def w_U : A64I_LSunalimm<size, 0b0, 0b11, + (outs GPR32:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary>; + + def x_U : A64I_LSunalimm<size, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary>; + + + // Post-indexed + def w_PostInd : A64I_LSpostind<size, 0b0, 0b11, + (outs GPR32:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9", + [], NoItinerary> { + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + def x_PostInd : A64I_LSpostind<size, 0b0, 0b10, + (outs GPR64:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9", + [], NoItinerary> { + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + // Pre-indexed + def w_PreInd : A64I_LSpreind<size, 0b0, 0b11, + (outs GPR32:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!", + [], NoItinerary> { + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + + def x_PreInd : A64I_LSpreind<size, 0b0, 0b10, + (outs GPR64:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!", + [], NoItinerary> { + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; + } + } // let mayLoad = 1 +} + +// LDRSB +defm 
LDRSB : A64I_LDR_signed<0b00, "b", byte_addrparams, "LDRSB">; +// LDRSH +defm LDRSH : A64I_LDR_signed<0b01, "h", hword_addrparams, "LDRSH">; + +// LDRSW: load a 32-bit register, sign-extending to 64-bits. +def LDRSWx + : A64I_LSunsigimm<0b10, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, word_uimm12:$UImm12), + "ldrsw\t$Rt, [$Rn, $UImm12]", + [], NoItinerary> { + let mayLoad = 1; +} +def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; + +let mayLoad = 1 in { + def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext), + "ldrsw\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + + def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext), + "ldrsw\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; +} +def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]", + (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>; + + +def LDURSWx + : A64I_LSunalimm<0b10, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldursw\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> { + let mayLoad = 1; +} +def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; + +def LDRSWx_PostInd + : A64I_LSpostind<0b10, 0b0, 0b10, + (outs GPR64:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldrsw\t$Rt, [$Rn], $SImm9", + [], NoItinerary> { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; +} + +def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10, + (outs GPR64:$Rt, GPR64xsp:$Rn_wb), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldrsw\t$Rt, [$Rn, $SImm9]!", + [], NoItinerary> { + let mayLoad = 1; + let Constraints = "$Rn = $Rn_wb"; + let DecoderMethod = "DecodeSingleIndexedInstruction"; +} + +//===------------------------------ +// 2.4 Prefetch operations +//===------------------------------ + +def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs), + (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12), + "prfm\t$Rt, [$Rn, $UImm12]", + [], NoItinerary> { + let mayLoad = 1; +} +def : InstAlias<"prfm $Rt, [$Rn]", + (PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>; + +let mayLoad = 1 in { + def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs), + (ins prefetch_op:$Rt, GPR64xsp:$Rn, + GPR32:$Rm, dword_Wm_regext:$Ext), + "prfm\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; + def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs), + (ins prefetch_op:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, dword_Xm_regext:$Ext), + "prfm\t$Rt, [$Rn, $Rm, $Ext]", + [], NoItinerary>; +} + +def : InstAlias<"prfm $Rt, [$Rn, $Rm]", + (PRFM_Xm_RegOffset prefetch_op:$Rt, GPR64xsp:$Rn, + GPR64:$Rm, 2)>; + + +def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs), + (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9), + "prfum\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> { + let mayLoad = 1; +} +def : InstAlias<"prfum $Rt, [$Rn]", + (PRFUM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>; + +//===----------------------------------------------------------------------===// +// Load-store register (unprivileged) instructions +//===----------------------------------------------------------------------===// +// Contains: LDTRB, LDTRH, LDTRSB, LDTRSH, LDTRSW, STTR, STTRB and STTRH + +// These instructions very much mirror the "unscaled immediate" loads, but since +// there are no floating-point variants we need to split them out into their own +// section to avoid instantiation of "ldtr d0, [sp]" etc. 
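+// Since these share the simm9 operand, the offset range matches the unscaled
+// loads: e.g. "ldtrb w0, [x1, #-256]" is the lowest encodable form, and
+// #-257 is out of range.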
+ +multiclass A64I_LDTRSTTR<bits<2> size, string asmsuffix, RegisterClass GPR, + string prefix> { + def _UnPriv_STR : A64I_LSunpriv<size, 0b0, 0b00, + (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), + "sttr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> { + let mayStore = 1; + } + + def : InstAlias<"sttr" # asmsuffix # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "_UnPriv_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + + def _UnPriv_LDR : A64I_LSunpriv<size, 0b0, 0b01, + (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldtr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> { + let mayLoad = 1; + } + + def : InstAlias<"ldtr" # asmsuffix # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "_UnPriv_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>; + +} + +// STTRB/LDTRB: First define the instructions +defm LS8 : A64I_LDTRSTTR<0b00, "b", GPR32, "LS8">; + +// STTRH/LDTRH +defm LS16 : A64I_LDTRSTTR<0b01, "h", GPR32, "LS16">; + +// STTR/LDTR to/from a W register +defm LS32 : A64I_LDTRSTTR<0b10, "", GPR32, "LS32">; + +// STTR/LDTR to/from an X register +defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">; + +// Now a class for the signed instructions that can go to either 32 or 64 +// bits... +multiclass A64I_LDTR_signed<bits<2> size, string asmopcode, string prefix> { + let mayLoad = 1 in { + def w : A64I_LSunpriv<size, 0b0, 0b11, + (outs GPR32:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary>; + + def x : A64I_LSunpriv<size, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]", + [], NoItinerary>; + } + + def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "w") GPR32:$Rt, GPR64xsp:$Rn, 0)>; + + def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]", + (!cast<Instruction>(prefix # "x") GPR64:$Rt, GPR64xsp:$Rn, 0)>; + +} + +// LDTRSB +defm LDTRSB : A64I_LDTR_signed<0b00, "b", "LDTRSB">; +// LDTRSH +defm LDTRSH : A64I_LDTR_signed<0b01, "h", "LDTRSH">; + +// And finally LDTRSW which only goes to 64 bits. +def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10, + (outs GPR64:$Rt), + (ins GPR64xsp:$Rn, simm9:$SImm9), + "ldtrsw\t$Rt, [$Rn, $SImm9]", + [], NoItinerary> { + let mayLoad = 1; +} +def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; + +//===----------------------------------------------------------------------===// +// Load-store register pair (offset) instructions +//===----------------------------------------------------------------------===// +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register pair (post-indexed) instructions +//===----------------------------------------------------------------------===// +// Contains: STP, LDP, LDPSW +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store register pair (pre-indexed) instructions +//===----------------------------------------------------------------------===// +// Contains: STP, LDP, LDPSW +// +// and +// +//===----------------------------------------------------------------------===// +// Load-store non-temporal register pair (offset) instructions +//===----------------------------------------------------------------------===// +// Contains: STNP, LDNP + + +// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to +// know the access size via some means. 
An isolated operand does not have this +// information unless told from here, which means we need separate tablegen +// Operands for each access size. This multiclass takes care of instantiating +// the correct template functions in the rest of the backend. + +multiclass offsets_simm7<string MemSize, string prefix> { + // The bare signed 7-bit immediate is used in post-indexed instructions, but + // because of the scaling performed a generic "simm7" operand isn't + // appropriate here either. + def simm7_asmoperand : AsmOperandClass { + let Name = "SImm7_Scaled" # MemSize; + let PredicateMethod = "isSImm7Scaled<" # MemSize # ">"; + let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">"; + let DiagnosticType = "LoadStoreSImm7_" # MemSize; + } + + def simm7 : Operand<i64> { + let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">"; + let ParserMatchClass = !cast<AsmOperandClass>(prefix # "simm7_asmoperand"); + } +} + +defm word_ : offsets_simm7<"4", "word_">; +defm dword_ : offsets_simm7<"8", "dword_">; +defm qword_ : offsets_simm7<"16", "qword_">; + +multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg, + Operand simm7, string prefix> { + def _STR : A64I_LSPoffset<opc, v, 0b0, (outs), + (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7), + "stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> { + let mayStore = 1; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + def : InstAlias<"stp $Rt, $Rt2, [$Rn]", + (!cast<Instruction>(prefix # "_STR") SomeReg:$Rt, + SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; + + def _LDR : A64I_LSPoffset<opc, v, 0b1, + (outs SomeReg:$Rt, SomeReg:$Rt2), + (ins GPR64xsp:$Rn, simm7:$SImm7), + "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> { + let mayLoad = 1; + let DecoderMethod = "DecodeLDSTPairInstruction"; + } + def : InstAlias<"ldp $Rt, $Rt2, [$Rn]", + (!cast<Instruction>(prefix # "_LDR") SomeReg:$Rt, + SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; + + def _PostInd_STR : A64I_LSPpostind<opc, v, 0b0, + (outs GPR64xsp:$Rn_wb), + (ins SomeReg:$Rt, SomeReg:$Rt2, + GPR64xsp:$Rn, + simm7:$SImm7), + "stp\t$Rt, $Rt2, [$Rn], $SImm7", + [], NoItinerary> { + let mayStore = 1; + let Constraints = "$Rn = $Rn_wb"; + + // Decoder only needed for unpredictability checking (FIXME). 
+    let DecoderMethod = "DecodeLDSTPairInstruction";
+  }
+
+  def _PostInd_LDR : A64I_LSPpostind<opc, v, 0b1,
+                          (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
+                          (ins GPR64xsp:$Rn, simm7:$SImm7),
+                          "ldp\t$Rt, $Rt2, [$Rn], $SImm7",
+                          [], NoItinerary> {
+    let mayLoad = 1;
+    let Constraints = "$Rn = $Rn_wb";
+    let DecoderMethod = "DecodeLDSTPairInstruction";
+  }
+
+  def _PreInd_STR : A64I_LSPpreind<opc, v, 0b0, (outs GPR64xsp:$Rn_wb),
+                    (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+                    "stp\t$Rt, $Rt2, [$Rn, $SImm7]!",
+                    [], NoItinerary> {
+    let mayStore = 1;
+    let Constraints = "$Rn = $Rn_wb";
+    let DecoderMethod = "DecodeLDSTPairInstruction";
+  }
+
+  def _PreInd_LDR : A64I_LSPpreind<opc, v, 0b1,
+                          (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
+                          (ins GPR64xsp:$Rn, simm7:$SImm7),
+                          "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!",
+                          [], NoItinerary> {
+    let mayLoad = 1;
+    let Constraints = "$Rn = $Rn_wb";
+    let DecoderMethod = "DecodeLDSTPairInstruction";
+  }
+
+  def _NonTemp_STR : A64I_LSPnontemp<opc, v, 0b0, (outs),
+                     (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+                     "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+    let mayStore = 1;
+    let DecoderMethod = "DecodeLDSTPairInstruction";
+  }
+  def : InstAlias<"stnp $Rt, $Rt2, [$Rn]",
+                  (!cast<Instruction>(prefix # "_NonTemp_STR") SomeReg:$Rt,
+                                      SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+  def _NonTemp_LDR : A64I_LSPnontemp<opc, v, 0b1,
+                          (outs SomeReg:$Rt, SomeReg:$Rt2),
+                          (ins GPR64xsp:$Rn, simm7:$SImm7),
+                          "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+    let mayLoad = 1;
+    let DecoderMethod = "DecodeLDSTPairInstruction";
+  }
+  def : InstAlias<"ldnp $Rt, $Rt2, [$Rn]",
+                  (!cast<Instruction>(prefix # "_NonTemp_LDR") SomeReg:$Rt,
+                                      SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+}
+
+defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">;
+defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">;
+defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">;
+defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">;
+defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7,
+                                  "LSFPPair128">;
+
+def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1,
+                            (outs GPR64:$Rt, GPR64:$Rt2),
+                            (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+                            "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+  let mayLoad = 1;
+  let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+def : InstAlias<"ldpsw $Rt, $Rt2, [$Rn]",
+                (LDPSWx GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn, 0)>;
+
+def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1,
+                          (outs GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn_wb),
+                          (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+                          "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7",
+                          [], NoItinerary> {
+  let mayLoad = 1;
+  let Constraints = "$Rn = $Rn_wb";
+  let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+
+def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1,
+                          (outs GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn_wb),
+                          (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+                          "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!",
+                          [], NoItinerary> {
+  let mayLoad = 1;
+  let Constraints = "$Rn = $Rn_wb";
+  let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+
+//===----------------------------------------------------------------------===//
+// Logical (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV
+
+multiclass logical_imm_operands<string prefix, string note,
+                                int size, ValueType VT> {
+  def _asmoperand : AsmOperandClass {
+    let Name = "LogicalImm" # note # size;
+
let PredicateMethod = "isLogicalImm" # note # "<" # size # ">"; + let RenderMethod = "addLogicalImmOperands<" # size # ">"; + let DiagnosticType = "LogicalSecondSource"; + } + + def _operand + : Operand<VT>, ComplexPattern<VT, 1, "SelectLogicalImm", [imm]> { + let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand"); + let PrintMethod = "printLogicalImmOperand<" # size # ">"; + let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">"; + } +} + +defm logical_imm32 : logical_imm_operands<"logical_imm32", "", 32, i32>; +defm logical_imm64 : logical_imm_operands<"logical_imm64", "", 64, i64>; + +// The mov versions only differ in assembly parsing, where they +// exclude values representable with either MOVZ or MOVN. +defm logical_imm32_mov + : logical_imm_operands<"logical_imm32_mov", "MOV", 32, i32>; +defm logical_imm64_mov + : logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>; + + +multiclass A64I_logimmSizes<bits<2> opc, string asmop, SDNode opnode> { + def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd), + (ins GPR32:$Rn, logical_imm32_operand:$Imm), + !strconcat(asmop, "\t$Rd, $Rn, $Imm"), + [(set GPR32wsp:$Rd, + (opnode GPR32:$Rn, logical_imm32_operand:$Imm))], + NoItinerary>; + + def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd), + (ins GPR64:$Rn, logical_imm64_operand:$Imm), + !strconcat(asmop, "\t$Rd, $Rn, $Imm"), + [(set GPR64xsp:$Rd, + (opnode GPR64:$Rn, logical_imm64_operand:$Imm))], + NoItinerary>; +} + +defm AND : A64I_logimmSizes<0b00, "and", and>; +defm ORR : A64I_logimmSizes<0b01, "orr", or>; +defm EOR : A64I_logimmSizes<0b10, "eor", xor>; + +let Defs = [NZCV] in { + def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd), + (ins GPR32:$Rn, logical_imm32_operand:$Imm), + "ands\t$Rd, $Rn, $Imm", + [], NoItinerary>; + + def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd), + (ins GPR64:$Rn, logical_imm64_operand:$Imm), + "ands\t$Rd, $Rn, $Imm", + [], NoItinerary>; +} + + +def : InstAlias<"tst $Rn, $Imm", + (ANDSwwi WZR, GPR32:$Rn, logical_imm32_operand:$Imm)>; +def : InstAlias<"tst $Rn, $Imm", + (ANDSxxi XZR, GPR64:$Rn, logical_imm64_operand:$Imm)>; +def : InstAlias<"mov $Rd, $Imm", + (ORRwwi GPR32wsp:$Rd, WZR, logical_imm32_mov_operand:$Imm)>; +def : InstAlias<"mov $Rd, $Imm", + (ORRxxi GPR64xsp:$Rd, XZR, logical_imm64_mov_operand:$Imm)>; + +//===----------------------------------------------------------------------===// +// Logical (shifted register) instructions +//===----------------------------------------------------------------------===// +// Contains: AND, BIC, ORR, ORN, EOR, EON, ANDS, BICS + aliases TST, MVN, MOV + +// Operand for optimizing (icmp (and LHS, RHS), 0, SomeCode). In theory "ANDS" +// behaves differently for unsigned comparisons, so we defensively only allow +// signed or n/a as the operand. In practice "unsigned greater than 0" is "not +// equal to 0" and LLVM gives us this. +def signed_cond : PatLeaf<(cond), [{ + return !isUnsignedIntSetCC(N->get()); +}]>; + + +// These instructions share their "shift" operands with add/sub (shifted +// register instructions). They are defined there. + +// N.b. the commutable parameter is just !N. It will be first against the wall +// when the revolution comes. 
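+// For example, the 64-bit AND instantiation below produces records such as
+// ANDxxx_lsl, which matches "and x0, x1, x2, lsl #4" in assembly and the
+// selection pattern (and GPR64:$Rn, (shl GPR64:$Rm, imm)) in ISel.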
+multiclass logical_shifts<string prefix, bit sf, bits<2> opc, + bit N, bit commutable, + string asmop, SDPatternOperator opfrag, string sty, + RegisterClass GPR, list<Register> defs> { + let isCommutable = commutable, Defs = defs in { + def _lsl : A64I_logicalshift<sf, opc, 0b00, N, + (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("lsl_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set GPR:$Rd, (opfrag GPR:$Rn, (shl GPR:$Rm, + !cast<Operand>("lsl_operand_" # sty):$Imm6)) + )], + NoItinerary>; + + def _lsr : A64I_logicalshift<sf, opc, 0b01, N, + (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("lsr_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set GPR:$Rd, (opfrag GPR:$Rn, (srl GPR:$Rm, + !cast<Operand>("lsr_operand_" # sty):$Imm6)) + )], + NoItinerary>; + + def _asr : A64I_logicalshift<sf, opc, 0b10, N, + (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("asr_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set GPR:$Rd, (opfrag GPR:$Rn, (sra GPR:$Rm, + !cast<Operand>("asr_operand_" # sty):$Imm6)) + )], + NoItinerary>; + + def _ror : A64I_logicalshift<sf, opc, 0b11, N, + (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("ror_operand_" # sty):$Imm6), + !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), + [(set GPR:$Rd, (opfrag GPR:$Rn, (rotr GPR:$Rm, + !cast<Operand>("ror_operand_" # sty):$Imm6)) + )], + NoItinerary>; + } + + def _noshift + : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"), + (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn, + GPR:$Rm, 0)>; + + def : Pat<(opfrag GPR:$Rn, GPR:$Rm), + (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; +} + +multiclass logical_sizes<string prefix, bits<2> opc, bit N, bit commutable, + string asmop, SDPatternOperator opfrag, + list<Register> defs> { + defm xxx : logical_shifts<prefix # "xxx", 0b1, opc, N, + commutable, asmop, opfrag, "i64", GPR64, defs>; + defm www : logical_shifts<prefix # "www", 0b0, opc, N, + commutable, asmop, opfrag, "i32", GPR32, defs>; +} + + +defm AND : logical_sizes<"AND", 0b00, 0b0, 0b1, "and", and, []>; +defm ORR : logical_sizes<"ORR", 0b01, 0b0, 0b1, "orr", or, []>; +defm EOR : logical_sizes<"EOR", 0b10, 0b0, 0b1, "eor", xor, []>; +defm ANDS : logical_sizes<"ANDS", 0b11, 0b0, 0b1, "ands", + PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), + [{ (void)N; return false; }]>, + [NZCV]>; + +defm BIC : logical_sizes<"BIC", 0b00, 0b1, 0b0, "bic", + PatFrag<(ops node:$lhs, node:$rhs), + (and node:$lhs, (not node:$rhs))>, []>; +defm ORN : logical_sizes<"ORN", 0b01, 0b1, 0b0, "orn", + PatFrag<(ops node:$lhs, node:$rhs), + (or node:$lhs, (not node:$rhs))>, []>; +defm EON : logical_sizes<"EON", 0b10, 0b1, 0b0, "eon", + PatFrag<(ops node:$lhs, node:$rhs), + (xor node:$lhs, (not node:$rhs))>, []>; +defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics", + PatFrag<(ops node:$lhs, node:$rhs), + (and node:$lhs, (not node:$rhs)), + [{ (void)N; return false; }]>, + [NZCV]>; + +multiclass tst_shifts<string prefix, bit sf, string sty, RegisterClass GPR> { + let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in { + def _lsl : A64I_logicalshift<sf, 0b11, 0b00, 0b0, + (outs), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("lsl_operand_" # sty):$Imm6), + "tst\t$Rn, $Rm, $Imm6", + [(set NZCV, (A64setcc (and GPR:$Rn, (shl GPR:$Rm, + !cast<Operand>("lsl_operand_" # sty):$Imm6)), + 0, signed_cond))], + NoItinerary>; + + + def _lsr : A64I_logicalshift<sf, 0b11, 0b01, 0b0, + (outs), + (ins GPR:$Rn, GPR:$Rm, + 
!cast<Operand>("lsr_operand_" # sty):$Imm6), + "tst\t$Rn, $Rm, $Imm6", + [(set NZCV, (A64setcc (and GPR:$Rn, (srl GPR:$Rm, + !cast<Operand>("lsr_operand_" # sty):$Imm6)), + 0, signed_cond))], + NoItinerary>; + + def _asr : A64I_logicalshift<sf, 0b11, 0b10, 0b0, + (outs), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("asr_operand_" # sty):$Imm6), + "tst\t$Rn, $Rm, $Imm6", + [(set NZCV, (A64setcc (and GPR:$Rn, (sra GPR:$Rm, + !cast<Operand>("asr_operand_" # sty):$Imm6)), + 0, signed_cond))], + NoItinerary>; + + def _ror : A64I_logicalshift<sf, 0b11, 0b11, 0b0, + (outs), + (ins GPR:$Rn, GPR:$Rm, + !cast<Operand>("ror_operand_" # sty):$Imm6), + "tst\t$Rn, $Rm, $Imm6", + [(set NZCV, (A64setcc (and GPR:$Rn, (rotr GPR:$Rm, + !cast<Operand>("ror_operand_" # sty):$Imm6)), + 0, signed_cond))], + NoItinerary>; + } + + def _noshift : InstAlias<"tst $Rn, $Rm", + (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; + + def : Pat<(A64setcc (and GPR:$Rn, GPR:$Rm), 0, signed_cond), + (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; +} + +defm TSTxx : tst_shifts<"TSTxx", 0b1, "i64", GPR64>; +defm TSTww : tst_shifts<"TSTww", 0b0, "i32", GPR32>; + + +multiclass mvn_shifts<string prefix, bit sf, string sty, RegisterClass GPR> { + let isCommutable = 0, Rn = 0b11111 in { + def _lsl : A64I_logicalshift<sf, 0b01, 0b00, 0b1, + (outs GPR:$Rd), + (ins GPR:$Rm, + !cast<Operand>("lsl_operand_" # sty):$Imm6), + "mvn\t$Rd, $Rm, $Imm6", + [(set GPR:$Rd, (not (shl GPR:$Rm, + !cast<Operand>("lsl_operand_" # sty):$Imm6)))], + NoItinerary>; + + + def _lsr : A64I_logicalshift<sf, 0b01, 0b01, 0b1, + (outs GPR:$Rd), + (ins GPR:$Rm, + !cast<Operand>("lsr_operand_" # sty):$Imm6), + "mvn\t$Rd, $Rm, $Imm6", + [(set GPR:$Rd, (not (srl GPR:$Rm, + !cast<Operand>("lsr_operand_" # sty):$Imm6)))], + NoItinerary>; + + def _asr : A64I_logicalshift<sf, 0b01, 0b10, 0b1, + (outs GPR:$Rd), + (ins GPR:$Rm, + !cast<Operand>("asr_operand_" # sty):$Imm6), + "mvn\t$Rd, $Rm, $Imm6", + [(set GPR:$Rd, (not (sra GPR:$Rm, + !cast<Operand>("asr_operand_" # sty):$Imm6)))], + NoItinerary>; + + def _ror : A64I_logicalshift<sf, 0b01, 0b11, 0b1, + (outs GPR:$Rd), + (ins GPR:$Rm, + !cast<Operand>("ror_operand_" # sty):$Imm6), + "mvn\t$Rd, $Rm, $Imm6", + [(set GPR:$Rd, (not (rotr GPR:$Rm, + !cast<Operand>("lsl_operand_" # sty):$Imm6)))], + NoItinerary>; + } + + def _noshift : InstAlias<"mvn $Rn, $Rm", + (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; + + def : Pat<(not GPR:$Rm), + (!cast<Instruction>(prefix # "_lsl") GPR:$Rm, 0)>; +} + +defm MVNxx : mvn_shifts<"MVNxx", 0b1, "i64", GPR64>; +defm MVNww : mvn_shifts<"MVNww", 0b0, "i32", GPR32>; + +def MOVxx :InstAlias<"mov $Rd, $Rm", (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; +def MOVww :InstAlias<"mov $Rd, $Rm", (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; + +//===----------------------------------------------------------------------===// +// Move wide (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: MOVN, MOVZ, MOVK + MOV aliases + +// A wide variety of different relocations are needed for variants of these +// instructions, so it turns out that we need a different operand for all of +// them. 
+multiclass movw_operands<string prefix, string instname, int width> {
+  def _imm_asmoperand : AsmOperandClass {
+    let Name = instname # width # "ShiftedImm";
+    let PredicateMethod = "is" # instname # width # "Imm";
+    let RenderMethod = "addMoveWideImmOperands";
+    let ParserMethod = "ParseImmWithLSLOperand";
+    let DiagnosticType = "MOVWUImm16";
+  }
+
+  def _imm : Operand<i32> {
+    let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand");
+    let PrintMethod = "printMoveWideImmOperand";
+    let EncoderMethod = "getMoveWideImmOpValue";
+    let DecoderMethod = "DecodeMoveWideImmOperand<" # width # ">";
+
+    let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
+  }
+}
+
+defm movn32 : movw_operands<"movn32", "MOVN", 32>;
+defm movn64 : movw_operands<"movn64", "MOVN", 64>;
+defm movz32 : movw_operands<"movz32", "MOVZ", 32>;
+defm movz64 : movw_operands<"movz64", "MOVZ", 64>;
+defm movk32 : movw_operands<"movk32", "MOVK", 32>;
+defm movk64 : movw_operands<"movk64", "MOVK", 64>;
+
+multiclass A64I_movwSizes<bits<2> opc, string asmop, dag ins32bit,
+                          dag ins64bit> {
+
+  def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit,
+                      !strconcat(asmop, "\t$Rd, $FullImm"),
+                      [], NoItinerary> {
+    bits<18> FullImm;
+    let UImm16 = FullImm{15-0};
+    let Shift = FullImm{17-16};
+  }
+
+  def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit,
+                      !strconcat(asmop, "\t$Rd, $FullImm"),
+                      [], NoItinerary> {
+    bits<18> FullImm;
+    let UImm16 = FullImm{15-0};
+    let Shift = FullImm{17-16};
+  }
+}
+
+let isMoveImm = 1, isReMaterializable = 1,
+    isAsCheapAsAMove = 1, hasSideEffects = 0 in {
+  defm MOVN : A64I_movwSizes<0b00, "movn",
+                             (ins movn32_imm:$FullImm),
+                             (ins movn64_imm:$FullImm)>;
+
+  // Some relocations are able to convert between a MOVZ and a MOVN. If these
+  // are applied the instruction must be emitted with the corresponding bits as
+  // 0, which means a MOVZ needs to override that bit from the default.
+  let PostEncoderMethod = "fixMOVZ" in
+    defm MOVZ : A64I_movwSizes<0b10, "movz",
+                               (ins movz32_imm:$FullImm),
+                               (ins movz64_imm:$FullImm)>;
+}
+
+let Constraints = "$src = $Rd" in
+defm MOVK : A64I_movwSizes<0b11, "movk",
+                           (ins GPR32:$src, movk32_imm:$FullImm),
+                           (ins GPR64:$src, movk64_imm:$FullImm)>;
+
+// And now the "MOV" aliases. These also need their own operands because what
+// they accept is completely different to what the base instructions accept.
+multiclass movalias_operand<string prefix, string basename,
+                            string immpredicate, int width> {
+  def _asmoperand : AsmOperandClass {
+    let Name = basename # width # "MovAlias";
+    let PredicateMethod
+      = "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">";
+    let RenderMethod
+      = "addMoveWideMovAliasOperands<" # width # ", "
+                                       # "A64Imms::" # immpredicate # ">";
+  }
+
+  def _movimm : Operand<i32> {
+    let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+
+    let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
+  }
+}
+
+defm movz32 : movalias_operand<"movz32", "MOVZ", "isMOVZImm", 32>;
+defm movz64 : movalias_operand<"movz64", "MOVZ", "isMOVZImm", 64>;
+defm movn32 : movalias_operand<"movn32", "MOVN", "isOnlyMOVNImm", 32>;
+defm movn64 : movalias_operand<"movn64", "MOVN", "isOnlyMOVNImm", 64>;
+
+// FIXME: these are officially canonical aliases, but TableGen is too limited
+// to print them at the moment. I believe in this case an "AliasPredicate"
+// method will need to be implemented to allow it, as well as the more
+// generally useful handling of non-register, non-constant operands.
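+// For example, the aliases defined below accept "mov w0, #0x20000" for
+// "movz w0, #2, lsl #16", and "mov w0, #0xfffffffd" for "movn w0, #2".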
+class movalias<Instruction INST, RegisterClass GPR, Operand operand> + : InstAlias<"mov $Rd, $FullImm", (INST GPR:$Rd, operand:$FullImm)>; + +def : movalias<MOVZwii, GPR32, movz32_movimm>; +def : movalias<MOVZxii, GPR64, movz64_movimm>; +def : movalias<MOVNwii, GPR32, movn32_movimm>; +def : movalias<MOVNxii, GPR64, movn64_movimm>; + +//===----------------------------------------------------------------------===// +// PC-relative addressing instructions +//===----------------------------------------------------------------------===// +// Contains: ADR, ADRP + +def adr_label : Operand<i64> { + let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_adr_prel>"; + + // This label is a 21-bit offset from PC, unscaled + let PrintMethod = "printLabelOperand<21, 1>"; + let ParserMatchClass = label_asmoperand<21, 1>; + let OperandType = "OPERAND_PCREL"; +} + +def adrp_label_asmoperand : AsmOperandClass { + let Name = "AdrpLabel"; + let RenderMethod = "addLabelOperands<21, 4096>"; + let DiagnosticType = "Label"; +} + +def adrp_label : Operand<i64> { + let EncoderMethod = "getAdrpLabelOpValue"; + + // This label is a 21-bit offset from PC, scaled by the page-size: 4096. + let PrintMethod = "printLabelOperand<21, 4096>"; + let ParserMatchClass = adrp_label_asmoperand; + let OperandType = "OPERAND_PCREL"; +} + +let hasSideEffects = 0 in { + def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label), + "adr\t$Rd, $Label", [], NoItinerary>; + + def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label), + "adrp\t$Rd, $Label", [], NoItinerary>; +} + +//===----------------------------------------------------------------------===// +// System instructions +//===----------------------------------------------------------------------===// +// Contains: HINT, CLREX, DSB, DMB, ISB, MSR, SYS, SYSL, MRS +// + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL + +// Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values. +def uimm3_asmoperand : AsmOperandClass { + let Name = "UImm3"; + let PredicateMethod = "isUImm<3>"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "UImm3"; +} + +def uimm3 : Operand<i32> { + let ParserMatchClass = uimm3_asmoperand; +} + +// The HINT alias can accept a simple unsigned 7-bit immediate. +def uimm7_asmoperand : AsmOperandClass { + let Name = "UImm7"; + let PredicateMethod = "isUImm<7>"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "UImm7"; +} + +def uimm7 : Operand<i32> { + let ParserMatchClass = uimm7_asmoperand; +} + +// Multiclass namedimm is defined with the prefetch operands. Most of these fit +// into the NamedImmMapper scheme well: they either accept a named operand or +// any immediate under a particular value (which may be 0, implying no immediate +// is allowed). +defm dbarrier : namedimm<"dbarrier", "A64DB::DBarrierMapper">; +defm isb : namedimm<"isb", "A64ISB::ISBMapper">; +defm ic : namedimm<"ic", "A64IC::ICMapper">; +defm dc : namedimm<"dc", "A64DC::DCMapper">; +defm at : namedimm<"at", "A64AT::ATMapper">; +defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">; + +// However, MRS and MSR are more complicated for a few reasons: +// * There are ~1000 generic names S3_<op1>_<CRn>_<CRm>_<Op2> which have an +// implementation-defined effect +// * Most registers are shared, but some are read-only or write-only. +// * There is a variant of MSR which accepts the same register name (SPSel), +// but which would have a different encoding. 
+
+// In principle these could be resolved with more complicated subclasses of
+// NamedImmMapper, however that imposes an overhead on other "named
+// immediates", both in concrete terms with virtual tables and in unnecessary
+// abstraction.
+
+// The solution adopted here is to take the MRS/MSR Mappers out of the usual
+// hierarchy (they're not derived from NamedImmMapper) and to add logic for
+// their special situation.
+def mrs_asmoperand : AsmOperandClass {
+  let Name = "MRS";
+  let ParserMethod = "ParseSysRegOperand";
+  let DiagnosticType = "MRS";
+}
+
+def mrs_op : Operand<i32> {
+  let ParserMatchClass = mrs_asmoperand;
+  let PrintMethod = "printMRSOperand";
+  let DecoderMethod = "DecodeMRSOperand";
+}
+
+def msr_asmoperand : AsmOperandClass {
+  let Name = "MSRWithReg";
+
+  // Note that SPSel is valid for both this and the pstate operands, but with
+  // different immediate encodings. This is why these operands provide a string
+  // AArch64Operand rather than an immediate. The overlap is small enough that
+  // it could be resolved with hackery now, but who can say in future?
+  let ParserMethod = "ParseSysRegOperand";
+  let DiagnosticType = "MSR";
+}
+
+def msr_op : Operand<i32> {
+  let ParserMatchClass = msr_asmoperand;
+  let PrintMethod = "printMSROperand";
+  let DecoderMethod = "DecodeMSROperand";
+}
+
+def pstate_asmoperand : AsmOperandClass {
+  let Name = "MSRPState";
+  // See comment above about parser.
+  let ParserMethod = "ParseSysRegOperand";
+  let DiagnosticType = "MSR";
+}
+
+def pstate_op : Operand<i32> {
+  let ParserMatchClass = pstate_asmoperand;
+  let PrintMethod = "printNamedImmOperand<A64PState::PStateMapper>";
+  let DecoderMethod = "DecodeNamedImmOperand<A64PState::PStateMapper>";
+}
+
+// When <CRn> is specified, an assembler should accept something like "C4", not
+// the usual "#4" immediate.
+def CRx_asmoperand : AsmOperandClass {
+  let Name = "CRx";
+  let PredicateMethod = "isUImm<4>";
+  let RenderMethod = "addImmOperands";
+  let ParserMethod = "ParseCRxOperand";
+  // Diagnostics are handled in all cases by ParseCRxOperand.
+}
+
+def CRx : Operand<i32> {
+  let ParserMatchClass = CRx_asmoperand;
+  let PrintMethod = "printCRxOperand";
+}
+
+
+// Finally, we can start defining the instructions.
+
+// HINT is straightforward, with a few aliases.
+def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7",
+                        [], NoItinerary> {
+  bits<7> UImm7;
+  let CRm = UImm7{6-3};
+  let Op2 = UImm7{2-0};
+
+  let Op0 = 0b00;
+  let Op1 = 0b011;
+  let CRn = 0b0010;
+  let Rt = 0b11111;
+}
+
+def : InstAlias<"nop", (HINTi 0)>;
+def : InstAlias<"yield", (HINTi 1)>;
+def : InstAlias<"wfe", (HINTi 2)>;
+def : InstAlias<"wfi", (HINTi 3)>;
+def : InstAlias<"sev", (HINTi 4)>;
+def : InstAlias<"sevl", (HINTi 5)>;
+
+// Quite a few instructions then follow a similar pattern of fixing common
+// fields in the bitpattern, so we'll define a helper-class for them.
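+//
+// Before that helper, one reference point: the mrs_op/msr_op operands above
+// carry a single 16-bit value which the MRS/MSR definitions below slice into
+// the five classic system-register fields. A sketch of the packing (the
+// helper is hypothetical; the field layout is the one used by those defs):
+//
+//   #include <cstdint>
+//
+//   static uint16_t packSysReg(unsigned Op0, unsigned Op1, unsigned CRn,
+//                              unsigned CRm, unsigned Op2) {
+//     // Op0[15:14] Op1[13:11] CRn[10:7] CRm[6:3] Op2[2:0]
+//     return (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
+//   }
+//
+// e.g. TPIDR_EL0 is S3_3_C13_C0_2, so packSysReg(3, 3, 13, 0, 2) == 0xde82,
+// which is exactly the constant used for A64threadpointer later in this file.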
+class simple_sys<bits<2> op0, bits<3> op1, bits<4> crn, bits<3> op2,
+                 Operand operand, string asmop>
+  : A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"),
+                [], NoItinerary> {
+  let Op0 = op0;
+  let Op1 = op1;
+  let CRn = crn;
+  let Op2 = op2;
+  let Rt = 0b11111;
+}
+
+
+def CLREXi : simple_sys<0b00, 0b011, 0b0011, 0b010, uimm4, "clrex">;
+def DSBi : simple_sys<0b00, 0b011, 0b0011, 0b100, dbarrier_op, "dsb">;
+def DMBi : simple_sys<0b00, 0b011, 0b0011, 0b101, dbarrier_op, "dmb">;
+def ISBi : simple_sys<0b00, 0b011, 0b0011, 0b110, isb_op, "isb">;
+
+def : InstAlias<"clrex", (CLREXi 0b1111)>;
+def : InstAlias<"isb", (ISBi 0b1111)>;
+
+// (DMBi 0xb) is a "DMB ISH" instruction, appropriate for Linux SMP
+// configurations at least.
+def : Pat<(atomic_fence imm, imm), (DMBi 0xb)>;
+
+// Any SYS bitpattern can be represented with a complex and opaque "SYS"
+// instruction.
+def SYSiccix : A64I_system<0b0, (outs),
+                           (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm,
+                                uimm3:$Op2, GPR64:$Rt),
+                           "sys\t$Op1, $CRn, $CRm, $Op2, $Rt",
+                           [], NoItinerary> {
+  let Op0 = 0b01;
+}
+
+// You can skip the Xt argument whether it makes sense or not for the generic
+// SYS instruction.
+def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2",
+                (SYSiccix uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, XZR)>;
+
+
+// But many have aliases, which obviously don't fit into the generic SYS form.
+class SYSalias<dag ins, string asmstring>
+  : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> {
+  let isAsmParserOnly = 1;
+
+  bits<14> SysOp;
+  let Op0 = 0b01;
+  let Op1 = SysOp{13-11};
+  let CRn = SysOp{10-7};
+  let CRm = SysOp{6-3};
+  let Op2 = SysOp{2-0};
+}
+
+def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">;
+
+def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> {
+  let Rt = 0b11111;
+}
+
+def DCix : SYSalias<(ins dc_op:$SysOp, GPR64:$Rt), "dc\t$SysOp, $Rt">;
+def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">;
+
+def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">;
+
+def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> {
+  let Rt = 0b11111;
+}
+
+
+def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt),
+                            (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2),
+                            "sysl\t$Rt, $Op1, $CRn, $CRm, $Op2",
+                            [], NoItinerary> {
+  let Op0 = 0b01;
+}
+
+// The instructions themselves are rather simple for MSR and MRS.
+def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt),
+                        "msr\t$SysReg, $Rt", [], NoItinerary> {
+  bits<16> SysReg;
+  let Op0 = SysReg{15-14};
+  let Op1 = SysReg{13-11};
+  let CRn = SysReg{10-7};
+  let CRm = SysReg{6-3};
+  let Op2 = SysReg{2-0};
+}
+
+def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg),
+                        "mrs\t$Rt, $SysReg", [], NoItinerary> {
+  bits<16> SysReg;
+  let Op0 = SysReg{15-14};
+  let Op1 = SysReg{13-11};
+  let CRn = SysReg{10-7};
+  let CRm = SysReg{6-3};
+  let Op2 = SysReg{2-0};
+}
+
+def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm),
+                        "msr\t$PState, $CRm", [], NoItinerary> {
+  bits<6> PState;
+
+  let Op0 = 0b00;
+  let Op1 = PState{5-3};
+  let CRn = 0b0100;
+  let Op2 = PState{2-0};
+  let Rt = 0b11111;
+}
+
+//===----------------------------------------------------------------------===//
+// Test & branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: TBZ, TBNZ
+
+// The bit to test is a simple unsigned 6-bit immediate in the X-register
+// versions.
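+//
+// The DAG patterns below recover that bit number from an AND mask: a branch
+// on (x0 & (1 << 3)) == 0 becomes "tbz x0, #3, label". A sketch of the check
+// the SelectTSTBOperand ComplexPattern performs, assuming only power-of-two
+// masks qualify (standalone C++, hypothetical helper name):
+//
+//   #include <cstdint>
+//
+//   // Returns the bit to test, or -1 if the mask isn't a single bit.
+//   static int tstBitFromMask(uint64_t Mask) {
+//     if (Mask == 0 || (Mask & (Mask - 1)) != 0)
+//       return -1;                      // zero or more than one bit set
+//     int Bit = 0;
+//     while (!(Mask & 1)) { Mask >>= 1; ++Bit; }
+//     return Bit;                       // the 6-bit immediate for TBZ/TBNZ
+//   }
+//
+// e.g. tstBitFromMask(0x80) == 7, so testing (x0 & 0x80) against zero selects
+// "tbz x0, #7, label" rather than an AND followed by CBZ.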
+def uimm6 : Operand<i64> { + let ParserMatchClass = uimm6_asmoperand; +} + +def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>; + +def tbimm_target : Operand<OtherVT> { + let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_tstbr>"; + + // This label is a 14-bit offset from PC, scaled by the instruction-width: 4. + let PrintMethod = "printLabelOperand<14, 4>"; + let ParserMatchClass = label_wid14_scal4_asmoperand; + + let OperandType = "OPERAND_PCREL"; +} + +def A64eq : ImmLeaf<i32, [{ return Imm == A64CC::EQ; }]>; +def A64ne : ImmLeaf<i32, [{ return Imm == A64CC::NE; }]>; + +// These instructions correspond to patterns involving "and" with a power of +// two, which we need to be able to select. +def tstb64_pat : ComplexPattern<i64, 1, "SelectTSTBOperand<64>">; +def tstb32_pat : ComplexPattern<i32, 1, "SelectTSTBOperand<32>">; + +let isBranch = 1, isTerminator = 1 in { + def TBZxii : A64I_TBimm<0b0, (outs), + (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label), + "tbz\t$Rt, $Imm, $Label", + [(A64br_cc (A64cmp (and GPR64:$Rt, tstb64_pat:$Imm), 0), + A64eq, bb:$Label)], + NoItinerary>; + + def TBNZxii : A64I_TBimm<0b1, (outs), + (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label), + "tbnz\t$Rt, $Imm, $Label", + [(A64br_cc (A64cmp (and GPR64:$Rt, tstb64_pat:$Imm), 0), + A64ne, bb:$Label)], + NoItinerary>; + + + // Note, these instructions overlap with the above 64-bit patterns. This is + // intentional, "tbz x3, #1, somewhere" and "tbz w3, #1, somewhere" would both + // do the same thing and are both permitted assembly. They also both have + // sensible DAG patterns. + def TBZwii : A64I_TBimm<0b0, (outs), + (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label), + "tbz\t$Rt, $Imm, $Label", + [(A64br_cc (A64cmp (and GPR32:$Rt, tstb32_pat:$Imm), 0), + A64eq, bb:$Label)], + NoItinerary> { + let Imm{5} = 0b0; + } + + def TBNZwii : A64I_TBimm<0b1, (outs), + (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label), + "tbnz\t$Rt, $Imm, $Label", + [(A64br_cc (A64cmp (and GPR32:$Rt, tstb32_pat:$Imm), 0), + A64ne, bb:$Label)], + NoItinerary> { + let Imm{5} = 0b0; + } +} + +//===----------------------------------------------------------------------===// +// Unconditional branch (immediate) instructions +//===----------------------------------------------------------------------===// +// Contains: B, BL + +def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>; + +def bimm_target : Operand<OtherVT> { + let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_uncondbr>"; + + // This label is a 26-bit offset from PC, scaled by the instruction-width: 4. + let PrintMethod = "printLabelOperand<26, 4>"; + let ParserMatchClass = label_wid26_scal4_asmoperand; + + let OperandType = "OPERAND_PCREL"; +} + +def blimm_target : Operand<i64> { + let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_call>"; + + // This label is a 26-bit offset from PC, scaled by the instruction-width: 4. 
+  let PrintMethod = "printLabelOperand<26, 4>";
+  let ParserMatchClass = label_wid26_scal4_asmoperand;
+
+  let OperandType = "OPERAND_PCREL";
+}
+
+class A64I_BimmImpl<bit op, string asmop, list<dag> patterns, Operand lbl_type>
+  : A64I_Bimm<op, (outs), (ins lbl_type:$Label),
+              !strconcat(asmop, "\t$Label"), patterns,
+              NoItinerary>;
+
+let isBranch = 1 in {
+  def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> {
+    let isTerminator = 1;
+    let isBarrier = 1;
+  }
+
+  def BLimm : A64I_BimmImpl<0b1, "bl",
+                            [(AArch64Call tglobaladdr:$Label)], blimm_target> {
+    let isCall = 1;
+    let Defs = [X30];
+  }
+}
+
+def : Pat<(AArch64Call texternalsym:$Label), (BLimm texternalsym:$Label)>;
+
+//===----------------------------------------------------------------------===//
+// Unconditional branch (register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: BR, BLR, RET, ERET, DRPS.
+
+// Most of the notional opcode fields in the A64I_Breg format are fixed in A64
+// at the moment.
+class A64I_BregImpl<bits<4> opc,
+                    dag outs, dag ins, string asmstr, list<dag> patterns,
+                    InstrItinClass itin = NoItinerary>
+  : A64I_Breg<opc, 0b11111, 0b000000, 0b00000,
+              outs, ins, asmstr, patterns, itin> {
+  let isBranch = 1;
+  let isIndirectBranch = 1;
+}
+
+// Note that these are not marked isCall or isReturn because as far as LLVM is
+// concerned they're not. "ret" is just another jump unless it has been selected
+// by LLVM as the function's return.
+
+let isBranch = 1 in {
+  def BRx : A64I_BregImpl<0b0000, (outs), (ins GPR64:$Rn),
+                          "br\t$Rn", [(brind GPR64:$Rn)]> {
+    let isBarrier = 1;
+    let isTerminator = 1;
+  }
+
+  def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn),
+                           "blr\t$Rn", [(AArch64Call GPR64:$Rn)]> {
+    let isBarrier = 0;
+    let isCall = 1;
+    let Defs = [X30];
+  }
+
+  def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn),
+                           "ret\t$Rn", []> {
+    let isBarrier = 1;
+    let isTerminator = 1;
+    let isReturn = 1;
+  }
+
+  // Create a separate pseudo-instruction for codegen to use so that we don't
+  // flag x30 as used in every function. It'll be restored before the RET by the
+  // epilogue if it's legitimately used.
+  def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> {
+    let isTerminator = 1;
+    let isBarrier = 1;
+    let isReturn = 1;
+  }
+
+  def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> {
+    let Rn = 0b11111;
+    let isBarrier = 1;
+    let isTerminator = 1;
+    let isReturn = 1;
+  }
+
+  def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> {
+    let Rn = 0b11111;
+    let isBarrier = 1;
+  }
+}
+
+def RETAlias : InstAlias<"ret", (RETx X30)>;
+
+
+//===----------------------------------------------------------------------===//
+// Address generation patterns
+//===----------------------------------------------------------------------===//
+
+// Primary method of address generation for the small/absolute memory model is
+// an ADRP/ADD pair:
+//     ADRP x0, some_variable
+//     ADD x0, x0, #:lo12:some_variable
+//
+// The load/store elision of the ADD is accomplished when selecting
+// addressing-modes. This just mops up the cases where that doesn't work and we
+// really need an address in some register.
+
+// This wrapper applies a LO12 modifier to the address. Otherwise we could just
+// use the same address.
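+//
+// Concretely, ADRP materialises the 4KB page of the symbol and the :lo12:
+// ADD (or load/store offset) supplies the bits within the page. The
+// arithmetic, as a standalone sketch rather than anything in this backend:
+//
+//   #include <cstdint>
+//
+//   static uint64_t adrpAddPair(uint64_t PC, uint64_t Target) {
+//     uint64_t Page = Target & ~UINT64_C(0xfff); // what ADRP leaves in Rd
+//     uint64_t Lo12 = Target & UINT64_C(0xfff);  // the #:lo12: ADD immediate
+//     (void)PC; // ADRP really encodes (Page - (PC & ~0xfff)) >> 12
+//     return Page + Lo12;                        // == Target
+//   }
+//
+// With a 21-bit page-scaled immediate, ADRP reaches roughly +/-4GB of the
+// current instruction, which is what makes these two-instruction sequences
+// sufficient for the small code model.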
+ +class ADRP_ADD<SDNode Wrapper, SDNode addrop> + : Pat<(Wrapper addrop:$Hi, addrop:$Lo12, (i32 imm)), + (ADDxxi_lsl0_s (ADRPxi addrop:$Hi), addrop:$Lo12)>; + +def : ADRP_ADD<A64WrapperSmall, tblockaddress>; +def : ADRP_ADD<A64WrapperSmall, texternalsym>; +def : ADRP_ADD<A64WrapperSmall, tglobaladdr>; +def : ADRP_ADD<A64WrapperSmall, tglobaltlsaddr>; +def : ADRP_ADD<A64WrapperSmall, tjumptable>; + +//===----------------------------------------------------------------------===// +// GOT access patterns +//===----------------------------------------------------------------------===// + +// FIXME: Wibble + +class GOTLoadSmall<SDNode addrfrag> + : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)), + (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>; + +def : GOTLoadSmall<texternalsym>; +def : GOTLoadSmall<tglobaladdr>; +def : GOTLoadSmall<tglobaltlsaddr>; + +//===----------------------------------------------------------------------===// +// Tail call handling +//===----------------------------------------------------------------------===// + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in { + def TC_RETURNdi + : PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff), + [(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>; + + def TC_RETURNxi + : PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), + [(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff))]>; +} + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + Uses = [XSP] in { + def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [], + (Bimm bimm_target:$Label)>; + + def TAIL_BRx : A64PseudoExpand<(outs), (ins tcGPR64:$Rd), [], + (BRx GPR64:$Rd)>; +} + + +def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), + (TC_RETURNdi texternalsym:$dst, imm:$FPDiff)>; + +//===----------------------------------------------------------------------===// +// Thread local storage +//===----------------------------------------------------------------------===// + +// This is a pseudo-instruction representing the ".tlsdesccall" directive in +// assembly. Its effect is to insert an R_AARCH64_TLSDESC_CALL relocation at the +// current location. It should always be immediately followed by a BLR +// instruction, and is intended solely for relaxation by the linker. + +def : Pat<(A64threadpointer), (MRSxi 0xde82)>; + +def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> { + let hasSideEffects = 1; +} + +def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var), + [(A64tlsdesc_blr GPR64:$Rn, tglobaltlsaddr:$Var)]> { + let isCall = 1; + let Defs = [X30]; +} + +def : Pat<(A64tlsdesc_blr GPR64:$Rn, texternalsym:$Var), + (TLSDESC_BLRx GPR64:$Rn, texternalsym:$Var)>; + +//===----------------------------------------------------------------------===// +// Bitfield patterns +//===----------------------------------------------------------------------===// + +def bfi32_lsb_to_immr : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant((32 - N->getZExtValue()) % 32, MVT::i64); +}]>; + +def bfi64_lsb_to_immr : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant((64 - N->getZExtValue()) % 64, MVT::i64); +}]>; + +def bfi_width_to_imms : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() - 1, MVT::i64); +}]>; + + +// The simpler patterns deal with cases where no AND mask is actually needed +// (either all bits are used or the low 32 bits are used). 
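+//
+// For sanity: those transforms implement the standard BFI-to-BFM translation,
+// where a bitfield insert at LSB with width W uses ImmR as the right-rotate
+// that moves the field into place and ImmS = W - 1. Plain arithmetic, not
+// backend code:
+//
+//   static unsigned bfi32ImmR(unsigned LSB) { return (32 - LSB) % 32; }
+//   static unsigned bfiImmS(unsigned Width) { return Width - 1; }
+//
+//   // bfi w0, w1, #8, #4  =>  ImmR = (32 - 8) % 32 = 24, ImmS = 4 - 1 = 3
+//
+// With that established, the first patterns below can select BFIwwii/BFIxxii
+// directly.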
+let AddedComplexity = 10 in { + +def : Pat<(A64Bfi GPR64:$src, GPR64:$Rn, imm:$ImmR, imm:$ImmS), + (BFIxxii GPR64:$src, GPR64:$Rn, + (bfi64_lsb_to_immr (i64 imm:$ImmR)), + (bfi_width_to_imms (i64 imm:$ImmS)))>; + +def : Pat<(A64Bfi GPR32:$src, GPR32:$Rn, imm:$ImmR, imm:$ImmS), + (BFIwwii GPR32:$src, GPR32:$Rn, + (bfi32_lsb_to_immr (i64 imm:$ImmR)), + (bfi_width_to_imms (i64 imm:$ImmS)))>; + + +def : Pat<(and (A64Bfi GPR64:$src, GPR64:$Rn, imm:$ImmR, imm:$ImmS), + (i64 4294967295)), + (SUBREG_TO_REG (i64 0), + (BFIwwii (EXTRACT_SUBREG GPR64:$src, sub_32), + (EXTRACT_SUBREG GPR64:$Rn, sub_32), + (bfi32_lsb_to_immr (i64 imm:$ImmR)), + (bfi_width_to_imms (i64 imm:$ImmS))), + sub_32)>; + +} + +//===----------------------------------------------------------------------===// +// Miscellaneous patterns +//===----------------------------------------------------------------------===// + +// Truncation from 64 to 32-bits just involves renaming your register. +def : Pat<(i32 (trunc (i64 GPR64:$val))), (EXTRACT_SUBREG GPR64:$val, sub_32)>; + +// Similarly, extension where we don't care about the high bits is +// just a rename. +def : Pat<(i64 (anyext (i32 GPR32:$val))), + (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$val, sub_32)>; + +// SELECT instructions providing f128 types need to be handled by a +// pseudo-instruction since the eventual code will need to introduce basic +// blocks and control flow. +def F128CSEL : PseudoInst<(outs FPR128:$Rd), + (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond), + [(set FPR128:$Rd, (simple_select (f128 FPR128:$Rn), + FPR128:$Rm))]> { + let Uses = [NZCV]; + let usesCustomInserter = 1; +} + +//===----------------------------------------------------------------------===// +// Load/store patterns +//===----------------------------------------------------------------------===// + +// There are lots of patterns here, because we need to allow at least three +// parameters to vary independently. +// 1. Instruction: "ldrb w9, [sp]", "ldrh w9, [sp]", ... +// 2. LLVM source: zextloadi8, anyextloadi8, ... +// 3. Address-generation: A64Wrapper, (add BASE, OFFSET), ... +// +// The biggest problem turns out to be the address-generation variable. At the +// point of instantiation we need to produce two DAGs, one for the pattern and +// one for the instruction. Doing this at the lowest level of classes doesn't +// work. +// +// Consider the simple uimm12 addressing mode, and the desire to match both (add +// GPR64xsp:$Rn, uimm12:$Offset) and GPR64xsp:$Rn, particularly on the +// instruction side. We'd need to insert either "GPR64xsp" and "uimm12" or +// "GPR64xsp" and "0" into an unknown dag. !subst is not capable of this +// operation, and PatFrags are for selection not output. +// +// As a result, the address-generation patterns are the final +// instantiations. However, we do still need to vary the operand for the address +// further down (At the point we're deciding A64WrapperSmall, we don't know +// the memory width of the operation). + +//===------------------------------ +// 1. Basic infrastructural defs +//===------------------------------ + +// First, some simple classes for !foreach and !subst to use: +class Decls { + dag pattern; +} + +def decls : Decls; +def ALIGN; +def INST; +def OFFSET; +def SHIFT; + +// You can't use !subst on an actual immediate, but you *can* use it on an +// operand record that happens to match a single immediate. So we do. 
+def imm_eq0 : ImmLeaf<i64, [{ return Imm == 0; }]>; +def imm_eq1 : ImmLeaf<i64, [{ return Imm == 1; }]>; +def imm_eq2 : ImmLeaf<i64, [{ return Imm == 2; }]>; +def imm_eq3 : ImmLeaf<i64, [{ return Imm == 3; }]>; +def imm_eq4 : ImmLeaf<i64, [{ return Imm == 4; }]>; + +// If the low bits of a pointer are known to be 0 then an "or" is just as good +// as addition for computing an offset. This fragment forwards that check for +// TableGen's use. +def add_like_or : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs), +[{ + return CurDAG->isBaseWithConstantOffset(SDValue(N, 0)); +}]>; + +// Load/store (unsigned immediate) operations with relocations against global +// symbols (for lo12) are only valid if those symbols have correct alignment +// (since the immediate offset is divided by the access scale, it can't have a +// remainder). +// +// The guaranteed alignment is provided as part of the WrapperSmall +// operation, and checked against one of these. +def any_align : ImmLeaf<i32, [{ (void)Imm; return true; }]>; +def min_align2 : ImmLeaf<i32, [{ return Imm >= 2; }]>; +def min_align4 : ImmLeaf<i32, [{ return Imm >= 4; }]>; +def min_align8 : ImmLeaf<i32, [{ return Imm >= 8; }]>; +def min_align16 : ImmLeaf<i32, [{ return Imm >= 16; }]>; + +// "Normal" load/store instructions can be used on atomic operations, provided +// the ordering parameter is at most "monotonic". Anything above that needs +// special handling with acquire/release instructions. +class simple_load<PatFrag base> + : PatFrag<(ops node:$ptr), (base node:$ptr), [{ + return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic; +}]>; + +def atomic_load_simple_i8 : simple_load<atomic_load_8>; +def atomic_load_simple_i16 : simple_load<atomic_load_16>; +def atomic_load_simple_i32 : simple_load<atomic_load_32>; +def atomic_load_simple_i64 : simple_load<atomic_load_64>; + +class simple_store<PatFrag base> + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ + return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic; +}]>; + +def atomic_store_simple_i8 : simple_store<atomic_store_8>; +def atomic_store_simple_i16 : simple_store<atomic_store_16>; +def atomic_store_simple_i32 : simple_store<atomic_store_32>; +def atomic_store_simple_i64 : simple_store<atomic_store_64>; + +//===------------------------------ +// 2. UImm12 and SImm9 +//===------------------------------ + +// These instructions have two operands providing the address so they can be +// treated similarly for most purposes. + +//===------------------------------ +// 2.1 Base patterns covering extend/truncate semantics +//===------------------------------ + +// Atomic patterns can be shared between integer operations of all sizes, a +// quick multiclass here allows reuse. +multiclass ls_atomic_pats<Instruction LOAD, Instruction STORE, dag Base, + dag Offset, dag address, RegisterClass TPR, + ValueType sty> { + def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address), + (LOAD Base, Offset)>; + + def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, TPR:$Rt), + (STORE TPR:$Rt, Base, Offset)>; +} + +// Instructions accessing a memory chunk smaller than a register (or, in a +// pinch, the same size) have a characteristic set of patterns they want to +// match: extending loads and truncating stores. This class deals with the +// sign-neutral version of those patterns. +// +// It will be instantiated across multiple addressing-modes. 
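+//
+// One more note before the instantiations: the add_like_or equivalence above
+// is worth spelling out. If no set bits overlap between base and offset, OR
+// and ADD compute the same address. A minimal standalone check (illustrative
+// only; isBaseWithConstantOffset is what proves the non-overlap at selection
+// time):
+//
+//   #include <cassert>
+//   #include <cstdint>
+//
+//   int main() {
+//     uint64_t Base = 0x1000; // low 12 bits known to be zero
+//     uint64_t Off = 0x42;
+//     assert((Base | Off) == (Base + Off));
+//     return 0;
+//   }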
+multiclass ls_small_pats<Instruction LOAD, Instruction STORE, + dag Base, dag Offset, + dag address, ValueType sty> + : ls_atomic_pats<LOAD, STORE, Base, Offset, address, GPR32, sty> { + def : Pat<(!cast<SDNode>(zextload # sty) address), (LOAD Base, Offset)>; + + def : Pat<(!cast<SDNode>(extload # sty) address), (LOAD Base, Offset)>; + + // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit + // register was actually set. + def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)), + (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>; + + def : Pat<(i64 (!cast<SDNode>(extload # sty) address)), + (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>; + + def : Pat<(!cast<SDNode>(truncstore # sty) GPR32:$Rt, address), + (STORE GPR32:$Rt, Base, Offset)>; + + // For truncating store from 64-bits, we have to manually tell LLVM to + // ignore the high bits of the x register. + def : Pat<(!cast<SDNode>(truncstore # sty) GPR64:$Rt, address), + (STORE (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset)>; +} + +// Next come patterns for sign-extending loads. +multiclass load_signed_pats<string T, string U, dag Base, dag Offset, + dag address, ValueType sty> { + def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)), + (!cast<Instruction>("LDRS" # T # "w" # U) Base, Offset)>; + + def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)), + (!cast<Instruction>("LDRS" # T # "x" # U) Base, Offset)>; + +} + +// and finally "natural-width" loads and stores come next. +multiclass ls_neutral_pats<Instruction LOAD, Instruction STORE, dag Base, + dag Offset, dag address, RegisterClass TPR, + ValueType sty> { + def : Pat<(sty (load address)), (LOAD Base, Offset)>; + def : Pat<(store (sty TPR:$Rt), address), (STORE TPR:$Rt, Base, Offset)>; +} + +// Integer operations also get atomic instructions to select for. +multiclass ls_int_neutral_pats<Instruction LOAD, Instruction STORE, dag Base, + dag Offset, dag address, RegisterClass TPR, + ValueType sty> + : ls_neutral_pats<LOAD, STORE, Base, Offset, address, TPR, sty>, + ls_atomic_pats<LOAD, STORE, Base, Offset, address, TPR, sty>; + +//===------------------------------ +// 2.2. 
Addressing-mode instantiations +//===------------------------------ + +multiclass uimm12_pats<dag address, dag Base, dag Offset> { + defm : ls_small_pats<LS8_LDR, LS8_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, byte_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, byte_uimm12, + !subst(ALIGN, any_align, decls.pattern))), + i8>; + defm : ls_small_pats<LS16_LDR, LS16_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, hword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, hword_uimm12, + !subst(ALIGN, min_align2, decls.pattern))), + i16>; + defm : ls_small_pats<LS32_LDR, LS32_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, word_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, word_uimm12, + !subst(ALIGN, min_align4, decls.pattern))), + i32>; + + defm : ls_int_neutral_pats<LS32_LDR, LS32_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, word_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, word_uimm12, + !subst(ALIGN, min_align4, decls.pattern))), + GPR32, i32>; + + defm : ls_int_neutral_pats<LS64_LDR, LS64_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, dword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, dword_uimm12, + !subst(ALIGN, min_align8, decls.pattern))), + GPR64, i64>; + + defm : ls_neutral_pats<LSFP16_LDR, LSFP16_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, hword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, hword_uimm12, + !subst(ALIGN, min_align2, decls.pattern))), + FPR16, f16>; + + defm : ls_neutral_pats<LSFP32_LDR, LSFP32_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, word_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, word_uimm12, + !subst(ALIGN, min_align4, decls.pattern))), + FPR32, f32>; + + defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, dword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, dword_uimm12, + !subst(ALIGN, min_align8, decls.pattern))), + FPR64, f64>; + + defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, qword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, qword_uimm12, + !subst(ALIGN, min_align16, decls.pattern))), + FPR128, f128>; + + defm : load_signed_pats<"B", "", Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, byte_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, byte_uimm12, + !subst(ALIGN, any_align, decls.pattern))), + i8>; + + defm : load_signed_pats<"H", "", Base, + !foreach(decls.pattern, Offset, + !subst(OFFSET, hword_uimm12, decls.pattern)), + !foreach(decls.pattern, address, + !subst(OFFSET, hword_uimm12, + !subst(ALIGN, min_align2, decls.pattern))), + i16>; + + def : Pat<(sextloadi32 !foreach(decls.pattern, address, + !subst(OFFSET, word_uimm12, + !subst(ALIGN, min_align4, decls.pattern)))), + (LDRSWx Base, !foreach(decls.pattern, Offset, + !subst(OFFSET, word_uimm12, decls.pattern)))>; +} + +// Straightforward patterns of last resort: a pointer with or without an +// appropriate offset. 
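+//
+// Each instantiation above uses a differently-scaled uimm12 operand: the
+// 12-bit immediate is implicitly multiplied by the access size. The legality
+// check this implies looks roughly like the following (a sketch mirroring,
+// not quoting, the limits the frame-index code applies later):
+//
+//   static bool isLegalUImm12(long Offset, unsigned AccessBytes) {
+//     if (Offset % AccessBytes != 0)         // must be a multiple of the scale
+//       return false;
+//     long Scaled = Offset / AccessBytes;
+//     return Scaled >= 0 && Scaled <= 0xfff; // 12 unsigned bits
+//   }
+//
+// so an 8-byte access reaches [base, base + 32760] in steps of 8, while a
+// byte access reaches only [base, base + 4095]. With that in mind, here are
+// those last-resort patterns: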
+defm : uimm12_pats<(i64 GPR64xsp:$Rn), (i64 GPR64xsp:$Rn), (i64 0)>;
+defm : uimm12_pats<(add GPR64xsp:$Rn, OFFSET:$UImm12),
+                   (i64 GPR64xsp:$Rn), (i64 OFFSET:$UImm12)>;
+
+// The offset could be hidden behind an "or", of course:
+defm : uimm12_pats<(add_like_or GPR64xsp:$Rn, OFFSET:$UImm12),
+                   (i64 GPR64xsp:$Rn), (i64 OFFSET:$UImm12)>;
+
+// Global addresses under the small-absolute model should use these
+// instructions. There are ELF relocations specifically for it.
+defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN),
+                   (ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>;
+
+defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12,
+                                    ALIGN),
+                   (ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>;
+
+// External symbols that make it this far should also get standard relocations.
+defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12,
+                                    ALIGN),
+                   (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>;
+
+defm : uimm12_pats<(A64WrapperSmall tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
+                   (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
+
+// We also want to use uimm12 instructions for local variables at the moment.
+def tframeindex_XFORM : SDNodeXForm<frameindex, [{
+  int FI = cast<FrameIndexSDNode>(N)->getIndex();
+  return CurDAG->getTargetFrameIndex(FI, MVT::i64);
+}]>;
+
+defm : uimm12_pats<(i64 frameindex:$Rn),
+                   (tframeindex_XFORM tframeindex:$Rn), (i64 0)>;
+
+// These can be much simpler than uimm12 because we don't need to change the
+// operand type (e.g. LDURB and LDURH take the same operands).
+multiclass simm9_pats<dag address, dag Base, dag Offset> {
+  defm : ls_small_pats<LS8_LDUR, LS8_STUR, Base, Offset, address, i8>;
+  defm : ls_small_pats<LS16_LDUR, LS16_STUR, Base, Offset, address, i16>;
+
+  defm : ls_int_neutral_pats<LS32_LDUR, LS32_STUR, Base, Offset, address,
+                             GPR32, i32>;
+  defm : ls_int_neutral_pats<LS64_LDUR, LS64_STUR, Base, Offset, address,
+                             GPR64, i64>;
+
+  defm : ls_neutral_pats<LSFP16_LDUR, LSFP16_STUR, Base, Offset, address,
+                         FPR16, f16>;
+  defm : ls_neutral_pats<LSFP32_LDUR, LSFP32_STUR, Base, Offset, address,
+                         FPR32, f32>;
+  defm : ls_neutral_pats<LSFP64_LDUR, LSFP64_STUR, Base, Offset, address,
+                         FPR64, f64>;
+  defm : ls_neutral_pats<LSFP128_LDUR, LSFP128_STUR, Base, Offset, address,
+                         FPR128, f128>;
+
+  def : Pat<(i64 (zextloadi32 address)),
+            (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>;
+
+  def : Pat<(truncstorei32 GPR64:$Rt, address),
+            (LS32_STUR (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset)>;
+
+  defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>;
+  defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>;
+  def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>;
+}
+
+defm : simm9_pats<(add GPR64xsp:$Rn, simm9:$SImm9),
+                  (i64 GPR64xsp:$Rn), (SDXF_simm9 simm9:$SImm9)>;
+
+defm : simm9_pats<(add_like_or GPR64xsp:$Rn, simm9:$SImm9),
+                  (i64 GPR64xsp:$Rn), (SDXF_simm9 simm9:$SImm9)>;
+
+
+//===------------------------------
+// 3. Register offset patterns
+//===------------------------------
+
+// Atomic patterns can be shared between integer operations of all sizes, so a
+// quick multiclass here allows reuse.
+multiclass ro_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
+                          dag Offset, dag Extend, dag address,
+                          RegisterClass TPR, ValueType sty> {
+  def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
+            (LOAD Base, Offset, Extend)>;
+
+  def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, TPR:$Rt),
+            (STORE TPR:$Rt, Base, Offset, Extend)>;
+}
+
+// The register offset instructions take three operands to specify the address,
+// and have an annoying split between instructions where Rm is 32-bit and
+// 64-bit. So we need a special hierarchy to describe them. Other than that the
+// same operations should be supported as for simm9 and uimm12 addressing.
+
+multiclass ro_small_pats<Instruction LOAD, Instruction STORE,
+                         dag Base, dag Offset, dag Extend,
+                         dag address, ValueType sty>
+  : ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, GPR32, sty> {
+  def : Pat<(!cast<SDNode>(zextload # sty) address),
+            (LOAD Base, Offset, Extend)>;
+
+  def : Pat<(!cast<SDNode>(extload # sty) address),
+            (LOAD Base, Offset, Extend)>;
+
+  // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
+  // register was actually set.
+  def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
+            (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
+
+  def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
+            (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
+
+  def : Pat<(!cast<SDNode>(truncstore # sty) GPR32:$Rt, address),
+            (STORE GPR32:$Rt, Base, Offset, Extend)>;
+
+  // For truncating store from 64-bits, we have to manually tell LLVM to
+  // ignore the high bits of the x register.
+  def : Pat<(!cast<SDNode>(truncstore # sty) GPR64:$Rt, address),
+            (STORE (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset, Extend)>;
+
+}
+
+// Next come patterns for sign-extending loads.
+multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend,
+                          dag address, ValueType sty> {
+  def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
+            (!cast<Instruction>("LDRS" # T # "w_" # Rm # "_RegOffset")
+              Base, Offset, Extend)>;
+
+  def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
+            (!cast<Instruction>("LDRS" # T # "x_" # Rm # "_RegOffset")
+              Base, Offset, Extend)>;
+}
+
+// and finally "natural-width" loads and stores come next.
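+//
+// (Throughout these register-offset multiclasses, the Extend operand is a
+// small immediate choosing the extension applied to Rm (UXTW, SXTW, SXTX or
+// plain LSL), and the address is the base plus that extended offset, shifted
+// by log2 of the access size when scaling is requested. A hedged sketch of
+// the semantics, not of any function in this backend:
+//
+//   #include <cstdint>
+//
+//   static uint64_t regOffsetAddr(uint64_t Base, uint64_t Rm, bool From32,
+//                                 bool Signed, unsigned ShiftLog2) {
+//     uint64_t Off = Rm;
+//     if (From32)
+//       Off = Signed ? (uint64_t)(int64_t)(int32_t)(uint32_t)Off
+//                    : (Off & 0xffffffffu);
+//     return Base + (Off << ShiftLog2); // ShiftLog2: 0 or log2(access size)
+//   }
+//
+// which is why each instantiation below substitutes a fixed SHIFT immediate,
+// imm_eq0 through imm_eq4, matching its access size.)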
+multiclass ro_neutral_pats<Instruction LOAD, Instruction STORE, + dag Base, dag Offset, dag Extend, dag address, + RegisterClass TPR, ValueType sty> { + def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>; + def : Pat<(store (sty TPR:$Rt), address), + (STORE TPR:$Rt, Base, Offset, Extend)>; +} + +multiclass ro_int_neutral_pats<Instruction LOAD, Instruction STORE, + dag Base, dag Offset, dag Extend, dag address, + RegisterClass TPR, ValueType sty> + : ro_neutral_pats<LOAD, STORE, Base, Offset, Extend, address, TPR, sty>, + ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, TPR, sty>; + +multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset, + dag Extend> { + defm : ro_small_pats<!cast<Instruction>("LS8_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LS8_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq0, decls.pattern)), + i8>; + defm : ro_small_pats<!cast<Instruction>("LS16_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LS16_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq1, decls.pattern)), + i16>; + defm : ro_small_pats<!cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern)), + i32>; + + defm : ro_int_neutral_pats< + !cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern)), + GPR32, i32>; + + defm : ro_int_neutral_pats< + !cast<Instruction>("LS64_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LS64_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq3, decls.pattern)), + GPR64, i64>; + + defm : ro_neutral_pats<!cast<Instruction>("LSFP16_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LSFP16_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq1, decls.pattern)), + FPR16, f16>; + + defm : ro_neutral_pats<!cast<Instruction>("LSFP32_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LSFP32_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern)), + FPR32, f32>; + + defm : ro_neutral_pats<!cast<Instruction>("LSFP64_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LSFP64_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq3, decls.pattern)), + FPR64, f64>; + + defm : ro_neutral_pats<!cast<Instruction>("LSFP128_" # Rm # "_RegOffset_LDR"), + !cast<Instruction>("LSFP128_" # Rm # "_RegOffset_STR"), + Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq4, decls.pattern)), + FPR128, f128>; + + defm : ro_signed_pats<"B", Rm, Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq0, decls.pattern)), + i8>; + + defm : ro_signed_pats<"H", Rm, Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq1, decls.pattern)), + i16>; + + def : Pat<(sextloadi32 !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq2, decls.pattern))), + (!cast<Instruction>("LDRSWx_" # Rm # "_RegOffset") + Base, Offset, Extend)>; +} + + +// Finally we're in a position to tell LLVM exactly what addresses are reachable +// using register-offset instructions. 
Essentially a base plus a possibly +// extended, possibly shifted (by access size) offset. + +defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (sext GPR32:$Rm)), + (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 6)>; + +defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (shl (sext GPR32:$Rm), SHIFT)), + (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 7)>; + +defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (zext GPR32:$Rm)), + (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 2)>; + +defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (shl (zext GPR32:$Rm), SHIFT)), + (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 3)>; + +defm : regoff_pats<"Xm", (add GPR64xsp:$Rn, GPR64:$Rm), + (i64 GPR64xsp:$Rn), (i64 GPR64:$Rm), (i64 2)>; + +defm : regoff_pats<"Xm", (add GPR64xsp:$Rn, (shl GPR64:$Rm, SHIFT)), + (i64 GPR64xsp:$Rn), (i64 GPR64:$Rm), (i64 3)>; diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp new file mode 100644 index 0000000..c96bf85 --- /dev/null +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -0,0 +1,140 @@ +//===-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower AArch64 MachineInstrs to their corresponding +// MCInst records. +// +//===----------------------------------------------------------------------===// + +#include "AArch64AsmPrinter.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +MCOperand +AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO, + const MCSymbol *Sym) const { + const MCExpr *Expr = 0; + + Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, OutContext); + + switch (MO.getTargetFlags()) { + case AArch64II::MO_GOT: + Expr = AArch64MCExpr::CreateGOT(Expr, OutContext); + break; + case AArch64II::MO_GOT_LO12: + Expr = AArch64MCExpr::CreateGOTLo12(Expr, OutContext); + break; + case AArch64II::MO_LO12: + Expr = AArch64MCExpr::CreateLo12(Expr, OutContext); + break; + case AArch64II::MO_DTPREL_G1: + Expr = AArch64MCExpr::CreateDTPREL_G1(Expr, OutContext); + break; + case AArch64II::MO_DTPREL_G0_NC: + Expr = AArch64MCExpr::CreateDTPREL_G0_NC(Expr, OutContext); + break; + case AArch64II::MO_GOTTPREL: + Expr = AArch64MCExpr::CreateGOTTPREL(Expr, OutContext); + break; + case AArch64II::MO_GOTTPREL_LO12: + Expr = AArch64MCExpr::CreateGOTTPRELLo12(Expr, OutContext); + break; + case AArch64II::MO_TLSDESC: + Expr = AArch64MCExpr::CreateTLSDesc(Expr, OutContext); + break; + case AArch64II::MO_TLSDESC_LO12: + Expr = AArch64MCExpr::CreateTLSDescLo12(Expr, OutContext); + break; + case AArch64II::MO_TPREL_G1: + Expr = AArch64MCExpr::CreateTPREL_G1(Expr, OutContext); + break; + case AArch64II::MO_TPREL_G0_NC: + Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext); + break; + case AArch64II::MO_NO_FLAG: + // Expr is already correct + break; + default: + llvm_unreachable("Unexpected MachineOperand flag"); + } + + if (!MO.isJTI() && MO.getOffset()) + Expr = MCBinaryExpr::CreateAdd(Expr, + 
MCConstantExpr::Create(MO.getOffset(), + OutContext), + OutContext); + + return MCOperand::CreateExpr(Expr); +} + +bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO, + MCOperand &MCOp) const { + switch (MO.getType()) { + default: llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: + if (MO.isImplicit()) + return false; + assert(!MO.getSubReg() && "Subregs should be eliminated!"); + MCOp = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_BlockAddress: + MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress())); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName())); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal())); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + MO.getMBB()->getSymbol(), OutContext)); + break; + case MachineOperand::MO_JumpTableIndex: + MCOp = lowerSymbolOperand(MO, GetJTISymbol(MO.getIndex())); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCOp = lowerSymbolOperand(MO, GetCPISymbol(MO.getIndex())); + break; + case MachineOperand::MO_RegisterMask: + // Ignore call clobbers + return false; + + } + + return true; +} + +void llvm::LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, + MCInst &OutMI, + AArch64AsmPrinter &AP) { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + if (AP.lowerOperand(MO, MCOp)) + OutMI.addOperand(MCOp); + } +} diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp new file mode 100644 index 0000000..f45d8f7 --- /dev/null +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp @@ -0,0 +1,18 @@ +//===-- AArch64MachineFuctionInfo.cpp - AArch64 machine function info -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file just contains the anchor for the AArch64MachineFunctionInfo to +// force vtable emission. +// +//===----------------------------------------------------------------------===// +#include "AArch64MachineFunctionInfo.h" + +using namespace llvm; + +void AArch64MachineFunctionInfo::anchor() { } diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h new file mode 100644 index 0000000..33da54f --- /dev/null +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -0,0 +1,149 @@ +//=- AArch64MachineFuctionInfo.h - AArch64 machine function info -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares AArch64-specific per-machine-function information. 
+// +//===----------------------------------------------------------------------===// + +#ifndef AARCH64MACHINEFUNCTIONINFO_H +#define AARCH64MACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// This class is derived from MachineFunctionInfo and contains private AArch64 +/// target-specific information for each MachineFunction. +class AArch64MachineFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); + + /// Number of bytes of arguments this function has on the stack. If the callee + /// is expected to restore the argument stack this should be a multiple of 16, + /// all usable during a tail call. + /// + /// The alternative would forbid tail call optimisation in some cases: if we + /// want to transfer control from a function with 8-bytes of stack-argument + /// space to a function with 16-bytes then misalignment of this value would + /// make a stack adjustment necessary, which could not be undone by the + /// callee. + unsigned BytesInStackArgArea; + + /// The number of bytes to restore to deallocate space for incoming + /// arguments. Canonically 0 in the C calling convention, but non-zero when + /// callee is expected to pop the args. + unsigned ArgumentStackToRestore; + + /// If the stack needs to be adjusted on frame entry in two stages, this + /// records the size of the first adjustment just prior to storing + /// callee-saved registers. The callee-saved slots are addressed assuming + /// SP == <incoming-SP> - InitialStackAdjust. + unsigned InitialStackAdjust; + + /// Number of local-dynamic TLS accesses. + unsigned NumLocalDynamics; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The Frame index of the area where LowerFormalArguments puts the + /// general-purpose registers that might contain variadic parameters. + int VariadicGPRIdx; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The size of the frame object used to store the general-purpose registers + /// which might contain variadic arguments. This is the offset from + /// VariadicGPRIdx to what's stored in __gr_top. + unsigned VariadicGPRSize; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The Frame index of the area where LowerFormalArguments puts the + /// floating-point registers that might contain variadic parameters. + int VariadicFPRIdx; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The size of the frame object used to store the floating-point registers + /// which might contain variadic arguments. This is the offset from + /// VariadicFPRIdx to what's stored in __vr_top. + unsigned VariadicFPRSize; + + /// @see AArch64 Procedure Call Standard, B.3 + /// + /// The Frame index of an object pointing just past the last known stacked + /// argument on entry to a variadic function. This goes into the __stack field + /// of the va_list type. + int VariadicStackIdx; + + /// The offset of the frame pointer from the stack pointer on function + /// entry. This is expected to be negative. 
+ int FramePointerOffset; + +public: + AArch64MachineFunctionInfo() + : BytesInStackArgArea(0), + ArgumentStackToRestore(0), + InitialStackAdjust(0), + NumLocalDynamics(0), + VariadicGPRIdx(0), + VariadicGPRSize(0), + VariadicFPRIdx(0), + VariadicFPRSize(0), + VariadicStackIdx(0), + FramePointerOffset(0) {} + + explicit AArch64MachineFunctionInfo(MachineFunction &MF) + : BytesInStackArgArea(0), + ArgumentStackToRestore(0), + InitialStackAdjust(0), + NumLocalDynamics(0), + VariadicGPRIdx(0), + VariadicGPRSize(0), + VariadicFPRIdx(0), + VariadicFPRSize(0), + VariadicStackIdx(0), + FramePointerOffset(0) {} + + unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; } + void setBytesInStackArgArea (unsigned bytes) { BytesInStackArgArea = bytes;} + + unsigned getArgumentStackToRestore() const { return ArgumentStackToRestore; } + void setArgumentStackToRestore(unsigned bytes) { + ArgumentStackToRestore = bytes; + } + + unsigned getInitialStackAdjust() const { return InitialStackAdjust; } + void setInitialStackAdjust(unsigned bytes) { InitialStackAdjust = bytes; } + + unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } + void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } + + int getVariadicGPRIdx() const { return VariadicGPRIdx; } + void setVariadicGPRIdx(int Idx) { VariadicGPRIdx = Idx; } + + unsigned getVariadicGPRSize() const { return VariadicGPRSize; } + void setVariadicGPRSize(unsigned Size) { VariadicGPRSize = Size; } + + int getVariadicFPRIdx() const { return VariadicFPRIdx; } + void setVariadicFPRIdx(int Idx) { VariadicFPRIdx = Idx; } + + unsigned getVariadicFPRSize() const { return VariadicFPRSize; } + void setVariadicFPRSize(unsigned Size) { VariadicFPRSize = Size; } + + int getVariadicStackIdx() const { return VariadicStackIdx; } + void setVariadicStackIdx(int Idx) { VariadicStackIdx = Idx; } + + int getFramePointerOffset() const { return FramePointerOffset; } + void setFramePointerOffset(int Idx) { FramePointerOffset = Idx; } + +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp new file mode 100644 index 0000000..20b0dcf --- /dev/null +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -0,0 +1,171 @@ +//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of the TargetRegisterInfo +// class. 
+// +//===----------------------------------------------------------------------===// + + +#include "AArch64RegisterInfo.h" +#include "AArch64FrameLowering.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/ADT/BitVector.h" + +#define GET_REGINFO_TARGET_DESC +#include "AArch64GenRegisterInfo.inc" + +using namespace llvm; + +AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo &tii, + const AArch64Subtarget &sti) + : AArch64GenRegisterInfo(AArch64::X30), TII(tii) { +} + +const uint16_t * +AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + return CSR_PCS_SaveList; +} + +const uint32_t* +AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID) const { + return CSR_PCS_RegMask; +} + +const uint32_t *AArch64RegisterInfo::getTLSDescCallPreservedMask() const { + return TLSDesc_RegMask; +} + +const TargetRegisterClass * +AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { + if (RC == &AArch64::FlagClassRegClass) + return &AArch64::GPR64RegClass; + + return RC; +} + + + +BitVector +AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + Reserved.set(AArch64::XSP); + Reserved.set(AArch64::WSP); + + Reserved.set(AArch64::XZR); + Reserved.set(AArch64::WZR); + + if (TFI->hasFP(MF)) { + Reserved.set(AArch64::X29); + Reserved.set(AArch64::W29); + } + + return Reserved; +} + +void +AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI, + int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS) const { + assert(SPAdj == 0 && "Cannot deal with nonzero SPAdj yet"); + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const AArch64FrameLowering *TFI = + static_cast<const AArch64FrameLowering *>(MF.getTarget().getFrameLowering()); + + // In order to work out the base and offset for addressing, the FrameLowering + // code needs to know (sometimes) whether the instruction is storing/loading a + // callee-saved register, or whether it's a more generic + // operation. Fortunately the frame indices are used *only* for that purpose + // and are contiguous, so we can check here. + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + bool IsCalleeSaveOp = FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI; + + unsigned FrameReg; + int64_t Offset; + Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj, + IsCalleeSaveOp); + + Offset += MI.getOperand(FIOperandNum + 1).getImm(); + + // DBG_VALUE instructions have no real restrictions so they can be handled + // easily. 
+ if (MI.isDebugValue()) { + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/ false); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); + return; + } + + int MinOffset, MaxOffset, OffsetScale; + if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s) { + MinOffset = 0; + MaxOffset = 0xfff; + OffsetScale = 1; + } else { + // Load/store of a stack object + TII.getAddressConstraints(MI, OffsetScale, MinOffset, MaxOffset); + } + + // The frame lowering has told us a base and offset it thinks we should use to + // access this variable, but it's still up to us to make sure the values are + // legal for the instruction in question. + if (Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) { + unsigned BaseReg = + MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); + emitRegUpdate(MBB, MBBI, MBBI->getDebugLoc(), TII, + BaseReg, FrameReg, BaseReg, Offset); + FrameReg = BaseReg; + Offset = 0; + } + + // Negative offsets are expected if we address from FP, but for + // now this checks nothing has gone horribly wrong. + assert(Offset >= 0 && "Unexpected negative offset from SP"); + + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, true); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset / OffsetScale); +} + +unsigned +AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (TFI->hasFP(MF)) + return AArch64::X29; + else + return AArch64::XSP; +} + +bool +AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const AArch64FrameLowering *AFI + = static_cast<const AArch64FrameLowering*>(TFI); + return AFI->useFPForAddressing(MF); +} diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h new file mode 100644 index 0000000..bb64fd5 --- /dev/null +++ b/lib/Target/AArch64/AArch64RegisterInfo.h @@ -0,0 +1,76 @@ +//==- AArch64RegisterInfo.h - AArch64 Register Information Impl -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of the MCRegisterInfo class. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64REGISTERINFO_H
+#define LLVM_TARGET_AARCH64REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "AArch64GenRegisterInfo.inc"
+
+namespace llvm {
+
+class AArch64InstrInfo;
+class AArch64Subtarget;
+
+struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
+private:
+ const AArch64InstrInfo &TII;
+
+public:
+ AArch64RegisterInfo(const AArch64InstrInfo &tii,
+ const AArch64Subtarget &sti);
+
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+
+ const uint32_t *getTLSDescCallPreservedMask() const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *Rs = NULL) const;
+
+ /// getCrossCopyRegClass - Returns a legal register class to copy a register
+ /// in the specified class to or from. Returns the original class if it is
+ /// possible to copy between two registers of the specified class.
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+ /// getLargestLegalSuperClass - Returns the largest super class of RC that is
+ /// legal to use in the current sub-target and has the same spill size.
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const {
+ if (RC == &AArch64::tcGPR64RegClass)
+ return &AArch64::GPR64RegClass;
+
+ return RC;
+ }
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool useFPForScavengingIndex(const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_AARCH64REGISTERINFO_H diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td new file mode 100644 index 0000000..bd79546 --- /dev/null +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -0,0 +1,203 @@
+//===- AArch64RegisterInfo.td - AArch64 Register defs ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains declarations that describe the AArch64 register file
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "AArch64" in {
+def sub_128 : SubRegIndex;
+def sub_64 : SubRegIndex;
+def sub_32 : SubRegIndex;
+def sub_16 : SubRegIndex;
+def sub_8 : SubRegIndex;
+
+// The VPR registers are handled as sub-registers of FPR equivalents, but
+// they're really the same thing. We give this concept a special index.
+def sub_alias : SubRegIndex;
+}
+
+// Registers are identified with 5-bit ID numbers.
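+// For example, "w7" and "x7" both carry encoding 7, while "wsp"/"sp" and
+// "wzr"/"xzr" all share encoding 31; which register is meant follows from the
+// instruction context.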
+class AArch64Reg<bits<16> enc, string n> : Register<n> { + let HWEncoding = enc; + let Namespace = "AArch64"; +} + +class AArch64RegWithSubs<bits<16> enc, string n, list<Register> subregs = [], + list<SubRegIndex> inds = []> + : AArch64Reg<enc, n> { + let SubRegs = subregs; + let SubRegIndices = inds; +} + +//===----------------------------------------------------------------------===// +// Integer registers: w0-w30, wzr, wsp, x0-x30, xzr, sp +//===----------------------------------------------------------------------===// + +foreach Index = 0-30 in { + def W#Index : AArch64Reg< Index, "w"#Index>, DwarfRegNum<[Index]>; +} + +def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>; +def WZR : AArch64Reg<31, "wzr">; + +// Could be combined with previous loop, but this way leaves w and x registers +// consecutive as LLVM register numbers, which makes for easier debugging. +foreach Index = 0-30 in { + def X#Index : AArch64RegWithSubs<Index, "x"#Index, + [!cast<Register>("W"#Index)], [sub_32]>, + DwarfRegNum<[Index]>; +} + +def XSP : AArch64RegWithSubs<31, "sp", [WSP], [sub_32]>, DwarfRegNum<[31]>; +def XZR : AArch64RegWithSubs<31, "xzr", [WZR], [sub_32]>; + +// Most instructions treat register 31 as zero for reads and a black-hole for +// writes. + +// Note that the order of registers is important for the Disassembler here: +// tablegen uses it to form MCRegisterClass::getRegister, which we assume can +// take an encoding value. +def GPR32 : RegisterClass<"AArch64", [i32], 32, + (add (sequence "W%u", 0, 30), WZR)> { +} + +def GPR64 : RegisterClass<"AArch64", [i64], 64, + (add (sequence "X%u", 0, 30), XZR)> { +} + +def GPR32nowzr : RegisterClass<"AArch64", [i32], 32, + (sequence "W%u", 0, 30)> { +} + +def GPR64noxzr : RegisterClass<"AArch64", [i64], 64, + (sequence "X%u", 0, 30)> { +} + +// For tail calls, we can't use callee-saved registers or the structure-return +// register, as they are supposed to be live across function calls and may be +// clobbered by the epilogue. +def tcGPR64 : RegisterClass<"AArch64", [i64], 64, + (add (sequence "X%u", 0, 7), + (sequence "X%u", 9, 18))> { +} + + +// Certain addressing-useful instructions accept sp directly. Again the order of +// registers is important to the Disassembler. +def GPR32wsp : RegisterClass<"AArch64", [i32], 32, + (add (sequence "W%u", 0, 30), WSP)> { +} + +def GPR64xsp : RegisterClass<"AArch64", [i64], 64, + (add (sequence "X%u", 0, 30), XSP)> { +} + +// Some aliases *only* apply to SP (e.g. MOV uses different encoding for SP and +// non-SP variants). 
We can't use a bare register in those patterns because
+// TableGen doesn't like it, so we need a class containing just stack registers.
+def Rxsp : RegisterClass<"AArch64", [i64], 64,
+ (add XSP)> {
+}
+
+def Rwsp : RegisterClass<"AArch64", [i32], 32,
+ (add WSP)> {
+}
+
+//===----------------------------------------------------------------------===//
+// Scalar registers in the vector unit:
+// b0-b31, h0-h31, s0-s31, d0-d31, q0-q31
+//===----------------------------------------------------------------------===//
+
+foreach Index = 0-31 in {
+ def B # Index : AArch64Reg< Index, "b" # Index>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def H # Index : AArch64RegWithSubs<Index, "h" # Index,
+ [!cast<Register>("B" # Index)], [sub_8]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def S # Index : AArch64RegWithSubs<Index, "s" # Index,
+ [!cast<Register>("H" # Index)], [sub_16]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def D # Index : AArch64RegWithSubs<Index, "d" # Index,
+ [!cast<Register>("S" # Index)], [sub_32]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def Q # Index : AArch64RegWithSubs<Index, "q" # Index,
+ [!cast<Register>("D" # Index)], [sub_64]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+}
+
+
+def FPR8 : RegisterClass<"AArch64", [i8], 8,
+ (sequence "B%u", 0, 31)> {
+}
+
+def FPR16 : RegisterClass<"AArch64", [f16], 16,
+ (sequence "H%u", 0, 31)> {
+}
+
+def FPR32 : RegisterClass<"AArch64", [f32], 32,
+ (sequence "S%u", 0, 31)> {
+}
+
+def FPR64 : RegisterClass<"AArch64", [f64], 64,
+ (sequence "D%u", 0, 31)> {
+}
+
+def FPR128 : RegisterClass<"AArch64", [f128], 128,
+ (sequence "Q%u", 0, 31)> {
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector registers:
+//===----------------------------------------------------------------------===//
+
+// NEON registers simply specify the overall vector, and it's expected that
+// Instructions will individually specify the acceptable data layout. In
+// principle this leaves two approaches open:
+// + An operand, giving a single ADDvvv instruction (for example). This turns
+// out to be unworkable in the assembly parser (without every Instruction
+// having a "cvt" function, at least) because the constraints can't be
+// properly enforced. It also complicates specifying patterns since each
+// instruction will accept many types.
+// + A bare token (e.g. ".2d"). This means the AsmParser has to know specific
+// details about NEON registers, but simplifies most other details.
+//
+// The second approach was taken.
+
+foreach Index = 0-31 in {
+ def V # Index : AArch64RegWithSubs<Index, "v" # Index,
+ [!cast<Register>("Q" # Index)],
+ [sub_alias]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+}
+
+// These two classes contain the same registers, which should be reasonably
+// sensible for MC and allocation purposes; keeping them distinct allows them
+// to be treated separately for things like stack spilling.
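+// For example, a v4i16 and a v2f32 value both occupy a VPR64 register; only
+// the layout token written on the instruction (".4h" vs ".2s") distinguishes
+// them.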
+def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8], 64,
+ (sequence "V%u", 0, 31)>;
+
+def VPR128 : RegisterClass<"AArch64",
+ [v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128,
+ (sequence "V%u", 0, 31)>;
+
+// Flags register
+def NZCV : Register<"nzcv"> {
+ let Namespace = "AArch64";
+}
+
+def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
+ let CopyCost = -1;
+ let isAllocatable = 0;
+} diff --git a/lib/Target/AArch64/AArch64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td new file mode 100644 index 0000000..e17cdaa --- /dev/null +++ b/lib/Target/AArch64/AArch64Schedule.td @@ -0,0 +1,10 @@
+//===- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def GenericItineraries : ProcessorItineraries<[], [], []>; diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp new file mode 100644 index 0000000..6bbe075 --- /dev/null +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -0,0 +1,25 @@
+//===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-selectiondag-info"
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const AArch64TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
+}
+
+AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {
+} diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h new file mode 100644 index 0000000..d412ed2 --- /dev/null +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -0,0 +1,32 @@
+//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AArch64 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64SELECTIONDAGINFO_H
+#define LLVM_AARCH64SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class AArch64Subtarget;
+class AArch64TargetMachine;
+
+class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo {
+ const AArch64Subtarget *Subtarget;
+public:
+ explicit AArch64SelectionDAGInfo(const AArch64TargetMachine &TM);
+ ~AArch64SelectionDAGInfo();
+};
+
+}
+
+#endif diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp new file mode 100644 index 0000000..d17b738 --- /dev/null +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -0,0 +1,43 @@
+//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64Subtarget.h"
+#include "AArch64RegisterInfo.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AArch64GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS)
+ : AArch64GenSubtargetInfo(TT, CPU, FS)
+ , HasNEON(true)
+ , HasCrypto(true)
+ , TargetTriple(TT) {
+
+ ParseSubtargetFeatures(CPU, FS);
+}
+
+bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV,
+ Reloc::Model RelocM) const {
+ if (RelocM == Reloc::Static)
+ return false;
+
+ return !GV->hasLocalLinkage() && !GV->hasHiddenVisibility();
+} diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h new file mode 100644 index 0000000..2e9205f --- /dev/null +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -0,0 +1,54 @@
+//==-- AArch64Subtarget.h - Define Subtarget for the AArch64 ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AArch64 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_SUBTARGET_H
+#define LLVM_TARGET_AARCH64_SUBTARGET_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AArch64GenSubtargetInfo.inc"
+
+#include <string>
+
+namespace llvm {
+class StringRef;
+class GlobalValue;
+
+class AArch64Subtarget : public AArch64GenSubtargetInfo {
+protected:
+ bool HasNEON;
+ bool HasCrypto;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+public:
+ /// This constructor initializes the data members to match those
+ /// of the specified triple.
+ ///
+ AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options.
The definition of this function is auto-generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+ bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_AARCH64_SUBTARGET_H diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp new file mode 100644 index 0000000..df599d5 --- /dev/null +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -0,0 +1,81 @@
+//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the AArch64TargetMachine
+// methods. Principally just setting up the passes needed to generate correct
+// code on this architecture.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeAArch64Target() {
+ RegisterTargetMachine<AArch64TargetMachine> X(TheAArch64Target);
+}
+
+AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ InstrInfo(Subtarget),
+ DL("e-p:64:64-i64:64:64-i128:128:128-s0:32:32-f128:128:128-n32:64-S128"),
+ TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget) {
+}
+
+namespace {
+/// AArch64 Code Generator Pass Configuration Options.
+class AArch64PassConfig : public TargetPassConfig {
+public:
+ AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ AArch64TargetMachine &getAArch64TargetMachine() const {
+ return getTM<AArch64TargetMachine>();
+ }
+
+ const AArch64Subtarget &getAArch64Subtarget() const {
+ return *getAArch64TargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new AArch64PassConfig(this, PM);
+}
+
+bool AArch64PassConfig::addPreEmitPass() {
+ addPass(&UnpackMachineBundlesID);
+ addPass(createAArch64BranchFixupPass());
+ return true;
+}
+
+bool AArch64PassConfig::addInstSelector() {
+ addPass(createAArch64ISelDAG(getAArch64TargetMachine(), getOptLevel()));
+
+ // For ELF, clean up any local-dynamic TLS accesses.
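+ // (A function with several local-dynamic TLS accesses can share a single
+ // computation of the TLS base; the cleanup pass exists to merge such
+ // duplicates.)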
+ if (getAArch64Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None) + addPass(createAArch64CleanupLocalDynamicTLSPass()); + + return false; +} diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h new file mode 100644 index 0000000..c1f47c2 --- /dev/null +++ b/lib/Target/AArch64/AArch64TargetMachine.h @@ -0,0 +1,69 @@ +//=== AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the AArch64 specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64TARGETMACHINE_H +#define LLVM_AARCH64TARGETMACHINE_H + +#include "AArch64FrameLowering.h" +#include "AArch64ISelLowering.h" +#include "AArch64InstrInfo.h" +#include "AArch64SelectionDAGInfo.h" +#include "AArch64Subtarget.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class AArch64TargetMachine : public LLVMTargetMachine { + AArch64Subtarget Subtarget; + AArch64InstrInfo InstrInfo; + const DataLayout DL; + AArch64TargetLowering TLInfo; + AArch64SelectionDAGInfo TSInfo; + AArch64FrameLowering FrameLowering; + +public: + AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + + const AArch64InstrInfo *getInstrInfo() const { + return &InstrInfo; + } + + const AArch64FrameLowering *getFrameLowering() const { + return &FrameLowering; + } + + const AArch64TargetLowering *getTargetLowering() const { + return &TLInfo; + } + + const AArch64SelectionDAGInfo *getSelectionDAGInfo() const { + return &TSInfo; + } + + const AArch64Subtarget *getSubtargetImpl() const { return &Subtarget; } + + const DataLayout *getDataLayout() const { return &DL; } + + const TargetRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + TargetPassConfig *createPassConfig(PassManagerBase &PM); +}; + +} + +#endif diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp new file mode 100644 index 0000000..b4452f5 --- /dev/null +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -0,0 +1,24 @@ +//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file deals with any AArch64 specific requirements on object files. 
+// +//===----------------------------------------------------------------------===// + + +#include "AArch64TargetObjectFile.h" + +using namespace llvm; + +void +AArch64LinuxTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h new file mode 100644 index 0000000..bf0565a --- /dev/null +++ b/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -0,0 +1,31 @@ +//===-- AArch64TargetObjectFile.h - AArch64 Object Info ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file deals with any AArch64 specific requirements on object files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H +#define LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +namespace llvm { + + /// AArch64LinuxTargetObjectFile - This implementation is used for linux + /// AArch64. + class AArch64LinuxTargetObjectFile : public TargetLoweringObjectFileELF { + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + }; + +} // end namespace llvm + +#endif diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp new file mode 100644 index 0000000..c1695da --- /dev/null +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -0,0 +1,2188 @@ +//==- AArch64AsmParser.cpp - Parse AArch64 assembly to MCInst instructions -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the (GNU-style) assembly parser for the AArch64 +// architecture. 
+// +//===----------------------------------------------------------------------===// + + +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +namespace { + +class AArch64Operand; + +class AArch64AsmParser : public MCTargetAsmParser { + MCSubtargetInfo &STI; + MCAsmParser &Parser; + +#define GET_ASSEMBLER_HEADER +#include "AArch64GenAsmMatcher.inc" + +public: + enum AArch64MatchResultTy { + Match_FirstAArch64 = FIRST_TARGET_MATCH_RESULT_TY, +#define GET_OPERAND_DIAGNOSTIC_TYPES +#include "AArch64GenAsmMatcher.inc" + }; + + AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) + : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { + MCAsmParserExtension::Initialize(_Parser); + + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + } + + // These are the public interface of the MCTargetAsmParser + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + bool ParseDirective(AsmToken DirectiveID); + bool ParseDirectiveTLSDescCall(SMLoc L); + bool ParseDirectiveWord(unsigned Size, SMLoc L); + + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer&Out, unsigned &ErrorInfo, + bool MatchingInlineAsm); + + // The rest of the sub-parsers have more freedom over interface: they return + // an OperandMatchResultTy because it's less ambiguous than true/false or + // -1/0/1 even if it is more verbose + OperandMatchResultTy + ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + StringRef Mnemonic); + + OperandMatchResultTy ParseImmediate(const MCExpr *&ExprVal); + + OperandMatchResultTy ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind); + + OperandMatchResultTy + ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + uint32_t NumLanes); + + OperandMatchResultTy + ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + uint32_t &NumLanes); + + OperandMatchResultTy + ParseImmWithLSLOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + OperandMatchResultTy + ParseCondCodeOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + OperandMatchResultTy + ParseCRxOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + OperandMatchResultTy + ParseFPImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + template<typename SomeNamedImmMapper> OperandMatchResultTy + ParseNamedImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return ParseNamedImmOperand(SomeNamedImmMapper(), Operands); + } + + OperandMatchResultTy + ParseNamedImmOperand(const NamedImmMapper &Mapper, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + OperandMatchResultTy + 
ParseLSXAddressOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseShiftExtend(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseSysRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ /// Scan the next token (which had better be an identifier) and determine
+ /// whether it represents a general-purpose or vector register. It returns
+ /// true if an identifier was found and populates its reference arguments. It
+ /// does not consume the token.
+ bool
+ IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, StringRef &LayoutSpec,
+ SMLoc &LayoutLoc) const;
+
+};
+
+}
+
+namespace {
+
+/// Instances of this class represent a parsed AArch64 machine instruction.
+class AArch64Operand : public MCParsedAsmOperand {
+private:
+ enum KindTy {
+ k_ImmWithLSL, // #uimm {, LSL #amt }
+ k_CondCode, // eq/ne/...
+ k_FPImmediate, // Limited-precision floating-point imm
+ k_Immediate, // Including expressions referencing symbols
+ k_Register,
+ k_ShiftExtend,
+ k_SysReg, // The register operand of MRS and MSR instructions
+ k_Token, // The mnemonic; other raw tokens the auto-generated matcher needs
+ k_WrappedRegister // Load/store exclusive instructions permit a wrapped register.
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+ union {
+ struct {
+ const MCExpr *Val;
+ unsigned ShiftAmount;
+ bool ImplicitAmount;
+ } ImmWithLSL;
+
+ struct {
+ A64CC::CondCodes Code;
+ } CondCode;
+
+ struct {
+ double Val;
+ } FPImm;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ struct {
+ unsigned RegNum;
+ } Reg;
+
+ struct {
+ A64SE::ShiftExtSpecifiers ShiftType;
+ unsigned Amount;
+ bool ImplicitAmount;
+ } ShiftExtend;
+
+ struct {
+ const char *Data;
+ unsigned Length;
+ } SysReg;
+
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+ };
+
+ AArch64Operand(KindTy K, SMLoc S, SMLoc E)
+ : MCParsedAsmOperand(), Kind(K), StartLoc(S), EndLoc(E) {}
+
+public:
+ AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand() {
+ }
+
+ SMLoc getStartLoc() const { return StartLoc; }
+ SMLoc getEndLoc() const { return EndLoc; }
+ void print(raw_ostream&) const;
+ void dump() const;
+
+ StringRef getToken() const {
+ assert(Kind == k_Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const {
+ assert((Kind == k_Register || Kind == k_WrappedRegister)
+ && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == k_Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ A64CC::CondCodes getCondCode() const {
+ assert(Kind == k_CondCode && "Invalid access!");
+ return CondCode.Code;
+ }
+
+ static bool isNonConstantExpr(const MCExpr *E,
+ AArch64MCExpr::VariantKind &Variant) {
+ if (const AArch64MCExpr *A64E = dyn_cast<AArch64MCExpr>(E)) {
+ Variant = A64E->getKind();
+ return true;
+ } else if (!isa<MCConstantExpr>(E)) {
+ Variant = AArch64MCExpr::VK_AARCH64_None;
+ return true;
+ }
+
+ return false;
+ }
+
+ bool isCondCode() const { return Kind == k_CondCode; }
+ bool isToken() const { return Kind == k_Token; }
+ bool isReg() const { return Kind == k_Register; }
+ bool isImm() const { return Kind == k_Immediate; }
+ bool isMem() const { return false; }
+ bool isFPImm() const { return Kind == k_FPImmediate; }
+ bool isShiftOrExtend() const { return Kind == k_ShiftExtend; }
+ bool isSysReg() const { return Kind == k_SysReg; }
+ bool isImmWithLSL() const { return Kind == k_ImmWithLSL; }
+
bool isWrappedReg() const { return Kind == k_WrappedRegister; } + + bool isAddSubImmLSL0() const { + if (!isImmWithLSL()) return false; + if (ImmWithLSL.ShiftAmount != 0) return false; + + AArch64MCExpr::VariantKind Variant; + if (isNonConstantExpr(ImmWithLSL.Val, Variant)) { + return Variant == AArch64MCExpr::VK_AARCH64_LO12 + || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12 + || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC + || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12 + || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC + || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC_LO12; + } + + // Otherwise it should be a real immediate in range: + const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val); + return CE->getValue() >= 0 && CE->getValue() <= 0xfff; + } + + bool isAddSubImmLSL12() const { + if (!isImmWithLSL()) return false; + if (ImmWithLSL.ShiftAmount != 12) return false; + + AArch64MCExpr::VariantKind Variant; + if (isNonConstantExpr(ImmWithLSL.Val, Variant)) { + return Variant == AArch64MCExpr::VK_AARCH64_DTPREL_HI12 + || Variant == AArch64MCExpr::VK_AARCH64_TPREL_HI12; + } + + // Otherwise it should be a real immediate in range: + const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val); + return CE->getValue() >= 0 && CE->getValue() <= 0xfff; + } + + template<unsigned MemSize, unsigned RmSize> bool isAddrRegExtend() const { + if (!isShiftOrExtend()) return false; + + A64SE::ShiftExtSpecifiers Ext = ShiftExtend.ShiftType; + if (RmSize == 32 && !(Ext == A64SE::UXTW || Ext == A64SE::SXTW)) + return false; + + if (RmSize == 64 && !(Ext == A64SE::LSL || Ext == A64SE::SXTX)) + return false; + + return ShiftExtend.Amount == Log2_32(MemSize) || ShiftExtend.Amount == 0; + } + + bool isAdrpLabel() const { + if (!isImm()) return false; + + AArch64MCExpr::VariantKind Variant; + if (isNonConstantExpr(getImm(), Variant)) { + return Variant == AArch64MCExpr::VK_AARCH64_None + || Variant == AArch64MCExpr::VK_AARCH64_GOT + || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL + || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC; + } + + return isLabel<21, 4096>(); + } + + template<unsigned RegWidth> bool isBitfieldWidth() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + + return CE->getValue() >= 1 && CE->getValue() <= RegWidth; + } + + template<int RegWidth> + bool isCVTFixedPos() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + + return CE->getValue() >= 1 && CE->getValue() <= RegWidth; + } + + bool isFMOVImm() const { + if (!isFPImm()) return false; + + APFloat RealVal(FPImm.Val); + uint32_t ImmVal; + return A64Imms::isFPImm(RealVal, ImmVal); + } + + bool isFPZero() const { + if (!isFPImm()) return false; + + APFloat RealVal(FPImm.Val); + return RealVal.isPosZero(); + } + + template<unsigned field_width, unsigned scale> + bool isLabel() const { + if (!isImm()) return false; + + if (dyn_cast<MCSymbolRefExpr>(Imm.Val)) { + return true; + } else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) { + int64_t Val = CE->getValue(); + int64_t Min = - (scale * (1LL << (field_width - 1))); + int64_t Max = scale * ((1LL << (field_width - 1)) - 1); + return (Val % scale) == 0 && Val >= Min && Val <= Max; + } + + // N.b. this disallows explicit relocation specifications via an + // AArch64MCExpr. 
Users needing that behaviour won't match against these label operands.
+ return false;
+ }
+
+ bool isLane1() const {
+ if (!isImm()) return false;
+
+ // Because it's come through custom assembly parsing, it must always be a
+ // constant expression.
+ return cast<MCConstantExpr>(getImm())->getValue() == 1;
+ }
+
+ bool isLoadLitLabel() const {
+ if (!isImm()) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(getImm(), Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_None
+ || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL;
+ }
+
+ return isLabel<19, 4>();
+ }
+
+ template<unsigned RegWidth> bool isLogicalImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val);
+ if (!CE) return false;
+
+ uint32_t Bits;
+ return A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits);
+ }
+
+ template<unsigned RegWidth> bool isLogicalImmMOV() const {
+ if (!isLogicalImm<RegWidth>()) return false;
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val);
+
+ // The move alias for ORR is only valid if the immediate cannot be
+ // represented with a move (immediate) instruction; they take priority.
+ int UImm16, Shift;
+ return !A64Imms::isMOVZImm(RegWidth, CE->getValue(), UImm16, Shift)
+ && !A64Imms::isMOVNImm(RegWidth, CE->getValue(), UImm16, Shift);
+ }
+
+ template<int MemSize>
+ bool isOffsetUImm12() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+
+ // Assume they know what they're doing for now if they've given us a
+ // non-constant expression. In principle we could check for ridiculous
+ // things that can't possibly work or relocations that would almost
+ // certainly break resulting code.
+ if (!CE)
+ return true;
+
+ int64_t Val = CE->getValue();
+
+ // Must be a multiple of the access size in bytes.
+ if ((Val & (MemSize - 1)) != 0) return false;
+
+ // Must be 12-bit unsigned.
+ return Val >= 0 && Val <= 0xfff * MemSize;
+ }
+
+ template<A64SE::ShiftExtSpecifiers SHKind, bool is64Bit>
+ bool isShift() const {
+ if (!isShiftOrExtend()) return false;
+
+ if (ShiftExtend.ShiftType != SHKind)
+ return false;
+
+ return is64Bit ?
ShiftExtend.Amount <= 63 : ShiftExtend.Amount <= 31; + } + + bool isMOVN32Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_SABS_G0, + AArch64MCExpr::VK_AARCH64_SABS_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G0, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G0, + }; + unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(32, PermittedModifiers, NumModifiers); + } + + bool isMOVN64Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_SABS_G0, + AArch64MCExpr::VK_AARCH64_SABS_G1, + AArch64MCExpr::VK_AARCH64_SABS_G2, + AArch64MCExpr::VK_AARCH64_DTPREL_G2, + AArch64MCExpr::VK_AARCH64_DTPREL_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G0, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G2, + AArch64MCExpr::VK_AARCH64_TPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G0, + }; + unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(64, PermittedModifiers, NumModifiers); + } + + + bool isMOVZ32Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_ABS_G0, + AArch64MCExpr::VK_AARCH64_ABS_G1, + AArch64MCExpr::VK_AARCH64_SABS_G0, + AArch64MCExpr::VK_AARCH64_SABS_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G0, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G0, + }; + unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(32, PermittedModifiers, NumModifiers); + } + + bool isMOVZ64Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_ABS_G0, + AArch64MCExpr::VK_AARCH64_ABS_G1, + AArch64MCExpr::VK_AARCH64_ABS_G2, + AArch64MCExpr::VK_AARCH64_ABS_G3, + AArch64MCExpr::VK_AARCH64_SABS_G0, + AArch64MCExpr::VK_AARCH64_SABS_G1, + AArch64MCExpr::VK_AARCH64_SABS_G2, + AArch64MCExpr::VK_AARCH64_DTPREL_G2, + AArch64MCExpr::VK_AARCH64_DTPREL_G1, + AArch64MCExpr::VK_AARCH64_DTPREL_G0, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G2, + AArch64MCExpr::VK_AARCH64_TPREL_G1, + AArch64MCExpr::VK_AARCH64_TPREL_G0, + }; + unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(64, PermittedModifiers, NumModifiers); + } + + bool isMOVK32Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_ABS_G0_NC, + AArch64MCExpr::VK_AARCH64_ABS_G1_NC, + AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC, + AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC, + AArch64MCExpr::VK_AARCH64_TPREL_G1_NC, + AArch64MCExpr::VK_AARCH64_TPREL_G0_NC, + }; + unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(32, PermittedModifiers, NumModifiers); + } + + bool isMOVK64Imm() const { + static AArch64MCExpr::VariantKind PermittedModifiers[] = { + AArch64MCExpr::VK_AARCH64_ABS_G0_NC, + AArch64MCExpr::VK_AARCH64_ABS_G1_NC, + AArch64MCExpr::VK_AARCH64_ABS_G2_NC, + AArch64MCExpr::VK_AARCH64_ABS_G3, + AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC, + AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC, + AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC, + AArch64MCExpr::VK_AARCH64_TPREL_G1_NC, + AArch64MCExpr::VK_AARCH64_TPREL_G0_NC, + }; + unsigned NumModifiers = 
llvm::array_lengthof(PermittedModifiers); + + return isMoveWideImm(64, PermittedModifiers, NumModifiers); + } + + bool isMoveWideImm(unsigned RegWidth, + AArch64MCExpr::VariantKind *PermittedModifiers, + unsigned NumModifiers) const { + if (!isImmWithLSL()) return false; + + if (ImmWithLSL.ShiftAmount % 16 != 0) return false; + if (ImmWithLSL.ShiftAmount >= RegWidth) return false; + + AArch64MCExpr::VariantKind Modifier; + if (isNonConstantExpr(ImmWithLSL.Val, Modifier)) { + // E.g. "#:abs_g0:sym, lsl #16" makes no sense. + if (!ImmWithLSL.ImplicitAmount) return false; + + for (unsigned i = 0; i < NumModifiers; ++i) + if (PermittedModifiers[i] == Modifier) return true; + + return false; + } + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmWithLSL.Val); + return CE && CE->getValue() >= 0 && CE->getValue() <= 0xffff; + } + + template<int RegWidth, bool (*isValidImm)(int, uint64_t, int&, int&)> + bool isMoveWideMovAlias() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + + int UImm16, Shift; + uint64_t Value = CE->getValue(); + + // If this is a 32-bit instruction then all bits above 32 should be the + // same: either of these is fine because signed/unsigned values should be + // permitted. + if (RegWidth == 32) { + if ((Value >> 32) != 0 && (Value >> 32) != 0xffffffff) + return false; + + Value &= 0xffffffffULL; + } + + return isValidImm(RegWidth, Value, UImm16, Shift); + } + + bool isMSRWithReg() const { + if (!isSysReg()) return false; + + bool IsKnownRegister; + StringRef Name(SysReg.Data, SysReg.Length); + A64SysReg::MSRMapper().fromString(Name, IsKnownRegister); + + return IsKnownRegister; + } + + bool isMSRPState() const { + if (!isSysReg()) return false; + + bool IsKnownRegister; + StringRef Name(SysReg.Data, SysReg.Length); + A64PState::PStateMapper().fromString(Name, IsKnownRegister); + + return IsKnownRegister; + } + + bool isMRS() const { + if (!isSysReg()) return false; + + // First check against specific MSR-only (write-only) registers + bool IsKnownRegister; + StringRef Name(SysReg.Data, SysReg.Length); + A64SysReg::MRSMapper().fromString(Name, IsKnownRegister); + + return IsKnownRegister; + } + + bool isPRFM() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + + if (!CE) + return false; + + return CE->getValue() >= 0 && CE->getValue() <= 31; + } + + template<A64SE::ShiftExtSpecifiers SHKind> bool isRegExtend() const { + if (!isShiftOrExtend()) return false; + + if (ShiftExtend.ShiftType != SHKind) + return false; + + return ShiftExtend.Amount <= 4; + } + + bool isRegExtendLSL() const { + if (!isShiftOrExtend()) return false; + + if (ShiftExtend.ShiftType != A64SE::LSL) + return false; + + return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4; + } + + template<int MemSize> bool isSImm7Scaled() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + + int64_t Val = CE->getValue(); + if (Val % MemSize != 0) return false; + + Val /= MemSize; + + return Val >= -64 && Val < 64; + } + + template<int BitWidth> + bool isSImm() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + + return CE->getValue() >= -(1LL << (BitWidth - 1)) + && CE->getValue() < (1LL << (BitWidth - 1)); + } + + template<int bitWidth> + bool isUImm() const { + if (!isImm()) return false; + + const 
MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + + return CE->getValue() >= 0 && CE->getValue() < (1LL << bitWidth); + } + + bool isUImm() const { + if (!isImm()) return false; + + return isa<MCConstantExpr>(getImm()); + } + + static AArch64Operand *CreateImmWithLSL(const MCExpr *Val, + unsigned ShiftAmount, + bool ImplicitAmount, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E); + Op->ImmWithLSL.Val = Val; + Op->ImmWithLSL.ShiftAmount = ShiftAmount; + Op->ImmWithLSL.ImplicitAmount = ImplicitAmount; + return Op; + } + + static AArch64Operand *CreateCondCode(A64CC::CondCodes Code, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_CondCode, S, E); + Op->CondCode.Code = Code; + return Op; + } + + static AArch64Operand *CreateFPImm(double Val, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_FPImmediate, S, E); + Op->FPImm.Val = Val; + return Op; + } + + static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_Immediate, S, E); + Op->Imm.Val = Val; + return Op; + } + + static AArch64Operand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_Register, S, E); + Op->Reg.RegNum = RegNum; + return Op; + } + + static AArch64Operand *CreateWrappedReg(unsigned RegNum, SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_WrappedRegister, S, E); + Op->Reg.RegNum = RegNum; + return Op; + } + + static AArch64Operand *CreateShiftExtend(A64SE::ShiftExtSpecifiers ShiftTyp, + unsigned Amount, + bool ImplicitAmount, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, S, E); + Op->ShiftExtend.ShiftType = ShiftTyp; + Op->ShiftExtend.Amount = Amount; + Op->ShiftExtend.ImplicitAmount = ImplicitAmount; + return Op; + } + + static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S) { + AArch64Operand *Op = new AArch64Operand(k_SysReg, S, S); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + return Op; + } + + static AArch64Operand *CreateToken(StringRef Str, SMLoc S) { + AArch64Operand *Op = new AArch64Operand(k_Token, S, S); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + return Op; + } + + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. 
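+ // For example, a plain "#42" reaches this point as an MCConstantExpr and is
+ // added as an immediate operand; symbolic expressions stay as MCExpr so they
+ // can be resolved later.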
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + + template<unsigned RegWidth> + void addBFILSBOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + unsigned EncodedVal = (RegWidth - CE->getValue()) % RegWidth; + Inst.addOperand(MCOperand::CreateImm(EncodedVal)); + } + + void addBFIWidthOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1)); + } + + void addBFXWidthOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + uint64_t LSB = Inst.getOperand(Inst.getNumOperands()-1).getImm(); + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + + Inst.addOperand(MCOperand::CreateImm(LSB + CE->getValue() - 1)); + } + + void addCondCodeOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getCondCode())); + } + + void addCVTFixedPosOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(64 - CE->getValue())); + } + + void addFMOVImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + APFloat RealVal(FPImm.Val); + uint32_t ImmVal; + A64Imms::isFPImm(RealVal, ImmVal); + + Inst.addOperand(MCOperand::CreateImm(ImmVal)); + } + + void addFPZeroOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + Inst.addOperand(MCOperand::CreateImm(0)); + } + + void addInvCondCodeOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + unsigned Encoded = A64InvertCondCode(getCondCode()); + Inst.addOperand(MCOperand::CreateImm(Encoded)); + } + + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getReg())); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + template<int MemSize> + void addSImm7ScaledOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + uint64_t Val = CE->getValue() / MemSize; + Inst.addOperand(MCOperand::CreateImm(Val & 0x7f)); + } + + template<int BitWidth> + void addSImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + uint64_t Val = CE->getValue(); + Inst.addOperand(MCOperand::CreateImm(Val & ((1ULL << BitWidth) - 1))); + } + + void addImmWithLSLOperands(MCInst &Inst, unsigned N) const { + assert (N == 1 && "Invalid number of operands!"); + + addExpr(Inst, ImmWithLSL.Val); + } + + template<unsigned field_width, unsigned scale> + void addLabelOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val); + + if (!CE) { + addExpr(Inst, Imm.Val); + return; + } + + int64_t Val = CE->getValue(); + assert(Val % scale == 0 && "Unaligned 
immediate in instruction"); + Val /= scale; + + Inst.addOperand(MCOperand::CreateImm(Val & ((1LL << field_width) - 1))); + } + + template<int MemSize> + void addOffsetUImm12Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) { + Inst.addOperand(MCOperand::CreateImm(CE->getValue() / MemSize)); + } else { + Inst.addOperand(MCOperand::CreateExpr(getImm())); + } + } + + template<unsigned RegWidth> + void addLogicalImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val); + + uint32_t Bits; + A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits); + + Inst.addOperand(MCOperand::CreateImm(Bits)); + } + + void addMRSOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + bool Valid; + StringRef Name(SysReg.Data, SysReg.Length); + uint32_t Bits = A64SysReg::MRSMapper().fromString(Name, Valid); + + Inst.addOperand(MCOperand::CreateImm(Bits)); + } + + void addMSRWithRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + bool Valid; + StringRef Name(SysReg.Data, SysReg.Length); + uint32_t Bits = A64SysReg::MSRMapper().fromString(Name, Valid); + + Inst.addOperand(MCOperand::CreateImm(Bits)); + } + + void addMSRPStateOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + bool Valid; + StringRef Name(SysReg.Data, SysReg.Length); + uint32_t Bits = A64PState::PStateMapper().fromString(Name, Valid); + + Inst.addOperand(MCOperand::CreateImm(Bits)); + } + + void addMoveWideImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + + addExpr(Inst, ImmWithLSL.Val); + + AArch64MCExpr::VariantKind Variant; + if (!isNonConstantExpr(ImmWithLSL.Val, Variant)) { + Inst.addOperand(MCOperand::CreateImm(ImmWithLSL.ShiftAmount / 16)); + return; + } + + // We know it's relocated + switch (Variant) { + case AArch64MCExpr::VK_AARCH64_ABS_G0: + case AArch64MCExpr::VK_AARCH64_ABS_G0_NC: + case AArch64MCExpr::VK_AARCH64_SABS_G0: + case AArch64MCExpr::VK_AARCH64_DTPREL_G0: + case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC: + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC: + case AArch64MCExpr::VK_AARCH64_TPREL_G0: + case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC: + Inst.addOperand(MCOperand::CreateImm(0)); + break; + case AArch64MCExpr::VK_AARCH64_ABS_G1: + case AArch64MCExpr::VK_AARCH64_ABS_G1_NC: + case AArch64MCExpr::VK_AARCH64_SABS_G1: + case AArch64MCExpr::VK_AARCH64_DTPREL_G1: + case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC: + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: + case AArch64MCExpr::VK_AARCH64_TPREL_G1: + case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC: + Inst.addOperand(MCOperand::CreateImm(1)); + break; + case AArch64MCExpr::VK_AARCH64_ABS_G2: + case AArch64MCExpr::VK_AARCH64_ABS_G2_NC: + case AArch64MCExpr::VK_AARCH64_SABS_G2: + case AArch64MCExpr::VK_AARCH64_DTPREL_G2: + case AArch64MCExpr::VK_AARCH64_TPREL_G2: + Inst.addOperand(MCOperand::CreateImm(2)); + break; + case AArch64MCExpr::VK_AARCH64_ABS_G3: + Inst.addOperand(MCOperand::CreateImm(3)); + break; + default: llvm_unreachable("Inappropriate move wide relocation"); + } + } + + template<int RegWidth, bool isValidImm(int, uint64_t, int&, int&)> + void addMoveWideMovAliasOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + int UImm16, Shift; + + const 
MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + uint64_t Value = CE->getValue(); + + if (RegWidth == 32) { + Value &= 0xffffffffULL; + } + + bool Valid = isValidImm(RegWidth, Value, UImm16, Shift); + (void)Valid; + assert(Valid && "Invalid immediates should have been weeded out by now"); + + Inst.addOperand(MCOperand::CreateImm(UImm16)); + Inst.addOperand(MCOperand::CreateImm(Shift)); + } + + void addPRFMOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + const MCConstantExpr *CE = cast<MCConstantExpr>(getImm()); + assert(CE->getValue() >= 0 && CE->getValue() <= 31 + && "PRFM operand should be 5-bits"); + + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + } + + // For Add-sub (extended register) operands. + void addRegExtendOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount)); + } + + // For the extend in load-store (register offset) instructions. + template<unsigned MemSize> + void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const { + addAddrRegExtendOperands(Inst, N, MemSize); + } + + void addAddrRegExtendOperands(MCInst &Inst, unsigned N, + unsigned MemSize) const { + assert(N == 1 && "Invalid number of operands!"); + + // First bit of Option is set in instruction classes, the high two bits are + // as follows: + unsigned OptionHi = 0; + switch (ShiftExtend.ShiftType) { + case A64SE::UXTW: + case A64SE::LSL: + OptionHi = 1; + break; + case A64SE::SXTW: + case A64SE::SXTX: + OptionHi = 3; + break; + default: + llvm_unreachable("Invalid extend type for register offset"); + } + + unsigned S = 0; + if (MemSize == 1 && !ShiftExtend.ImplicitAmount) + S = 1; + else if (MemSize != 1 && ShiftExtend.Amount != 0) + S = 1; + + Inst.addOperand(MCOperand::CreateImm((OptionHi << 1) | S)); + } + void addShiftOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount)); + } +}; + +} // end anonymous namespace. + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + StringRef Mnemonic) { + + // See if the operand has a custom parser + OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + + // It could either succeed, fail or just not care. + if (ResTy != MatchOperand_NoMatch) + return ResTy; + + switch (getLexer().getKind()) { + default: + Error(Parser.getTok().getLoc(), "unexpected token in operand"); + return MatchOperand_ParseFail; + case AsmToken::Identifier: { + // It might be in the LSL/UXTB family ... + OperandMatchResultTy GotShift = ParseShiftExtend(Operands); + + // We can only continue if no tokens were eaten. + if (GotShift != MatchOperand_NoMatch) + return GotShift; + + // ... or it might be a register ... + uint32_t NumLanes = 0; + OperandMatchResultTy GotReg = ParseRegister(Operands, NumLanes); + assert(GotReg != MatchOperand_ParseFail + && "register parsing shouldn't partially succeed"); + + if (GotReg == MatchOperand_Success) { + if (Parser.getTok().is(AsmToken::LBrac)) + return ParseNEONLane(Operands, NumLanes); + else + return MatchOperand_Success; + } + + // ... or it might be a symbolish thing + } + // Fall through + case AsmToken::LParen: // E.g. (strcmp-4) + case AsmToken::Integer: // 1f, 2b labels + case AsmToken::String: // quoted labels + case AsmToken::Dot: // . 
is Current location + case AsmToken::Dollar: // $ is PC + case AsmToken::Colon: { + SMLoc StartLoc = Parser.getTok().getLoc(); + SMLoc EndLoc; + const MCExpr *ImmVal = 0; + + if (ParseImmediate(ImmVal) != MatchOperand_Success) + return MatchOperand_ParseFail; + + EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc)); + return MatchOperand_Success; + } + case AsmToken::Hash: { // Immediates + SMLoc StartLoc = Parser.getTok().getLoc(); + SMLoc EndLoc; + const MCExpr *ImmVal = 0; + Parser.Lex(); + + if (ParseImmediate(ImmVal) != MatchOperand_Success) + return MatchOperand_ParseFail; + + EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc)); + return MatchOperand_Success; + } + case AsmToken::LBrac: { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateToken("[", Loc)); + Parser.Lex(); // Eat '[' + + // There's no comma after a '[', so we can parse the next operand + // immediately. + return ParseOperand(Operands, Mnemonic); + } + // The following will likely be useful later, but not in very early cases + case AsmToken::LCurly: // Weird SIMD lists + llvm_unreachable("Don't know how to deal with '{' in operand"); + return MatchOperand_ParseFail; + } +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseImmediate(const MCExpr *&ExprVal) { + if (getLexer().is(AsmToken::Colon)) { + AArch64MCExpr::VariantKind RefKind; + + OperandMatchResultTy ResTy = ParseRelocPrefix(RefKind); + if (ResTy != MatchOperand_Success) + return ResTy; + + const MCExpr *SubExprVal; + if (getParser().parseExpression(SubExprVal)) + return MatchOperand_ParseFail; + + ExprVal = AArch64MCExpr::Create(RefKind, SubExprVal, getContext()); + return MatchOperand_Success; + } + + // No weird AArch64MCExpr prefix + return getParser().parseExpression(ExprVal) + ? MatchOperand_ParseFail : MatchOperand_Success; +} + +// A lane attached to a NEON register. "[N]", which should yield three tokens: +// '[', N, ']'. A hash is not allowed to precede the immediate here. 
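+// For example, the "[2]" in "v1.4s[2]" should arrive as the three tokens '[',
+// '2' and ']', and the lane is checked against the four lanes a ".4s" layout
+// provides.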
+AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + uint32_t NumLanes) { + SMLoc Loc = Parser.getTok().getLoc(); + + assert(Parser.getTok().is(AsmToken::LBrac) && "inappropriate operand"); + Operands.push_back(AArch64Operand::CreateToken("[", Loc)); + Parser.Lex(); // Eat '[' + + if (Parser.getTok().isNot(AsmToken::Integer)) { + Error(Parser.getTok().getLoc(), "expected lane number"); + return MatchOperand_ParseFail; + } + + if (Parser.getTok().getIntVal() >= NumLanes) { + Error(Parser.getTok().getLoc(), "lane number incompatible with layout"); + return MatchOperand_ParseFail; + } + + const MCExpr *Lane = MCConstantExpr::Create(Parser.getTok().getIntVal(), + getContext()); + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat actual lane + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateImm(Lane, S, E)); + + + if (Parser.getTok().isNot(AsmToken::RBrac)) { + Error(Parser.getTok().getLoc(), "expected ']' after lane"); + return MatchOperand_ParseFail; + } + + Operands.push_back(AArch64Operand::CreateToken("]", Loc)); + Parser.Lex(); // Eat ']' + + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind) { + assert(getLexer().is(AsmToken::Colon) && "expected a ':'"); + Parser.Lex(); + + if (getLexer().isNot(AsmToken::Identifier)) { + Error(Parser.getTok().getLoc(), + "expected relocation specifier in operand after ':'"); + return MatchOperand_ParseFail; + } + + std::string LowerCase = Parser.getTok().getIdentifier().lower(); + RefKind = StringSwitch<AArch64MCExpr::VariantKind>(LowerCase) + .Case("got", AArch64MCExpr::VK_AARCH64_GOT) + .Case("got_lo12", AArch64MCExpr::VK_AARCH64_GOT_LO12) + .Case("lo12", AArch64MCExpr::VK_AARCH64_LO12) + .Case("abs_g0", AArch64MCExpr::VK_AARCH64_ABS_G0) + .Case("abs_g0_nc", AArch64MCExpr::VK_AARCH64_ABS_G0_NC) + .Case("abs_g1", AArch64MCExpr::VK_AARCH64_ABS_G1) + .Case("abs_g1_nc", AArch64MCExpr::VK_AARCH64_ABS_G1_NC) + .Case("abs_g2", AArch64MCExpr::VK_AARCH64_ABS_G2) + .Case("abs_g2_nc", AArch64MCExpr::VK_AARCH64_ABS_G2_NC) + .Case("abs_g3", AArch64MCExpr::VK_AARCH64_ABS_G3) + .Case("abs_g0_s", AArch64MCExpr::VK_AARCH64_SABS_G0) + .Case("abs_g1_s", AArch64MCExpr::VK_AARCH64_SABS_G1) + .Case("abs_g2_s", AArch64MCExpr::VK_AARCH64_SABS_G2) + .Case("dtprel_g2", AArch64MCExpr::VK_AARCH64_DTPREL_G2) + .Case("dtprel_g1", AArch64MCExpr::VK_AARCH64_DTPREL_G1) + .Case("dtprel_g1_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC) + .Case("dtprel_g0", AArch64MCExpr::VK_AARCH64_DTPREL_G0) + .Case("dtprel_g0_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC) + .Case("dtprel_hi12", AArch64MCExpr::VK_AARCH64_DTPREL_HI12) + .Case("dtprel_lo12", AArch64MCExpr::VK_AARCH64_DTPREL_LO12) + .Case("dtprel_lo12_nc", AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC) + .Case("gottprel_g1", AArch64MCExpr::VK_AARCH64_GOTTPREL_G1) + .Case("gottprel_g0_nc", AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC) + .Case("gottprel", AArch64MCExpr::VK_AARCH64_GOTTPREL) + .Case("gottprel_lo12", AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12) + .Case("tprel_g2", AArch64MCExpr::VK_AARCH64_TPREL_G2) + .Case("tprel_g1", AArch64MCExpr::VK_AARCH64_TPREL_G1) + .Case("tprel_g1_nc", AArch64MCExpr::VK_AARCH64_TPREL_G1_NC) + .Case("tprel_g0", AArch64MCExpr::VK_AARCH64_TPREL_G0) + .Case("tprel_g0_nc", AArch64MCExpr::VK_AARCH64_TPREL_G0_NC) + .Case("tprel_hi12", AArch64MCExpr::VK_AARCH64_TPREL_HI12) + .Case("tprel_lo12", 
AArch64MCExpr::VK_AARCH64_TPREL_LO12) + .Case("tprel_lo12_nc", AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC) + .Case("tlsdesc", AArch64MCExpr::VK_AARCH64_TLSDESC) + .Case("tlsdesc_lo12", AArch64MCExpr::VK_AARCH64_TLSDESC_LO12) + .Default(AArch64MCExpr::VK_AARCH64_None); + + if (RefKind == AArch64MCExpr::VK_AARCH64_None) { + Error(Parser.getTok().getLoc(), + "expected relocation specifier in operand after ':'"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat identifier + + if (getLexer().isNot(AsmToken::Colon)) { + Error(Parser.getTok().getLoc(), + "expected ':' after relocation specifier"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseImmWithLSLOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // FIXME?: I want to live in a world where immediates must start with + // #. Please don't dash my hopes (well, do if you have a good reason). + if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat '#' + + const MCExpr *Imm; + if (ParseImmediate(Imm) != MatchOperand_Success) + return MatchOperand_ParseFail; + else if (Parser.getTok().isNot(AsmToken::Comma)) { + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, 0, true, S, E)); + return MatchOperand_Success; + } + + // Eat ',' + Parser.Lex(); + + // The optional operand must be "lsl #N" where N is non-negative; anything + // else after the comma is an error, and must not reach the getIntVal call + // below on an arbitrary token. + if (Parser.getTok().isNot(AsmToken::Identifier) + || Parser.getTok().getIdentifier().lower() != "lsl") { + Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat 'lsl' + + if (Parser.getTok().is(AsmToken::Hash)) + Parser.Lex(); // Eat the optional '#' + + if (Parser.getTok().isNot(AsmToken::Integer)) { + Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate"); + return MatchOperand_ParseFail; + } + + int64_t ShiftAmount = Parser.getTok().getIntVal(); + + if (ShiftAmount < 0) { + Error(Parser.getTok().getLoc(), "positive shift amount required"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat the number + + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, ShiftAmount, + false, S, E)); + return MatchOperand_Success; +} + + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseCondCodeOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + if (Parser.getTok().isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; + + StringRef Tok = Parser.getTok().getIdentifier(); + A64CC::CondCodes CondCode = A64StringToCondCode(Tok); + + if (CondCode == A64CC::Invalid) + return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat condition code + SMLoc E = Parser.getTok().getLoc(); + + Operands.push_back(AArch64Operand::CreateCondCode(CondCode, S, E)); + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseCRxOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + if (Parser.getTok().isNot(AsmToken::Identifier)) { + Error(S, "Expected cN operand where 0 <= N <= 15"); + return MatchOperand_ParseFail; + } + + std::string LowerTok = Parser.getTok().getIdentifier().lower(); + StringRef Tok(LowerTok); + if (Tok[0] != 'c') { + Error(S, "Expected cN operand where 0 <= N <= 15"); + return MatchOperand_ParseFail; + } + + uint32_t CRNum; + bool BadNum = Tok.drop_front().getAsInteger(10, CRNum); + if (BadNum || CRNum > 15) { + Error(S, "Expected cN operand where 0 <= 
N <= 15"); + return MatchOperand_ParseFail; + } + + const MCExpr *CRImm = MCConstantExpr::Create(CRNum, getContext()); + + Parser.Lex(); + SMLoc E = Parser.getTok().getLoc(); + + Operands.push_back(AArch64Operand::CreateImm(CRImm, S, E)); + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseFPImmOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + + // FIXME?: I want to live in a world where immediates must start with + // #. Please don't dash my hopes (well, do if you have a good reason). + if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch; + + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat '#' + + bool Negative = false; + if (Parser.getTok().is(AsmToken::Minus)) { + Negative = true; + Parser.Lex(); // Eat '-' + } else if (Parser.getTok().is(AsmToken::Plus)) { + Parser.Lex(); // Eat '+' + } + + if (Parser.getTok().isNot(AsmToken::Real)) { + Error(S, "Expected floating-point immediate"); + return MatchOperand_ParseFail; + } + + APFloat RealVal(APFloat::IEEEdouble, Parser.getTok().getString()); + if (Negative) RealVal.changeSign(); + double DblVal = RealVal.convertToDouble(); + + Parser.Lex(); // Eat real number + SMLoc E = Parser.getTok().getLoc(); + + Operands.push_back(AArch64Operand::CreateFPImm(DblVal, S, E)); + return MatchOperand_Success; +} + + +// Automatically generated +static unsigned MatchRegisterName(StringRef Name); + +bool +AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, + StringRef &Layout, + SMLoc &LayoutLoc) const { + const AsmToken &Tok = Parser.getTok(); + + if (Tok.isNot(AsmToken::Identifier)) + return false; + + std::string LowerReg = Tok.getString().lower(); + size_t DotPos = LowerReg.find('.'); + + RegNum = MatchRegisterName(LowerReg.substr(0, DotPos)); + if (RegNum == AArch64::NoRegister) { + RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos)) + .Case("ip0", AArch64::X16) + .Case("ip1", AArch64::X17) + .Case("fp", AArch64::X29) + .Case("lr", AArch64::X30) + .Default(AArch64::NoRegister); + } + if (RegNum == AArch64::NoRegister) + return false; + + SMLoc S = Tok.getLoc(); + RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos); + + if (DotPos == StringRef::npos) { + Layout = StringRef(); + } else { + // Everything afterwards needs to be a literal token, expected to be + // '.2d','.b' etc for vector registers. + + // This StringSwitch validates the input and (perhaps more importantly) + // gives us a permanent string to use in the token (a pointer into LowerReg + // would go out of scope when we return). 
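+ // For example, "v0.4s" splits into the register "v0" plus the permanent + // layout token ".4s"; an unknown suffix such as ".3h" falls through to the + // empty default below and the register is treated as malformed.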
+ LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1); + std::string LayoutText = LowerReg.substr(DotPos, StringRef::npos); + Layout = StringSwitch<const char *>(LayoutText) + .Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d") + .Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s") + .Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h") + .Case(".b", ".b").Case(".8b", ".8b").Case(".16b", ".16b") + .Default(""); + + if (Layout.size() == 0) { + // Malformed register + return false; + } + } + + return true; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + uint32_t &NumLanes) { + unsigned RegNum; + StringRef Layout; + SMLoc RegEndLoc, LayoutLoc; + SMLoc S = Parser.getTok().getLoc(); + + if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc)) + return MatchOperand_NoMatch; + + Operands.push_back(AArch64Operand::CreateReg(RegNum, S, RegEndLoc)); + + if (Layout.size() != 0) { + unsigned long long TmpLanes = 0; + llvm::getAsUnsignedInteger(Layout.substr(1), 10, TmpLanes); + if (TmpLanes != 0) { + NumLanes = TmpLanes; + } else { + // If the number of lanes isn't specified explicitly, a valid instruction + // will have an element specifier and be capable of acting on the entire + // vector register. + switch (Layout.back()) { + default: llvm_unreachable("Invalid layout specifier"); + case 'b': NumLanes = 16; break; + case 'h': NumLanes = 8; break; + case 's': NumLanes = 4; break; + case 'd': NumLanes = 2; break; + } + } + + Operands.push_back(AArch64Operand::CreateToken(Layout, LayoutLoc)); + } + + Parser.Lex(); + return MatchOperand_Success; +} + +bool +AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { + // This callback is used for things like DWARF frame directives in + // assembly. They don't care about things like NEON layouts or lanes, they + // just want to be able to produce the DWARF register number. + StringRef LayoutSpec; + SMLoc RegEndLoc, LayoutLoc; + StartLoc = Parser.getTok().getLoc(); + + if (!IdentifyRegister(RegNo, RegEndLoc, LayoutSpec, LayoutLoc)) + return true; + + Parser.Lex(); + EndLoc = Parser.getTok().getLoc(); + + return false; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseNamedImmOperand(const NamedImmMapper &Mapper, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Since these operands occur in very limited circumstances, without + // alternatives, we actually signal an error if there is no match. If relaxing + // this, beware of unintended consequences: an immediate will be accepted + // during matching, no matter how it gets into the AArch64Operand. + const AsmToken &Tok = Parser.getTok(); + SMLoc S = Tok.getLoc(); + + if (Tok.is(AsmToken::Identifier)) { + bool ValidName; + uint32_t Code = Mapper.fromString(Tok.getString().lower(), ValidName); + + if (!ValidName) { + Error(S, "operand specifier not recognised"); + return MatchOperand_ParseFail; + } + + Parser.Lex(); // We're done with the identifier. 
Eat it + + SMLoc E = Parser.getTok().getLoc(); + const MCExpr *Imm = MCConstantExpr::Create(Code, getContext()); + Operands.push_back(AArch64Operand::CreateImm(Imm, S, E)); + return MatchOperand_Success; + } else if (Tok.is(AsmToken::Hash)) { + Parser.Lex(); + + const MCExpr *ImmVal; + if (ParseImmediate(ImmVal) != MatchOperand_Success) + return MatchOperand_ParseFail; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal); + if (!CE || CE->getValue() < 0 || !Mapper.validImm(CE->getValue())) { + Error(S, "Invalid immediate for instruction"); + return MatchOperand_ParseFail; + } + + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E)); + return MatchOperand_Success; + } + + Error(S, "unexpected operand for instruction"); + return MatchOperand_ParseFail; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseSysRegOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + const AsmToken &Tok = Parser.getTok(); + + // Any MSR/MRS operand will be an identifier, and we want to store it as some + // kind of string: SPSel is valid for two different forms of MSR with two + // different encodings. There's no collision at the moment, but the potential + // is there. + if (!Tok.is(AsmToken::Identifier)) { + return MatchOperand_NoMatch; + } + + SMLoc S = Tok.getLoc(); + Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), S)); + Parser.Lex(); // Eat identifier + + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseLSXAddressOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + + unsigned RegNum; + SMLoc RegEndLoc, LayoutLoc; + StringRef Layout; + if(!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc) + || !AArch64MCRegisterClasses[AArch64::GPR64xspRegClassID].contains(RegNum) + || Layout.size() != 0) { + // Check Layout.size because we don't want to let "x3.4s" or similar + // through. 
+ return MatchOperand_NoMatch; + } + Parser.Lex(); // Eat register + + if (Parser.getTok().is(AsmToken::RBrac)) { + // We're done + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E)); + return MatchOperand_Success; + } + + // Otherwise, only ", #0" is valid + + if (Parser.getTok().isNot(AsmToken::Comma)) { + Error(Parser.getTok().getLoc(), "expected ',' or ']' after register"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat ',' + + if (Parser.getTok().isNot(AsmToken::Hash)) { + Error(Parser.getTok().getLoc(), "expected '#0'"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat '#' + + if (Parser.getTok().isNot(AsmToken::Integer) + || Parser.getTok().getIntVal() != 0 ) { + Error(Parser.getTok().getLoc(), "expected '#0'"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat '0' + + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E)); + return MatchOperand_Success; +} + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::ParseShiftExtend( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + StringRef IDVal = Parser.getTok().getIdentifier(); + std::string LowerID = IDVal.lower(); + + A64SE::ShiftExtSpecifiers Spec = + StringSwitch<A64SE::ShiftExtSpecifiers>(LowerID) + .Case("lsl", A64SE::LSL) + .Case("lsr", A64SE::LSR) + .Case("asr", A64SE::ASR) + .Case("ror", A64SE::ROR) + .Case("uxtb", A64SE::UXTB) + .Case("uxth", A64SE::UXTH) + .Case("uxtw", A64SE::UXTW) + .Case("uxtx", A64SE::UXTX) + .Case("sxtb", A64SE::SXTB) + .Case("sxth", A64SE::SXTH) + .Case("sxtw", A64SE::SXTW) + .Case("sxtx", A64SE::SXTX) + .Default(A64SE::Invalid); + + if (Spec == A64SE::Invalid) + return MatchOperand_NoMatch; + + // Eat the shift + SMLoc S, E; + S = Parser.getTok().getLoc(); + Parser.Lex(); + + if (Spec != A64SE::LSL && Spec != A64SE::LSR && + Spec != A64SE::ASR && Spec != A64SE::ROR) { + // The shift amount can be omitted for the extending versions, but not real + // shifts: + // add x0, x0, x0, uxtb + // is valid, and equivalent to + // add x0, x0, x0, uxtb #0 + + if (Parser.getTok().is(AsmToken::Comma) || + Parser.getTok().is(AsmToken::EndOfStatement) || + Parser.getTok().is(AsmToken::RBrac)) { + Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, 0, true, + S, E)); + return MatchOperand_Success; + } + } + + // Eat # at beginning of immediate + if (!Parser.getTok().is(AsmToken::Hash)) { + Error(Parser.getTok().getLoc(), + "expected #imm after shift specifier"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + + // Make sure we do actually have a number + if (!Parser.getTok().is(AsmToken::Integer)) { + Error(Parser.getTok().getLoc(), + "expected integer shift amount"); + return MatchOperand_ParseFail; + } + unsigned Amount = Parser.getTok().getIntVal(); + Parser.Lex(); + E = Parser.getTok().getLoc(); + + Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, Amount, false, + S, E)); + + return MatchOperand_Success; +} + +// FIXME: We would really like to be able to tablegen'erate this. 
+bool AArch64AsmParser:: +validateInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + switch (Inst.getOpcode()) { + case AArch64::BFIwwii: + case AArch64::BFIxxii: + case AArch64::SBFIZwwii: + case AArch64::SBFIZxxii: + case AArch64::UBFIZwwii: + case AArch64::UBFIZxxii: { + unsigned ImmOps = Inst.getNumOperands() - 2; + int64_t ImmR = Inst.getOperand(ImmOps).getImm(); + int64_t ImmS = Inst.getOperand(ImmOps+1).getImm(); + + if (ImmR != 0 && ImmS >= ImmR) { + return Error(Operands[4]->getStartLoc(), + "requested insert overflows register"); + } + return false; + } + case AArch64::BFXILwwii: + case AArch64::BFXILxxii: + case AArch64::SBFXwwii: + case AArch64::SBFXxxii: + case AArch64::UBFXwwii: + case AArch64::UBFXxxii: { + unsigned ImmOps = Inst.getNumOperands() - 2; + int64_t ImmR = Inst.getOperand(ImmOps).getImm(); + int64_t ImmS = Inst.getOperand(ImmOps+1).getImm(); + int64_t RegWidth = 0; + switch (Inst.getOpcode()) { + case AArch64::SBFXxxii: case AArch64::UBFXxxii: case AArch64::BFXILxxii: + RegWidth = 64; + break; + case AArch64::SBFXwwii: case AArch64::UBFXwwii: case AArch64::BFXILwwii: + RegWidth = 32; + break; + } + + if (ImmS >= RegWidth || ImmS < ImmR) { + return Error(Operands[4]->getStartLoc(), + "requested extract overflows register"); + } + return false; + } + case AArch64::ICix: { + int64_t ImmVal = Inst.getOperand(0).getImm(); + A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal); + if (!A64IC::NeedsRegister(ICOp)) { + return Error(Operands[1]->getStartLoc(), + "specified IC op does not use a register"); + } + return false; + } + case AArch64::ICi: { + int64_t ImmVal = Inst.getOperand(0).getImm(); + A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal); + if (A64IC::NeedsRegister(ICOp)) { + return Error(Operands[1]->getStartLoc(), + "specified IC op requires a register"); + } + return false; + } + case AArch64::TLBIix: { + int64_t ImmVal = Inst.getOperand(0).getImm(); + A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal); + if (!A64TLBI::NeedsRegister(TLBIOp)) { + return Error(Operands[1]->getStartLoc(), + "specified TLBI op does not use a register"); + } + return false; + } + case AArch64::TLBIi: { + int64_t ImmVal = Inst.getOperand(0).getImm(); + A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal); + if (A64TLBI::NeedsRegister(TLBIOp)) { + return Error(Operands[1]->getStartLoc(), + "specified TLBI op requires a register"); + } + return false; + } + } + + return false; +} + + +// Parses the instruction *together with* all operands, appending each parsed +// operand to the "Operands" list +bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + size_t CondCodePos = Name.find('.'); + + StringRef Mnemonic = Name.substr(0, CondCodePos); + Operands.push_back(AArch64Operand::CreateToken(Mnemonic, NameLoc)); + + if (CondCodePos != StringRef::npos) { + // We have a condition code + SMLoc S = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 1); + StringRef CondStr = Name.substr(CondCodePos + 1, StringRef::npos); + A64CC::CondCodes Code; + + Code = A64StringToCondCode(CondStr); + + if (Code == A64CC::Invalid) { + Error(S, "invalid condition code"); + Parser.eatToEndOfStatement(); + return true; + } + + SMLoc DotL = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos); + + Operands.push_back(AArch64Operand::CreateToken(".", DotL)); + SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() + 
CondCodePos + 3); + Operands.push_back(AArch64Operand::CreateCondCode(Code, S, E)); + } + + // Now we parse the operands of this instruction + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (ParseOperand(Operands, Mnemonic)) { + Parser.eatToEndOfStatement(); + return true; + } + + while (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); // Eat the comma. + + // Parse and remember the operand. + if (ParseOperand(Operands, Mnemonic)) { + Parser.eatToEndOfStatement(); + return true; + } + + + // After successfully parsing some operands there are two special cases to + // consider (i.e. notional operands not separated by commas). Both are due + // to memory specifiers: + // + An RBrac will end an address for load/store/prefetch + // + An '!' will indicate a pre-indexed operation. + // + // It's someone else's responsibility to make sure these tokens are sane + // in the given context! + if (Parser.getTok().is(AsmToken::RBrac)) { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateToken("]", Loc)); + Parser.Lex(); + } + + if (Parser.getTok().is(AsmToken::Exclaim)) { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateToken("!", Loc)); + Parser.Lex(); + } + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + Parser.eatToEndOfStatement(); + return Error(Loc, "expected comma before next operand"); + } + + // Eat the EndOfStatement + Parser.Lex(); + + return false; +} + +bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getIdentifier(); + if (IDVal == ".hword") + return ParseDirectiveWord(2, DirectiveID.getLoc()); + else if (IDVal == ".word") + return ParseDirectiveWord(4, DirectiveID.getLoc()); + else if (IDVal == ".xword") + return ParseDirectiveWord(8, DirectiveID.getLoc()); + else if (IDVal == ".tlsdesccall") + return ParseDirectiveTLSDescCall(DirectiveID.getLoc()); + + return true; +} + +/// parseDirectiveWord +/// ::= .word [ expression (, expression)* ] +bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + const MCExpr *Value; + if (getParser().parseExpression(Value)) + return true; + + getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + // FIXME: Improve diagnostic. 
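+ // (As written, the diagnostic below points at L, the start of the + // directive, rather than at the offending token; e.g. ".word 1 2" reports + // the ".word" itself.)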
+ if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + } + } + + Parser.Lex(); + return false; +} + +// parseDirectiveTLSDescCall: +// ::= .tlsdesccall symbol +bool AArch64AsmParser::ParseDirectiveTLSDescCall(SMLoc L) { + StringRef Name; + if (getParser().parseIdentifier(Name)) + return Error(L, "expected symbol after directive"); + + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext()); + + MCInst Inst; + Inst.setOpcode(AArch64::TLSDESCCALL); + Inst.addOperand(MCOperand::CreateExpr(Expr)); + + getParser().getStreamer().EmitInstruction(Inst); + return false; +} + + +bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + unsigned MatchResult; + MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, + MatchingInlineAsm); + + if (ErrorInfo != ~0U && ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + switch (MatchResult) { + default: break; + case Match_Success: + if (validateInstruction(Inst, Operands)) + return true; + + Out.EmitInstruction(Inst); + return false; + case Match_MissingFeature: + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + ErrorLoc = ((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + case Match_MnemonicFail: + return Error(IDLoc, "invalid instruction"); + + case Match_AddSubRegExtendSmall: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]"); + case Match_AddSubRegExtendLarge: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]"); + case Match_AddSubRegShift32: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]"); + case Match_AddSubRegShift64: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]"); + case Match_AddSubSecondSource: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected compatible register, symbol or integer in range [0, 4095]"); + case Match_CVTFixedPos32: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 32]"); + case Match_CVTFixedPos64: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 64]"); + case Match_CondCode: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected AArch64 condition code"); + case Match_FPImm: + // Any situation which allows a nontrivial floating-point constant also + // allows a register. 
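+ // (e.g. "fmov d0, #0.5" and "fmov d0, d1" are both valid, hence the + // two-pronged diagnostic.)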
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected compatible register or floating-point constant"); + case Match_FPZero: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected floating-point constant #0.0"); + case Match_Label: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected label or encodable integer pc offset"); + case Match_Lane1: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected lane specifier '[1]'"); + case Match_LoadStoreExtend32_1: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'uxtw' or 'sxtw' with optional shift of #0"); + case Match_LoadStoreExtend32_2: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #1"); + case Match_LoadStoreExtend32_4: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #2"); + case Match_LoadStoreExtend32_8: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #3"); + case Match_LoadStoreExtend32_16: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtw' with optional shift of #0 or #4"); + case Match_LoadStoreExtend64_1: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtx' with optional shift of #0"); + case Match_LoadStoreExtend64_2: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtx' with optional shift of #0 or #1"); + case Match_LoadStoreExtend64_4: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtx' with optional shift of #0 or #2"); + case Match_LoadStoreExtend64_8: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtx' with optional shift of #0 or #3"); + case Match_LoadStoreExtend64_16: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'lsl' or 'sxtx' with optional shift of #0 or #4"); + case Match_LoadStoreSImm7_4: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer multiple of 4 in range [-256, 252]"); + case Match_LoadStoreSImm7_8: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer multiple of 8 in range [-512, 508]"); + case Match_LoadStoreSImm7_16: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer multiple of 16 in range [-1024, 1016]"); + case Match_LoadStoreSImm9: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [-256, 255]"); + case Match_LoadStoreUImm12_1: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic reference or integer in range [0, 4095]"); + case Match_LoadStoreUImm12_2: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic reference or integer in range [0, 8190]"); + case Match_LoadStoreUImm12_4: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic reference or integer in range [0, 16380]"); + case Match_LoadStoreUImm12_8: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic reference or integer in range [0, 32760]"); + case Match_LoadStoreUImm12_16: + return 
Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic reference or integer in range [0, 65520]"); + case Match_LogicalSecondSource: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected compatible register or logical immediate"); + case Match_MOVWUImm16: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected relocated symbol or integer in range [0, 65535]"); + case Match_MRS: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected readable system register"); + case Match_MSR: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected writable system register or pstate"); + case Match_NamedImm_at: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic 'at' operand: s1e[0-3][rw] or s12e[01][rw]"); + case Match_NamedImm_dbarrier: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 15] or symbolic barrier operand"); + case Match_NamedImm_dc: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected symbolic 'dc' operand"); + case Match_NamedImm_ic: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected 'ic' operand: 'ialluis', 'iallu' or 'ivau'"); + case Match_NamedImm_isb: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 15] or 'sy'"); + case Match_NamedImm_prefetch: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected prefetch hint: p(ld|st|i)l[123](strm|keep)"); + case Match_NamedImm_tlbi: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected translation buffer invalidation operand"); + case Match_UImm16: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 65535]"); + case Match_UImm3: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 7]"); + case Match_UImm4: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 15]"); + case Match_UImm5: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 31]"); + case Match_UImm6: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 63]"); + case Match_UImm7: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 127]"); + case Match_Width32: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [<lsb>, 31]"); + case Match_Width64: + return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [<lsb>, 63]"); + } + + llvm_unreachable("Implement any new match types added!"); + return true; +} + +void AArch64Operand::print(raw_ostream &OS) const { + switch (Kind) { + case k_CondCode: + OS << "<CondCode: " << CondCode.Code << ">"; + break; + case k_FPImmediate: + OS << "<fpimm: " << FPImm.Val << ">"; + break; + case k_ImmWithLSL: + OS << "<immwithlsl: imm=" << ImmWithLSL.Val + << ", shift=" << ImmWithLSL.ShiftAmount << ">"; + break; + case k_Immediate: + getImm()->print(OS); + break; + case k_Register: + OS << "<register " << getReg() << '>'; + break; + case k_Token: + OS << '\'' << getToken() << '\''; + break; + case k_ShiftExtend: + OS << "<shift: type=" << ShiftExtend.ShiftType + << ", amount=" << 
ShiftExtend.Amount << ">"; + break; + case k_SysReg: { + StringRef Name(SysReg.Data, SysReg.Length); + OS << "<sysreg: " << Name << '>'; + break; + } + default: + llvm_unreachable("No idea how to print this kind of operand"); + break; + } +} + +void AArch64Operand::dump() const { + print(errs()); +} + + +/// Force static initialization. +extern "C" void LLVMInitializeAArch64AsmParser() { + RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64Target); +} + +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION +#include "AArch64GenAsmMatcher.inc" diff --git a/lib/Target/AArch64/AsmParser/CMakeLists.txt b/lib/Target/AArch64/AsmParser/CMakeLists.txt new file mode 100644 index 0000000..a018a0a --- /dev/null +++ b/lib/Target/AArch64/AsmParser/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMAArch64AsmParser + AArch64AsmParser.cpp + ) + +add_dependencies(LLVMAArch64AsmParser AArch64CommonTableGen) diff --git a/lib/Target/AArch64/AsmParser/LLVMBuild.txt b/lib/Target/AArch64/AsmParser/LLVMBuild.txt new file mode 100644 index 0000000..bd1fcaf --- /dev/null +++ b/lib/Target/AArch64/AsmParser/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/AArch64/AsmParser/LLVMBuild.txt -------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AArch64AsmParser +parent = AArch64 +required_libraries = AArch64Desc AArch64Info MC MCParser Support +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/AsmParser/Makefile b/lib/Target/AArch64/AsmParser/Makefile new file mode 100644 index 0000000..56c9ef5 --- /dev/null +++ b/lib/Target/AArch64/AsmParser/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/AArch64/AsmParser/Makefile ---------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMAArch64AsmParser + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt new file mode 100644 index 0000000..8164d6f --- /dev/null +++ b/lib/Target/AArch64/CMakeLists.txt @@ -0,0 +1,36 @@ +set(LLVM_TARGET_DEFINITIONS AArch64.td) + +tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv) +tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler) +tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info) +tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter) +tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering) +tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel) +tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget) +add_public_tablegen_target(AArch64CommonTableGen) + +add_llvm_target(AArch64CodeGen + AArch64AsmPrinter.cpp + AArch64BranchFixupPass.cpp + AArch64FrameLowering.cpp + AArch64ISelDAGToDAG.cpp + AArch64ISelLowering.cpp + AArch64InstrInfo.cpp + AArch64MachineFunctionInfo.cpp + AArch64MCInstLower.cpp + AArch64RegisterInfo.cpp + AArch64SelectionDAGInfo.cpp + AArch64Subtarget.cpp + AArch64TargetMachine.cpp + AArch64TargetObjectFile.cpp + ) + +add_subdirectory(AsmParser) +add_subdirectory(Disassembler) +add_subdirectory(InstPrinter) +add_subdirectory(MCTargetDesc) +add_subdirectory(TargetInfo) +add_subdirectory(Utils)
\ No newline at end of file diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp new file mode 100644 index 0000000..eba7666 --- /dev/null +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -0,0 +1,787 @@ +//===- AArch64Disassembler.cpp - Disassembler for AArch64 ISA -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the functions necessary to decode AArch64 instruction +// bitpatterns into MCInsts (with the help of TableGenerated information from +// the instruction definitions). +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-disassembler" + +#include "AArch64.h" +#include "AArch64RegisterInfo.h" +#include "AArch64Subtarget.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { +/// AArch64 disassembler for all AArch64 platforms. +class AArch64Disassembler : public MCDisassembler { + const MCRegisterInfo *RegInfo; +public: + /// Initializes the disassembler. + /// + AArch64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info) + : MCDisassembler(STI), RegInfo(Info) { + } + + ~AArch64Disassembler() { + } + + /// See MCDisassembler. + DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const; + + const MCRegisterInfo *getRegInfo() const { return RegInfo; } +}; + +} + +// Forward-declarations used in the auto-generated files. 
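+// Each decoder is handed the raw field value extracted by the generated +// table-walker; it appends the corresponding operands to Inst and returns +// Success, SoftFail (a valid but unpredictable encoding) or Fail.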
+static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus +DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus +DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, + unsigned OptionHiS, + uint64_t Address, + const void *Decoder); + + +static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst, + unsigned Imm6Bits, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst, + unsigned Imm6Bits, + uint64_t Address, + const void *Decoder); + +template<int RegWidth> +static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, + unsigned FullImm, + uint64_t Address, + const void *Decoder); + +template<int RegWidth> +static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst, + unsigned Bits, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst, + unsigned ShiftAmount, + uint64_t Address, + const void *Decoder); + +static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst, + unsigned ShiftAmount, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + +template<typename SomeNamedImmMapper> +static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + +static DecodeStatus +DecodeSysRegOperand(const A64SysReg::SysRegMapper &InstMapper, + llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + + +static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); + + +static bool Check(DecodeStatus &Out, DecodeStatus In); + +#include 
"AArch64GenDisassemblerTables.inc" +#include "AArch64GenInstrInfo.inc" + +static bool Check(DecodeStatus &Out, DecodeStatus In) { + switch (In) { + case MCDisassembler::Success: + // Out stays the same. + return true; + case MCDisassembler::SoftFail: + Out = In; + return true; + case MCDisassembler::Fail: + Out = In; + return false; + } + llvm_unreachable("Invalid DecodeStatus!"); +} + +DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + CommentStream = &cs; + + uint8_t bytes[4]; + + // We want to read exactly 4 bytes of data. + if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) { + Size = 0; + return MCDisassembler::Fail; + } + + // Encoded as a small-endian 32-bit word in the stream. + uint32_t insn = (bytes[3] << 24) | + (bytes[2] << 16) | + (bytes[1] << 8) | + (bytes[0] << 0); + + // Calling the auto-generated decoder function. + DecodeStatus result = decodeInstruction(DecoderTableA6432, MI, insn, Address, + this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + MI.clear(); + Size = 0; + return MCDisassembler::Fail; +} + +static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { + const AArch64Disassembler *Dis = static_cast<const AArch64Disassembler*>(D); + return Dis->getRegInfo()->getRegClass(RC).getRegister(RegNo); +} + +static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR64RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR64xspRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR32RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR32wspRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR8RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR16RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + + +static DecodeStatus +DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned 
RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR32RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR64RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + + +static DecodeStatus +DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::FPR128RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, + unsigned OptionHiS, + uint64_t Address, + const void *Decoder) { + // Option{1} must be 1. OptionHiS is made up of {Option{2}, Option{1}, + // S}. Hence we want to check bit 1. + if (!(OptionHiS & 2)) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(OptionHiS)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst, + unsigned Imm6Bits, + uint64_t Address, + const void *Decoder) { + // In the 32-bit variant, bit 6 must be zero. I.e. the immediate must be + // between 0 and 31. + if (Imm6Bits > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Imm6Bits)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst, + unsigned Imm6Bits, + uint64_t Address, + const void *Decoder) { + // 1 <= Imm <= 32. Encoded as 64 - Imm so: 63 >= Encoded >= 32. 
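+ // e.g. #1 is encoded as 63 and #32 as 32, which is why anything below 32 + // is rejected here.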
+ if (Imm6Bits < 32) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Imm6Bits)); + return MCDisassembler::Success; +} + + +template<int RegWidth> +static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, + unsigned FullImm, + uint64_t Address, + const void *Decoder) { + unsigned Imm16 = FullImm & 0xffff; + unsigned Shift = FullImm >> 16; + + if (RegWidth == 32 && Shift > 1) return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Imm16)); + Inst.addOperand(MCOperand::CreateImm(Shift)); + return MCDisassembler::Success; +} + +template<int RegWidth> +static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst, + unsigned Bits, + uint64_t Address, + const void *Decoder) { + uint64_t Imm; + if (!A64Imms::isLogicalImmBits(RegWidth, Bits, Imm)) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Bits)); + return MCDisassembler::Success; +} + + +static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst, + unsigned ShiftAmount, + uint64_t Address, + const void *Decoder) { + // Only values 0-4 are valid for this 3-bit field + if (ShiftAmount > 4) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); + return MCDisassembler::Success; +} + +static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst, + unsigned ShiftAmount, + uint64_t Address, + const void *Decoder) { + // Only values below 32 are valid for a 32-bit register + if (ShiftAmount > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + unsigned Rd = fieldFromInstruction(Insn, 0, 5); + unsigned Rn = fieldFromInstruction(Insn, 5, 5); + unsigned ImmS = fieldFromInstruction(Insn, 10, 6); + unsigned ImmR = fieldFromInstruction(Insn, 16, 6); + unsigned SF = fieldFromInstruction(Insn, 31, 1); + + // Undef for 0b11 just in case it occurs. Don't want the compiler to optimise + // out assertions that it thinks should never be hit. + enum OpcTypes { SBFM = 0, BFM, UBFM, Undef } Opc; + Opc = (OpcTypes)fieldFromInstruction(Insn, 29, 2); + + if (!SF) { + // ImmR and ImmS must be between 0 and 31 for 32-bit instructions. + if (ImmR > 31 || ImmS > 31) + return MCDisassembler::Fail; + } + + if (SF) { + DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); + // BFM MCInsts use Rd as a source too. + if (Opc == BFM) DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); + DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder); + } else { + DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder); + // BFM MCInsts use Rd as a source too. + if (Opc == BFM) DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder); + DecodeGPR32RegisterClass(Inst, Rn, Address, Decoder); + } + + // ASR and LSR have more specific patterns so they won't get here: + assert(!(ImmS == 31 && !SF && Opc != BFM) + && "shift should have used auto decode"); + assert(!(ImmS == 63 && SF && Opc != BFM) + && "shift should have used auto decode"); + + // Extension instructions similarly: + if (Opc == SBFM && ImmR == 0) { + assert((ImmS != 7 && ImmS != 15) && "extension got here"); + assert((ImmS != 31 || SF == 0) && "extension got here"); + } else if (Opc == UBFM && ImmR == 0) { + assert((SF != 0 || (ImmS != 7 && ImmS != 15)) && "extension got here"); + } + + if (Opc == UBFM) { + // It might be a LSL instruction, which actually takes the shift amount + // itself as an MCInst operand. 
+ if (SF && (ImmS + 1) % 64 == ImmR) { + Inst.setOpcode(AArch64::LSLxxi); + Inst.addOperand(MCOperand::CreateImm(63 - ImmS)); + return MCDisassembler::Success; + } else if (!SF && (ImmS + 1) % 32 == ImmR) { + Inst.setOpcode(AArch64::LSLwwi); + Inst.addOperand(MCOperand::CreateImm(31 - ImmS)); + return MCDisassembler::Success; + } + } + + // Otherwise it's definitely either an extract or an insert depending on which + // of ImmR or ImmS is larger. + unsigned ExtractOp, InsertOp; + switch (Opc) { + default: llvm_unreachable("unexpected instruction trying to decode bitfield"); + case SBFM: + ExtractOp = SF ? AArch64::SBFXxxii : AArch64::SBFXwwii; + InsertOp = SF ? AArch64::SBFIZxxii : AArch64::SBFIZwwii; + break; + case BFM: + ExtractOp = SF ? AArch64::BFXILxxii : AArch64::BFXILwwii; + InsertOp = SF ? AArch64::BFIxxii : AArch64::BFIwwii; + break; + case UBFM: + ExtractOp = SF ? AArch64::UBFXxxii : AArch64::UBFXwwii; + InsertOp = SF ? AArch64::UBFIZxxii : AArch64::UBFIZwwii; + break; + } + + // Otherwise it's a boring insert or extract + Inst.addOperand(MCOperand::CreateImm(ImmR)); + Inst.addOperand(MCOperand::CreateImm(ImmS)); + + + if (ImmS < ImmR) + Inst.setOpcode(InsertOp); + else + Inst.setOpcode(ExtractOp); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + // This decoder exists to add the dummy Lane operand to the MCInst, which must + // be 1 in assembly but has no other real manifestation. + unsigned Rd = fieldFromInstruction(Insn, 0, 5); + unsigned Rn = fieldFromInstruction(Insn, 5, 5); + unsigned IsToVec = fieldFromInstruction(Insn, 16, 1); + + if (IsToVec) { + DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder); + DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder); + } else { + DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); + DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder); + } + + // Add the lane + Inst.addOperand(MCOperand::CreateImm(1)); + + return MCDisassembler::Success; +} + + +static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + DecodeStatus Result = MCDisassembler::Success; + unsigned Rt = fieldFromInstruction(Insn, 0, 5); + unsigned Rn = fieldFromInstruction(Insn, 5, 5); + unsigned Rt2 = fieldFromInstruction(Insn, 10, 5); + unsigned SImm7 = fieldFromInstruction(Insn, 15, 7); + unsigned L = fieldFromInstruction(Insn, 22, 1); + unsigned V = fieldFromInstruction(Insn, 26, 1); + unsigned Opc = fieldFromInstruction(Insn, 30, 2); + + // Not an official name, but it turns out that bit 23 distinguishes indexed + // from non-indexed operations. + unsigned Indexed = fieldFromInstruction(Insn, 23, 1); + + if (Indexed && L == 0) { + // The MCInst for an indexed store has an out operand and 4 ins: + // Rn_wb, Rt, Rt2, Rn, Imm + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + } + + // You shouldn't load to the same register twice in an instruction... + if (L && Rt == Rt2) + Result = MCDisassembler::SoftFail; + + // ... or do any operation that writes-back to a transfer register. But note + // that "stp xzr, xzr, [sp], #4" is fine because xzr and sp are different. + if (Indexed && V == 0 && Rn != 31 && (Rt == Rn || Rt2 == Rn)) + Result = MCDisassembler::SoftFail; + + // Exactly how we decode the MCInst's registers depends on the Opc and V + // fields of the instruction. 
These also obviously determine the size of the + // operation so we can fill in that information while we're at it. + if (V) { + // The instruction operates on the FP/SIMD registers + switch (Opc) { + default: return MCDisassembler::Fail; + case 0: + DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder); + DecodeFPR32RegisterClass(Inst, Rt2, Address, Decoder); + break; + case 1: + DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); + DecodeFPR64RegisterClass(Inst, Rt2, Address, Decoder); + break; + case 2: + DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); + DecodeFPR128RegisterClass(Inst, Rt2, Address, Decoder); + break; + } + } else { + switch (Opc) { + default: return MCDisassembler::Fail; + case 0: + DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder); + DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder); + break; + case 1: + assert(L && "unexpected \"store signed\" attempt"); + DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); + DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder); + break; + case 2: + DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); + DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder); + break; + } + } + + if (Indexed && L == 1) { + // The MCInst for an indexed load has 3 out operands and 2 ins: + // Rt, Rt2, Rn_wb, Rn, Imm + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + } + + + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + Inst.addOperand(MCOperand::CreateImm(SImm7)); + + return Result; +} + +static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst, + uint32_t Val, + uint64_t Address, + const void *Decoder) { + unsigned Rt = fieldFromInstruction(Val, 0, 5); + unsigned Rn = fieldFromInstruction(Val, 5, 5); + unsigned Rt2 = fieldFromInstruction(Val, 10, 5); + unsigned MemSize = fieldFromInstruction(Val, 30, 2); + + DecodeStatus S = MCDisassembler::Success; + if (Rt == Rt2) S = MCDisassembler::SoftFail; + + switch (MemSize) { + case 2: + if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + break; + case 3: + if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + llvm_unreachable("Invalid MemSize in DecodeLoadPairExclusiveInstruction"); + } + + if (!Check(S, DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +template<typename SomeNamedImmMapper> +static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder) { + SomeNamedImmMapper Mapper; + bool ValidNamed; + Mapper.toString(Val, ValidNamed); + if (ValidNamed || Mapper.validImm(Val)) { + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; + } + + return MCDisassembler::Fail; +} + +static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &Mapper, + llvm::MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder) { + bool ValidNamed; + Mapper.toString(Val, ValidNamed); + + Inst.addOperand(MCOperand::CreateImm(Val)); + + return ValidNamed ? 
MCDisassembler::Success : MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
+                                     unsigned Val,
+                                     uint64_t Address,
+                                     const void *Decoder) {
+  return DecodeSysRegOperand(A64SysReg::MRSMapper(), Inst, Val, Address,
+                             Decoder);
+}
+
+static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
+                                     unsigned Val,
+                                     uint64_t Address,
+                                     const void *Decoder) {
+  return DecodeSysRegOperand(A64SysReg::MSRMapper(), Inst, Val, Address,
+                             Decoder);
+}
+
+static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
+                                                   unsigned Insn,
+                                                   uint64_t Address,
+                                                   const void *Decoder) {
+  unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+  unsigned Imm9 = fieldFromInstruction(Insn, 12, 9);
+
+  unsigned Opc = fieldFromInstruction(Insn, 22, 2);
+  unsigned V = fieldFromInstruction(Insn, 26, 1);
+  unsigned Size = fieldFromInstruction(Insn, 30, 2);
+
+  if (Opc == 0 || (V == 1 && Opc == 2)) {
+    // It's a store, the MCInst gets: Rn_wb, Rt, Rn, Imm
+    DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+  }
+
+  if (V == 0 && (Opc == 2 || Size == 3)) {
+    DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+  } else if (V == 0) {
+    DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
+  } else if (V == 1 && (Opc & 2)) {
+    DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+  } else {
+    switch (Size) {
+    case 0:
+      DecodeFPR8RegisterClass(Inst, Rt, Address, Decoder);
+      break;
+    case 1:
+      DecodeFPR16RegisterClass(Inst, Rt, Address, Decoder);
+      break;
+    case 2:
+      DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
+      break;
+    case 3:
+      DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+      break;
+    }
+  }
+
+  if (Opc != 0 && (V != 1 || Opc != 2)) {
+    // It's a load, the MCInst gets: Rt, Rn_wb, Rn, Imm
+    DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+  }
+
+  DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+
+  Inst.addOperand(MCOperand::CreateImm(Imm9));
+
+  // N.b. The official documentation says unpredictable if Rt == Rn, but this
+  // takes place at the architectural rather than encoding level:
+  //
+  //  "STR xzr, [sp], #4" is perfectly valid.
+  if (V == 0 && Rt == Rn && Rn != 31)
+    return MCDisassembler::SoftFail;
+  else
+    return MCDisassembler::Success;
+}
+
+static MCDisassembler *createAArch64Disassembler(const Target &T,
+                                                 const MCSubtargetInfo &STI) {
+  return new AArch64Disassembler(STI, T.createMCRegInfo(""));
+}
+
+extern "C" void LLVMInitializeAArch64Disassembler() {
+  TargetRegistry::RegisterMCDisassembler(TheAArch64Target,
+                                         createAArch64Disassembler);
+}
+
diff --git a/lib/Target/AArch64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt
new file mode 100644
index 0000000..d4bd163
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64Disassembler
+  AArch64Disassembler.cpp
+  )
+
+add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
new file mode 100644
index 0000000..a93e343
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/Disassembler/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AArch64Disassembler +parent = AArch64 +required_libraries = AArch64CodeGen AArch64Desc AArch64Info AArch64Utils MC Support +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/Disassembler/Makefile b/lib/Target/AArch64/Disassembler/Makefile new file mode 100644 index 0000000..5c86120 --- /dev/null +++ b/lib/Target/AArch64/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/AArch64/Disassembler/Makefile ------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMAArch64Disassembler + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp new file mode 100644 index 0000000..82ce80c --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -0,0 +1,408 @@ +//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an AArch64 MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "AArch64InstPrinter.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define GET_INSTRUCTION_NAME +#define PRINT_ALIAS_INSTR +#include "AArch64GenAsmWriter.inc" + +static int64_t unpackSignedImm(int BitWidth, uint64_t Value) { + assert(!(Value & ~((1ULL << BitWidth)-1)) && "immediate not n-bit"); + if (Value & (1ULL << (BitWidth - 1))) + return static_cast<int64_t>(Value) - (1LL << BitWidth); + else + return Value; +} + +AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) : + MCInstPrinter(MAI, MII, MRI) { + // Initialize the set of available features. 
+ setAvailableFeatures(STI.getFeatureBits()); +} + +void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << getRegisterName(RegNo); +} + +void +AArch64InstPrinter::printOffsetSImm9Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &O) { + const MCOperand &MOImm = MI->getOperand(OpNum); + int32_t Imm = unpackSignedImm(9, MOImm.getImm()); + + O << '#' << Imm; +} + +void +AArch64InstPrinter::printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, unsigned MemSize, + unsigned RmSize) { + unsigned ExtImm = MI->getOperand(OpNum).getImm(); + unsigned OptionHi = ExtImm >> 1; + unsigned S = ExtImm & 1; + bool IsLSL = OptionHi == 1 && RmSize == 64; + + const char *Ext; + switch (OptionHi) { + case 1: + Ext = (RmSize == 32) ? "uxtw" : "lsl"; + break; + case 3: + Ext = (RmSize == 32) ? "sxtw" : "sxtx"; + break; + default: + llvm_unreachable("Incorrect Option on load/store (reg offset)"); + } + O << Ext; + + if (S) { + unsigned ShiftAmt = Log2_32(MemSize); + O << " #" << ShiftAmt; + } else if (IsLSL) { + O << " #0"; + } +} + +void +AArch64InstPrinter::printAddSubImmLSL0Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &O) { + const MCOperand &Imm12Op = MI->getOperand(OpNum); + + if (Imm12Op.isImm()) { + int64_t Imm12 = Imm12Op.getImm(); + assert(Imm12 >= 0 && "Invalid immediate for add/sub imm"); + O << "#" << Imm12; + } else { + assert(Imm12Op.isExpr() && "Unexpected shift operand type"); + O << "#" << *Imm12Op.getExpr(); + } +} + +void +AArch64InstPrinter::printAddSubImmLSL12Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + + printAddSubImmLSL0Operand(MI, OpNum, O); + + O << ", lsl #12"; +} + +void +AArch64InstPrinter::printBareImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + O << MO.getImm(); +} + +template<unsigned RegWidth> void +AArch64InstPrinter::printBFILSBOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &ImmROp = MI->getOperand(OpNum); + unsigned LSB = ImmROp.getImm() == 0 ? 
0 : RegWidth - ImmROp.getImm(); + + O << '#' << LSB; +} + +void AArch64InstPrinter::printBFIWidthOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &ImmSOp = MI->getOperand(OpNum); + unsigned Width = ImmSOp.getImm() + 1; + + O << '#' << Width; +} + +void +AArch64InstPrinter::printBFXWidthOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &ImmSOp = MI->getOperand(OpNum); + const MCOperand &ImmROp = MI->getOperand(OpNum - 1); + + unsigned ImmR = ImmROp.getImm(); + unsigned ImmS = ImmSOp.getImm(); + + assert(ImmS >= ImmR && "Invalid ImmR, ImmS combination for bitfield extract"); + + O << '#' << (ImmS - ImmR + 1); +} + +void +AArch64InstPrinter::printCRxOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &CRx = MI->getOperand(OpNum); + + O << 'c' << CRx.getImm(); +} + + +void +AArch64InstPrinter::printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &ScaleOp = MI->getOperand(OpNum); + + O << '#' << (64 - ScaleOp.getImm()); +} + + +void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &o) { + const MCOperand &MOImm8 = MI->getOperand(OpNum); + + assert(MOImm8.isImm() + && "Immediate operand required for floating-point immediate inst"); + + uint32_t Imm8 = MOImm8.getImm(); + uint32_t Fraction = Imm8 & 0xf; + uint32_t Exponent = (Imm8 >> 4) & 0x7; + uint32_t Negative = (Imm8 >> 7) & 0x1; + + float Val = 1.0f + Fraction / 16.0f; + + // That is: + // 000 -> 2^1, 001 -> 2^2, 010 -> 2^3, 011 -> 2^4, + // 100 -> 2^-3, 101 -> 2^-2, 110 -> 2^-1, 111 -> 2^0 + if (Exponent & 0x4) { + Val /= 1 << (7 - Exponent); + } else { + Val *= 1 << (Exponent + 1); + } + + Val = Negative ? -Val : Val; + + o << '#' << format("%.8f", Val); +} + +void AArch64InstPrinter::printFPZeroOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &o) { + o << "#0.0"; +} + +void +AArch64InstPrinter::printCondCodeOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + O << A64CondCodeToString(static_cast<A64CC::CondCodes>(MO.getImm())); +} + +template <unsigned field_width, unsigned scale> void +AArch64InstPrinter::printLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + if (!MO.isImm()) { + printOperand(MI, OpNum, O); + return; + } + + // The immediate of LDR (lit) instructions is a signed 19-bit immediate, which + // is multiplied by 4 (because all A64 instructions are 32-bits wide). 
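+  //
+  // For example, with field_width == 19 and scale == 4 (the LDR (lit) case
+  // described above), the encoded value 0x7ffff has its sign bit (1 << 18)
+  // set, so the arithmetic below unpacks it to -1 and prints "#-4".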
+ uint64_t UImm = MO.getImm(); + uint64_t Sign = UImm & (1LL << (field_width - 1)); + int64_t SImm = scale * ((UImm & ~Sign) - Sign); + + O << "#" << SImm; +} + +template<unsigned RegWidth> void +AArch64InstPrinter::printLogicalImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + uint64_t Val; + A64Imms::isLogicalImmBits(RegWidth, MO.getImm(), Val); + O << "#0x"; + O.write_hex(Val); +} + +void +AArch64InstPrinter::printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, int MemSize) { + const MCOperand &MOImm = MI->getOperand(OpNum); + + if (MOImm.isImm()) { + uint32_t Imm = MOImm.getImm() * MemSize; + + O << "#" << Imm; + } else { + O << "#" << *MOImm.getExpr(); + } +} + +void +AArch64InstPrinter::printShiftOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, + A64SE::ShiftExtSpecifiers Shift) { + const MCOperand &MO = MI->getOperand(OpNum); + + // LSL #0 is not printed + if (Shift == A64SE::LSL && MO.isImm() && MO.getImm() == 0) + return; + + switch (Shift) { + case A64SE::LSL: O << "lsl"; break; + case A64SE::LSR: O << "lsr"; break; + case A64SE::ASR: O << "asr"; break; + case A64SE::ROR: O << "ror"; break; + default: llvm_unreachable("Invalid shift specifier in logical instruction"); + } + + O << " #" << MO.getImm(); +} + +void +AArch64InstPrinter::printMoveWideImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &UImm16MO = MI->getOperand(OpNum); + const MCOperand &ShiftMO = MI->getOperand(OpNum + 1); + + if (UImm16MO.isImm()) { + O << '#' << UImm16MO.getImm(); + + if (ShiftMO.getImm() != 0) + O << ", lsl #" << (ShiftMO.getImm() * 16); + + return; + } + + O << "#" << *UImm16MO.getExpr(); +} + +void AArch64InstPrinter::printNamedImmOperand(const NamedImmMapper &Mapper, + const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + bool ValidName; + const MCOperand &MO = MI->getOperand(OpNum); + StringRef Name = Mapper.toString(MO.getImm(), ValidName); + + if (ValidName) + O << Name; + else + O << '#' << MO.getImm(); +} + +void +AArch64InstPrinter::printSysRegOperand(const A64SysReg::SysRegMapper &Mapper, + const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + bool ValidName; + std::string Name = Mapper.toString(MO.getImm(), ValidName); + if (ValidName) { + O << Name; + return; + } +} + + +void AArch64InstPrinter::printRegExtendOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O, + A64SE::ShiftExtSpecifiers Ext) { + // FIXME: In principle TableGen should be able to detect this itself far more + // easily. We will only accumulate more of these hacks. 
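+  //
+  // As an illustrative example (not exhaustive): "add x0, sp, x1, uxtx #2"
+  // is printed in its preferred form "add x0, sp, x1, lsl #2", because uxtx
+  // is the LSL-equivalent extend whenever the 64-bit stack pointer is one of
+  // the operands; the code below implements exactly that special case.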
+ unsigned Reg0 = MI->getOperand(0).getReg(); + unsigned Reg1 = MI->getOperand(1).getReg(); + + if (isStackReg(Reg0) || isStackReg(Reg1)) { + A64SE::ShiftExtSpecifiers LSLEquiv; + + if (Reg0 == AArch64::XSP || Reg1 == AArch64::XSP) + LSLEquiv = A64SE::UXTX; + else + LSLEquiv = A64SE::UXTW; + + if (Ext == LSLEquiv) { + O << "lsl #" << MI->getOperand(OpNum).getImm(); + return; + } + } + + switch (Ext) { + case A64SE::UXTB: O << "uxtb"; break; + case A64SE::UXTH: O << "uxth"; break; + case A64SE::UXTW: O << "uxtw"; break; + case A64SE::UXTX: O << "uxtx"; break; + case A64SE::SXTB: O << "sxtb"; break; + case A64SE::SXTH: O << "sxth"; break; + case A64SE::SXTW: O << "sxtw"; break; + case A64SE::SXTX: O << "sxtx"; break; + default: llvm_unreachable("Unexpected shift type for printing"); + } + + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.getImm() != 0) + O << " #" << MO.getImm(); +} + +template<int MemScale> void +AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MOImm = MI->getOperand(OpNum); + int32_t Imm = unpackSignedImm(7, MOImm.getImm()); + + O << "#" << (Imm * MemScale); +} + +void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + O << getRegisterName(Reg); + } else if (Op.isImm()) { + O << '#' << Op.getImm(); + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + // If a symbolic branch target was added as a constant expression then print + // that address in hex. + const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); + int64_t Address; + if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { + O << "0x"; + O.write_hex(Address); + } + else { + // Otherwise, just print the expression. + O << *Op.getExpr(); + } + } +} + + +void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + if (MI->getOpcode() == AArch64::TLSDESCCALL) { + // This is a special assembler directive which applies an + // R_AARCH64_TLSDESC_CALL to the following (BLR) instruction. It has a fixed + // form outside the normal TableGenerated scheme. + O << "\t.tlsdesccall " << *MI->getOperand(0).getExpr(); + } else if (!printAliasInstr(MI, O)) + printInstruction(MI, O); + + printAnnotation(O, Annot); +} diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h new file mode 100644 index 0000000..639fa86 --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -0,0 +1,172 @@ +//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an AArch64 MCInst to a .s file. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64INSTPRINTER_H +#define LLVM_AARCH64INSTPRINTER_H + +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSubtargetInfo.h" + +namespace llvm { + +class MCOperand; + +class AArch64InstPrinter : public MCInstPrinter { +public: + AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + + // Autogenerated by tblgen + void printInstruction(const MCInst *MI, raw_ostream &O); + bool printAliasInstr(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + static const char *getInstructionName(unsigned Opcode); + + void printRegName(raw_ostream &O, unsigned RegNum) const; + + template<unsigned MemSize, unsigned RmSize> + void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printAddrRegExtendOperand(MI, OpNum, O, MemSize, RmSize); + } + + + void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, unsigned MemSize, + unsigned RmSize); + + void printAddSubImmLSL0Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &O); + void printAddSubImmLSL12Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &O); + + void printBareImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + template<unsigned RegWidth> + void printBFILSBOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printBFIWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printBFXWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + + void printCondCodeOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printCRxOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o); + + void printFPZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o); + + template<int MemScale> + void printOffsetUImm12Operand(const MCInst *MI, + unsigned OpNum, raw_ostream &o) { + printOffsetUImm12Operand(MI, OpNum, o, MemScale); + } + + void printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &o, int MemScale); + + template<unsigned field_width, unsigned scale> + void printLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + template<unsigned RegWidth> + void printLogicalImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + template<typename SomeNamedImmMapper> + void printNamedImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printNamedImmOperand(SomeNamedImmMapper(), MI, OpNum, O); + } + + void printNamedImmOperand(const NamedImmMapper &Mapper, + const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printSysRegOperand(const A64SysReg::SysRegMapper &Mapper, + const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printMRSOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printSysRegOperand(A64SysReg::MRSMapper(), MI, OpNum, O); + } + + void printMSROperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printSysRegOperand(A64SysReg::MSRMapper(), MI, OpNum, O); + } + + void printShiftOperand(const char *name, const MCInst *MI, + unsigned OpIdx, raw_ostream &O); + + void printLSLOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + void printLSROperand(const MCInst 
*MI, unsigned OpNum, raw_ostream &O) { + printShiftOperand("lsr", MI, OpNum, O); + } + void printASROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printShiftOperand("asr", MI, OpNum, O); + } + void printROROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printShiftOperand("ror", MI, OpNum, O); + } + + template<A64SE::ShiftExtSpecifiers Shift> + void printShiftOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printShiftOperand(MI, OpNum, O, Shift); + } + + void printShiftOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, A64SE::ShiftExtSpecifiers Sh); + + + void printMoveWideImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + template<int MemSize> void + printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + void printOffsetSImm9Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printPRFMOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + template<A64SE::ShiftExtSpecifiers EXT> + void printRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + printRegExtendOperand(MI, OpNum, O, EXT); + } + + void printRegExtendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, A64SE::ShiftExtSpecifiers Ext); + + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + + bool isStackReg(unsigned RegNo) { + return RegNo == AArch64::XSP || RegNo == AArch64::WSP; + } + + +}; + +} + +#endif diff --git a/lib/Target/AArch64/InstPrinter/CMakeLists.txt b/lib/Target/AArch64/InstPrinter/CMakeLists.txt new file mode 100644 index 0000000..d4b980a --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/CMakeLists.txt @@ -0,0 +1,8 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMAArch64AsmPrinter + AArch64InstPrinter.cpp + ) + +add_dependencies(LLVMAArch64AsmPrinter AArch64CommonTableGen) + diff --git a/lib/Target/AArch64/InstPrinter/LLVMBuild.txt b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt new file mode 100644 index 0000000..4836c7c --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/AArch64/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AArch64AsmPrinter +parent = AArch64 +required_libraries = AArch64Utils MC Support +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/InstPrinter/Makefile b/lib/Target/AArch64/InstPrinter/Makefile new file mode 100644 index 0000000..1c36a8d --- /dev/null +++ b/lib/Target/AArch64/InstPrinter/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/AArch64/AsmPrinter/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. 
+LIBRARYNAME = LLVMAArch64AsmPrinter + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt new file mode 100644 index 0000000..3b296fd --- /dev/null +++ b/lib/Target/AArch64/LLVMBuild.txt @@ -0,0 +1,36 @@ +;===- ./lib/Target/AArch64/LLVMBuild.txt -----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Utils + +[component_0] +type = TargetGroup +name = AArch64 +parent = Target +has_asmparser = 1 +has_asmprinter = 1 +has_disassembler = 1 +;has_jit = 1 + +[component_1] +type = Library +name = AArch64CodeGen +parent = AArch64 +required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AsmPrinter CodeGen Core MC SelectionDAG Support Target +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp new file mode 100644 index 0000000..a3373b1 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -0,0 +1,585 @@ +//===-- AArch64AsmBackend.cpp - AArch64 Assembler Backend -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AArch64 implementation of the MCAsmBackend class, +// which is principally concerned with relaxation of the various fixup kinds. 
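+//
+// In brief: a fixup records where in an instruction word a symbol-dependent
+// value must be patched and how; adjustFixupValue below shifts each resolved
+// value into its encoding field, while anything that cannot be resolved at
+// assembly time is left to the linker as an ELF relocation.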
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { +class AArch64AsmBackend : public MCAsmBackend { + const MCSubtargetInfo* STI; +public: + AArch64AsmBackend(const Target &T, const StringRef TT) + : MCAsmBackend(), + STI(AArch64_MC::createAArch64MCSubtargetInfo(TT, "", "")) + {} + + + ~AArch64AsmBackend() { + delete STI; + } + + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const; + + virtual void processFixupValue(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + MCValue &Target, uint64_t &Value, + bool &IsResolved); +}; +} // end anonymous namespace + +void AArch64AsmBackend::processFixupValue(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFixup &Fixup, + const MCFragment *DF, + MCValue &Target, uint64_t &Value, + bool &IsResolved) { + // The ADRP instruction adds some multiple of 0x1000 to the current PC & + // ~0xfff. This means that the required offset to reach a symbol can vary by + // up to one step depending on where the ADRP is in memory. For example: + // + // ADRP x0, there + // there: + // + // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and + // we'll need that as an offset. At any other address "there" will be in the + // same page as the ADRP and the instruction should encode 0x0. Assuming the + // section isn't 0x1000-aligned, we therefore need to delegate this decision + // to the linker -- a relocation! + if ((uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_page || + (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_got_page || + (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_gottprel_page || + (uint32_t)Fixup.getKind() == AArch64::fixup_a64_tlsdesc_adr_page) + IsResolved = false; +} + + +static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value); + +namespace { + +class ELFAArch64AsmBackend : public AArch64AsmBackend { +public: + uint8_t OSABI; + ELFAArch64AsmBackend(const Target &T, const StringRef TT, + uint8_t _OSABI) + : AArch64AsmBackend(T, TT), OSABI(_OSABI) { } + + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const; + + unsigned int getNumFixupKinds() const { + return AArch64::NumTargetFixupKinds; + } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = { +// This table *must* be in the order that the fixup_* kinds are defined in +// AArch64FixupKinds.h. 
+// +// Name Offset (bits) Size (bits) Flags +{ "fixup_a64_ld_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_adr_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_adr_prel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_add_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst8_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst16_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst32_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst64_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst128_lo12", 0, 32, 0 }, +{ "fixup_a64_tstbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_condbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_uncondbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_call", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_movw_uabs_g0", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g0_nc", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g1", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g1_nc", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g2", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g2_nc", 0, 32, 0 }, +{ "fixup_a64_movw_uabs_g3", 0, 32, 0 }, +{ "fixup_a64_movw_sabs_g0", 0, 32, 0 }, +{ "fixup_a64_movw_sabs_g1", 0, 32, 0 }, +{ "fixup_a64_movw_sabs_g2", 0, 32, 0 }, +{ "fixup_a64_adr_prel_got_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_ld64_got_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_movw_dtprel_g2", 0, 32, 0 }, +{ "fixup_a64_movw_dtprel_g1", 0, 32, 0 }, +{ "fixup_a64_movw_dtprel_g1_nc", 0, 32, 0 }, +{ "fixup_a64_movw_dtprel_g0", 0, 32, 0 }, +{ "fixup_a64_movw_dtprel_g0_nc", 0, 32, 0 }, +{ "fixup_a64_add_dtprel_hi12", 0, 32, 0 }, +{ "fixup_a64_add_dtprel_lo12", 0, 32, 0 }, +{ "fixup_a64_add_dtprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst8_dtprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst8_dtprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst16_dtprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst16_dtprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst32_dtprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst32_dtprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst64_dtprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst64_dtprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_movw_gottprel_g1", 0, 32, 0 }, +{ "fixup_a64_movw_gottprel_g0_nc", 0, 32, 0 }, +{ "fixup_a64_adr_gottprel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_ld64_gottprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ld_gottprel_prel19", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_movw_tprel_g2", 0, 32, 0 }, +{ "fixup_a64_movw_tprel_g1", 0, 32, 0 }, +{ "fixup_a64_movw_tprel_g1_nc", 0, 32, 0 }, +{ "fixup_a64_movw_tprel_g0", 0, 32, 0 }, +{ "fixup_a64_movw_tprel_g0_nc", 0, 32, 0 }, +{ "fixup_a64_add_tprel_hi12", 0, 32, 0 }, +{ "fixup_a64_add_tprel_lo12", 0, 32, 0 }, +{ "fixup_a64_add_tprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst8_tprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst8_tprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst16_tprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst16_tprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst32_tprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst32_tprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_ldst64_tprel_lo12", 0, 32, 0 }, +{ "fixup_a64_ldst64_tprel_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_tlsdesc_adr_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_a64_tlsdesc_ld64_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_tlsdesc_add_lo12_nc", 0, 32, 0 }, +{ "fixup_a64_tlsdesc_call", 0, 0, 0 } + }; + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; + } + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const 
{
+    unsigned NumBytes = getFixupKindInfo(Fixup.getKind()).TargetSize / 8;
+    Value = adjustFixupValue(Fixup.getKind(), Value);
+    if (!Value) return; // Doesn't change encoding.
+
+    unsigned Offset = Fixup.getOffset();
+    assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+    // For each byte of the fragment that the fixup touches, mask in the bits
+    // from the fixup value.
+    for (unsigned i = 0; i != NumBytes; ++i) {
+      Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+    }
+  }
+
+  bool mayNeedRelaxation(const MCInst&) const {
+    return false;
+  }
+
+  void relaxInstruction(const MCInst&, llvm::MCInst&) const {
+    llvm_unreachable("Cannot relax instructions");
+  }
+
+  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+    return createAArch64ELFObjectWriter(OS, OSABI);
+  }
+};
+
+} // end anonymous namespace
+
+bool
+ELFAArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+                                           uint64_t Value,
+                                           const MCRelaxableFragment *DF,
+                                           const MCAsmLayout &Layout) const {
+  // Correct for now. With all instructions 32 bits wide, only very low-level
+  // considerations could make you select an encoding which might fail.
+  return false;
+}
+
+bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+  // Can't emit NOP data with a size that's not a multiple of 32 bits.
+  if (Count % 4 != 0)
+    return false;
+
+  uint64_t NumNops = Count / 4;
+  for (uint64_t i = 0; i != NumNops; ++i)
+    OW->Write32(0xd503201f);
+
+  return true;
+}
+
+static unsigned ADRImmBits(unsigned Value) {
+  unsigned lo2 = Value & 0x3;
+  unsigned hi19 = (Value & 0x1fffff) >> 2;
+
+  return (hi19 << 5) | (lo2 << 29);
+}
+
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
+  switch (Kind) {
+  default:
+    llvm_unreachable("Unknown fixup kind!");
+  case FK_Data_2:
+    assert((int64_t)Value >= -32768 &&
+           (int64_t)Value <= 65535 &&
+           "Out of range ABS16 fixup");
+    return Value;
+  case FK_Data_4:
+    assert((int64_t)Value >= -(1LL << 31) &&
+           (int64_t)Value <= (1LL << 32) - 1 &&
+           "Out of range ABS32 fixup");
+    return Value;
+  case FK_Data_8:
+    return Value;
+
+  case AArch64::fixup_a64_ld_gottprel_prel19:
+    // R_AARCH64_LD_GOTTPREL_PREL19: Set a load-literal immediate to bits 1F
+    // FFFC of G(TPREL(S+A)) - P; check -2^20 <= X < 2^20.
+  case AArch64::fixup_a64_ld_prel:
+    // R_AARCH64_LD_PREL_LO19: Sets a load-literal (immediate) value to bits
+    // 1F FFFC of S+A-P, checking that -2^20 <= S+A-P < 2^20.
+    assert((int64_t)Value >= -(1LL << 20) &&
+           (int64_t)Value < (1LL << 20) && "Out of range LDR (lit) fixup");
+    return (Value & 0x1ffffc) << 3;
+
+  case AArch64::fixup_a64_adr_prel:
+    // R_AARCH64_ADR_PREL_LO21: Sets an ADR immediate value to bits 1F FFFF of
+    // the result of S+A-P, checking that -2^20 <= S+A-P < 2^20.
+    assert((int64_t)Value >= -(1LL << 20) &&
+           (int64_t)Value < (1LL << 20) && "Out of range ADR fixup");
+    return ADRImmBits(Value & 0x1fffff);
+
+  case AArch64::fixup_a64_adr_prel_page:
+    // R_AARCH64_ADR_PREL_PG_HI21: Sets an ADRP immediate value to bits 1 FFFF
+    // F000 of the result of the operation, checking that -2^32 <= result <
+    // 2^32.
+    assert((int64_t)Value >= -(1LL << 32) &&
+           (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup");
+    return ADRImmBits((Value & 0x1fffff000ULL) >> 12);
+
+  case AArch64::fixup_a64_add_dtprel_hi12:
+    // R_AARCH64_TLSLD_ADD_DTPREL_HI12: Set an ADD immediate field to bits
+    // FF F000 of DTPREL(S+A), check 0 <= X < 2^24.
+  case AArch64::fixup_a64_add_tprel_hi12:
+    // R_AARCH64_TLSLE_ADD_TPREL_HI12: Set an ADD immediate field to bits
+    // FF F000 of TPREL(S+A), check 0 <= X < 2^24.
+    assert((int64_t)Value >= 0 &&
+           (int64_t)Value < (1LL << 24) && "Out of range ADD fixup");
+    return (Value & 0xfff000) >> 2;
+
+  case AArch64::fixup_a64_add_dtprel_lo12:
+    // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits
+    // FFF of DTPREL(S+A), check 0 <= X < 2^12.
+  case AArch64::fixup_a64_add_tprel_lo12:
+    // R_AARCH64_TLSLE_ADD_TPREL_LO12: Set an ADD immediate field to bits
+    // FFF of TPREL(S+A), check 0 <= X < 2^12.
+    assert((int64_t)Value >= 0 &&
+           (int64_t)Value < (1LL << 12) && "Out of range ADD fixup");
+    // ... fallthrough to no-checking versions ...
+  case AArch64::fixup_a64_add_dtprel_lo12_nc:
+    // R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC: Set an ADD immediate field to bits
+    // FFF of DTPREL(S+A) with no overflow check.
+  case AArch64::fixup_a64_add_tprel_lo12_nc:
+    // R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: Set an ADD immediate field to bits
+    // FFF of TPREL(S+A) with no overflow check.
+  case AArch64::fixup_a64_tlsdesc_add_lo12_nc:
+    // R_AARCH64_TLSDESC_ADD_LO12_NC: Set an ADD immediate field to bits
+    // FFF of G(TLSDESC(S+A)), with no overflow check.
+  case AArch64::fixup_a64_add_lo12:
+    // R_AARCH64_ADD_ABS_LO12_NC: Sets an ADD immediate value to bits FFF of
+    // S+A, with no overflow check.
+    return (Value & 0xfff) << 10;
+
+  case AArch64::fixup_a64_ldst8_dtprel_lo12:
+    // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF
+    // of DTPREL(S+A), check 0 <= X < 2^12.
+  case AArch64::fixup_a64_ldst8_tprel_lo12:
+    // R_AARCH64_TLSLE_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF
+    // of TPREL(S+A), check 0 <= X < 2^12.
+    assert((int64_t) Value >= 0 &&
+           (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+    // ... fallthrough to no-checking versions ...
+  case AArch64::fixup_a64_ldst8_dtprel_lo12_nc:
+    // R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FFF of DTPREL(S+A), with no overflow check.
+  case AArch64::fixup_a64_ldst8_tprel_lo12_nc:
+    // R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FFF of TPREL(S+A), with no overflow check.
+  case AArch64::fixup_a64_ldst8_lo12:
+    // R_AARCH64_LDST8_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFF
+    // of S+A, with no overflow check.
+    return (Value & 0xfff) << 10;
+
+  case AArch64::fixup_a64_ldst16_dtprel_lo12:
+    // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits
+    // FFE of DTPREL(S+A), check 0 <= X < 2^12.
+  case AArch64::fixup_a64_ldst16_tprel_lo12:
+    // R_AARCH64_TLSLE_LDST16_TPREL_LO12: Set an LD/ST offset field to bits
+    // FFE of TPREL(S+A), check 0 <= X < 2^12.
+    assert((int64_t) Value >= 0 &&
+           (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+    // ... fallthrough to no-checking versions ...
+  case AArch64::fixup_a64_ldst16_dtprel_lo12_nc:
+    // R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FFE of DTPREL(S+A), with no overflow check.
+  case AArch64::fixup_a64_ldst16_tprel_lo12_nc:
+    // R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FFE of TPREL(S+A), with no overflow check.
+  case AArch64::fixup_a64_ldst16_lo12:
+    // R_AARCH64_LDST16_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFE
+    // of S+A, with no overflow check.
+    return (Value & 0xffe) << 9;
+
+  case AArch64::fixup_a64_ldst32_dtprel_lo12:
+    // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits
+    // FFC of DTPREL(S+A), check 0 <= X < 2^12.
+  case AArch64::fixup_a64_ldst32_tprel_lo12:
+    // R_AARCH64_TLSLE_LDST32_TPREL_LO12: Set an LD/ST offset field to bits
+    // FFC of TPREL(S+A), check 0 <= X < 2^12.
+    assert((int64_t) Value >= 0 &&
+           (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+    // ... fallthrough to no-checking versions ...
+  case AArch64::fixup_a64_ldst32_dtprel_lo12_nc:
+    // R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FFC of DTPREL(S+A), with no overflow check.
+  case AArch64::fixup_a64_ldst32_tprel_lo12_nc:
+    // R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FFC of TPREL(S+A), with no overflow check.
+  case AArch64::fixup_a64_ldst32_lo12:
+    // R_AARCH64_LDST32_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFC
+    // of S+A, with no overflow check.
+    return (Value & 0xffc) << 8;
+
+  case AArch64::fixup_a64_ldst64_dtprel_lo12:
+    // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits
+    // FF8 of DTPREL(S+A), check 0 <= X < 2^12.
+  case AArch64::fixup_a64_ldst64_tprel_lo12:
+    // R_AARCH64_TLSLE_LDST64_TPREL_LO12: Set an LD/ST offset field to bits
+    // FF8 of TPREL(S+A), check 0 <= X < 2^12.
+    assert((int64_t) Value >= 0 &&
+           (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+    // ... fallthrough to no-checking versions ...
+  case AArch64::fixup_a64_ldst64_dtprel_lo12_nc:
+    // R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FF8 of DTPREL(S+A), with no overflow check.
+  case AArch64::fixup_a64_ldst64_tprel_lo12_nc:
+    // R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FF8 of TPREL(S+A), with no overflow check.
+  case AArch64::fixup_a64_ldst64_lo12:
+    // R_AARCH64_LDST64_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF8
+    // of S+A, with no overflow check.
+    return (Value & 0xff8) << 7;
+
+  case AArch64::fixup_a64_ldst128_lo12:
+    // R_AARCH64_LDST128_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF0
+    // of S+A, with no overflow check.
+    return (Value & 0xff0) << 6;
+
+  case AArch64::fixup_a64_movw_uabs_g0:
+    // R_AARCH64_MOVW_UABS_G0: Sets a MOVZ immediate field to bits FFFF of S+A
+    // with a check that S+A < 2^16
+    assert(Value <= 0xffff && "Out of range move wide fixup");
+    return (Value & 0xffff) << 5;
+
+  case AArch64::fixup_a64_movw_dtprel_g0_nc:
+    // R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC: Sets a MOVK immediate field to bits
+    // FFFF of DTPREL(S+A) with no overflow check.
+  case AArch64::fixup_a64_movw_gottprel_g0_nc:
+    // R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: Sets a MOVK immediate field to bits
+    // FFFF of G(TPREL(S+A)) - GOT with no overflow check.
+  case AArch64::fixup_a64_movw_tprel_g0_nc:
+    // R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: Sets a MOVK immediate field to bits
+    // FFFF of TPREL(S+A) with no overflow check.
+  case AArch64::fixup_a64_movw_uabs_g0_nc:
+    // R_AARCH64_MOVW_UABS_G0_NC: Sets a MOVK immediate field to bits FFFF of
+    // S+A with no overflow check.
+    return (Value & 0xffff) << 5;
+
+  case AArch64::fixup_a64_movw_uabs_g1:
+    // R_AARCH64_MOVW_UABS_G1: Sets a MOVZ immediate field to bits FFFF0000 of
+    // S+A with a check that S+A < 2^32
+    assert(Value <= 0xffffffffull && "Out of range move wide fixup");
+    return ((Value >> 16) & 0xffff) << 5;
+
+  case AArch64::fixup_a64_movw_dtprel_g1_nc:
+    // R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC: Set a MOVK immediate field
+    // to bits FFFF0000 of DTPREL(S+A), with no overflow check.
+  case AArch64::fixup_a64_movw_tprel_g1_nc:
+    // R_AARCH64_TLSLE_MOVW_TPREL_G1_NC: Set a MOVK immediate field
+    // to bits FFFF0000 of TPREL(S+A), with no overflow check.
+  case AArch64::fixup_a64_movw_uabs_g1_nc:
+    // R_AARCH64_MOVW_UABS_G1_NC: Sets a MOVK immediate field to bits
+    // FFFF0000 of S+A with no overflow check.
+    return ((Value >> 16) & 0xffff) << 5;
+
+  case AArch64::fixup_a64_movw_uabs_g2:
+    // R_AARCH64_MOVW_UABS_G2: Sets a MOVZ immediate field to bits FFFF 0000
+    // 0000 of S+A with a check that S+A < 2^48
+    assert(Value <= 0xffffffffffffull && "Out of range move wide fixup");
+    return ((Value >> 32) & 0xffff) << 5;
+
+  case AArch64::fixup_a64_movw_uabs_g2_nc:
+    // R_AARCH64_MOVW_UABS_G2_NC: Sets a MOVK immediate field to bits FFFF 0000
+    // 0000 of S+A with no overflow check.
+    return ((Value >> 32) & 0xffff) << 5;
+
+  case AArch64::fixup_a64_movw_uabs_g3:
+    // R_AARCH64_MOVW_UABS_G3: Sets a MOVZ immediate field to bits FFFF 0000
+    // 0000 0000 of S+A (no overflow check needed)
+    return ((Value >> 48) & 0xffff) << 5;
+
+  case AArch64::fixup_a64_movw_dtprel_g0:
+    // R_AARCH64_TLSLD_MOVW_DTPREL_G0: Set a MOV[NZ] immediate field
+    // to bits FFFF of DTPREL(S+A).
+  case AArch64::fixup_a64_movw_tprel_g0:
+    // R_AARCH64_TLSLE_MOVW_TPREL_G0: Set a MOV[NZ] immediate field to
+    // bits FFFF of TPREL(S+A).
+  case AArch64::fixup_a64_movw_sabs_g0: {
+    // R_AARCH64_MOVW_SABS_G0: Sets MOV[NZ] immediate field using bits FFFF of
+    // S+A (see notes below); check -2^16 <= S+A < 2^16. (notes say that we
+    // should convert between MOVN and MOVZ to achieve our goals).
+    int64_t Signed = Value;
+    assert(Signed >= -(1LL << 16) && Signed < (1LL << 16)
+           && "Out of range move wide fixup");
+    if (Signed >= 0) {
+      Value = (Value & 0xffff) << 5;
+      // Bit 30 converts the MOVN encoding into a MOVZ
+      Value |= 1 << 30;
+    } else {
+      // MCCodeEmitter should have encoded a MOVN, which is fine.
+      Value = (~Value & 0xffff) << 5;
+    }
+    return Value;
+  }
+
+  case AArch64::fixup_a64_movw_dtprel_g1:
+    // R_AARCH64_TLSLD_MOVW_DTPREL_G1: Set a MOV[NZ] immediate field
+    // to bits FFFF0000 of DTPREL(S+A).
+  case AArch64::fixup_a64_movw_gottprel_g1:
+    // R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: Set a MOV[NZ] immediate field
+    // to bits FFFF0000 of G(TPREL(S+A)) - GOT.
+  case AArch64::fixup_a64_movw_tprel_g1:
+    // R_AARCH64_TLSLE_MOVW_TPREL_G1: Set a MOV[NZ] immediate field to
+    // bits FFFF0000 of TPREL(S+A).
+  case AArch64::fixup_a64_movw_sabs_g1: {
+    // R_AARCH64_MOVW_SABS_G1: Sets MOV[NZ] immediate field using bits FFFF
+    // 0000 of S+A (see notes below); check -2^32 <= S+A < 2^32. (notes say
+    // that we should convert between MOVN and MOVZ to achieve our goals).
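+    //
+    // For example, a value of 0x12340000 is non-negative, so the code below
+    // places ((0x12340000 >> 16) & 0xffff) == 0x1234 in the immediate field
+    // and sets bit 30, converting the MOVN base encoding into a MOVZ.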
+ int64_t Signed = Value; + assert(Signed >= -(1LL << 32) && Signed < (1LL << 32) + && "Out of range move wide fixup"); + if (Signed >= 0) { + Value = ((Value >> 16) & 0xffff) << 5; + // Bit 30 converts the MOVN encoding into a MOVZ + Value |= 1 << 30; + } else { + Value = ((~Value >> 16) & 0xffff) << 5; + } + return Value; + } + + case AArch64::fixup_a64_movw_dtprel_g2: + // R_AARCH64_TLSLD_MOVW_DTPREL_G2: Set a MOV[NZ] immediate field + // to bits FFFF 0000 0000 of DTPREL(S+A). + case AArch64::fixup_a64_movw_tprel_g2: + // R_AARCH64_TLSLE_MOVW_TPREL_G2: Set a MOV[NZ] immediate field to + // bits FFFF 0000 0000 of TPREL(S+A). + case AArch64::fixup_a64_movw_sabs_g2: { + // R_AARCH64_MOVW_SABS_G2: Sets MOV[NZ] immediate field using bits FFFF 0000 + // 0000 of S+A (see notes below); check -2^48 <= S+A < 2^48. (notes say that + // we should convert between MOVN and MOVZ to achieve our goals). + int64_t Signed = Value; + assert(Signed >= -(1LL << 48) && Signed < (1LL << 48) + && "Out of range move wide fixup"); + if (Signed >= 0) { + Value = ((Value >> 32) & 0xffff) << 5; + // Bit 30 converts the MOVN encoding into a MOVZ + Value |= 1 << 30; + } else { + Value = ((~Value >> 32) & 0xffff) << 5; + } + return Value; + } + + case AArch64::fixup_a64_tstbr: + // R_AARCH64_TSTBR14: Sets the immediate field of a TBZ/TBNZ instruction to + // bits FFFC of S+A-P, checking -2^15 <= S+A-P < 2^15. + assert((int64_t)Value >= -(1LL << 15) && + (int64_t)Value < (1LL << 15) && "Out of range TBZ/TBNZ fixup"); + return (Value & 0xfffc) << (5 - 2); + + case AArch64::fixup_a64_condbr: + // R_AARCH64_CONDBR19: Sets the immediate field of a conditional branch + // instruction to bits 1FFFFC of S+A-P, checking -2^20 <= S+A-P < 2^20. + assert((int64_t)Value >= -(1LL << 20) && + (int64_t)Value < (1LL << 20) && "Out of range B.cond fixup"); + return (Value & 0x1ffffc) << (5 - 2); + + case AArch64::fixup_a64_uncondbr: + // R_AARCH64_JUMP26 same as below (except to a linker, possibly). + case AArch64::fixup_a64_call: + // R_AARCH64_CALL26: Sets a CALL immediate field to bits FFFFFFC of S+A-P, + // checking that -2^27 <= S+A-P < 2^27. + assert((int64_t)Value >= -(1LL << 27) && + (int64_t)Value < (1LL << 27) && "Out of range branch fixup"); + return (Value & 0xffffffc) >> 2; + + case AArch64::fixup_a64_adr_gottprel_page: + // R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: Set an ADRP immediate field to bits + // 1FFFFF000 of Page(G(TPREL(S+A))) - Page(P); check -2^32 <= X < 2^32. + case AArch64::fixup_a64_tlsdesc_adr_page: + // R_AARCH64_TLSDESC_ADR_PAGE: Set an ADRP immediate field to bits 1FFFFF000 + // of Page(G(TLSDESC(S+A))) - Page(P); check -2^32 <= X < 2^32. + case AArch64::fixup_a64_adr_prel_got_page: + // R_AARCH64_ADR_GOT_PAGE: Sets the immediate value of an ADRP to bits + // 1FFFFF000 of the operation, checking that -2^32 < Page(G(S))-Page(GOT) < + // 2^32. + assert((int64_t)Value >= -(1LL << 32) && + (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup"); + return ADRImmBits((Value & 0x1fffff000ULL) >> 12); + + case AArch64::fixup_a64_ld64_gottprel_lo12_nc: + // R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: Set an LD offset field to bits FF8 + // of X, with no overflow check. Check that X & 7 == 0. + case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc: + // R_AARCH64_TLSDESC_LD64_LO12_NC: Set an LD offset field to bits FF8 of + // G(TLSDESC(S+A)), with no overflow check. Check that X & 7 == 0. 
+ case AArch64::fixup_a64_ld64_got_lo12_nc: + // R_AARCH64_LD64_GOT_LO12_NC: Sets the LD/ST immediate field to bits FF8 of + // G(S) with no overflow check. Check X & 7 == 0 + assert(((int64_t)Value & 7) == 0 && "Misaligned fixup"); + return (Value & 0xff8) << 7; + + case AArch64::fixup_a64_tlsdesc_call: + // R_AARCH64_TLSDESC_CALL: For relaxation only. + return 0; + } +} + +MCAsmBackend * +llvm::createAArch64AsmBackend(const Target &T, StringRef TT, StringRef CPU) { + Triple TheTriple(TT); + + return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS()); +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp new file mode 100644 index 0000000..4bcc65d --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -0,0 +1,292 @@ +//===-- AArch64ELFObjectWriter.cpp - AArch64 ELF Writer -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file handles ELF-specific object emission, converting LLVM's internal +// fixups into the appropriate relocations. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +namespace { +class AArch64ELFObjectWriter : public MCELFObjectTargetWriter { +public: + AArch64ELFObjectWriter(uint8_t OSABI); + + virtual ~AArch64ELFObjectWriter(); + +protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const; +private: +}; +} + +AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64, + /*HasRelocationAddend*/ true) +{} + +AArch64ELFObjectWriter::~AArch64ELFObjectWriter() +{} + +unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + unsigned Type; + if (IsPCRel) { + switch ((unsigned)Fixup.getKind()) { + default: + llvm_unreachable("Unimplemented fixup -> relocation"); + case FK_Data_8: + return ELF::R_AARCH64_PREL64; + case FK_Data_4: + return ELF::R_AARCH64_PREL32; + case FK_Data_2: + return ELF::R_AARCH64_PREL16; + case AArch64::fixup_a64_ld_prel: + Type = ELF::R_AARCH64_LD_PREL_LO19; + break; + case AArch64::fixup_a64_adr_prel: + Type = ELF::R_AARCH64_ADR_PREL_LO21; + break; + case AArch64::fixup_a64_adr_prel_page: + Type = ELF::R_AARCH64_ADR_PREL_PG_HI21; + break; + case AArch64::fixup_a64_adr_prel_got_page: + Type = ELF::R_AARCH64_ADR_GOT_PAGE; + break; + case AArch64::fixup_a64_tstbr: + Type = ELF::R_AARCH64_TSTBR14; + break; + case AArch64::fixup_a64_condbr: + Type = ELF::R_AARCH64_CONDBR19; + break; + case AArch64::fixup_a64_uncondbr: + Type = ELF::R_AARCH64_JUMP26; + break; + case AArch64::fixup_a64_call: + Type = ELF::R_AARCH64_CALL26; + break; + case AArch64::fixup_a64_adr_gottprel_page: + Type = ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21; + break; + case AArch64::fixup_a64_ld_gottprel_prel19: + Type = ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19; + break; + case AArch64::fixup_a64_tlsdesc_adr_page: + Type = 
ELF::R_AARCH64_TLSDESC_ADR_PAGE; + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: + llvm_unreachable("Unimplemented fixup -> relocation"); + case FK_Data_8: + return ELF::R_AARCH64_ABS64; + case FK_Data_4: + return ELF::R_AARCH64_ABS32; + case FK_Data_2: + return ELF::R_AARCH64_ABS16; + case AArch64::fixup_a64_add_lo12: + Type = ELF::R_AARCH64_ADD_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ld64_got_lo12_nc: + Type = ELF::R_AARCH64_LD64_GOT_LO12_NC; + break; + case AArch64::fixup_a64_ldst8_lo12: + Type = ELF::R_AARCH64_LDST8_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ldst16_lo12: + Type = ELF::R_AARCH64_LDST16_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ldst32_lo12: + Type = ELF::R_AARCH64_LDST32_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ldst64_lo12: + Type = ELF::R_AARCH64_LDST64_ABS_LO12_NC; + break; + case AArch64::fixup_a64_ldst128_lo12: + Type = ELF::R_AARCH64_LDST128_ABS_LO12_NC; + break; + case AArch64::fixup_a64_movw_uabs_g0: + Type = ELF::R_AARCH64_MOVW_UABS_G0; + break; + case AArch64::fixup_a64_movw_uabs_g0_nc: + Type = ELF::R_AARCH64_MOVW_UABS_G0_NC; + break; + case AArch64::fixup_a64_movw_uabs_g1: + Type = ELF::R_AARCH64_MOVW_UABS_G1; + break; + case AArch64::fixup_a64_movw_uabs_g1_nc: + Type = ELF::R_AARCH64_MOVW_UABS_G1_NC; + break; + case AArch64::fixup_a64_movw_uabs_g2: + Type = ELF::R_AARCH64_MOVW_UABS_G2; + break; + case AArch64::fixup_a64_movw_uabs_g2_nc: + Type = ELF::R_AARCH64_MOVW_UABS_G2_NC; + break; + case AArch64::fixup_a64_movw_uabs_g3: + Type = ELF::R_AARCH64_MOVW_UABS_G3; + break; + case AArch64::fixup_a64_movw_sabs_g0: + Type = ELF::R_AARCH64_MOVW_SABS_G0; + break; + case AArch64::fixup_a64_movw_sabs_g1: + Type = ELF::R_AARCH64_MOVW_SABS_G1; + break; + case AArch64::fixup_a64_movw_sabs_g2: + Type = ELF::R_AARCH64_MOVW_SABS_G2; + break; + + // TLS Local-dynamic block + case AArch64::fixup_a64_movw_dtprel_g2: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2; + break; + case AArch64::fixup_a64_movw_dtprel_g1: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1; + break; + case AArch64::fixup_a64_movw_dtprel_g1_nc: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC; + break; + case AArch64::fixup_a64_movw_dtprel_g0: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0; + break; + case AArch64::fixup_a64_movw_dtprel_g0_nc: + Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC; + break; + case AArch64::fixup_a64_add_dtprel_hi12: + Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12; + break; + case AArch64::fixup_a64_add_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12; + break; + case AArch64::fixup_a64_add_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst8_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12; + break; + case AArch64::fixup_a64_ldst8_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst16_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12; + break; + case AArch64::fixup_a64_ldst16_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst32_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12; + break; + case AArch64::fixup_a64_ldst32_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst64_dtprel_lo12: + Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12; + break; + case AArch64::fixup_a64_ldst64_dtprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC; + break; + + // TLS initial-exec 
block + case AArch64::fixup_a64_movw_gottprel_g1: + Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1; + break; + case AArch64::fixup_a64_movw_gottprel_g0_nc: + Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC; + break; + case AArch64::fixup_a64_ld64_gottprel_lo12_nc: + Type = ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; + break; + + // TLS local-exec block + case AArch64::fixup_a64_movw_tprel_g2: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2; + break; + case AArch64::fixup_a64_movw_tprel_g1: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1; + break; + case AArch64::fixup_a64_movw_tprel_g1_nc: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC; + break; + case AArch64::fixup_a64_movw_tprel_g0: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0; + break; + case AArch64::fixup_a64_movw_tprel_g0_nc: + Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC; + break; + case AArch64::fixup_a64_add_tprel_hi12: + Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12; + break; + case AArch64::fixup_a64_add_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12; + break; + case AArch64::fixup_a64_add_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst8_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12; + break; + case AArch64::fixup_a64_ldst8_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst16_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12; + break; + case AArch64::fixup_a64_ldst16_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst32_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12; + break; + case AArch64::fixup_a64_ldst32_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; + break; + case AArch64::fixup_a64_ldst64_tprel_lo12: + Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12; + break; + case AArch64::fixup_a64_ldst64_tprel_lo12_nc: + Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; + break; + + // TLS general-dynamic block + case AArch64::fixup_a64_tlsdesc_adr_page: + Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE; + break; + case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc: + Type = ELF::R_AARCH64_TLSDESC_LD64_LO12_NC; + break; + case AArch64::fixup_a64_tlsdesc_add_lo12_nc: + Type = ELF::R_AARCH64_TLSDESC_ADD_LO12_NC; + break; + case AArch64::fixup_a64_tlsdesc_call: + Type = ELF::R_AARCH64_TLSDESC_CALL; + break; + } + } + + return Type; +} + +MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp new file mode 100644 index 0000000..b83577a --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -0,0 +1,160 @@ +//===- lib/MC/AArch64ELFStreamer.cpp - ELF Object Output for AArch64 ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file assembles .s files and emits AArch64 ELF .o object files. Different +// from generic ELF streamer in emitting mapping symbols ($x and $d) to delimit +// regions of data and code. 
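+// For example, a section that mixes code and literal data would be labelled
+// roughly as follows (illustrative layout):
+//     $x:  add x0, x0, #1       // start of an A64 code region
+//     $d:  .word 0x12345678     // start of a data region
+// allowing disassemblers and other consumers to switch interpretation at
+// each mapping symbol.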
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELF.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +/// Extend the generic ELFStreamer class so that it can emit mapping symbols at +/// the appropriate points in the object files. These symbols are defined in the +/// AArch64 ELF ABI: +/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf +/// +/// In brief: $x or $d should be emitted at the start of each contiguous region +/// of A64 code or data in a section. In practice, this emission does not rely +/// on explicit assembler directives but on inherent properties of the +/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an +/// instruction). +/// +/// As a result this system is orthogonal to the DataRegion infrastructure used +/// by MachO. Beware! +class AArch64ELFStreamer : public MCELFStreamer { +public: + AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCELFStreamer(Context, TAB, OS, Emitter), + MappingSymbolCounter(0), LastEMS(EMS_None) { + } + + ~AArch64ELFStreamer() {} + + virtual void ChangeSection(const MCSection *Section) { + // We have to keep track of the mapping symbol state of any sections we + // use. Each one should start off as EMS_None, which is provided as the + // default constructor by DenseMap::lookup. + LastMappingSymbols[getPreviousSection()] = LastEMS; + LastEMS = LastMappingSymbols.lookup(Section); + + MCELFStreamer::ChangeSection(Section); + } + + /// This function is the one used to emit instruction data into the ELF + /// streamer. We override it to add the appropriate mapping symbol if + /// necessary. + virtual void EmitInstruction(const MCInst& Inst) { + EmitA64MappingSymbol(); + MCELFStreamer::EmitInstruction(Inst); + } + + /// This is one of the functions used to emit data into an ELF section, so the + /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) + /// if necessary. + virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { + EmitDataMappingSymbol(); + MCELFStreamer::EmitBytes(Data, AddrSpace); + } + + /// This is one of the functions used to emit data into an ELF section, so the + /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) + /// if necessary. 
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + EmitDataMappingSymbol(); + MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace); + } + +private: + enum ElfMappingSymbol { + EMS_None, + EMS_A64, + EMS_Data + }; + + void EmitDataMappingSymbol() { + if (LastEMS == EMS_Data) return; + EmitMappingSymbol("$d"); + LastEMS = EMS_Data; + } + + void EmitA64MappingSymbol() { + if (LastEMS == EMS_A64) return; + EmitMappingSymbol("$x"); + LastEMS = EMS_A64; + } + + void EmitMappingSymbol(StringRef Name) { + MCSymbol *Start = getContext().CreateTempSymbol(); + EmitLabel(Start); + + MCSymbol *Symbol = + getContext().GetOrCreateSymbol(Name + "." + + Twine(MappingSymbolCounter++)); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + MCELF::SetType(SD, ELF::STT_NOTYPE); + MCELF::SetBinding(SD, ELF::STB_LOCAL); + SD.setExternal(false); + Symbol->setSection(*getCurrentSection()); + + const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); + Symbol->setVariableValue(Value); + } + + int64_t MappingSymbolCounter; + + DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols; + ElfMappingSymbol LastEMS; + + /// @} +}; +} + +namespace llvm { + MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack) { + AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + if (NoExecStack) + S->getAssembler().setNoExecStack(true); + return S; + } +} + + diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h new file mode 100644 index 0000000..5a89ca5 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h @@ -0,0 +1,27 @@ +//===-- AArch64ELFStreamer.h - ELF Streamer for AArch64 ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF streamer information for the AArch64 backend. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64_ELF_STREAMER_H +#define LLVM_AARCH64_ELF_STREAMER_H + +#include "llvm/MC/MCELFStreamer.h" + +namespace llvm { + + MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack); +} + +#endif // AArch64_ELF_STREAMER_H diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h new file mode 100644 index 0000000..eeb122d --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h @@ -0,0 +1,113 @@ +//=- AArch64/AArch64FixupKinds.h - AArch64 Specific Fixup Entries -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the LLVM fixups applied to MCInsts in the AArch64 +// backend. 
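// Illustrative aside (not part of the patch): each fixup kind below is
// recorded against an instruction while it is being encoded, then either
// resolved by the assembler or converted into one of the ELF relocations
// listed in AArch64ELFObjectWriter. A minimal sketch of the recording
// step, mirroring getAddressWithFixup in AArch64MCCodeEmitter.cpp; the
// helper name encodeSymbolicOperand is hypothetical.
static unsigned encodeSymbolicOperand(const MCExpr *Expr,
                                      SmallVectorImpl<MCFixup> &Fixups) {
  // Offset 0: an A64 fixup always applies to the single 32-bit word that
  // encodes the instruction.
  Fixups.push_back(MCFixup::Create(0, Expr,
                                   MCFixupKind(AArch64::fixup_a64_add_lo12)));
  return 0; // the real bits are patched in when the fixup is applied
}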
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64_AARCH64FIXUPKINDS_H +#define LLVM_AARCH64_AARCH64FIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { + namespace AArch64 { + enum Fixups { + fixup_a64_ld_prel = FirstTargetFixupKind, + fixup_a64_adr_prel, + fixup_a64_adr_prel_page, + + fixup_a64_add_lo12, + + fixup_a64_ldst8_lo12, + fixup_a64_ldst16_lo12, + fixup_a64_ldst32_lo12, + fixup_a64_ldst64_lo12, + fixup_a64_ldst128_lo12, + + fixup_a64_tstbr, + fixup_a64_condbr, + fixup_a64_uncondbr, + fixup_a64_call, + + fixup_a64_movw_uabs_g0, + fixup_a64_movw_uabs_g0_nc, + fixup_a64_movw_uabs_g1, + fixup_a64_movw_uabs_g1_nc, + fixup_a64_movw_uabs_g2, + fixup_a64_movw_uabs_g2_nc, + fixup_a64_movw_uabs_g3, + + fixup_a64_movw_sabs_g0, + fixup_a64_movw_sabs_g1, + fixup_a64_movw_sabs_g2, + + fixup_a64_adr_prel_got_page, + fixup_a64_ld64_got_lo12_nc, + + // Produce offsets relative to the module's dynamic TLS area. + fixup_a64_movw_dtprel_g2, + fixup_a64_movw_dtprel_g1, + fixup_a64_movw_dtprel_g1_nc, + fixup_a64_movw_dtprel_g0, + fixup_a64_movw_dtprel_g0_nc, + fixup_a64_add_dtprel_hi12, + fixup_a64_add_dtprel_lo12, + fixup_a64_add_dtprel_lo12_nc, + fixup_a64_ldst8_dtprel_lo12, + fixup_a64_ldst8_dtprel_lo12_nc, + fixup_a64_ldst16_dtprel_lo12, + fixup_a64_ldst16_dtprel_lo12_nc, + fixup_a64_ldst32_dtprel_lo12, + fixup_a64_ldst32_dtprel_lo12_nc, + fixup_a64_ldst64_dtprel_lo12, + fixup_a64_ldst64_dtprel_lo12_nc, + + // Produce the GOT entry containing a variable's address in TLS's + // initial-exec mode. + fixup_a64_movw_gottprel_g1, + fixup_a64_movw_gottprel_g0_nc, + fixup_a64_adr_gottprel_page, + fixup_a64_ld64_gottprel_lo12_nc, + fixup_a64_ld_gottprel_prel19, + + // Produce offsets relative to the thread pointer: TPIDR_EL0. + fixup_a64_movw_tprel_g2, + fixup_a64_movw_tprel_g1, + fixup_a64_movw_tprel_g1_nc, + fixup_a64_movw_tprel_g0, + fixup_a64_movw_tprel_g0_nc, + fixup_a64_add_tprel_hi12, + fixup_a64_add_tprel_lo12, + fixup_a64_add_tprel_lo12_nc, + fixup_a64_ldst8_tprel_lo12, + fixup_a64_ldst8_tprel_lo12_nc, + fixup_a64_ldst16_tprel_lo12, + fixup_a64_ldst16_tprel_lo12_nc, + fixup_a64_ldst32_tprel_lo12, + fixup_a64_ldst32_tprel_lo12_nc, + fixup_a64_ldst64_tprel_lo12, + fixup_a64_ldst64_tprel_lo12_nc, + + // Produce the special fixups used by the general-dynamic TLS model. + fixup_a64_tlsdesc_adr_page, + fixup_a64_tlsdesc_ld64_lo12_nc, + fixup_a64_tlsdesc_add_lo12_nc, + fixup_a64_tlsdesc_call, + + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind + }; + } +} + +#endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp new file mode 100644 index 0000000..8ec8cbf --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -0,0 +1,41 @@ +//===-- AArch64MCAsmInfo.cpp - AArch64 asm properties ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the AArch64MCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "AArch64MCAsmInfo.h" + +using namespace llvm; + +AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo() { + PointerSize = 8; + + // ".comm align is in bytes but .align is pow-2." 
+ AlignmentIsInBytes = false; + + CommentString = "//"; + PrivateGlobalPrefix = ".L"; + Code32Directive = ".code\t32"; + + Data16bitsDirective = "\t.hword\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = "\t.xword\t"; + + UseDataRegionDirectives = true; + + WeakRefDirective = "\t.weak\t"; + + HasLEB128 = true; + SupportsDebugInformation = true; + + // Exceptions handling + ExceptionsType = ExceptionHandling::DwarfCFI; +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h new file mode 100644 index 0000000..a20bc47 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -0,0 +1,27 @@ +//==-- AArch64MCAsmInfo.h - AArch64 asm properties -------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the AArch64MCAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64TARGETASMINFO_H +#define LLVM_AARCH64TARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + + struct AArch64ELFMCAsmInfo : public MCAsmInfo { + explicit AArch64ELFMCAsmInfo(); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp new file mode 100644 index 0000000..756e037 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -0,0 +1,513 @@ +//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code =// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AArch64MCCodeEmitter class. 
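// Illustrative aside (not part of the patch): EncodeInstruction in this
// file ultimately writes each fixed-width 4-byte A64 instruction with
// the least-significant byte first. A standalone sketch of that byte
// order, using NOP (0xd503201f) as the worked example:
#include <cstdint>
#include <cstdio>

static void emitLE32(uint32_t Val, unsigned char Out[4]) {
  for (unsigned i = 0; i != 4; ++i) {
    Out[i] = Val & 0xff; // low byte first
    Val >>= 8;
  }
}

int main() {
  unsigned char B[4];
  emitLE32(0xd503201f, B); // A64 NOP
  std::printf("%02x %02x %02x %02x\n", B[0], B[1], B[2], B[3]);
  // prints: 1f 20 03 d5
  return 0;
}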
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mccodeemitter" +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCExpr.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +class AArch64MCCodeEmitter : public MCCodeEmitter { + AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION; + MCContext &Ctx; + +public: + AArch64MCCodeEmitter(MCContext &ctx) : Ctx(ctx) {} + + ~AArch64MCCodeEmitter() {} + + unsigned getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + unsigned getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + template<int MemSize> + unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + return getOffsetUImm12OpValue(MI, OpIdx, Fixups, MemSize); + } + + unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups, + int MemSize) const; + + unsigned getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + + // Labels are handled mostly the same way: a symbol is needed, and + // just gets some fixup attached. + template<AArch64::Fixups fixupDesired> + unsigned getLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + unsigned getLoadLitLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + + unsigned getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + + unsigned getAddressWithFixup(const MCOperand &MO, + unsigned FixupKind, + SmallVectorImpl<MCFixup> &Fixups) const; + + + // getBinaryCodeForInstr - TableGen'erated function for getting the + // binary encoding for an instruction. + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const; + + + void EmitByte(unsigned char C, raw_ostream &OS) const { + OS << (char)C; + } + + void EmitInstruction(uint32_t Val, raw_ostream &OS) const { + // Output the constant in little endian byte order. 
+ for (unsigned i = 0; i != 4; ++i) { + EmitByte(Val & 0xff, OS); + Val >>= 8; + } + } + + + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const; + + unsigned fixFCMPImm(const MCInst &MI, unsigned EncodedValue) const; + + template<int hasRs, int hasRt2> unsigned + fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue) const; + + unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue) const; + + unsigned fixMulHigh(const MCInst &MI, unsigned EncodedValue) const; + + +}; + +} // end anonymous namespace + +unsigned AArch64MCCodeEmitter::getAddressWithFixup(const MCOperand &MO, + unsigned FixupKind, + SmallVectorImpl<MCFixup> &Fixups) const { + if (!MO.isExpr()) { + // This can occur for manually decoded or constructed MCInsts, but neither + // the assembly-parser nor instruction selection will currently produce an + // MCInst that's not a symbol reference. + assert(MO.isImm() && "Unexpected address requested"); + return MO.getImm(); + } + + const MCExpr *Expr = MO.getExpr(); + MCFixupKind Kind = MCFixupKind(FixupKind); + Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + + return 0; +} + +unsigned AArch64MCCodeEmitter:: +getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups, + int MemSize) const { + const MCOperand &ImmOp = MI.getOperand(OpIdx); + if (ImmOp.isImm()) + return ImmOp.getImm(); + + assert(ImmOp.isExpr() && "Unexpected operand type"); + const AArch64MCExpr *Expr = cast<AArch64MCExpr>(ImmOp.getExpr()); + unsigned FixupKind; + + + switch (Expr->getKind()) { + default: llvm_unreachable("Unexpected operand modifier"); + case AArch64MCExpr::VK_AARCH64_LO12: { + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12, + AArch64::fixup_a64_ldst16_lo12, + AArch64::fixup_a64_ldst32_lo12, + AArch64::fixup_a64_ldst64_lo12, + AArch64::fixup_a64_ldst128_lo12 }; + assert(MemSize <= 16 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_GOT_LO12: + assert(MemSize == 8 && "Invalid fixup for operation"); + FixupKind = AArch64::fixup_a64_ld64_got_lo12_nc; + break; + case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: { + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12, + AArch64::fixup_a64_ldst16_dtprel_lo12, + AArch64::fixup_a64_ldst32_dtprel_lo12, + AArch64::fixup_a64_ldst64_dtprel_lo12 }; + assert(MemSize <= 8 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: { + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12_nc, + AArch64::fixup_a64_ldst16_dtprel_lo12_nc, + AArch64::fixup_a64_ldst32_dtprel_lo12_nc, + AArch64::fixup_a64_ldst64_dtprel_lo12_nc }; + assert(MemSize <= 8 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12: + assert(MemSize == 8 && "Invalid fixup for operation"); + FixupKind = AArch64::fixup_a64_ld64_gottprel_lo12_nc; + break; + case AArch64MCExpr::VK_AARCH64_TPREL_LO12:{ + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12, + AArch64::fixup_a64_ldst16_tprel_lo12, + AArch64::fixup_a64_ldst32_tprel_lo12, + AArch64::fixup_a64_ldst64_tprel_lo12 }; + assert(MemSize <= 8 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: { + unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12_nc, + 
AArch64::fixup_a64_ldst16_tprel_lo12_nc, + AArch64::fixup_a64_ldst32_tprel_lo12_nc, + AArch64::fixup_a64_ldst64_tprel_lo12_nc }; + assert(MemSize <= 8 && "Invalid fixup for operation"); + FixupKind = FixupsBySize[Log2_32(MemSize)]; + break; + } + case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12: + assert(MemSize == 8 && "Invalid fixup for operation"); + FixupKind = AArch64::fixup_a64_tlsdesc_ld64_lo12_nc; + break; + } + + return getAddressWithFixup(ImmOp, FixupKind, Fixups); +} + +unsigned +AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = MI.getOperand(OpIdx); + if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + + assert(MO.isExpr()); + + unsigned FixupKind = 0; + switch(cast<AArch64MCExpr>(MO.getExpr())->getKind()) { + default: llvm_unreachable("Invalid expression modifier"); + case AArch64MCExpr::VK_AARCH64_LO12: + FixupKind = AArch64::fixup_a64_add_lo12; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_HI12: + FixupKind = AArch64::fixup_a64_add_dtprel_hi12; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: + FixupKind = AArch64::fixup_a64_add_dtprel_lo12; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: + FixupKind = AArch64::fixup_a64_add_dtprel_lo12_nc; break; + case AArch64MCExpr::VK_AARCH64_TPREL_HI12: + FixupKind = AArch64::fixup_a64_add_tprel_hi12; break; + case AArch64MCExpr::VK_AARCH64_TPREL_LO12: + FixupKind = AArch64::fixup_a64_add_tprel_lo12; break; + case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: + FixupKind = AArch64::fixup_a64_add_tprel_lo12_nc; break; + case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12: + FixupKind = AArch64::fixup_a64_tlsdesc_add_lo12_nc; break; + } + + return getAddressWithFixup(MO, FixupKind, Fixups); +} + +unsigned +AArch64MCCodeEmitter::getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + + const MCOperand &MO = MI.getOperand(OpIdx); + if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + + assert(MO.isExpr()); + + unsigned Modifier = AArch64MCExpr::VK_AARCH64_None; + if (const AArch64MCExpr *Expr = dyn_cast<AArch64MCExpr>(MO.getExpr())) + Modifier = Expr->getKind(); + + unsigned FixupKind = 0; + switch(Modifier) { + case AArch64MCExpr::VK_AARCH64_None: + FixupKind = AArch64::fixup_a64_adr_prel_page; + break; + case AArch64MCExpr::VK_AARCH64_GOT: + FixupKind = AArch64::fixup_a64_adr_prel_got_page; + break; + case AArch64MCExpr::VK_AARCH64_GOTTPREL: + FixupKind = AArch64::fixup_a64_adr_gottprel_page; + break; + case AArch64MCExpr::VK_AARCH64_TLSDESC: + FixupKind = AArch64::fixup_a64_tlsdesc_adr_page; + break; + default: + llvm_unreachable("Unknown symbol reference kind for ADRP instruction"); + } + + return getAddressWithFixup(MO, FixupKind, Fixups); +} + +unsigned +AArch64MCCodeEmitter::getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Only immediate expected for shift"); + + return ((32 - MO.getImm()) & 0x1f) | (31 - MO.getImm()) << 6; +} + +unsigned +AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Only immediate expected for shift"); + + return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6; +} + + +template<AArch64::Fixups fixupDesired> unsigned +AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI, + unsigned 
OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = MI.getOperand(OpIdx); + + if (MO.isExpr()) + return getAddressWithFixup(MO, fixupDesired, Fixups); + + assert(MO.isImm()); + return MO.getImm(); +} + +unsigned +AArch64MCCodeEmitter::getLoadLitLabelOpValue(const MCInst &MI, + unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = MI.getOperand(OpIdx); + + if (MO.isImm()) + return MO.getImm(); + + assert(MO.isExpr()); + + unsigned FixupKind; + if (isa<AArch64MCExpr>(MO.getExpr())) { + assert(dyn_cast<AArch64MCExpr>(MO.getExpr())->getKind() + == AArch64MCExpr::VK_AARCH64_GOTTPREL + && "Invalid symbol modifier for literal load"); + FixupKind = AArch64::fixup_a64_ld_gottprel_prel19; + } else { + FixupKind = AArch64::fixup_a64_ld_prel; + } + + return getAddressWithFixup(MO, FixupKind, Fixups); +} + + +unsigned +AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, + const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const { + if (MO.isReg()) { + return Ctx.getRegisterInfo().getEncodingValue(MO.getReg()); + } else if (MO.isImm()) { + return static_cast<unsigned>(MO.getImm()); + } + + llvm_unreachable("Unable to encode MCOperand!"); + return 0; +} + +unsigned +AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &UImm16MO = MI.getOperand(OpIdx); + const MCOperand &ShiftMO = MI.getOperand(OpIdx + 1); + + unsigned Result = static_cast<unsigned>(ShiftMO.getImm()) << 16; + + if (UImm16MO.isImm()) { + Result |= UImm16MO.getImm(); + return Result; + } + + const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr()); + AArch64::Fixups requestedFixup; + switch (A64E->getKind()) { + default: llvm_unreachable("unexpected expression modifier"); + case AArch64MCExpr::VK_AARCH64_ABS_G0: + requestedFixup = AArch64::fixup_a64_movw_uabs_g0; break; + case AArch64MCExpr::VK_AARCH64_ABS_G0_NC: + requestedFixup = AArch64::fixup_a64_movw_uabs_g0_nc; break; + case AArch64MCExpr::VK_AARCH64_ABS_G1: + requestedFixup = AArch64::fixup_a64_movw_uabs_g1; break; + case AArch64MCExpr::VK_AARCH64_ABS_G1_NC: + requestedFixup = AArch64::fixup_a64_movw_uabs_g1_nc; break; + case AArch64MCExpr::VK_AARCH64_ABS_G2: + requestedFixup = AArch64::fixup_a64_movw_uabs_g2; break; + case AArch64MCExpr::VK_AARCH64_ABS_G2_NC: + requestedFixup = AArch64::fixup_a64_movw_uabs_g2_nc; break; + case AArch64MCExpr::VK_AARCH64_ABS_G3: + requestedFixup = AArch64::fixup_a64_movw_uabs_g3; break; + case AArch64MCExpr::VK_AARCH64_SABS_G0: + requestedFixup = AArch64::fixup_a64_movw_sabs_g0; break; + case AArch64MCExpr::VK_AARCH64_SABS_G1: + requestedFixup = AArch64::fixup_a64_movw_sabs_g1; break; + case AArch64MCExpr::VK_AARCH64_SABS_G2: + requestedFixup = AArch64::fixup_a64_movw_sabs_g2; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G2: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g2; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G1: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g1; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g1_nc; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G0: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g0; break; + case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC: + requestedFixup = AArch64::fixup_a64_movw_dtprel_g0_nc; break; + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: + requestedFixup = AArch64::fixup_a64_movw_gottprel_g1; break; + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC: + requestedFixup = 
AArch64::fixup_a64_movw_gottprel_g0_nc; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G2: + requestedFixup = AArch64::fixup_a64_movw_tprel_g2; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G1: + requestedFixup = AArch64::fixup_a64_movw_tprel_g1; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC: + requestedFixup = AArch64::fixup_a64_movw_tprel_g1_nc; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G0: + requestedFixup = AArch64::fixup_a64_movw_tprel_g0; break; + case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC: + requestedFixup = AArch64::fixup_a64_movw_tprel_g0_nc; break; + } + + return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups); +} + +unsigned AArch64MCCodeEmitter::fixFCMPImm(const MCInst &MI, + unsigned EncodedValue) const { + // For FCMP[E] Rn, #0.0, the Rm field has a canonical representation + // with 0s, but is architecturally ignored + EncodedValue &= ~0x1f0000u; + + return EncodedValue; +} + +template<int hasRs, int hasRt2> unsigned +AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI, + unsigned EncodedValue) const { + if (!hasRs) EncodedValue |= 0x001F0000; + if (!hasRt2) EncodedValue |= 0x00007C00; + + return EncodedValue; +} + +unsigned +AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue) const { + // If one of the signed fixup kinds is applied to a MOVZ instruction, the + // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's + // job to ensure that any bits possibly affected by this are 0. This means we + // must zero out bit 30 (essentially emitting a MOVN). + MCOperand UImm16MO = MI.getOperand(1); + + // Nothing to do if there's no fixup. + if (UImm16MO.isImm()) + return EncodedValue; + + const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr()); + switch (A64E->getKind()) { + case AArch64MCExpr::VK_AARCH64_SABS_G0: + case AArch64MCExpr::VK_AARCH64_SABS_G1: + case AArch64MCExpr::VK_AARCH64_SABS_G2: + case AArch64MCExpr::VK_AARCH64_DTPREL_G2: + case AArch64MCExpr::VK_AARCH64_DTPREL_G1: + case AArch64MCExpr::VK_AARCH64_DTPREL_G0: + case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: + case AArch64MCExpr::VK_AARCH64_TPREL_G2: + case AArch64MCExpr::VK_AARCH64_TPREL_G1: + case AArch64MCExpr::VK_AARCH64_TPREL_G0: + return EncodedValue & ~(1u << 30); + default: + // Nothing to do for an unsigned fixup. + return EncodedValue; + } + + llvm_unreachable("Should have returned by now"); +} + +unsigned +AArch64MCCodeEmitter::fixMulHigh(const MCInst &MI, + unsigned EncodedValue) const { + // The Ra field of SMULH and UMULH is unused: it should be assembled as 31 + // (i.e. all bits 1) but is ignored by the processor. + EncodedValue |= 0x1f << 10; + return EncodedValue; +} + +MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new AArch64MCCodeEmitter(Ctx); +} + +void AArch64MCCodeEmitter:: +EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + if (MI.getOpcode() == AArch64::TLSDESCCALL) { + // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the + // following (BLR) instruction. It doesn't emit any code itself so it + // doesn't go through the normal TableGenerated channels. 
+ MCFixupKind Fixup = MCFixupKind(AArch64::fixup_a64_tlsdesc_call); + const MCExpr *Expr; + Expr = AArch64MCExpr::CreateTLSDesc(MI.getOperand(0).getExpr(), Ctx); + Fixups.push_back(MCFixup::Create(0, Expr, Fixup)); + return; + } + + uint32_t Binary = getBinaryCodeForInstr(MI, Fixups); + + EmitInstruction(Binary, OS); +} + + +#include "AArch64GenMCCodeEmitter.inc" diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp new file mode 100644 index 0000000..c1abfe7 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -0,0 +1,178 @@ +//===-- AArch64MCExpr.cpp - AArch64 specific MC expression classes --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the assembly expression modifiers +// accepted by the AArch64 architecture (e.g. ":lo12:", ":gottprel_g1:", ...). +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "aarch64mcexpr" +#include "AArch64MCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELF.h" +#include "llvm/Object/ELF.h" + +using namespace llvm; + +const AArch64MCExpr* +AArch64MCExpr::Create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx) { + return new (Ctx) AArch64MCExpr(Kind, Expr); +} + +void AArch64MCExpr::PrintImpl(raw_ostream &OS) const { + switch (Kind) { + default: llvm_unreachable("Invalid kind!"); + case VK_AARCH64_GOT: OS << ":got:"; break; + case VK_AARCH64_GOT_LO12: OS << ":got_lo12:"; break; + case VK_AARCH64_LO12: OS << ":lo12:"; break; + case VK_AARCH64_ABS_G0: OS << ":abs_g0:"; break; + case VK_AARCH64_ABS_G0_NC: OS << ":abs_g0_nc:"; break; + case VK_AARCH64_ABS_G1: OS << ":abs_g1:"; break; + case VK_AARCH64_ABS_G1_NC: OS << ":abs_g1_nc:"; break; + case VK_AARCH64_ABS_G2: OS << ":abs_g2:"; break; + case VK_AARCH64_ABS_G2_NC: OS << ":abs_g2_nc:"; break; + case VK_AARCH64_ABS_G3: OS << ":abs_g3:"; break; + case VK_AARCH64_SABS_G0: OS << ":abs_g0_s:"; break; + case VK_AARCH64_SABS_G1: OS << ":abs_g1_s:"; break; + case VK_AARCH64_SABS_G2: OS << ":abs_g2_s:"; break; + case VK_AARCH64_DTPREL_G2: OS << ":dtprel_g2:"; break; + case VK_AARCH64_DTPREL_G1: OS << ":dtprel_g1:"; break; + case VK_AARCH64_DTPREL_G1_NC: OS << ":dtprel_g1_nc:"; break; + case VK_AARCH64_DTPREL_G0: OS << ":dtprel_g0:"; break; + case VK_AARCH64_DTPREL_G0_NC: OS << ":dtprel_g0_nc:"; break; + case VK_AARCH64_DTPREL_HI12: OS << ":dtprel_hi12:"; break; + case VK_AARCH64_DTPREL_LO12: OS << ":dtprel_lo12:"; break; + case VK_AARCH64_DTPREL_LO12_NC: OS << ":dtprel_lo12_nc:"; break; + case VK_AARCH64_GOTTPREL_G1: OS << ":gottprel_g1:"; break; + case VK_AARCH64_GOTTPREL_G0_NC: OS << ":gottprel_g0_nc:"; break; + case VK_AARCH64_GOTTPREL: OS << ":gottprel:"; break; + case VK_AARCH64_GOTTPREL_LO12: OS << ":gottprel_lo12:"; break; + case VK_AARCH64_TPREL_G2: OS << ":tprel_g2:"; break; + case VK_AARCH64_TPREL_G1: OS << ":tprel_g1:"; break; + case VK_AARCH64_TPREL_G1_NC: OS << ":tprel_g1_nc:"; break; + case VK_AARCH64_TPREL_G0: OS << ":tprel_g0:"; break; + case VK_AARCH64_TPREL_G0_NC: OS << ":tprel_g0_nc:"; break; + case VK_AARCH64_TPREL_HI12: OS << ":tprel_hi12:"; break; + case VK_AARCH64_TPREL_LO12: OS << ":tprel_lo12:"; break; + case VK_AARCH64_TPREL_LO12_NC: OS << 
":tprel_lo12_nc:"; break; + case VK_AARCH64_TLSDESC: OS << ":tlsdesc:"; break; + case VK_AARCH64_TLSDESC_LO12: OS << ":tlsdesc_lo12:"; break; + + } + + const MCExpr *Expr = getSubExpr(); + if (Expr->getKind() != MCExpr::SymbolRef) + OS << '('; + Expr->print(OS); + if (Expr->getKind() != MCExpr::SymbolRef) + OS << ')'; +} + +bool +AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const { + return getSubExpr()->EvaluateAsRelocatable(Res, *Layout); +} + +static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { + switch (Expr->getKind()) { + case MCExpr::Target: + llvm_unreachable("Can't handle nested target expression"); + break; + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr); + fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); + fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); + break; + } + + case MCExpr::SymbolRef: { + // We're known to be under a TLS fixup, so any symbol should be + // modified. There should be only one. + const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr); + MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol()); + MCELF::SetType(SD, ELF::STT_TLS); + break; + } + + case MCExpr::Unary: + fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm); + break; + } +} + +void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { + switch (getKind()) { + default: + return; + case VK_AARCH64_DTPREL_G2: + case VK_AARCH64_DTPREL_G1: + case VK_AARCH64_DTPREL_G1_NC: + case VK_AARCH64_DTPREL_G0: + case VK_AARCH64_DTPREL_G0_NC: + case VK_AARCH64_DTPREL_HI12: + case VK_AARCH64_DTPREL_LO12: + case VK_AARCH64_DTPREL_LO12_NC: + case VK_AARCH64_GOTTPREL_G1: + case VK_AARCH64_GOTTPREL_G0_NC: + case VK_AARCH64_GOTTPREL: + case VK_AARCH64_GOTTPREL_LO12: + case VK_AARCH64_TPREL_G2: + case VK_AARCH64_TPREL_G1: + case VK_AARCH64_TPREL_G1_NC: + case VK_AARCH64_TPREL_G0: + case VK_AARCH64_TPREL_G0_NC: + case VK_AARCH64_TPREL_HI12: + case VK_AARCH64_TPREL_LO12: + case VK_AARCH64_TPREL_LO12_NC: + case VK_AARCH64_TLSDESC: + case VK_AARCH64_TLSDESC_LO12: + break; + } + + fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); +} + +// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps +// that method should be made public? +// FIXME: really do above: now that two backends are using it. 
+static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) { + switch (Value->getKind()) { + case MCExpr::Target: + llvm_unreachable("Can't handle nested target expr!"); + break; + + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); + AddValueSymbolsImpl(BE->getLHS(), Asm); + AddValueSymbolsImpl(BE->getRHS(), Asm); + break; + } + + case MCExpr::SymbolRef: + Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); + break; + + case MCExpr::Unary: + AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm); + break; + } +} + +void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const { + AddValueSymbolsImpl(getSubExpr(), Asm); +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h new file mode 100644 index 0000000..c0e3b29 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -0,0 +1,167 @@ +//==- AArch64MCExpr.h - AArch64 specific MC expression classes --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes AArch64-specific MCExprs, used for modifiers like +// ":lo12:" or ":gottprel_g1:". +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64MCEXPR_H +#define LLVM_AARCH64MCEXPR_H + +#include "llvm/MC/MCExpr.h" + +namespace llvm { + +class AArch64MCExpr : public MCTargetExpr { +public: + enum VariantKind { + VK_AARCH64_None, + VK_AARCH64_GOT, // :got: modifier in assembly + VK_AARCH64_GOT_LO12, // :got_lo12: + VK_AARCH64_LO12, // :lo12: + + VK_AARCH64_ABS_G0, // :abs_g0: + VK_AARCH64_ABS_G0_NC, // :abs_g0_nc: + VK_AARCH64_ABS_G1, + VK_AARCH64_ABS_G1_NC, + VK_AARCH64_ABS_G2, + VK_AARCH64_ABS_G2_NC, + VK_AARCH64_ABS_G3, + + VK_AARCH64_SABS_G0, // :abs_g0_s: + VK_AARCH64_SABS_G1, + VK_AARCH64_SABS_G2, + + VK_AARCH64_DTPREL_G2, // :dtprel_g2: + VK_AARCH64_DTPREL_G1, + VK_AARCH64_DTPREL_G1_NC, + VK_AARCH64_DTPREL_G0, + VK_AARCH64_DTPREL_G0_NC, + VK_AARCH64_DTPREL_HI12, + VK_AARCH64_DTPREL_LO12, + VK_AARCH64_DTPREL_LO12_NC, + + VK_AARCH64_GOTTPREL_G1, // :gottprel: + VK_AARCH64_GOTTPREL_G0_NC, + VK_AARCH64_GOTTPREL, + VK_AARCH64_GOTTPREL_LO12, + + VK_AARCH64_TPREL_G2, // :tprel: + VK_AARCH64_TPREL_G1, + VK_AARCH64_TPREL_G1_NC, + VK_AARCH64_TPREL_G0, + VK_AARCH64_TPREL_G0_NC, + VK_AARCH64_TPREL_HI12, + VK_AARCH64_TPREL_LO12, + VK_AARCH64_TPREL_LO12_NC, + + VK_AARCH64_TLSDESC, // :tlsdesc: + VK_AARCH64_TLSDESC_LO12 + }; + +private: + const VariantKind Kind; + const MCExpr *Expr; + + explicit AArch64MCExpr(VariantKind _Kind, const MCExpr *_Expr) + : Kind(_Kind), Expr(_Expr) {} + +public: + /// @name Construction + /// @{ + + static const AArch64MCExpr *Create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx); + + static const AArch64MCExpr *CreateLo12(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_AARCH64_LO12, Expr, Ctx); + } + + static const AArch64MCExpr *CreateGOT(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_AARCH64_GOT, Expr, Ctx); + } + + static const AArch64MCExpr *CreateGOTLo12(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_GOT_LO12, Expr, Ctx); + } + + static const AArch64MCExpr *CreateDTPREL_G1(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_DTPREL_G1, Expr, Ctx); + 
} + + static const AArch64MCExpr *CreateDTPREL_G0_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_DTPREL_G0_NC, Expr, Ctx); + } + + static const AArch64MCExpr *CreateGOTTPREL(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_GOTTPREL, Expr, Ctx); + } + + static const AArch64MCExpr *CreateGOTTPRELLo12(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_GOTTPREL_LO12, Expr, Ctx); + } + + static const AArch64MCExpr *CreateTLSDesc(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_TLSDESC, Expr, Ctx); + } + + static const AArch64MCExpr *CreateTLSDescLo12(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_TLSDESC_LO12, Expr, Ctx); + } + + static const AArch64MCExpr *CreateTPREL_G1(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_TPREL_G1, Expr, Ctx); + } + + static const AArch64MCExpr *CreateTPREL_G0_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx); + } + + /// @} + /// @name Accessors + /// @{ + + /// getOpcode - Get the kind of this expression. + VariantKind getKind() const { return Kind; } + + /// getSubExpr - Get the child of this expression. + const MCExpr *getSubExpr() const { return Expr; } + + /// @} + + void PrintImpl(raw_ostream &OS) const; + bool EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const; + void AddValueSymbols(MCAssembler *) const; + const MCSection *FindAssociatedSection() const { + return getSubExpr()->FindAssociatedSection(); + } + + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const; + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } + + static bool classof(const AArch64MCExpr *) { return true; } + +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp new file mode 100644 index 0000000..7960db0 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -0,0 +1,194 @@ +//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides AArch64 specific target descriptions. 
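// Illustrative aside (not part of the patch): once the registration
// functions in this file have run, a tool retrieves all of these target
// descriptions through TargetRegistry rather than by linking against the
// backend directly. A minimal sketch, assuming the LLVM 3.x-era API; the
// triple string is only an example.
#include "llvm/Support/TargetRegistry.h"
#include <string>

static const llvm::Target *lookupAArch64() {
  std::string Error;
  return llvm::TargetRegistry::lookupTarget("aarch64-none-linux-gnu", Error);
}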
+// +//===----------------------------------------------------------------------===// + +#include "AArch64MCTargetDesc.h" +#include "AArch64ELFStreamer.h" +#include "AArch64MCAsmInfo.h" +#include "InstPrinter/AArch64InstPrinter.h" +#include "llvm/ADT/APInt.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/ErrorHandling.h" + +#define GET_REGINFO_MC_DESC +#include "AArch64GenRegisterInfo.inc" + +#define GET_INSTRINFO_MC_DESC +#include "AArch64GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "AArch64GenSubtargetInfo.inc" + +using namespace llvm; + +MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT, + StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitAArch64MCSubtargetInfo(X, TT, CPU, ""); + return X; +} + + +static MCInstrInfo *createAArch64MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitAArch64MCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitAArch64MCRegisterInfo(X, AArch64::X30); + return X; +} + +static MCAsmInfo *createAArch64MCAsmInfo(const Target &T, StringRef TT) { + Triple TheTriple(TT); + + MCAsmInfo *MAI = new AArch64ELFMCAsmInfo(); + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(AArch64::XSP, 0); + MAI->addInitialFrameState(0, Dst, Src); + + return MAI; +} + +static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) { + // On ELF platforms the default static relocation model has a smart enough + // linker to cope with referencing external symbols defined in a shared + // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. 
+ RM = Reloc::Static; + } + + if (CM == CodeModel::Default) + CM = CodeModel::Small; + + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, + bool NoExecStack) { + Triple TheTriple(TT); + + return createAArch64ELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); +} + + +static MCInstPrinter *createAArch64MCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return new AArch64InstPrinter(MAI, MII, MRI, STI); + return 0; +} + +namespace { + +class AArch64MCInstrAnalysis : public MCInstrAnalysis { +public: + AArch64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} + + virtual bool isUnconditionalBranch(const MCInst &Inst) const { + if (Inst.getOpcode() == AArch64::Bcc + && Inst.getOperand(0).getImm() == A64CC::AL) + return true; + return MCInstrAnalysis::isUnconditionalBranch(Inst); + } + + virtual bool isConditionalBranch(const MCInst &Inst) const { + if (Inst.getOpcode() == AArch64::Bcc + && Inst.getOperand(0).getImm() == A64CC::AL) + return false; + return MCInstrAnalysis::isConditionalBranch(Inst); + } + + uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size) const { + unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0; + // FIXME: We only handle PCRel branches for now. + if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType + != MCOI::OPERAND_PCREL) + return -1ULL; + + int64_t Imm = Inst.getOperand(LblOperand).getImm(); + + return Addr + Imm; + } +}; + +} + +static MCInstrAnalysis *createAArch64MCInstrAnalysis(const MCInstrInfo *Info) { + return new AArch64MCInstrAnalysis(Info); +} + + + +extern "C" void LLVMInitializeAArch64TargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn A(TheAArch64Target, createAArch64MCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheAArch64Target, + createAArch64MCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheAArch64Target, + createAArch64MCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheAArch64Target, + createAArch64MCRegisterInfo); + + // Register the MC subtarget info. + using AArch64_MC::createAArch64MCSubtargetInfo; + TargetRegistry::RegisterMCSubtargetInfo(TheAArch64Target, + createAArch64MCSubtargetInfo); + + // Register the MC instruction analyzer. + TargetRegistry::RegisterMCInstrAnalysis(TheAArch64Target, + createAArch64MCInstrAnalysis); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(TheAArch64Target, + createAArch64MCCodeEmitter); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(TheAArch64Target, + createAArch64AsmBackend); + + // Register the object streamer. + TargetRegistry::RegisterMCObjectStreamer(TheAArch64Target, + createMCStreamer); + + // Register the MCInstPrinter. 
+ TargetRegistry::RegisterMCInstPrinter(TheAArch64Target, + createAArch64MCInstPrinter); +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h new file mode 100644 index 0000000..3849fe3 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -0,0 +1,65 @@ +//===-- AArch64MCTargetDesc.h - AArch64 Target Descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides AArch64 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64MCTARGETDESC_H +#define LLVM_AARCH64MCTARGETDESC_H + +#include "llvm/Support/DataTypes.h" + +namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class StringRef; +class Target; +class raw_ostream; + +extern Target TheAArch64Target; + +namespace AArch64_MC { + MCSubtargetInfo *createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS); +} + +MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, + uint8_t OSABI); + +MCAsmBackend *createAArch64AsmBackend(const Target &T, StringRef TT, + StringRef CPU); + +} // End llvm namespace + +// Defines symbolic names for AArch64 registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "AArch64GenRegisterInfo.inc" + +// Defines symbolic names for the AArch64 instructions. +// +#define GET_INSTRINFO_ENUM +#include "AArch64GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "AArch64GenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt new file mode 100644 index 0000000..44c66a2 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,13 @@ +add_llvm_library(LLVMAArch64Desc + AArch64AsmBackend.cpp + AArch64ELFObjectWriter.cpp + AArch64ELFStreamer.cpp + AArch64MCAsmInfo.cpp + AArch64MCCodeEmitter.cpp + AArch64MCExpr.cpp + AArch64MCTargetDesc.cpp + ) +add_dependencies(LLVMAArch64Desc AArch64CommonTableGen) + +# Hack: we need to include 'main' target directory to grab private headers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..37c8035 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AArch64Desc +parent = AArch64 +required_libraries = AArch64AsmPrinter AArch64Info MC Support +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/MCTargetDesc/Makefile b/lib/Target/AArch64/MCTargetDesc/Makefile new file mode 100644 index 0000000..5779ac5 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/AArch64/TargetDesc/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMAArch64Desc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/Makefile b/lib/Target/AArch64/Makefile new file mode 100644 index 0000000..641bb83 --- /dev/null +++ b/lib/Target/AArch64/Makefile @@ -0,0 +1,30 @@ +##===- lib/Target/AArch64/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMAArch64CodeGen +TARGET = AArch64 + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = AArch64GenAsmMatcher.inc \ + AArch64GenAsmWriter.inc \ + AArch64GenCallingConv.inc \ + AArch64GenDAGISel.inc \ + AArch64GenDisassemblerTables.inc \ + AArch64GenInstrInfo.inc \ + AArch64GenMCCodeEmitter.inc \ + AArch64GenMCPseudoLowering.inc \ + AArch64GenRegisterInfo.inc \ + AArch64GenSubtargetInfo.inc + +DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils + +include $(LEVEL)/Makefile.common + + diff --git a/lib/Target/AArch64/README.txt b/lib/Target/AArch64/README.txt new file mode 100644 index 0000000..601990f --- /dev/null +++ b/lib/Target/AArch64/README.txt @@ -0,0 +1,2 @@ +This file will contain changes that need to be made before AArch64 can become an +officially supported target. Currently a placeholder. diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp new file mode 100644 index 0000000..b8099cb --- /dev/null +++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp @@ -0,0 +1,24 @@ +//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the key registration step for the architecture. 
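// Illustrative aside (not part of the patch): tools reach the
// registration step below through its extern "C" hook; in-tree tools get
// it indirectly via InitializeAllTargetInfos() once AArch64 is listed in
// LLVM_TARGETS_TO_BUILD. A minimal sketch:
extern "C" void LLVMInitializeAArch64TargetInfo();

int main() {
  LLVMInitializeAArch64TargetInfo();
  // "aarch64" is now visible to TargetRegistry::lookupTarget.
  return 0;
}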
+// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheAArch64Target; + +extern "C" void LLVMInitializeAArch64TargetInfo() { + RegisterTarget<Triple::aarch64> + X(TheAArch64Target, "aarch64", "AArch64"); +} diff --git a/lib/Target/AArch64/TargetInfo/CMakeLists.txt b/lib/Target/AArch64/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000..e236eed --- /dev/null +++ b/lib/Target/AArch64/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMAArch64Info + AArch64TargetInfo.cpp + ) + +add_dependencies(LLVMAArch64Info AArch64CommonTableGen) diff --git a/lib/Target/AArch64/TargetInfo/LLVMBuild.txt b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..5b003f0 --- /dev/null +++ b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/AArch64/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AArch64Info +parent = AArch64 +required_libraries = MC Support Target +add_to_library_groups = AArch64 + diff --git a/lib/Target/AArch64/TargetInfo/Makefile b/lib/Target/AArch64/TargetInfo/Makefile new file mode 100644 index 0000000..9dc9aa4 --- /dev/null +++ b/lib/Target/AArch64/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/AArch64/TargetInfo/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMAArch64Info + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp new file mode 100644 index 0000000..ab9bba1 --- /dev/null +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -0,0 +1,819 @@ +//===-- AArch64BaseInfo.cpp - AArch64 Base encoding information------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides basic encoding and assembly information for AArch64. 
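// Illustrative aside (not part of the patch): the NamedImmMapper tables
// defined below are used symmetrically by the asm parser (operand name
// to immediate) and the instruction printer (immediate back to name). A
// minimal usage sketch with the data-barrier mapper:
static void dbarrierRoundTrip() {
  bool Valid;
  A64DB::DBarrierMapper Mapper;
  uint32_t Imm = Mapper.fromString("ish", Valid);     // Valid == true
  llvm::StringRef Name = Mapper.toString(Imm, Valid); // Name == "ish"
  (void)Name;
}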
+// +//===----------------------------------------------------------------------===// +#include "AArch64BaseInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Regex.h" + +using namespace llvm; + +StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const { + for (unsigned i = 0; i < NumPairs; ++i) { + if (Pairs[i].Value == Value) { + Valid = true; + return Pairs[i].Name; + } + } + + Valid = false; + return StringRef(); +} + +uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const { + std::string LowerCaseName = Name.lower(); + for (unsigned i = 0; i < NumPairs; ++i) { + if (Pairs[i].Name == LowerCaseName) { + Valid = true; + return Pairs[i].Value; + } + } + + Valid = false; + return -1; +} + +bool NamedImmMapper::validImm(uint32_t Value) const { + return Value < TooBigImm; +} + +const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = { + {"s1e1r", S1E1R}, + {"s1e2r", S1E2R}, + {"s1e3r", S1E3R}, + {"s1e1w", S1E1W}, + {"s1e2w", S1E2W}, + {"s1e3w", S1E3W}, + {"s1e0r", S1E0R}, + {"s1e0w", S1E0W}, + {"s12e1r", S12E1R}, + {"s12e1w", S12E1W}, + {"s12e0r", S12E0R}, + {"s12e0w", S12E0W}, +}; + +A64AT::ATMapper::ATMapper() + : NamedImmMapper(ATPairs, 0) {} + +const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = { + {"oshld", OSHLD}, + {"oshst", OSHST}, + {"osh", OSH}, + {"nshld", NSHLD}, + {"nshst", NSHST}, + {"nsh", NSH}, + {"ishld", ISHLD}, + {"ishst", ISHST}, + {"ish", ISH}, + {"ld", LD}, + {"st", ST}, + {"sy", SY} +}; + +A64DB::DBarrierMapper::DBarrierMapper() + : NamedImmMapper(DBarrierPairs, 16u) {} + +const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = { + {"zva", ZVA}, + {"ivac", IVAC}, + {"isw", ISW}, + {"cvac", CVAC}, + {"csw", CSW}, + {"cvau", CVAU}, + {"civac", CIVAC}, + {"cisw", CISW} +}; + +A64DC::DCMapper::DCMapper() + : NamedImmMapper(DCPairs, 0) {} + +const NamedImmMapper::Mapping A64IC::ICMapper::ICPairs[] = { + {"ialluis", IALLUIS}, + {"iallu", IALLU}, + {"ivau", IVAU} +}; + +A64IC::ICMapper::ICMapper() + : NamedImmMapper(ICPairs, 0) {} + +const NamedImmMapper::Mapping A64ISB::ISBMapper::ISBPairs[] = { + {"sy", SY}, +}; + +A64ISB::ISBMapper::ISBMapper() + : NamedImmMapper(ISBPairs, 16) {} + +const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = { + {"pldl1keep", PLDL1KEEP}, + {"pldl1strm", PLDL1STRM}, + {"pldl2keep", PLDL2KEEP}, + {"pldl2strm", PLDL2STRM}, + {"pldl3keep", PLDL3KEEP}, + {"pldl3strm", PLDL3STRM}, + {"plil1keep", PLIL1KEEP}, + {"plil1strm", PLIL1STRM}, + {"plil2keep", PLIL2KEEP}, + {"plil2strm", PLIL2STRM}, + {"plil3keep", PLIL3KEEP}, + {"plil3strm", PLIL3STRM}, + {"pstl1keep", PSTL1KEEP}, + {"pstl1strm", PSTL1STRM}, + {"pstl2keep", PSTL2KEEP}, + {"pstl2strm", PSTL2STRM}, + {"pstl3keep", PSTL3KEEP}, + {"pstl3strm", PSTL3STRM} +}; + +A64PRFM::PRFMMapper::PRFMMapper() + : NamedImmMapper(PRFMPairs, 32) {} + +const NamedImmMapper::Mapping A64PState::PStateMapper::PStatePairs[] = { + {"spsel", SPSel}, + {"daifset", DAIFSet}, + {"daifclr", DAIFClr} +}; + +A64PState::PStateMapper::PStateMapper() + : NamedImmMapper(PStatePairs, 0) {} + +const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = { + {"mdccsr_el0", MDCCSR_EL0}, + {"dbgdtrrx_el0", DBGDTRRX_EL0}, + {"mdrar_el1", MDRAR_EL1}, + {"oslsr_el1", OSLSR_EL1}, + {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1}, + {"pmceid0_el0", PMCEID0_EL0}, + {"pmceid1_el0", PMCEID1_EL0}, + {"midr_el1", MIDR_EL1}, + {"ccsidr_el1", CCSIDR_EL1}, + {"clidr_el1", CLIDR_EL1}, + {"ctr_el0", 
CTR_EL0}, + {"mpidr_el1", MPIDR_EL1}, + {"revidr_el1", REVIDR_EL1}, + {"aidr_el1", AIDR_EL1}, + {"dczid_el0", DCZID_EL0}, + {"id_pfr0_el1", ID_PFR0_EL1}, + {"id_pfr1_el1", ID_PFR1_EL1}, + {"id_dfr0_el1", ID_DFR0_EL1}, + {"id_afr0_el1", ID_AFR0_EL1}, + {"id_mmfr0_el1", ID_MMFR0_EL1}, + {"id_mmfr1_el1", ID_MMFR1_EL1}, + {"id_mmfr2_el1", ID_MMFR2_EL1}, + {"id_mmfr3_el1", ID_MMFR3_EL1}, + {"id_isar0_el1", ID_ISAR0_EL1}, + {"id_isar1_el1", ID_ISAR1_EL1}, + {"id_isar2_el1", ID_ISAR2_EL1}, + {"id_isar3_el1", ID_ISAR3_EL1}, + {"id_isar4_el1", ID_ISAR4_EL1}, + {"id_isar5_el1", ID_ISAR5_EL1}, + {"id_aa64pfr0_el1", ID_AA64PFR0_EL1}, + {"id_aa64pfr1_el1", ID_AA64PFR1_EL1}, + {"id_aa64dfr0_el1", ID_AA64DFR0_EL1}, + {"id_aa64dfr1_el1", ID_AA64DFR1_EL1}, + {"id_aa64afr0_el1", ID_AA64AFR0_EL1}, + {"id_aa64afr1_el1", ID_AA64AFR1_EL1}, + {"id_aa64isar0_el1", ID_AA64ISAR0_EL1}, + {"id_aa64isar1_el1", ID_AA64ISAR1_EL1}, + {"id_aa64mmfr0_el1", ID_AA64MMFR0_EL1}, + {"id_aa64mmfr1_el1", ID_AA64MMFR1_EL1}, + {"mvfr0_el1", MVFR0_EL1}, + {"mvfr1_el1", MVFR1_EL1}, + {"mvfr2_el1", MVFR2_EL1}, + {"rvbar_el1", RVBAR_EL1}, + {"rvbar_el2", RVBAR_EL2}, + {"rvbar_el3", RVBAR_EL3}, + {"isr_el1", ISR_EL1}, + {"cntpct_el0", CNTPCT_EL0}, + {"cntvct_el0", CNTVCT_EL0} +}; + +A64SysReg::MRSMapper::MRSMapper() { + InstPairs = &MRSPairs[0]; + NumInstPairs = llvm::array_lengthof(MRSPairs); +} + +const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = { + {"dbgdtrtx_el0", DBGDTRTX_EL0}, + {"oslar_el1", OSLAR_EL1}, + {"pmswinc_el0", PMSWINC_EL0} +}; + +A64SysReg::MSRMapper::MSRMapper() { + InstPairs = &MSRPairs[0]; + NumInstPairs = llvm::array_lengthof(MSRPairs); +} + + +const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = { + {"osdtrrx_el1", OSDTRRX_EL1}, + {"osdtrtx_el1", OSDTRTX_EL1}, + {"teecr32_el1", TEECR32_EL1}, + {"mdccint_el1", MDCCINT_EL1}, + {"mdscr_el1", MDSCR_EL1}, + {"dbgdtr_el0", DBGDTR_EL0}, + {"oseccr_el1", OSECCR_EL1}, + {"dbgvcr32_el2", DBGVCR32_EL2}, + {"dbgbvr0_el1", DBGBVR0_EL1}, + {"dbgbvr1_el1", DBGBVR1_EL1}, + {"dbgbvr2_el1", DBGBVR2_EL1}, + {"dbgbvr3_el1", DBGBVR3_EL1}, + {"dbgbvr4_el1", DBGBVR4_EL1}, + {"dbgbvr5_el1", DBGBVR5_EL1}, + {"dbgbvr6_el1", DBGBVR6_EL1}, + {"dbgbvr7_el1", DBGBVR7_EL1}, + {"dbgbvr8_el1", DBGBVR8_EL1}, + {"dbgbvr9_el1", DBGBVR9_EL1}, + {"dbgbvr10_el1", DBGBVR10_EL1}, + {"dbgbvr11_el1", DBGBVR11_EL1}, + {"dbgbvr12_el1", DBGBVR12_EL1}, + {"dbgbvr13_el1", DBGBVR13_EL1}, + {"dbgbvr14_el1", DBGBVR14_EL1}, + {"dbgbvr15_el1", DBGBVR15_EL1}, + {"dbgbcr0_el1", DBGBCR0_EL1}, + {"dbgbcr1_el1", DBGBCR1_EL1}, + {"dbgbcr2_el1", DBGBCR2_EL1}, + {"dbgbcr3_el1", DBGBCR3_EL1}, + {"dbgbcr4_el1", DBGBCR4_EL1}, + {"dbgbcr5_el1", DBGBCR5_EL1}, + {"dbgbcr6_el1", DBGBCR6_EL1}, + {"dbgbcr7_el1", DBGBCR7_EL1}, + {"dbgbcr8_el1", DBGBCR8_EL1}, + {"dbgbcr9_el1", DBGBCR9_EL1}, + {"dbgbcr10_el1", DBGBCR10_EL1}, + {"dbgbcr11_el1", DBGBCR11_EL1}, + {"dbgbcr12_el1", DBGBCR12_EL1}, + {"dbgbcr13_el1", DBGBCR13_EL1}, + {"dbgbcr14_el1", DBGBCR14_EL1}, + {"dbgbcr15_el1", DBGBCR15_EL1}, + {"dbgwvr0_el1", DBGWVR0_EL1}, + {"dbgwvr1_el1", DBGWVR1_EL1}, + {"dbgwvr2_el1", DBGWVR2_EL1}, + {"dbgwvr3_el1", DBGWVR3_EL1}, + {"dbgwvr4_el1", DBGWVR4_EL1}, + {"dbgwvr5_el1", DBGWVR5_EL1}, + {"dbgwvr6_el1", DBGWVR6_EL1}, + {"dbgwvr7_el1", DBGWVR7_EL1}, + {"dbgwvr8_el1", DBGWVR8_EL1}, + {"dbgwvr9_el1", DBGWVR9_EL1}, + {"dbgwvr10_el1", DBGWVR10_EL1}, + {"dbgwvr11_el1", DBGWVR11_EL1}, + {"dbgwvr12_el1", DBGWVR12_EL1}, + {"dbgwvr13_el1", DBGWVR13_EL1}, + {"dbgwvr14_el1", DBGWVR14_EL1}, + {"dbgwvr15_el1", 
DBGWVR15_EL1}, + {"dbgwcr0_el1", DBGWCR0_EL1}, + {"dbgwcr1_el1", DBGWCR1_EL1}, + {"dbgwcr2_el1", DBGWCR2_EL1}, + {"dbgwcr3_el1", DBGWCR3_EL1}, + {"dbgwcr4_el1", DBGWCR4_EL1}, + {"dbgwcr5_el1", DBGWCR5_EL1}, + {"dbgwcr6_el1", DBGWCR6_EL1}, + {"dbgwcr7_el1", DBGWCR7_EL1}, + {"dbgwcr8_el1", DBGWCR8_EL1}, + {"dbgwcr9_el1", DBGWCR9_EL1}, + {"dbgwcr10_el1", DBGWCR10_EL1}, + {"dbgwcr11_el1", DBGWCR11_EL1}, + {"dbgwcr12_el1", DBGWCR12_EL1}, + {"dbgwcr13_el1", DBGWCR13_EL1}, + {"dbgwcr14_el1", DBGWCR14_EL1}, + {"dbgwcr15_el1", DBGWCR15_EL1}, + {"teehbr32_el1", TEEHBR32_EL1}, + {"osdlr_el1", OSDLR_EL1}, + {"dbgprcr_el1", DBGPRCR_EL1}, + {"dbgclaimset_el1", DBGCLAIMSET_EL1}, + {"dbgclaimclr_el1", DBGCLAIMCLR_EL1}, + {"csselr_el1", CSSELR_EL1}, + {"vpidr_el2", VPIDR_EL2}, + {"vmpidr_el2", VMPIDR_EL2}, + {"sctlr_el1", SCTLR_EL1}, + {"sctlr_el2", SCTLR_EL2}, + {"sctlr_el3", SCTLR_EL3}, + {"actlr_el1", ACTLR_EL1}, + {"actlr_el2", ACTLR_EL2}, + {"actlr_el3", ACTLR_EL3}, + {"cpacr_el1", CPACR_EL1}, + {"hcr_el2", HCR_EL2}, + {"scr_el3", SCR_EL3}, + {"mdcr_el2", MDCR_EL2}, + {"sder32_el3", SDER32_EL3}, + {"cptr_el2", CPTR_EL2}, + {"cptr_el3", CPTR_EL3}, + {"hstr_el2", HSTR_EL2}, + {"hacr_el2", HACR_EL2}, + {"mdcr_el3", MDCR_EL3}, + {"ttbr0_el1", TTBR0_EL1}, + {"ttbr0_el2", TTBR0_EL2}, + {"ttbr0_el3", TTBR0_EL3}, + {"ttbr1_el1", TTBR1_EL1}, + {"tcr_el1", TCR_EL1}, + {"tcr_el2", TCR_EL2}, + {"tcr_el3", TCR_EL3}, + {"vttbr_el2", VTTBR_EL2}, + {"vtcr_el2", VTCR_EL2}, + {"dacr32_el2", DACR32_EL2}, + {"spsr_el1", SPSR_EL1}, + {"spsr_el2", SPSR_EL2}, + {"spsr_el3", SPSR_EL3}, + {"elr_el1", ELR_EL1}, + {"elr_el2", ELR_EL2}, + {"elr_el3", ELR_EL3}, + {"sp_el0", SP_EL0}, + {"sp_el1", SP_EL1}, + {"sp_el2", SP_EL2}, + {"spsel", SPSel}, + {"nzcv", NZCV}, + {"daif", DAIF}, + {"currentel", CurrentEL}, + {"spsr_irq", SPSR_irq}, + {"spsr_abt", SPSR_abt}, + {"spsr_und", SPSR_und}, + {"spsr_fiq", SPSR_fiq}, + {"fpcr", FPCR}, + {"fpsr", FPSR}, + {"dspsr_el0", DSPSR_EL0}, + {"dlr_el0", DLR_EL0}, + {"ifsr32_el2", IFSR32_EL2}, + {"afsr0_el1", AFSR0_EL1}, + {"afsr0_el2", AFSR0_EL2}, + {"afsr0_el3", AFSR0_EL3}, + {"afsr1_el1", AFSR1_EL1}, + {"afsr1_el2", AFSR1_EL2}, + {"afsr1_el3", AFSR1_EL3}, + {"esr_el1", ESR_EL1}, + {"esr_el2", ESR_EL2}, + {"esr_el3", ESR_EL3}, + {"fpexc32_el2", FPEXC32_EL2}, + {"far_el1", FAR_EL1}, + {"far_el2", FAR_EL2}, + {"far_el3", FAR_EL3}, + {"hpfar_el2", HPFAR_EL2}, + {"par_el1", PAR_EL1}, + {"pmcr_el0", PMCR_EL0}, + {"pmcntenset_el0", PMCNTENSET_EL0}, + {"pmcntenclr_el0", PMCNTENCLR_EL0}, + {"pmovsclr_el0", PMOVSCLR_EL0}, + {"pmselr_el0", PMSELR_EL0}, + {"pmccntr_el0", PMCCNTR_EL0}, + {"pmxevtyper_el0", PMXEVTYPER_EL0}, + {"pmxevcntr_el0", PMXEVCNTR_EL0}, + {"pmuserenr_el0", PMUSERENR_EL0}, + {"pmintenset_el1", PMINTENSET_EL1}, + {"pmintenclr_el1", PMINTENCLR_EL1}, + {"pmovsset_el0", PMOVSSET_EL0}, + {"mair_el1", MAIR_EL1}, + {"mair_el2", MAIR_EL2}, + {"mair_el3", MAIR_EL3}, + {"amair_el1", AMAIR_EL1}, + {"amair_el2", AMAIR_EL2}, + {"amair_el3", AMAIR_EL3}, + {"vbar_el1", VBAR_EL1}, + {"vbar_el2", VBAR_EL2}, + {"vbar_el3", VBAR_EL3}, + {"rmr_el1", RMR_EL1}, + {"rmr_el2", RMR_EL2}, + {"rmr_el3", RMR_EL3}, + {"contextidr_el1", CONTEXTIDR_EL1}, + {"tpidr_el0", TPIDR_EL0}, + {"tpidr_el2", TPIDR_EL2}, + {"tpidr_el3", TPIDR_EL3}, + {"tpidrro_el0", TPIDRRO_EL0}, + {"tpidr_el1", TPIDR_EL1}, + {"cntfrq_el0", CNTFRQ_EL0}, + {"cntvoff_el2", CNTVOFF_EL2}, + {"cntkctl_el1", CNTKCTL_EL1}, + {"cnthctl_el2", CNTHCTL_EL2}, + {"cntp_tval_el0", CNTP_TVAL_EL0}, + {"cnthp_tval_el2", CNTHP_TVAL_EL2}, + {"cntps_tval_el1", 
CNTPS_TVAL_EL1}, + {"cntp_ctl_el0", CNTP_CTL_EL0}, + {"cnthp_ctl_el2", CNTHP_CTL_EL2}, + {"cntps_ctl_el1", CNTPS_CTL_EL1}, + {"cntp_cval_el0", CNTP_CVAL_EL0}, + {"cnthp_cval_el2", CNTHP_CVAL_EL2}, + {"cntps_cval_el1", CNTPS_CVAL_EL1}, + {"cntv_tval_el0", CNTV_TVAL_EL0}, + {"cntv_ctl_el0", CNTV_CTL_EL0}, + {"cntv_cval_el0", CNTV_CVAL_EL0}, + {"pmevcntr0_el0", PMEVCNTR0_EL0}, + {"pmevcntr1_el0", PMEVCNTR1_EL0}, + {"pmevcntr2_el0", PMEVCNTR2_EL0}, + {"pmevcntr3_el0", PMEVCNTR3_EL0}, + {"pmevcntr4_el0", PMEVCNTR4_EL0}, + {"pmevcntr5_el0", PMEVCNTR5_EL0}, + {"pmevcntr6_el0", PMEVCNTR6_EL0}, + {"pmevcntr7_el0", PMEVCNTR7_EL0}, + {"pmevcntr8_el0", PMEVCNTR8_EL0}, + {"pmevcntr9_el0", PMEVCNTR9_EL0}, + {"pmevcntr10_el0", PMEVCNTR10_EL0}, + {"pmevcntr11_el0", PMEVCNTR11_EL0}, + {"pmevcntr12_el0", PMEVCNTR12_EL0}, + {"pmevcntr13_el0", PMEVCNTR13_EL0}, + {"pmevcntr14_el0", PMEVCNTR14_EL0}, + {"pmevcntr15_el0", PMEVCNTR15_EL0}, + {"pmevcntr16_el0", PMEVCNTR16_EL0}, + {"pmevcntr17_el0", PMEVCNTR17_EL0}, + {"pmevcntr18_el0", PMEVCNTR18_EL0}, + {"pmevcntr19_el0", PMEVCNTR19_EL0}, + {"pmevcntr20_el0", PMEVCNTR20_EL0}, + {"pmevcntr21_el0", PMEVCNTR21_EL0}, + {"pmevcntr22_el0", PMEVCNTR22_EL0}, + {"pmevcntr23_el0", PMEVCNTR23_EL0}, + {"pmevcntr24_el0", PMEVCNTR24_EL0}, + {"pmevcntr25_el0", PMEVCNTR25_EL0}, + {"pmevcntr26_el0", PMEVCNTR26_EL0}, + {"pmevcntr27_el0", PMEVCNTR27_EL0}, + {"pmevcntr28_el0", PMEVCNTR28_EL0}, + {"pmevcntr29_el0", PMEVCNTR29_EL0}, + {"pmevcntr30_el0", PMEVCNTR30_EL0}, + {"pmccfiltr_el0", PMCCFILTR_EL0}, + {"pmevtyper0_el0", PMEVTYPER0_EL0}, + {"pmevtyper1_el0", PMEVTYPER1_EL0}, + {"pmevtyper2_el0", PMEVTYPER2_EL0}, + {"pmevtyper3_el0", PMEVTYPER3_EL0}, + {"pmevtyper4_el0", PMEVTYPER4_EL0}, + {"pmevtyper5_el0", PMEVTYPER5_EL0}, + {"pmevtyper6_el0", PMEVTYPER6_EL0}, + {"pmevtyper7_el0", PMEVTYPER7_EL0}, + {"pmevtyper8_el0", PMEVTYPER8_EL0}, + {"pmevtyper9_el0", PMEVTYPER9_EL0}, + {"pmevtyper10_el0", PMEVTYPER10_EL0}, + {"pmevtyper11_el0", PMEVTYPER11_EL0}, + {"pmevtyper12_el0", PMEVTYPER12_EL0}, + {"pmevtyper13_el0", PMEVTYPER13_EL0}, + {"pmevtyper14_el0", PMEVTYPER14_EL0}, + {"pmevtyper15_el0", PMEVTYPER15_EL0}, + {"pmevtyper16_el0", PMEVTYPER16_EL0}, + {"pmevtyper17_el0", PMEVTYPER17_EL0}, + {"pmevtyper18_el0", PMEVTYPER18_EL0}, + {"pmevtyper19_el0", PMEVTYPER19_EL0}, + {"pmevtyper20_el0", PMEVTYPER20_EL0}, + {"pmevtyper21_el0", PMEVTYPER21_EL0}, + {"pmevtyper22_el0", PMEVTYPER22_EL0}, + {"pmevtyper23_el0", PMEVTYPER23_EL0}, + {"pmevtyper24_el0", PMEVTYPER24_EL0}, + {"pmevtyper25_el0", PMEVTYPER25_EL0}, + {"pmevtyper26_el0", PMEVTYPER26_EL0}, + {"pmevtyper27_el0", PMEVTYPER27_EL0}, + {"pmevtyper28_el0", PMEVTYPER28_EL0}, + {"pmevtyper29_el0", PMEVTYPER29_EL0}, + {"pmevtyper30_el0", PMEVTYPER30_EL0}, +}; + +uint32_t +A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { + // First search the registers shared by all + std::string NameLower = Name.lower(); + for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { + if (SysRegPairs[i].Name == NameLower) { + Valid = true; + return SysRegPairs[i].Value; + } + } + + // Now try the instruction-specific registers (either read-only or + // write-only). 
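+ // (Illustration: for the MRS mapper, InstPairs is the MRSPairs table above, + // so a read of "mdccsr_el0" resolves in this loop; on the MSR mapper the + // same name would miss and fall through to the generic s3_* parse below.)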
+ for (unsigned i = 0; i < NumInstPairs; ++i) { + if (InstPairs[i].Name == NameLower) { + Valid = true; + return InstPairs[i].Value; + } + } + + // Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name, where the bits + // are: 11 xxx 1x11 xxxx xxx + Regex GenericRegPattern("^s3_([0-7])_c(1[15])_c([0-9]|1[0-5])_([0-7])$"); + + SmallVector<StringRef, 4> Ops; + if (!GenericRegPattern.match(NameLower, &Ops)) { + Valid = false; + return -1; + } + + uint32_t Op0 = 3, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0; + uint32_t Bits; + Ops[1].getAsInteger(10, Op1); + Ops[2].getAsInteger(10, CRn); + Ops[3].getAsInteger(10, CRm); + Ops[4].getAsInteger(10, Op2); + Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2; + + Valid = true; + return Bits; +} + +std::string +A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { + for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { + if (SysRegPairs[i].Value == Bits) { + Valid = true; + return SysRegPairs[i].Name; + } + } + + for (unsigned i = 0; i < NumInstPairs; ++i) { + if (InstPairs[i].Value == Bits) { + Valid = true; + return InstPairs[i].Name; + } + } + + uint32_t Op0 = (Bits >> 14) & 0x3; + uint32_t Op1 = (Bits >> 11) & 0x7; + uint32_t CRn = (Bits >> 7) & 0xf; + uint32_t CRm = (Bits >> 3) & 0xf; + uint32_t Op2 = Bits & 0x7; + + // Only combinations matching: 11 xxx 1x11 xxxx xxx are valid for a generic + // name. + if (Op0 != 3 || (CRn != 11 && CRn != 15)) { + Valid = false; + return ""; + } + + assert(Op0 == 3 && (CRn == 11 || CRn == 15) && "Invalid generic sysreg"); + + Valid = true; + return "s3_" + utostr(Op1) + "_c" + utostr(CRn) + + "_c" + utostr(CRm) + "_" + utostr(Op2); +} + +const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = { + {"ipas2e1is", IPAS2E1IS}, + {"ipas2le1is", IPAS2LE1IS}, + {"vmalle1is", VMALLE1IS}, + {"alle2is", ALLE2IS}, + {"alle3is", ALLE3IS}, + {"vae1is", VAE1IS}, + {"vae2is", VAE2IS}, + {"vae3is", VAE3IS}, + {"aside1is", ASIDE1IS}, + {"vaae1is", VAAE1IS}, + {"alle1is", ALLE1IS}, + {"vale1is", VALE1IS}, + {"vale2is", VALE2IS}, + {"vale3is", VALE3IS}, + {"vmalls12e1is", VMALLS12E1IS}, + {"vaale1is", VAALE1IS}, + {"ipas2e1", IPAS2E1}, + {"ipas2le1", IPAS2LE1}, + {"vmalle1", VMALLE1}, + {"alle2", ALLE2}, + {"alle3", ALLE3}, + {"vae1", VAE1}, + {"vae2", VAE2}, + {"vae3", VAE3}, + {"aside1", ASIDE1}, + {"vaae1", VAAE1}, + {"alle1", ALLE1}, + {"vale1", VALE1}, + {"vale2", VALE2}, + {"vale3", VALE3}, + {"vmalls12e1", VMALLS12E1}, + {"vaale1", VAALE1} +}; + +A64TLBI::TLBIMapper::TLBIMapper() + : NamedImmMapper(TLBIPairs, 0) {} + +bool A64Imms::isFPImm(const APFloat &Val, uint32_t &Imm8Bits) { + const fltSemantics &Sem = Val.getSemantics(); + unsigned FracBits = APFloat::semanticsPrecision(Sem) - 1; + + uint32_t ExpMask; + switch (FracBits) { + case 10: // IEEE half-precision + ExpMask = 0x1f; + break; + case 23: // IEEE single-precision + ExpMask = 0xff; + break; + case 52: // IEEE double-precision + ExpMask = 0x7ff; + break; + case 112: // IEEE quad-precision + // No immediates are valid for quad precision. 
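+ // (Worked example for the widths handled above: +1.0 has Sign == 0, + // Exponent == 0 and Fraction == 0, so it packs to Imm8Bits == 0x70.)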
+ return false; + default: + llvm_unreachable("Only half, single and double precision supported"); + } + + uint32_t ExpStart = FracBits; + uint64_t FracMask = (1ULL << FracBits) - 1; + + uint32_t Sign = Val.isNegative(); + + uint64_t Bits = Val.bitcastToAPInt().getLimitedValue(); + uint64_t Fraction = Bits & FracMask; + int32_t Exponent = ((Bits >> ExpStart) & ExpMask); + Exponent -= ExpMask >> 1; + + // S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 5):imm8<5:0>:Zeros(19) + // D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 8):imm8<5:0>:Zeros(48) + // This translates to: only 4 bits of fraction; -3 <= exp <= 4. + uint64_t A64FracStart = FracBits - 4; + uint64_t A64FracMask = 0xf; + + // Are there too many fraction bits? + if (Fraction & ~(A64FracMask << A64FracStart)) + return false; + + if (Exponent < -3 || Exponent > 4) + return false; + + uint32_t PackedFraction = (Fraction >> A64FracStart) & A64FracMask; + uint32_t PackedExp = (Exponent + 7) & 0x7; + + Imm8Bits = (Sign << 7) | (PackedExp << 4) | PackedFraction; + return true; +} + +// Encoding of the immediate for logical (immediate) instructions: +// +// | N | imms | immr | size | R | S | +// |---+--------+--------+------+--------------+--------------| +// | 1 | ssssss | rrrrrr | 64 | UInt(rrrrrr) | UInt(ssssss) | +// | 0 | 0sssss | xrrrrr | 32 | UInt(rrrrr) | UInt(sssss) | +// | 0 | 10ssss | xxrrrr | 16 | UInt(rrrr) | UInt(ssss) | +// | 0 | 110sss | xxxrrr | 8 | UInt(rrr) | UInt(sss) | +// | 0 | 1110ss | xxxxrr | 4 | UInt(rr) | UInt(ss) | +// | 0 | 11110s | xxxxxr | 2 | UInt(r) | UInt(s) | +// | 0 | 11111x | - | | UNALLOCATED | | +// +// Columns 'R', 'S' and 'size' specify a "bitmask immediate" of size bits in +// which the lower S+1 bits are ones and the remaining bits are zero, then +// rotated right by R bits, which is then replicated across the datapath. +// +// + Values of 'N', 'imms' and 'immr' which do not match the above table are +// RESERVED. +// + If all 's' bits in the imms field are set then the instruction is +// RESERVED. +// + The 'x' bits in the 'immr' field are IGNORED. + +bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) { + int RepeatWidth; + int Rotation = 0; + int Num1s = 0; + + // Because there are S+1 ones in the replicated mask, an immediate of all + // zeros is not allowed. Filtering it here is probably more efficient. + if (Imm == 0) return false; + + for (RepeatWidth = RegWidth; RepeatWidth > 1; RepeatWidth /= 2) { + uint64_t RepeatMask = RepeatWidth == 64 ? -1 : (1ULL << RepeatWidth) - 1; + uint64_t ReplicatedMask = Imm & RepeatMask; + + if (ReplicatedMask == 0) continue; + + // First we have to make sure the mask is actually repeated in each slot for + // this width-specifier. + bool IsReplicatedMask = true; + for (unsigned i = RepeatWidth; i < RegWidth; i += RepeatWidth) { + if (((Imm >> i) & RepeatMask) != ReplicatedMask) { + IsReplicatedMask = false; + break; + } + } + if (!IsReplicatedMask) continue; + + // Now we have to work out the amount of rotation needed. The first part of + // this calculation is actually independent of RepeatWidth, but the complex + // case will depend on it. + Rotation = CountTrailingZeros_64(Imm); + if (Rotation == 0) { + // There were no trailing zeros, which means it's either in place or there + // are 1s at each end (e.g. 0x8003 needs rotating). + Rotation = RegWidth == 64 ? 
CountLeadingOnes_64(Imm) + : CountLeadingOnes_32(Imm); + Rotation = RepeatWidth - Rotation; + } + + uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation) + | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask); + // Of course, they may not actually be ones, so we have to check that: + if (!isMask_64(ReplicatedOnes)) + continue; + + Num1s = CountTrailingOnes_64(ReplicatedOnes); + + // We know we've got an almost valid encoding (certainly, if this is invalid + // no other parameters would work). + break; + } + + // The encodings which would produce all 1s are RESERVED. + if (RepeatWidth == 1 || Num1s == RepeatWidth) return false; + + uint32_t N = RepeatWidth == 64; + uint32_t ImmR = RepeatWidth - Rotation; + uint32_t ImmS = Num1s - 1; + + switch (RepeatWidth) { + default: break; // No action required for the 32 and 64-bit widths. + case 16: ImmS |= 0x20; break; // 10ssss + case 8: ImmS |= 0x30; break; // 110sss + case 4: ImmS |= 0x38; break; // 1110ss + case 2: ImmS |= 0x3c; break; // 11110s + } + + Bits = ImmS | (ImmR << 6) | (N << 12); + + return true; +} + + +bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits, + uint64_t &Imm) { + uint32_t N = Bits >> 12; + uint32_t ImmR = (Bits >> 6) & 0x3f; + uint32_t ImmS = Bits & 0x3f; + + // N=1 encodes a 64-bit replication and is invalid for the 32-bit + // instructions. + if (RegWidth == 32 && N != 0) return false; + + int Width = 0; + if (N == 1) + Width = 64; + else if ((ImmS & 0x20) == 0) + Width = 32; + else if ((ImmS & 0x10) == 0) + Width = 16; + else if ((ImmS & 0x08) == 0) + Width = 8; + else if ((ImmS & 0x04) == 0) + Width = 4; + else if ((ImmS & 0x02) == 0) + Width = 2; + else { + // ImmS is 0b11111x: UNALLOCATED + return false; + } + + int Num1s = (ImmS & (Width - 1)) + 1; + + // All encodings which would map to -1 (signed) are RESERVED. + if (Num1s == Width) return false; + + int Rotation = (ImmR & (Width - 1)); + uint64_t Mask = (1ULL << Num1s) - 1; + uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1; + Mask = (Mask >> Rotation) + | ((Mask << (Width - Rotation)) & WidthMask); + + Imm = 0; + for (unsigned i = 0; i < RegWidth / Width; ++i) { + Imm |= Mask; + Mask <<= Width; + } + + return true; +} + +bool A64Imms::isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { + // If high bits are set then a 32-bit MOVZ can't possibly work. + if (RegWidth == 32 && (Value & ~0xffffffffULL)) + return false; + + for (int i = 0; i < RegWidth; i += 16) { + // If the value is 0 when we mask out all the bits that could be set with + // the current LSL value then it's representable. + if ((Value & ~(0xffffULL << i)) == 0) { + Shift = i / 16; + UImm16 = (Value >> i) & 0xffff; + return true; + } + } + return false; +} + +bool A64Imms::isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { + // MOVN is defined to set its register to NOT(LSL(imm16, shift)). + + // We have to be a little careful about a 32-bit register: 0xffff_1234 *is* + // representable, but ~0xffff_1234 == 0xffff_ffff_0000_edcb which is not + // a valid input for isMOVZImm. + if (RegWidth == 32 && (Value & ~0xffffffffULL)) + return false; + + uint64_t MOVZEquivalent = RegWidth == 32 ? 
~Value & 0xffffffff : ~Value; + + return isMOVZImm(RegWidth, MOVZEquivalent, UImm16, Shift); +} + +bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value, + int &UImm16, int &Shift) { + if (isMOVZImm(RegWidth, Value, UImm16, Shift)) + return false; + + return isMOVNImm(RegWidth, Value, UImm16, Shift); +} diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h new file mode 100644 index 0000000..5eebf44 --- /dev/null +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -0,0 +1,784 @@ +//===-- AArch64BaseInfo.h - Top level definitions for AArch64 --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the AArch64 target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AARCH64_BASEINFO_H +#define LLVM_AARCH64_BASEINFO_H + +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +// Enums corresponding to AArch64 condition codes +namespace A64CC { + // The CondCodes constants map directly to the 4-bit encoding of the + // condition field for predicated instructions. + enum CondCodes { // Meaning (integer) Meaning (floating-point) + EQ = 0, // Equal Equal + NE, // Not equal Not equal, or unordered + HS, // Unsigned higher or same >, ==, or unordered + LO, // Unsigned lower Less than + MI, // Minus, negative Less than + PL, // Plus, positive or zero >, ==, or unordered + VS, // Overflow Unordered + VC, // No overflow Ordered + HI, // Unsigned higher Greater than, or unordered + LS, // Unsigned lower or same Less than or equal + GE, // Greater than or equal Greater than or equal + LT, // Less than Less than, or unordered + GT, // Signed greater than Greater than + LE, // Signed less than or equal <, ==, or unordered + AL, // Always (unconditional) Always (unconditional) + NV, // Always (unconditional) Always (unconditional) + // Note that NV exists purely to disassemble 0b1111. Execution + // is "always". 
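+ // Invalid (== 16) deliberately lies outside the 4-bit encoding space; in + // this file it is only produced by A64StringToCondCode below.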
+ Invalid + }; + +} // namespace A64CC + +inline static const char *A64CondCodeToString(A64CC::CondCodes CC) { + switch (CC) { + default: llvm_unreachable("Unknown condition code"); + case A64CC::EQ: return "eq"; + case A64CC::NE: return "ne"; + case A64CC::HS: return "hs"; + case A64CC::LO: return "lo"; + case A64CC::MI: return "mi"; + case A64CC::PL: return "pl"; + case A64CC::VS: return "vs"; + case A64CC::VC: return "vc"; + case A64CC::HI: return "hi"; + case A64CC::LS: return "ls"; + case A64CC::GE: return "ge"; + case A64CC::LT: return "lt"; + case A64CC::GT: return "gt"; + case A64CC::LE: return "le"; + case A64CC::AL: return "al"; + case A64CC::NV: return "nv"; + } +} + +inline static A64CC::CondCodes A64StringToCondCode(StringRef CondStr) { + return StringSwitch<A64CC::CondCodes>(CondStr.lower()) + .Case("eq", A64CC::EQ) + .Case("ne", A64CC::NE) + .Case("hs", A64CC::HS) + .Case("cs", A64CC::HS) + .Case("lo", A64CC::LO) + .Case("cc", A64CC::LO) + .Case("mi", A64CC::MI) + .Case("pl", A64CC::PL) + .Case("vs", A64CC::VS) + .Case("vc", A64CC::VC) + .Case("hi", A64CC::HI) + .Case("ls", A64CC::LS) + .Case("ge", A64CC::GE) + .Case("lt", A64CC::LT) + .Case("gt", A64CC::GT) + .Case("le", A64CC::LE) + .Case("al", A64CC::AL) + .Case("nv", A64CC::NV) + .Default(A64CC::Invalid); +} + +inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) { + // It turns out that the condition codes have been designed so that in order + // to reverse the intent of the condition you only have to invert the low bit: + + return static_cast<A64CC::CondCodes>(static_cast<unsigned>(CC) ^ 0x1); +} + +/// Instances of this class can perform bidirectional mapping from random +/// identifier strings to operand encodings. For example "MSR" takes a named +/// system-register which must be encoded somehow and decoded for printing. This +/// central location means that the information for those transformations is not +/// duplicated and remains in sync. +/// +/// FIXME: currently the algorithm is a completely unoptimised linear +/// search. Obviously this could be improved, but we would probably want to work +/// out just how often these instructions are emitted before working on it. It +/// might even be optimal to just reorder the tables for the common instructions +/// rather than changing the algorithm. +struct NamedImmMapper { + struct Mapping { + const char *Name; + uint32_t Value; + }; + + template<int N> + NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm) + : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {} + + StringRef toString(uint32_t Value, bool &Valid) const; + uint32_t fromString(StringRef Name, bool &Valid) const; + + /// Many of the instructions allow an alternative assembly form consisting of + /// a simple immediate. Currently the only valid forms are ranges [0, N), + /// where N == 0 indicates that no immediate syntax-form is allowed. 
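+ /// (Hypothetical illustration: a mapper constructed with TooBigImm == 16 + /// accepts immediates #0-#15, while one constructed with TooBigImm == 0 + /// rejects every bare immediate.)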
+ bool validImm(uint32_t Value) const; +protected: + const Mapping *Pairs; + size_t NumPairs; + uint32_t TooBigImm; +}; + +namespace A64AT { + enum ATValues { + Invalid = -1, // Op0 Op1 CRn CRm Op2 + S1E1R = 0x43c0, // 01 000 0111 1000 000 + S1E2R = 0x63c0, // 01 100 0111 1000 000 + S1E3R = 0x73c0, // 01 110 0111 1000 000 + S1E1W = 0x43c1, // 01 000 0111 1000 001 + S1E2W = 0x63c1, // 01 100 0111 1000 001 + S1E3W = 0x73c1, // 01 110 0111 1000 001 + S1E0R = 0x43c2, // 01 000 0111 1000 010 + S1E0W = 0x43c3, // 01 000 0111 1000 011 + S12E1R = 0x63c4, // 01 100 0111 1000 100 + S12E1W = 0x63c5, // 01 100 0111 1000 101 + S12E0R = 0x63c6, // 01 100 0111 1000 110 + S12E0W = 0x63c7 // 01 100 0111 1000 111 + }; + + struct ATMapper : NamedImmMapper { + const static Mapping ATPairs[]; + + ATMapper(); + }; + +} +namespace A64DB { + enum DBValues { + Invalid = -1, + OSHLD = 0x1, + OSHST = 0x2, + OSH = 0x3, + NSHLD = 0x5, + NSHST = 0x6, + NSH = 0x7, + ISHLD = 0x9, + ISHST = 0xa, + ISH = 0xb, + LD = 0xd, + ST = 0xe, + SY = 0xf + }; + + struct DBarrierMapper : NamedImmMapper { + const static Mapping DBarrierPairs[]; + + DBarrierMapper(); + }; +} + +namespace A64DC { + enum DCValues { + Invalid = -1, // Op0 Op1 CRn CRm Op2 + ZVA = 0x5ba1, // 01 011 0111 0100 001 + IVAC = 0x43b1, // 01 000 0111 0110 001 + ISW = 0x43b2, // 01 000 0111 0110 010 + CVAC = 0x5bd1, // 01 011 0111 1010 001 + CSW = 0x43d2, // 01 000 0111 1010 010 + CVAU = 0x5bd9, // 01 011 0111 1011 001 + CIVAC = 0x5bf1, // 01 011 0111 1110 001 + CISW = 0x43f2 // 01 000 0111 1110 010 + }; + + struct DCMapper : NamedImmMapper { + const static Mapping DCPairs[]; + + DCMapper(); + }; + +} + +namespace A64IC { + enum ICValues { + Invalid = -1, // Op1 CRn CRm Op2 + IALLUIS = 0x0388, // 000 0111 0001 000 + IALLU = 0x03a8, // 000 0111 0101 000 + IVAU = 0x1ba9 // 011 0111 0101 001 + }; + + + struct ICMapper : NamedImmMapper { + const static Mapping ICPairs[]; + + ICMapper(); + }; + + static inline bool NeedsRegister(ICValues Val) { + return Val == IVAU; + } +} + +namespace A64ISB { + enum ISBValues { + Invalid = -1, + SY = 0xf + }; + struct ISBMapper : NamedImmMapper { + const static Mapping ISBPairs[]; + + ISBMapper(); + }; +} + +namespace A64PRFM { + enum PRFMValues { + Invalid = -1, + PLDL1KEEP = 0x00, + PLDL1STRM = 0x01, + PLDL2KEEP = 0x02, + PLDL2STRM = 0x03, + PLDL3KEEP = 0x04, + PLDL3STRM = 0x05, + PLIL1KEEP = 0x08, + PLIL1STRM = 0x09, + PLIL2KEEP = 0x0a, + PLIL2STRM = 0x0b, + PLIL3KEEP = 0x0c, + PLIL3STRM = 0x0d, + PSTL1KEEP = 0x10, + PSTL1STRM = 0x11, + PSTL2KEEP = 0x12, + PSTL2STRM = 0x13, + PSTL3KEEP = 0x14, + PSTL3STRM = 0x15 + }; + + struct PRFMMapper : NamedImmMapper { + const static Mapping PRFMPairs[]; + + PRFMMapper(); + }; +} + +namespace A64PState { + enum PStateValues { + Invalid = -1, + SPSel = 0x05, + DAIFSet = 0x1e, + DAIFClr = 0x1f + }; + + struct PStateMapper : NamedImmMapper { + const static Mapping PStatePairs[]; + + PStateMapper(); + }; + +} + +namespace A64SE { + enum ShiftExtSpecifiers { + Invalid = -1, + LSL, + LSR, + ASR, + ROR, + + UXTB, + UXTH, + UXTW, + UXTX, + + SXTB, + SXTH, + SXTW, + SXTX + }; +} + +namespace A64SysReg { + enum SysRegROValues { + MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000 + DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000 + MDRAR_EL1 = 0x8080, // 10 000 0001 0000 000 + OSLSR_EL1 = 0x808c, // 10 000 0001 0001 100 + DBGAUTHSTATUS_EL1 = 0x83f6, // 10 000 0111 1110 110 + PMCEID0_EL0 = 0xdce6, // 11 011 1001 1100 110 + PMCEID1_EL0 = 0xdce7, // 11 011 1001 1100 111 + MIDR_EL1 = 0xc000, // 11 000 0000 0000 000 + 
CCSIDR_EL1 = 0xc800, // 11 001 0000 0000 000 + CLIDR_EL1 = 0xc801, // 11 001 0000 0000 001 + CTR_EL0 = 0xd801, // 11 011 0000 0000 001 + MPIDR_EL1 = 0xc005, // 11 000 0000 0000 101 + REVIDR_EL1 = 0xc006, // 11 000 0000 0000 110 + AIDR_EL1 = 0xc807, // 11 001 0000 0000 111 + DCZID_EL0 = 0xd807, // 11 011 0000 0000 111 + ID_PFR0_EL1 = 0xc008, // 11 000 0000 0001 000 + ID_PFR1_EL1 = 0xc009, // 11 000 0000 0001 001 + ID_DFR0_EL1 = 0xc00a, // 11 000 0000 0001 010 + ID_AFR0_EL1 = 0xc00b, // 11 000 0000 0001 011 + ID_MMFR0_EL1 = 0xc00c, // 11 000 0000 0001 100 + ID_MMFR1_EL1 = 0xc00d, // 11 000 0000 0001 101 + ID_MMFR2_EL1 = 0xc00e, // 11 000 0000 0001 110 + ID_MMFR3_EL1 = 0xc00f, // 11 000 0000 0001 111 + ID_ISAR0_EL1 = 0xc010, // 11 000 0000 0010 000 + ID_ISAR1_EL1 = 0xc011, // 11 000 0000 0010 001 + ID_ISAR2_EL1 = 0xc012, // 11 000 0000 0010 010 + ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011 + ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100 + ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101 + ID_AA64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000 + ID_AA64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001 + ID_AA64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000 + ID_AA64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001 + ID_AA64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100 + ID_AA64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101 + ID_AA64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000 + ID_AA64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001 + ID_AA64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000 + ID_AA64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001 + MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000 + MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001 + MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010 + RVBAR_EL1 = 0xc601, // 11 000 1100 0000 001 + RVBAR_EL2 = 0xe601, // 11 100 1100 0000 001 + RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001 + ISR_EL1 = 0xc608, // 11 000 1100 0001 000 + CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001 + CNTVCT_EL0 = 0xdf02 // 11 011 1110 0000 010 + }; + + enum SysRegWOValues { + DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000 + OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100 + PMSWINC_EL0 = 0xdce4 // 11 011 1001 1100 100 + }; + + enum SysRegValues { + Invalid = -1, // Op0 Op1 CRn CRm Op2 + OSDTRRX_EL1 = 0x8002, // 10 000 0000 0000 010 + OSDTRTX_EL1 = 0x801a, // 10 000 0000 0011 010 + TEECR32_EL1 = 0x9000, // 10 010 0000 0000 000 + MDCCINT_EL1 = 0x8010, // 10 000 0000 0010 000 + MDSCR_EL1 = 0x8012, // 10 000 0000 0010 010 + DBGDTR_EL0 = 0x9820, // 10 011 0000 0100 000 + OSECCR_EL1 = 0x8032, // 10 000 0000 0110 010 + DBGVCR32_EL2 = 0xa038, // 10 100 0000 0111 000 + DBGBVR0_EL1 = 0x8004, // 10 000 0000 0000 100 + DBGBVR1_EL1 = 0x800c, // 10 000 0000 0001 100 + DBGBVR2_EL1 = 0x8014, // 10 000 0000 0010 100 + DBGBVR3_EL1 = 0x801c, // 10 000 0000 0011 100 + DBGBVR4_EL1 = 0x8024, // 10 000 0000 0100 100 + DBGBVR5_EL1 = 0x802c, // 10 000 0000 0101 100 + DBGBVR6_EL1 = 0x8034, // 10 000 0000 0110 100 + DBGBVR7_EL1 = 0x803c, // 10 000 0000 0111 100 + DBGBVR8_EL1 = 0x8044, // 10 000 0000 1000 100 + DBGBVR9_EL1 = 0x804c, // 10 000 0000 1001 100 + DBGBVR10_EL1 = 0x8054, // 10 000 0000 1010 100 + DBGBVR11_EL1 = 0x805c, // 10 000 0000 1011 100 + DBGBVR12_EL1 = 0x8064, // 10 000 0000 1100 100 + DBGBVR13_EL1 = 0x806c, // 10 000 0000 1101 100 + DBGBVR14_EL1 = 0x8074, // 10 000 0000 1110 100 + DBGBVR15_EL1 = 0x807c, // 10 000 0000 1111 100 + DBGBCR0_EL1 = 0x8005, // 10 000 0000 0000 101 + DBGBCR1_EL1 = 0x800d, // 10 000 0000 0001 101 + DBGBCR2_EL1 = 0x8015, // 10 000 0000 0010 101 + DBGBCR3_EL1 = 0x801d, // 10 000 0000 0011 101 + DBGBCR4_EL1 = 0x8025, // 
10 000 0000 0100 101 + DBGBCR5_EL1 = 0x802d, // 10 000 0000 0101 101 + DBGBCR6_EL1 = 0x8035, // 10 000 0000 0110 101 + DBGBCR7_EL1 = 0x803d, // 10 000 0000 0111 101 + DBGBCR8_EL1 = 0x8045, // 10 000 0000 1000 101 + DBGBCR9_EL1 = 0x804d, // 10 000 0000 1001 101 + DBGBCR10_EL1 = 0x8055, // 10 000 0000 1010 101 + DBGBCR11_EL1 = 0x805d, // 10 000 0000 1011 101 + DBGBCR12_EL1 = 0x8065, // 10 000 0000 1100 101 + DBGBCR13_EL1 = 0x806d, // 10 000 0000 1101 101 + DBGBCR14_EL1 = 0x8075, // 10 000 0000 1110 101 + DBGBCR15_EL1 = 0x807d, // 10 000 0000 1111 101 + DBGWVR0_EL1 = 0x8006, // 10 000 0000 0000 110 + DBGWVR1_EL1 = 0x800e, // 10 000 0000 0001 110 + DBGWVR2_EL1 = 0x8016, // 10 000 0000 0010 110 + DBGWVR3_EL1 = 0x801e, // 10 000 0000 0011 110 + DBGWVR4_EL1 = 0x8026, // 10 000 0000 0100 110 + DBGWVR5_EL1 = 0x802e, // 10 000 0000 0101 110 + DBGWVR6_EL1 = 0x8036, // 10 000 0000 0110 110 + DBGWVR7_EL1 = 0x803e, // 10 000 0000 0111 110 + DBGWVR8_EL1 = 0x8046, // 10 000 0000 1000 110 + DBGWVR9_EL1 = 0x804e, // 10 000 0000 1001 110 + DBGWVR10_EL1 = 0x8056, // 10 000 0000 1010 110 + DBGWVR11_EL1 = 0x805e, // 10 000 0000 1011 110 + DBGWVR12_EL1 = 0x8066, // 10 000 0000 1100 110 + DBGWVR13_EL1 = 0x806e, // 10 000 0000 1101 110 + DBGWVR14_EL1 = 0x8076, // 10 000 0000 1110 110 + DBGWVR15_EL1 = 0x807e, // 10 000 0000 1111 110 + DBGWCR0_EL1 = 0x8007, // 10 000 0000 0000 111 + DBGWCR1_EL1 = 0x800f, // 10 000 0000 0001 111 + DBGWCR2_EL1 = 0x8017, // 10 000 0000 0010 111 + DBGWCR3_EL1 = 0x801f, // 10 000 0000 0011 111 + DBGWCR4_EL1 = 0x8027, // 10 000 0000 0100 111 + DBGWCR5_EL1 = 0x802f, // 10 000 0000 0101 111 + DBGWCR6_EL1 = 0x8037, // 10 000 0000 0110 111 + DBGWCR7_EL1 = 0x803f, // 10 000 0000 0111 111 + DBGWCR8_EL1 = 0x8047, // 10 000 0000 1000 111 + DBGWCR9_EL1 = 0x804f, // 10 000 0000 1001 111 + DBGWCR10_EL1 = 0x8057, // 10 000 0000 1010 111 + DBGWCR11_EL1 = 0x805f, // 10 000 0000 1011 111 + DBGWCR12_EL1 = 0x8067, // 10 000 0000 1100 111 + DBGWCR13_EL1 = 0x806f, // 10 000 0000 1101 111 + DBGWCR14_EL1 = 0x8077, // 10 000 0000 1110 111 + DBGWCR15_EL1 = 0x807f, // 10 000 0000 1111 111 + TEEHBR32_EL1 = 0x9080, // 10 010 0001 0000 000 + OSDLR_EL1 = 0x809c, // 10 000 0001 0011 100 + DBGPRCR_EL1 = 0x80a4, // 10 000 0001 0100 100 + DBGCLAIMSET_EL1 = 0x83c6, // 10 000 0111 1000 110 + DBGCLAIMCLR_EL1 = 0x83ce, // 10 000 0111 1001 110 + CSSELR_EL1 = 0xd000, // 11 010 0000 0000 000 + VPIDR_EL2 = 0xe000, // 11 100 0000 0000 000 + VMPIDR_EL2 = 0xe005, // 11 100 0000 0000 101 + CPACR_EL1 = 0xc082, // 11 000 0001 0000 010 + SCTLR_EL1 = 0xc080, // 11 000 0001 0000 000 + SCTLR_EL2 = 0xe080, // 11 100 0001 0000 000 + SCTLR_EL3 = 0xf080, // 11 110 0001 0000 000 + ACTLR_EL1 = 0xc081, // 11 000 0001 0000 001 + ACTLR_EL2 = 0xe081, // 11 100 0001 0000 001 + ACTLR_EL3 = 0xf081, // 11 110 0001 0000 001 + HCR_EL2 = 0xe088, // 11 100 0001 0001 000 + SCR_EL3 = 0xf088, // 11 110 0001 0001 000 + MDCR_EL2 = 0xe089, // 11 100 0001 0001 001 + SDER32_EL3 = 0xf089, // 11 110 0001 0001 001 + CPTR_EL2 = 0xe08a, // 11 100 0001 0001 010 + CPTR_EL3 = 0xf08a, // 11 110 0001 0001 010 + HSTR_EL2 = 0xe08b, // 11 100 0001 0001 011 + HACR_EL2 = 0xe08f, // 11 100 0001 0001 111 + MDCR_EL3 = 0xf099, // 11 110 0001 0011 001 + TTBR0_EL1 = 0xc100, // 11 000 0010 0000 000 + TTBR0_EL2 = 0xe100, // 11 100 0010 0000 000 + TTBR0_EL3 = 0xf100, // 11 110 0010 0000 000 + TTBR1_EL1 = 0xc101, // 11 000 0010 0000 001 + TCR_EL1 = 0xc102, // 11 000 0010 0000 010 + TCR_EL2 = 0xe102, // 11 100 0010 0000 010 + TCR_EL3 = 0xf102, // 11 110 0010 0000 010 + VTTBR_EL2 = 0xe108, 
// 11 100 0010 0001 000 + VTCR_EL2 = 0xe10a, // 11 100 0010 0001 010 + DACR32_EL2 = 0xe180, // 11 100 0011 0000 000 + SPSR_EL1 = 0xc200, // 11 000 0100 0000 000 + SPSR_EL2 = 0xe200, // 11 100 0100 0000 000 + SPSR_EL3 = 0xf200, // 11 110 0100 0000 000 + ELR_EL1 = 0xc201, // 11 000 0100 0000 001 + ELR_EL2 = 0xe201, // 11 100 0100 0000 001 + ELR_EL3 = 0xf201, // 11 110 0100 0000 001 + SP_EL0 = 0xc208, // 11 000 0100 0001 000 + SP_EL1 = 0xe208, // 11 100 0100 0001 000 + SP_EL2 = 0xf208, // 11 110 0100 0001 000 + SPSel = 0xc210, // 11 000 0100 0010 000 + NZCV = 0xda10, // 11 011 0100 0010 000 + DAIF = 0xda11, // 11 011 0100 0010 001 + CurrentEL = 0xc212, // 11 000 0100 0010 010 + SPSR_irq = 0xe218, // 11 100 0100 0011 000 + SPSR_abt = 0xe219, // 11 100 0100 0011 001 + SPSR_und = 0xe21a, // 11 100 0100 0011 010 + SPSR_fiq = 0xe21b, // 11 100 0100 0011 011 + FPCR = 0xda20, // 11 011 0100 0100 000 + FPSR = 0xda21, // 11 011 0100 0100 001 + DSPSR_EL0 = 0xda28, // 11 011 0100 0101 000 + DLR_EL0 = 0xda29, // 11 011 0100 0101 001 + IFSR32_EL2 = 0xe281, // 11 100 0101 0000 001 + AFSR0_EL1 = 0xc288, // 11 000 0101 0001 000 + AFSR0_EL2 = 0xe288, // 11 100 0101 0001 000 + AFSR0_EL3 = 0xf288, // 11 110 0101 0001 000 + AFSR1_EL1 = 0xc289, // 11 000 0101 0001 001 + AFSR1_EL2 = 0xe289, // 11 100 0101 0001 001 + AFSR1_EL3 = 0xf289, // 11 110 0101 0001 001 + ESR_EL1 = 0xc290, // 11 000 0101 0010 000 + ESR_EL2 = 0xe290, // 11 100 0101 0010 000 + ESR_EL3 = 0xf290, // 11 110 0101 0010 000 + FPEXC32_EL2 = 0xe298, // 11 100 0101 0011 000 + FAR_EL1 = 0xc300, // 11 000 0110 0000 000 + FAR_EL2 = 0xe300, // 11 100 0110 0000 000 + FAR_EL3 = 0xf300, // 11 110 0110 0000 000 + HPFAR_EL2 = 0xe304, // 11 100 0110 0000 100 + PAR_EL1 = 0xc3a0, // 11 000 0111 0100 000 + PMCR_EL0 = 0xdce0, // 11 011 1001 1100 000 + PMCNTENSET_EL0 = 0xdce1, // 11 011 1001 1100 001 + PMCNTENCLR_EL0 = 0xdce2, // 11 011 1001 1100 010 + PMOVSCLR_EL0 = 0xdce3, // 11 011 1001 1100 011 + PMSELR_EL0 = 0xdce5, // 11 011 1001 1100 101 + PMCCNTR_EL0 = 0xdce8, // 11 011 1001 1101 000 + PMXEVTYPER_EL0 = 0xdce9, // 11 011 1001 1101 001 + PMXEVCNTR_EL0 = 0xdcea, // 11 011 1001 1101 010 + PMUSERENR_EL0 = 0xdcf0, // 11 011 1001 1110 000 + PMINTENSET_EL1 = 0xc4f1, // 11 000 1001 1110 001 + PMINTENCLR_EL1 = 0xc4f2, // 11 000 1001 1110 010 + PMOVSSET_EL0 = 0xdcf3, // 11 011 1001 1110 011 + MAIR_EL1 = 0xc510, // 11 000 1010 0010 000 + MAIR_EL2 = 0xe510, // 11 100 1010 0010 000 + MAIR_EL3 = 0xf510, // 11 110 1010 0010 000 + AMAIR_EL1 = 0xc518, // 11 000 1010 0011 000 + AMAIR_EL2 = 0xe518, // 11 100 1010 0011 000 + AMAIR_EL3 = 0xf518, // 11 110 1010 0011 000 + VBAR_EL1 = 0xc600, // 11 000 1100 0000 000 + VBAR_EL2 = 0xe600, // 11 100 1100 0000 000 + VBAR_EL3 = 0xf600, // 11 110 1100 0000 000 + RMR_EL1 = 0xc602, // 11 000 1100 0000 010 + RMR_EL2 = 0xe602, // 11 100 1100 0000 010 + RMR_EL3 = 0xf602, // 11 110 1100 0000 010 + CONTEXTIDR_EL1 = 0xc681, // 11 000 1101 0000 001 + TPIDR_EL0 = 0xde82, // 11 011 1101 0000 010 + TPIDR_EL2 = 0xe682, // 11 100 1101 0000 010 + TPIDR_EL3 = 0xf682, // 11 110 1101 0000 010 + TPIDRRO_EL0 = 0xde83, // 11 011 1101 0000 011 + TPIDR_EL1 = 0xc684, // 11 000 1101 0000 100 + CNTFRQ_EL0 = 0xdf00, // 11 011 1110 0000 000 + CNTVOFF_EL2 = 0xe703, // 11 100 1110 0000 011 + CNTKCTL_EL1 = 0xc708, // 11 000 1110 0001 000 + CNTHCTL_EL2 = 0xe708, // 11 100 1110 0001 000 + CNTP_TVAL_EL0 = 0xdf10, // 11 011 1110 0010 000 + CNTHP_TVAL_EL2 = 0xe710, // 11 100 1110 0010 000 + CNTPS_TVAL_EL1 = 0xff10, // 11 111 1110 0010 000 + CNTP_CTL_EL0 = 0xdf11, // 11 011 
1110 0010 001 + CNTHP_CTL_EL2 = 0xe711, // 11 100 1110 0010 001 + CNTPS_CTL_EL1 = 0xff11, // 11 111 1110 0010 001 + CNTP_CVAL_EL0 = 0xdf12, // 11 011 1110 0010 010 + CNTHP_CVAL_EL2 = 0xe712, // 11 100 1110 0010 010 + CNTPS_CVAL_EL1 = 0xff12, // 11 111 1110 0010 010 + CNTV_TVAL_EL0 = 0xdf18, // 11 011 1110 0011 000 + CNTV_CTL_EL0 = 0xdf19, // 11 011 1110 0011 001 + CNTV_CVAL_EL0 = 0xdf1a, // 11 011 1110 0011 010 + PMEVCNTR0_EL0 = 0xdf40, // 11 011 1110 1000 000 + PMEVCNTR1_EL0 = 0xdf41, // 11 011 1110 1000 001 + PMEVCNTR2_EL0 = 0xdf42, // 11 011 1110 1000 010 + PMEVCNTR3_EL0 = 0xdf43, // 11 011 1110 1000 011 + PMEVCNTR4_EL0 = 0xdf44, // 11 011 1110 1000 100 + PMEVCNTR5_EL0 = 0xdf45, // 11 011 1110 1000 101 + PMEVCNTR6_EL0 = 0xdf46, // 11 011 1110 1000 110 + PMEVCNTR7_EL0 = 0xdf47, // 11 011 1110 1000 111 + PMEVCNTR8_EL0 = 0xdf48, // 11 011 1110 1001 000 + PMEVCNTR9_EL0 = 0xdf49, // 11 011 1110 1001 001 + PMEVCNTR10_EL0 = 0xdf4a, // 11 011 1110 1001 010 + PMEVCNTR11_EL0 = 0xdf4b, // 11 011 1110 1001 011 + PMEVCNTR12_EL0 = 0xdf4c, // 11 011 1110 1001 100 + PMEVCNTR13_EL0 = 0xdf4d, // 11 011 1110 1001 101 + PMEVCNTR14_EL0 = 0xdf4e, // 11 011 1110 1001 110 + PMEVCNTR15_EL0 = 0xdf4f, // 11 011 1110 1001 111 + PMEVCNTR16_EL0 = 0xdf50, // 11 011 1110 1010 000 + PMEVCNTR17_EL0 = 0xdf51, // 11 011 1110 1010 001 + PMEVCNTR18_EL0 = 0xdf52, // 11 011 1110 1010 010 + PMEVCNTR19_EL0 = 0xdf53, // 11 011 1110 1010 011 + PMEVCNTR20_EL0 = 0xdf54, // 11 011 1110 1010 100 + PMEVCNTR21_EL0 = 0xdf55, // 11 011 1110 1010 101 + PMEVCNTR22_EL0 = 0xdf56, // 11 011 1110 1010 110 + PMEVCNTR23_EL0 = 0xdf57, // 11 011 1110 1010 111 + PMEVCNTR24_EL0 = 0xdf58, // 11 011 1110 1011 000 + PMEVCNTR25_EL0 = 0xdf59, // 11 011 1110 1011 001 + PMEVCNTR26_EL0 = 0xdf5a, // 11 011 1110 1011 010 + PMEVCNTR27_EL0 = 0xdf5b, // 11 011 1110 1011 011 + PMEVCNTR28_EL0 = 0xdf5c, // 11 011 1110 1011 100 + PMEVCNTR29_EL0 = 0xdf5d, // 11 011 1110 1011 101 + PMEVCNTR30_EL0 = 0xdf5e, // 11 011 1110 1011 110 + PMCCFILTR_EL0 = 0xdf7f, // 11 011 1110 1111 111 + PMEVTYPER0_EL0 = 0xdf60, // 11 011 1110 1100 000 + PMEVTYPER1_EL0 = 0xdf61, // 11 011 1110 1100 001 + PMEVTYPER2_EL0 = 0xdf62, // 11 011 1110 1100 010 + PMEVTYPER3_EL0 = 0xdf63, // 11 011 1110 1100 011 + PMEVTYPER4_EL0 = 0xdf64, // 11 011 1110 1100 100 + PMEVTYPER5_EL0 = 0xdf65, // 11 011 1110 1100 101 + PMEVTYPER6_EL0 = 0xdf66, // 11 011 1110 1100 110 + PMEVTYPER7_EL0 = 0xdf67, // 11 011 1110 1100 111 + PMEVTYPER8_EL0 = 0xdf68, // 11 011 1110 1101 000 + PMEVTYPER9_EL0 = 0xdf69, // 11 011 1110 1101 001 + PMEVTYPER10_EL0 = 0xdf6a, // 11 011 1110 1101 010 + PMEVTYPER11_EL0 = 0xdf6b, // 11 011 1110 1101 011 + PMEVTYPER12_EL0 = 0xdf6c, // 11 011 1110 1101 100 + PMEVTYPER13_EL0 = 0xdf6d, // 11 011 1110 1101 101 + PMEVTYPER14_EL0 = 0xdf6e, // 11 011 1110 1101 110 + PMEVTYPER15_EL0 = 0xdf6f, // 11 011 1110 1101 111 + PMEVTYPER16_EL0 = 0xdf70, // 11 011 1110 1110 000 + PMEVTYPER17_EL0 = 0xdf71, // 11 011 1110 1110 001 + PMEVTYPER18_EL0 = 0xdf72, // 11 011 1110 1110 010 + PMEVTYPER19_EL0 = 0xdf73, // 11 011 1110 1110 011 + PMEVTYPER20_EL0 = 0xdf74, // 11 011 1110 1110 100 + PMEVTYPER21_EL0 = 0xdf75, // 11 011 1110 1110 101 + PMEVTYPER22_EL0 = 0xdf76, // 11 011 1110 1110 110 + PMEVTYPER23_EL0 = 0xdf77, // 11 011 1110 1110 111 + PMEVTYPER24_EL0 = 0xdf78, // 11 011 1110 1111 000 + PMEVTYPER25_EL0 = 0xdf79, // 11 011 1110 1111 001 + PMEVTYPER26_EL0 = 0xdf7a, // 11 011 1110 1111 010 + PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011 + PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100 + PMEVTYPER29_EL0 = 
0xdf7d, // 11 011 1110 1111 101 + PMEVTYPER30_EL0 = 0xdf7e // 11 011 1110 1111 110 + }; + + // Note that these do not inherit from NamedImmMapper. This class is + // sufficiently different in its behaviour that I don't believe it's worth + // burdening the common NamedImmMapper with abstractions only needed in + // this one case. + struct SysRegMapper { + static const NamedImmMapper::Mapping SysRegPairs[]; + + const NamedImmMapper::Mapping *InstPairs; + size_t NumInstPairs; + + SysRegMapper() {} + uint32_t fromString(StringRef Name, bool &Valid) const; + std::string toString(uint32_t Bits, bool &Valid) const; + }; + + struct MSRMapper : SysRegMapper { + static const NamedImmMapper::Mapping MSRPairs[]; + MSRMapper(); + }; + + struct MRSMapper : SysRegMapper { + static const NamedImmMapper::Mapping MRSPairs[]; + MRSMapper(); + }; + + uint32_t ParseGenericRegister(StringRef Name, bool &Valid); +} + +namespace A64TLBI { + enum TLBIValues { + Invalid = -1, // Op0 Op1 CRn CRm Op2 + IPAS2E1IS = 0x6401, // 01 100 1000 0000 001 + IPAS2LE1IS = 0x6405, // 01 100 1000 0000 101 + VMALLE1IS = 0x4418, // 01 000 1000 0011 000 + ALLE2IS = 0x6418, // 01 100 1000 0011 000 + ALLE3IS = 0x7418, // 01 110 1000 0011 000 + VAE1IS = 0x4419, // 01 000 1000 0011 001 + VAE2IS = 0x6419, // 01 100 1000 0011 001 + VAE3IS = 0x7419, // 01 110 1000 0011 001 + ASIDE1IS = 0x441a, // 01 000 1000 0011 010 + VAAE1IS = 0x441b, // 01 000 1000 0011 011 + ALLE1IS = 0x641c, // 01 100 1000 0011 100 + VALE1IS = 0x441d, // 01 000 1000 0011 101 + VALE2IS = 0x641d, // 01 100 1000 0011 101 + VALE3IS = 0x741d, // 01 110 1000 0011 101 + VMALLS12E1IS = 0x641e, // 01 100 1000 0011 110 + VAALE1IS = 0x441f, // 01 000 1000 0011 111 + IPAS2E1 = 0x6421, // 01 100 1000 0100 001 + IPAS2LE1 = 0x6425, // 01 100 1000 0100 101 + VMALLE1 = 0x4438, // 01 000 1000 0111 000 + ALLE2 = 0x6438, // 01 100 1000 0111 000 + ALLE3 = 0x7438, // 01 110 1000 0111 000 + VAE1 = 0x4439, // 01 000 1000 0111 001 + VAE2 = 0x6439, // 01 100 1000 0111 001 + VAE3 = 0x7439, // 01 110 1000 0111 001 + ASIDE1 = 0x443a, // 01 000 1000 0111 010 + VAAE1 = 0x443b, // 01 000 1000 0111 011 + ALLE1 = 0x643c, // 01 100 1000 0111 100 + VALE1 = 0x443d, // 01 000 1000 0111 101 + VALE2 = 0x643d, // 01 100 1000 0111 101 + VALE3 = 0x743d, // 01 110 1000 0111 101 + VMALLS12E1 = 0x643e, // 01 100 1000 0111 110 + VAALE1 = 0x443f // 01 000 1000 0111 111 + }; + + struct TLBIMapper : NamedImmMapper { + const static Mapping TLBIPairs[]; + + TLBIMapper(); + }; + + static inline bool NeedsRegister(TLBIValues Val) { + switch (Val) { + case VMALLE1IS: + case ALLE2IS: + case ALLE3IS: + case ALLE1IS: + case VMALLS12E1IS: + case VMALLE1: + case ALLE2: + case ALLE3: + case ALLE1: + case VMALLS12E1: + return false; + default: + return true; + } + } +} + +namespace AArch64II { + + enum TOF { + //===--------------------------------------------------------------===// + // AArch64 Specific MachineOperand flags. + + MO_NO_FLAG, + + // MO_GOT - Represents a relocation referring to the GOT entry of a given + // symbol. Used in adrp. + MO_GOT, + + // MO_GOT_LO12 - Represents a relocation referring to the low 12 bits of the + // GOT entry of a given symbol. Used in ldr only. + MO_GOT_LO12, + + // MO_DTPREL_* - Represents a relocation referring to the offset from a + // module's dynamic thread pointer. Used in the local-dynamic TLS access + // model. + MO_DTPREL_G1, + MO_DTPREL_G0_NC, + + // MO_GOTTPREL_* - Represents a relocation referring to a GOT entry + // providing the offset of a variable from the thread-pointer. 
Used in + the initial-exec TLS model where this offset is assigned in the static thread + block and thus known by the dynamic linker. + MO_GOTTPREL, + MO_GOTTPREL_LO12, + + // MO_TLSDESC_* - Represents a relocation referring to a GOT entry providing + // a TLS descriptor chosen by the dynamic linker. Used for the + // general-dynamic and local-dynamic TLS access models where very little is + // known at link-time. + MO_TLSDESC, + MO_TLSDESC_LO12, + + // MO_TPREL_* - Represents a relocation referring to the offset of a + // variable from the thread pointer itself. Used in the local-exec TLS + // access model. + MO_TPREL_G1, + MO_TPREL_G0_NC, + + // MO_LO12 - On a symbol operand, this represents a relocation containing + // the lower 12 bits of the address. Used in add/sub/ldr/str. + MO_LO12 + }; +} + +class APFloat; + +namespace A64Imms { + bool isFPImm(const APFloat &Val, uint32_t &Imm8Bits); + + inline bool isFPImm(const APFloat &Val) { + uint32_t Imm8; + return isFPImm(Val, Imm8); + } + + bool isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits); + bool isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm); + + bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); + bool isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); + + // We sometimes want to know whether the immediate is representable with a + // MOVN but *not* with a MOVZ (because that would take priority). + bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); + +} + +} // end namespace llvm; + +#endif diff --git a/lib/Target/AArch64/Utils/CMakeLists.txt b/lib/Target/AArch64/Utils/CMakeLists.txt new file mode 100644 index 0000000..2c28348 --- /dev/null +++ b/lib/Target/AArch64/Utils/CMakeLists.txt @@ -0,0 +1,5 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMAArch64Utils + AArch64BaseInfo.cpp + ) diff --git a/lib/Target/AArch64/Utils/LLVMBuild.txt b/lib/Target/AArch64/Utils/LLVMBuild.txt new file mode 100644 index 0000000..1be5375 --- /dev/null +++ b/lib/Target/AArch64/Utils/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/AArch64/Utils/LLVMBuild.txt ----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AArch64Utils +parent = AArch64 +required_libraries = Core Support +add_to_library_groups = AArch64 diff --git a/lib/Target/AArch64/Utils/Makefile b/lib/Target/AArch64/Utils/Makefile new file mode 100644 index 0000000..0f4a645 --- /dev/null +++ b/lib/Target/AArch64/Utils/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/AArch64/Utils/Makefile -------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. 
+LIBRARYNAME = LLVMAArch64Utils + +# Hack: we need to include 'main' AArch64 target directory to grab private headers +#CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index a76715a..46915ee 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -110,6 +110,11 @@ def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true", "Is microcontroller profile ('M' series)">; +// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. +// See ARMInstrInfo.td for details. +def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", + "NaCl trap">; + // ARM ISAs. def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", "Support ARM v4T instructions">; diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index fc6ac90..58c7798 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -37,6 +37,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCObjectStreamer.h" @@ -45,6 +46,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -182,7 +184,7 @@ namespace { const size_t TagHeaderSize = 1 + 4; Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4); - Streamer.EmitBytes(CurrentVendor, 0); + Streamer.EmitBytes(CurrentVendor); Streamer.EmitIntValue(0, 1); // '\0' Streamer.EmitIntValue(ARMBuildAttrs::File, 1); @@ -192,14 +194,14 @@ namespace { // emit each field as its type (ULEB or String) for (unsigned int i=0; i<Contents.size(); ++i) { AttributeItemType item = Contents[i]; - Streamer.EmitULEB128IntValue(item.Tag, 0); + Streamer.EmitULEB128IntValue(item.Tag); switch (item.Type) { default: llvm_unreachable("Invalid attribute type"); case AttributeItemType::NumericAttribute: - Streamer.EmitULEB128IntValue(item.IntValue, 0); + Streamer.EmitULEB128IntValue(item.IntValue); break; case AttributeItemType::TextAttribute: - Streamer.EmitBytes(item.StringValue.upper(), 0); + Streamer.EmitBytes(item.StringValue.upper()); Streamer.EmitIntValue(0, 1); // '\0' break; } @@ -340,6 +342,11 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); assert(!MO.getSubReg() && "Subregs should be eliminated!"); + if(ARM::GPRPairRegClass.contains(Reg)) { + const MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + Reg = TRI->getSubReg(Reg, ARM::gsub_0); + } O << ARMInstPrinter::getRegisterName(Reg); break; } @@ -528,14 +535,12 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(OpNum); if (!MO.isReg()) return true; - const TargetRegisterClass &RC = ARM::GPRRegClass; const MachineFunction &MF = *MI->getParent()->getParent(); const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); - - unsigned RegIdx = TRI->getEncodingValue(MO.getReg()); - RegIdx |= 1; //The odd register is also the higher-numbered one of a pair. 
- - unsigned Reg = RC.getRegister(RegIdx); + unsigned Reg = MO.getReg(); + if(!ARM::GPRPairRegClass.contains(Reg)) + return false; + Reg = TRI->getSubReg(Reg, ARM::gsub_1); O << ARMInstPrinter::getRegisterName(Reg); return false; } @@ -657,7 +662,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (MCSym.getInt()) // External to current translation unit. - OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); + OutStreamer.EmitIntValue(0, 4/*size*/); else // Internal to current translation unit. // @@ -667,7 +672,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // We need to fill in the value for the NLP in those cases. OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), OutContext), - 4/*size*/, 0/*addrspace*/); + 4/*size*/); } Stubs.clear(); @@ -685,7 +690,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.EmitValue(MCSymbolRefExpr:: Create(Stubs[i].second.getPointer(), OutContext), - 4/*size*/, 0/*addrspace*/); + 4/*size*/); } Stubs.clear(); @@ -699,6 +704,11 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // generates code that does this, it is always safe to set. OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } + // FIXME: This should eventually end up somewhere else where more + // intelligent flag decisions can be made. For now we are just maintaining + // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default. + if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&OutStreamer)) + MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5); } //===----------------------------------------------------------------------===// @@ -1682,6 +1692,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } break; } + case ARM::TRAPNaCl: { + //.long 0xe7fedef0 @ trap + uint32_t Val = 0xe7fedef0UL; + OutStreamer.AddComment("trap"); + OutStreamer.EmitIntValue(Val, 4); + return; + } case ARM::tTRAP: { // Non-Darwin binutils don't yet support the "trap" mnemonic. // FIXME: Remove this special case when they do. diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index f7392fb..c945e4f 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -1,4 +1,4 @@ -//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file --*- C++ -*-===// +//===-- ARMAsmPrinter.h - ARM implementation of AsmPrinter ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// ARM Assembly printer class. -// -//===----------------------------------------------------------------------===// #ifndef ARMASMPRINTER_H #define ARMASMPRINTER_H @@ -54,7 +50,7 @@ public: } virtual const char *getPassName() const LLVM_OVERRIDE { - return "ARM Assembly Printer"; + return "ARM Assembly / Object Emitter"; } void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 0076910..ed001ea 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2719,7 +2719,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, case ARM::t2STMDB_UPD: { unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1; if (Subtarget.isSwift()) { - // rdar://8402126 int UOps = 1 + NumRegs; // One for address computation, one for each ld / st. 
switch (Opc) { default: break; @@ -4047,7 +4046,6 @@ getPartialRegUpdateClearance(const MachineInstr *MI, case ARM::VLDRS: case ARM::FCONSTS: case ARM::VMOVSR: - // rdar://problem/8791586 case ARM::VMOVv8i8: case ARM::VMOVv4i16: case ARM::VMOVv2i32: diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index d2f6a33..abdd251 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -205,7 +205,8 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, } // First prefer the paired physreg. - if (PairedPhys) + if (PairedPhys && + std::find(Order.begin(), Order.end(), PairedPhys) != Order.end()) Hints.push_back(PairedPhys); // Then prefer even or odd registers. @@ -400,64 +401,6 @@ requiresVirtualBaseRegisters(const MachineFunction &MF) const { return true; } -static void -emitSPUpdate(bool isARM, - MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - DebugLoc dl, const ARMBaseInstrInfo &TII, - int NumBytes, - ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { - if (isARM) - emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII); - else - emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII); -} - - -void ARMBaseRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - if (!TFI->hasReservedCallFrame(MF)) { - // If we have alloca, convert as follows: - // ADJCALLSTACKDOWN -> sub, sp, sp, amount - // ADJCALLSTACKUP -> add, sp, sp, amount - MachineInstr *Old = I; - DebugLoc dl = Old->getDebugLoc(); - unsigned Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = TFI->getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; - - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - assert(!AFI->isThumb1OnlyFunction() && - "This eliminateCallFramePseudoInstr does not support Thumb1!"); - bool isARM = !AFI->isThumbFunction(); - - // Replace the pseudo instruction with a new instruction... - unsigned Opc = Old->getOpcode(); - int PIdx = Old->findFirstPredOperandIdx(); - ARMCC::CondCodes Pred = (PIdx == -1) - ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm(); - if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { - // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. - unsigned PredReg = Old->getOperand(2).getReg(); - emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, Pred, PredReg); - } else { - // Note: PredReg is operand 3 for ADJCALLSTACKUP. 
- unsigned PredReg = Old->getOperand(3).getReg(); - assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); - emitSPUpdate(isARM, MBB, I, dl, TII, Amount, Pred, PredReg); - } - } - } - MBB.erase(I); -} - int64_t ARMBaseRegisterInfo:: getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const { const MCInstrDesc &Desc = MI->getDesc(); @@ -717,8 +660,8 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, void ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { - unsigned i = 0; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); @@ -727,13 +670,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && "This eliminateFrameIndex does not support Thumb1!"); - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); unsigned FrameReg; int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj); @@ -755,18 +692,18 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Special handling of dbg_value instructions. if (MI.isDebugValue()) { - MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum). ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); return; } // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; if (!AFI->isThumbFunction()) - Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII); + Done = rewriteARMFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII); else { assert(AFI->isThumb2Function()); - Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII); + Done = rewriteT2FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII); } if (Done) return; @@ -786,7 +723,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg(); if (Offset == 0) // Must be addrmode4/6. - MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false); + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, false); else { ScratchReg = MF.getRegInfo().createVirtualRegister(&ARM::GPRRegClass); if (!AFI->isThumbFunction()) @@ -798,6 +735,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset, Pred, PredReg, TII); } // Update the original instruction to use the scratch register. 
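The eliminateCallFramePseudoInstr logic above (moved from ARMBaseRegisterInfo to ARMFrameLowering later in this patch) rounds the outgoing-argument area up to the stack alignment with (Amount+Align-1)/Align*Align. A short worked example of that round-up as a stand-alone C++ sketch (values illustrative):

    #include <cassert>

    // Round Amount up to the next multiple of Align (Align must be non-zero),
    // exactly as the ADJCALLSTACKDOWN/ADJCALLSTACKUP rewriting does.
    unsigned roundUpToAlignment(unsigned Amount, unsigned Align) {
      return (Amount + Align - 1) / Align * Align;
    }

    int main() {
      assert(roundUpToAlignment(10, 8) == 16); // 10 arg bytes, 8-byte alignment
      assert(roundUpToAlignment(16, 8) == 16); // already aligned: unchanged
      return 0;
    }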
- MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); + MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false,true); } } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index aaa56a9..725033b 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -168,12 +168,9 @@ public: virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const; - virtual void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 70a25c2..4891609 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1468,7 +1468,7 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { if (CPEBB->empty()) { BBInfo[CPEBB->getNumber()].Size = 0; - // This block no longer needs to be aligned. <rdar://problem/10534709>. + // This block no longer needs to be aligned. CPEBB->setAlignment(0); } else // Entries are sorted by descending alignment, so realign from the front. diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 94c574a..29fcd40 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -146,6 +146,7 @@ class ARMFastISel : public FastISel { virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, const LoadInst *LI); + virtual bool FastLowerArguments(); private: #include "ARMGenFastISel.inc" @@ -2099,6 +2100,9 @@ bool ARMFastISel::SelectRet(const Instruction *I) { if (!FuncInfo.CanLowerReturn) return false; + // Build a list of return value registers. + SmallVector<unsigned, 4> RetRegs; + CallingConv::ID CC = F.getCallingConv(); if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; @@ -2157,13 +2161,16 @@ bool ARMFastISel::SelectRet(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg); - // Mark the register as live out of the function. - MRI.addLiveOut(VA.getLocReg()); + // Add register to return instruction. + RetRegs.push_back(VA.getLocReg()); } unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET; - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(RetOpc))); + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(RetOpc)); + AddOptionalDefs(MIB); + for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) + MIB.addReg(RetRegs[i], RegState::Implicit); return true; } @@ -2451,7 +2458,6 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, if (Len >= 2 && Alignment == 2) VT = MVT::i16; else { - assert (Alignment == 1 && "Expected an alignment of 1!"); VT = MVT::i8; } } @@ -2562,7 +2568,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { return SelectCall(&I, "memset"); } case Intrinsic::trap: { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::TRAP)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get( + Subtarget->useNaClTrap() ? 
ARM::TRAPNaCl : ARM::TRAP)); return true; } } @@ -2877,6 +2884,80 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, return DestReg2; } +bool ARMFastISel::FastLowerArguments() { + if (!FuncInfo.CanLowerReturn) + return false; + + const Function *F = FuncInfo.Fn; + if (F->isVarArg()) + return false; + + CallingConv::ID CC = F->getCallingConv(); + switch (CC) { + default: + return false; + case CallingConv::Fast: + case CallingConv::C: + case CallingConv::ARM_AAPCS_VFP: + case CallingConv::ARM_AAPCS: + case CallingConv::ARM_APCS: + break; + } + + // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments + // which are passed in r0 - r3. + unsigned Idx = 1; + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++Idx) { + if (Idx > 4) + return false; + + if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) || + F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || + F->getAttributes().hasAttribute(Idx, Attribute::ByVal)) + return false; + + Type *ArgTy = I->getType(); + if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) + return false; + + EVT ArgVT = TLI.getValueType(ArgTy); + if (!ArgVT.isSimple()) return false; + switch (ArgVT.getSimpleVT().SimpleTy) { + case MVT::i8: + case MVT::i16: + case MVT::i32: + break; + default: + return false; + } + } + + + static const uint16_t GPRArgRegs[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3 + }; + + const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32); + Idx = 0; + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++Idx) { + if (I->use_empty()) + continue; + unsigned SrcReg = GPRArgRegs[Idx]; + unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); + // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. + // Without this, EmitLiveInCopies may eliminate the livein if its only + // use is a bitcast (which isn't turned into an instruction). 
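FastLowerArguments above takes the fast path only for up to four i8/i16/i32 scalar arguments without inreg/sret/byval attributes, mapping them to r0-r3 in declaration order. A stand-alone C++ sketch of that qualification rule (a simplification; the real code also inspects the IR types and attributes, and the helper name is made up):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    static const char *GPRArgRegs[] = {"r0", "r1", "r2", "r3"};

    // Up to four integer arguments, each 8/16/32 bits wide, qualify; they are
    // assigned to r0-r3 in order.
    bool canFastLowerArgs(const std::vector<unsigned> &BitWidths) {
      if (BitWidths.size() > 4)
        return false;
      for (unsigned W : BitWidths)
        if (W != 8 && W != 16 && W != 32)
          return false;
      return true;
    }

    int main() {
      std::vector<unsigned> Args = {32, 16, 8, 32}; // e.g. f(int, short, char, int)
      if (canFastLowerArgs(Args))
        for (std::size_t I = 0; I != Args.size(); ++I)
          printf("arg%zu (i%u) -> %s\n", I, Args[I], GPRArgRegs[I]);
      return 0;
    }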
+ unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(DstReg, getKillRegState(true)); + UpdateValueMap(I, ResultReg); + } + + return true; +} + namespace llvm { FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 39d27c4..0ca6450 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -119,13 +119,14 @@ static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, const ARMBaseInstrInfo &TII, - int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) { + int NumBytes, unsigned MIFlags = MachineInstr::NoFlags, + ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { if (isARM) emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - ARMCC::AL, 0, TII, MIFlags); + Pred, PredReg, TII, MIFlags); else emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - ARMCC::AL, 0, TII, MIFlags); + Pred, PredReg, TII, MIFlags); } void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { @@ -1430,3 +1431,51 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, AFI->setLRIsSpilledForFarJump(true); } } + + +void ARMFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const ARMBaseInstrInfo &TII = + *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + if (!hasReservedCallFrame(MF)) { + // If we have alloca, convert as follows: + // ADJCALLSTACKDOWN -> sub, sp, sp, amount + // ADJCALLSTACKUP -> add, sp, sp, amount + MachineInstr *Old = I; + DebugLoc dl = Old->getDebugLoc(); + unsigned Amount = Old->getOperand(0).getImm(); + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + assert(!AFI->isThumb1OnlyFunction() && + "This eliminateCallFramePseudoInstr does not support Thumb1!"); + bool isARM = !AFI->isThumbFunction(); + + // Replace the pseudo instruction with a new instruction... + unsigned Opc = Old->getOpcode(); + int PIdx = Old->findFirstPredOperandIdx(); + ARMCC::CondCodes Pred = (PIdx == -1) + ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm(); + if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { + // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. + unsigned PredReg = Old->getOperand(2).getReg(); + emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags, + Pred, PredReg); + } else { + // Note: PredReg is operand 3 for ADJCALLSTACKUP. 
+ unsigned PredReg = Old->getOperand(3).getReg(); + assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); + emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags, + Pred, PredReg); + } + } + } + MBB.erase(I); +} + diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index a1c2b93..efa255a 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -70,6 +70,11 @@ public: unsigned LdrOpc, bool isVarArg, bool NoGap, bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs) const; + + virtual void eliminateCallFramePseudoInstr( + MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 939bed7..a83f052 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/CallingConv.h" @@ -257,6 +258,8 @@ private: // Select special operations if node forms integer ABS pattern SDNode *SelectABSOp(SDNode *N); + SDNode *SelectInlineAsm(SDNode *N); + SDNode *SelectConcatVector(SDNode *N); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); @@ -2552,6 +2555,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; + case ISD::INLINEASM: { + SDNode *ResNode = SelectInlineAsm(N); + if (ResNode) + return ResNode; + break; + } case ISD::XOR: { // Select special operations if XOR node forms integer ABS pattern SDNode *ResNode = SelectABSOp(N); @@ -3446,6 +3455,138 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } +SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ + std::vector<SDValue> AsmNodeOperands; + unsigned Flag, Kind; + bool Changed = false; + unsigned NumOps = N->getNumOperands(); + + ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>( + N->getOperand(InlineAsm::Op_AsmString)); + StringRef AsmString = StringRef(S->getSymbol()); + + // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint. + // However, some instructions (e.g. ldrexd/strexd in ARM mode) require + // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs + // respectively. Since there is no constraint to explicitly specify a + // reg pair, we search for the %H operand inside the asm string. If it is found, the + // transformation below enforces a GPRPair reg class for "%r" for 64-bit data. + if (AsmString.find(":H}") == StringRef::npos) + return NULL; + + DebugLoc dl = N->getDebugLoc(); + SDValue Glue = N->getOperand(NumOps-1); + + // Glue node will be appended late.
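The ":H}" probe above looks for the H operand modifier in the lowered asm string (operands appear there as "${n:H}"), which names the odd, high register of a 64-bit value. At the C/C++ source level this is the familiar ldrexd idiom; the function below is illustrative user code, not part of the patch:

    // 64-bit exclusive load in ARM mode: ldrexd requires an even/odd register
    // pair, and "%H0" selects the high register of operand 0. The
    // transformation above detects this pattern and re-binds the operand to
    // the GPRPair register class.
    static inline unsigned long long Load64Exclusive(const unsigned long long *P) {
      unsigned long long V;
      __asm__ __volatile__("ldrexd %0, %H0, [%1]" : "=&r"(V) : "r"(P));
      return V;
    }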
+ for(unsigned i = 0; i < NumOps -1; ++i) { + SDValue op = N->getOperand(i); + AsmNodeOperands.push_back(op); + + if (i < InlineAsm::Op_FirstOperand) + continue; + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { + Flag = C->getZExtValue(); + Kind = InlineAsm::getKind(Flag); + } + else + continue; + + if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef + && Kind != InlineAsm::Kind_RegDefEarlyClobber) + continue; + + unsigned RegNum = InlineAsm::getNumOperandRegisters(Flag); + unsigned RC; + bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); + if (!HasRC || RC != ARM::GPRRegClassID || RegNum != 2) + continue; + + assert((i+2 < NumOps-1) && "Invalid number of operands in inline asm"); + SDValue V0 = N->getOperand(i+1); + SDValue V1 = N->getOperand(i+2); + unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); + unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); + SDValue PairedReg; + MachineRegisterInfo &MRI = MF->getRegInfo(); + + if (Kind == InlineAsm::Kind_RegDef || + Kind == InlineAsm::Kind_RegDefEarlyClobber) { + // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to + // the original GPRs. + + unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); + SDValue Chain = SDValue(N,0); + + SDNode *GU = N->getGluedUser(); + SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, + Chain.getValue(1)); + + // Extract values from a GPRPair reg and copy to the original GPR reg. + SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, + RegCopy); + SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, + RegCopy); + SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, + RegCopy.getValue(1)); + SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); + + // Update the original glue user. + std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); + Ops.push_back(T1.getValue(1)); + CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size()); + GU = T1.getNode(); + } + else { + // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a + // GPRPair and then pass the GPRPair to the inline asm. + SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; + + // As REG_SEQ doesn't take RegisterSDNode, we copy them first. + SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, + Chain.getValue(1)); + SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, + T0.getValue(1)); + SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); + + // Copy REG_SEQ into a GPRPair-typed VR and replace the original two + // i32 VRs of inline asm with it. + unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); + Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); + + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; + Glue = Chain.getValue(1); + } + + Changed = true; + + if(PairedReg.getNode()) { + Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); + Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); + // Replace the current flag. + AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( + Flag, MVT::i32); + // Add the new register node and skip the original two GPRs. + AsmNodeOperands.push_back(PairedReg); + // Skip the next two GPRs. 
+ i += 2; + } + } + + AsmNodeOperands.push_back(Glue); + if (!Changed) + return NULL; + + SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), + CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], + AsmNodeOperands.size()); + New->setNodeId(-1); + return New.getNode(); +} + + bool ARMDAGToDAGISel:: SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 5b3e31f..ef96e56 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -781,6 +781,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && @@ -833,21 +835,21 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setSchedulingPreference(Sched::Hybrid); //// temporary - rewrite interface to use type - maxStoresPerMemset = 8; - maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; - maxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores - maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2; - maxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores - maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2; + MaxStoresPerMemset = 8; + MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores + MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2; + MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores + MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2; // On ARM arguments smaller than 4 bytes are extended, so all arguments // are at least 4 bytes aligned. setMinStackArgumentAlignment(4); - benefitFromCodePlacementOpt = true; + BenefitFromCodePlacementOpt = true; // Prefer likely predicted branches to selects on out-of-order cores. - predictableSelectIsExpensive = Subtarget->isLikeA9(); + PredictableSelectIsExpensive = Subtarget->isLikeA9(); setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); } @@ -1926,15 +1928,9 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, isVarArg)); - // If this is the first return lowered for this function, add - // the regs to the liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector<SDValue, 4> RetOps; + RetOps.push_back(Chain); // Operand #0 = Chain (updated below) // Copy the result values into the output registers. 
for (unsigned i = 0, realRVLocIdx = 0; @@ -1963,10 +1959,12 @@ ARMTargetLowering::LowerReturn(SDValue Chain, Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs.getValue(1), Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc // Extract the 2nd half and fall through to handle it as an f64 value. @@ -1979,6 +1977,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), Flag); @@ -1988,15 +1987,16 @@ ARMTargetLowering::LowerReturn(SDValue Chain, // Guarantee that all emitted copies are // stuck together, avoiding something bad. Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - SDValue result; + // Update chain and glue. + RetOps[0] = Chain; if (Flag.getNode()) - result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); - else // Return Void - result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); + RetOps.push_back(Flag); - return result; + return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, + RetOps.data(), RetOps.size()); } bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { @@ -2576,7 +2576,7 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, } // The remaining GPRs hold either the beginning of variable-argument -// data, or the beginning of an aggregate passed by value (usuall +// data, or the beginning of an aggregate passed by value (usually // byval). Either way, we allocate stack slots adjacent to the data // provided by our caller, and store the unallocated registers there. // If this is a variadic function, the va_list pointer will begin with @@ -4294,6 +4294,21 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ return true; } +/// \return true if this is a reverse operation on a vector. +static bool isReverseMask(ArrayRef<int> M, EVT VT) { + unsigned NumElts = VT.getVectorNumElements(); + // Make sure the mask has the right size. + if (NumElts != M.size()) + return false; + + // Look for <15, ..., 3, -1, 1, 0>. + for (unsigned i = 0; i != NumElts; ++i) + if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i)) + return false; + + return true; +} + // If N is an integer constant that can be moved into a register in one // instruction, return an SDValue of such a constant (will become a MOV // instruction). Otherwise return null.
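The isReverseMask test just added accepts exactly the element order <NumElts-1, ..., 1, 0>, with -1 (undef) permitted in any position. A stand-alone C++ restatement of the same rule, with example masks:

    #include <cassert>
    #include <vector>

    // Same rule as isReverseMask: element i must be NumElts-1-i, or -1 (undef).
    bool isReverseShuffleMask(const std::vector<int> &M) {
      const int N = static_cast<int>(M.size());
      for (int I = 0; I < N; ++I)
        if (M[I] >= 0 && M[I] != N - 1 - I)
          return false;
      return true;
    }

    int main() {
      assert(isReverseShuffleMask({7, 6, 5, -1, 3, 2, 1, 0})); // v8i16 reverse
      assert(!isReverseShuffleMask({7, 6, 5, 4, 0, 1, 2, 3})); // not a reverse
      return 0;
    }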
@@ -4689,7 +4704,8 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, isVZIPMask(M, VT, WhichResult) || isVTRN_v_undef_Mask(M, VT, WhichResult) || isVUZP_v_undef_Mask(M, VT, WhichResult) || - isVZIP_v_undef_Mask(M, VT, WhichResult)); + isVZIP_v_undef_Mask(M, VT, WhichResult) || + ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT))); } /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit @@ -4793,6 +4809,23 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, &VTBLMask[0], 8)); } +static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, + SelectionDAG &DAG) { + DebugLoc DL = Op.getDebugLoc(); + SDValue OpLHS = Op.getOperand(0); + EVT VT = OpLHS.getValueType(); + + assert((VT == MVT::v8i16 || VT == MVT::v16i8) && + "Expect a v8i16/v16i8 type"); + OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS); + // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now, + // extract the first 8 bytes into the top double word and the last 8 bytes + // into the bottom double word. The v8i16 case is similar. + unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4; + return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS, + DAG.getConstant(ExtractNum, MVT::i32)); +} + static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -4930,6 +4963,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BITCAST, dl, VT, Val); } + if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT)) + return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG); + if (VT == MVT::v8i8) { SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); if (NewOp.getNode()) @@ -5967,9 +6003,6 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); } - unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD; - unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD; - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *contBB = 0, *cont2BB = 0; if (IsCmpxchg || IsMinMax) @@ -6007,42 +6040,26 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, // cmp storesuccess, #0 // bne- loopMBB // fallthrough --> exitMBB - // - // Note that the registers are explicitly specified because there is not any - // way to force the register allocator to allocate a register pair. - // - // FIXME: The hardcoded registers are not necessary for Thumb2, but we - // need to properly enforce the restriction that the two output registers - // for ldrexd must be different. BB = loopMBB; + // Load - unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned GPRPair1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned GPRPair2; - if (IsMinMax) { - //We need an extra double register for doing min/max.
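As a cross-check of the VREV64-then-VEXT strategy used by LowerReverse_VECTOR_SHUFFLEv16i8_v8i16 above, here is a small stand-alone C++ simulation of the two lane permutations for the v16i8 case (indices only, a sketch rather than real codegen):

    #include <cstdio>

    int main() {
      int Rev64[16], Out[16];
      // VREV64.8: reverse the byte order within each 64-bit half.
      // Rev64[I] is the source lane whose byte lands in lane I.
      for (int I = 0; I < 16; ++I)
        Rev64[I] = (I & ~7) | (7 - (I & 7));
      // VEXT.8 #8 with both operands equal: rotate the two halves past each
      // other, so lane I reads lane (I + 8) mod 16 of the VREV64 result.
      for (int I = 0; I < 16; ++I)
        Out[I] = Rev64[(I + 8) & 15];
      for (int I = 0; I < 16; ++I)
        printf("%d ", Out[I]); // prints 15 14 13 ... 1 0: a full reverse
      printf("\n");
      return 0;
    }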
- unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - GPRPair2 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) - .addReg(undef) - .addReg(vallo) - .addImm(ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair2) - .addReg(r1) - .addReg(valhi) - .addImm(ARM::gsub_1); + if (isThumb2) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD)) + .addReg(destlo, RegState::Define) + .addReg(desthi, RegState::Define) + .addReg(ptr)); + } else { + unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD)) + .addReg(GPRPair0, RegState::Define).addReg(ptr)); + // Copy r2/r3 into dest. (This copy will normally be coalesced.) + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) + .addReg(GPRPair0, 0, ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) + .addReg(GPRPair0, 0, ARM::gsub_1); } - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) - .addReg(GPRPair0, RegState::Define).addReg(ptr)); - // Copy r2/r3 into dest. (This copy will normally be coalesced.) - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) - .addReg(GPRPair0, 0, ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) - .addReg(GPRPair0, 0, ARM::gsub_1); - + unsigned StoreLo, StoreHi; if (IsCmpxchg) { // Add early exit for (unsigned i = 0; i < 2; i++) { @@ -6058,19 +6075,8 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, } // Copy to physregs for strexd - unsigned setlo = MI->getOperand(5).getReg(); - unsigned sethi = MI->getOperand(6).getReg(); - unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) - .addReg(undef) - .addReg(setlo) - .addImm(ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1) - .addReg(r1) - .addReg(sethi) - .addImm(ARM::gsub_1); + StoreLo = MI->getOperand(5).getReg(); + StoreHi = MI->getOperand(6).getReg(); } else if (Op1) { // Perform binary operation unsigned tmpRegLo = MRI.createVirtualRegister(TRC); @@ -6082,32 +6088,13 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, .addReg(desthi).addReg(valhi)) .addReg(IsMinMax ? 
ARM::CPSR : 0, getDefRegState(IsMinMax)); - unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); - unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) - .addReg(UndefPair) - .addReg(tmpRegLo) - .addImm(ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1) - .addReg(r1) - .addReg(tmpRegHi) - .addImm(ARM::gsub_1); + StoreLo = tmpRegLo; + StoreHi = tmpRegHi; } else { // Copy to physregs for strexd - unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) - .addReg(UndefPair) - .addReg(vallo) - .addImm(ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1) - .addReg(r1) - .addReg(valhi) - .addImm(ARM::gsub_1); + StoreLo = vallo; + StoreHi = valhi; } - unsigned GPRPairStore = GPRPair1; if (IsMinMax) { // Compare and branch to exit block. BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) @@ -6115,12 +6102,33 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, BB->addSuccessor(exitMBB); BB->addSuccessor(contBB); BB = contBB; - GPRPairStore = GPRPair2; + StoreLo = vallo; + StoreHi = valhi; } // Store - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) - .addReg(GPRPairStore).addReg(ptr)); + if (isThumb2) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess) + .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); + } else { + // Marshal a pair... + unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) + .addReg(UndefPair) + .addReg(StoreLo) + .addImm(ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair) + .addReg(r1) + .addReg(StoreHi) + .addImm(ARM::gsub_1); + + // ...and store it + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess) + .addReg(StorePair).addReg(ptr)); + } // Cmp+jump AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(storesuccess).addImm(0)); @@ -6329,7 +6337,16 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { DispatchBB->setIsLandingPad(); MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); - BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP)); + unsigned trap_opcode; + if (Subtarget->isThumb()) { + trap_opcode = ARM::tTRAP; + } else { + if (Subtarget->useNaClTrap()) + trap_opcode = ARM::TRAPNaCl; + else + trap_opcode = ARM::TRAP; + } + BuildMI(TrapBB, dl, TII->get(trap_opcode)); DispatchBB->addSuccessor(TrapBB); MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); @@ -7123,7 +7140,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::LE); + /*IsMinMax*/ true, ARMCC::LT); case ARM::ATOMMAX6432: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? 
ARM::t2SBCrr : ARM::SBCrr, @@ -7133,7 +7150,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::LS); + /*IsMinMax*/ true, ARMCC::LO); case ARM::ATOMUMAX6432: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, @@ -10343,4 +10360,3 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return false; } - diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 12712c0..9409f35 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -117,7 +117,7 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, SDNPVariadic]>; def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, [SDNPInGlue]>; @@ -239,6 +239,9 @@ def IsARM : Predicate<"!Subtarget->isThumb()">, def IsIOS : Predicate<"Subtarget->isTargetIOS()">; def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; +def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">, + AssemblerPredicate<"FeatureNaClTrap", "NaCl">; +def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">; // FIXME: Eventually this will be just "hasV6T2Ops". def UseMovt : Predicate<"Subtarget->useMovt()">; @@ -1762,11 +1765,32 @@ def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", let Inst{3-0} = opt; } -// A5.4 Permanently UNDEFINED instructions. +/* + * A5.4 Permanently UNDEFINED instructions. + * + * For most targets use UDF #65006, for which the OS will generate SIGTRAP. + * Other UDF encodings generate SIGILL. + * + * NaCl's OS instead chooses an ARM UDF encoding that's also a UDF in Thumb. + * Encoding A1: + * 1110 0111 1111 iiii iiii iiii 1111 iiii + * Encoding T1: + * 1101 1110 iiii iiii + * It uses the following encoding: + * 1110 0111 1111 1110 1101 1110 1111 0000 + * - In ARM: UDF #60896; + * - In Thumb: UDF #254 followed by a branch-to-self. 
+ */ +let isBarrier = 1, isTerminator = 1 in +def TRAPNaCl : AXI<(outs), (ins), MiscFrm, NoItinerary, + "trap", [(trap)]>, + Requires<[IsARM,UseNaClTrap]> { + let Inst = 0xe7fedef0; +} let isBarrier = 1, isTerminator = 1 in def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, "trap", [(trap)]>, - Requires<[IsARM]> { + Requires<[IsARM,DontUseNaClTrap]> { let Inst = 0xe7ffdefe; } @@ -2079,6 +2103,18 @@ def SRSIB_UPD : SRSI<1, "srsib\tsp!, $mode"> { let Inst{24-23} = 0b11; } +def : ARMInstAlias<"srsda $mode", (SRSDA imm0_31:$mode)>; +def : ARMInstAlias<"srsda $mode!", (SRSDA_UPD imm0_31:$mode)>; + +def : ARMInstAlias<"srsdb $mode", (SRSDB imm0_31:$mode)>; +def : ARMInstAlias<"srsdb $mode!", (SRSDB_UPD imm0_31:$mode)>; + +def : ARMInstAlias<"srsia $mode", (SRSIA imm0_31:$mode)>; +def : ARMInstAlias<"srsia $mode!", (SRSIA_UPD imm0_31:$mode)>; + +def : ARMInstAlias<"srsib $mode", (SRSIB imm0_31:$mode)>; +def : ARMInstAlias<"srsib $mode!", (SRSIB_UPD imm0_31:$mode)>; + // Return From Exception class RFEI<bit wb, string asm> : XI<(outs), (ins GPR:$Rn), AddrModeNone, 4, IndexModeNone, BrFrm, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 697a8d2..0411ac4 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -4264,6 +4264,7 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, NEONvceq, 1>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", "$Vd, $Vm, #0", NEONvceqz>; @@ -4277,10 +4278,12 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, NEONvcge, 0>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", "$Vd, $Vm, #0", NEONvcgez>; defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", "$Vd, $Vm, #0", NEONvclez>; +} // VCGT : Vector Compare Greater Than defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, @@ -4292,10 +4295,12 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", "$Vd, $Vm, #0", NEONvcgtz>; defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", "$Vd, $Vm, #0", NEONvcltz>; +} // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", @@ -5740,6 +5745,10 @@ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; +// Fold extracting an element out of a v2i32 into a vfp register. +def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), + (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; + // Vector lengthening move with load, matching extending loads. // extload, zextload and sextload for a standard lengthening load. 
Example: diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index cf8b302..c9d709e 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3481,6 +3481,13 @@ def t2SRSIA_UPD : T2SRS<0b11, 1, (outs), (ins imm0_31:$mode), NoItinerary, def t2SRSIA : T2SRS<0b11, 0, (outs), (ins imm0_31:$mode), NoItinerary, "srsia","\tsp, $mode", []>; + +def : t2InstAlias<"srsdb${p} $mode", (t2SRSDB imm0_31:$mode, pred:$p)>; +def : t2InstAlias<"srsdb${p} $mode!", (t2SRSDB_UPD imm0_31:$mode, pred:$p)>; + +def : t2InstAlias<"srsia${p} $mode", (t2SRSIA imm0_31:$mode, pred:$p)>; +def : t2InstAlias<"srsia${p} $mode!", (t2SRSIA_UPD imm0_31:$mode, pred:$p)>; + // Return From Exception is a system instruction. class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index a1c21ee..98bd6c1 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1188,7 +1188,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, OddDeadKill = true; } // Never kill the base register in the first instruction. - // <rdar://problem/11101911> if (EvenReg == BaseReg) EvenDeadKill = false; InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 404634f..4191931 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1887,6 +1887,9 @@ def CortexA9Model : SchedMachineModel { let LoadLatency = 2; // Optimistic load latency assuming bypass. // This is overridden by OperandCycles if the // Itineraries are queried instead. + let ILPWindow = 10; // Don't reschedule small blocks to hide + // latency. Minimum latency requirements are already + // modeled strictly by reserving resources. let MispredictPenalty = 8; // Based on estimate of pipeline depth.
let Itineraries = CortexA9Itineraries; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 058d4c4..f4d568c 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -14,7 +14,9 @@ #include "ARMSubtarget.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -43,58 +45,83 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS) : ARMGenSubtargetInfo(TT, CPU, FS) , ARMProcFamily(Others) - , HasV4TOps(false) - , HasV5TOps(false) - , HasV5TEOps(false) - , HasV6Ops(false) - , HasV6T2Ops(false) - , HasV7Ops(false) - , HasVFPv2(false) - , HasVFPv3(false) - , HasVFPv4(false) - , HasNEON(false) - , UseNEONForSinglePrecisionFP(false) - , UseMulOps(UseFusedMulOps) - , SlowFPVMLx(false) - , HasVMLxForwarding(false) - , SlowFPBrcc(false) - , InThumbMode(false) - , HasThumb2(false) - , IsMClass(false) - , NoARM(false) - , PostRAScheduler(false) - , IsR9Reserved(ReserveR9) - , UseMovt(false) - , SupportsTailCall(false) - , HasFP16(false) - , HasD16(false) - , HasHardwareDivide(false) - , HasHardwareDivideInARM(false) - , HasT2ExtractPack(false) - , HasDataBarrier(false) - , Pref32BitThumb(false) - , AvoidCPSRPartialUpdate(false) - , AvoidMOVsShifterOperand(false) - , HasRAS(false) - , HasMPExtension(false) - , FPOnlySP(false) - , AllowsUnalignedMem(false) - , Thumb2DSP(false) , stackAlignment(4) , CPUString(CPU) , TargetTriple(TT) , TargetABI(ARM_ABI_APCS) { - // Determine default and user specified characteristics + initializeEnvironment(); + resetSubtargetFeatures(CPU, FS); +} + +void ARMSubtarget::initializeEnvironment() { + HasV4TOps = false; + HasV5TOps = false; + HasV5TEOps = false; + HasV6Ops = false; + HasV6T2Ops = false; + HasV7Ops = false; + HasVFPv2 = false; + HasVFPv3 = false; + HasVFPv4 = false; + HasNEON = false; + UseNEONForSinglePrecisionFP = false; + UseMulOps = UseFusedMulOps; + SlowFPVMLx = false; + HasVMLxForwarding = false; + SlowFPBrcc = false; + InThumbMode = false; + HasThumb2 = false; + IsMClass = false; + NoARM = false; + PostRAScheduler = false; + IsR9Reserved = ReserveR9; + UseMovt = false; + SupportsTailCall = false; + HasFP16 = false; + HasD16 = false; + HasHardwareDivide = false; + HasHardwareDivideInARM = false; + HasT2ExtractPack = false; + HasDataBarrier = false; + Pref32BitThumb = false; + AvoidCPSRPartialUpdate = false; + AvoidMOVsShifterOperand = false; + HasRAS = false; + HasMPExtension = false; + FPOnlySP = false; + AllowsUnalignedMem = false; + Thumb2DSP = false; + UseNaClTrap = false; +} + +void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { + AttributeSet FnAttrs = MF->getFunction()->getAttributes(); + Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, + "target-cpu"); + Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, + "target-features"); + std::string CPU = + !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; + std::string FS = + !FSAttr.hasAttribute(Attribute::None) ? 
FSAttr.getValueAsString() : ""; + if (!FS.empty()) { + initializeEnvironment(); + resetSubtargetFeatures(CPU, FS); + } +} + +void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { if (CPUString.empty()) CPUString = "generic"; // Insert the architecture feature derived from the target triple into the // feature string. This is important for setting features that are implied // based on the architecture version. - std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPUString); + std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple.getTriple(), + CPUString); if (!FS.empty()) { if (!ArchFS.empty()) - ArchFS = ArchFS + "," + FS; + ArchFS = ArchFS + "," + FS.str(); else ArchFS = FS; } @@ -111,7 +138,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); - if ((TT.find("eabi") != std::string::npos) || (isTargetIOS() && isMClass())) + if ((TargetTriple.getTriple().find("eabi") != std::string::npos) || + (isTargetIOS() && isMClass())) // FIXME: We might want to separate AAPCS and EABI. Some systems, e.g. // Darwin-EABI conforms to AACPS but not the rest of EABI. TargetABI = ARM_ABI_AAPCS; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 64878cd..8ce22e1 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -156,6 +156,9 @@ protected: /// and such) instructions in Thumb2 code. bool Thumb2DSP; + /// NaCl TRAP instruction is generated instead of the regular TRAP. + bool UseNaClTrap; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -199,6 +202,12 @@ protected: /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + /// \brief Reset the features for the ARM target. 
+ virtual void resetSubtargetFeatures(const MachineFunction *MF); +private: + void initializeEnvironment(); + void resetSubtargetFeatures(StringRef CPU, StringRef FS); +public: void computeIssueWidth(); bool hasV4TOps() const { return HasV4TOps; } @@ -241,6 +250,7 @@ protected: bool hasRAS() const { return HasRAS; } bool hasMPExtension() const { return HasMPExtension; } bool hasThumb2DSP() const { return Thumb2DSP; } + bool useNaClTrap() const { return UseNaClTrap; } bool hasFP16() const { return HasFP16; } bool hasD16() const { return HasD16; } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index be6bec7..d4caf5c 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -46,6 +46,10 @@ public: virtual ARMJITInfo *getJITInfo() { return &JITInfo; } virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } + virtual const ARMTargetLowering *getTargetLowering() const { + // Implemented by derived classes + llvm_unreachable("getTargetLowering not implemented"); + } virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; } diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 03a23be..01c04b4 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/CostTable.h" using namespace llvm; // Declare the pass initialization routine locally as target-specific passes @@ -34,18 +35,20 @@ namespace { class ARMTTI : public ImmutablePass, public TargetTransformInfo { const ARMBaseTargetMachine *TM; const ARMSubtarget *ST; + const ARMTargetLowering *TLI; /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the result needs to be inserted and/or extracted from vectors. 
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; public: - ARMTTI() : ImmutablePass(ID), TM(0), ST(0) { + ARMTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { llvm_unreachable("This pass cannot be directly constructed"); } ARMTTI(const ARMBaseTargetMachine *TM) - : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()) { + : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), + TLI(TM->getTargetLowering()) { initializeARMTTIPass(*PassRegistry::getPassRegistry()); } @@ -77,6 +80,52 @@ public: virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const; /// @} + + + /// \name Vector TTI Implementations + /// @{ + + unsigned getNumberOfRegisters(bool Vector) const { + if (Vector) { + if (ST->hasNEON()) + return 16; + return 0; + } + + if (ST->isThumb1Only()) + return 8; + return 16; + } + + unsigned getRegisterBitWidth(bool Vector) const { + if (Vector) { + if (ST->hasNEON()) + return 128; + return 0; + } + + return 32; + } + + unsigned getMaximumUnrollFactor() const { + // These are out of order CPUs: + if (ST->isCortexA15() || ST->isSwift()) + return 2; + return 1; + } + + unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index, Type *SubTp) const; + + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const; + + unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const; + + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; + + unsigned getAddressComputationCost(Type *Val) const; + /// @} }; } // end anonymous namespace @@ -122,3 +171,200 @@ unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const { } return 2; } + +unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + EVT SrcTy = TLI->getValueType(Src); + EVT DstTy = TLI->getValueType(Dst); + + if (!SrcTy.isSimple() || !DstTy.isSimple()) + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); + + // Some arithmetic, load and store operations have specific instructions + // to cast up/down their types automatically at no extra cost. + // TODO: Get these tables to know at least what the related operations are. + static const TypeConversionCostTblEntry<MVT> NEONVectorConversionTbl[] = { + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, + { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, + { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, + + // Vector float <-> i32 conversions. + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, + { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, + + // Vector double <-> i32 conversions. + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 } + }; + + if (SrcTy.isVector() && ST->hasNEON()) { + int Idx = ConvertCostTableLookup<MVT>(NEONVectorConversionTbl, + array_lengthof(NEONVectorConversionTbl), + ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); + if (Idx != -1) + return NEONVectorConversionTbl[Idx].Cost; + } + + // Scalar float to integer conversions. 
+ static const TypeConversionCostTblEntry<MVT> NEONFloatConversionTbl[] = { + { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 }, + { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 }, + { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 }, + { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 }, + { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 }, + { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 }, + { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 }, + { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 }, + { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 }, + { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 }, + { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 }, + { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 }, + { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 }, + { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 }, + { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 }, + { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 }, + { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 }, + { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 } + }; + if (SrcTy.isFloatingPoint() && ST->hasNEON()) { + int Idx = ConvertCostTableLookup<MVT>(NEONFloatConversionTbl, + array_lengthof(NEONFloatConversionTbl), + ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT()); + if (Idx != -1) + return NEONFloatConversionTbl[Idx].Cost; + } + + + // Scalar integer to float conversions. + static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = { + { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 }, + { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 }, + { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 }, + { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 }, + { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 } + }; + + if (SrcTy.isInteger() && ST->hasNEON()) { + int Idx = ConvertCostTableLookup<MVT>(NEONIntegerConversionTbl, + array_lengthof(NEONIntegerConversionTbl), + ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT()); + if (Idx != -1) + return NEONIntegerConversionTbl[Idx].Cost; + } + + // Scalar integer conversion costs. + static const TypeConversionCostTblEntry<MVT> ARMIntegerConversionTbl[] = { + // i16 -> i64 requires two dependent operations. + { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 }, + + // Truncates on i64 are assumed to be free. + { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 }, + { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 }, + { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 }, + { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 } + }; + + if (SrcTy.isInteger()) { + int Idx = + ConvertCostTableLookup<MVT>(ARMIntegerConversionTbl, + array_lengthof(ARMIntegerConversionTbl), + ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT()); + if (Idx != -1) + return ARMIntegerConversionTbl[Idx].Cost; + } + + + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); +} + +unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy, + unsigned Index) const { + // Penalize inserting into a D-subregister.
We end up with a three times + // lower estimated throughput on swift. + if (ST->isSwift() && + Opcode == Instruction::InsertElement && + ValTy->isVectorTy() && + ValTy->getScalarSizeInBits() <= 32) + return 3; + + return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index); +} + +unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + // On NEON a a vector select gets lowered to vbsl. + if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); + return LT.first; + } + + return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); +} + +unsigned ARMTTI::getAddressComputationCost(Type *Ty) const { + // In many cases the address computation is not merged into the instruction + // addressing mode. + return 1; +} + +unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) const { + // We only handle costs of reverse shuffles for now. + if (Kind != SK_Reverse) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + + static const CostTblEntry<MVT> NEONShuffleTbl[] = { + // Reverse shuffle cost one instruction if we are shuffling within a double + // word (vrev) or two if we shuffle a quad word (vrev, vext). + { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 }, + { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 }, + { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, + { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, + + { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 }, + { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 }, + { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 }, + { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 } + }; + + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + + int Idx = CostTableLookup<MVT>(NEONShuffleTbl, array_lengthof(NEONShuffleTbl), + ISD::VECTOR_SHUFFLE, LT.second); + if (Idx == -1) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + + return LT.first * NEONShuffleTbl[Idx].Cost; +} diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index ad37a21..6c678fd 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -18,7 +18,9 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" @@ -28,6 +30,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" @@ -250,6 +253,13 @@ public: // Not in an ITBlock to start with. ITState.CurPosition = ~0U; + + // Set ELF header flags. + // FIXME: This should eventually end up somewhere else where more + // intelligent flag decisions can be made. For now we are just maintaining + // the statu/parseDirects quo for ARM and setting EF_ARM_EABI_VER5 as the default. 
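+ // Only an ELF streamer carries object-header flags; for other streamers
+ // (textual assembly, MachO) the dyn_cast below returns null and no flags
+ // are written.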
+ if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&Parser.getStreamer())) + MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5); } // Implementation of the MCTargetAsmParser interface: @@ -259,6 +269,7 @@ public: SmallVectorImpl<MCParsedAsmOperand*> &Operands); bool ParseDirective(AsmToken DirectiveID); + unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind); unsigned checkTargetMatchPredicate(MCInst &Inst); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, @@ -271,7 +282,7 @@ public: namespace { /// ARMOperand - Instances of this class represent a parsed ARM machine -/// instruction. +/// operand. class ARMOperand : public MCParsedAsmOperand { enum KindTy { k_CondCode, @@ -2557,7 +2568,7 @@ int ARMAsmParser::tryParseShiftRegister( Parser.Lex(); // Eat hash. SMLoc ImmLoc = Parser.getTok().getLoc(); const MCExpr *ShiftExpr = 0; - if (getParser().ParseExpression(ShiftExpr, EndLoc)) { + if (getParser().parseExpression(ShiftExpr, EndLoc)) { Error(ImmLoc, "invalid immediate shift value"); return -1; } @@ -2640,7 +2651,7 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat left bracket token. const MCExpr *ImmVal; - if (getParser().ParseExpression(ImmVal)) + if (getParser().parseExpression(ImmVal)) return true; const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); if (!MCE) @@ -2785,7 +2796,7 @@ parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { const MCExpr *Expr; SMLoc Loc = Parser.getTok().getLoc(); - if (getParser().ParseExpression(Expr)) { + if (getParser().parseExpression(Expr)) { Error(Loc, "illegal expression"); return MatchOperand_ParseFail; } @@ -2998,7 +3009,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) { const MCExpr *LaneIndex; SMLoc Loc = Parser.getTok().getLoc(); - if (getParser().ParseExpression(LaneIndex)) { + if (getParser().parseExpression(LaneIndex)) { Error(Loc, "illegal expression"); return MatchOperand_ParseFail; } @@ -3316,7 +3327,7 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc Loc = Parser.getTok().getLoc(); const MCExpr *MemBarrierID; - if (getParser().ParseExpression(MemBarrierID)) { + if (getParser().parseExpression(MemBarrierID)) { Error(Loc, "illegal expression"); return MatchOperand_ParseFail; } @@ -3532,7 +3543,7 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, const MCExpr *ShiftAmount; SMLoc Loc = Parser.getTok().getLoc(); SMLoc EndLoc; - if (getParser().ParseExpression(ShiftAmount, EndLoc)) { + if (getParser().parseExpression(ShiftAmount, EndLoc)) { Error(Loc, "illegal expression"); return MatchOperand_ParseFail; } @@ -3612,7 +3623,7 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { const MCExpr *ShiftAmount; SMLoc EndLoc; - if (getParser().ParseExpression(ShiftAmount, EndLoc)) { + if (getParser().parseExpression(ShiftAmount, EndLoc)) { Error(ExLoc, "malformed shift expression"); return MatchOperand_ParseFail; } @@ -3673,7 +3684,7 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { const MCExpr *ShiftAmount; SMLoc EndLoc; - if (getParser().ParseExpression(ShiftAmount, EndLoc)) { + if (getParser().parseExpression(ShiftAmount, EndLoc)) { Error(ExLoc, "malformed rotate expression"); return MatchOperand_ParseFail; } @@ -3710,7 +3721,7 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { const MCExpr *LSBExpr; SMLoc E = Parser.getTok().getLoc(); - if (getParser().ParseExpression(LSBExpr)) { + if 
(getParser().parseExpression(LSBExpr)) { Error(E, "malformed immediate expression"); return MatchOperand_ParseFail; } @@ -3743,7 +3754,7 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { const MCExpr *WidthExpr; SMLoc EndLoc; - if (getParser().ParseExpression(WidthExpr, EndLoc)) { + if (getParser().parseExpression(WidthExpr, EndLoc)) { Error(E, "malformed immediate expression"); return MatchOperand_ParseFail; } @@ -3839,7 +3850,7 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { bool isNegative = Parser.getTok().is(AsmToken::Minus); const MCExpr *Offset; SMLoc E; - if (getParser().ParseExpression(Offset, E)) + if (getParser().parseExpression(Offset, E)) return MatchOperand_ParseFail; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset); if (!CE) { @@ -4226,9 +4237,10 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (BaseRegNum == -1) return Error(BaseRegTok.getLoc(), "register expected"); - // The next token must either be a comma or a closing bracket. + // The next token must either be a comma, a colon or a closing bracket. const AsmToken &Tok = Parser.getTok(); - if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac)) + if (!Tok.is(AsmToken::Colon) && !Tok.is(AsmToken::Comma) && + !Tok.is(AsmToken::RBrac)) return Error(Tok.getLoc(), "malformed memory operand"); if (Tok.is(AsmToken::RBrac)) { @@ -4248,8 +4260,11 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return false; } - assert(Tok.is(AsmToken::Comma) && "Lost comma in memory operand?!"); - Parser.Lex(); // Eat the comma. + assert((Tok.is(AsmToken::Colon) || Tok.is(AsmToken::Comma)) && + "Lost colon or comma in memory operand?!"); + if (Tok.is(AsmToken::Comma)) { + Parser.Lex(); // Eat the comma. + } // If we have a ':', it's an alignment specifier. if (Parser.getTok().is(AsmToken::Colon)) { @@ -4257,7 +4272,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { E = Parser.getTok().getLoc(); const MCExpr *Expr; - if (getParser().ParseExpression(Expr)) + if (getParser().parseExpression(Expr)) return true; // The expression has to be a constant. Memory references with relocations @@ -4313,7 +4328,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { bool isNegative = getParser().getTok().is(AsmToken::Minus); const MCExpr *Offset; - if (getParser().ParseExpression(Offset)) + if (getParser().parseExpression(Offset)) return true; // The expression has to be a constant. Memory references with relocations @@ -4432,7 +4447,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, Parser.Lex(); // Eat hash token. const MCExpr *Expr; - if (getParser().ParseExpression(Expr)) + if (getParser().parseExpression(Expr)) return true; // Range check the immediate. // lsl, ror: 0 <= imm <= 31 @@ -4461,7 +4476,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Anything that can accept a floating point constant as an operand - // needs to go through here, as the regular ParseExpression is + // needs to go through here, as the regular parseExpression is // integer only. // // This routine still creates a generic Immediate operand, containing @@ -4581,7 +4596,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, // identifier (like labels) as expressions and create them as immediates. 
 const MCExpr *IdVal;
 S = Parser.getTok().getLoc();
- if (getParser().ParseExpression(IdVal))
+ if (getParser().parseExpression(IdVal))
 return true;
 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
 Operands.push_back(ARMOperand::CreateImm(IdVal, S, E));
@@ -4600,7 +4615,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
 if (Parser.getTok().isNot(AsmToken::Colon)) {
 bool isNegative = Parser.getTok().is(AsmToken::Minus);
 const MCExpr *ImmVal;
- if (getParser().ParseExpression(ImmVal))
+ if (getParser().parseExpression(ImmVal))
 return true;
 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
 if (CE) {
@@ -4610,6 +4625,15 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
 }
 E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
 Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
+
+ // There can be a trailing '!' on operands that we want as a separate
+ // '!' Token operand. Handle that here. For example, the compatibility
+ // alias for 'srsdb sp!, #imm' is 'srsdb #imm!'.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken(Parser.getTok().getString(),
+ Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the exclaim token.
+ }
 return false;
 }
 // w/ a ':' after the '#', it's just like a plain ':'.
@@ -4624,7 +4648,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
 return true;
 const MCExpr *SubExprVal;
- if (getParser().ParseExpression(SubExprVal))
+ if (getParser().parseExpression(SubExprVal))
 return true;
 const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
@@ -4997,7 +5021,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
 // In Thumb1, only the branch (B) instruction can be predicated.
 if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
 return Error(NameLoc, "conditional execution not supported in Thumb1");
 }
@@ -5011,14 +5035,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
 if (Mnemonic == "it") {
 SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2);
 if (ITMask.size() > 3) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
 return Error(Loc, "too many conditions on IT instruction");
 }
 unsigned Mask = 8;
 for (unsigned i = ITMask.size(); i != 0; --i) {
 char pos = ITMask[i - 1];
 if (pos != 't' && pos != 'e') {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
 return Error(Loc, "illegal IT block condition mask '" + ITMask + "'");
 }
 Mask >>= 1;
@@ -5044,14 +5068,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
 // If we had a carry-set on an instruction that can't do that, issue an
 // error.
 if (!CanAcceptCarrySet && CarrySetting) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
 return Error(NameLoc, "instruction '" + Mnemonic +
 "' cannot set flags, but 's' suffix specified");
 }
 // If we had a predication code on an instruction that can't do that, issue an
 // error.
if (!CanAcceptPredicationCode && PredicationCode != ARMCC::AL) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(NameLoc, "instruction '" + Mnemonic + "' is not predicable, but condition code specified"); } @@ -5100,7 +5124,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. if (parseOperand(Operands, Mnemonic)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return true; } @@ -5109,7 +5133,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // Parse and remember the operand. if (parseOperand(Operands, Mnemonic)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return true; } } @@ -5117,7 +5141,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if (getLexer().isNot(AsmToken::EndOfStatement)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } @@ -5148,53 +5172,6 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, delete Op; } - // The vector-compare-to-zero instructions have a literal token "#0" at - // the end that comes to here as an immediate operand. Convert it to a - // token to play nicely with the matcher. - if ((Mnemonic == "vceq" || Mnemonic == "vcge" || Mnemonic == "vcgt" || - Mnemonic == "vcle" || Mnemonic == "vclt") && Operands.size() == 6 && - static_cast<ARMOperand*>(Operands[5])->isImm()) { - ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]); - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); - if (CE && CE->getValue() == 0) { - Operands.erase(Operands.begin() + 5); - Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc())); - delete Op; - } - } - // VCMP{E} does the same thing, but with a different operand count. - if ((Mnemonic == "vcmp" || Mnemonic == "vcmpe") && Operands.size() == 5 && - static_cast<ARMOperand*>(Operands[4])->isImm()) { - ARMOperand *Op = static_cast<ARMOperand*>(Operands[4]); - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); - if (CE && CE->getValue() == 0) { - Operands.erase(Operands.begin() + 4); - Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc())); - delete Op; - } - } - // Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the - // end. Convert it to a token here. Take care not to convert those - // that should hit the Thumb2 encoding. - if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 && - static_cast<ARMOperand*>(Operands[3])->isReg() && - static_cast<ARMOperand*>(Operands[4])->isReg() && - static_cast<ARMOperand*>(Operands[5])->isImm()) { - ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]); - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); - if (CE && CE->getValue() == 0 && - (isThumbOne() || - // The cc_out operand matches the IT block. - ((inITBlock() != CarrySetting) && - // Neither register operand is a high register. - (isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) && - isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()))))){ - Operands.erase(Operands.begin() + 5); - Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc())); - delete Op; - } - } - // Adjust operands of ldrexd/strexd to MCK_GPRPair. // ldrexd/strexd require even/odd GPR pair. 
To enforce this constraint, // a single GPRPair reg operand is used in the .td file to replace the two @@ -7646,10 +7623,10 @@ bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; - if (getParser().ParseExpression(Value)) + if (getParser().parseExpression(Value)) return true; - getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/); + getParser().getStreamer().EmitValue(Value, Size); if (getLexer().is(AsmToken::EndOfStatement)) break; @@ -7793,13 +7770,13 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { unsigned Reg; SMLoc SRegLoc, ERegLoc; if (ParseRegister(Reg, SRegLoc, ERegLoc)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(SRegLoc, "register name expected"); } // Shouldn't be anything else. if (Parser.getTok().isNot(AsmToken::EndOfStatement)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Parser.getTok().getLoc(), "unexpected input in .req directive."); } @@ -7817,7 +7794,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { /// ::= .unreq registername bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { if (Parser.getTok().isNot(AsmToken::Identifier)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(L, "unexpected input in .unreq directive."); } RegisterReqs.erase(Parser.getTok().getIdentifier()); @@ -7847,3 +7824,21 @@ extern "C" void LLVMInitializeARMAsmParser() { #define GET_SUBTARGET_FEATURE_NAME #define GET_MATCHER_IMPLEMENTATION #include "ARMGenAsmMatcher.inc" + +// Define this matcher function after the auto-generated include so we +// have the match class enum definitions. +unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, + unsigned Kind) { + ARMOperand *Op = static_cast<ARMOperand*>(AsmOp); + // If the kind is a token for a literal immediate, check if our asm + // operand matches. This is for InstAliases which have a fixed-value + // immediate in the syntax. + if (Kind == MCK__35_0 && Op->isImm()) { + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); + if (!CE) + return Match_InvalidOperand; + if (CE->getValue() == 0) + return Match_Success; + } + return Match_InvalidOperand; +} diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index d48b37e..2afb20d 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -627,8 +627,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); if (MO2.getImm()) { - // FIXME: Both darwin as and GNU as violate ARM docs here. 
- O << ", :" << (MO2.getImm() << 3); + O << ":" << (MO2.getImm() << 3); } O << "]" << markup(">"); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 1f1b334..e66e985 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -663,25 +664,20 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef Triple TheTriple(TT); if (TheTriple.isOSDarwin()) { - if (TheTriple.getArchName() == "armv4t" || - TheTriple.getArchName() == "thumbv4t") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V4T); - else if (TheTriple.getArchName() == "armv5e" || - TheTriple.getArchName() == "thumbv5e") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V5TEJ); - else if (TheTriple.getArchName() == "armv6" || - TheTriple.getArchName() == "thumbv6") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6); - else if (TheTriple.getArchName() == "armv7f" || - TheTriple.getArchName() == "thumbv7f") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7F); - else if (TheTriple.getArchName() == "armv7k" || - TheTriple.getArchName() == "thumbv7k") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7K); - else if (TheTriple.getArchName() == "armv7s" || - TheTriple.getArchName() == "thumbv7s") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7S); - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7); + object::mach::CPUSubtypeARM CS = + StringSwitch<object::mach::CPUSubtypeARM>(TheTriple.getArchName()) + .Cases("armv4t", "thumbv4t", object::mach::CSARM_V4T) + .Cases("armv5e", "thumbv5e",object::mach::CSARM_V5TEJ) + .Cases("armv6", "thumbv6", object::mach::CSARM_V6) + .Cases("armv6m", "thumbv6m", object::mach::CSARM_V6M) + .Cases("armv7em", "thumbv7em", object::mach::CSARM_V7EM) + .Cases("armv7f", "thumbv7f", object::mach::CSARM_V7F) + .Cases("armv7k", "thumbv7k", object::mach::CSARM_V7K) + .Cases("armv7m", "thumbv7m", object::mach::CSARM_V7M) + .Cases("armv7s", "thumbv7s", object::mach::CSARM_V7S) + .Default(object::mach::CSARM_V7); + + return new DarwinARMAsmBackend(T, TT, CS); } if (TheTriple.isOSWindows()) diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 9193e40..f98bbd2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -37,7 +37,6 @@ namespace { virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, bool IsRelocWithSymbol, int64_t Addend) const; - virtual unsigned getEFlags() const; virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, const MCValue &Target, const MCFragment &F, @@ -53,11 +52,6 @@ ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI) ARMELFObjectWriter::~ARMELFObjectWriter() {} -// FIXME: get the real EABI Version from the Triple. -unsigned ARMELFObjectWriter::getEFlags() const { - return ELF::EF_ARM_EABIMASK & DefaultEABIVersion; -} - // In ARM, _MergedGlobals and other most symbols get emitted directly. // I.e. not as an offset to a section symbol. // This code is an approximation of what ARM/gcc does. 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 39ded8f..418971d 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "ARMUnwindOp.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" @@ -53,14 +54,27 @@ namespace { /// by MachO. Beware! class ARMELFStreamer : public MCELFStreamer { public: - ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, bool IsThumb) - : MCELFStreamer(Context, TAB, OS, Emitter), - IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) { - } + ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, + MCCodeEmitter *Emitter, bool IsThumb) + : MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter), + IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), ExTab(0), + FnStart(0), Personality(0), CantUnwind(false) {} ~ARMELFStreamer() {} + // ARM exception handling directives + virtual void EmitFnStart(); + virtual void EmitFnEnd(); + virtual void EmitCantUnwind(); + virtual void EmitPersonality(const MCSymbol *Per); + virtual void EmitHandlerData(); + virtual void EmitSetFP(unsigned NewFpReg, + unsigned NewSpReg, + int64_t Offset = 0); + virtual void EmitPad(int64_t Offset); + virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList, + bool isVector); + virtual void ChangeSection(const MCSection *Section) { // We have to keep track of the mapping symbol state of any sections we // use. Each one should start off as EMS_None, which is provided as the @@ -119,6 +133,10 @@ public: } } + static bool classof(const MCStreamer *S) { + return S->getKind() == SK_ARMELFStreamer; + } + private: enum ElfMappingSymbol { EMS_None, @@ -172,6 +190,15 @@ private: SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc); } + // Helper functions for ARM exception handling directives + void Reset(); + + void EmitPersonalityFixup(StringRef Name); + + void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags, + SectionKind Kind, const MCSymbol &Fn); + void SwitchToExTabSection(const MCSymbol &FnStart); + void SwitchToExIdxSection(const MCSymbol &FnStart); bool IsThumb; int64_t MappingSymbolCounter; @@ -179,10 +206,200 @@ private: DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols; ElfMappingSymbol LastEMS; - /// @} + // ARM Exception Handling Frame Information + MCSymbol *ExTab; + MCSymbol *FnStart; + const MCSymbol *Personality; + bool CantUnwind; }; } +inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, + unsigned Type, + unsigned Flags, + SectionKind Kind, + const MCSymbol &Fn) { + const MCSectionELF &FnSection = + static_cast<const MCSectionELF &>(Fn.getSection()); + + // Create the name for new section + StringRef FnSecName(FnSection.getSectionName()); + SmallString<128> EHSecName(Prefix); + if (FnSecName != ".text") { + EHSecName += FnSecName; + } + + // Get .ARM.extab or .ARM.exidx section + const MCSectionELF *EHSection = NULL; + if (const MCSymbol *Group = FnSection.getGroup()) { + EHSection = getContext().getELFSection( + EHSecName, Type, Flags | ELF::SHF_GROUP, Kind, + FnSection.getEntrySize(), Group->getName()); + } else { + EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind); + } + assert(EHSection); + + // Switch to .ARM.extab or .ARM.exidx section + 
SwitchSection(EHSection);
+ EmitCodeAlignment(4, 0);
+}
+
+inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) {
+ SwitchToEHSection(".ARM.extab",
+ ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel(),
+ FnStart);
+}
+
+inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
+ SwitchToEHSection(".ARM.exidx",
+ ELF::SHT_ARM_EXIDX,
+ ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER,
+ SectionKind::getDataRel(),
+ FnStart);
+}
+
+void ARMELFStreamer::Reset() {
+ ExTab = NULL;
+ FnStart = NULL;
+ Personality = NULL;
+ CantUnwind = false;
+}
+
+// Add the R_ARM_NONE fixup at the same position.
+void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
+ const MCSymbol *PersonalitySym = getContext().GetOrCreateSymbol(Name);
+
+ const MCSymbolRefExpr *PersonalityRef =
+ MCSymbolRefExpr::Create(PersonalitySym,
+ MCSymbolRefExpr::VK_ARM_NONE,
+ getContext());
+
+ AddValueSymbols(PersonalityRef);
+ MCDataFragment *DF = getOrCreateDataFragment();
+ DF->getFixups().push_back(
+ MCFixup::Create(DF->getContents().size(), PersonalityRef,
+ MCFixup::getKindForSize(4, false)));
+}
+
+void ARMELFStreamer::EmitFnStart() {
+ assert(FnStart == 0);
+ FnStart = getContext().CreateTempSymbol();
+ EmitLabel(FnStart);
+}
+
+void ARMELFStreamer::EmitFnEnd() {
+ assert(FnStart && ".fnstart must precede .fnend");
+
+ // Emit unwind opcodes if there is no .handlerdata directive.
+ int PersonalityIndex = -1;
+ if (!ExTab && !CantUnwind) {
+ // For __aeabi_unwind_cpp_pr1, we have to emit opcodes in .ARM.extab.
+ SwitchToExTabSection(*FnStart);
+
+ // Create the .ARM.extab label for the offset in .ARM.exidx.
+ ExTab = getContext().CreateTempSymbol();
+ EmitLabel(ExTab);
+
+ PersonalityIndex = 1;
+
+ uint32_t Entry = 0;
+ uint32_t NumExtraEntryWords = 0;
+ Entry |= NumExtraEntryWords << 24;
+ Entry |= (EHT_COMPACT | PersonalityIndex) << 16;
+
+ // TODO: This should be generated according to the .save, .vsave, .setfp
+ // directives. Currently, we are simply generating the FINISH opcode.
+ Entry |= UNWIND_OPCODE_FINISH << 8;
+ Entry |= UNWIND_OPCODE_FINISH;
+
+ EmitIntValue(Entry, 4, 0);
+ }
+
+ // Emit the exception index table entry.
+ SwitchToExIdxSection(*FnStart);
+
+ if (PersonalityIndex == 1)
+ EmitPersonalityFixup("__aeabi_unwind_cpp_pr1");
+
+ const MCSymbolRefExpr *FnStartRef =
+ MCSymbolRefExpr::Create(FnStart,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+
+ EmitValue(FnStartRef, 4, 0);
+
+ if (CantUnwind) {
+ EmitIntValue(EXIDX_CANTUNWIND, 4, 0);
+ } else {
+ const MCSymbolRefExpr *ExTabEntryRef =
+ MCSymbolRefExpr::Create(ExTab,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+ EmitValue(ExTabEntryRef, 4, 0);
+ }
+
+ // Clean up the exception handling frame information.
+ Reset();
+}
+
+void ARMELFStreamer::EmitCantUnwind() {
+ CantUnwind = true;
+}
+
+void ARMELFStreamer::EmitHandlerData() {
+ SwitchToExTabSection(*FnStart);
+
+ // Create the .ARM.extab label for the offset in .ARM.exidx.
+ assert(!ExTab);
+ ExTab = getContext().CreateTempSymbol();
+ EmitLabel(ExTab);
+
+ // Emit the personality routine reference.
+ assert(Personality && ".personality directive must precede .handlerdata");
+
+ const MCSymbolRefExpr *PersonalityRef =
+ MCSymbolRefExpr::Create(Personality,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+
+ EmitValue(PersonalityRef, 4, 0);
+
+ // Emit unwind opcodes.
+ uint32_t Entry = 0;
+ uint32_t NumExtraEntryWords = 0;
+
+ // TODO: This should be generated according to the .save, .vsave, .setfp
+ // directives. Currently, we are simply generating the FINISH opcode.
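+ // Layout of this table word: bits 31-24 hold the number of additional
+ // 4-byte words (zero here); the three lower bytes hold unwind opcodes,
+ // most significant byte first, padded with UNWIND_OPCODE_FINISH.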
+ Entry |= NumExtraEntryWords << 24; + Entry |= UNWIND_OPCODE_FINISH << 16; + Entry |= UNWIND_OPCODE_FINISH << 8; + Entry |= UNWIND_OPCODE_FINISH; + + EmitIntValue(Entry, 4, 0); +} + +void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) { + Personality = Per; +} + +void ARMELFStreamer::EmitSetFP(unsigned NewFpReg, + unsigned NewSpReg, + int64_t Offset) { + // TODO: Not implemented +} + +void ARMELFStreamer::EmitPad(int64_t Offset) { + // TODO: Not implemented +} + +void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, + bool IsVector) { + // TODO: Not implemented +} + namespace llvm { MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index b404e6c..cd4067a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -64,6 +64,9 @@ public: return getSubExpr()->FindAssociatedSection(); } + // There are no TLS ARMMCExprs at the moment. + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} + static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index f4958f3..f09fb5a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -11,11 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "ARMMCTargetDesc.h" #include "ARMBaseInfo.h" #include "ARMELFStreamer.h" #include "ARMMCAsmInfo.h" +#include "ARMMCTargetDesc.h" #include "InstPrinter/ARMInstPrinter.h" +#include "llvm/ADT/Triple.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" @@ -37,6 +38,8 @@ using namespace llvm; std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { + Triple triple(TT); + // Set the boolean corresponding to the current target triple, or the default // if one cannot be determined, to true. unsigned Len = TT.size(); @@ -119,6 +122,13 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { ARMArchFeature += ",+thumb-mode"; } + if (triple.isOSNaCl()) { + if (ARMArchFeature.empty()) + ARMArchFeature = "+nacl-trap"; + else + ARMArchFeature += ",+nacl-trap"; + } + return ARMArchFeature; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h new file mode 100644 index 0000000..dad5576 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h @@ -0,0 +1,112 @@ +//===-- ARMUnwindOp.h - ARM Unwind Opcodes ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the constants for the ARM unwind opcodes and exception +// handling table entry kinds. 
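+// The byte encodings below follow the frame-unwinding instruction formats
+// defined by the ARM EHABI; see that specification for the authoritative
+// semantics of each opcode.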
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_UNWIND_OP_H
+#define ARM_UNWIND_OP_H
+
+namespace llvm {
+
+  /// ARM exception handling table entry kinds
+  enum ARMEHTEntryKind {
+    EHT_GENERIC = 0x00,
+    EHT_COMPACT = 0x80
+  };
+
+  enum {
+    /// Special entry for a function that never unwinds
+    EXIDX_CANTUNWIND = 0x1
+  };
+
+  /// ARM-defined frame unwinding opcodes
+  enum ARMUnwindOpcodes {
+    // Format: 00xxxxxx
+    // Purpose: vsp = vsp + ((x << 2) + 4)
+    UNWIND_OPCODE_INC_VSP = 0x00,
+
+    // Format: 01xxxxxx
+    // Purpose: vsp = vsp - ((x << 2) + 4)
+    UNWIND_OPCODE_DEC_VSP = 0x40,
+
+    // Format: 10000000 00000000
+    // Purpose: refuse to unwind
+    UNWIND_OPCODE_REFUSE = 0x8000,
+
+    // Format: 1000xxxx xxxxxxxx
+    // Purpose: pop r[15:12], r[11:4]
+    // Constraint: x != 0
+    UNWIND_OPCODE_POP_REG_MASK_R4 = 0x8000,
+
+    // Format: 1001xxxx
+    // Purpose: vsp = r[x]
+    // Constraint: x != 13 && x != 15
+    UNWIND_OPCODE_SET_VSP = 0x90,
+
+    // Format: 10100xxx
+    // Purpose: pop r[(4+x):4]
+    UNWIND_OPCODE_POP_REG_RANGE_R4 = 0xa0,
+
+    // Format: 10101xxx
+    // Purpose: pop r14, r[(4+x):4]
+    UNWIND_OPCODE_POP_REG_RANGE_R4_R14 = 0xa8,
+
+    // Format: 10110000
+    // Purpose: finish
+    UNWIND_OPCODE_FINISH = 0xb0,
+
+    // Format: 10110001 0000xxxx
+    // Purpose: pop r[3:0]
+    // Constraint: x != 0
+    UNWIND_OPCODE_POP_REG_MASK = 0xb100,
+
+    // Format: 10110010 x(uleb128)
+    // Purpose: vsp = vsp + ((x << 2) + 0x204)
+    UNWIND_OPCODE_INC_VSP_ULEB128 = 0xb2,
+
+    // Format: 10110011 xxxxyyyy
+    // Purpose: pop d[(x+y):x]
+    UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDX = 0xb300,
+
+    // Format: 10111xxx
+    // Purpose: pop d[(8+x):8]
+    UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDX_D8 = 0xb8,
+
+    // Format: 11000xxx
+    // Purpose: pop wR[(10+x):10]
+    UNWIND_OPCODE_POP_WIRELESS_MMX_REG_RANGE_WR10 = 0xc0,
+
+    // Format: 11000110 xxxxyyyy
+    // Purpose: pop wR[(x+y):x]
+    UNWIND_OPCODE_POP_WIRELESS_MMX_REG_RANGE = 0xc600,
+
+    // Format: 11000111 0000xxxx
+    // Purpose: pop wCGR[3:0]
+    // Constraint: x != 0
+    UNWIND_OPCODE_POP_WIRELESS_MMX_REG_MASK = 0xc700,
+
+    // Format: 11001000 xxxxyyyy
+    // Purpose: pop d[(16+x+y):(16+x)]
+    UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 = 0xc800,
+
+    // Format: 11001001 xxxxyyyy
+    // Purpose: pop d[(x+y):x]
+    UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD = 0xc900,
+
+    // Format: 11010xxx
+    // Purpose: pop d[(8+x):8]
+    UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D8 = 0xd0
+  };
+
+}
+
+#endif // ARM_UNWIND_OP_H
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 123ada6..2c3388c 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -43,6 +43,41 @@ emitSPUpdate(MachineBasicBlock &MBB,
 MRI, MIFlags);
 }
+
+void Thumb1FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ emitSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount);
+ } else {
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(MBB, I, TII, dl, *RegInfo, Amount);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
 void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
 MachineBasicBlock &MBB = MF.front();
 MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -124,14 +159,17 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
 unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
 unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
 unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+ bool HasFP = hasFP(MF);
+ if (HasFP)
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ NumBytes);
 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
 AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
 NumBytes = DPRCSOffset;
 // Adjust FP so it points to the stack slot that contains the previous FP.
- if (hasFP(MF)) {
+ if (HasFP) {
 AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
 .addFrameIndex(FramePtrSpillFI).addImm(0)
 .setMIFlags(MachineInstr::FrameSetup));
@@ -146,7 +184,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
 emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
 MachineInstr::FrameSetup);
- if (STI.isTargetELF() && hasFP(MF))
+ if (STI.isTargetELF() && HasFP)
 MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
 AFI->getFramePtrSpillOffset());
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index bcfc516..5a300af 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -45,6 +45,10 @@ public:
 const TargetRegisterInfo *TRI) const;
 bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
 };
} // End llvm namespace
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 57cc7d8..609d502 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -296,47 +296,6 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
 }
 }
-static void emitSPUpdate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- const TargetInstrInfo &TII, DebugLoc dl,
- const Thumb1RegisterInfo &MRI,
- int NumBytes) {
- emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
- MRI);
-}
-
-void Thumb1RegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- // If we have alloca, convert as follows:
- // ADJCALLSTACKDOWN -> sub, sp, sp, amount
- // ADJCALLSTACKUP -> add, sp, sp, amount
- MachineInstr *Old = I;
- DebugLoc dl = Old->getDebugLoc();
- unsigned Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep
the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = TFI->getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; - - // Replace the pseudo instruction with a new instruction... - unsigned Opc = Old->getOpcode(); - if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { - emitSPUpdate(MBB, I, TII, dl, *this, -Amount); - } else { - assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); - emitSPUpdate(MBB, I, TII, dl, *this, Amount); - } - } - } - MBB.erase(I); -} - /// emitThumbConstant - Emit a series of instructions to materialize a /// constant. static void emitThumbConstant(MachineBasicBlock &MBB, @@ -593,9 +552,9 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { unsigned VReg = 0; - unsigned i = 0; MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); @@ -603,13 +562,8 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, DebugLoc dl = MI.getDebugLoc(); MachineInstrBuilder MIB(*MBB.getParent(), &MI); - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - unsigned FrameReg = ARM::SP; - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + MF.getFrameInfo()->getStackSize() + SPAdj; @@ -646,15 +600,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Special handling of dbg_value instructions. if (MI.isDebugValue()) { - MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum). ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset); return; } // Modify MI as necessary to handle as much of 'Offset' as possible assert(AFI->isThumbFunction() && "This eliminateFrameIndex only supports Thumb1!"); - if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII)) + if (rewriteFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII)) return; // If we get here, the immediate doesn't fit into the instruction. We folded @@ -687,11 +641,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi)); - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); + MI.getOperand(FIOperandNum).ChangeToRegister(TmpReg, false, false, true); if (UseRR) // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame // register. The offset is already handled in the vreg value. - MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false); + MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, + false); } else if (MI.mayStore()) { VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); bool UseRR = false; @@ -708,11 +663,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII, *this); MI.setDesc(TII.get(UseRR ? 
ARM::tSTRr : ARM::tSTRi)); - MI.getOperand(i).ChangeToRegister(VReg, false, false, true); + MI.getOperand(FIOperandNum).ChangeToRegister(VReg, false, false, true); if (UseRR) // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame // register. The offset is already handled in the vreg value. - MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false); + MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, + false); } else { llvm_unreachable("Unexpected opcode!"); } diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index f2e4b08..ebbab36 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -43,11 +43,6 @@ public: unsigned PredReg = 0, unsigned MIFlags = MachineInstr::NoFlags) const; - /// Code Generation virtual methods... - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be // however much remains to be handled. Return 'true' if no further // work is required. @@ -62,7 +57,8 @@ public: const TargetRegisterClass *RC, unsigned Reg) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; }; } diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index f468861..604abf9 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -470,18 +470,19 @@ void CppWriter::printAttributes(const AttributeSet &PAL, nl(Out); if (!PAL.isEmpty()) { Out << '{'; in(); nl(Out); - Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out); - Out << "AttributeWithIndex PAWI;"; nl(Out); + Out << "SmallVector<AttributeSet, 4> Attrs;"; nl(Out); + Out << "AttributeSet PAS;"; in(); nl(Out); for (unsigned i = 0; i < PAL.getNumSlots(); ++i) { - unsigned index = PAL.getSlot(i).Index; - AttrBuilder attrs(PAL.getSlot(i).Attrs); - Out << "PAWI.Index = " << index << "U;\n"; - Out << " {\n AttrBuilder B;\n"; - -#define HANDLE_ATTR(X) \ - if (attrs.contains(Attribute::X)) \ - Out << " B.addAttribute(Attribute::" #X ");\n"; \ - attrs.removeAttribute(Attribute::X); + unsigned index = PAL.getSlotIndex(i); + AttrBuilder attrs(PAL.getSlotAttributes(i), index); + Out << "{"; in(); nl(Out); + Out << "AttrBuilder B;"; nl(Out); + +#define HANDLE_ATTR(X) \ + if (attrs.contains(Attribute::X)) { \ + Out << "B.addAttribute(Attribute::" #X ");"; nl(Out); \ + attrs.removeAttribute(Attribute::X); \ + } HANDLE_ATTR(SExt); HANDLE_ATTR(ZExt); @@ -499,6 +500,7 @@ void CppWriter::printAttributes(const AttributeSet &PAL, HANDLE_ATTR(OptimizeForSize); HANDLE_ATTR(StackProtect); HANDLE_ATTR(StackProtectReq); + HANDLE_ATTR(StackProtectStrong); HANDLE_ATTR(NoCapture); HANDLE_ATTR(NoRedZone); HANDLE_ATTR(NoImplicitFloat); @@ -509,14 +511,23 @@ void CppWriter::printAttributes(const AttributeSet &PAL, HANDLE_ATTR(NonLazyBind); HANDLE_ATTR(MinSize); #undef HANDLE_ATTR - if (attrs.contains(Attribute::StackAlignment)) - Out << " B.addStackAlignmentAttr(" << attrs.getStackAlignment() << ")\n"; - attrs.removeAttribute(Attribute::StackAlignment); + + if (attrs.contains(Attribute::StackAlignment)) { + Out << "B.addStackAlignmentAttr(" << attrs.getStackAlignment()<<')'; + nl(Out); + attrs.removeAttribute(Attribute::StackAlignment); + } + assert(!attrs.hasAttributes() && "Unhandled attribute!"); - Out << " 
PAWI.Attrs = Attribute::get(mod->getContext(), B);\n }"; - nl(Out); - Out << "Attrs.push_back(PAWI);"; + Out << "PAS = AttributeSet::get(mod->getContext(), "; + if (index == ~0U) + Out << "~0U,"; + else + Out << index << "U,"; + Out << " B);"; out(); nl(Out); + Out << "}"; out(); nl(Out); nl(Out); + Out << "Attrs.push_back(PAS);"; nl(Out); } Out << name << "_PAL = AttributeSet::get(mod->getContext(), Attrs);"; nl(Out); @@ -1888,23 +1899,24 @@ void CppWriter::printModuleBody() { void CppWriter::printProgram(const std::string& fname, const std::string& mName) { - Out << "#include <llvm/LLVMContext.h>\n"; - Out << "#include <llvm/Module.h>\n"; - Out << "#include <llvm/DerivedTypes.h>\n"; - Out << "#include <llvm/Constants.h>\n"; - Out << "#include <llvm/GlobalVariable.h>\n"; - Out << "#include <llvm/Function.h>\n"; - Out << "#include <llvm/CallingConv.h>\n"; - Out << "#include <llvm/BasicBlock.h>\n"; - Out << "#include <llvm/Instructions.h>\n"; - Out << "#include <llvm/InlineAsm.h>\n"; - Out << "#include <llvm/Support/FormattedStream.h>\n"; - Out << "#include <llvm/Support/MathExtras.h>\n"; Out << "#include <llvm/Pass.h>\n"; Out << "#include <llvm/PassManager.h>\n"; + Out << "#include <llvm/ADT/SmallVector.h>\n"; Out << "#include <llvm/Analysis/Verifier.h>\n"; Out << "#include <llvm/Assembly/PrintModulePass.h>\n"; + Out << "#include <llvm/IR/BasicBlock.h>\n"; + Out << "#include <llvm/IR/CallingConv.h>\n"; + Out << "#include <llvm/IR/Constants.h>\n"; + Out << "#include <llvm/IR/DerivedTypes.h>\n"; + Out << "#include <llvm/IR/Function.h>\n"; + Out << "#include <llvm/IR/GlobalVariable.h>\n"; + Out << "#include <llvm/IR/InlineAsm.h>\n"; + Out << "#include <llvm/IR/Instructions.h>\n"; + Out << "#include <llvm/IR/LLVMContext.h>\n"; + Out << "#include <llvm/IR/Module.h>\n"; + Out << "#include <llvm/Support/FormattedStream.h>\n"; + Out << "#include <llvm/Support/MathExtras.h>\n"; Out << "#include <algorithm>\n"; Out << "using namespace llvm;\n\n"; Out << "Module* " << fname << "();\n\n"; diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index aee43ba..b5b887e 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_target(HexagonCodeGen HexagonExpandPredSpillCode.cpp HexagonFrameLowering.cpp HexagonHardwareLoops.cpp + HexagonFixupHwLoops.cpp HexagonMachineScheduler.cpp HexagonMCInstLower.cpp HexagonInstrInfo.cpp diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index 45f857b..dfbefc8 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -21,14 +21,16 @@ namespace llvm { class FunctionPass; + class ModulePass; class TargetMachine; class MachineInstr; - class MCInst; + class HexagonMCInst; class HexagonAsmPrinter; class HexagonTargetMachine; class raw_ostream; - FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM); + FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, + CodeGenOpt::Level OptLevel); FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM); FunctionPass *createHexagonFPMoverPass(TargetMachine &TM); FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM); @@ -53,7 +55,7 @@ namespace llvm { TargetAsmBackend *createHexagonAsmBackend(const Target &, const std::string &); */ - void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI, + void HexagonLowerToMC(const MachineInstr *MI, HexagonMCInst &MCI, HexagonAsmPrinter &AP); } // end namespace llvm; diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp 
b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 58b89d1..88cd3fb 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -14,12 +14,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "asm-printer" -#include "HexagonAsmPrinter.h" #include "Hexagon.h" -#include "HexagonMCInst.h" +#include "HexagonAsmPrinter.h" #include "HexagonMachineFunctionInfo.h" -#include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" +#include "HexagonSubtarget.h" +#include "MCTargetDesc/HexagonMCInst.h" #include "InstPrinter/HexagonInstPrinter.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -220,8 +220,8 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { assert((Size+IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!"); for (unsigned Index = 0; Index < Size; Index++) { HexagonMCInst MCI; - MCI.setStartPacket(Index == 0); - MCI.setEndPacket(Index == (Size-1)); + MCI.setPacketStart(Index == 0); + MCI.setPacketEnd(Index == (Size-1)); HexagonLowerToMC(BundleMIs[Index], MCI, *this); OutStreamer.EmitInstruction(MCI); @@ -230,8 +230,8 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { else { HexagonMCInst MCI; if (MI->getOpcode() == Hexagon::ENDLOOP0) { - MCI.setStartPacket(true); - MCI.setEndPacket(true); + MCI.setPacketStart(true); + MCI.setPacketEnd(true); } HexagonLowerToMC(MI, MCI, *this); OutStreamer.EmitInstruction(MCI); diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp new file mode 100644 index 0000000..240cc95 --- /dev/null +++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -0,0 +1,183 @@ +//===---- HexagonFixupHwLoops.cpp - Fixup HW loops too far from LOOPn. ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// The loop start address in the LOOPn instruction is encoded as a distance +// from the LOOPn instruction itself. If the start address is too far from +// the LOOPn instruction, the loop needs to be set up manually, i.e. via +// direct transfers to SAn and LCn. +// This pass will identify and convert such LOOPn instructions to a proper +// form. +//===----------------------------------------------------------------------===// + + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/PassSupport.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" + +using namespace llvm; + +namespace llvm { + void initializeHexagonFixupHwLoopsPass(PassRegistry&); +} + +namespace { + struct HexagonFixupHwLoops : public MachineFunctionPass { + public: + static char ID; + + HexagonFixupHwLoops() : MachineFunctionPass(ID) { + initializeHexagonFixupHwLoopsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + /// \brief Maximum distance between the loop instr and the basic block. + /// Just an estimate. 
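+    /// Offsets are computed by counting every instruction as 4 bytes (see
+    /// fixupLoopInstrs), so this limit is deliberately conservative.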
+ static const unsigned MAX_LOOP_DISTANCE = 200;
+
+ /// \brief Check the offset between each loop instruction and
+ /// the loop basic block to determine if we can use the LOOP instruction
+ /// or if we need to set the LC/SA registers explicitly.
+ bool fixupLoopInstrs(MachineFunction &MF);
+
+ /// \brief Add the instructions to set the LC and SA registers explicitly.
+ void convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS);
+
+ };
+
+ char HexagonFixupHwLoops::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonFixupHwLoops, "hwloopsfixup",
+ "Hexagon Hardware Loops Fixup", false, false)
+
+FunctionPass *llvm::createHexagonFixupHwLoops() {
+ return new HexagonFixupHwLoops();
+}
+
+/// \brief Returns true if the instruction is a hardware loop instruction.
+static bool isHardwareLoop(const MachineInstr *MI) {
+ return MI->getOpcode() == Hexagon::LOOP0_r ||
+ MI->getOpcode() == Hexagon::LOOP0_i;
+}
+
+bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = fixupLoopInstrs(MF);
+ return Changed;
+}
+
+/// \brief For Hexagon, if the loop label is too far from the
+/// loop instruction then we need to set the LC0 and SA0 registers
+/// explicitly instead of using LOOP(start,count). This function
+/// checks the distance, and generates register assignments if needed.
+///
+/// This function makes two passes over the basic blocks. The first
+/// pass computes the offset of each basic block from the start.
+/// The second pass checks all the loop instructions.
+bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
+
+ // Offset of the current instruction from the start.
+ unsigned InstOffset = 0;
+ // Map from each basic block to the offset of its first instruction.
+ DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
+
+ // First pass - compute the offset of each basic block.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ BlockToInstOffset[MBB] = InstOffset;
+ InstOffset += (MBB->size() * 4);
+ }
+
+ // Second pass - check each loop instruction to see if it needs to
+ // be converted.
+ InstOffset = 0;
+ bool Changed = false;
+ RegScavenger RS;
+
+ // Loop over all the basic blocks.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ InstOffset = BlockToInstOffset[MBB];
+ RS.enterBasicBlock(MBB);
+
+ // Loop over all the instructions.
+ MachineBasicBlock::iterator MIE = MBB->end();
+ MachineBasicBlock::iterator MII = MBB->begin();
+ while (MII != MIE) {
+ if (isHardwareLoop(MII)) {
+ RS.forward(MII);
+ assert(MII->getOperand(0).isMBB() &&
+ "Expect a basic block as loop operand");
+ int Sub = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
+ unsigned Dist = Sub > 0 ? Sub : -Sub;
+ if (Dist > MAX_LOOP_DISTANCE) {
+ // Convert to explicitly setting LC0 and SA0.
+ convertLoopInstr(MF, MII, RS);
+ MII = MBB->erase(MII);
+ Changed = true;
+ } else {
+ ++MII;
+ }
+ } else {
+ ++MII;
+ }
+ InstOffset += 4;
+ }
+ }
+
+ return Changed;
+}
+
+/// \brief Convert a loop instruction to a sequence of instructions that
+/// set the LC0 and SA0 registers explicitly.
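+/// A scavenged scratch register stages an immediate trip count and the loop
+/// start address before they are transferred to LC0 and SA0 with TFCR.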
+void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF, + MachineBasicBlock::iterator &MII, + RegScavenger &RS) { + const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); + MachineBasicBlock *MBB = MII->getParent(); + DebugLoc DL = MII->getDebugLoc(); + unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0); + + // First, set the LC0 with the trip count. + if (MII->getOperand(1).isReg()) { + // Trip count is a register + BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0) + .addReg(MII->getOperand(1).getReg()); + } else { + // Trip count is an immediate. + BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch) + .addImm(MII->getOperand(1).getImm()); + BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0) + .addReg(Scratch); + } + // Then, set the SA0 with the loop start address. + BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch) + .addMBB(MII->getOperand(0).getMBB()); + BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0) + .addReg(Scratch); +} diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 9043cf9..d6a9329 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -327,6 +327,21 @@ bool HexagonFrameLowering::restoreCalleeSavedRegisters( return true; } +void HexagonFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + MachineInstr &MI = *I; + + if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) { + // Hexagon_TODO: add code + } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) { + // Hexagon_TODO: add code + } else { + llvm_unreachable("Cannot handle this call frame pseudo instruction"); + } + MBB.erase(I); +} + int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const { return MF.getFrameInfo()->getObjectOffset(FI); diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h index ad87f11..a62c76a 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -35,6 +35,11 @@ public: MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 2a00a9f..62aed13 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -27,9 +27,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "hwloops" -#include "Hexagon.h" -#include "HexagonTargetMachine.h" -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -37,79 +35,194 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/IR/Constants.h" #include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include 
"llvm/Target/TargetInstrInfo.h" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" + #include <algorithm> +#include <vector> using namespace llvm; +#ifndef NDEBUG +static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1)); +#endif + STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); +namespace llvm { + void initializeHexagonHardwareLoopsPass(PassRegistry&); +} + namespace { class CountValue; struct HexagonHardwareLoops : public MachineFunctionPass { - MachineLoopInfo *MLI; - MachineRegisterInfo *MRI; - const TargetInstrInfo *TII; + MachineLoopInfo *MLI; + MachineRegisterInfo *MRI; + MachineDominatorTree *MDT; + const HexagonTargetMachine *TM; + const HexagonInstrInfo *TII; + const HexagonRegisterInfo *TRI; +#ifndef NDEBUG + static int Counter; +#endif public: - static char ID; // Pass identification, replacement for typeid + static char ID; - HexagonHardwareLoops() : MachineFunctionPass(ID) {} + HexagonHardwareLoops() : MachineFunctionPass(ID) { + initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); const char *getPassName() const { return "Hexagon Hardware Loops"; } virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); AU.addRequired<MachineDominatorTree>(); - AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } private: - /// getCanonicalInductionVariable - Check to see if the loop has a canonical - /// induction variable. - /// Should be defined in MachineLoop. Based upon version in class Loop. - const MachineInstr *getCanonicalInductionVariable(MachineLoop *L) const; - - /// getTripCount - Return a loop-invariant LLVM register indicating the - /// number of times the loop will be executed. If the trip-count cannot - /// be determined, this return null. - CountValue *getTripCount(MachineLoop *L) const; - - /// isInductionOperation - Return true if the instruction matches the - /// pattern for an opertion that defines an induction variable. - bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const; + /// Kinds of comparisons in the compare instructions. + struct Comparison { + enum Kind { + EQ = 0x01, + NE = 0x02, + L = 0x04, // Less-than property. + G = 0x08, // Greater-than property. + U = 0x40, // Unsigned property. + LTs = L, + LEs = L | EQ, + GTs = G, + GEs = G | EQ, + LTu = L | U, + LEu = L | EQ | U, + GTu = G | U, + GEu = G | EQ | U + }; + + static Kind getSwappedComparison(Kind Cmp) { + assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator"); + if ((Cmp & L) || (Cmp & G)) + return (Kind)(Cmp ^ (L|G)); + return Cmp; + } + }; - /// isInvalidOperation - Return true if the instruction is not valid within - /// a hardware loop. + /// \brief Find the register that contains the loop controlling + /// induction variable. + /// If successful, it will return true and set the \p Reg, \p IVBump + /// and \p IVOp arguments. Otherwise it will return false. + /// The returned induction register is the register R that follows the + /// following induction pattern: + /// loop: + /// R = phi ..., [ R.next, LatchBlock ] + /// R.next = R + #bump + /// if (R.next < #N) goto loop + /// IVBump is the immediate value added to R, and IVOp is the instruction + /// "R.next = R + #bump". 
+ bool findInductionRegister(MachineLoop *L, unsigned &Reg, + int64_t &IVBump, MachineInstr *&IVOp) const; + + /// \brief Analyze the statements in a loop to determine if the loop + /// has a computable trip count and, if so, return a value that represents + /// the trip count expression. + CountValue *getLoopTripCount(MachineLoop *L, + SmallVector<MachineInstr*, 2> &OldInsts); + + /// \brief Return the expression that represents the number of times + /// a loop iterates. The function takes the operands that represent the + /// loop start value, loop end value, and induction value. Based upon + /// these operands, the function attempts to compute the trip count. + /// If the trip count is not directly available (as an immediate value, + /// or a register), the function will attempt to insert computation of it + /// to the loop's preheader. + CountValue *computeCount(MachineLoop *Loop, + const MachineOperand *Start, + const MachineOperand *End, + unsigned IVReg, + int64_t IVBump, + Comparison::Kind Cmp) const; + + /// \brief Return true if the instruction is not valid within a hardware + /// loop. bool isInvalidLoopOperation(const MachineInstr *MI) const; - /// containsInavlidInstruction - Return true if the loop contains an - /// instruction that inhibits using the hardware loop. + /// \brief Return true if the loop contains an instruction that inhibits + /// using the hardware loop. bool containsInvalidInstruction(MachineLoop *L) const; - /// converToHardwareLoop - Given a loop, check if we can convert it to a - /// hardware loop. If so, then perform the conversion and return true. + /// \brief Given a loop, check if we can convert it to a hardware loop. + /// If so, then perform the conversion and return true. bool convertToHardwareLoop(MachineLoop *L); + /// \brief Return true if the instruction is now dead. + bool isDead(const MachineInstr *MI, + SmallVector<MachineInstr*, 1> &DeadPhis) const; + + /// \brief Remove the instruction if it is now dead. + void removeIfDead(MachineInstr *MI); + + /// \brief Make sure that the "bump" instruction executes before the + /// compare. We need that for the IV fixup, so that the compare + /// instruction would not use a bumped value that has not yet been + /// defined. If the instructions are out of order, try to reorder them. + bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI); + + /// \brief Get the instruction that loads an immediate value into \p R, + /// or 0 if such an instruction does not exist. + MachineInstr *defWithImmediate(unsigned R); + + /// \brief Get the immediate value referenced to by \p MO, either for + /// immediate operands, or for register operands, where the register + /// was defined with an immediate value. + int64_t getImmediate(MachineOperand &MO); + + /// \brief Reset the given machine operand to now refer to a new immediate + /// value. Assumes that the operand was already referencing an immediate + /// value, either directly, or via a register. + void setImmediate(MachineOperand &MO, int64_t Val); + + /// \brief Fix the data flow of the induction varible. + /// The desired flow is: phi ---> bump -+-> comparison-in-latch. + /// | + /// +-> back to phi + /// where "bump" is the increment of the induction variable: + /// iv = iv + #const. + /// Due to some prior code transformations, the actual flow may look + /// like this: + /// phi -+-> bump ---> back to phi + /// | + /// +-> comparison-in-latch (against upper_bound-bump), + /// i.e. 
the comparison that controls the loop execution may be using + /// the value of the induction variable from before the increment. + /// + /// Return true if the loop's flow is the desired one (i.e. it's + /// either been fixed, or no fixing was necessary). + /// Otherwise, return false. This can happen if the induction variable + /// couldn't be identified, or if the value in the latch's comparison + /// cannot be adjusted to reflect the post-bump value. + bool fixupInductionVariable(MachineLoop *L); + + /// \brief Given a loop, if it does not have a preheader, create one. + /// Return the block that is the preheader. + MachineBasicBlock *createPreheaderForLoop(MachineLoop *L); }; char HexagonHardwareLoops::ID = 0; +#ifndef NDEBUG + int HexagonHardwareLoops::Counter = 0; +#endif - - // CountValue class - Abstraction for a trip count of a loop. A - // smaller vesrsion of the MachineOperand class without the concerns - // of changing the operand representation. + /// \brief Abstraction for a trip count of a loop. A smaller vesrsion + /// of the MachineOperand class without the concerns of changing the + /// operand representation. class CountValue { public: enum CountValueType { @@ -119,101 +232,62 @@ namespace { private: CountValueType Kind; union Values { - unsigned RegNum; - int64_t ImmVal; - Values(unsigned r) : RegNum(r) {} - Values(int64_t i) : ImmVal(i) {} + struct { + unsigned Reg; + unsigned Sub; + } R; + unsigned ImmVal; } Contents; - bool isNegative; public: - CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r), - isNegative(neg) {} - explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i), - isNegative(i < 0) {} - CountValueType getType() const { return Kind; } + explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) { + Kind = t; + if (Kind == CV_Register) { + Contents.R.Reg = v; + Contents.R.Sub = u; + } else { + Contents.ImmVal = v; + } + } bool isReg() const { return Kind == CV_Register; } bool isImm() const { return Kind == CV_Immediate; } - bool isNeg() const { return isNegative; } unsigned getReg() const { assert(isReg() && "Wrong CountValue accessor"); - return Contents.RegNum; + return Contents.R.Reg; } - void setReg(unsigned Val) { - Contents.RegNum = Val; + unsigned getSubReg() const { + assert(isReg() && "Wrong CountValue accessor"); + return Contents.R.Sub; } - int64_t getImm() const { + unsigned getImm() const { assert(isImm() && "Wrong CountValue accessor"); - if (isNegative) { - return -Contents.ImmVal; - } return Contents.ImmVal; } - void setImm(int64_t Val) { - Contents.ImmVal = Val; - } void print(raw_ostream &OS, const TargetMachine *TM = 0) const { - if (isReg()) { OS << PrintReg(getReg()); } - if (isImm()) { OS << getImm(); } - } - }; - - struct HexagonFixupHwLoops : public MachineFunctionPass { - public: - static char ID; // Pass identification, replacement for typeid. - - HexagonFixupHwLoops() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); + const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0; + if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); } + if (isImm()) { OS << Contents.ImmVal; } } - - private: - /// Maximum distance between the loop instr and the basic block. - /// Just an estimate. 
- static const unsigned MAX_LOOP_DISTANCE = 200; - - /// fixupLoopInstrs - Check the offset between each loop instruction and - /// the loop basic block to determine if we can use the LOOP instruction - /// or if we need to set the LC/SA registers explicitly. - bool fixupLoopInstrs(MachineFunction &MF); - - /// convertLoopInstr - Add the instruction to set the LC and SA registers - /// explicitly. - void convertLoopInstr(MachineFunction &MF, - MachineBasicBlock::iterator &MII, - RegScavenger &RS); - }; +} // end anonymous namespace - char HexagonFixupHwLoops::ID = 0; -} // end anonymous namespace +INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops", + "Hexagon Hardware Loops", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops", + "Hexagon Hardware Loops", false, false) -/// isHardwareLoop - Returns true if the instruction is a hardware loop -/// instruction. +/// \brief Returns true if the instruction is a hardware loop instruction. static bool isHardwareLoop(const MachineInstr *MI) { return MI->getOpcode() == Hexagon::LOOP0_r || MI->getOpcode() == Hexagon::LOOP0_i; } -/// isCompareEquals - Returns true if the instruction is a compare equals -/// instruction with an immediate operand. -static bool isCompareEqualsImm(const MachineInstr *MI) { - return MI->getOpcode() == Hexagon::CMPEQri; -} - - -/// createHexagonHardwareLoops - Factory for creating -/// the hardware loop phase. FunctionPass *llvm::createHexagonHardwareLoops() { return new HexagonHardwareLoops(); } @@ -224,45 +298,149 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - // get the loop information MLI = &getAnalysis<MachineLoopInfo>(); - // get the register information MRI = &MF.getRegInfo(); - // the target specific instructio info. - TII = MF.getTarget().getInstrInfo(); + MDT = &getAnalysis<MachineDominatorTree>(); + TM = static_cast<const HexagonTargetMachine*>(&MF.getTarget()); + TII = static_cast<const HexagonInstrInfo*>(TM->getInstrInfo()); + TRI = static_cast<const HexagonRegisterInfo*>(TM->getRegisterInfo()); for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) { MachineLoop *L = *I; - if (!L->getParentLoop()) { + if (!L->getParentLoop()) Changed |= convertToHardwareLoop(L); - } } return Changed; } -/// getCanonicalInductionVariable - Check to see if the loop has a canonical -/// induction variable. We check for a simple recurrence pattern - an -/// integer recurrence that decrements by one each time through the loop and -/// ends at zero. If so, return the phi node that corresponds to it. -/// -/// Based upon the similar code in LoopInfo except this code is specific to -/// the machine. -/// This method assumes that the IndVarSimplify pass has been run by 'opt'. + +bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, + unsigned &Reg, + int64_t &IVBump, + MachineInstr *&IVOp + ) const { + MachineBasicBlock *Header = L->getHeader(); + MachineBasicBlock *Preheader = L->getLoopPreheader(); + MachineBasicBlock *Latch = L->getLoopLatch(); + if (!Header || !Preheader || !Latch) + return false; + + // This pair represents an induction register together with an immediate + // value that will be added to it in each loop iteration. 
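+  // For instance, an induction variable stepped by 4 would be recorded
+  // here as the (illustrative) pair (R, 4).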
+  typedef std::pair<unsigned,int64_t> RegisterBump;
+
+  // Mapping:  R.next -> (R, bump), where R, R.next and bump are derived
+  // from an induction operation
+  //   R.next = R + bump
+  // where bump is an immediate value.
+  typedef std::map<unsigned,RegisterBump> InductionMap;
+
+  InductionMap IndMap;
+
+  typedef MachineBasicBlock::instr_iterator instr_iterator;
+  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+       I != E && I->isPHI(); ++I) {
+    MachineInstr *Phi = &*I;
+
+    // Have a PHI instruction.  Get the operand that corresponds to the
+    // latch block, and see if it is a result of an addition of form
+    // "reg+imm", where the "reg" is defined by the PHI node we are
+    // looking at.
+    for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
+      if (Phi->getOperand(i+1).getMBB() != Latch)
+        continue;
+
+      unsigned PhiOpReg = Phi->getOperand(i).getReg();
+      MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
+      unsigned UpdOpc = DI->getOpcode();
+      bool isAdd = (UpdOpc == Hexagon::ADD_ri);
+
+      if (isAdd) {
+        // If the register operand to the add is the PHI we're
+        // looking at, this meets the induction pattern.
+        unsigned IndReg = DI->getOperand(1).getReg();
+        if (MRI->getVRegDef(IndReg) == Phi) {
+          unsigned UpdReg = DI->getOperand(0).getReg();
+          int64_t V = DI->getOperand(2).getImm();
+          IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
+        }
+      }
+    }  // for (i)
+  }  // for (instr)
+
+  SmallVector<MachineOperand,2> Cond;
+  MachineBasicBlock *TB = 0, *FB = 0;
+  bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+  if (NotAnalyzed)
+    return false;
+
+  unsigned CSz = Cond.size();
+  assert (CSz == 1 || CSz == 2);
+  unsigned PredR = Cond[CSz-1].getReg();
+
+  MachineInstr *PredI = MRI->getVRegDef(PredR);
+  if (!PredI->isCompare())
+    return false;
+
+  unsigned CmpReg1 = 0, CmpReg2 = 0;
+  int CmpImm = 0, CmpMask = 0;
+  bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2,
+                                         CmpMask, CmpImm);
+  // Fail if the compare was not analyzed, or it's not comparing a register
+  // with an immediate value.  Not checking the mask here, since we handle
+  // the individual compare opcodes (including CMPb) later on.
+  if (!CmpAnalyzed)
+    return false;
+
+  // Exactly one of the input registers to the comparison should be among
+  // the induction registers.
+  InductionMap::iterator IndMapEnd = IndMap.end();
+  InductionMap::iterator F = IndMapEnd;
+  if (CmpReg1 != 0) {
+    InductionMap::iterator F1 = IndMap.find(CmpReg1);
+    if (F1 != IndMapEnd)
+      F = F1;
+  }
+  if (CmpReg2 != 0) {
+    InductionMap::iterator F2 = IndMap.find(CmpReg2);
+    if (F2 != IndMapEnd) {
+      if (F != IndMapEnd)
+        return false;
+      F = F2;
+    }
+  }
+  if (F == IndMapEnd)
+    return false;
+
+  Reg = F->second.first;
+  IVBump = F->second.second;
+  IVOp = MRI->getVRegDef(F->first);
+  return true;
+}
+
+
+/// \brief Analyze the statements in a loop to determine if the loop has
+/// a computable trip count and, if so, return a value that represents
+/// the trip count expression.
 ///
-const MachineInstr
-*HexagonHardwareLoops::getCanonicalInductionVariable(MachineLoop *L) const {
+/// This function iterates over the phi nodes in the loop to check for
+/// induction variable patterns that are used in the calculation for
+/// the number of times the loop is executed.
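+///
+/// For example (illustrative), a loop "for (i = 0; i < 10; i += 2)"
+/// has the immediate trip count (10 - 0 + (2-1)) / 2 = 5.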
+CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, + SmallVector<MachineInstr*, 2> &OldInsts) { MachineBasicBlock *TopMBB = L->getTopBlock(); MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); assert(PI != TopMBB->pred_end() && "Loop must have more than one incoming edge!"); MachineBasicBlock *Backedge = *PI++; - if (PI == TopMBB->pred_end()) return 0; // dead loop + if (PI == TopMBB->pred_end()) // dead loop? + return 0; MachineBasicBlock *Incoming = *PI++; - if (PI != TopMBB->pred_end()) return 0; // multiple backedges? + if (PI != TopMBB->pred_end()) // multiple backedges? + return 0; - // make sure there is one incoming and one backedge and determine which + // Make sure there is one incoming and one backedge and determine which // is which. if (L->contains(Incoming)) { if (L->contains(Backedge)) @@ -271,139 +449,433 @@ const MachineInstr } else if (!L->contains(Backedge)) return 0; - // Loop over all of the PHI nodes, looking for a canonical induction variable: - // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2". - // - The recurrence comes from the backedge. - // - the definition is an induction operatio.n - for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end(); - I != E && I->isPHI(); ++I) { - const MachineInstr *MPhi = &*I; - unsigned DefReg = MPhi->getOperand(0).getReg(); - for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) { - // Check each operand for the value from the backedge. - MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB(); - if (L->contains(MBB)) { // operands comes from the backedge - // Check if the definition is an induction operation. - const MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg()); - if (isInductionOperation(DI, DefReg)) { - return MPhi; - } - } + // Look for the cmp instruction to determine if we can get a useful trip + // count. The trip count can be either a register or an immediate. The + // location of the value depends upon the type (reg or imm). + MachineBasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return 0; + + unsigned IVReg = 0; + int64_t IVBump = 0; + MachineInstr *IVOp; + bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp); + if (!FoundIV) + return 0; + + MachineBasicBlock *Preheader = L->getLoopPreheader(); + + MachineOperand *InitialValue = 0; + MachineInstr *IV_Phi = MRI->getVRegDef(IVReg); + for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) { + MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB(); + if (MBB == Preheader) + InitialValue = &IV_Phi->getOperand(i); + else if (MBB == Latch) + IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump. + } + if (!InitialValue) + return 0; + + SmallVector<MachineOperand,2> Cond; + MachineBasicBlock *TB = 0, *FB = 0; + bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); + if (NotAnalyzed) + return 0; + + MachineBasicBlock *Header = L->getHeader(); + // TB must be non-null. If FB is also non-null, one of them must be + // the header. Otherwise, branch to TB could be exiting the loop, and + // the fall through can go to the header. + assert (TB && "Latch block without a branch?"); + assert ((!FB || TB == Header || FB == Header) && "Branches not to header?"); + if (!TB || (FB && TB != Header && FB != Header)) + return 0; + + // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch + // to put imm(0), followed by P in the vector Cond. + // If TB is not the header, it means that the "not-taken" path must lead + // to the header. 
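+  // So the condition is negated in exactly one of two cases: the branch
+  // tests "!P" (Cond then has two entries), or the taken target TB is
+  // not the header.  If both hold, the negations cancel; hence the XOR.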
+ bool Negated = (Cond.size() > 1) ^ (TB != Header); + unsigned PredReg = Cond[Cond.size()-1].getReg(); + MachineInstr *CondI = MRI->getVRegDef(PredReg); + unsigned CondOpc = CondI->getOpcode(); + + unsigned CmpReg1 = 0, CmpReg2 = 0; + int Mask = 0, ImmValue = 0; + bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2, + Mask, ImmValue); + if (!AnalyzedCmp) + return 0; + + // The comparison operator type determines how we compute the loop + // trip count. + OldInsts.push_back(CondI); + OldInsts.push_back(IVOp); + + // Sadly, the following code gets information based on the position + // of the operands in the compare instruction. This has to be done + // this way, because the comparisons check for a specific relationship + // between the operands (e.g. is-less-than), rather than to find out + // what relationship the operands are in (as on PPC). + Comparison::Kind Cmp; + bool isSwapped = false; + const MachineOperand &Op1 = CondI->getOperand(1); + const MachineOperand &Op2 = CondI->getOperand(2); + const MachineOperand *EndValue = 0; + + if (Op1.isReg()) { + if (Op2.isImm() || Op1.getReg() == IVReg) + EndValue = &Op2; + else { + EndValue = &Op1; + isSwapped = true; } } - return 0; -} -/// getTripCount - Return a loop-invariant LLVM value indicating the -/// number of times the loop will be executed. The trip count can -/// be either a register or a constant value. If the trip-count -/// cannot be determined, this returns null. -/// -/// We find the trip count from the phi instruction that defines the -/// induction variable. We follow the links to the CMP instruction -/// to get the trip count. -/// -/// Based upon getTripCount in LoopInfo. -/// -CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const { - // Check that the loop has a induction variable. - const MachineInstr *IV_Inst = getCanonicalInductionVariable(L); - if (IV_Inst == 0) return 0; - - // Canonical loops will end with a 'cmpeq_ri IV, Imm', - // if Imm is 0, get the count from the PHI opnd - // if Imm is -M, than M is the count - // Otherwise, Imm is the count - const MachineOperand *IV_Opnd; - const MachineOperand *InitialValue; - if (!L->contains(IV_Inst->getOperand(2).getMBB())) { - InitialValue = &IV_Inst->getOperand(1); - IV_Opnd = &IV_Inst->getOperand(3); - } else { - InitialValue = &IV_Inst->getOperand(3); - IV_Opnd = &IV_Inst->getOperand(1); - } - - // Look for the cmp instruction to determine if we - // can get a useful trip count. The trip count can - // be either a register or an immediate. The location - // of the value depends upon the type (reg or imm). - for (MachineRegisterInfo::reg_iterator - RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); - RI != RE; ++RI) { - IV_Opnd = &RI.getOperand(); - const MachineInstr *MI = IV_Opnd->getParent(); - if (L->contains(MI) && isCompareEqualsImm(MI)) { - const MachineOperand &MO = MI->getOperand(2); - assert(MO.isImm() && "IV Cmp Operand should be 0"); - int64_t ImmVal = MO.getImm(); - - const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg()); - assert(L->contains(IV_DefInstr->getParent()) && - "IV definition should occurs in loop"); - int64_t iv_value = IV_DefInstr->getOperand(2).getImm(); - - if (ImmVal == 0) { - // Make sure the induction variable changes by one on each iteration. - if (iv_value != 1 && iv_value != -1) { + if (!EndValue) + return 0; + + switch (CondOpc) { + case Hexagon::CMPEQri: + case Hexagon::CMPEQrr: + Cmp = !Negated ? Comparison::EQ : Comparison::NE; + break; + case Hexagon::CMPLTrr: + Cmp = !Negated ? 
Comparison::LTs : Comparison::GEs; + break; + case Hexagon::CMPLTUrr: + Cmp = !Negated ? Comparison::LTu : Comparison::GEu; + break; + case Hexagon::CMPGTUri: + case Hexagon::CMPGTUrr: + Cmp = !Negated ? Comparison::GTu : Comparison::LEu; + break; + case Hexagon::CMPGTri: + case Hexagon::CMPGTrr: + Cmp = !Negated ? Comparison::GTs : Comparison::LEs; + break; + // Very limited support for byte/halfword compares. + case Hexagon::CMPbEQri_V4: + case Hexagon::CMPhEQri_V4: { + if (IVBump != 1) + return 0; + + int64_t InitV, EndV; + // Since the comparisons are "ri", the EndValue should be an + // immediate. Check it just in case. + assert(EndValue->isImm() && "Unrecognized latch comparison"); + EndV = EndValue->getImm(); + // Allow InitialValue to be a register defined with an immediate. + if (InitialValue->isReg()) { + if (!defWithImmediate(InitialValue->getReg())) return 0; - } - return new CountValue(InitialValue->getReg(), iv_value > 0); + InitV = getImmediate(*InitialValue); } else { - assert(InitialValue->isReg() && "Expecting register for init value"); - const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg()); - if (DefInstr && DefInstr->getOpcode() == Hexagon::TFRI) { - int64_t count = ImmVal - DefInstr->getOperand(1).getImm(); - if ((count % iv_value) != 0) { - return 0; - } - return new CountValue(count/iv_value); - } + assert(InitialValue->isImm()); + InitV = InitialValue->getImm(); + } + if (InitV >= EndV) + return 0; + if (CondOpc == Hexagon::CMPbEQri_V4) { + if (!isInt<8>(InitV) || !isInt<8>(EndV)) + return 0; + } else { // Hexagon::CMPhEQri_V4 + if (!isInt<16>(InitV) || !isInt<16>(EndV)) + return 0; } + Cmp = !Negated ? Comparison::EQ : Comparison::NE; + break; } + default: + return 0; } - return 0; + + if (isSwapped) + Cmp = Comparison::getSwappedComparison(Cmp); + + if (InitialValue->isReg()) { + unsigned R = InitialValue->getReg(); + MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); + if (!MDT->properlyDominates(DefBB, Header)) + return 0; + OldInsts.push_back(MRI->getVRegDef(R)); + } + if (EndValue->isReg()) { + unsigned R = EndValue->getReg(); + MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); + if (!MDT->properlyDominates(DefBB, Header)) + return 0; + } + + return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp); } -/// isInductionOperation - return true if the operation is matches the -/// pattern that defines an induction variable: -/// add iv, c -/// -bool -HexagonHardwareLoops::isInductionOperation(const MachineInstr *MI, - unsigned IVReg) const { - return (MI->getOpcode() == - Hexagon::ADD_ri && MI->getOperand(1).getReg() == IVReg); +/// \brief Helper function that returns the expression that represents the +/// number of times a loop iterates. The function takes the operands that +/// represent the loop start value, loop end value, and induction value. +/// Based upon these operands, the function attempts to compute the trip count. +CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, + const MachineOperand *Start, + const MachineOperand *End, + unsigned IVReg, + int64_t IVBump, + Comparison::Kind Cmp) const { + // Cannot handle comparison EQ, i.e. while (A == B). + if (Cmp == Comparison::EQ) + return 0; + + // Check if either the start or end values are an assignment of an immediate. + // If so, use the immediate value rather than the register. 
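+  // E.g. if Start is a vreg defined by "vreg = TFRI #0" (illustrative),
+  // Start is replaced below by that TFRI's immediate operand #0.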
+  if (Start->isReg()) {
+    const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
+    if (StartValInstr && StartValInstr->getOpcode() == Hexagon::TFRI)
+      Start = &StartValInstr->getOperand(1);
+  }
+  if (End->isReg()) {
+    const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
+    if (EndValInstr && EndValInstr->getOpcode() == Hexagon::TFRI)
+      End = &EndValInstr->getOperand(1);
+  }
+
+  assert (Start->isReg() || Start->isImm());
+  assert (End->isReg() || End->isImm());
+
+  bool CmpLess =     Cmp & Comparison::L;
+  bool CmpGreater =  Cmp & Comparison::G;
+  bool CmpHasEqual = Cmp & Comparison::EQ;
+
+  // Avoid certain wrap-arounds.  This doesn't detect all wrap-arounds.
+  // If loop executes while iv is "less" with the iv value going down, then
+  // the iv must wrap.
+  if (CmpLess && IVBump < 0)
+    return 0;
+  // If loop executes while iv is "greater" with the iv value going up, then
+  // the iv must wrap.
+  if (CmpGreater && IVBump > 0)
+    return 0;
+
+  if (Start->isImm() && End->isImm()) {
+    // Both start and end are immediates.
+    int64_t StartV = Start->getImm();
+    int64_t EndV = End->getImm();
+    int64_t Dist = EndV - StartV;
+    if (Dist == 0)
+      return 0;
+
+    bool Exact = (Dist % IVBump) == 0;
+
+    if (Cmp == Comparison::NE) {
+      if (!Exact)
+        return 0;
+      if ((Dist < 0) ^ (IVBump < 0))
+        return 0;
+    }
+
+    // For comparisons that include the final value (i.e. include equality
+    // with the final value), we need to increase the distance by 1.
+    if (CmpHasEqual)
+      Dist = Dist > 0 ? Dist+1 : Dist-1;
+
+    // assert (CmpLess => Dist > 0);
+    assert ((!CmpLess || Dist > 0) && "Loop should never iterate!");
+    // assert (CmpGreater => Dist < 0);
+    assert ((!CmpGreater || Dist < 0) && "Loop should never iterate!");
+
+    // "Normalized" distance, i.e. with the bump set to +-1.
+    int64_t Dist1 = (IVBump > 0) ? (Dist +  (IVBump-1)) /  IVBump
+                                 : (-Dist + (-IVBump-1)) / (-IVBump);
+    assert (Dist1 > 0 && "Fishy thing.  Both operands have the same sign.");
+
+    uint64_t Count = Dist1;
+
+    if (Count > 0xFFFFFFFFULL)
+      return 0;
+
+    return new CountValue(CountValue::CV_Immediate, Count);
+  }
+
+  // A general case: Start and End are some values, but the actual
+  // iteration count may not be available.  If it is not, insert
+  // a computation of it into the preheader.
+
+  // If the induction variable bump is not a power of 2, quit.
+  // Otherwise we'd need a general integer division.
+  if (!isPowerOf2_64(abs(IVBump)))
+    return 0;
+
+  MachineBasicBlock *PH = Loop->getLoopPreheader();
+  assert (PH && "Should have a preheader by now");
+  MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
+  DebugLoc DL = (InsertPos != PH->end()) ? InsertPos->getDebugLoc()
+                                         : DebugLoc();
+
+  // If Start is an immediate and End is a register, the trip count
+  // will be "reg - imm".  Hexagon's "subtract immediate" instruction
+  // is actually "reg + -imm".
+
+  // If the loop IV is going downwards, i.e. if the bump is negative,
+  // then the iteration count (computed as End-Start) will need to be
+  // negated.  To avoid the negation, just swap Start and End.
+  if (IVBump < 0) {
+    std::swap(Start, End);
+    IVBump = -IVBump;
+  }
+  // Cmp may now have a wrong direction, e.g.  LEs may now be GEs.
+  // Signedness, and "including equality" are preserved.
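+  // Illustrative example: Start=10, End=0, IVBump=-2 under GTs becomes
+  // Start=0, End=10, IVBump=2, and the code below computes
+  // (10 - 0 + (2-1)) / 2 = 5 iterations.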
+ + bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm) + bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg) + + int64_t StartV = 0, EndV = 0; + if (Start->isImm()) + StartV = Start->getImm(); + if (End->isImm()) + EndV = End->getImm(); + + int64_t AdjV = 0; + // To compute the iteration count, we would need this computation: + // Count = (End - Start + (IVBump-1)) / IVBump + // or, when CmpHasEqual: + // Count = (End - Start + (IVBump-1)+1) / IVBump + // The "IVBump-1" part is the adjustment (AdjV). We can avoid + // generating an instruction specifically to add it if we can adjust + // the immediate values for Start or End. + + if (CmpHasEqual) { + // Need to add 1 to the total iteration count. + if (Start->isImm()) + StartV--; + else if (End->isImm()) + EndV++; + else + AdjV += 1; + } + + if (Cmp != Comparison::NE) { + if (Start->isImm()) + StartV -= (IVBump-1); + else if (End->isImm()) + EndV += (IVBump-1); + else + AdjV += (IVBump-1); + } + + unsigned R = 0, SR = 0; + if (Start->isReg()) { + R = Start->getReg(); + SR = Start->getSubReg(); + } else { + R = End->getReg(); + SR = End->getSubReg(); + } + const TargetRegisterClass *RC = MRI->getRegClass(R); + // Hardware loops cannot handle 64-bit registers. If it's a double + // register, it has to have a subregister. + if (!SR && RC == &Hexagon::DoubleRegsRegClass) + return 0; + const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; + + // Compute DistR (register with the distance between Start and End). + unsigned DistR, DistSR; + + // Avoid special case, where the start value is an imm(0). + if (Start->isImm() && StartV == 0) { + DistR = End->getReg(); + DistSR = End->getSubReg(); + } else { + const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::SUB_rr) : + (RegToImm ? TII->get(Hexagon::SUB_ri) : + TII->get(Hexagon::ADD_ri)); + unsigned SubR = MRI->createVirtualRegister(IntRC); + MachineInstrBuilder SubIB = + BuildMI(*PH, InsertPos, DL, SubD, SubR); + + if (RegToReg) { + SubIB.addReg(End->getReg(), 0, End->getSubReg()) + .addReg(Start->getReg(), 0, Start->getSubReg()); + } else if (RegToImm) { + SubIB.addImm(EndV) + .addReg(Start->getReg(), 0, Start->getSubReg()); + } else { // ImmToReg + SubIB.addReg(End->getReg(), 0, End->getSubReg()) + .addImm(-StartV); + } + DistR = SubR; + DistSR = 0; + } + + // From DistR, compute AdjR (register with the adjusted distance). + unsigned AdjR, AdjSR; + + if (AdjV == 0) { + AdjR = DistR; + AdjSR = DistSR; + } else { + // Generate CountR = ADD DistR, AdjVal + unsigned AddR = MRI->createVirtualRegister(IntRC); + const MCInstrDesc &AddD = TII->get(Hexagon::ADD_ri); + BuildMI(*PH, InsertPos, DL, AddD, AddR) + .addReg(DistR, 0, DistSR) + .addImm(AdjV); + + AdjR = AddR; + AdjSR = 0; + } + + // From AdjR, compute CountR (register with the final count). + unsigned CountR, CountSR; + + if (IVBump == 1) { + CountR = AdjR; + CountSR = AdjSR; + } else { + // The IV bump is a power of two. Log_2(IV bump) is the shift amount. + unsigned Shift = Log2_32(IVBump); + + // Generate NormR = LSR DistR, Shift. + unsigned LsrR = MRI->createVirtualRegister(IntRC); + const MCInstrDesc &LsrD = TII->get(Hexagon::LSR_ri); + BuildMI(*PH, InsertPos, DL, LsrD, LsrR) + .addReg(AdjR, 0, AdjSR) + .addImm(Shift); + + CountR = LsrR; + CountSR = 0; + } + + return new CountValue(CountValue::CV_Register, CountR, CountSR); } -/// isInvalidOperation - Return true if the operation is invalid within -/// hardware loop. 
-bool
-HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
+
+/// \brief Return true if the operation is invalid within a hardware loop.
+bool HexagonHardwareLoops::isInvalidLoopOperation(
+      const MachineInstr *MI) const {
   // call is not allowed because the callee may use a hardware loop
-  if (MI->getDesc().isCall()) {
+  if (MI->getDesc().isCall())
     return true;
-  }
+
   // do not allow nested hardware loops
-  if (isHardwareLoop(MI)) {
+  if (isHardwareLoop(MI))
     return true;
-  }
+
   // check if the instruction defines a hardware loop register
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
-    if (MO.isReg() && MO.isDef() &&
-        (MO.getReg() == Hexagon::LC0 || MO.getReg() == Hexagon::LC1 ||
-         MO.getReg() == Hexagon::SA0 || MO.getReg() == Hexagon::SA0)) {
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    unsigned R = MO.getReg();
+    if (R == Hexagon::LC0 || R == Hexagon::LC1 ||
+        R == Hexagon::SA0 || R == Hexagon::SA1)
       return true;
-    }
   }
   return false;
 }
 
-/// containsInvalidInstruction - Return true if the loop contains
-/// an instruction that inhibits the use of the hardware loop function.
-///
+
+/// \brief Return true if the loop contains an instruction that inhibits
+/// the use of the hardware loop function.
 bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
   const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
   for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
@@ -411,58 +883,184 @@ bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
     for (MachineBasicBlock::iterator
            MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
       const MachineInstr *MI = &*MII;
-      if (isInvalidLoopOperation(MI)) {
+      if (isInvalidLoopOperation(MI))
         return true;
-      }
     }
   }
   return false;
 }
 
-/// converToHardwareLoop - check if the loop is a candidate for
-/// converting to a hardware loop.  If so, then perform the
-/// transformation.
+
+/// \brief Returns true if the instruction is dead.  This was essentially
+/// copied from DeadMachineInstructionElim::isDead, but with special cases
+/// for inline asm, physical registers and instructions with side effects
+/// removed.
+bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
+                              SmallVector<MachineInstr*, 1> &DeadPhis) const {
+  // Examine each operand.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+
+    unsigned Reg = MO.getReg();
+    if (MRI->use_nodbg_empty(Reg))
+      continue;
+
+    typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator;
+
+    // This instruction has users, but if the only user is the phi node for the
+    // parent block, and the only use of that phi node is this instruction, then
+    // this instruction is dead: both it (and the phi node) can be removed.
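+    // Illustrative shape of that pattern:
+    //   vreg1 = PHI ..., vreg2, ...    (only user of vreg2)
+    //   vreg2 = ADD_ri vreg1, #1       (MI itself, only user of vreg1)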
+ use_nodbg_iterator I = MRI->use_nodbg_begin(Reg); + use_nodbg_iterator End = MRI->use_nodbg_end(); + if (llvm::next(I) != End || !I.getOperand().getParent()->isPHI()) + return false; + + MachineInstr *OnePhi = I.getOperand().getParent(); + for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) { + const MachineOperand &OPO = OnePhi->getOperand(j); + if (!OPO.isReg() || !OPO.isDef()) + continue; + + unsigned OPReg = OPO.getReg(); + use_nodbg_iterator nextJ; + for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg); + J != End; J = nextJ) { + nextJ = llvm::next(J); + MachineOperand &Use = J.getOperand(); + MachineInstr *UseMI = Use.getParent(); + + // If the phi node has a user that is not MI, bail... + if (MI != UseMI) + return false; + } + } + DeadPhis.push_back(OnePhi); + } + + // If there are no defs with uses, the instruction is dead. + return true; +} + +void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { + // This procedure was essentially copied from DeadMachineInstructionElim. + + SmallVector<MachineInstr*, 1> DeadPhis; + if (isDead(MI, DeadPhis)) { + DEBUG(dbgs() << "HW looping will remove: " << *MI); + + // It is possible that some DBG_VALUE instructions refer to this + // instruction. Examine each def operand for such references; + // if found, mark the DBG_VALUE as undef (but don't delete it). + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + MachineRegisterInfo::use_iterator nextI; + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), + E = MRI->use_end(); I != E; I = nextI) { + nextI = llvm::next(I); // I is invalidated by the setReg + MachineOperand &Use = I.getOperand(); + MachineInstr *UseMI = Use.getParent(); + if (UseMI == MI) + continue; + if (Use.isDebug()) + UseMI->getOperand(0).setReg(0U); + // This may also be a "instr -> phi -> instr" case which can + // be removed too. + } + } + + MI->eraseFromParent(); + for (unsigned i = 0; i < DeadPhis.size(); ++i) + DeadPhis[i]->eraseFromParent(); + } +} + +/// \brief Check if the loop is a candidate for converting to a hardware +/// loop. If so, then perform the transformation. /// -/// This function works on innermost loops first. A loop can -/// be converted if it is a counting loop; either a register -/// value or an immediate. +/// This function works on innermost loops first. A loop can be converted +/// if it is a counting loop; either a register value or an immediate. /// -/// The code makes several assumptions about the representation -/// of the loop in llvm. +/// The code makes several assumptions about the representation of the loop +/// in llvm. bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { + // This is just for sanity. + assert(L->getHeader() && "Loop without a header?"); + bool Changed = false; // Process nested loops first. - for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) { + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) Changed |= convertToHardwareLoop(*I); - } + // If a nested loop has been converted, then we can't convert this loop. - if (Changed) { + if (Changed) return Changed; + +#ifndef NDEBUG + // Stop trying after reaching the limit (if any). + int Limit = HWLoopLimit; + if (Limit >= 0) { + if (Counter >= HWLoopLimit) + return false; + Counter++; } - // Are we able to determine the trip count for the loop? 
- CountValue *TripCount = getTripCount(L); - if (TripCount == 0) { - return false; - } +#endif + // Does the loop contain any invalid instructions? - if (containsInvalidInstruction(L)) { + if (containsInvalidInstruction(L)) return false; - } - MachineBasicBlock *Preheader = L->getLoopPreheader(); - // No preheader means there's not place for the loop instr. - if (Preheader == 0) { + + // Is the induction variable bump feeding the latch condition? + if (!fixupInductionVariable(L)) return false; - } - MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); MachineBasicBlock *LastMBB = L->getExitingBlock(); // Don't generate hw loop if the loop has more than one exit. - if (LastMBB == 0) { + if (LastMBB == 0) return false; - } + MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); - if (LastI == LastMBB->end()) { + if (LastI == LastMBB->end()) return false; + + // Ensure the loop has a preheader: the loop instruction will be + // placed there. + bool NewPreheader = false; + MachineBasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + Preheader = createPreheaderForLoop(L); + if (!Preheader) + return false; + NewPreheader = true; + } + MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); + + SmallVector<MachineInstr*, 2> OldInsts; + // Are we able to determine the trip count for the loop? + CountValue *TripCount = getLoopTripCount(L, OldInsts); + if (TripCount == 0) + return false; + + // Is the trip count available in the preheader? + if (TripCount->isReg()) { + // There will be a use of the register inserted into the preheader, + // so make sure that the register is actually defined at that point. + MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg()); + MachineBasicBlock *BBDef = TCDef->getParent(); + if (!NewPreheader) { + if (!MDT->dominates(BBDef, Preheader)) + return false; + } else { + // If we have just created a preheader, the dominator tree won't be + // aware of it. Check if the definition of the register dominates + // the header, but is not the header itself. + if (!MDT->properlyDominates(BBDef, L->getHeader())) + return false; + } } // Determine the loop start. @@ -470,53 +1068,53 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { if (L->getLoopLatch() != LastMBB) { // When the exit and latch are not the same, use the latch block as the // start. - // The loop start address is used only after the 1st iteration, and the loop - // latch may contains instrs. that need to be executed after the 1st iter. + // The loop start address is used only after the 1st iteration, and the + // loop latch may contains instrs. that need to be executed after the + // first iteration. LoopStart = L->getLoopLatch(); // Make sure the latch is a successor of the exit, otherwise it won't work. - if (!LastMBB->isSuccessor(LoopStart)) { + if (!LastMBB->isSuccessor(LoopStart)) return false; - } } - // Convert the loop to a hardware loop + // Convert the loop to a hardware loop. DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); - DebugLoc InsertPosDL; + DebugLoc DL; if (InsertPos != Preheader->end()) - InsertPosDL = InsertPos->getDebugLoc(); + DL = InsertPos->getDebugLoc(); if (TripCount->isReg()) { // Create a copy of the loop count register. 
- MachineFunction *MF = LastMBB->getParent(); - const TargetRegisterClass *RC = - MF->getRegInfo().getRegClass(TripCount->getReg()); - unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC); - BuildMI(*Preheader, InsertPos, InsertPosDL, - TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg()); - if (TripCount->isNeg()) { - unsigned CountReg1 = CountReg; - CountReg = MF->getRegInfo().createVirtualRegister(RC); - BuildMI(*Preheader, InsertPos, InsertPosDL, - TII->get(Hexagon::NEG), CountReg).addReg(CountReg1); - } - + unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) + .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); // Add the Loop instruction to the beginning of the loop. - BuildMI(*Preheader, InsertPos, InsertPosDL, - TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg); + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r)) + .addMBB(LoopStart) + .addReg(CountReg); } else { - assert(TripCount->isImm() && "Expecting immedate vaule for trip count"); - // Add the Loop immediate instruction to the beginning of the loop. + assert(TripCount->isImm() && "Expecting immediate value for trip count"); + // Add the Loop immediate instruction to the beginning of the loop, + // if the immediate fits in the instructions. Otherwise, we need to + // create a new virtual register. int64_t CountImm = TripCount->getImm(); - BuildMI(*Preheader, InsertPos, InsertPosDL, - TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm); + if (!TII->isValidOffset(Hexagon::LOOP0_i, CountImm)) { + unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::TFRI), CountReg) + .addImm(CountImm); + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r)) + .addMBB(LoopStart).addReg(CountReg); + } else + BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_i)) + .addMBB(LoopStart).addImm(CountImm); } - // Make sure the loop start always has a reference in the CFG. We need to - // create a BlockAddress operand to get this mechanism to work both the + // Make sure the loop start always has a reference in the CFG. We need + // to create a BlockAddress operand to get this mechanism to work both the // MachineBasicBlock and BasicBlock objects need the flag set. LoopStart->setHasAddressTaken(); // This line is needed to set the hasAddressTaken flag on the BasicBlock - // object + // object. BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock())); // Replace the loop branch with an endloop instruction. @@ -529,13 +1127,12 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { // - a conditional branch to the loop start. if (LastI->getOpcode() == Hexagon::JMP_c || LastI->getOpcode() == Hexagon::JMP_cNot) { - // delete one and change/add an uncond. branch to out of the loop + // Delete one and change/add an uncond. branch to out of the loop. 
MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); LastI = LastMBB->erase(LastI); if (!L->contains(BranchTarget)) { - if (LastI != LastMBB->end()) { - TII->RemoveBranch(*LastMBB); - } + if (LastI != LastMBB->end()) + LastI = LastMBB->erase(LastI); SmallVector<MachineOperand, 0> Cond; TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, LastIDL); } @@ -545,110 +1142,414 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { } delete TripCount; + // The induction operation and the comparison may now be + // unneeded. If these are unneeded, then remove them. + for (unsigned i = 0; i < OldInsts.size(); ++i) + removeIfDead(OldInsts[i]); + ++NumHWLoops; return true; } -/// createHexagonFixupHwLoops - Factory for creating the hardware loop -/// phase. -FunctionPass *llvm::createHexagonFixupHwLoops() { - return new HexagonFixupHwLoops(); + +bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI, + MachineInstr *CmpI) { + assert (BumpI != CmpI && "Bump and compare in the same instruction?"); + + MachineBasicBlock *BB = BumpI->getParent(); + if (CmpI->getParent() != BB) + return false; + + typedef MachineBasicBlock::instr_iterator instr_iterator; + // Check if things are in order to begin with. + for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I) + if (&*I == CmpI) + return true; + + // Out of order. + unsigned PredR = CmpI->getOperand(0).getReg(); + bool FoundBump = false; + instr_iterator CmpIt = CmpI, NextIt = llvm::next(CmpIt); + for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) { + MachineInstr *In = &*I; + for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) { + MachineOperand &MO = In->getOperand(i); + if (MO.isReg() && MO.isUse()) { + if (MO.getReg() == PredR) // Found an intervening use of PredR. + return false; + } + } + + if (In == BumpI) { + instr_iterator After = BumpI; + instr_iterator From = CmpI; + BB->splice(llvm::next(After), BB, From); + FoundBump = true; + break; + } + } + assert (FoundBump && "Cannot determine instruction order"); + return FoundBump; } -bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "****** Hexagon Hardware Loop Fixup ******\n"); - bool Changed = fixupLoopInstrs(MF); - return Changed; +MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) { + MachineInstr *DI = MRI->getVRegDef(R); + unsigned DOpc = DI->getOpcode(); + switch (DOpc) { + case Hexagon::TFRI: + case Hexagon::TFRI64: + case Hexagon::CONST32_Int_Real: + case Hexagon::CONST64_Int_Real: + return DI; + } + return 0; } -/// fixupLoopInsts - For Hexagon, if the loop label is to far from the -/// loop instruction then we need to set the LC0 and SA0 registers -/// explicitly instead of using LOOP(start,count). This function -/// checks the distance, and generates register assignments if needed. -/// -/// This function makes two passes over the basic blocks. The first -/// pass computes the offset of the basic block from the start. -/// The second pass checks all the loop instructions. -bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) { - - // Offset of the current instruction from the start. - unsigned InstOffset = 0; - // Map for each basic block to it's first instruction. - DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset; - - // First pass - compute the offset of each basic block. 
- for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end(); - MBB != MBBe; ++MBB) { - BlockToInstOffset[MBB] = InstOffset; - InstOffset += (MBB->size() * 4); - } - - // Second pass - check each loop instruction to see if it needs to - // be converted. - InstOffset = 0; - bool Changed = false; - RegScavenger RS; - - // Loop over all the basic blocks. - for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end(); - MBB != MBBe; ++MBB) { - InstOffset = BlockToInstOffset[MBB]; - RS.enterBasicBlock(MBB); - - // Loop over all the instructions. - MachineBasicBlock::iterator MIE = MBB->end(); - MachineBasicBlock::iterator MII = MBB->begin(); - while (MII != MIE) { - if (isHardwareLoop(MII)) { - RS.forward(MII); - assert(MII->getOperand(0).isMBB() && - "Expect a basic block as loop operand"); - int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()]; - diff = (diff > 0 ? diff : -diff); - if ((unsigned)diff > MAX_LOOP_DISTANCE) { - // Convert to explicity setting LC0 and SA0. - convertLoopInstr(MF, MII, RS); - MII = MBB->erase(MII); - Changed = true; - } else { - ++MII; + +int64_t HexagonHardwareLoops::getImmediate(MachineOperand &MO) { + if (MO.isImm()) + return MO.getImm(); + assert(MO.isReg()); + unsigned R = MO.getReg(); + MachineInstr *DI = defWithImmediate(R); + assert(DI && "Need an immediate operand"); + // All currently supported "define-with-immediate" instructions have the + // actual immediate value in the operand(1). + int64_t v = DI->getOperand(1).getImm(); + return v; +} + + +void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { + if (MO.isImm()) { + MO.setImm(Val); + return; + } + + assert(MO.isReg()); + unsigned R = MO.getReg(); + MachineInstr *DI = defWithImmediate(R); + if (MRI->hasOneNonDBGUse(R)) { + // If R has only one use, then just change its defining instruction to + // the new immediate value. + DI->getOperand(1).setImm(Val); + return; + } + + const TargetRegisterClass *RC = MRI->getRegClass(R); + unsigned NewR = MRI->createVirtualRegister(RC); + MachineBasicBlock &B = *DI->getParent(); + DebugLoc DL = DI->getDebugLoc(); + BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR) + .addImm(Val); + MO.setReg(NewR); +} + + +bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { + MachineBasicBlock *Header = L->getHeader(); + MachineBasicBlock *Preheader = L->getLoopPreheader(); + MachineBasicBlock *Latch = L->getLoopLatch(); + + if (!Header || !Preheader || !Latch) + return false; + + // These data structures follow the same concept as the corresponding + // ones in findInductionRegister (where some comments are). + typedef std::pair<unsigned,int64_t> RegisterBump; + typedef std::pair<unsigned,RegisterBump> RegisterInduction; + typedef std::set<RegisterInduction> RegisterInductionSet; + + // Register candidates for induction variables, with their associated bumps. + RegisterInductionSet IndRegs; + + // Look for induction patterns: + // vreg1 = PHI ..., [ latch, vreg2 ] + // vreg2 = ADD vreg1, imm + typedef MachineBasicBlock::instr_iterator instr_iterator; + for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *Phi = &*I; + + // Have a PHI instruction. 
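+    // Check each (value, block) operand pair against the latch, looking
+    // for the "vreg2 = ADD_ri vreg1, imm" bump shown above.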
+ for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { + if (Phi->getOperand(i+1).getMBB() != Latch) + continue; + + unsigned PhiReg = Phi->getOperand(i).getReg(); + MachineInstr *DI = MRI->getVRegDef(PhiReg); + unsigned UpdOpc = DI->getOpcode(); + bool isAdd = (UpdOpc == Hexagon::ADD_ri); + + if (isAdd) { + // If the register operand to the add/sub is the PHI we are looking + // at, this meets the induction pattern. + unsigned IndReg = DI->getOperand(1).getReg(); + if (MRI->getVRegDef(IndReg) == Phi) { + unsigned UpdReg = DI->getOperand(0).getReg(); + int64_t V = DI->getOperand(2).getImm(); + IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); } - } else { - ++MII; } - InstOffset += 4; + } // for (i) + } // for (instr) + + if (IndRegs.empty()) + return false; + + MachineBasicBlock *TB = 0, *FB = 0; + SmallVector<MachineOperand,2> Cond; + // AnalyzeBranch returns true if it fails to analyze branch. + bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); + if (NotAnalyzed) + return false; + + // Check if the latch branch is unconditional. + if (Cond.empty()) + return false; + + if (TB != Header && FB != Header) + // The latch does not go back to the header. Not a latch we know and love. + return false; + + // Expecting a predicate register as a condition. It won't be a hardware + // predicate register at this point yet, just a vreg. + // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0) + // into Cond, followed by the predicate register. For non-negated branches + // it's just the register. + unsigned CSz = Cond.size(); + if (CSz != 1 && CSz != 2) + return false; + + unsigned P = Cond[CSz-1].getReg(); + MachineInstr *PredDef = MRI->getVRegDef(P); + + if (!PredDef->isCompare()) + return false; + + SmallSet<unsigned,2> CmpRegs; + MachineOperand *CmpImmOp = 0; + + // Go over all operands to the compare and look for immediate and register + // operands. Assume that if the compare has a single register use and a + // single immediate operand, then the register is being compared with the + // immediate value. + for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { + MachineOperand &MO = PredDef->getOperand(i); + if (MO.isReg()) { + // Skip all implicit references. In one case there was: + // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use> + if (MO.isImplicit()) + continue; + if (MO.isUse()) { + unsigned R = MO.getReg(); + if (!defWithImmediate(R)) { + CmpRegs.insert(MO.getReg()); + continue; + } + // Consider the register to be the "immediate" operand. + if (CmpImmOp) + return false; + CmpImmOp = &MO; + } + } else if (MO.isImm()) { + if (CmpImmOp) // A second immediate argument? Confusing. Bail out. + return false; + CmpImmOp = &MO; } } - return Changed; + if (CmpRegs.empty()) + return false; + + // Check if the compared register follows the order we want. Fix if needed. + for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end(); + I != E; ++I) { + // This is a success. If the register used in the comparison is one that + // we have identified as a bumped (updated) induction register, there is + // nothing to do. + if (CmpRegs.count(I->first)) + return true; + + // Otherwise, if the register being compared comes out of a PHI node, + // and has been recognized as following the induction pattern, and is + // compared against an immediate, we can fix it. 
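// The fixup below retargets the compare from the pre-bump register i to the
// bumped register i + V, which is only sound when the immediate can absorb
// the bump: cmp(i, C) becomes cmp(i + V, C + V). A sketch of the guard with
// an explicit bounds test (signed wraparound is undefined behavior in C++,
// so this version checks against the INT64 limits instead of relying on it):
#include <cstdint>

// Writes C + V to Out and returns true when the adjusted immediate is both
// representable and within the conservative 8-bit encodable range that the
// code below assumes.
bool adjustCompareImm(int64_t C, int64_t V, int64_t &Out) {
  if (V > 0 && C > INT64_MAX - V)
    return false;                        // C + V would overflow
  if (V < 0 && C < INT64_MIN - V)
    return false;                        // C + V would underflow
  Out = C + V;
  return Out >= INT8_MIN && Out <= INT8_MAX;
}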
+ const RegisterBump &RB = I->second; + if (CmpRegs.count(RB.first)) { + if (!CmpImmOp) + return false; + + int64_t CmpImm = getImmediate(*CmpImmOp); + int64_t V = RB.second; + if (V > 0 && CmpImm+V < CmpImm) // Overflow (64-bit). + return false; + if (V < 0 && CmpImm+V > CmpImm) // Overflow (64-bit). + return false; + CmpImm += V; + // Some forms of cmp-immediate allow u9 and s10. Assume the worst case + // scenario, i.e. an 8-bit value. + if (CmpImmOp->isImm() && !isInt<8>(CmpImm)) + return false; + + // Make sure that the compare happens after the bump. Otherwise, + // after the fixup, the compare would use a yet-undefined register. + MachineInstr *BumpI = MRI->getVRegDef(I->first); + bool Order = orderBumpCompare(BumpI, PredDef); + if (!Order) + return false; + + // Finally, fix the compare instruction. + setImmediate(*CmpImmOp, CmpImm); + for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) { + MachineOperand &MO = PredDef->getOperand(i); + if (MO.isReg() && MO.getReg() == RB.first) { + MO.setReg(I->first); + return true; + } + } + } + } + return false; } -/// convertLoopInstr - convert a loop instruction to a sequence of instructions -/// that set the lc and sa register explicitly. -void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF, - MachineBasicBlock::iterator &MII, - RegScavenger &RS) { - const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); - MachineBasicBlock *MBB = MII->getParent(); - DebugLoc DL = MII->getDebugLoc(); - unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0); - - // First, set the LC0 with the trip count. - if (MII->getOperand(1).isReg()) { - // Trip count is a register - BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0) - .addReg(MII->getOperand(1).getReg()); + +/// \brief Create a preheader for a given loop. +MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( + MachineLoop *L) { + if (MachineBasicBlock *TmpPH = L->getLoopPreheader()) + return TmpPH; + + MachineBasicBlock *Header = L->getHeader(); + MachineBasicBlock *Latch = L->getLoopLatch(); + MachineFunction *MF = Header->getParent(); + DebugLoc DL; + + if (!Latch || Header->hasAddressTaken()) + return 0; + + typedef MachineBasicBlock::instr_iterator instr_iterator; + typedef MachineBasicBlock::pred_iterator pred_iterator; + + // Verify that all existing predecessors have analyzable branches + // (or no branches at all). + typedef std::vector<MachineBasicBlock*> MBBVector; + MBBVector Preds(Header->pred_begin(), Header->pred_end()); + SmallVector<MachineOperand,2> Tmp1; + MachineBasicBlock *TB = 0, *FB = 0; + + if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false)) + return 0; + + for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { + MachineBasicBlock *PB = *I; + if (PB != Latch) { + bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false); + if (NotAnalyzed) + return 0; + } + } + + MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock(); + MF->insert(Header, NewPH); + + if (Header->pred_size() > 2) { + // Ensure that the header has only two predecessors: the preheader and + // the loop latch. Any additional predecessors of the header should + // join at the newly created preheader. Inspect all PHI nodes from the + // header and create appropriate corresponding PHI nodes in the preheader. 
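// The PHI-splitting step just described, as a toy model (plain structs, not
// the MachineInstr API; names are ours): incoming values from blocks other
// than the latch migrate to a new PHI in the preheader, and the header PHI
// shrinks to the latch value plus one incoming from the preheader.
#include <utility>
#include <vector>

struct Phi {
  unsigned Result;
  std::vector<std::pair<unsigned, int>> Incoming;  // (vreg, pred block id)
};

Phi splitPhiForPreheader(Phi &HeaderPhi, int Latch, int NewPH,
                         unsigned NewReg) {
  Phi PrePhi;
  PrePhi.Result = NewReg;
  std::vector<std::pair<unsigned, int>> Kept;
  for (const std::pair<unsigned, int> &In : HeaderPhi.Incoming) {
    if (In.second == Latch)
      Kept.push_back(In);                // latch edge stays on the header PHI
    else
      PrePhi.Incoming.push_back(In);     // everything else joins the preheader
  }
  Kept.push_back(std::make_pair(NewReg, NewPH));  // value from the preheader
  HeaderPhi.Incoming = Kept;
  return PrePhi;
}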
+ + for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *PN = &*I; + + const MCInstrDesc &PD = TII->get(TargetOpcode::PHI); + MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL); + NewPH->insert(NewPH->end(), NewPN); + + unsigned PR = PN->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(PR); + unsigned NewPR = MRI->createVirtualRegister(RC); + NewPN->addOperand(MachineOperand::CreateReg(NewPR, true)); + + // Copy all non-latch operands of a header's PHI node to the newly + // created PHI node in the preheader. + for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { + unsigned PredR = PN->getOperand(i).getReg(); + MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); + if (PredB == Latch) + continue; + + NewPN->addOperand(MachineOperand::CreateReg(PredR, false)); + NewPN->addOperand(MachineOperand::CreateMBB(PredB)); + } + + // Remove copied operands from the old PHI node and add the value + // coming from the preheader's PHI. + for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { + MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); + if (PredB != Latch) { + PN->RemoveOperand(i+1); + PN->RemoveOperand(i); + } + } + PN->addOperand(MachineOperand::CreateReg(NewPR, false)); + PN->addOperand(MachineOperand::CreateMBB(NewPH)); + } + } else { - // Trip count is an immediate. - BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch) - .addImm(MII->getOperand(1).getImm()); - BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0) - .addReg(Scratch); - } - // Then, set the SA0 with the loop start address. - BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch) - .addMBB(MII->getOperand(0).getMBB()); - BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0).addReg(Scratch); + assert(Header->pred_size() == 2); + + // The header has only two predecessors, but the non-latch predecessor + // is not a preheader (e.g. it has other successors, etc.) + // In such a case we don't need any extra PHI nodes in the new preheader, + // all we need is to adjust existing PHIs in the header to now refer to + // the new preheader. + for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *PN = &*I; + for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { + MachineOperand &MO = PN->getOperand(i+1); + if (MO.getMBB() != Latch) + MO.setMBB(NewPH); + } + } + } + + // "Reroute" the CFG edges to link in the new preheader. + // If any of the predecessors falls through to the header, insert a branch + // to the new preheader in that place. + SmallVector<MachineOperand,1> Tmp2; + SmallVector<MachineOperand,1> EmptyCond; + + TB = FB = 0; + + for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { + MachineBasicBlock *PB = *I; + if (PB != Latch) { + Tmp2.clear(); + bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false); + (void)NotAnalyzed; // supress compiler warning + assert (!NotAnalyzed && "Should be analyzable!"); + if (TB != Header && (Tmp2.empty() || FB != Header)) + TII->InsertBranch(*PB, NewPH, 0, EmptyCond, DL); + PB->ReplaceUsesOfBlockWith(Header, NewPH); + } + } + + // It can happen that the latch block will fall through into the header. + // Insert an unconditional branch to the header. 
+  TB = FB = 0;
+  bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false);
+  (void)LatchNotAnalyzed; // suppress compiler warning
+  assert (!LatchNotAnalyzed && "Should be analyzable!");
+  if (!TB && !FB)
+    TII->InsertBranch(*Latch, Header, 0, EmptyCond, DL);
+
+  // Finally, the branch from the preheader to the header.
+  TII->InsertBranch(*NewPH, Header, 0, EmptyCond, DL);
+  NewPH->addSuccessor(Header);
+
+  return NewPH;
 }
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index db292f2..3a1c48b 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -15,18 +15,29 @@
 #include "Hexagon.h"
 #include "HexagonISelLowering.h"
 #include "HexagonTargetMachine.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-
 using namespace llvm;
 
+static
+cl::opt<unsigned>
+MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
+  cl::Hidden, cl::init(2),
+  cl::desc("Maximum number of uses of a global address such that we still use "
+           "a constant extended instruction"));
+
 //===----------------------------------------------------------------------===//
 // Instruction Selector Implementation
 //===----------------------------------------------------------------------===//
 
+namespace llvm {
+  void initializeHexagonDAGToDAGISelPass(PassRegistry&);
+}
+
 //===--------------------------------------------------------------------===//
 /// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine
 /// instructions for SelectionDAG operations.
@@ -40,19 +51,24 @@ class HexagonDAGToDAGISel : public SelectionDAGISel {
   // Keep a reference to HexagonTargetMachine.
   HexagonTargetMachine& TM;
   const HexagonInstrInfo *TII;
-
+  DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap;
 public:
-  explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine)
-    : SelectionDAGISel(targetmachine),
+  explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine,
+                               CodeGenOpt::Level OptLevel)
+    : SelectionDAGISel(targetmachine, OptLevel),
       Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()),
       TM(targetmachine),
      TII(static_cast<const HexagonInstrInfo*>(TM.getInstrInfo())) {
-
+    initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
  }
 
+  bool hasNumUsesBelowThresGA(SDNode *N) const;
   SDNode *Select(SDNode *N);
 
   // Complex Pattern Selectors.
+  inline bool foldGlobalAddress(SDValue &N, SDValue &R);
+  inline bool foldGlobalAddressGP(SDValue &N, SDValue &R);
+  bool foldGlobalAddressImpl(SDValue &N, SDValue &R, bool ShouldLookForGP);
   bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2);
   bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2);
   bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2);
@@ -97,7 +113,14 @@ public:
   SDNode *SelectAdd(SDNode *N);
   bool isConstExtProfitable(SDNode *N) const;
 
-  // Include the pieces autogenerated from the target description.
+// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
+// [1..128], used in cmpb.gtu instructions.
+inline SDValue XformU7ToU7M1Imm(signed Imm) {
+  assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
+  return CurDAG->getTargetConstant(Imm - 1, MVT::i8);
+}
+
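// Presumably the decrement lets an unsigned "x >= Imm" compare be selected
// with the strict-greater cmpb.gtu form: for Imm in [1..128], "x >= Imm" and
// "x > Imm - 1" agree, and Imm - 1 then fits the u7 field. A stand-alone
// check of that identity (the function name is ours, purely illustrative):
#include <cassert>
#include <cstdint>

bool geViaGtMinusOne(uint8_t x, int Imm) {
  return x > Imm - 1;   // what the decremented greater-than compare computes
}

int main() {
  for (int Imm = 1; Imm <= 128; ++Imm)
    for (int x = 0; x <= 255; ++x)
      assert(geViaGtMinusOne((uint8_t)x, Imm) == (x >= Imm));
  return 0;
}

+// Include the pieces autogenerated from the target description.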
#include "HexagonGenDAGISel.inc" }; } // end anonymous namespace @@ -106,10 +129,23 @@ public: /// createHexagonISelDag - This pass converts a legalized DAG into a /// Hexagon-specific DAG, ready for instruction scheduling. /// -FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM) { - return new HexagonDAGToDAGISel(TM); +FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new HexagonDAGToDAGISel(TM, OptLevel); +} + +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection"; + PassInfo *PI = new PassInfo(Name, "hexagon-isel", + &SelectionDAGISel::ID, 0, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) } + static bool IsS11_0_Offset(SDNode * S) { ConstantSDNode *N = cast<ConstantSDNode>(S); @@ -608,8 +644,8 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) { // Offset value must be within representable range // and must have correct alignment properties. if (TII->isValidAutoIncImm(StoredVT, Val)) { - SDValue Ops[] = { Value, Base, - CurDAG->getTargetConstant(Val, MVT::i32), Chain}; + SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value, + Chain}; unsigned Opcode = 0; // Figure out the post inc version of opcode. @@ -1519,3 +1555,69 @@ bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const { return (UseCount <= 1); } + +//===--------------------------------------------------------------------===// +// Return 'true' if use count of the global address is below threshold. +//===--------------------------------------------------------------------===// +bool HexagonDAGToDAGISel::hasNumUsesBelowThresGA(SDNode *N) const { + assert(N->getOpcode() == ISD::TargetGlobalAddress && + "Expecting a target global address"); + + // Always try to fold the address. + if (TM.getOptLevel() == CodeGenOpt::Aggressive) + return true; + + GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); + DenseMap<const GlobalValue *, unsigned>::const_iterator GI = + GlobalAddressUseCountMap.find(GA->getGlobal()); + + if (GI == GlobalAddressUseCountMap.end()) + return false; + + return GI->second <= MaxNumOfUsesForConstExtenders; +} + +//===--------------------------------------------------------------------===// +// Return true if the non GP-relative global address can be folded. +//===--------------------------------------------------------------------===// +inline bool HexagonDAGToDAGISel::foldGlobalAddress(SDValue &N, SDValue &R) { + return foldGlobalAddressImpl(N, R, false); +} + +//===--------------------------------------------------------------------===// +// Return true if the GP-relative global address can be folded. +//===--------------------------------------------------------------------===// +inline bool HexagonDAGToDAGISel::foldGlobalAddressGP(SDValue &N, SDValue &R) { + return foldGlobalAddressImpl(N, R, true); +} + +//===--------------------------------------------------------------------===// +// Fold offset of the global address if number of uses are below threshold. 
+//===--------------------------------------------------------------------===// +bool HexagonDAGToDAGISel::foldGlobalAddressImpl(SDValue &N, SDValue &R, + bool ShouldLookForGP) { + if (N.getOpcode() == ISD::ADD) { + SDValue N0 = N.getOperand(0); + SDValue N1 = N.getOperand(1); + if ((ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32_GP)) || + (!ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32))) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1); + GlobalAddressSDNode *GA = + dyn_cast<GlobalAddressSDNode>(N0.getOperand(0)); + + if (Const && GA && + (GA->getOpcode() == ISD::TargetGlobalAddress)) { + if ((N0.getOpcode() == HexagonISD::CONST32) && + !hasNumUsesBelowThresGA(GA)) + return false; + R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), + Const->getDebugLoc(), + N.getValueType(), + GA->getOffset() + + (uint64_t)Const->getSExtValue()); + return true; + } + } + } + return false; +} diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 16cec5c..fac931a 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -304,15 +304,9 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, // Analyze return values of ISD::RET CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon); - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); + // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; @@ -321,12 +315,17 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, // Guarantee that all emitted copies are stuck together with flags. Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. 
if (Flag.getNode()) - return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain, Flag); + RetOps.push_back(Flag); - return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain); + return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, + &RetOps[0], RetOps.size()); } @@ -1016,8 +1015,8 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, DebugLoc dl = Op.getDebugLoc(); Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); - HexagonTargetObjectFile &TLOF = - (HexagonTargetObjectFile&)getObjFileLowering(); + const HexagonTargetObjectFile &TLOF = + static_cast<const HexagonTargetObjectFile &>(getObjFileLowering()); if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result); } @@ -1053,8 +1052,8 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine setPrefLoopAlignment(4); // Limits for inline expansion of memcpy/memmove - maxStoresPerMemcpy = 6; - maxStoresPerMemmove = 6; + MaxStoresPerMemcpy = 6; + MaxStoresPerMemmove = 6; // // Library calls for unsupported operations @@ -1364,11 +1363,18 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i64, Expand); setOperationAction(ISD::CTTZ , MVT::i32, Expand); + setOperationAction(ISD::CTTZ , MVT::i64, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::CTLZ , MVT::i32, Expand); + setOperationAction(ISD::CTLZ , MVT::i64, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::ROTL , MVT::i32, Expand); setOperationAction(ISD::ROTR , MVT::i32, Expand); setOperationAction(ISD::BSWAP, MVT::i32, Expand); diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 5a415eb..65dab85 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -52,6 +52,8 @@ namespace llvm { WrapperCP, WrapperCombineII, WrapperCombineRR, + WrapperCombineRI_V4, + WrapperCombineIR_V4, WrapperPackhl, WrapperSplatB, WrapperSplatH, diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td index 71c620b..587fa7d 100644 --- a/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/lib/Target/Hexagon/HexagonInstrFormats.td @@ -13,19 +13,19 @@ // *** Must match HexagonBaseInfo.h *** //===----------------------------------------------------------------------===// -class Type<bits<5> t> { +class IType<bits<5> t> { bits<5> Value = t; } -def TypePSEUDO : Type<0>; -def TypeALU32 : Type<1>; -def TypeCR : Type<2>; -def TypeJR : Type<3>; -def TypeJ : Type<4>; -def TypeLD : Type<5>; -def TypeST : Type<6>; -def TypeSYSTEM : Type<7>; -def TypeXTYPE : Type<8>; -def TypeMARKER : Type<31>; +def TypePSEUDO : IType<0>; +def TypeALU32 : IType<1>; +def TypeCR : IType<2>; +def TypeJR : IType<3>; +def TypeJ : IType<4>; +def TypeLD : IType<5>; +def TypeST : IType<6>; +def TypeSYSTEM : IType<7>; +def TypeXTYPE : IType<8>; +def TypeENDLOOP: IType<31>; // Maintain list of valid subtargets for each instruction. 
 class SubTarget<bits<4> value> {
@@ -44,8 +44,8 @@ def HasV5SubT : SubTarget<0x8>;
 def NoV5SubT : SubTarget<0x7>;
 
 // Addressing modes for load/store instructions
-class AddrModeType<bits<4> value> {
-  bits<4> Value = value;
+class AddrModeType<bits<3> value> {
+  bits<3> Value = value;
 }
 
 def NoAddrMode : AddrModeType<0>; // No addressing mode
@@ -55,14 +55,35 @@ def BaseImmOffset : AddrModeType<3>; // Indirect with offset
 def BaseLongOffset : AddrModeType<4>; // Indirect with long offset
 def BaseRegOffset : AddrModeType<5>; // Indirect with register offset
 
+class MemAccessSize<bits<3> value> {
+  bits<3> Value = value;
+}
+
+def NoMemAccess      : MemAccessSize<0>;// Not a memory access instruction.
+def ByteAccess       : MemAccessSize<1>;// Byte access instruction (memb).
+def HalfWordAccess   : MemAccessSize<2>;// Half word access instruction (memh).
+def WordAccess       : MemAccessSize<3>;// Word access instruction (memw).
+def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd)
+
+
 //===----------------------------------------------------------------------===//
 // Intruction Class Declaration +
 //===----------------------------------------------------------------------===//
 
-class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
-                  string cstr, InstrItinClass itin, Type type> : Instruction {
-  field bits<32> Inst;
+class OpcodeHexagon {
+  field bits<32> Inst = ?; // Default to an invalid insn.
+  bits<4> IClass = 0; // ICLASS
+  bits<2> IParse = 0; // Parse bits.
+
+  let Inst{31-28} = IClass;
+  let Inst{15-14} = IParse;
+
+  bits<1> zero = 0;
+}
+
+class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
+                  string cstr, InstrItinClass itin, IType type>
+  : Instruction, OpcodeHexagon {
   let Namespace = "Hexagon";
 
   dag OutOperandList = outs;
@@ -73,48 +94,63 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
   let Itinerary = itin;
   let Size = 4;
 
-  // *** Must match HexagonBaseInfo.h ***
+  // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
   // Instruction type according to the ISA.
-  Type HexagonType = type;
-  let TSFlags{4-0} = HexagonType.Value;
+  IType Type = type;
+  let TSFlags{4-0} = Type.Value;
+
   // Solo instructions, i.e., those that cannot be in a packet with others.
-  bits<1> isHexagonSolo = 0;
-  let TSFlags{5} = isHexagonSolo;
+  bits<1> isSolo = 0;
+  let TSFlags{5} = isSolo;
+
   // Predicated instructions.
   bits<1> isPredicated = 0;
   let TSFlags{6} = isPredicated;
+  bits<1> isPredicatedFalse = 0;
+  let TSFlags{7} = isPredicatedFalse;
   bits<1> isPredicatedNew = 0;
-  let TSFlags{7} = isPredicatedNew;
-
-  // Stores that can be newified.
+  let TSFlags{8} = isPredicatedNew;
+
+  // New-value insn helper fields.
+  bits<1> isNewValue = 0;
+  let TSFlags{9} = isNewValue; // New-value consumer insn.
+  bits<1> hasNewValue = 0;
+  let TSFlags{10} = hasNewValue; // New-value producer insn.
+  bits<3> opNewValue = 0;
+  let TSFlags{13-11} = opNewValue; // New-value produced operand.
+  bits<2> opNewBits = 0;
+  let TSFlags{15-14} = opNewBits; // New-value opcode bits location: 0, 8, 16.
   bits<1> isNVStorable = 0;
-  let TSFlags{8} = isNVStorable;
-
-  // New-value store instructions.
+  let TSFlags{16} = isNVStorable; // Store that can become new-value store.
   bits<1> isNVStore = 0;
-  let TSFlags{9} = isNVStore;
+  let TSFlags{17} = isNVStore; // New-value store insn.
 
   // Immediate extender helper fields.
   bits<1> isExtendable = 0;
-  let TSFlags{10} = isExtendable; // Insn may be extended.
+  let TSFlags{18} = isExtendable; // Insn may be extended.
bits<1> isExtended = 0; - let TSFlags{11} = isExtended; // Insn must be extended. + let TSFlags{19} = isExtended; // Insn must be extended. bits<3> opExtendable = 0; - let TSFlags{14-12} = opExtendable; // Which operand may be extended. + let TSFlags{22-20} = opExtendable; // Which operand may be extended. bits<1> isExtentSigned = 0; - let TSFlags{15} = isExtentSigned; // Signed or unsigned range. + let TSFlags{23} = isExtentSigned; // Signed or unsigned range. bits<5> opExtentBits = 0; - let TSFlags{20-16} = opExtentBits; //Number of bits of range before extending. + let TSFlags{28-24} = opExtentBits; //Number of bits of range before extending. // If an instruction is valid on a subtarget (v2-v5), set the corresponding // bit from validSubTargets. v2 is the least significant bit. // By default, instruction is valid on all subtargets. SubTarget validSubTargets = HasV2SubT; - let TSFlags{24-21} = validSubTargets.Value; + let TSFlags{32-29} = validSubTargets.Value; - // Addressing mode for load/store instrutions. + // Addressing mode for load/store instructions. AddrModeType addrMode = NoAddrMode; - let TSFlags{28-25} = addrMode.Value; + let TSFlags{35-33} = addrMode.Value; + + // Memory access size for mem access instructions (load/store) + MemAccessSize accessSize = NoMemAccess; + let TSFlags{38-36} = accessSize.Value; // Fields used for relation models. string BaseOpcode = ""; @@ -124,6 +160,11 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, string InputType = ""; // Input is "imm" or "reg" type. string isMEMri = "false"; // Set to "true" for load/store with MEMri operand. string isFloat = "false"; // Set to "true" for the floating-point load/store. + string isBrTaken = ""; // Set to "true"/"false" for jump instructions + + let PredSense = !if(isPredicated, !if(isPredicatedFalse, "false", "true"), + ""); + let PNewValue = !if(isPredicatedNew, "new", ""); // *** Must match MCTargetDesc/HexagonBaseInfo.h *** } @@ -134,187 +175,143 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, // LD Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. -class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", LD, TypeLD> { - bits<5> rd; - bits<5> rs; - bits<13> imm13; -} +class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, LD, TypeLD>; -class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", LD, TypeLD> { - bits<5> rd; - bits<5> rs; - bits<13> imm13; - let mayLoad = 1; -} +let mayLoad = 1 in +class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : LDInst<outs, ins, asmstr, pattern, cstr>; + +class CONSTLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : LDInst<outs, ins, asmstr, pattern, cstr>; // LD Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. 
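// The TSFlags assignments above pack a per-instruction descriptor that the
// C++ side of the backend decodes field by field; the comments insist it
// stay in sync with MCTargetDesc/HexagonBaseInfo.h. A minimal sketch of the
// shift-and-mask readback for two of the fields defined above -- the
// enumerator names are ours for illustration, not the header's constants:
#include <cassert>
#include <cstdint>

enum : unsigned {
  TypePos       = 0,  TypeMask       = 0x1f, // TSFlags{4-0}
  ExtendablePos = 18, ExtendableMask = 0x1,  // TSFlags{18}
};

unsigned getType(uint64_t TSFlags) {
  return (unsigned)(TSFlags >> TypePos) & TypeMask;
}
bool isExtendable(uint64_t TSFlags) {
  return ((TSFlags >> ExtendablePos) & ExtendableMask) != 0;
}

int main() {
  uint64_t F = (8ull << TypePos) | (1ull << ExtendablePos); // TypeXTYPE = 8
  assert(getType(F) == 8 && isExtendable(F));
  return 0;
}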
-class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern, - string cstr> - : InstHexagon<outs, ins, asmstr, pattern, cstr, LD, TypeLD> { - bits<5> rd; - bits<5> rs; - bits<5> rt; - bits<13> imm13; -} +class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : LDInst<outs, ins, asmstr, pattern, cstr>; + +let mayLoad = 1 in +class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : LDInst<outs, ins, asmstr, pattern, cstr>; // ST Instruction Class in V2/V3 can take SLOT0 only. // ST Instruction Class in V4 can take SLOT0 & SLOT1. // Definition of the instruction class CHANGED from V2/V3 to V4. -class STInst<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", ST, TypeST> { - bits<5> rd; - bits<5> rs; - bits<13> imm13; -} +let mayStore = 1 in +class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, ST, TypeST>; -class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", ST, TypeST> { - bits<5> rd; - bits<5> rs; - bits<13> imm13; - let mayStore = 1; -} +class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : STInst<outs, ins, asmstr, pattern, cstr>; -// SYSTEM Instruction Class in V4 can take SLOT0 only -// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1. -class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", SYS, TypeSYSTEM> { - bits<5> rd; - bits<5> rs; - bits<13> imm13; -} +let mayStore = 1 in +class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, ST0, TypeST>; // ST Instruction Class in V2/V3 can take SLOT0 only. // ST Instruction Class in V4 can take SLOT0 & SLOT1. // Definition of the instruction class CHANGED from V2/V3 to V4. -class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern, - string cstr> - : InstHexagon<outs, ins, asmstr, pattern, cstr, ST, TypeST> { - bits<5> rd; - bits<5> rs; - bits<5> rt; - bits<13> imm13; -} +class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : STInst<outs, ins, asmstr, pattern, cstr>; + +// SYSTEM Instruction Class in V4 can take SLOT0 only +// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1. +class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, SYS, TypeSYSTEM>; // ALU32 Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. -class ALU32Type<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", ALU32, TypeALU32> { - bits<5> rd; - bits<5> rs; - bits<5> rt; - bits<16> imm16; - bits<16> imm16_2; -} +class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU32, TypeALU32>; // ALU64 Instruction Class in V2/V3. // XTYPE Instruction Class in V4. // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4. 
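// The pattern running through this refactor, above and below: instruction
// classes that only re-declared the same fields, pinned one flag, or passed
// an empty constraint string collapse into thin subclasses with defaulted
// template arguments ("list<dag> pattern = []", "string cstr = """) and
// "let mayLoad/mayStore = 1 in" prefixes. The C++ analogue, purely as an
// illustration with invented names, is default arguments plus a derived
// type that fixes one flag:
struct Inst {
  explicit Inst(const char *Asm, bool MayLoad = false)
      : Asm(Asm), MayLoad(MayLoad) {}
  const char *Asm;
  bool MayLoad;
};

// Corresponds to: let mayLoad = 1 in class LDInst2 : LDInst;
struct LDInst2 : Inst {
  explicit LDInst2(const char *Asm) : Inst(Asm, /*MayLoad=*/true) {}
};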
-class ALU64Type<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", ALU64, TypeXTYPE> { - bits<5> rd; - bits<5> rs; - bits<5> rt; - bits<16> imm16; - bits<16> imm16_2; -} +class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU64, TypeXTYPE>; + +class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : ALU64Inst<outs, ins, asmstr, pattern, cstr>; -class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern, - string cstr> - : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU64, TypeXTYPE> { - bits<5> rd; - bits<5> rs; - bits<5> rt; - bits<16> imm16; - bits<16> imm16_2; -} // M Instruction Class in V2/V3. // XTYPE Instruction Class in V4. // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. -class MInst<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", M, TypeXTYPE> { - bits<5> rd; - bits<5> rs; - bits<5> rt; -} +class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, M, TypeXTYPE>; // M Instruction Class in V2/V3. // XTYPE Instruction Class in V4. // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. -class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern, - string cstr> - : InstHexagon<outs, ins, asmstr, pattern, cstr, M, TypeXTYPE> { - bits<5> rd; - bits<5> rs; - bits<5> rt; -} +class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : MInst<outs, ins, asmstr, pattern, cstr>; // S Instruction Class in V2/V3. // XTYPE Instruction Class in V4. // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. -class SInst<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", S, TypeXTYPE> { - bits<5> rd; - bits<5> rs; - bits<5> rt; -} +class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, S, TypeXTYPE>; // S Instruction Class in V2/V3. // XTYPE Instruction Class in V4. // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. -class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern, - string cstr> - : InstHexagon<outs, ins, asmstr, pattern, cstr, S, TypeXTYPE> { -// : InstHexagon<outs, ins, asmstr, pattern, cstr, S> { -// : InstHexagon<outs, ins, asmstr, pattern, cstr, !if(V4T, XTYPE_V4, S)> { - bits<5> rd; - bits<5> rs; - bits<5> rt; -} +class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : SInst<outs, ins, asmstr, pattern, cstr>; // J Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. -class JType<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", J, TypeJ> { - bits<16> imm16; -} +class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, J, TypeJ>; // JR Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. 
-class JRType<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", JR, TypeJR> { - bits<5> rs; - bits<5> pu; // Predicate register -} +class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, JR, TypeJR>; // CR Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. -class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", CR, TypeCR> { - bits<5> rs; - bits<10> imm10; -} +class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, CR, TypeCR>; -class Marker<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", MARKER, TypeMARKER> { - let isCodeGenOnly = 1; - let isPseudo = 1; -} +let isCodeGenOnly = 1, isPseudo = 1 in +class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, ENDLOOP, TypeENDLOOP>; -class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", PSEUDO, TypePSEUDO> { - let isCodeGenOnly = 1; - let isPseudo = 1; -} +let isCodeGenOnly = 1, isPseudo = 1 in +class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>; + +let isCodeGenOnly = 1, isPseudo = 1 in +class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr=""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>; //===----------------------------------------------------------------------===// // Intruction Classes Definitions - @@ -324,75 +321,52 @@ class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern> // // ALU32 patterns //. -class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern> - : ALU32Type<outs, ins, asmstr, pattern> { -} +class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern, + string cstr = ""> + : ALU32Inst<outs, ins, asmstr, pattern, cstr>; -class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern> - : ALU32Type<outs, ins, asmstr, pattern> { - let rt{0-4} = 0; -} +class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern, + string cstr = ""> + : ALU32Inst<outs, ins, asmstr, pattern, cstr>; -class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern> - : ALU32Type<outs, ins, asmstr, pattern> { - let rt{0-4} = 0; -} +class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern, + string cstr = ""> + : ALU32Inst<outs, ins, asmstr, pattern, cstr>; -class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern> - : ALU32Type<outs, ins, asmstr, pattern> { - let rt{0-4} = 0; -} +class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern, + string cstr = ""> + : ALU32Inst<outs, ins, asmstr, pattern, cstr>; // // ALU64 patterns. // -class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern> - : ALU64Type<outs, ins, asmstr, pattern> { -} - -class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern> - : ALU64Type<outs, ins, asmstr, pattern> { - let rt{0-4} = 0; -} - -// J Type Instructions. -class JInst<dag outs, dag ins, string asmstr, list<dag> pattern> - : JType<outs, ins, asmstr, pattern> { -} - -// JR type Instructions. 
-class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern> - : JRType<outs, ins, asmstr, pattern> { -} +class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern, + string cstr = ""> + : ALU64Inst<outs, ins, asmstr, pattern, cstr>; +class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern, + string cstr = ""> + : ALU64Inst<outs, ins, asmstr, pattern, cstr>; // Post increment ST Instruction. -class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern, - string cstr> - : STInstPost<outs, ins, asmstr, pattern, cstr> { - let rt{0-4} = 0; -} +class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : STInst<outs, ins, asmstr, pattern, cstr>; -class STInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern, - string cstr> - : STInstPost<outs, ins, asmstr, pattern, cstr> { - let rt{0-4} = 0; - let mayStore = 1; -} +let mayStore = 1 in +class STInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : STInst<outs, ins, asmstr, pattern, cstr>; // Post increment LD Instruction. -class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern, - string cstr> - : LDInstPost<outs, ins, asmstr, pattern, cstr> { - let rt{0-4} = 0; -} - -class LDInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern, - string cstr> - : LDInstPost<outs, ins, asmstr, pattern, cstr> { - let rt{0-4} = 0; - let mayLoad = 1; -} +class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : LDInst<outs, ins, asmstr, pattern, cstr>; + +let mayLoad = 1 in +class LDInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : LDInst<outs, ins, asmstr, pattern, cstr>; //===----------------------------------------------------------------------===// // V4 Instruction Format Definitions + diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td index 05f1e23..9fda0da 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -17,9 +17,9 @@ // *** Must match BaseInfo.h *** //----------------------------------------------------------------------------// -def TypeMEMOP : Type<9>; -def TypeNV : Type<10>; -def TypePREFIX : Type<30>; +def TypeMEMOP : IType<9>; +def TypeNV : IType<10>; +def TypePREFIX : IType<30>; //----------------------------------------------------------------------------// // Intruction Classes Definitions + @@ -28,36 +28,38 @@ def TypePREFIX : Type<30>; // // NV type instructions. // -class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", NV_V4, TypeNV> { - bits<5> rd; - bits<5> rs; - bits<13> imm13; -} +class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4, TypeNV>; + +class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : NVInst<outs, ins, asmstr, pattern, cstr>; // Definition of Post increment new value store. -class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern, - string cstr> - : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4, TypeNV> { - bits<5> rd; - bits<5> rs; - bits<5> rt; - bits<13> imm13; -} +class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : NVInst<outs, ins, asmstr, pattern, cstr>; // Post increment ST Instruction. 
-class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern, - string cstr> - : NVInstPost_V4<outs, ins, asmstr, pattern, cstr> { - let rt{0-4} = 0; -} +let mayStore = 1 in +class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : NVInst<outs, ins, asmstr, pattern, cstr>; + +// New-value conditional branch. +class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : NVInst<outs, ins, asmstr, pattern, cstr>; + +let mayLoad = 1, mayStore = 1 in +class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, cstr, MEM_V4, TypeMEMOP>; -class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", MEM_V4, TypeMEMOP> { - bits<5> rd; - bits<5> rs; - bits<6> imm6; -} +class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : MEMInst<outs, ins, asmstr, pattern, cstr>; let isCodeGenOnly = 1 in class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []> diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 3b1ae09..d30cdda 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -305,6 +305,88 @@ unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { } +/// \brief For a comparison instruction, return the source registers in +/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it +/// compares against in CmpValue. Return true if the comparison instruction +/// can be analyzed. +bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const { + unsigned Opc = MI->getOpcode(); + + // Set mask and the first source register. + switch (Opc) { + case Hexagon::CMPEHexagon4rr: + case Hexagon::CMPEQri: + case Hexagon::CMPEQrr: + case Hexagon::CMPGT64rr: + case Hexagon::CMPGTU64rr: + case Hexagon::CMPGTUri: + case Hexagon::CMPGTUrr: + case Hexagon::CMPGTri: + case Hexagon::CMPGTrr: + case Hexagon::CMPLTUrr: + case Hexagon::CMPLTrr: + SrcReg = MI->getOperand(1).getReg(); + Mask = ~0; + break; + case Hexagon::CMPbEQri_V4: + case Hexagon::CMPbEQrr_sbsb_V4: + case Hexagon::CMPbEQrr_ubub_V4: + case Hexagon::CMPbGTUri_V4: + case Hexagon::CMPbGTUrr_V4: + case Hexagon::CMPbGTrr_V4: + SrcReg = MI->getOperand(1).getReg(); + Mask = 0xFF; + break; + case Hexagon::CMPhEQri_V4: + case Hexagon::CMPhEQrr_shl_V4: + case Hexagon::CMPhEQrr_xor_V4: + case Hexagon::CMPhGTUri_V4: + case Hexagon::CMPhGTUrr_V4: + case Hexagon::CMPhGTrr_shl_V4: + SrcReg = MI->getOperand(1).getReg(); + Mask = 0xFFFF; + break; + } + + // Set the value/second source register. 
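// The Mask set above records how much of the source registers the compare
// actually observes: ~0 for the full-width forms, 0xFF for the byte (CMPb*)
// forms, 0xFFFF for the half-word (CMPh*) forms. A caller reusing a compare
// result should therefore reason modulo the mask, e.g. (sketch; the helper
// name is ours):
#include <cstdint>

bool equalUnderMask(int64_t A, int64_t B, int Mask) {
  return (A & Mask) == (B & Mask);   // Mask == ~0 compares all bits
}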
+ switch (Opc) { + case Hexagon::CMPEHexagon4rr: + case Hexagon::CMPEQrr: + case Hexagon::CMPGT64rr: + case Hexagon::CMPGTU64rr: + case Hexagon::CMPGTUrr: + case Hexagon::CMPGTrr: + case Hexagon::CMPbEQrr_sbsb_V4: + case Hexagon::CMPbEQrr_ubub_V4: + case Hexagon::CMPbGTUrr_V4: + case Hexagon::CMPbGTrr_V4: + case Hexagon::CMPhEQrr_shl_V4: + case Hexagon::CMPhEQrr_xor_V4: + case Hexagon::CMPhGTUrr_V4: + case Hexagon::CMPhGTrr_shl_V4: + case Hexagon::CMPLTUrr: + case Hexagon::CMPLTrr: + SrcReg2 = MI->getOperand(2).getReg(); + return true; + + case Hexagon::CMPEQri: + case Hexagon::CMPGTUri: + case Hexagon::CMPGTri: + case Hexagon::CMPbEQri_V4: + case Hexagon::CMPbGTUri_V4: + case Hexagon::CMPhEQri_V4: + case Hexagon::CMPhGTUri_V4: + SrcReg2 = 0; + Value = MI->getOperand(2).getImm(); + return true; + } + + return false; +} + + void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -344,6 +426,18 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg); return; } + if (Hexagon::PredRegsRegClass.contains(SrcReg) && + Hexagon::IntRegsRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::TFR_RsPd), DestReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (Hexagon::IntRegsRegClass.contains(SrcReg) && + Hexagon::PredRegsRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::TFR_PdRs), DestReg). + addReg(SrcReg, getKillRegState(KillSrc)); + return; + } llvm_unreachable("Unimplemented"); } @@ -608,30 +702,6 @@ bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const { case Hexagon::STriw_abs_setimm_V4: // V4 global address load. - case Hexagon::LDrid_GP_cPt_V4 : - case Hexagon::LDrid_GP_cNotPt_V4 : - case Hexagon::LDrid_GP_cdnPt_V4 : - case Hexagon::LDrid_GP_cdnNotPt_V4 : - case Hexagon::LDrib_GP_cPt_V4 : - case Hexagon::LDrib_GP_cNotPt_V4 : - case Hexagon::LDrib_GP_cdnPt_V4 : - case Hexagon::LDrib_GP_cdnNotPt_V4 : - case Hexagon::LDriub_GP_cPt_V4 : - case Hexagon::LDriub_GP_cNotPt_V4 : - case Hexagon::LDriub_GP_cdnPt_V4 : - case Hexagon::LDriub_GP_cdnNotPt_V4 : - case Hexagon::LDrih_GP_cPt_V4 : - case Hexagon::LDrih_GP_cNotPt_V4 : - case Hexagon::LDrih_GP_cdnPt_V4 : - case Hexagon::LDrih_GP_cdnNotPt_V4 : - case Hexagon::LDriuh_GP_cPt_V4 : - case Hexagon::LDriuh_GP_cNotPt_V4 : - case Hexagon::LDriuh_GP_cdnPt_V4 : - case Hexagon::LDriuh_GP_cdnNotPt_V4 : - case Hexagon::LDriw_GP_cPt_V4 : - case Hexagon::LDriw_GP_cNotPt_V4 : - case Hexagon::LDriw_GP_cdnPt_V4 : - case Hexagon::LDriw_GP_cdnNotPt_V4 : case Hexagon::LDd_GP_cPt_V4 : case Hexagon::LDd_GP_cNotPt_V4 : case Hexagon::LDd_GP_cdnPt_V4 : @@ -658,22 +728,6 @@ bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const { case Hexagon::LDw_GP_cdnNotPt_V4 : // V4 global address store. 
- case Hexagon::STrid_GP_cPt_V4 : - case Hexagon::STrid_GP_cNotPt_V4 : - case Hexagon::STrid_GP_cdnPt_V4 : - case Hexagon::STrid_GP_cdnNotPt_V4 : - case Hexagon::STrib_GP_cPt_V4 : - case Hexagon::STrib_GP_cNotPt_V4 : - case Hexagon::STrib_GP_cdnPt_V4 : - case Hexagon::STrib_GP_cdnNotPt_V4 : - case Hexagon::STrih_GP_cPt_V4 : - case Hexagon::STrih_GP_cNotPt_V4 : - case Hexagon::STrih_GP_cdnPt_V4 : - case Hexagon::STrih_GP_cdnNotPt_V4 : - case Hexagon::STriw_GP_cPt_V4 : - case Hexagon::STriw_GP_cNotPt_V4 : - case Hexagon::STriw_GP_cdnPt_V4 : - case Hexagon::STriw_GP_cdnNotPt_V4 : case Hexagon::STd_GP_cPt_V4 : case Hexagon::STd_GP_cNotPt_V4 : case Hexagon::STd_GP_cdnPt_V4 : @@ -692,18 +746,6 @@ bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const { case Hexagon::STw_GP_cdnNotPt_V4 : // V4 predicated global address new value store. - case Hexagon::STrib_GP_cPt_nv_V4 : - case Hexagon::STrib_GP_cNotPt_nv_V4 : - case Hexagon::STrib_GP_cdnPt_nv_V4 : - case Hexagon::STrib_GP_cdnNotPt_nv_V4 : - case Hexagon::STrih_GP_cPt_nv_V4 : - case Hexagon::STrih_GP_cNotPt_nv_V4 : - case Hexagon::STrih_GP_cdnPt_nv_V4 : - case Hexagon::STrih_GP_cdnNotPt_nv_V4 : - case Hexagon::STriw_GP_cPt_nv_V4 : - case Hexagon::STriw_GP_cNotPt_nv_V4 : - case Hexagon::STriw_GP_cdnPt_nv_V4 : - case Hexagon::STriw_GP_cdnNotPt_nv_V4 : case Hexagon::STb_GP_cPt_nv_V4 : case Hexagon::STb_GP_cNotPt_nv_V4 : case Hexagon::STb_GP_cdnPt_nv_V4 : @@ -1095,7 +1137,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STrib_indexed_nv_V4: case Hexagon::STrib_indexed_shl_nv_V4: case Hexagon::STrib_shl_nv_V4: - case Hexagon::STrib_GP_nv_V4: case Hexagon::STb_GP_nv_V4: case Hexagon::POST_STbri_nv_V4: case Hexagon::STrib_cPt_nv_V4: @@ -1118,10 +1159,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STb_GP_cNotPt_nv_V4: case Hexagon::STb_GP_cdnPt_nv_V4: case Hexagon::STb_GP_cdnNotPt_nv_V4: - case Hexagon::STrib_GP_cPt_nv_V4: - case Hexagon::STrib_GP_cNotPt_nv_V4: - case Hexagon::STrib_GP_cdnPt_nv_V4: - case Hexagon::STrib_GP_cdnNotPt_nv_V4: case Hexagon::STrib_abs_nv_V4: case Hexagon::STrib_abs_cPt_nv_V4: case Hexagon::STrib_abs_cdnPt_nv_V4: @@ -1138,7 +1175,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STrih_indexed_nv_V4: case Hexagon::STrih_indexed_shl_nv_V4: case Hexagon::STrih_shl_nv_V4: - case Hexagon::STrih_GP_nv_V4: case Hexagon::STh_GP_nv_V4: case Hexagon::POST_SThri_nv_V4: case Hexagon::STrih_cPt_nv_V4: @@ -1161,10 +1197,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STh_GP_cNotPt_nv_V4: case Hexagon::STh_GP_cdnPt_nv_V4: case Hexagon::STh_GP_cdnNotPt_nv_V4: - case Hexagon::STrih_GP_cPt_nv_V4: - case Hexagon::STrih_GP_cNotPt_nv_V4: - case Hexagon::STrih_GP_cdnPt_nv_V4: - case Hexagon::STrih_GP_cdnNotPt_nv_V4: case Hexagon::STrih_abs_nv_V4: case Hexagon::STrih_abs_cPt_nv_V4: case Hexagon::STrih_abs_cdnPt_nv_V4: @@ -1181,7 +1213,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STriw_indexed_nv_V4: case Hexagon::STriw_indexed_shl_nv_V4: case Hexagon::STriw_shl_nv_V4: - case Hexagon::STriw_GP_nv_V4: case Hexagon::STw_GP_nv_V4: case Hexagon::POST_STwri_nv_V4: case Hexagon::STriw_cPt_nv_V4: @@ -1204,10 +1235,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STw_GP_cNotPt_nv_V4: case Hexagon::STw_GP_cdnPt_nv_V4: case Hexagon::STw_GP_cdnNotPt_nv_V4: - case Hexagon::STriw_GP_cPt_nv_V4: - case 
Hexagon::STriw_GP_cNotPt_nv_V4: - case Hexagon::STriw_GP_cdnPt_nv_V4: - case Hexagon::STriw_GP_cdnNotPt_nv_V4: case Hexagon::STriw_abs_nv_V4: case Hexagon::STriw_abs_cPt_nv_V4: case Hexagon::STriw_abs_cdnPt_nv_V4: @@ -1500,26 +1527,11 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { return Hexagon::JMPR_cPt; // V4 indexed+scaled load. - case Hexagon::LDrid_indexed_cPt_V4: - return Hexagon::LDrid_indexed_cNotPt_V4; - case Hexagon::LDrid_indexed_cNotPt_V4: - return Hexagon::LDrid_indexed_cPt_V4; - case Hexagon::LDrid_indexed_shl_cPt_V4: return Hexagon::LDrid_indexed_shl_cNotPt_V4; case Hexagon::LDrid_indexed_shl_cNotPt_V4: return Hexagon::LDrid_indexed_shl_cPt_V4; - case Hexagon::LDrib_indexed_cPt_V4: - return Hexagon::LDrib_indexed_cNotPt_V4; - case Hexagon::LDrib_indexed_cNotPt_V4: - return Hexagon::LDrib_indexed_cPt_V4; - - case Hexagon::LDriub_indexed_cPt_V4: - return Hexagon::LDriub_indexed_cNotPt_V4; - case Hexagon::LDriub_indexed_cNotPt_V4: - return Hexagon::LDriub_indexed_cPt_V4; - case Hexagon::LDrib_indexed_shl_cPt_V4: return Hexagon::LDrib_indexed_shl_cNotPt_V4; case Hexagon::LDrib_indexed_shl_cNotPt_V4: @@ -1530,16 +1542,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { case Hexagon::LDriub_indexed_shl_cNotPt_V4: return Hexagon::LDriub_indexed_shl_cPt_V4; - case Hexagon::LDrih_indexed_cPt_V4: - return Hexagon::LDrih_indexed_cNotPt_V4; - case Hexagon::LDrih_indexed_cNotPt_V4: - return Hexagon::LDrih_indexed_cPt_V4; - - case Hexagon::LDriuh_indexed_cPt_V4: - return Hexagon::LDriuh_indexed_cNotPt_V4; - case Hexagon::LDriuh_indexed_cNotPt_V4: - return Hexagon::LDriuh_indexed_cPt_V4; - case Hexagon::LDrih_indexed_shl_cPt_V4: return Hexagon::LDrih_indexed_shl_cNotPt_V4; case Hexagon::LDrih_indexed_shl_cNotPt_V4: @@ -1550,11 +1552,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { case Hexagon::LDriuh_indexed_shl_cNotPt_V4: return Hexagon::LDriuh_indexed_shl_cPt_V4; - case Hexagon::LDriw_indexed_cPt_V4: - return Hexagon::LDriw_indexed_cNotPt_V4; - case Hexagon::LDriw_indexed_cNotPt_V4: - return Hexagon::LDriw_indexed_cPt_V4; - case Hexagon::LDriw_indexed_shl_cPt_V4: return Hexagon::LDriw_indexed_shl_cNotPt_V4; case Hexagon::LDriw_indexed_shl_cNotPt_V4: @@ -1680,26 +1677,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { case Hexagon::STw_GP_cNotPt_V4: return Hexagon::STw_GP_cPt_V4; - case Hexagon::STrid_GP_cPt_V4: - return Hexagon::STrid_GP_cNotPt_V4; - case Hexagon::STrid_GP_cNotPt_V4: - return Hexagon::STrid_GP_cPt_V4; - - case Hexagon::STrib_GP_cPt_V4: - return Hexagon::STrib_GP_cNotPt_V4; - case Hexagon::STrib_GP_cNotPt_V4: - return Hexagon::STrib_GP_cPt_V4; - - case Hexagon::STrih_GP_cPt_V4: - return Hexagon::STrih_GP_cNotPt_V4; - case Hexagon::STrih_GP_cNotPt_V4: - return Hexagon::STrih_GP_cPt_V4; - - case Hexagon::STriw_GP_cPt_V4: - return Hexagon::STriw_GP_cNotPt_V4; - case Hexagon::STriw_GP_cNotPt_V4: - return Hexagon::STriw_GP_cPt_V4; - // Load. case Hexagon::LDrid_cPt: return Hexagon::LDrid_cNotPt; @@ -1965,75 +1942,26 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { Hexagon::JMPR_cNotPt; // V4 indexed+scaled load. - case Hexagon::LDrid_indexed_V4: - return !invertPredicate ? Hexagon::LDrid_indexed_cPt_V4 : - Hexagon::LDrid_indexed_cNotPt_V4; case Hexagon::LDrid_indexed_shl_V4: return !invertPredicate ? 
Hexagon::LDrid_indexed_shl_cPt_V4 : Hexagon::LDrid_indexed_shl_cNotPt_V4; - case Hexagon::LDrib_indexed_V4: - return !invertPredicate ? Hexagon::LDrib_indexed_cPt_V4 : - Hexagon::LDrib_indexed_cNotPt_V4; - case Hexagon::LDriub_indexed_V4: - return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 : - Hexagon::LDriub_indexed_cNotPt_V4; - case Hexagon::LDriub_ae_indexed_V4: - return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 : - Hexagon::LDriub_indexed_cNotPt_V4; case Hexagon::LDrib_indexed_shl_V4: return !invertPredicate ? Hexagon::LDrib_indexed_shl_cPt_V4 : Hexagon::LDrib_indexed_shl_cNotPt_V4; case Hexagon::LDriub_indexed_shl_V4: return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 : Hexagon::LDriub_indexed_shl_cNotPt_V4; - case Hexagon::LDriub_ae_indexed_shl_V4: - return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 : - Hexagon::LDriub_indexed_shl_cNotPt_V4; - case Hexagon::LDrih_indexed_V4: - return !invertPredicate ? Hexagon::LDrih_indexed_cPt_V4 : - Hexagon::LDrih_indexed_cNotPt_V4; - case Hexagon::LDriuh_indexed_V4: - return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 : - Hexagon::LDriuh_indexed_cNotPt_V4; - case Hexagon::LDriuh_ae_indexed_V4: - return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 : - Hexagon::LDriuh_indexed_cNotPt_V4; case Hexagon::LDrih_indexed_shl_V4: return !invertPredicate ? Hexagon::LDrih_indexed_shl_cPt_V4 : Hexagon::LDrih_indexed_shl_cNotPt_V4; case Hexagon::LDriuh_indexed_shl_V4: return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 : Hexagon::LDriuh_indexed_shl_cNotPt_V4; - case Hexagon::LDriuh_ae_indexed_shl_V4: - return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 : - Hexagon::LDriuh_indexed_shl_cNotPt_V4; - case Hexagon::LDriw_indexed_V4: - return !invertPredicate ? Hexagon::LDriw_indexed_cPt_V4 : - Hexagon::LDriw_indexed_cNotPt_V4; case Hexagon::LDriw_indexed_shl_V4: return !invertPredicate ? Hexagon::LDriw_indexed_shl_cPt_V4 : Hexagon::LDriw_indexed_shl_cNotPt_V4; // V4 Load from global address - case Hexagon::LDrid_GP_V4: - return !invertPredicate ? Hexagon::LDrid_GP_cPt_V4 : - Hexagon::LDrid_GP_cNotPt_V4; - case Hexagon::LDrib_GP_V4: - return !invertPredicate ? Hexagon::LDrib_GP_cPt_V4 : - Hexagon::LDrib_GP_cNotPt_V4; - case Hexagon::LDriub_GP_V4: - return !invertPredicate ? Hexagon::LDriub_GP_cPt_V4 : - Hexagon::LDriub_GP_cNotPt_V4; - case Hexagon::LDrih_GP_V4: - return !invertPredicate ? Hexagon::LDrih_GP_cPt_V4 : - Hexagon::LDrih_GP_cNotPt_V4; - case Hexagon::LDriuh_GP_V4: - return !invertPredicate ? Hexagon::LDriuh_GP_cPt_V4 : - Hexagon::LDriuh_GP_cNotPt_V4; - case Hexagon::LDriw_GP_V4: - return !invertPredicate ? Hexagon::LDriw_GP_cPt_V4 : - Hexagon::LDriw_GP_cNotPt_V4; - case Hexagon::LDd_GP_V4: return !invertPredicate ? Hexagon::LDd_GP_cPt_V4 : Hexagon::LDd_GP_cNotPt_V4; @@ -2116,19 +2044,6 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { Hexagon::STrid_indexed_shl_cNotPt_V4; // V4 Store to global address - case Hexagon::STrid_GP_V4: - return !invertPredicate ? Hexagon::STrid_GP_cPt_V4 : - Hexagon::STrid_GP_cNotPt_V4; - case Hexagon::STrib_GP_V4: - return !invertPredicate ? Hexagon::STrib_GP_cPt_V4 : - Hexagon::STrib_GP_cNotPt_V4; - case Hexagon::STrih_GP_V4: - return !invertPredicate ? Hexagon::STrih_GP_cPt_V4 : - Hexagon::STrih_GP_cNotPt_V4; - case Hexagon::STriw_GP_V4: - return !invertPredicate ? Hexagon::STriw_GP_cPt_V4 : - Hexagon::STriw_GP_cNotPt_V4; - case Hexagon::STd_GP_V4: return !invertPredicate ? 
Hexagon::STd_GP_cPt_V4 :
     Hexagon::STd_GP_cNotPt_V4;
@@ -2215,38 +2130,141 @@ PredicateInstruction(MachineInstr *MI,
   assert (isPredicable(MI) && "Expected predicable instruction");
   bool invertJump = (!Cond.empty() && Cond[0].isImm() &&
                      (Cond[0].getImm() == 0));
+
+  // This changes MI's opcode to its predicated version. However, its
+  // operand list is still the old, non-predicated one.
   MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump)));
-  //
-  // This assumes that the predicate is always the first operand
-  // in the set of inputs.
-  //
-  MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
-  int oper;
-  for (oper = MI->getNumOperands() - 3; oper >= 0; --oper) {
-    MachineOperand MO = MI->getOperand(oper);
-    if ((MO.isReg() && !MO.isUse() && !MO.isImplicit())) {
-      break;
-    }
-    if (MO.isReg()) {
-      MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
-                                              MO.isImplicit(), MO.isKill(),
-                                              MO.isDead(), MO.isUndef(),
-                                              MO.isDebug());
-    } else if (MO.isImm()) {
-      MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
-    } else {
-      llvm_unreachable("Unexpected operand type");
+  int oper = -1;
+  unsigned int GAIdx = 0;
+
+  // Indicates whether the current MI has a GlobalAddress operand.
+  bool hasGAOpnd = false;
+  std::vector<MachineOperand> tmpOpnds;
+
+  // Indicates whether we need to shift operands to the right.
+  bool needShift = true;
+
+  // The predicate is always the first input operand.
+  if (MI->getNumOperands() == 0) {
+    // The non-predicated version of MI takes no operands at all, i.e. no
+    // outs and no ins. In that case the predicate operand is placed
+    // directly at Operands[0]; no operand shift is needed.
+    // Example: BARRIER
+    needShift = false;
+    oper = -1;
+  }
+  else if (MI->getOperand(MI->getNumOperands()-1).isReg()
+           && MI->getOperand(MI->getNumOperands()-1).isDef()
+           && !MI->getOperand(MI->getNumOperands()-1).isImplicit()) {
+    // The non-predicated version of MI has no input operands. Extend
+    // Operands[] by one and copy the original last operand into the newly
+    // allocated slot. For the moment it is just a placeholder; the
+    // predicate operand is written into it later. No operand shift is
+    // needed.
+    // Example: r0=BARRIER (a fake instruction, used only for illustration)
+    MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
+    needShift = false;
+    oper = MI->getNumOperands() - 2;
+  }
+  else {
+    // All input operands need to be shifted right by one. Duplicate the
+    // last operand into the newly allocated slot.
+    MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
+  }
+
+  if (needShift)
+  {
+    // Operands[ MI->getNumOperands() - 2 ] has been copied into
+    // Operands[ MI->getNumOperands() - 1 ], so we start from
+    // Operands[ MI->getNumOperands() - 3 ].
+    // oper is a signed int, so it is fine for "MI->getNumOperands()-3"
+    // to be -3, -2, or -1.
+    for (oper = MI->getNumOperands() - 3; oper >= 0; --oper)
+    {
+      MachineOperand &MO = MI->getOperand(oper);
+
+      // Opnd[0] Opnd[1] Opnd[2] Opnd[3] Opnd[4]   Opnd[5]   Opnd[6]   Opnd[7]
+      // <Def0>  <Def1>  <Use0>  <Use1>  <ImpDef0> <ImpDef1> <ImpUse0> <ImpUse1>
+      //                   /\
+      //                  /||\
+      //                   ||
+      //         Predicate operand goes here
+      if (MO.isReg() && !MO.isUse() && !MO.isImplicit()) {
+        break;
+      }
+      if (MO.isReg()) {
+        MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
+                                                MO.isImplicit(), MO.isKill(),
+                                                MO.isDead(), MO.isUndef(),
+                                                MO.isDebug());
+      }
+      else if (MO.isImm()) {
+        MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
+      }
+      else if (MO.isGlobal()) {
+        // MI cannot have more than one GlobalAddress operand.
+        assert(hasGAOpnd == false && "MI can only have one GlobalAddress opnd");
+
+        // MachineOperand has no "ChangeToGlobalAddress" member function
+        // (unlike "ChangeToRegister" and "ChangeToImmediate"), so these
+        // operands have to be removed from the Operands[] list first and
+        // added back after the predicate operand has been inserted.
+        // tmpOpnds[] remembers them in the meantime.
+        tmpOpnds.push_back(MO);
+
+        // Operands[oper] is a GlobalAddress operand;
+        // Operands[oper+1] has been copied into Operands[oper+2];
+        hasGAOpnd = true;
+        GAIdx = oper;
+        continue;
+      }
+      else {
+        llvm_unreachable("Unexpected operand type");
+      }
+    }
+  }

   int regPos = invertJump ? 1 : 0;
   MachineOperand PredMO = Cond[regPos];
+
+  // [oper] now points to the last explicit Def. The predicate operand must
+  // be located at [oper+1]; see the diagram above. This assumes that the
+  // predicate is always the first input operand, i.e. Operands[0+numResults].
+  // Ideally an assert would verify this, but findFirstPredOperandIdx()
+  // returns -1 at this point, so there is no direct way to check it.
+  if (oper < -1) oper = -1;
   MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(),
                                           PredMO.isImplicit(), PredMO.isKill(),
                                           PredMO.isDead(), PredMO.isUndef(),
                                           PredMO.isDebug());

+  if (hasGAOpnd)
+  {
+    unsigned int i;
+
+    // Operands[GAIdx] is the original GlobalAddress operand, which has
+    // already been copied into tmpOpnds[0].
+    // Operands[GAIdx] now stores a copy of Operands[GAIdx-1].
+    // Operands[GAIdx+1] has already been copied into Operands[GAIdx+2],
+    // so we start from [GAIdx+2].
+    for (i = GAIdx + 2; i < MI->getNumOperands(); ++i)
+      tmpOpnds.push_back(MI->getOperand(i));
+
+    // Remove all operands in range [ (GAIdx+1) ... (MI->getNumOperands()-1) ].
+    // It is very important to always remove from the end of Operands[].
+    // MI->getNumOperands() is at least 2 if the program reaches this point.
+ for (i = MI->getNumOperands() - 1; i > GAIdx; --i) + MI->RemoveOperand(i); + + for (i = 0; i < tmpOpnds.size(); ++i) + MI->addOperand(tmpOpnds[i]); + } + return true; } @@ -2352,7 +2370,9 @@ isValidOffset(const int Opcode, const int Offset) const { switch(Opcode) { case Hexagon::LDriw: + case Hexagon::LDriw_indexed: case Hexagon::LDriw_f: + case Hexagon::STriw_indexed: case Hexagon::STriw: case Hexagon::STriw_f: assert((Offset % 4 == 0) && "Offset has incorrect alignment"); @@ -2360,8 +2380,10 @@ isValidOffset(const int Opcode, const int Offset) const { (Offset <= Hexagon_MEMW_OFFSET_MAX); case Hexagon::LDrid: + case Hexagon::LDrid_indexed: case Hexagon::LDrid_f: case Hexagon::STrid: + case Hexagon::STrid_indexed: case Hexagon::STrid_f: assert((Offset % 8 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMD_OFFSET_MIN) && @@ -2435,6 +2457,9 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::LDriw_pred: return true; + case Hexagon::LOOP0_i: + return isUInt<10>(Offset); + // INLINEASM is very special. case Hexagon::INLINEASM: return true; @@ -2643,28 +2668,16 @@ isConditionalLoad (const MachineInstr* MI) const { case Hexagon::POST_LDriub_cPt : case Hexagon::POST_LDriub_cNotPt : return QRI.Subtarget.hasV4TOps(); - case Hexagon::LDrid_indexed_cPt_V4 : - case Hexagon::LDrid_indexed_cNotPt_V4 : case Hexagon::LDrid_indexed_shl_cPt_V4 : case Hexagon::LDrid_indexed_shl_cNotPt_V4 : - case Hexagon::LDrib_indexed_cPt_V4 : - case Hexagon::LDrib_indexed_cNotPt_V4 : case Hexagon::LDrib_indexed_shl_cPt_V4 : case Hexagon::LDrib_indexed_shl_cNotPt_V4 : - case Hexagon::LDriub_indexed_cPt_V4 : - case Hexagon::LDriub_indexed_cNotPt_V4 : case Hexagon::LDriub_indexed_shl_cPt_V4 : case Hexagon::LDriub_indexed_shl_cNotPt_V4 : - case Hexagon::LDrih_indexed_cPt_V4 : - case Hexagon::LDrih_indexed_cNotPt_V4 : case Hexagon::LDrih_indexed_shl_cPt_V4 : case Hexagon::LDrih_indexed_shl_cNotPt_V4 : - case Hexagon::LDriuh_indexed_cPt_V4 : - case Hexagon::LDriuh_indexed_cNotPt_V4 : case Hexagon::LDriuh_indexed_shl_cPt_V4 : case Hexagon::LDriuh_indexed_shl_cNotPt_V4 : - case Hexagon::LDriw_indexed_cPt_V4 : - case Hexagon::LDriw_indexed_cNotPt_V4 : case Hexagon::LDriw_indexed_shl_cPt_V4 : case Hexagon::LDriw_indexed_shl_cNotPt_V4 : return QRI.Subtarget.hasV4TOps(); @@ -2747,14 +2760,6 @@ isConditionalStore (const MachineInstr* MI) const { return QRI.Subtarget.hasV4TOps(); // V4 global address store before promoting to dot new. 
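
The operand shuffle in the PredicateInstruction hunk above is easier to see in isolation. A minimal standalone sketch of the same shift-by-one scheme, using a plain std::vector in place of the MachineInstr operand list (names and types are illustrative only, not LLVM API):

#include <cassert>
#include <vector>

// Duplicate the last operand to grow the list by one, slide every input
// operand one slot to the right, then overwrite the freed slot -- the one
// right after the last explicit def -- with the predicate.
static void insertPredicateOperand(std::vector<int> &Ops, unsigned NumDefs,
                                   int Pred) {
  assert(NumDefs <= Ops.size() && "more defs than operands");
  Ops.push_back(Ops.empty() ? 0 : Ops.back());
  for (int i = (int)Ops.size() - 3; i >= (int)NumDefs; --i)
    Ops[i + 1] = Ops[i];
  Ops[NumDefs] = Pred;
}

GlobalAddress operands cannot be slid this way with the real MachineOperand API (there is no ChangeToGlobalAddress member), which is why the hunk above removes them and re-adds them once the predicate is in place.
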
- case Hexagon::STrid_GP_cPt_V4 : - case Hexagon::STrid_GP_cNotPt_V4 : - case Hexagon::STrib_GP_cPt_V4 : - case Hexagon::STrib_GP_cNotPt_V4 : - case Hexagon::STrih_GP_cPt_V4 : - case Hexagon::STrih_GP_cNotPt_V4 : - case Hexagon::STriw_GP_cPt_V4 : - case Hexagon::STriw_GP_cNotPt_V4 : case Hexagon::STd_GP_cPt_V4 : case Hexagon::STd_GP_cNotPt_V4 : case Hexagon::STb_GP_cPt_V4 : diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 29e3eb1..4e36dfb 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -66,6 +66,10 @@ public: const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; + virtual bool analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index 8b183b9..082772a 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -808,7 +808,7 @@ let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], // JR + //===----------------------------------------------------------------------===// def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; // Jump to address from register. let isPredicable =1, isReturn = 1, isTerminator = 1, isBarrier = 1, @@ -1195,57 +1195,65 @@ let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in { //===----------------------------------------------------------------------===// // Multiply and use lower result. // Rd=+mpyi(Rs,#u8) -def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2), +let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in +def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Ext:$src2), "$dst =+ mpyi($src1, #$src2)", [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), - u8ImmPred:$src2))]>; + u8ExtPred:$src2))]>; // Rd=-mpyi(Rs,#u8) -def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, n8Imm:$src2), +def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2), "$dst =- mpyi($src1, #$src2)", - [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), - n8ImmPred:$src2))]>; + [(set (i32 IntRegs:$dst), (ineg (mul (i32 IntRegs:$src1), + u8ImmPred:$src2)))]>; // Rd=mpyi(Rs,#m9) // s9 is NOT the same as m9 - but it works.. so far. // Assembler maps to either Rd=+mpyi(Rs,#u8 or Rd=-mpyi(Rs,#u8) // depending on the value of m9. See Arch Spec. 
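
The HexagonInstrInfo.h hunk above only declares the new analyzeCompare override; its body is not part of this diff. For orientation, a sketch of the shape such an override usually takes -- the opcode cases and operand positions below are assumptions for illustration, not the actual Hexagon implementation:

#include "HexagonInstrInfo.h"

// Decompose a compare instruction into its source register(s) and
// immediate so the generic peephole pass can elide redundant compares.
bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
                                      unsigned &SrcReg, unsigned &SrcReg2,
                                      int &Mask, int &Value) const {
  switch (MI->getOpcode()) {
  case Hexagon::CMPEQrr:                 // Pd = cmp.eq(Rs, Rt)
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = MI->getOperand(2).getReg();
    Mask = 0;
    Value = 0;
    return true;
  case Hexagon::CMPEQri:                 // Pd = cmp.eq(Rs, #imm)
    SrcReg = MI->getOperand(1).getReg();
    SrcReg2 = 0;
    Mask = ~0;
    Value = MI->getOperand(2).getImm();
    return true;
  default:
    return false;
  }
}
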
-def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2), +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9, +CextOpcode = "MPYI", InputType = "imm" in +def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2), "$dst = mpyi($src1, #$src2)", [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), - s9ImmPred:$src2))]>; + s9ExtPred:$src2))]>, ImmRegRel; // Rd=mpyi(Rs,Rt) +let CextOpcode = "MPYI", InputType = "reg" in def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = mpyi($src1, $src2)", [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; + (i32 IntRegs:$src2)))]>, ImmRegRel; // Rx+=mpyi(Rs,#u8) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8, +CextOpcode = "MPYI_acc", InputType = "imm" in def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3), + (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3), "$dst += mpyi($src2, #$src3)", [(set (i32 IntRegs:$dst), - (add (mul (i32 IntRegs:$src2), u8ImmPred:$src3), + (add (mul (i32 IntRegs:$src2), u8ExtPred:$src3), (i32 IntRegs:$src1)))], - "$src1 = $dst">; + "$src1 = $dst">, ImmRegRel; // Rx+=mpyi(Rs,Rt) +let CextOpcode = "MPYI_acc", InputType = "reg" in def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), "$dst += mpyi($src2, $src3)", [(set (i32 IntRegs:$dst), (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), (i32 IntRegs:$src1)))], - "$src1 = $dst">; + "$src1 = $dst">, ImmRegRel; // Rx-=mpyi(Rs,#u8) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8 in def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3), + (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3), "$dst -= mpyi($src2, #$src3)", [(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2), - u8ImmPred:$src3)))], + u8ExtPred:$src3)))], "$src1 = $dst">; // Multiply and use upper result. 
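
Much of the churn above swaps plain immediate operands (u8Imm, s9Imm) for extendable ones (u8Ext, s9Ext) and attaches isExtendable/isExtentSigned/opExtentBits. The fit test those flags imply can be sketched with LLVM's MathExtras helpers; this is a simplification -- the real decision is made when the immediate is encoded:

#include "llvm/Support/MathExtras.h"

// A value that does not fit the instruction's native immediate field must
// be carried in a separate constant-extender word on Hexagon V4.
static bool needsConstantExtender(int64_t Imm, bool IsExtentSigned,
                                  unsigned OpExtentBits) {
  return IsExtentSigned ? !llvm::isIntN(OpExtentBits, Imm)
                        : !llvm::isUIntN(OpExtentBits, Imm);
}
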
@@ -1314,7 +1322,7 @@ def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, // Rxx-=mpyu(Rs,Rt) def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "$dst += mpyu($src2, $src3)", + "$dst -= mpyu($src2, $src3)", [(set (i64 DoubleRegs:$dst), (sub (i64 DoubleRegs:$src1), (mul (i64 (anyext (i32 IntRegs:$src2))), @@ -1322,37 +1330,43 @@ def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst), "$src1 = $dst">; +let InputType = "reg", CextOpcode = "ADD_acc" in def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), "$dst += add($src2, $src3)", [(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2), (i32 IntRegs:$src3)), (i32 IntRegs:$src1)))], - "$src1 = $dst">; + "$src1 = $dst">, ImmRegRel; +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8, +InputType = "imm", CextOpcode = "ADD_acc" in def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, - IntRegs:$src2, s8Imm:$src3), + IntRegs:$src2, s8Ext:$src3), "$dst += add($src2, #$src3)", [(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2), - s8ImmPred:$src3), + s8_16ExtPred:$src3), (i32 IntRegs:$src1)))], - "$src1 = $dst">; + "$src1 = $dst">, ImmRegRel; +let CextOpcode = "SUB_acc", InputType = "reg" in def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), "$dst -= add($src2, $src3)", [(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1), (add (i32 IntRegs:$src2), (i32 IntRegs:$src3))))], - "$src1 = $dst">; + "$src1 = $dst">, ImmRegRel; +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8, +CextOpcode = "SUB_acc", InputType = "imm" in def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, - IntRegs:$src2, s8Imm:$src3), + IntRegs:$src2, s8Ext:$src3), "$dst -= add($src2, #$src3)", [(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1), (add (i32 IntRegs:$src2), - s8ImmPred:$src3)))], - "$src1 = $dst">; + s8_16ExtPred:$src3)))], + "$src1 = $dst">, ImmRegRel; //===----------------------------------------------------------------------===// // MTYPE/MPYH - @@ -1405,35 +1419,71 @@ def STd_GP : STInst2<(outs), []>, Requires<[NoV4T]>; -let hasCtrlDep = 1, isPredicable = 1 in -def POST_STdri : STInstPI<(outs IntRegs:$dst), - (ins DoubleRegs:$src1, IntRegs:$src2, s4Imm:$offset), - "memd($src2++#$offset) = $src1", - [(set IntRegs:$dst, - (post_store (i64 DoubleRegs:$src1), (i32 IntRegs:$src2), - s4_3ImmPred:$offset))], - "$src2 = $dst">; +//===----------------------------------------------------------------------===// +// Post increment store +//===----------------------------------------------------------------------===// -// if ([!]Pv) memd(Rx++#s4:3)=Rtt -// if (Pv) memd(Rx++#s4:3)=Rtt -let AddedComplexity = 10, neverHasSideEffects = 1, - isPredicated = 1 in -def POST_STdri_cPt : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, - s4_3Imm:$offset), - "if ($src1) memd($src3++#$offset) = $src2", - [], - "$src3 = $dst">; - -// if (!Pv) memd(Rx++#s4:3)=Rtt -let AddedComplexity = 10, neverHasSideEffects = 1, isPredicated = 1, - isPredicated = 1 in -def POST_STdri_cNotPt : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, - s4_3Imm:$offset), - "if (!$src1) memd($src3++#$offset) = $src2", +multiclass ST_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp, + bit isNot, bit isPredNew> { + let PNewValue = !if(isPredNew, "new", "") in + def NAME : 
STInst2PI<(outs IntRegs:$dst),
+             (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
+             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+             ") ")#mnemonic#"($src2++#$offset) = $src3",
              [],
-             "$src3 = $dst">;
+             "$src2 = $dst">;
+}
+
+multiclass ST_PostInc_Pred<string mnemonic, RegisterClass RC,
+                           Operand ImmOp, bit PredNot> {
+  let PredSense = !if(PredNot, "false", "true") in {
+    defm _c#NAME : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
+    // Predicate new
+    let Predicates = [HasV4T], validSubTargets = HasV4SubT in
+    defm _cdn#NAME#_V4 : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>;
+  }
+}
+
+let hasCtrlDep = 1, isNVStorable = 1, neverHasSideEffects = 1 in
+multiclass ST_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
+                      Operand ImmOp> {
+
+  let hasCtrlDep = 1, BaseOpcode = "POST_"#BaseOp in {
+    let isPredicable = 1 in
+    def NAME : STInst2PI<(outs IntRegs:$dst),
+                         (ins IntRegs:$src1, ImmOp:$offset, RC:$src2),
+                         mnemonic#"($src1++#$offset) = $src2",
+                         [],
+                         "$src1 = $dst">;
+
+    let isPredicated = 1 in {
+      defm Pt : ST_PostInc_Pred<mnemonic, RC, ImmOp, 0 >;
+      defm NotPt : ST_PostInc_Pred<mnemonic, RC, ImmOp, 1 >;
+    }
+  }
+}
+
+defm POST_STbri: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel;
+defm POST_SThri: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel;
+defm POST_STwri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
+
+let isNVStorable = 0 in
+defm POST_STdri: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm>, AddrModeRel;
+
+def : Pat<(post_truncsti8 (i32 IntRegs:$src1), IntRegs:$src2,
+                           s4_3ImmPred:$offset),
+          (POST_STbri IntRegs:$src2, s4_0ImmPred:$offset, IntRegs:$src1)>;
+
+def : Pat<(post_truncsti16 (i32 IntRegs:$src1), IntRegs:$src2,
+                            s4_3ImmPred:$offset),
+          (POST_SThri IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>;
+
+def : Pat<(post_store (i32 IntRegs:$src1), IntRegs:$src2, s4_2ImmPred:$offset),
+          (POST_STwri IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>;
+
+def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2,
+                       s4_3ImmPred:$offset),
+          (POST_STdri IntRegs:$src2, s4_3ImmPred:$offset, DoubleRegs:$src1)>;

 //===----------------------------------------------------------------------===//
 // multiclass for the store instructions with MEMri operand.
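
For readers unfamiliar with the addressing mode the new ST_PostInc multiclass models, the semantics of "memw($src1++#$offset) = $src2" in C++ terms (illustrative only; the returned pointer plays the role the "$src1 = $dst" constraint ties to the output register):

#include <cstdint>

// Post-increment store: write Rt through the current base, then advance
// the base by the byte offset. Hexagon does both in one instruction.
static uint32_t *postIncStoreW(uint32_t *Rx, int32_t Offset, uint32_t Rt) {
  *Rx = Rt;                                        // memw(Rx) = Rt
  return reinterpret_cast<uint32_t *>(
      reinterpret_cast<char *>(Rx) + Offset);      // Rx += #offset
}
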
@@ -1595,32 +1645,6 @@ def STb_GP : STInst2<(outs), []>, Requires<[NoV4T]>; -// memb(Rx++#s4:0)=Rt -let hasCtrlDep = 1, isPredicable = 1 in -def POST_STbri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1, - IntRegs:$src2, - s4Imm:$offset), - "memb($src2++#$offset) = $src1", - [(set IntRegs:$dst, - (post_truncsti8 (i32 IntRegs:$src1), (i32 IntRegs:$src2), - s4_0ImmPred:$offset))], - "$src2 = $dst">; - -// if ([!]Pv) memb(Rx++#s4:0)=Rt -// if (Pv) memb(Rx++#s4:0)=Rt -let hasCtrlDep = 1, isPredicated = 1 in -def POST_STbri_cPt : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), - "if ($src1) memb($src3++#$offset) = $src2", - [],"$src3 = $dst">; - -// if (!Pv) memb(Rx++#s4:0)=Rt -let hasCtrlDep = 1, isPredicated = 1 in -def POST_STbri_cNotPt : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), - "if (!$src1) memb($src3++#$offset) = $src2", - [],"$src3 = $dst">; - let neverHasSideEffects = 1 in def STrih_GP : STInst2<(outs), (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), @@ -1636,31 +1660,6 @@ def STh_GP : STInst2<(outs), Requires<[NoV4T]>; // memh(Rx++#s4:1)=Rt.H -// memh(Rx++#s4:1)=Rt -let hasCtrlDep = 1, isPredicable = 1 in -def POST_SThri : STInstPI<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset), - "memh($src2++#$offset) = $src1", - [(set IntRegs:$dst, - (post_truncsti16 (i32 IntRegs:$src1), (i32 IntRegs:$src2), - s4_1ImmPred:$offset))], - "$src2 = $dst">; - -// if ([!]Pv) memh(Rx++#s4:1)=Rt -// if (Pv) memh(Rx++#s4:1)=Rt -let hasCtrlDep = 1, isPredicated = 1 in -def POST_SThri_cPt : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), - "if ($src1) memh($src3++#$offset) = $src2", - [],"$src3 = $dst">; - -// if (!Pv) memh(Rx++#s4:1)=Rt -let hasCtrlDep = 1, isPredicated = 1 in -def POST_SThri_cNotPt : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), - "if (!$src1) memh($src3++#$offset) = $src2", - [],"$src3 = $dst">; - // Store word. // Store predicate. @@ -1684,32 +1683,6 @@ def STw_GP : STInst2<(outs), []>, Requires<[NoV4T]>; -let hasCtrlDep = 1, isPredicable = 1 in -def POST_STwri : STInstPI<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset), - "memw($src2++#$offset) = $src1", - [(set IntRegs:$dst, - (post_store (i32 IntRegs:$src1), (i32 IntRegs:$src2), - s4_2ImmPred:$offset))], - "$src2 = $dst">; - -// if ([!]Pv) memw(Rx++#s4:2)=Rt -// if (Pv) memw(Rx++#s4:2)=Rt -let hasCtrlDep = 1, isPredicated = 1 in -def POST_STwri_cPt : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if ($src1) memw($src3++#$offset) = $src2", - [],"$src3 = $dst">; - -// if (!Pv) memw(Rx++#s4:2)=Rt -let hasCtrlDep = 1, isPredicated = 1 in -def POST_STwri_cNotPt : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if (!$src1) memw($src3++#$offset) = $src2", - [],"$src3 = $dst">; - - - // Allocate stack frame. 
let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
   def ALLOCFRAME : STInst2<(outs),
@@ -1912,7 +1885,7 @@ def SDHexagonBARRIER: SDTypeProfile<0, 0, []>;
 def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER,
                            [SDNPHasChain]>;

-let hasSideEffects = 1, isHexagonSolo = 1 in
+let hasSideEffects = 1, isSolo = 1 in
 def BARRIER : SYSInst<(outs), (ins),
                       "barrier",
                       [(HexagonBARRIER)]>;
@@ -1987,9 +1960,9 @@ def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2),
 let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1,
     Defs = [PC, LC0], Uses = [SA0, LC0] in {
-def ENDLOOP0 : Marker<(outs), (ins brtarget:$offset),
-               ":endloop0",
-               []>;
+def ENDLOOP0 : Endloop<(outs), (ins brtarget:$offset),
+               ":endloop0",
+               []>;
 }

 // Support for generating global address.
@@ -2852,23 +2825,42 @@ def : Pat <(i32 (zext (i1 PredRegs:$src1))),
 // i1 -> i64
 def : Pat <(i64 (zext (i1 PredRegs:$src1))),
-      (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>;
+      (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
+      Requires<[NoV4T]>;

 // i32 -> i64
 def : Pat <(i64 (zext (i32 IntRegs:$src1))),
-      (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>;
+      (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
+      Requires<[NoV4T]>;

 // i8 -> i64
 def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
-     (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>;
+     (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
+     Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
+                                 s11_0ExtPred:$offset))),
+     (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
+                                  s11_0ExtPred:$offset)))>,
+     Requires<[NoV4T]>;

 // i16 -> i64
 def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
-     (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>;
+     (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>,
+     Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
+                                  s11_1ExtPred:$offset))),
+     (i64 (COMBINE_rr (TFRI 0), (LDriuh_indexed IntRegs:$src1,
+                                  s11_1ExtPred:$offset)))>,
+     Requires<[NoV4T]>;

 // i32 -> i64
 def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
-     (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+     (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
+     Requires<[NoV4T]>;

 def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
      (i32 (LDriw ADDRriS11_0:$src1))>;
@@ -2889,15 +2881,41 @@ def : Pat <(i64 (anyext (i1 PredRegs:$src1))),
 // Any extended 64-bit load.
 // anyext i32 -> i64
 def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
-     (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+     (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
+     Requires<[NoV4T]>;
+
+// When there is an offset, prefer the pattern below over the one above.
+// The complexity of the pattern above is 13 (gleaned from
+// HexagonGenDAGISel.inc), so the complexity here is set comfortably higher
+// so that the pattern below is chosen. Without this we generate address
+// sequences such as
+// ********************************************
+// r1 = add (r0, #4)
+// r1 = memw(r1 + #0)
+// instead of
+// r1 = memw(r0 + #4)
+// ********************************************
+let AddedComplexity = 100 in
+def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+     (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
+                                  s11_2ExtPred:$offset)))>,
+     Requires<[NoV4T]>;
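
All of the NoV4T extension patterns above funnel through the same idiom: COMBINE_rr with a zero high word. What (COMBINE_rr (TFRI 0), Rs) computes, as a one-line C++ illustration:

#include <cstdint>

// Rdd = combine(#0, Rs): zero in the high 32 bits, Rs in the low 32 bits,
// which is exactly zext i32 -> i64 (and a valid anyext as well).
static uint64_t combineZeroLow(uint32_t Rs) {
  return (uint64_t{0} << 32) | Rs;
}
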
// anyext i16 -> i64.
 def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
-     (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>;
+     (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>,
+     Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
+                                 s11_1ExtPred:$offset))),
+     (i64 (COMBINE_rr (TFRI 0), (LDrih_indexed IntRegs:$src1,
+                                  s11_1ExtPred:$offset)))>,
+     Requires<[NoV4T]>;

 // Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs).
 def : Pat<(i64 (zext (i32 IntRegs:$src1))),
-          (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>;
+          (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
+          Requires<[NoV4T]>;

 // Multiply 64-bit unsigned and use upper result.
 def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 372de9a..e1b2f88 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -21,6 +21,17 @@ def IMMEXT_c : T_Immext<(ins calltarget:$imm)>;
 def IMMEXT_g : T_Immext<(ins globaladdress:$imm)>;
 def IMMEXT_i : T_Immext<(ins u26_6Imm:$imm)>;

+// Fold (add (CONST32 tglobaladdr:$addr) <offset>) into a global address.
+def FoldGlobalAddr : ComplexPattern<i32, 1, "foldGlobalAddress", [], []>;
+
+// Fold (add (CONST32_GP tglobaladdr:$addr) <offset>) into a global address.
+def FoldGlobalAddrGP : ComplexPattern<i32, 1, "foldGlobalAddressGP", [], []>;
+
+def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr),
+    (HexagonCONST32 node:$addr), [{
+  return hasNumUsesBelowThresGA(N->getOperand(0).getNode());
+}]>;
+
 // Hexagon V4 Architecture spec defines 8 instruction classes:
 // LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM (system is not implemented in the
 // compiler)
@@ -251,6 +262,54 @@ def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst),
     []>,
     Requires<[HasV4T]>;

+// Rd=cmp.eq(Rs,#s8)
+let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2,
+isExtentSigned = 1, opExtentBits = 8 in
+def V4_A4_rcmpeqi : ALU32_ri<(outs IntRegs:$Rd),
+    (ins IntRegs:$Rs, s8Ext:$s8),
+    "$Rd = cmp.eq($Rs, #$s8)",
+    [(set (i32 IntRegs:$Rd),
+          (i32 (zext (i1 (seteq (i32 IntRegs:$Rs),
+                                s8ExtPred:$s8)))))]>,
+    Requires<[HasV4T]>;
+
+// Preserve the TSTBIT generation
+def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))),
+                                           (i32 IntRegs:$src1))), 0)))),
+      (i32 (MUX_ii (i1 (TSTBIT_rr (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+                   1, 0))>;
+
+// This instruction interfered with tstbit generation; the pattern above
+// preserves it. See tstbit.ll.
+// Rd=cmp.ne(Rs,#s8)
+let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2,
+isExtentSigned = 1, opExtentBits = 8 in
+def V4_A4_rcmpneqi : ALU32_ri<(outs IntRegs:$Rd),
+    (ins IntRegs:$Rs, s8Ext:$s8),
+    "$Rd = !cmp.eq($Rs, #$s8)",
+    [(set (i32 IntRegs:$Rd),
+          (i32 (zext (i1 (setne (i32 IntRegs:$Rs),
+                                s8ExtPred:$s8)))))]>,
+    Requires<[HasV4T]>;
+
+// Rd=cmp.eq(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def V4_A4_rcmpeq : ALU32_ri<(outs IntRegs:$Rd),
+    (ins IntRegs:$Rs, IntRegs:$Rt),
+    "$Rd = cmp.eq($Rs, $Rt)",
+    [(set (i32 IntRegs:$Rd),
+          (i32 (zext (i1 (seteq (i32 IntRegs:$Rs),
+                                IntRegs:$Rt)))))]>,
+    Requires<[HasV4T]>;
+
+// Rd=cmp.ne(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def V4_A4_rcmpneq : ALU32_ri<(outs IntRegs:$Rd),
+    (ins IntRegs:$Rs, IntRegs:$Rt),
+    "$Rd = !cmp.eq($Rs, $Rt)",
+    [(set (i32 IntRegs:$Rd),
+          (i32 (zext (i1 (setne (i32 IntRegs:$Rs),
+                                IntRegs:$Rt)))))]>,
+    Requires<[HasV4T]>;

//===----------------------------------------------------------------------===//
// ALU32
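
The "Preserve the TSTBIT generation" pattern above exists because the new rcmp instructions would otherwise match the zext/setne DAG that a single-bit test produces. The C++ source shape in question (illustrative):

// With the pattern above this selects to "p0 = tstbit(r0, r1)" plus a mux,
// rather than materializing the mask and comparing against zero.
static int testBit(unsigned X, unsigned N) {
  return (X & (1u << N)) != 0;
}
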
- @@ -280,6 +339,19 @@ def COMBINE_Ir_V4 : ALU32_ir<(outs DoubleRegs:$dst), []>, Requires<[HasV4T]>; +def HexagonWrapperCombineRI_V4 : + SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>; +def HexagonWrapperCombineIR_V4 : + SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>; + +def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i), + (COMBINE_rI_V4 IntRegs:$r, s8ExtPred:$i)>, + Requires<[HasV4T]>; + +def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r), + (COMBINE_Ir_V4 s8ExtPred:$i, IntRegs:$r)>, + Requires<[HasV4T]>; + let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 6, neverHasSideEffects = 1, validSubTargets = HasV4SubT in def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst), @@ -299,120 +371,95 @@ def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst), // These absolute set addressing mode instructions accept immediate as // an operand. We have duplicated these patterns to take global address. -let neverHasSideEffects = 1 in +let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1, +validSubTargets = HasV4SubT in { def LDrid_abs_setimm_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2), - (ins u6Imm:$addr), - "$dst1 = memd($dst2=#$addr)", + (ins u0AlwaysExt:$addr), + "$dst1 = memd($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memb(Re=#U6) -let neverHasSideEffects = 1 in def LDrib_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u6Imm:$addr), - "$dst1 = memb($dst2=#$addr)", + (ins u0AlwaysExt:$addr), + "$dst1 = memb($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memh(Re=#U6) -let neverHasSideEffects = 1 in def LDrih_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u6Imm:$addr), - "$dst1 = memh($dst2=#$addr)", + (ins u0AlwaysExt:$addr), + "$dst1 = memh($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memub(Re=#U6) -let neverHasSideEffects = 1 in def LDriub_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u6Imm:$addr), - "$dst1 = memub($dst2=#$addr)", + (ins u0AlwaysExt:$addr), + "$dst1 = memub($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memuh(Re=#U6) -let neverHasSideEffects = 1 in def LDriuh_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u6Imm:$addr), - "$dst1 = memuh($dst2=#$addr)", + (ins u0AlwaysExt:$addr), + "$dst1 = memuh($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memw(Re=#U6) -let neverHasSideEffects = 1 in def LDriw_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u6Imm:$addr), - "$dst1 = memw($dst2=#$addr)", + (ins u0AlwaysExt:$addr), + "$dst1 = memw($dst2=##$addr)", []>, Requires<[HasV4T]>; +} // Following patterns are defined for absolute set addressing mode // instruction which take global address as operand. 
-let neverHasSideEffects = 1 in +let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1, +validSubTargets = HasV4SubT in { def LDrid_abs_set_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2), - (ins globaladdress:$addr), + (ins globaladdressExt:$addr), "$dst1 = memd($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memb(Re=#U6) -let neverHasSideEffects = 1 in def LDrib_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdress:$addr), + (ins globaladdressExt:$addr), "$dst1 = memb($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memh(Re=#U6) -let neverHasSideEffects = 1 in def LDrih_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdress:$addr), + (ins globaladdressExt:$addr), "$dst1 = memh($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memub(Re=#U6) -let neverHasSideEffects = 1 in def LDriub_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdress:$addr), + (ins globaladdressExt:$addr), "$dst1 = memub($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memuh(Re=#U6) -let neverHasSideEffects = 1 in def LDriuh_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdress:$addr), + (ins globaladdressExt:$addr), "$dst1 = memuh($dst2=##$addr)", []>, Requires<[HasV4T]>; // Rd=memw(Re=#U6) -let neverHasSideEffects = 1 in def LDriw_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdress:$addr), + (ins globaladdressExt:$addr), "$dst1 = memw($dst2=##$addr)", []>, Requires<[HasV4T]>; - -// Load doubleword. -// -// Make sure that in post increment load, the first operand is always the post -// increment operand. -// -// Rdd=memd(Rs+Rt<<#u2) -// Special case pattern for indexed load without offset which is easier to -// match. AddedComplexity of this pattern should be lower than base+offset load -// and lower yet than the more generic version with offset/shift below -// Similar approach is taken for all other base+index loads. -let AddedComplexity = 10, isPredicable = 1 in -def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst=memd($src1+$src2<<#0)", - [(set (i64 DoubleRegs:$dst), - (i64 (load (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))))]>, - Requires<[HasV4T]>; +} // multiclass for load instructions with base + register offset // addressing mode @@ -512,534 +559,42 @@ def : Pat <(i64 (load (add IntRegs:$src1, Requires<[HasV4T]>; } -//// Load doubleword conditionally. 
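
The absolute-set loads above ("$dst1 = memw($dst2=##$addr)") do two things at once: load from an absolute, constant-extended address and latch that address into a register for later reuse. Sketched in C++ (illustrative only):

#include <cstdint>

// Re = ##addr; Rd = memw(Re): one instruction yields both the loaded value
// and the absolute address it came from.
static uint32_t absoluteSetLoadW(uint32_t **Re, uint32_t *Addr) {
  *Re = Addr;       // $dst2 = ##addr
  return *Addr;     // $dst1 = memw($dst2)
}
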
-// if ([!]Pv[.new]) Rd=memd(Rs+Rt<<#u2) -// if (Pv) Rd=memd(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrid_indexed_cPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst=memd($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (Pv.new) Rd=memd(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrid_indexed_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst=memd($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rd=memd(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrid_indexed_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst=memd($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv.new) Rd=memd(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrid_indexed_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst=memd($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// Rdd=memd(Rt<<#u2+#U6) - -//// Load byte. -// Rd=memb(Rs+Rt<<#u2) -let AddedComplexity = 10, isPredicable = 1 in -def LDrib_indexed_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst=memb($src1+$src2<<#0)", - [(set (i32 IntRegs:$dst), - (i32 (sextloadi8 (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 10, isPredicable = 1 in -def LDriub_indexed_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst=memub($src1+$src2<<#0)", - [(set (i32 IntRegs:$dst), - (i32 (zextloadi8 (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 10, isPredicable = 1 in -def LDriub_ae_indexed_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst=memub($src1+$src2<<#0)", - [(set (i32 IntRegs:$dst), - (i32 (extloadi8 (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 40, isPredicable = 1 in -def LDriub_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), - "$dst=memub($src1+$src2<<#$offset)", - [(set (i32 IntRegs:$dst), - (i32 (extloadi8 (add (i32 IntRegs:$src1), - (shl (i32 IntRegs:$src2), - u2ImmPred:$offset)))))]>, - Requires<[HasV4T]>; - -//// Load byte conditionally. 
-// if ([!]Pv[.new]) Rd=memb(Rs+Rt<<#u2) -// if (Pv) Rd=memb(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrib_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst=memb($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (Pv.new) Rd=memb(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrib_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst=memb($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rd=memb(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrib_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst=memb($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv.new) Rd=memb(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrib_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst=memb($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -//// Load unsigned byte conditionally. -// if ([!]Pv[.new]) Rd=memub(Rs+Rt<<#u2) -// if (Pv) Rd=memub(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriub_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst=memub($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (Pv.new) Rd=memub(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriub_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst=memub($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rd=memub(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriub_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst=memub($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv.new) Rd=memub(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriub_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst=memub($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// Rd=memb(Rt<<#u2+#U6) - -//// Load halfword -// Rd=memh(Rs+Rt<<#u2) -let AddedComplexity = 10, isPredicable = 1 in -def LDrih_indexed_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst=memh($src1+$src2<<#0)", - [(set (i32 IntRegs:$dst), - (i32 (sextloadi16 (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 10, isPredicable = 1 in -def LDriuh_indexed_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst=memuh($src1+$src2<<#0)", - [(set (i32 IntRegs:$dst), - (i32 (zextloadi16 (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 10, isPredicable = 1 in -def LDriuh_ae_indexed_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst=memuh($src1+$src2<<#0)", - [(set (i32 IntRegs:$dst), - (i32 (extloadi16 (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 40, isPredicable = 1 in -def LDriuh_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), - "$dst=memuh($src1+$src2<<#$offset)", - [(set (i32 IntRegs:$dst), - (i32 (extloadi16 (add (i32 IntRegs:$src1), - (shl 
(i32 IntRegs:$src2), - u2ImmPred:$offset)))))]>, - Requires<[HasV4T]>; - -//// Load halfword conditionally. -// if ([!]Pv[.new]) Rd=memh(Rs+Rt<<#u2) -// if (Pv) Rd=memh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrih_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst=memh($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (Pv.new) Rd=memh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrih_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst=memh($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rd=memh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrih_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst=memh($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv.new) Rd=memh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDrih_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst=memh($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -//// Load unsigned halfword conditionally. -// if ([!]Pv[.new]) Rd=memuh(Rs+Rt<<#u2) -// if (Pv) Rd=memuh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriuh_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst=memuh($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (Pv.new) Rd=memuh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriuh_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst=memuh($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rd=memuh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriuh_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst=memuh($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriuh_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst=memuh($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// Rd=memh(Rt<<#u2+#U6) - -//// Load word. -// Load predicate: Fix for bug 5279. -let neverHasSideEffects = 1 in -def LDriw_pred_V4 : LDInst2<(outs PredRegs:$dst), - (ins MEMri:$addr), - "Error; should not emit", - []>, - Requires<[HasV4T]>; - -// Rd=memw(Re=#U6) - -// Rd=memw(Rs+Rt<<#u2) -let AddedComplexity = 10, isPredicable = 1 in -def LDriw_indexed_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst=memw($src1+$src2<<#0)", - [(set (i32 IntRegs:$dst), - (i32 (load (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))))]>, - Requires<[HasV4T]>; - -//// Load word conditionally. 
-// if ([!]Pv[.new]) Rd=memw(Rs+Rt<<#u2) -// if (Pv) Rd=memw(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriw_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst=memw($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (Pv.new) Rd=memh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriw_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst=memw($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rd=memh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriw_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst=memw($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -// if (!Pv.new) Rd=memh(Rs+Rt<<#u2) -let AddedComplexity = 15, isPredicated = 1 in -def LDriw_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst=memw($src2+$src3<<#0)", - []>, - Requires<[HasV4T]>; - -/// Load from global offset - -let isPredicable = 1, neverHasSideEffects = 1 in -def LDrid_GP_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst=memd(#$global+$offset)", - []>, - Requires<[HasV4T]>; -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrid_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1) $dst=memd(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrid_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1) $dst=memd(##$global+$offset)", - []>, +// 'def pats' for load instruction base + register offset and +// zero immediate value. 
+let AddedComplexity = 10 in { +def : Pat <(i64 (load (add IntRegs:$src1, IntRegs:$src2))), + (LDrid_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, Requires<[HasV4T]>; -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrid_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1.new) $dst=memd(##$global+$offset)", - []>, +def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, IntRegs:$src2))), + (LDrib_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, Requires<[HasV4T]>; -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrid_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1.new) $dst=memd(##$global+$offset)", - []>, +def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, IntRegs:$src2))), + (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, Requires<[HasV4T]>; -let isPredicable = 1, neverHasSideEffects = 1 in -def LDrib_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst=memb(#$global+$offset)", - []>, +def : Pat <(i32 (extloadi8 (add IntRegs:$src1, IntRegs:$src2))), + (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, Requires<[HasV4T]>; -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrib_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1) $dst=memb(##$global+$offset)", - []>, +def : Pat <(i32 (sextloadi16 (add IntRegs:$src1, IntRegs:$src2))), + (LDrih_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, Requires<[HasV4T]>; -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrib_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1) $dst=memb(##$global+$offset)", - []>, +def : Pat <(i32 (zextloadi16 (add IntRegs:$src1, IntRegs:$src2))), + (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, Requires<[HasV4T]>; -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrib_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1.new) $dst=memb(##$global+$offset)", - []>, +def : Pat <(i32 (extloadi16 (add IntRegs:$src1, IntRegs:$src2))), + (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, Requires<[HasV4T]>; -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrib_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1.new) $dst=memb(##$global+$offset)", - []>, - Requires<[HasV4T]>; - - -let isPredicable = 1, neverHasSideEffects = 1 in -def LDriub_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst=memub(#$global+$offset)", - []>, - Requires<[HasV4T]>; - - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1) $dst=memub(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1) $dst=memub(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1.new) $dst=memub(##$global+$offset)", - []>, - 
Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1.new) $dst=memub(##$global+$offset)", - []>, - Requires<[HasV4T]>; - - -let isPredicable = 1, neverHasSideEffects = 1 in -def LDrih_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst=memh(#$global+$offset)", - []>, - Requires<[HasV4T]>; - - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrih_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1) $dst=memh(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrih_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1) $dst=memh(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrih_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1.new) $dst=memh(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDrih_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1.new) $dst=memh(##$global+$offset)", - []>, - Requires<[HasV4T]>; - - -let isPredicable = 1, neverHasSideEffects = 1 in -def LDriuh_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst=memuh(#$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1) $dst=memuh(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1) $dst=memuh(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1.new) $dst=memuh(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1.new) $dst=memuh(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let isPredicable = 1, neverHasSideEffects = 1 in -def LDriw_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst=memw(#$global+$offset)", - []>, - Requires<[HasV4T]>; - - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1) $dst=memw(##$global+$offset)", - []>, +def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))), + (LDriw_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>, Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1) $dst=memw(##$global+$offset)", - []>, - Requires<[HasV4T]>; - - -let neverHasSideEffects = 1, isPredicated = 1 in -def 
LDriw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if ($src1.new) $dst=memw(##$global+$offset)", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def LDriw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), - "if (!$src1.new) $dst=memw(##$global+$offset)", - []>, - Requires<[HasV4T]>; - +} let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in def LDd_GP_V4 : LDInst2<(outs DoubleRegs:$dst), @@ -1364,82 +919,73 @@ def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))), (i32 (LDw_GP_V4 tglobaladdr:$global))>, Requires<[HasV4T]>; -def : Pat <(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - -def : Pat <(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - -def : Pat <(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - -def : Pat <(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress + x) -> memd(#foo + x) -let AddedComplexity = 100 in -def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress + x) -> memb(#foo + x) -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; +// zext i1->i64 +def : Pat <(i64 (zext (i1 PredRegs:$src1))), + (i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>, + Requires<[HasV4T]>; + +// zext i32->i64 +def : Pat <(i64 (zext (i32 IntRegs:$src1))), + (i64 (COMBINE_Ir_V4 0, (i32 IntRegs:$src1)))>, + Requires<[HasV4T]>; +// zext i8->i64 +def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1), + s11_0ExtPred:$offset))), + (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1, + s11_0ExtPred:$offset)))>, + Requires<[HasV4T]>; + +// zext i16->i64 +def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriuh ADDRriS11_1:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1), + s11_1ExtPred:$offset))), + (i64 (COMBINE_Ir_V4 0, (LDriuh_indexed IntRegs:$src1, + s11_1ExtPred:$offset)))>, + Requires<[HasV4T]>; + +// anyext i16->i64 +def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDrih ADDRriS11_2:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1), + s11_1ExtPred:$offset))), + (i64 (COMBINE_Ir_V4 0, (LDrih_indexed IntRegs:$src1, + s11_1ExtPred:$offset)))>, + Requires<[HasV4T]>; + +// zext i32->i64 +def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>, + Requires<[HasV4T]>; -// Map from 
load(globaladdress + x) -> memb(#foo + x) let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; +def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), + (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1, + s11_2ExtPred:$offset)))>, + Requires<[HasV4T]>; -// Map from load(globaladdress + x) -> memub(#foo + x) -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; +// anyext i32->i64 +def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>, + Requires<[HasV4T]>; -// Map from load(globaladdress + x) -> memuh(#foo + x) let AddedComplexity = 100 in -def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; +def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), + (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1, + s11_2ExtPred:$offset)))>, + Requires<[HasV4T]>; -// Map from load(globaladdress + x) -> memh(#foo + x) -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - - -// Map from load(globaladdress + x) -> memuh(#foo + x) -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress + x) -> memw(#foo + x) -let AddedComplexity = 100 in -def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[HasV4T]>; //===----------------------------------------------------------------------===// @@ -1457,62 +1003,65 @@ def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global), /// last operand. 
/// -// memd(Re=#U6)=Rtt +// memd(Re=#U)=Rtt +let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in { def STrid_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), - (ins DoubleRegs:$src1, u6Imm:$src2), - "memd($dst1=#$src2) = $src1", + (ins DoubleRegs:$src1, u0AlwaysExt:$src2), + "memd($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; -// memb(Re=#U6)=Rs +// memb(Re=#U)=Rs def STrib_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, u6Imm:$src2), - "memb($dst1=#$src2) = $src1", + (ins IntRegs:$src1, u0AlwaysExt:$src2), + "memb($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; -// memh(Re=#U6)=Rs +// memh(Re=#U)=Rs def STrih_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, u6Imm:$src2), - "memh($dst1=#$src2) = $src1", + (ins IntRegs:$src1, u0AlwaysExt:$src2), + "memh($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; -// memw(Re=#U6)=Rs +// memw(Re=#U)=Rs def STriw_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, u6Imm:$src2), - "memw($dst1=#$src2) = $src1", + (ins IntRegs:$src1, u0AlwaysExt:$src2), + "memw($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; +} -// memd(Re=#U6)=Rtt +// memd(Re=#U)=Rtt +let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in { def STrid_abs_set_V4 : STInst2<(outs IntRegs:$dst1), - (ins DoubleRegs:$src1, globaladdress:$src2), + (ins DoubleRegs:$src1, globaladdressExt:$src2), "memd($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; -// memb(Re=#U6)=Rs +// memb(Re=#U)=Rs def STrib_abs_set_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, globaladdress:$src2), + (ins IntRegs:$src1, globaladdressExt:$src2), "memb($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; -// memh(Re=#U6)=Rs +// memh(Re=#U)=Rs def STrih_abs_set_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, globaladdress:$src2), + (ins IntRegs:$src1, globaladdressExt:$src2), "memh($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; -// memw(Re=#U6)=Rs +// memw(Re=#U)=Rs def STriw_abs_set_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, globaladdress:$src2), + (ins IntRegs:$src1, globaladdressExt:$src2), "memw($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; - +} // multiclass for store instructions with base + register offset addressing // mode @@ -1632,13 +1181,14 @@ def : Pat<(store (i64 DoubleRegs:$src4), } // memd(Ru<<#u2+#U6)=Rtt -let AddedComplexity = 10 in +let isExtended = 1, opExtendable = 2, AddedComplexity = 10, +validSubTargets = HasV4SubT in def STrid_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, DoubleRegs:$src4), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, DoubleRegs:$src4), "memd($src1<<#$src2+#$src3) = $src4", [(store (i64 DoubleRegs:$src4), (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u6ImmPred:$src3))]>, + u0AlwaysExtPred:$src3))]>, Requires<[HasV4T]>; // memd(Rx++#s4:3)=Rtt @@ -1652,34 +1202,12 @@ def STrid_shl_V4 : STInst<(outs), // if ([!]Pv[.new]) memd(#u6)=Rtt // TODO: needs to be implemented. 
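For orientation, a hedged C sketch of what the constant-extended store forms above buy; it is not from the patch, and TABLE_BASE and the function name are made up. Replacing u6Imm with u0AlwaysExt means the displacement in memd(Ru<<#u2+#U6)=Rtt is no longer confined to a six-bit field, so an arbitrary 32-bit constant such as a table base can be folded straight into the store:

#include <stdint.h>

#define TABLE_BASE 0x40001000u  /* hypothetical absolute base address */

void store_pair(uint32_t idx, int64_t v)
{
    /* (add (shl idx, #2), ##TABLE_BASE) has the shape the
       STrid_shl_V4 pattern selects; the large constant is
       emitted as a constant extender, not a 6-bit immediate */
    *(int64_t *)(uintptr_t)((idx << 2) + TABLE_BASE) = v;
}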
-// if ([!]Pv[.new]) memd(Rx++#s4:3)=Rtt -// if (Pv) memd(Rx++#s4:3)=Rtt -// if (Pv.new) memd(Rx++#s4:3)=Rtt -let AddedComplexity = 10, neverHasSideEffects = 1, - isPredicated = 1 in -def POST_STdri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, - s4_3Imm:$offset), - "if ($src1.new) memd($src3++#$offset) = $src2", - [], - "$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv) memd(Rx++#s4:3)=Rtt -// if (!Pv.new) memd(Rx++#s4:3)=Rtt -let AddedComplexity = 10, neverHasSideEffects = 1, - isPredicated = 1 in -def POST_STdri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, - s4_3Imm:$offset), - "if (!$src1.new) memd($src3++#$offset) = $src2", - [], - "$src3 = $dst">, - Requires<[HasV4T]>; - - +//===----------------------------------------------------------------------===// // multiclass for store instructions with base + immediate offset // addressing mode and immediate stored value. +// mem[bhw](Rx++#s4:3)=#s8 +// if ([!]Pv[.new]) mem[bhw](Rx++#s4:3)=#s6 +//===----------------------------------------------------------------------===// multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot, bit isPredNew> { let PNewValue = !if(isPredNew, "new", "") in @@ -1718,9 +1246,9 @@ multiclass ST_Imm<string mnemonic, string CextOp, Operand OffsetOp> { let addrMode = BaseImmOffset, InputType = "imm", validSubTargets = HasV4SubT in { - defm STrib_imm : ST_Imm<"memb", "STrib", u6_0Imm>, ImmRegRel; - defm STrih_imm : ST_Imm<"memh", "STrih", u6_1Imm>, ImmRegRel; - defm STriw_imm : ST_Imm<"memw", "STriw", u6_2Imm>, ImmRegRel; + defm STrib_imm : ST_Imm<"memb", "STrib", u6_0Imm>, ImmRegRel, PredNewRel; + defm STrih_imm : ST_Imm<"memh", "STrih", u6_1Imm>, ImmRegRel, PredNewRel; + defm STriw_imm : ST_Imm<"memw", "STriw", u6_2Imm>, ImmRegRel, PredNewRel; } let Predicates = [HasV4T], AddedComplexity = 10 in { @@ -1741,13 +1269,14 @@ def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)), Requires<[HasV4T]>; // memb(Ru<<#u2+#U6)=Rt -let AddedComplexity = 10 in +let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1, +validSubTargets = HasV4SubT in def STrib_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), "memb($src1<<#$src2+#$src3) = $src4", [(truncstorei8 (i32 IntRegs:$src4), (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u6ImmPred:$src3))]>, + u0AlwaysExtPred:$src3))]>, Requires<[HasV4T]>; // memb(Rx++#s4:0:circ(Mu))=Rt @@ -1757,30 +1286,6 @@ def STrib_shl_V4 : STInst<(outs), // memb(gp+#u16:0)=Rt -// Store byte conditionally. -// if ([!]Pv[.new]) memb(#u6)=Rt -// if ([!]Pv[.new]) memb(Rx++#s4:0)=Rt -// if (Pv) memb(Rx++#s4:0)=Rt -// if (Pv.new) memb(Rx++#s4:0)=Rt -let hasCtrlDep = 1, - isPredicated = 1 in -def POST_STbri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), - "if ($src1.new) memb($src3++#$offset) = $src2", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv) memb(Rx++#s4:0)=Rt -// if (!Pv.new) memb(Rx++#s4:0)=Rt -let hasCtrlDep = 1, - isPredicated = 1 in -def POST_STbri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), - "if (!$src1.new) memb($src3++#$offset) = $src2", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - - // Store halfword. 
// TODO: needs to be implemented // memh(Re=#U6)=Rt.H @@ -1795,13 +1300,14 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)), // memh(Ru<<#u2+#U6)=Rt.H // memh(Ru<<#u2+#U6)=Rt -let AddedComplexity = 10 in +let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1, +validSubTargets = HasV4SubT in def STrih_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), "memh($src1<<#$src2+#$src3) = $src4", [(truncstorei16 (i32 IntRegs:$src4), (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u6ImmPred:$src3))]>, + u0AlwaysExtPred:$src3))]>, Requires<[HasV4T]>; // memh(Rx++#s4:1:circ(Mu))=Rt.H @@ -1823,28 +1329,6 @@ def STrih_shl_V4 : STInst<(outs), // if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H // TODO: Needs to be implemented. -// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt -// if (Pv) memh(Rx++#s4:1)=Rt -// if (Pv.new) memh(Rx++#s4:1)=Rt -let hasCtrlDep = 1, - isPredicated = 1 in -def POST_SThri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), - "if ($src1.new) memh($src3++#$offset) = $src2", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv) memh(Rx++#s4:1)=Rt -// if (!Pv.new) memh(Rx++#s4:1)=Rt -let hasCtrlDep = 1, - isPredicated = 1 in -def POST_SThri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), - "if (!$src1.new) memh($src3++#$offset) = $src2", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - - // Store word. // memw(Re=#U6)=Rt // TODO: Needs to be implemented. @@ -1863,13 +1347,14 @@ def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)), Requires<[HasV4T]>; // memw(Ru<<#u2+#U6)=Rt -let AddedComplexity = 10 in +let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1, +validSubTargets = HasV4SubT in def STriw_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), "memw($src1<<#$src2+#$src3) = $src4", [(store (i32 IntRegs:$src4), (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u6ImmPred:$src3))]>, + u0AlwaysExtPred:$src3))]>, Requires<[HasV4T]>; // memw(Rx++#s4:2)=Rt @@ -1880,188 +1365,9 @@ def STriw_shl_V4 : STInst<(outs), // memw(gp+#u16:2)=Rt -// if ([!]Pv[.new]) memw(Rx++#s4:2)=Rt -// if (Pv) memw(Rx++#s4:2)=Rt -// if (Pv.new) memw(Rx++#s4:2)=Rt -let hasCtrlDep = 1, - isPredicated = 1 in -def POST_STwri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if ($src1.new) memw($src3++#$offset) = $src2", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv) memw(Rx++#s4:2)=Rt -// if (!Pv.new) memw(Rx++#s4:2)=Rt -let hasCtrlDep = 1, - isPredicated = 1 in -def POST_STwri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if (!$src1.new) memw($src3++#$offset) = $src2", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - - -/// store to global address - -let isPredicable = 1, neverHasSideEffects = 1 in -def STrid_GP_V4 : STInst2<(outs), - (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src), - "memd(#$global+$offset) = $src", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrid_GP_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - DoubleRegs:$src2), - "if ($src1) memd(##$global+$offset) = $src2", - 
[]>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrid_GP_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - DoubleRegs:$src2), - "if (!$src1) memd(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrid_GP_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - DoubleRegs:$src2), - "if ($src1.new) memd(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrid_GP_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - DoubleRegs:$src2), - "if (!$src1.new) memd(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let isPredicable = 1, neverHasSideEffects = 1 in -def STrib_GP_V4 : STInst2<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memb(#$global+$offset) = $src", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrib_GP_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1) memb(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrib_GP_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1) memb(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrib_GP_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1.new) memb(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrib_GP_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1.new) memb(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let isPredicable = 1, neverHasSideEffects = 1 in -def STrih_GP_V4 : STInst2<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memh(#$global+$offset) = $src", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrih_GP_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1) memh(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrih_GP_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1) memh(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrih_GP_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1.new) memh(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STrih_GP_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1.new) memh(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let isPredicable = 1, neverHasSideEffects = 1 in -def STriw_GP_V4 : STInst2<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memw(#$global+$offset) = $src", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STriw_GP_cPt_V4 : STInst2<(outs), - 
(ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1) memw(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STriw_GP_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1) memw(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STriw_GP_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1.new) memw(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def STriw_GP_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1.new) memw(##$global+$offset) = $src2", - []>, - Requires<[HasV4T]>; - // memd(#global)=Rtt -let isPredicable = 1, neverHasSideEffects = 1 in +let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1, +validSubTargets = HasV4SubT in def STd_GP_V4 : STInst2<(outs), (ins globaladdress:$global, DoubleRegs:$src), "memd(#$global) = $src", @@ -2069,7 +1375,8 @@ def STd_GP_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (Pv) memd(##global) = Rtt -let neverHasSideEffects = 1, isPredicated = 1 in +let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1, +isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { def STd_GP_cPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), "if ($src1) memd(##$global) = $src2", @@ -2077,7 +1384,6 @@ def STd_GP_cPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (!Pv) memd(##global) = Rtt -let neverHasSideEffects = 1, isPredicated = 1 in def STd_GP_cNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), "if (!$src1) memd(##$global) = $src2", @@ -2085,7 +1391,6 @@ def STd_GP_cNotPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (Pv) memd(##global) = Rtt -let neverHasSideEffects = 1, isPredicated = 1 in def STd_GP_cdnPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), "if ($src1.new) memd(##$global) = $src2", @@ -2093,15 +1398,16 @@ def STd_GP_cdnPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (!Pv) memd(##global) = Rtt -let neverHasSideEffects = 1, isPredicated = 1 in def STd_GP_cdnNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), "if (!$src1.new) memd(##$global) = $src2", []>, Requires<[HasV4T]>; +} // memb(#global)=Rt -let isPredicable = 1, neverHasSideEffects = 1 in +let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1, +validSubTargets = HasV4SubT in def STb_GP_V4 : STInst2<(outs), (ins globaladdress:$global, IntRegs:$src), "memb(#$global) = $src", @@ -2109,7 +1415,8 @@ def STb_GP_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (Pv) memb(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in +let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1, +isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { def STb_GP_cPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1) memb(##$global) = $src2", @@ -2117,7 +1424,6 @@ def STb_GP_cPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (!Pv) memb(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STb_GP_cNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1) memb(##$global) = $src2", @@ 
-2125,7 +1431,6 @@ def STb_GP_cNotPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (Pv) memb(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STb_GP_cdnPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1.new) memb(##$global) = $src2", @@ -2133,15 +1438,16 @@ def STb_GP_cdnPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (!Pv) memb(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STb_GP_cdnNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1.new) memb(##$global) = $src2", []>, Requires<[HasV4T]>; +} // memh(#global)=Rt -let isPredicable = 1, neverHasSideEffects = 1 in +let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1, +validSubTargets = HasV4SubT in def STh_GP_V4 : STInst2<(outs), (ins globaladdress:$global, IntRegs:$src), "memh(#$global) = $src", @@ -2149,7 +1455,8 @@ def STh_GP_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (Pv) memh(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in +let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1, +isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { def STh_GP_cPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1) memh(##$global) = $src2", @@ -2157,7 +1464,6 @@ def STh_GP_cPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (!Pv) memh(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STh_GP_cNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1) memh(##$global) = $src2", @@ -2165,7 +1471,6 @@ def STh_GP_cNotPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (Pv) memh(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STh_GP_cdnPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1.new) memh(##$global) = $src2", @@ -2173,15 +1478,16 @@ def STh_GP_cdnPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (!Pv) memh(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STh_GP_cdnNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1.new) memh(##$global) = $src2", []>, Requires<[HasV4T]>; +} // memw(#global)=Rt -let isPredicable = 1, neverHasSideEffects = 1 in +let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1, +validSubTargets = HasV4SubT in def STw_GP_V4 : STInst2<(outs), (ins globaladdress:$global, IntRegs:$src), "memw(#$global) = $src", @@ -2189,7 +1495,8 @@ def STw_GP_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (Pv) memw(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in +let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1, +isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { def STw_GP_cPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1) memw(##$global) = $src2", @@ -2197,7 +1504,6 @@ def STw_GP_cPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (!Pv) memw(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STw_GP_cNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1) memw(##$global) = $src2", @@ -2205,7 +1511,6 @@ def STw_GP_cNotPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (Pv) memw(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STw_GP_cdnPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1.new) memw(##$global) = $src2", 
@@ -2213,12 +1518,12 @@ def STw_GP_cdnPt_V4 : STInst2<(outs), Requires<[HasV4T]>; // if (!Pv) memw(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1 in def STw_GP_cdnNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1.new) memw(##$global) = $src2", []>, Requires<[HasV4T]>; +} // 64 bit atomic store def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global), @@ -2277,72 +1582,6 @@ def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, Requires<[HasV4T]>; -def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset), - (i64 DoubleRegs:$src1)), - (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, - (i64 DoubleRegs:$src1))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset), - (i32 IntRegs:$src1)), - (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset), - (i32 IntRegs:$src1)), - (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset), - (i32 IntRegs:$src1)), - (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress + x) -> memd(#foo + x) -let AddedComplexity = 100 in -def : Pat<(store (i64 DoubleRegs:$src1), - (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, - (i64 DoubleRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress + x) -> memb(#foo + x) -let AddedComplexity = 100 in -def : Pat<(truncstorei8 (i32 IntRegs:$src1), - (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress + x) -> memh(#foo + x) -let AddedComplexity = 100 in -def : Pat<(truncstorei16 (i32 IntRegs:$src1), - (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress + x) -> memw(#foo + x) -let AddedComplexity = 100 in -def : Pat<(store (i32 IntRegs:$src1), - (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - - - //===----------------------------------------------------------------------=== // ST - //===----------------------------------------------------------------------=== @@ -2456,35 +1695,72 @@ mayStore = 1 in { } // memb(Ru<<#u2+#U6)=Nt.new -let mayStore = 1, AddedComplexity = 10 in +let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, +isNVStore = 1, validSubTargets = HasV4SubT in def STrib_shl_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), "memb($src1<<#$src2+#$src3) = $src4.new", []>, Requires<[HasV4T]>; -// memb(Rx++#s4:0)=Nt.new -let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in -def POST_STbri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins IntRegs:$src1, 
IntRegs:$src2, s4_0Imm:$offset), - "memb($src2++#$offset) = $src1.new", +//===----------------------------------------------------------------------===// +// Post increment store +// mem[bhwd](Rx++#s4:[0123])=Nt.new +//===----------------------------------------------------------------------===// + +multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp, + bit isNot, bit isPredNew> { + let PNewValue = !if(isPredNew, "new", "") in + def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($src2++#$offset) = $src3.new", [], "$src2 = $dst">, Requires<[HasV4T]>; +} + +multiclass ST_PostInc_Pred_nv<string mnemonic, RegisterClass RC, + Operand ImmOp, bit PredNot> { + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 0>; + // Predicate new + let Predicates = [HasV4T], validSubTargets = HasV4SubT in + defm _cdn#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 1>; + } +} + +let hasCtrlDep = 1, isNVStore = 1, neverHasSideEffects = 1 in +multiclass ST_PostInc_nv<string mnemonic, string BaseOp, RegisterClass RC, + Operand ImmOp> { + + let BaseOpcode = "POST_"#BaseOp in { + let isPredicable = 1 in + def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins IntRegs:$src1, ImmOp:$offset, RC:$src2), + mnemonic#"($src1++#$offset) = $src2.new", + [], + "$src1 = $dst">, + Requires<[HasV4T]>; + + let isPredicated = 1 in { + defm Pt : ST_PostInc_Pred_nv<mnemonic, RC, ImmOp, 0 >; + defm NotPt : ST_PostInc_Pred_nv<mnemonic, RC, ImmOp, 1 >; + } + } +} + +let validSubTargets = HasV4SubT in { +defm POST_STbri: ST_PostInc_nv <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel; +defm POST_SThri: ST_PostInc_nv <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel; +defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel; +} // memb(Rx++#s4:0:circ(Mu))=Nt.new // memb(Rx++I:circ(Mu))=Nt.new // memb(Rx++Mu)=Nt.new // memb(Rx++Mu:brev)=Nt.new -// memb(gp+#u16:0)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_GP_nv_V4 : NVInst_V4<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memb(#$global+$offset) = $src.new", - []>, - Requires<[HasV4T]>; - // memb(#global)=Nt.new let mayStore = 1, neverHasSideEffects = 1 in def STb_GP_nv_V4 : NVInst_V4<(outs), @@ -2493,73 +1769,20 @@ def STb_GP_nv_V4 : NVInst_V4<(outs), []>, Requires<[HasV4T]>; -// if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new -// if (Pv) memb(Rx++#s4:0)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_STbri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), - "if ($src1) memb($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (Pv.new) memb(Rx++#s4:0)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_STbri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), - "if ($src1.new) memb($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv) memb(Rx++#s4:0)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_STbri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), - "if (!$src1) memb($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; 
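A minimal C sketch (illustrative only; names invented) of the code shape the ST_PostInc_nv multiclass is aimed at: the stored value is produced just before the store, so it can be forwarded as Nt.new while the base register is post-incremented by the small immediate:

#include <stdint.h>

void add_bias(uint8_t *dst, const uint8_t *src, int n, uint8_t k)
{
    for (int i = 0; i < n; ++i) {
        uint8_t v = (uint8_t)(src[i] + k); /* value defined here ...   */
        *dst++ = v;                        /* ... can be stored as
                                              memb(Rx++#1) = Nt.new    */
    }
}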
- -// if (!Pv.new) memb(Rx++#s4:0)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_STbri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), - "if (!$src1.new) memb($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - // memh(Ru<<#u2+#U6)=Nt.new -let mayStore = 1, AddedComplexity = 10 in +let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, +isNVStore = 1, validSubTargets = HasV4SubT in def STrih_shl_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), "memh($src1<<#$src2+#$src3) = $src4.new", []>, Requires<[HasV4T]>; -// memh(Rx++#s4:1)=Nt.new -let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in -def POST_SThri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, s4_1Imm:$offset), - "memh($src2++#$offset) = $src1.new", - [], - "$src2 = $dst">, - Requires<[HasV4T]>; - // memh(Rx++#s4:1:circ(Mu))=Nt.new // memh(Rx++I:circ(Mu))=Nt.new // memh(Rx++Mu)=Nt.new // memh(Rx++Mu:brev)=Nt.new -// memh(gp+#u16:1)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_GP_nv_V4 : NVInst_V4<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memh(#$global+$offset) = $src.new", - []>, - Requires<[HasV4T]>; - // memh(#global)=Nt.new let mayStore = 1, neverHasSideEffects = 1 in def STh_GP_nv_V4 : NVInst_V4<(outs), @@ -2568,121 +1791,32 @@ def STh_GP_nv_V4 : NVInst_V4<(outs), []>, Requires<[HasV4T]>; - -// if ([!]Pv[]) memh(Rx++#s4:1)=Nt.new -// if (Pv) memh(Rx++#s4:1)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_SThri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), - "if ($src1) memh($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (Pv.new) memh(Rx++#s4:1)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_SThri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), - "if ($src1.new) memh($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv) memh(Rx++#s4:1)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_SThri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), - "if (!$src1) memh($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv.new) memh(Rx++#s4:1)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_SThri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), - "if (!$src1.new) memh($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - // memw(Ru<<#u2+#U6)=Nt.new -let mayStore = 1, AddedComplexity = 10 in +let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, +isNVStore = 1, validSubTargets = HasV4SubT in def STriw_shl_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), "memw($src1<<#$src2+#$src3) = $src4.new", []>, Requires<[HasV4T]>; -// memw(Rx++#s4:2)=Nt.new -let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in -def POST_STwri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins IntRegs:$src1, 
IntRegs:$src2, s4_2Imm:$offset), - "memw($src2++#$offset) = $src1.new", - [], - "$src2 = $dst">, - Requires<[HasV4T]>; - // memw(Rx++#s4:2:circ(Mu))=Nt.new // memw(Rx++I:circ(Mu))=Nt.new // memw(Rx++Mu)=Nt.new // memw(Rx++Mu:brev)=Nt.new // memw(gp+#u16:2)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_GP_nv_V4 : NVInst_V4<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memw(#$global+$offset) = $src.new", - []>, - Requires<[HasV4T]>; -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, isNVStore = 1, +validSubTargets = HasV4SubT in def STw_GP_nv_V4 : NVInst_V4<(outs), (ins globaladdress:$global, IntRegs:$src), "memw(#$global) = $src.new", []>, Requires<[HasV4T]>; -// if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new -// if (Pv) memw(Rx++#s4:2)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_STwri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if ($src1) memw($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (Pv.new) memw(Rx++#s4:2)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_STwri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if ($src1.new) memw($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv) memw(Rx++#s4:2)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_STwri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if (!$src1) memw($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - -// if (!Pv.new) memw(Rx++#s4:2)=Nt.new -let mayStore = 1, hasCtrlDep = 1, - isPredicated = 1 in -def POST_STwri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if (!$src1.new) memw($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, - Requires<[HasV4T]>; - - - // if (Pv) memb(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, isNVStore = 1, +isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { def STb_GP_cPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1) memb(##$global) = $src2.new", @@ -2690,7 +1824,6 @@ def STb_GP_cPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memb(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STb_GP_cNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1) memb(##$global) = $src2.new", @@ -2698,7 +1831,6 @@ def STb_GP_cNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv) memb(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STb_GP_cdnPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1.new) memb(##$global) = $src2.new", @@ -2706,7 +1838,6 @@ def STb_GP_cdnPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memb(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STb_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1.new) memb(##$global) = $src2.new", @@ -2714,7 +1845,6 @@ def STb_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv) memh(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 
in def STh_GP_cPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1) memh(##$global) = $src2.new", @@ -2722,7 +1852,6 @@ def STh_GP_cPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memh(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STh_GP_cNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1) memh(##$global) = $src2.new", @@ -2730,7 +1859,6 @@ def STh_GP_cNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv) memh(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STh_GP_cdnPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1.new) memh(##$global) = $src2.new", @@ -2738,7 +1866,6 @@ def STh_GP_cdnPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memh(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STh_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1.new) memh(##$global) = $src2.new", @@ -2746,7 +1873,6 @@ def STh_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv) memw(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STw_GP_cPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1) memw(##$global) = $src2.new", @@ -2754,7 +1880,6 @@ def STw_GP_cPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memw(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STw_GP_cNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1) memw(##$global) = $src2.new", @@ -2762,7 +1887,6 @@ def STw_GP_cNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv) memw(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STw_GP_cdnPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if ($src1.new) memw(##$global) = $src2.new", @@ -2770,108 +1894,12 @@ def STw_GP_cdnPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memw(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1 in def STw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), "if (!$src1.new) memw(##$global) = $src2.new", []>, Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_GP_cPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1) memb(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_GP_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1) memb(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_GP_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1.new) memb(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1.new) memb(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_GP_cPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - 
IntRegs:$src2), - "if ($src1) memh(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_GP_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1) memh(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_GP_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1.new) memh(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1.new) memh(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_GP_cPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1) memw(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_GP_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1) memw(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_GP_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if ($src1.new) memw(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; - -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, - IntRegs:$src2), - "if (!$src1.new) memw(##$global+$offset) = $src2.new", - []>, - Requires<[HasV4T]>; +} //===----------------------------------------------------------------------===// // NV/ST - @@ -3061,31 +2089,37 @@ let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { // Add and accumulate. // Rd=add(Rs,add(Ru,#s6)) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 6, +validSubTargets = HasV4SubT in def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3), + (ins IntRegs:$src1, IntRegs:$src2, s6Ext:$src3), "$dst = add($src1, add($src2, #$src3))", [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1), (add (i32 IntRegs:$src2), - s6ImmPred:$src3)))]>, + s6_16ExtPred:$src3)))]>, Requires<[HasV4T]>; // Rd=add(Rs,sub(#s6,Ru)) +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6, +validSubTargets = HasV4SubT in def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3), + (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3), "$dst = add($src1, sub(#$src2, $src3))", [(set (i32 IntRegs:$dst), - (add (i32 IntRegs:$src1), (sub s6ImmPred:$src2, + (add (i32 IntRegs:$src1), (sub s6_10ExtPred:$src2, (i32 IntRegs:$src3))))]>, Requires<[HasV4T]>; // Generates the same instruction as ADDr_SUBri_V4 but matches different // pattern. 
// Rd=add(Rs,sub(#s6,Ru)) +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6, +validSubTargets = HasV4SubT in def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3), + (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3), "$dst = add($src1, sub(#$src2, $src3))", [(set (i32 IntRegs:$dst), - (sub (add (i32 IntRegs:$src1), s6ImmPred:$src2), + (sub (add (i32 IntRegs:$src1), s6_10ExtPred:$src2), (i32 IntRegs:$src3)))]>, Requires<[HasV4T]>; @@ -3099,6 +2133,7 @@ def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst), // Logical doublewords. // Rdd=and(Rtt,~Rss) +let validSubTargets = HasV4SubT in def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), "$dst = and($src1, ~$src2)", @@ -3107,6 +2142,7 @@ def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), Requires<[HasV4T]>; // Rdd=or(Rtt,~Rss) +let validSubTargets = HasV4SubT in def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), "$dst = or($src1, ~$src2)", @@ -3117,6 +2153,7 @@ def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), // Logical-logical doublewords. // Rxx^=xor(Rss,Rtt) +let validSubTargets = HasV4SubT in def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), "$dst ^= xor($src2, $src3)", @@ -3129,17 +2166,20 @@ def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst), // Logical-logical words. // Rx=or(Ru,and(Rx,#s10)) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10, +validSubTargets = HasV4SubT in def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3), + (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3), "$dst = or($src1, and($src2, #$src3))", [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), - s10ImmPred:$src3)))], + s10ExtPred:$src3)))], "$src2 = $dst">, Requires<[HasV4T]>; // Rx[&|^]=and(Rs,Rt) // Rx&=and(Rs,Rt) +let validSubTargets = HasV4SubT in def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst &= and($src2, $src3)", @@ -3150,6 +2190,7 @@ def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), Requires<[HasV4T]>; // Rx|=and(Rs,Rt) +let validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "reg" in def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst |= and($src2, $src3)", @@ -3157,9 +2198,10 @@ def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), (i32 IntRegs:$src3))))], "$src1 = $dst">, - Requires<[HasV4T]>; + Requires<[HasV4T]>, ImmRegRel; // Rx^=and(Rs,Rt) +let validSubTargets = HasV4SubT in def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst ^= and($src2, $src3)", @@ -3171,6 +2213,7 @@ def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), // Rx[&|^]=and(Rs,~Rt) // Rx&=and(Rs,~Rt) +let validSubTargets = HasV4SubT in def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst &= and($src2, ~$src3)", @@ -3181,6 +2224,7 @@ def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), Requires<[HasV4T]>; // Rx|=and(Rs,~Rt) +let validSubTargets = HasV4SubT in def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst |= and($src2, ~$src3)", @@ -3191,6 +2235,7 @@ def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), Requires<[HasV4T]>; // 
Rx^=and(Rs,~Rt) +let validSubTargets = HasV4SubT in def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst ^= and($src2, ~$src3)", @@ -3202,6 +2247,7 @@ def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), // Rx[&|^]=or(Rs,Rt) // Rx&=or(Rs,Rt) +let validSubTargets = HasV4SubT in def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst &= or($src2, $src3)", @@ -3212,6 +2258,7 @@ def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), Requires<[HasV4T]>; // Rx|=or(Rs,Rt) +let validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "reg" in def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst |= or($src2, $src3)", @@ -3219,9 +2266,10 @@ def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), (or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2), (i32 IntRegs:$src3))))], "$src1 = $dst">, - Requires<[HasV4T]>; + Requires<[HasV4T]>, ImmRegRel; // Rx^=or(Rs,Rt) +let validSubTargets = HasV4SubT in def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst ^= or($src2, $src3)", @@ -3233,6 +2281,7 @@ def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), // Rx[&|^]=xor(Rs,Rt) // Rx&=xor(Rs,Rt) +let validSubTargets = HasV4SubT in def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst &= xor($src2, $src3)", @@ -3243,6 +2292,7 @@ def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), Requires<[HasV4T]>; // Rx|=xor(Rs,Rt) +let validSubTargets = HasV4SubT in def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst |= xor($src2, $src3)", @@ -3253,6 +2303,7 @@ def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), Requires<[HasV4T]>; // Rx^=xor(Rs,Rt) +let validSubTargets = HasV4SubT in def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst ^= xor($src2, $src3)", @@ -3263,24 +2314,28 @@ def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), Requires<[HasV4T]>; // Rx|=and(Rs,#s10) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10, +validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "imm" in def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3), + (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3), "$dst |= and($src2, #$src3)", [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), - s10ImmPred:$src3)))], + s10ExtPred:$src3)))], "$src1 = $dst">, - Requires<[HasV4T]>; + Requires<[HasV4T]>, ImmRegRel; // Rx|=or(Rs,#s10) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10, +validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "imm" in def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3), + (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3), "$dst |= or($src2, #$src3)", [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), - s10ImmPred:$src3)))], + s10ExtPred:$src3)))], "$src1 = $dst">, - Requires<[HasV4T]>; + Requires<[HasV4T]>, ImmRegRel; // Modulo wrap @@ -3327,25 +2382,41 @@ def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst), // Multiply and use lower result.
// Rd=add(#u6,mpyi(Rs,#U6)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6, +validSubTargets = HasV4SubT in def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst), - (ins u6Imm:$src1, IntRegs:$src2, u6Imm:$src3), + (ins u6Ext:$src1, IntRegs:$src2, u6Imm:$src3), "$dst = add(#$src1, mpyi($src2, #$src3))", [(set (i32 IntRegs:$dst), (add (mul (i32 IntRegs:$src2), u6ImmPred:$src3), - u6ImmPred:$src1))]>, + u6ExtPred:$src1))]>, Requires<[HasV4T]>; -// Rd=add(#u6,mpyi(Rs,Rt)) +// Rd=add(##,mpyi(Rs,#U6)) +def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3), + (HexagonCONST32 tglobaladdr:$src1)), + (i32 (ADDi_MPYri_V4 tglobaladdr:$src1, IntRegs:$src2, + u6ImmPred:$src3))>; +// Rd=add(#u6,mpyi(Rs,Rt)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6, +validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst), - (ins u6Imm:$src1, IntRegs:$src2, IntRegs:$src3), + (ins u6Ext:$src1, IntRegs:$src2, IntRegs:$src3), "$dst = add(#$src1, mpyi($src2, $src3))", [(set (i32 IntRegs:$dst), (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), - u6ImmPred:$src1))]>, - Requires<[HasV4T]>; + u6ExtPred:$src1))]>, + Requires<[HasV4T]>, ImmRegRel; + +// Rd=add(##,mpyi(Rs,Rt)) +def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), + (HexagonCONST32 tglobaladdr:$src1)), + (i32 (ADDi_MPYrr_V4 tglobaladdr:$src1, IntRegs:$src2, + IntRegs:$src3))>; // Rd=add(Ru,mpyi(#u6:2,Rs)) +let validSubTargets = HasV4SubT in def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3), "$dst = add($src1, mpyi(#$src2, $src3))", @@ -3355,15 +2426,18 @@ def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // Rd=add(Ru,mpyi(Rs,#u6)) +let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 6, +validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, u6Imm:$src3), + (ins IntRegs:$src1, IntRegs:$src2, u6Ext:$src3), "$dst = add($src1, mpyi($src2, #$src3))", [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2), - u6ImmPred:$src3)))]>, - Requires<[HasV4T]>; + u6ExtPred:$src3)))]>, + Requires<[HasV4T]>, ImmRegRel; // Rx=add(Ru,mpyi(Rx,Rs)) +let validSubTargets = HasV4SubT, InputType = "reg", CextOpcode = "ADD_MPY" in def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), "$dst = add($src1, mpyi($src2, $src3))", @@ -3371,7 +2445,7 @@ def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst), (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3))))], "$src2 = $dst">, - Requires<[HasV4T]>; + Requires<[HasV4T]>, ImmRegRel; // Polynomial multiply words @@ -3414,92 +2488,107 @@ def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst), // Shift by immediate and accumulate. 
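Before moving on to the shift forms, a hedged C sketch of source the add-with-multiply instructions above are written to match; it is not from the patch, and the constants are arbitrary values that fit the u6 fields:

#include <stdint.h>

int32_t scale_bias(int32_t x)
{
    /* add(#u6, mpyi(Rs, #U6)): the ADDi_MPYri_V4 pattern */
    return 40 + x * 24;
}

int32_t mac(int32_t acc, int32_t x)
{
    /* add(Ru, mpyi(Rs, #u6)): the ADDr_MPYri_V4 pattern; with the
       u6Ext operand the immediate may also be constant-extended */
    return acc + x * 6;
}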
// Rx=add(#u8,asl(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = add(#$src1, asl($src2, #$src3))", [(set (i32 IntRegs:$dst), (add (shl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ImmPred:$src1))], + u8ExtPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; // Rx=add(#u8,lsr(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = add(#$src1, lsr($src2, #$src3))", [(set (i32 IntRegs:$dst), (add (srl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ImmPred:$src1))], + u8ExtPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; // Rx=sub(#u8,asl(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = sub(#$src1, asl($src2, #$src3))", [(set (i32 IntRegs:$dst), (sub (shl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ImmPred:$src1))], + u8ExtPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; // Rx=sub(#u8,lsr(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = sub(#$src1, lsr($src2, #$src3))", [(set (i32 IntRegs:$dst), (sub (srl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ImmPred:$src1))], + u8ExtPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; //Shift by immediate and logical. 
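For the shift-accumulate forms above, a companion C sketch (illustrative only); the shift-logical variants that follow have the same shape with and/or in place of add/sub:

#include <stdint.h>

uint32_t shift_add(uint32_t x)
{
    /* add(#u8, asl(Rx, #U5)): ADDi_ASLri_V4; 200 fits the u8 field,
       and with u8Ext larger constants can be extender-encoded */
    return 200 + (x << 3);
}

uint32_t shift_rsub(uint32_t x)
{
    /* sub(#u8, lsr(Rx, #U5)): SUBi_LSRri_V4 */
    return 100 - (x >> 2);
}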
//Rx=and(#u8,asl(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = and(#$src1, asl($src2, #$src3))", [(set (i32 IntRegs:$dst), (and (shl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ImmPred:$src1))], + u8ExtPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; //Rx=and(#u8,lsr(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +validSubTargets = HasV4SubT in def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = and(#$src1, lsr($src2, #$src3))", [(set (i32 IntRegs:$dst), (and (srl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ImmPred:$src1))], + u8ExtPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; //Rx=or(#u8,asl(Rx,#U5)) -let AddedComplexity = 30 in +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +AddedComplexity = 30, validSubTargets = HasV4SubT in def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = or(#$src1, asl($src2, #$src3))", [(set (i32 IntRegs:$dst), (or (shl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ImmPred:$src1))], + u8ExtPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; //Rx=or(#u8,lsr(Rx,#U5)) -let AddedComplexity = 30 in +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, +AddedComplexity = 30, validSubTargets = HasV4SubT in def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), - (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), + (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = or(#$src1, lsr($src2, #$src3))", [(set (i32 IntRegs:$dst), (or (srl (i32 IntRegs:$src2), u5ImmPred:$src3), - u8ImmPred:$src1))], + u8ExtPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; //Shift by register. //Rd=lsl(#s6,Rt) +let validSubTargets = HasV4SubT in { def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2), "$dst = lsl(#$src1, $src2)", [(set (i32 IntRegs:$dst), (shl s6ImmPred:$src1, @@ -3547,7 +2636,7 @@ def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), (i32 IntRegs:$src3))))], "$src1 = $dst">, Requires<[HasV4T]>; - +} //===----------------------------------------------------------------------===// // XTYPE/SHIFT - @@ -3981,7 +3070,61 @@ def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs), // incorrect code for negative numbers. 
// Pd=cmpb.eq(Rs,#u8) -let isCompare = 1 in +// p=!cmp.eq(r1,r2) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotEQ_rr : ALU32_rr<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = !cmp.eq($src1, $src2)", + [(set (i1 PredRegs:$dst), + (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +// p=!cmp.eq(r1,#s10) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotEQ_ri : ALU32_ri<(outs PredRegs:$dst), + (ins IntRegs:$src1, s10Ext:$src2), + "$dst = !cmp.eq($src1, #$src2)", + [(set (i1 PredRegs:$dst), + (setne (i32 IntRegs:$src1), s10ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// p=!cmp.gt(r1,r2) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotGT_rr : ALU32_rr<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = !cmp.gt($src1, $src2)", + [(set (i1 PredRegs:$dst), + (not (setgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>, + Requires<[HasV4T]>; + +// p=!cmp.gt(r1,#s10) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotGT_ri : ALU32_ri<(outs PredRegs:$dst), + (ins IntRegs:$src1, s10Ext:$src2), + "$dst = !cmp.gt($src1, #$src2)", + [(set (i1 PredRegs:$dst), + (not (setgt (i32 IntRegs:$src1), s10ImmPred:$src2)))]>, + Requires<[HasV4T]>; + +// p=!cmp.gtu(r1,r2) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotGTU_rr : ALU32_rr<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = !cmp.gtu($src1, $src2)", + [(set (i1 PredRegs:$dst), + (not (setugt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>, + Requires<[HasV4T]>; + +// p=!cmp.gtu(r1,#u9) +let isCompare = 1, validSubTargets = HasV4SubT in +def CMPnotGTU_ri : ALU32_ri<(outs PredRegs:$dst), + (ins IntRegs:$src1, u9Ext:$src2), + "$dst = !cmp.gtu($src1, #$src2)", + [(set (i1 PredRegs:$dst), + (not (setugt (i32 IntRegs:$src1), u9ImmPred:$src2)))]>, + Requires<[HasV4T]>; + +let isCompare = 1, validSubTargets = HasV4SubT in def CMPbEQri_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2), "$dst = cmpb.eq($src1, #$src2)", @@ -3989,8 +3132,14 @@ def CMPbEQri_V4 : MInst<(outs PredRegs:$dst), (seteq (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2))]>, Requires<[HasV4T]>; +def : Pat <(brcond (i1 (setne (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2)), + bb:$offset), + (JMP_cNot (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2), + bb:$offset)>, + Requires<[HasV4T]>; + // Pd=cmpb.eq(Rs,Rt) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT in def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmpb.eq($src1, $src2)", @@ -4000,7 +3149,7 @@ def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst), Requires<[HasV4T]>; // Pd=cmpb.eq(Rs,Rt) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT in def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmpb.eq($src1, $src2)", @@ -4010,7 +3159,7 @@ def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst), Requires<[HasV4T]>; // Pd=cmpb.gt(Rs,Rt) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT in def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmpb.gt($src1, $src2)", @@ -4020,29 +3169,237 @@ def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst), Requires<[HasV4T]>; // Pd=cmpb.gtu(Rs,#u7) -let isCompare = 1 in +let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7, +isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU", InputType = "imm" in def CMPbGTUri_V4 : MInst<(outs 
PredRegs:$dst), - (ins IntRegs:$src1, u7Imm:$src2), + (ins IntRegs:$src1, u7Ext:$src2), "$dst = cmpb.gtu($src1, #$src2)", [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255), - u7ImmPred:$src2))]>, - Requires<[HasV4T]>; + u7ExtPred:$src2))]>, + Requires<[HasV4T]>, ImmRegRel; + +// SDNode for converting immediate C to C-1. +def DEC_CONST_BYTE : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformU7ToU7M1Imm(imm); +}]>; + +// For the sequence +// zext( seteq ( and(Rs, 255), u8)) +// Generate +// Pd=cmpb.eq(Rs, #u8) +// if (Pd.new) Rd=#1 +// if (!Pd.new) Rd=#0 +def : Pat <(i32 (zext (i1 (seteq (i32 (and (i32 IntRegs:$Rs), 255)), + u8ExtPred:$u8)))), + (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs), + (u8ExtPred:$u8))), + 1, 0))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setne ( and(Rs, 255), u8)) +// Generate +// Pd=cmpb.eq(Rs, #u8) +// if (Pd.new) Rd=#0 +// if (!Pd.new) Rd=#1 +def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 IntRegs:$Rs), 255)), + u8ExtPred:$u8)))), + (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs), + (u8ExtPred:$u8))), + 0, 1))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( seteq (Rs, and(Rt, 255))) +// Generate +// Pd=cmpb.eq(Rs, Rt) +// if (Pd.new) Rd=#1 +// if (!Pd.new) Rd=#0 +def : Pat <(i32 (zext (i1 (seteq (i32 IntRegs:$Rt), + (i32 (and (i32 IntRegs:$Rs), 255)))))), + (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs), + (i32 IntRegs:$Rt))), + 1, 0))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setne (Rs, and(Rt, 255))) +// Generate +// Pd=cmpb.eq(Rs, Rt) +// if (Pd.new) Rd=#0 +// if (!Pd.new) Rd=#1 +def : Pat <(i32 (zext (i1 (setne (i32 IntRegs:$Rt), + (i32 (and (i32 IntRegs:$Rs), 255)))))), + (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs), + (i32 IntRegs:$Rt))), + 0, 1))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setugt ( and(Rs, 255), u8)) +// Generate +// Pd=cmpb.gtu(Rs, #u8) +// if (Pd.new) Rd=#1 +// if (!Pd.new) Rd=#0 +def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 255)), + u8ExtPred:$u8)))), + (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs), + (u8ExtPred:$u8))), + 1, 0))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setugt ( and(Rs, 254), u8)) +// Generate +// Pd=cmpb.gtu(Rs, #u8) +// if (Pd.new) Rd=#1 +// if (!Pd.new) Rd=#0 +def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)), + u8ExtPred:$u8)))), + (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs), + (u8ExtPred:$u8))), + 1, 0))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setult ( Rs, Rt)) +// Generate +// Pd=cmp.ltu(Rs, Rt) +// if (Pd.new) Rd=#1 +// if (!Pd.new) Rd=#0 +// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs) +def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), + (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt), + (i32 IntRegs:$Rs))), + 1, 0))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setlt ( Rs, Rt)) +// Generate +// Pd=cmp.lt(Rs, Rt) +// if (Pd.new) Rd=#1 +// if (!Pd.new) Rd=#0 +// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs) +def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), + (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt), + (i32 IntRegs:$Rs))), + 1, 0))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setugt ( Rs, Rt)) +// Generate +// Pd=cmp.gtu(Rs, Rt) +// if (Pd.new) Rd=#1 +// if (!Pd.new) Rd=#0 +def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), + (i32 (TFR_condset_ii 
(i1 (CMPGTUrr (i32 IntRegs:$Rs), + (i32 IntRegs:$Rt))), + 1, 0))>, + Requires<[HasV4T]>; + +// This pattern interferes with CoreMark performance; not implementing at this +// time. +// For the sequence +// zext( setgt ( Rs, Rt)) +// Generate +// Pd=cmp.gt(Rs, Rt) +// if (Pd.new) Rd=#1 +// if (!Pd.new) Rd=#0 + +// For the sequence +// zext( setuge ( Rs, Rt)) +// Generate +// Pd=cmp.ltu(Rs, Rt) +// if (Pd.new) Rd=#0 +// if (!Pd.new) Rd=#1 +// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs) +def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), + (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt), + (i32 IntRegs:$Rs))), + 0, 1))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setge ( Rs, Rt)) +// Generate +// Pd=cmp.lt(Rs, Rt) +// if (Pd.new) Rd=#0 +// if (!Pd.new) Rd=#1 +// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs) +def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), + (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt), + (i32 IntRegs:$Rs))), + 0, 1))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setule ( Rs, Rt)) +// Generate +// Pd=cmp.gtu(Rs, Rt) +// if (Pd.new) Rd=#0 +// if (!Pd.new) Rd=#1 +def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), + (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs), + (i32 IntRegs:$Rt))), + 0, 1))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setle ( Rs, Rt)) +// Generate +// Pd=cmp.gt(Rs, Rt) +// if (Pd.new) Rd=#0 +// if (!Pd.new) Rd=#1 +def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))), + (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rs), + (i32 IntRegs:$Rt))), + 0, 1))>, + Requires<[HasV4T]>; + +// For the sequence +// zext( setult ( and(Rs, 255), u8)) +// Use the isdigit transformation below + +// Generate code of the form 'mux_ii(cmpbgtu(Rdd, C-1),0,1)' +// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;. +// The isdigit transformation relies on two 'clever' aspects: +// 1) The data type is unsigned, which allows us to eliminate a zero test after +// biasing the expression by 48. We depend on the representation and +// semantics of the unsigned types. +// 2) The front end has converted <= 9 into < 10 on entry to LLVM. +// +// For the C code: +// retval = ((c>='0') & (c<='9')) ? 1 : 0; +// The code is transformed upstream of LLVM into +// retval = (c-48) < 10 ? 1 : 0; +let AddedComplexity = 139 in +def : Pat <(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)), + u7StrictPosImmPred:$src2)))), + (i32 (MUX_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$src1), + (DEC_CONST_BYTE u7StrictPosImmPred:$src2))), + 0, 1))>, + Requires<[HasV4T]>; // Pd=cmpb.gtu(Rs,Rt) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU", +InputType = "reg" in def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmpb.gtu($src1, $src2)", [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255), (and (i32 IntRegs:$src2), 255)))]>, - Requires<[HasV4T]>; + Requires<[HasV4T]>, ImmRegRel; // The following instruction is not extended, as doing so results in incorrect // code for negative numbers. // Signed half compare(.eq) ri.
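The DEC_CONST_BYTE pattern above leans on the identity that, for unsigned x and a strictly positive constant C, "x < C" is exactly "!(x > C-1)", which is what mux(cmpb.gtu(Rs, #C-1), 0, 1) computes. A small self-checking C++ sketch of the isdigit shape described in the comments; the function names are illustrative only:

#include <cassert>
#include <cstdint>

// Original source shape: r = ((c >= '0') & (c <= '9')) ? 1 : 0;
static int isDigitBranchy(uint8_t c) {
  return (c >= '0' && c <= '9') ? 1 : 0;
}

// Shape after the front end biases by 48 and the pattern rewrites
// "x < 10" as "!(x > 9)", i.e. mux(cmpb.gtu(x, #9), 0, 1).
static int isDigitTransformed(uint8_t c) {
  uint8_t biased = uint8_t(c - '0'); // unsigned wraparound removes the >= '0' test
  return (biased > 9) ? 0 : 1;       // cmpb.gtu against C-1 = 10-1
}

int main() {
  for (unsigned v = 0; v < 256; ++v)
    assert(isDigitBranchy(uint8_t(v)) == isDigitTransformed(uint8_t(v)));
  return 0;
}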
// Pd=cmph.eq(Rs,#s8) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT in def CMPhEQri_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), "$dst = cmph.eq($src1, #$src2)", @@ -4056,7 +3413,7 @@ def CMPhEQri_V4 : MInst<(outs PredRegs:$dst), // r0=and(r0,#0xffff) // p0=cmp.eq(r0,#0) // Pd=cmph.eq(Rs,Rt) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT in def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmph.eq($src1, $src2)", @@ -4071,7 +3428,7 @@ def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst), // r1=asl(r1,16) // p0=cmp.eq(r0,r1) // Pd=cmph.eq(Rs,Rt) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT in def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmph.eq($src1, $src2)", @@ -4085,19 +3442,20 @@ used in the cmph.gt instruction. // Signed half compare(.gt) ri. // Pd=cmph.gt(Rs,#s8) -let isCompare = 1 in +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8, +isCompare = 1, validSubTargets = HasV4SubT in def CMPhGTri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, s8Imm:$src2), + (ins IntRegs:$src1, s8Ext:$src2), "$dst = cmph.gt($src1, #$src2)", [(set (i1 PredRegs:$dst), (setgt (shl (i32 IntRegs:$src1), (i32 16)), - s8ImmPred:$src2))]>, + s8ExtPred:$src2))]>, Requires<[HasV4T]>; */ // Signed half compare(.gt) rr. // Pd=cmph.gt(Rs,Rt) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT in def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmph.gt($src1, $src2)", @@ -4108,24 +3466,41 @@ def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst), // Unsigned half compare rr (.gtu). // Pd=cmph.gtu(Rs,Rt) -let isCompare = 1 in +let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU", +InputType = "reg" in def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmph.gtu($src1, $src2)", [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535), (and (i32 IntRegs:$src2), 65535)))]>, - Requires<[HasV4T]>; + Requires<[HasV4T]>, ImmRegRel; // Unsigned half compare ri (.gtu). 
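The cmph.eq expansions above rest on a simple identity: two registers agree in their low 16 bits exactly when xor-ing them leaves no bits set in that half, which is what the r0=xor(r0,r1); r0=and(r0,#0xffff); p0=cmp.eq(r0,#0) sequence checks. A short C++ sketch of the same identity:

#include <cassert>
#include <cstdint>

// halfEqViaXor mirrors the xor/and/cmp.eq expansion of cmph.eq.
static bool halfEqViaXor(uint32_t a, uint32_t b) {
  return ((a ^ b) & 0xffffu) == 0;
}

int main() {
  assert(halfEqViaXor(0xdead5678u, 0x12345678u));  // low halves match
  assert(!halfEqViaXor(0x12345678u, 0x12345679u)); // low halves differ
  return 0;
}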
// Pd=cmph.gtu(Rs,#u7) -let isCompare = 1 in +let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7, +isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU", +InputType = "imm" in def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, u7Imm:$src2), + (ins IntRegs:$src1, u7Ext:$src2), "$dst = cmph.gtu($src1, #$src2)", [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535), - u7ImmPred:$src2))]>, - Requires<[HasV4T]>; + u7ExtPred:$src2))]>, + Requires<[HasV4T]>, ImmRegRel; + +let validSubTargets = HasV4SubT in +def NTSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = !tstbit($src1, $src2)", + [(set (i1 PredRegs:$dst), + (seteq (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>, + Requires<[HasV4T]>; + +let validSubTargets = HasV4SubT in +def NTSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + "$dst = !tstbit($src1, $src2)", + [(set (i1 PredRegs:$dst), + (seteq (and (shl 1, u5ImmPred:$src2), (i32 IntRegs:$src1)), 0))]>, + Requires<[HasV4T]>; //===----------------------------------------------------------------------===// // XTYPE/PRED - @@ -4237,227 +3612,156 @@ let isReturn = 1, isTerminator = 1, Requires<[HasV4T]>; } - // Load/Store with absolute addressing mode // memw(#u6)=Rt -multiclass ST_abs<string OpcStr> { - let isPredicable = 1 in - def _abs_V4 : STInst2<(outs), - (ins globaladdress:$absaddr, IntRegs:$src), - !strconcat(OpcStr, "(##$absaddr) = $src"), - []>, - Requires<[HasV4T]>; - - let isPredicated = 1 in - def _abs_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), - !strconcat("if ($src1)", - !strconcat(OpcStr, "(##$absaddr) = $src2")), - []>, - Requires<[HasV4T]>; - - let isPredicated = 1 in - def _abs_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), - !strconcat("if (!$src1)", - !strconcat(OpcStr, "(##$absaddr) = $src2")), +multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot, + bit isPredNew> { + let PNewValue = !if(isPredNew, "new", "") in + def NAME#_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"(##$absaddr) = $src2", []>, Requires<[HasV4T]>; +} - let isPredicated = 1 in - def _abs_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), - !strconcat("if ($src1.new)", - !strconcat(OpcStr, "(##$absaddr) = $src2")), - []>, - Requires<[HasV4T]>; +multiclass ST_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> { + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 0>; + // Predicate new + defm _cdn#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 1>; + } +} - let isPredicated = 1 in - def _abs_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), - !strconcat("if (!$src1.new)", - !strconcat(OpcStr, "(##$absaddr) = $src2")), +let isNVStorable = 1, isExtended = 1, neverHasSideEffects = 1 in +multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 0, isPredicable = 1 in + def NAME#_V4 : STInst2<(outs), + (ins globaladdressExt:$absaddr, RC:$src), + mnemonic#"(##$absaddr) = $src", []>, Requires<[HasV4T]>; - def _abs_nv_V4 : STInst2<(outs), - (ins globaladdress:$absaddr, IntRegs:$src), - !strconcat(OpcStr, "(##$absaddr) 
= $src.new"), - []>, - Requires<[HasV4T]>; + let opExtendable = 1, isPredicated = 1 in { + defm Pt : ST_Abs_Pred<mnemonic, RC, 0>; + defm NotPt : ST_Abs_Pred<mnemonic, RC, 1>; + } + } +} - let isPredicated = 1 in - def _abs_cPt_nv_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), - !strconcat("if ($src1)", - !strconcat(OpcStr, "(##$absaddr) = $src2.new")), +multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot, + bit isPredNew> { + let PNewValue = !if(isPredNew, "new", "") in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"(##$absaddr) = $src2.new", []>, Requires<[HasV4T]>; +} - let isPredicated = 1 in - def _abs_cNotPt_nv_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), - !strconcat("if (!$src1)", - !strconcat(OpcStr, "(##$absaddr) = $src2.new")), - []>, - Requires<[HasV4T]>; +multiclass ST_Abs_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> { + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 0>; + // Predicate new + defm _cdn#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 1>; + } +} - let isPredicated = 1 in - def _abs_cdnPt_nv_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), - !strconcat("if ($src1.new)", - !strconcat(OpcStr, "(##$absaddr) = $src2.new")), +let mayStore = 1, isNVStore = 1, isExtended = 1, neverHasSideEffects = 1 in +multiclass ST_Abs_nv<string mnemonic, string CextOp, RegisterClass RC> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 0, isPredicable = 1 in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins globaladdressExt:$absaddr, RC:$src), + mnemonic#"(##$absaddr) = $src.new", []>, Requires<[HasV4T]>; - let isPredicated = 1 in - def _abs_cdnNotPt_nv_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), - !strconcat("if (!$src1.new)", - !strconcat(OpcStr, "(##$absaddr) = $src2.new")), - []>, - Requires<[HasV4T]>; + let opExtendable = 1, isPredicated = 1 in { + defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>; + defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>; + } + } } -let AddedComplexity = 30, isPredicable = 1 in -def STrid_abs_V4 : STInst<(outs), - (ins globaladdress:$absaddr, DoubleRegs:$src), - "memd(##$absaddr) = $src", - [(store (i64 DoubleRegs:$src), - (HexagonCONST32 tglobaladdr:$absaddr))]>, - Requires<[HasV4T]>; +let addrMode = Absolute in { + defm STrib_abs : ST_Abs<"memb", "STrib", IntRegs>, + ST_Abs_nv<"memb", "STrib", IntRegs>, AddrModeRel; -let AddedComplexity = 30, isPredicated = 1 in -def STrid_abs_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2), - "if ($src1) memd(##$absaddr) = $src2", - []>, - Requires<[HasV4T]>; + defm STrih_abs : ST_Abs<"memh", "STrih", IntRegs>, + ST_Abs_nv<"memh", "STrih", IntRegs>, AddrModeRel; -let AddedComplexity = 30, isPredicated = 1 in -def STrid_abs_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2), - "if (!$src1) memd(##$absaddr) = $src2", - []>, - Requires<[HasV4T]>; + defm STriw_abs : ST_Abs<"memw", "STriw", IntRegs>, + ST_Abs_nv<"memw", "STriw", IntRegs>, AddrModeRel; -let AddedComplexity = 30, isPredicated = 1 in -def STrid_abs_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2), - "if ($src1.new) memd(##$absaddr) = $src2", 
- []>, - Requires<[HasV4T]>; - -let AddedComplexity = 30, isPredicated = 1 in -def STrid_abs_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2), - "if (!$src1.new) memd(##$absaddr) = $src2", - []>, - Requires<[HasV4T]>; - -defm STrib : ST_abs<"memb">; -defm STrih : ST_abs<"memh">; -defm STriw : ST_abs<"memw">; + let isNVStorable = 0 in + defm STrid_abs : ST_Abs<"memd", "STrid", DoubleRegs>, AddrModeRel; +} -let Predicates = [HasV4T], AddedComplexity = 30 in +let Predicates = [HasV4T], AddedComplexity = 30 in { def : Pat<(truncstorei8 (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)), (STrib_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>; -let Predicates = [HasV4T], AddedComplexity = 30 in def : Pat<(truncstorei16 (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)), (STrih_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>; -let Predicates = [HasV4T], AddedComplexity = 30 in def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)), (STriw_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>; +def : Pat<(store (i64 DoubleRegs:$src1), + (HexagonCONST32 tglobaladdr:$absaddr)), + (STrid_abs_V4 tglobaladdr: $absaddr, DoubleRegs: $src1)>; +} -multiclass LD_abs<string OpcStr> { - let isPredicable = 1 in - def _abs_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$absaddr), - !strconcat("$dst = ", !strconcat(OpcStr, "(##$absaddr)")), - []>, - Requires<[HasV4T]>; - - let isPredicated = 1 in - def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$absaddr), - !strconcat("if ($src1) $dst = ", - !strconcat(OpcStr, "(##$absaddr)")), +multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot, + bit isPredNew> { + let PNewValue = !if(isPredNew, "new", "") in + def NAME : LDInst2<(outs RC:$dst), + (ins PredRegs:$src1, globaladdressExt:$absaddr), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#"$dst = "#mnemonic#"(##$absaddr)", []>, Requires<[HasV4T]>; +} - let isPredicated = 1 in - def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$absaddr), - !strconcat("if (!$src1) $dst = ", - !strconcat(OpcStr, "(##$absaddr)")), - []>, - Requires<[HasV4T]>; +multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> { + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 0>; + // Predicate new + defm _cdn#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 1>; + } +} - let isPredicated = 1 in - def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$absaddr), - !strconcat("if ($src1.new) $dst = ", - !strconcat(OpcStr, "(##$absaddr)")), +let isExtended = 1, neverHasSideEffects = 1 in +multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { + let opExtendable = 1, isPredicable = 1 in + def NAME#_V4 : LDInst2<(outs RC:$dst), + (ins globaladdressExt:$absaddr), + "$dst = "#mnemonic#"(##$absaddr)", []>, Requires<[HasV4T]>; - let isPredicated = 1 in - def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$absaddr), - !strconcat("if (!$src1.new) $dst = ", - !strconcat(OpcStr, "(##$absaddr)")), - []>, - Requires<[HasV4T]>; + let opExtendable = 2, isPredicated = 1 in { + defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>; + defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>; + } + } } -let AddedComplexity = 30 in -def LDrid_abs_V4 : LDInst<(outs DoubleRegs:$dst), - (ins 
globaladdress:$absaddr), - "$dst = memd(##$absaddr)", - [(set (i64 DoubleRegs:$dst), - (load (HexagonCONST32 tglobaladdr:$absaddr)))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 30, isPredicated = 1 in -def LDrid_abs_cPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$absaddr), - "if ($src1) $dst = memd(##$absaddr)", - []>, - Requires<[HasV4T]>; - -let AddedComplexity = 30, isPredicated = 1 in -def LDrid_abs_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$absaddr), - "if (!$src1) $dst = memd(##$absaddr)", - []>, - Requires<[HasV4T]>; - -let AddedComplexity = 30, isPredicated = 1 in -def LDrid_abs_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$absaddr), - "if ($src1.new) $dst = memd(##$absaddr)", - []>, - Requires<[HasV4T]>; - -let AddedComplexity = 30, isPredicated = 1 in -def LDrid_abs_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$absaddr), - "if (!$src1.new) $dst = memd(##$absaddr)", - []>, - Requires<[HasV4T]>; - -defm LDrib : LD_abs<"memb">; -defm LDriub : LD_abs<"memub">; -defm LDrih : LD_abs<"memh">; -defm LDriuh : LD_abs<"memuh">; -defm LDriw : LD_abs<"memw">; - +let addrMode = Absolute in { + defm LDrib_abs : LD_Abs<"memb", "LDrib", IntRegs>, AddrModeRel; + defm LDriub_abs : LD_Abs<"memub", "LDriub", IntRegs>, AddrModeRel; + defm LDrih_abs : LD_Abs<"memh", "LDrih", IntRegs>, AddrModeRel; + defm LDriuh_abs : LD_Abs<"memuh", "LDriuh", IntRegs>, AddrModeRel; + defm LDriw_abs : LD_Abs<"memw", "LDriw", IntRegs>, AddrModeRel; + defm LDrid_abs : LD_Abs<"memd", "LDrid", DoubleRegs>, AddrModeRel; +} let Predicates = [HasV4T], AddedComplexity = 30 in def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))), @@ -4577,172 +3881,167 @@ defm STrih_ind : ST_indirect_lo<"memh", truncstorei16>; defm STriw_ind : ST_indirect_lo<"memw", store>; // Store - absolute addressing mode: These instructions take a constant -// value as the extended operand +// value as the extended operand.
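For context, this is the kind of source-level access the absolute-addressing loads above and the ST_absimm stores below are meant to select: when the address is a link-time constant, the access becomes a single mem*(##symbol) instruction with the full address carried in a constant extender, instead of first materializing the address into a register. A hedged C++ illustration; the symbol g and the selected assembly in the comments are hypothetical:

extern int g; // global whose address is a link-time constant

int loadAbs() { return g; }     // can select to: r0 = memw(##g)

void storeAbs(int v) { g = v; } // can select to: memw(##g) = r0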
multiclass ST_absimm<string OpcStr> { - let isPredicable = 1 in +let isExtended = 1, opExtendable = 0, isPredicable = 1, +validSubTargets = HasV4SubT in def _abs_V4 : STInst2<(outs), - (ins u6Imm:$src1, IntRegs:$src2), - !strconcat(OpcStr, "(#$src1) = $src2"), + (ins u0AlwaysExt:$src1, IntRegs:$src2), + !strconcat(OpcStr, "(##$src1) = $src2"), []>, Requires<[HasV4T]>; - let isPredicated = 1 in +let isExtended = 1, opExtendable = 1, isPredicated = 1, +validSubTargets = HasV4SubT in { def _abs_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), - !strconcat("if ($src1)", !strconcat(OpcStr, "(#$src2) = $src3")), + (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), + !strconcat("if ($src1)", !strconcat(OpcStr, "(##$src2) = $src3")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in def _abs_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), - !strconcat("if (!$src1)", !strconcat(OpcStr, "(#$src2) = $src3")), + (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), + !strconcat("if (!$src1)", !strconcat(OpcStr, "(##$src2) = $src3")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in def _abs_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), !strconcat("if ($src1.new)", - !strconcat(OpcStr, "(#$src2) = $src3")), + !strconcat(OpcStr, "(##$src2) = $src3")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in def _abs_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), !strconcat("if (!$src1.new)", - !strconcat(OpcStr, "(#$src2) = $src3")), + !strconcat(OpcStr, "(##$src2) = $src3")), []>, Requires<[HasV4T]>; +} - def _abs_nv_V4 : STInst2<(outs), - (ins u6Imm:$src1, IntRegs:$src2), - !strconcat(OpcStr, "(#$src1) = $src2.new"), +let isExtended = 1, opExtendable = 0, mayStore = 1, isNVStore = 1, +validSubTargets = HasV4SubT in + def _abs_nv_V4 : NVInst_V4<(outs), + (ins u0AlwaysExt:$src1, IntRegs:$src2), + !strconcat(OpcStr, "(##$src1) = $src2.new"), []>, Requires<[HasV4T]>; - let isPredicated = 1 in - def _abs_cPt_nv_V4 : STInst2<(outs), - (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), +let isExtended = 1, opExtendable = 1, mayStore = 1, isPredicated = 1, +isNVStore = 1, validSubTargets = HasV4SubT in { + def _abs_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), !strconcat("if ($src1)", - !strconcat(OpcStr, "(#$src2) = $src3.new")), + !strconcat(OpcStr, "(##$src2) = $src3.new")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in - def _abs_cNotPt_nv_V4 : STInst2<(outs), - (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + def _abs_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), !strconcat("if (!$src1)", - !strconcat(OpcStr, "(#$src2) = $src3.new")), + !strconcat(OpcStr, "(##$src2) = $src3.new")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in - def _abs_cdnPt_nv_V4 : STInst2<(outs), - (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + def _abs_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), !strconcat("if ($src1.new)", - !strconcat(OpcStr, "(#$src2) = $src3.new")), + !strconcat(OpcStr, "(##$src2) = $src3.new")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in - def _abs_cdnNotPt_nv_V4 : STInst2<(outs), - (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + def _abs_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, 
u0AlwaysExt:$src2, IntRegs:$src3), !strconcat("if (!$src1.new)", - !strconcat(OpcStr, "(#$src2) = $src3.new")), + !strconcat(OpcStr, "(##$src2) = $src3.new")), []>, Requires<[HasV4T]>; } +} defm STrib_imm : ST_absimm<"memb">; defm STrih_imm : ST_absimm<"memh">; defm STriw_imm : ST_absimm<"memw">; -let Predicates = [HasV4T], AddedComplexity = 30 in -def : Pat<(truncstorei8 (i32 IntRegs:$src1), u6ImmPred:$src2), - (STrib_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>; +let Predicates = [HasV4T], AddedComplexity = 30 in { +def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), + (STrib_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; -let Predicates = [HasV4T], AddedComplexity = 30 in -def : Pat<(truncstorei16 (i32 IntRegs:$src1), u6ImmPred:$src2), - (STrih_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>; - -let Predicates = [HasV4T], AddedComplexity = 30 in -def : Pat<(store (i32 IntRegs:$src1), u6ImmPred:$src2), - (STriw_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>; +def : Pat<(truncstorei16 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), + (STrih_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; +def : Pat<(store (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), + (STriw_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; +} // Load - absolute addressing mode: These instructions take a constant // value as the extended operand multiclass LD_absimm<string OpcStr> { - let isPredicable = 1 in +let isExtended = 1, opExtendable = 1, isPredicable = 1, +validSubTargets = HasV4SubT in def _abs_V4 : LDInst2<(outs IntRegs:$dst), - (ins u6Imm:$src), !strconcat("$dst = ", - !strconcat(OpcStr, "(#$src)")), + (ins u0AlwaysExt:$src), !strconcat("$dst = ", + !strconcat(OpcStr, "(##$src)")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in +let isExtended = 1, opExtendable = 2, isPredicated = 1, +validSubTargets = HasV4SubT in { def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, u6Imm:$src2), !strconcat("if ($src1) $dst = ", - !strconcat(OpcStr, "(#$src2)")), + (ins PredRegs:$src1, u0AlwaysExt:$src2), !strconcat("if ($src1) $dst = ", + !strconcat(OpcStr, "(##$src2)")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, u6Imm:$src2), !strconcat("if (!$src1) $dst = ", - !strconcat(OpcStr, "(#$src2)")), + (ins PredRegs:$src1, u0AlwaysExt:$src2), !strconcat("if (!$src1) $dst = ", + !strconcat(OpcStr, "(##$src2)")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, u6Imm:$src2), !strconcat("if ($src1.new) $dst = ", - !strconcat(OpcStr, "(#$src2)")), + (ins PredRegs:$src1, u0AlwaysExt:$src2), !strconcat("if ($src1.new) $dst = ", + !strconcat(OpcStr, "(##$src2)")), []>, Requires<[HasV4T]>; - let isPredicated = 1 in def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, u6Imm:$src2), !strconcat("if (!$src1.new) $dst = ", - !strconcat(OpcStr, "(#$src2)")), + (ins PredRegs:$src1, u0AlwaysExt:$src2), !strconcat("if (!$src1.new) $dst = ", + !strconcat(OpcStr, "(##$src2)")), []>, Requires<[HasV4T]>; } +} -defm LDrib_imm : LD_absimm<"memb">; defm LDriub_imm : LD_absimm<"memub">; -defm LDrih_imm : LD_absimm<"memh">; defm LDriuh_imm : LD_absimm<"memuh">; -defm LDriw_imm : LD_absimm<"memw">; +defm LDrib_imm : LD_absimm<"memb">; +defm LDrih_imm : LD_absimm<"memh">; +defm LDriw_imm : LD_absimm<"memw">; -let Predicates = [HasV4T], AddedComplexity = 30 in -def : Pat<(i32 (load u6ImmPred:$src)), - (LDriw_imm_abs_V4 u6ImmPred:$src)>; +let Predicates = [HasV4T], AddedComplexity = 30 in { +def : Pat<(i32 (load u0AlwaysExtPred:$src)), + (LDriw_imm_abs_V4 u0AlwaysExtPred:$src)>; -let Predicates = [HasV4T],
AddedComplexity=30 in -def : Pat<(i32 (sextloadi8 u6ImmPred:$src)), - (LDrib_imm_abs_V4 u6ImmPred:$src)>; +def : Pat<(i32 (sextloadi8 u0AlwaysExtPred:$src)), + (LDrib_imm_abs_V4 u0AlwaysExtPred:$src)>; -let Predicates = [HasV4T], AddedComplexity=30 in -def : Pat<(i32 (zextloadi8 u6ImmPred:$src)), - (LDriub_imm_abs_V4 u6ImmPred:$src)>; +def : Pat<(i32 (zextloadi8 u0AlwaysExtPred:$src)), + (LDriub_imm_abs_V4 u0AlwaysExtPred:$src)>; -let Predicates = [HasV4T], AddedComplexity=30 in -def : Pat<(i32 (sextloadi16 u6ImmPred:$src)), - (LDrih_imm_abs_V4 u6ImmPred:$src)>; - -let Predicates = [HasV4T], AddedComplexity=30 in -def : Pat<(i32 (zextloadi16 u6ImmPred:$src)), - (LDriuh_imm_abs_V4 u6ImmPred:$src)>; +def : Pat<(i32 (sextloadi16 u0AlwaysExtPred:$src)), + (LDrih_imm_abs_V4 u0AlwaysExtPred:$src)>; +def : Pat<(i32 (zextloadi16 u0AlwaysExtPred:$src)), + (LDriuh_imm_abs_V4 u0AlwaysExtPred:$src)>; +} // Indexed store double word - global address. // memw(Rs+#u6:2)=#S8 @@ -4764,3 +4063,109 @@ def STrih_offset_ext_V4 : STInst<(outs), [(truncstorei16 (HexagonCONST32 tglobaladdr:$src3), (add IntRegs:$src1, u6_1ImmPred:$src2))]>, Requires<[HasV4T]>; +// Map from store(globaladdress + x) -> memd(#foo + x) +let AddedComplexity = 100 in +def : Pat<(store (i64 DoubleRegs:$src1), + FoldGlobalAddrGP:$addr), + (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_store_64 FoldGlobalAddrGP:$addr, + (i64 DoubleRegs:$src1)), + (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from store(globaladdress + x) -> memb(#foo + x) +let AddedComplexity = 100 in +def : Pat<(truncstorei8 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr), + (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_store_8 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)), + (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from store(globaladdress + x) -> memh(#foo + x) +let AddedComplexity = 100 in +def : Pat<(truncstorei16 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr), + (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_store_16 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)), + (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from store(globaladdress + x) -> memw(#foo + x) +let AddedComplexity = 100 in +def : Pat<(store (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr), + (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_store_32 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)), + (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memd(#foo + x) +let AddedComplexity = 100 in +def : Pat<(i64 (load FoldGlobalAddrGP:$addr)), + (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_load_64 FoldGlobalAddrGP:$addr), + (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memb(#foo + x) +let AddedComplexity = 100 in +def : Pat<(i32 (extloadi8 FoldGlobalAddrGP:$addr)), + (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memb(#foo + x) +let AddedComplexity = 100 in +def : Pat<(i32 (sextloadi8 FoldGlobalAddrGP:$addr)), + (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memh(#foo + x) +let AddedComplexity = 100 in +def : Pat<(i32 (extloadi16 FoldGlobalAddrGP:$addr)), + (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memh(#foo + x) +let AddedComplexity = 100 in +def : Pat<(i32 (sextloadi16 FoldGlobalAddrGP:$addr)), + (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memuh(#foo + x) +let AddedComplexity = 100 in +def : Pat<(i32 (zextloadi16 FoldGlobalAddrGP:$addr)), + (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_load_16 FoldGlobalAddrGP:$addr), + (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memub(#foo + x) +let AddedComplexity = 100 in +def : Pat<(i32 (zextloadi8 FoldGlobalAddrGP:$addr)), + (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_load_8 FoldGlobalAddrGP:$addr), + (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memw(#foo + x) +let AddedComplexity = 100 in +def : Pat<(i32 (load FoldGlobalAddrGP:$addr)), + (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_load_32 FoldGlobalAddrGP:$addr), + (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>, + Requires<[HasV4T]>; + diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp index db36ac0..f011d51 100644 --- a/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -15,6 +15,7 @@ #include "Hexagon.h" #include "HexagonAsmPrinter.h" #include "HexagonMachineFunctionInfo.h" +#include "MCTargetDesc/HexagonMCInst.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/MC/MCExpr.h" @@ -38,9 +39,10 @@ static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol, } // Create an MCInst from a MachineInstr -void llvm::HexagonLowerToMC(const MachineInstr* MI, MCInst& MCI, +void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI, HexagonAsmPrinter& AP) { MCI.setOpcode(MI->getOpcode()); + MCI.setDesc(MI->getDesc()); for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) { const MachineOperand &MO = MI->getOperand(i); diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index aef6830..ced17b3 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -152,6 +152,12 @@ void VLIWMachineScheduler::schedule() { // Postprocess the DAG to add platform specific artificial dependencies. postprocessDAG(); + SmallVector<SUnit*, 8> TopRoots, BotRoots; + findRootsAndBiasEdges(TopRoots, BotRoots); + + // Initialize the strategy before modifying the DAG. + SchedImpl->initialize(this); + // To view Height/Depth correctly, they should be accessed at least once.
DEBUG(unsigned maxH = 0; for (unsigned su = 0, e = SUnits.size(); su != e; ++su) @@ -166,7 +172,7 @@ void VLIWMachineScheduler::schedule() { DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); - initQueues(); + initQueues(TopRoots, BotRoots); bool IsTopNode = false; while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { @@ -186,6 +192,7 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { DAG = static_cast<VLIWMachineScheduler*>(dag); SchedModel = DAG->getSchedModel(); TRI = DAG->TRI; + Top.init(DAG, SchedModel); Bot.init(DAG, SchedModel); @@ -193,6 +200,8 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { // are disabled, then these HazardRecs will be disabled. const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); const TargetMachine &TM = DAG->MF.getTarget(); + delete Top.HazardRec; + delete Bot.HazardRec; Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); @@ -677,4 +686,3 @@ void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { Bot.bumpNode(SU); } } - diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index d1882de..f947dfc 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -117,37 +117,15 @@ HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { "architecture version"); } -void HexagonRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - MachineInstr &MI = *I; - - if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) { - // Hexagon_TODO: add code - } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) { - // Hexagon_TODO: add code - } else { - llvm_unreachable("Cannot handle this call frame pseudo instruction"); - } - MBB.erase(I); -} - void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { - + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { // // Hexagon_TODO: Do we need to enforce this for Hexagon? assert(SPAdj == 0 && "Unexpected"); - - unsigned i = 0; MachineInstr &MI = *II; - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); // Addressable stack objects are accessed using neg. offsets from %fp. MachineFunction &MF = *MI.getParent()->getParent(); @@ -167,8 +145,9 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) && !TII.isSpillPredRegOp(&MI)) { // Replace frame index with a stack pointer reference. - MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false, true); - MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), false, + false, true); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(FrameSize+Offset); } else { // Replace frame index with a frame pointer reference. 
if (!TII.isValidOffset(MI.getOpcode(), Offset)) { @@ -205,8 +184,8 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, dstReg).addReg(FrameReg).addImm(Offset); } - MI.getOperand(i).ChangeToRegister(dstReg, false, false, true); - MI.getOperand(i+1).ChangeToImmediate(0); + MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); } else if ((MI.getOpcode() == Hexagon::STriw_indexed) || (MI.getOpcode() == Hexagon::STriw) || (MI.getOpcode() == Hexagon::STrid) || @@ -233,29 +212,31 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, TII.get(Hexagon::ADD_ri), resReg).addReg(FrameReg).addImm(Offset); } - MI.getOperand(i).ChangeToRegister(resReg, false, false, true); - MI.getOperand(i+1).ChangeToImmediate(0); + MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,true); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); } else if (TII.isMemOp(&MI)) { unsigned resReg = HEXAGON_RESERVED_REG_1; if (!MFI.hasVarSizedObjects() && TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) { - MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false, - true); - MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), + false, false, true); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset); } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) { BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset); BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(Hexagon::ADD_rr), resReg).addReg(FrameReg).addReg(resReg); - MI.getOperand(i).ChangeToRegister(resReg, false, false, true); - MI.getOperand(i+1).ChangeToImmediate(0); + MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false, + true); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); } else { BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(Hexagon::ADD_ri), resReg).addReg(FrameReg).addImm(Offset); - MI.getOperand(i).ChangeToRegister(resReg, false, false, true); - MI.getOperand(i+1).ChangeToImmediate(0); + MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false, + true); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); } } else { unsigned dstReg = MI.getOperand(0).getReg(); @@ -265,14 +246,14 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, TII.get(Hexagon::ADD_rr), dstReg).addReg(FrameReg).addReg(dstReg); // Can we delete MI??? r2 = add (r2, #0). - MI.getOperand(i).ChangeToRegister(dstReg, false, false, true); - MI.getOperand(i+1).ChangeToImmediate(0); + MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); } } else { // If the offset is small enough to fit in the immediate field, directly // encode it. - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset); } } @@ -310,58 +291,6 @@ void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove> Moves.push_back(MachineMove(0, Dst, Src)); } -// Get the weight in units of pressure for this register class. 
-const RegClassWeight & -HexagonRegisterInfo::getRegClassWeight(const TargetRegisterClass *RC) const { - // Each TargetRegisterClass has a per register weight, and weight - // limit which must be less than the limits of its pressure sets. - static const RegClassWeight RCWeightTable[] = { - {1, 32}, // IntRegs - {1, 8}, // CRRegs - {1, 4}, // PredRegs - {2, 16}, // DoubleRegs - {0, 0} }; - return RCWeightTable[RC->getID()]; -} - -/// Get the number of dimensions of register pressure. -unsigned HexagonRegisterInfo::getNumRegPressureSets() const { - return 4; -} - -/// Get the name of this register unit pressure set. -const char *HexagonRegisterInfo::getRegPressureSetName(unsigned Idx) const { - static const char *const RegPressureSetName[] = { - "IntRegsRegSet", - "CRRegsRegSet", - "PredRegsRegSet", - "DoubleRegsRegSet" - }; - assert((Idx < 4) && "Index out of bounds"); - return RegPressureSetName[Idx]; -} - -/// Get the register unit pressure limit for this dimension. -/// This limit must be adjusted dynamically for reserved registers. -unsigned HexagonRegisterInfo::getRegPressureSetLimit(unsigned Idx) const { - static const int RegPressureLimit [] = { 16, 4, 2, 8 }; - assert((Idx < 4) && "Index out of bounds"); - return RegPressureLimit[Idx]; -} - -const int* -HexagonRegisterInfo::getRegClassPressureSets(const TargetRegisterClass *RC) - const { - static const int RCSetsTable[] = { - 0, -1, // IntRegs - 1, -1, // CRRegs - 2, -1, // PredRegs - 0, -1, // DoubleRegs - -1 }; - static const unsigned RCSetStartTable[] = { 0, 2, 4, 6, 0 }; - unsigned SetListStart = RCSetStartTable[RC->getID()]; - return &RCSetsTable[SetListStart]; -} unsigned HexagonRegisterInfo::getEHExceptionRegister() const { llvm_unreachable("What is the exception register"); } diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index e8f3cfb..8a3f94a 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -56,12 +56,9 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. @@ -87,11 +84,6 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo { // Exception handling queries. 
unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const; - unsigned getNumRegPressureSets() const; - const char *getRegPressureSetName(unsigned Idx) const; - unsigned getRegPressureSetLimit(unsigned Idx) const; - const int* getRegClassPressureSets(const TargetRegisterClass *RC) const; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td index b5ff69a..c2cfbb9 100644 --- a/lib/Target/Hexagon/HexagonSchedule.td +++ b/lib/Target/Hexagon/HexagonSchedule.td @@ -8,10 +8,11 @@ //===----------------------------------------------------------------------===// // Functional Units -def LUNIT : FuncUnit; -def LSUNIT : FuncUnit; -def MUNIT : FuncUnit; -def SUNIT : FuncUnit; +def LSUNIT : FuncUnit; // SLOT0 +def LUNIT : FuncUnit; // SLOT1 +def MUNIT : FuncUnit; // SLOT2 +def SUNIT : FuncUnit; // SLOT3 +def LOOPUNIT : FuncUnit; // Itinerary classes def ALU32 : InstrItinClass; @@ -20,27 +21,34 @@ def CR : InstrItinClass; def J : InstrItinClass; def JR : InstrItinClass; def LD : InstrItinClass; +def LD0 : InstrItinClass; def M : InstrItinClass; def ST : InstrItinClass; +def ST0 : InstrItinClass; def S : InstrItinClass; def SYS : InstrItinClass; -def MARKER : InstrItinClass; +def ENDLOOP : InstrItinClass; def PSEUDO : InstrItinClass; +def PSEUDOM : InstrItinClass; def HexagonItineraries : - ProcessorItineraries<[LUNIT, LSUNIT, MUNIT, SUNIT], [], [ + ProcessorItineraries<[LSUNIT, LUNIT, MUNIT, SUNIT, LOOPUNIT], [], [ InstrItinData<ALU32 , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>, InstrItinData<ALU64 , [InstrStage<1, [MUNIT, SUNIT]>]>, InstrItinData<CR , [InstrStage<1, [SUNIT]>]>, InstrItinData<J , [InstrStage<1, [SUNIT, MUNIT]>]>, InstrItinData<JR , [InstrStage<1, [MUNIT]>]>, InstrItinData<LD , [InstrStage<1, [LUNIT, LSUNIT]>]>, + InstrItinData<LD0 , [InstrStage<1, [LSUNIT]>]>, InstrItinData<M , [InstrStage<1, [MUNIT, SUNIT]>]>, InstrItinData<ST , [InstrStage<1, [LSUNIT]>]>, + InstrItinData<ST0 , [InstrStage<1, [LSUNIT]>]>, InstrItinData<S , [InstrStage<1, [SUNIT, MUNIT]>]>, InstrItinData<SYS , [InstrStage<1, [LSUNIT]>]>, - InstrItinData<MARKER , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>, - InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]> + InstrItinData<ENDLOOP, [InstrStage<1, [LOOPUNIT]>]>, + InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>, + InstrItinData<PSEUDOM, [InstrStage<1, [MUNIT, SUNIT], 0>, + InstrStage<1, [MUNIT, SUNIT]>]> ]>; def HexagonModel : SchedMachineModel { diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td index 5668ae8..ef72cf4 100644 --- a/lib/Target/Hexagon/HexagonScheduleV4.td +++ b/lib/Target/Hexagon/HexagonScheduleV4.td @@ -28,6 +28,10 @@ def SLOT0 : FuncUnit; def SLOT1 : FuncUnit; def SLOT2 : FuncUnit; def SLOT3 : FuncUnit; +// Endloop is a pseudo instruction that is encoded with 2 bits in a packet +// rather than taking an execution slot. This special unit is needed +// to schedule an ENDLOOP with 4 other instructions. +def SLOT_ENDLOOP: FuncUnit; // Itinerary classes. 
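The LOOPUNIT/SLOT_ENDLOOP units above exist because endloopN is not a slot-occupying instruction: it is encoded in two bits of the final packet of a hardware loop, so the scheduler needs a pseudo functional unit to co-issue ENDLOOP alongside up to four ordinary instructions. Roughly, for a loop such as the following C++; the assembly shape sketched in the comments is illustrative, not compiler output:

// Possible hardware-loop shape for sumsq:
//   loop0(.Lbody, #n)                 // set up the hardware loop
// .Lbody:
//   { r2 = memw(r0++#4)               // packet may still hold real insns;
//     r3 += mpyi(r2, r2) }:endloop0   // :endloop0 rides in packet bits
int sumsq(const int *a, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += a[i] * a[i];
  return s;
}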
def NV_V4 : InstrItinClass; @@ -36,22 +40,26 @@ def MEM_V4 : InstrItinClass; def PREFIX : InstrItinClass; def HexagonItinerariesV4 : - ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3], [], [ + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [ InstrItinData<ALU32 , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, InstrItinData<ALU64 , [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData<CR , [InstrStage<1, [SLOT3]>]>, InstrItinData<J , [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData<JR , [InstrStage<1, [SLOT2]>]>, InstrItinData<LD , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<LD0 , [InstrStage<1, [SLOT0]>]>, InstrItinData<M , [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData<ST , [InstrStage<1, [SLOT0, SLOT1]>]>, + InstrItinData<ST0 , [InstrStage<1, [SLOT0]>]>, InstrItinData<S , [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData<SYS , [InstrStage<1, [SLOT0]>]>, InstrItinData<NV_V4 , [InstrStage<1, [SLOT0]>]>, InstrItinData<MEM_V4 , [InstrStage<1, [SLOT0]>]>, - InstrItinData<MARKER , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<ENDLOOP, [InstrStage<1, [SLOT_ENDLOOP]>]>, InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, - InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]> + InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [SLOT2, SLOT3]>]> ]>; def HexagonModelV4 : SchedMachineModel { diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 287b3d6..d9fef3e 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -122,7 +122,7 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { bool HexagonPassConfig::addInstSelector() { addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine())); - addPass(createHexagonISelDag(getHexagonTargetMachine())); + addPass(createHexagonISelDag(getHexagonTargetMachine(), getOptLevel())); addPass(createHexagonPeephole()); return false; } diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 409a243..aff6b86 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -376,7 +376,6 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) { case Hexagon::STrib_indexed: case Hexagon::STrib_indexed_shl_V4: case Hexagon::STrib_shl_V4: - case Hexagon::STrib_GP_V4: case Hexagon::STb_GP_V4: case Hexagon::POST_STbri: case Hexagon::STrib_cPt: @@ -399,17 +398,12 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) { case Hexagon::STb_GP_cNotPt_V4: case Hexagon::STb_GP_cdnPt_V4: case Hexagon::STb_GP_cdnNotPt_V4: - case Hexagon::STrib_GP_cPt_V4: - case Hexagon::STrib_GP_cNotPt_V4: - case Hexagon::STrib_GP_cdnPt_V4: - case Hexagon::STrib_GP_cdnNotPt_V4: // store halfword case Hexagon::STrih: case Hexagon::STrih_indexed: case Hexagon::STrih_indexed_shl_V4: case Hexagon::STrih_shl_V4: - case Hexagon::STrih_GP_V4: case Hexagon::STh_GP_V4: case Hexagon::POST_SThri: case Hexagon::STrih_cPt: @@ -432,17 +426,12 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) { case Hexagon::STh_GP_cNotPt_V4: case Hexagon::STh_GP_cdnPt_V4: case Hexagon::STh_GP_cdnNotPt_V4: - case Hexagon::STrih_GP_cPt_V4: - case Hexagon::STrih_GP_cNotPt_V4: - case Hexagon::STrih_GP_cdnPt_V4: - case Hexagon::STrih_GP_cdnNotPt_V4: // store word case Hexagon::STriw: case 
Hexagon::STriw_indexed: case Hexagon::STriw_indexed_shl_V4: case Hexagon::STriw_shl_V4: - case Hexagon::STriw_GP_V4: case Hexagon::STw_GP_V4: case Hexagon::POST_STwri: case Hexagon::STriw_cPt: @@ -465,10 +454,6 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) { case Hexagon::STw_GP_cNotPt_V4: case Hexagon::STw_GP_cdnPt_V4: case Hexagon::STw_GP_cdnNotPt_V4: - case Hexagon::STriw_GP_cPt_V4: - case Hexagon::STriw_GP_cNotPt_V4: - case Hexagon::STriw_GP_cdnPt_V4: - case Hexagon::STriw_GP_cdnNotPt_V4: return QRI->Subtarget.hasV4TOps(); } return false; @@ -508,9 +493,6 @@ static int GetDotNewOp(const int opc) { case Hexagon::STrib_shl_V4: return Hexagon::STrib_shl_nv_V4; - case Hexagon::STrib_GP_V4: - return Hexagon::STrib_GP_nv_V4; - case Hexagon::STb_GP_V4: return Hexagon::STb_GP_nv_V4; @@ -577,18 +559,6 @@ static int GetDotNewOp(const int opc) { case Hexagon::STb_GP_cdnNotPt_V4: return Hexagon::STb_GP_cdnNotPt_nv_V4; - case Hexagon::STrib_GP_cPt_V4: - return Hexagon::STrib_GP_cPt_nv_V4; - - case Hexagon::STrib_GP_cNotPt_V4: - return Hexagon::STrib_GP_cNotPt_nv_V4; - - case Hexagon::STrib_GP_cdnPt_V4: - return Hexagon::STrib_GP_cdnPt_nv_V4; - - case Hexagon::STrib_GP_cdnNotPt_V4: - return Hexagon::STrib_GP_cdnNotPt_nv_V4; - // store new value halfword case Hexagon::STrih: return Hexagon::STrih_nv_V4; @@ -602,9 +572,6 @@ static int GetDotNewOp(const int opc) { case Hexagon::STrih_shl_V4: return Hexagon::STrih_shl_nv_V4; - case Hexagon::STrih_GP_V4: - return Hexagon::STrih_GP_nv_V4; - case Hexagon::STh_GP_V4: return Hexagon::STh_GP_nv_V4; @@ -671,18 +638,6 @@ static int GetDotNewOp(const int opc) { case Hexagon::STh_GP_cdnNotPt_V4: return Hexagon::STh_GP_cdnNotPt_nv_V4; - case Hexagon::STrih_GP_cPt_V4: - return Hexagon::STrih_GP_cPt_nv_V4; - - case Hexagon::STrih_GP_cNotPt_V4: - return Hexagon::STrih_GP_cNotPt_nv_V4; - - case Hexagon::STrih_GP_cdnPt_V4: - return Hexagon::STrih_GP_cdnPt_nv_V4; - - case Hexagon::STrih_GP_cdnNotPt_V4: - return Hexagon::STrih_GP_cdnNotPt_nv_V4; - // store new value word case Hexagon::STriw: return Hexagon::STriw_nv_V4; @@ -696,9 +651,6 @@ static int GetDotNewOp(const int opc) { case Hexagon::STriw_shl_V4: return Hexagon::STriw_shl_nv_V4; - case Hexagon::STriw_GP_V4: - return Hexagon::STriw_GP_nv_V4; - case Hexagon::STw_GP_V4: return Hexagon::STw_GP_nv_V4; @@ -765,17 +717,6 @@ static int GetDotNewOp(const int opc) { case Hexagon::STw_GP_cdnNotPt_V4: return Hexagon::STw_GP_cdnNotPt_nv_V4; - case Hexagon::STriw_GP_cPt_V4: - return Hexagon::STriw_GP_cPt_nv_V4; - - case Hexagon::STriw_GP_cNotPt_V4: - return Hexagon::STriw_GP_cNotPt_nv_V4; - - case Hexagon::STriw_GP_cdnPt_V4: - return Hexagon::STriw_GP_cdnPt_nv_V4; - - case Hexagon::STriw_GP_cdnNotPt_V4: - return Hexagon::STriw_GP_cdnNotPt_nv_V4; } } @@ -821,12 +762,6 @@ static int GetDotNewPredOp(const int opc) { case Hexagon::STb_GP_cNotPt_V4 : return Hexagon::STb_GP_cdnNotPt_V4; - case Hexagon::STrib_GP_cPt_V4 : - return Hexagon::STrib_GP_cdnPt_V4; - - case Hexagon::STrib_GP_cNotPt_V4 : - return Hexagon::STrib_GP_cdnNotPt_V4; - // Store doubleword conditionally case Hexagon::STrid_cPt : return Hexagon::STrid_cdnPt_V4; @@ -858,12 +793,6 @@ static int GetDotNewPredOp(const int opc) { case Hexagon::STd_GP_cNotPt_V4 : return Hexagon::STd_GP_cdnNotPt_V4; - case Hexagon::STrid_GP_cPt_V4 : - return Hexagon::STrid_GP_cdnPt_V4; - - case Hexagon::STrid_GP_cNotPt_V4 : - return Hexagon::STrid_GP_cdnNotPt_V4; - // Store halfword conditionally case Hexagon::STrih_cPt : return Hexagon::STrih_cdnPt_V4; @@ -901,12 +830,6 
@@ static int GetDotNewPredOp(const int opc) { case Hexagon::STh_GP_cNotPt_V4 : return Hexagon::STh_GP_cdnNotPt_V4; - case Hexagon::STrih_GP_cPt_V4 : - return Hexagon::STrih_GP_cdnPt_V4; - - case Hexagon::STrih_GP_cNotPt_V4 : - return Hexagon::STrih_GP_cdnNotPt_V4; - // Store word conditionally case Hexagon::STriw_cPt : return Hexagon::STriw_cdnPt_V4; @@ -944,12 +867,6 @@ static int GetDotNewPredOp(const int opc) { case Hexagon::STw_GP_cNotPt_V4 : return Hexagon::STw_GP_cdnNotPt_V4; - case Hexagon::STriw_GP_cPt_V4 : - return Hexagon::STriw_GP_cdnPt_V4; - - case Hexagon::STriw_GP_cNotPt_V4 : - return Hexagon::STriw_GP_cdnNotPt_V4; - // Conditional Jumps case Hexagon::JMP_c: return Hexagon::JMP_cdnPt; @@ -1092,72 +1009,36 @@ static int GetDotNewPredOp(const int opc) { // V4 indexed+scaled load - case Hexagon::LDrid_indexed_cPt_V4 : - return Hexagon::LDrid_indexed_cdnPt_V4; - - case Hexagon::LDrid_indexed_cNotPt_V4 : - return Hexagon::LDrid_indexed_cdnNotPt_V4; - case Hexagon::LDrid_indexed_shl_cPt_V4 : return Hexagon::LDrid_indexed_shl_cdnPt_V4; case Hexagon::LDrid_indexed_shl_cNotPt_V4 : return Hexagon::LDrid_indexed_shl_cdnNotPt_V4; - case Hexagon::LDrib_indexed_cPt_V4 : - return Hexagon::LDrib_indexed_cdnPt_V4; - - case Hexagon::LDrib_indexed_cNotPt_V4 : - return Hexagon::LDrib_indexed_cdnNotPt_V4; - case Hexagon::LDrib_indexed_shl_cPt_V4 : return Hexagon::LDrib_indexed_shl_cdnPt_V4; case Hexagon::LDrib_indexed_shl_cNotPt_V4 : return Hexagon::LDrib_indexed_shl_cdnNotPt_V4; - case Hexagon::LDriub_indexed_cPt_V4 : - return Hexagon::LDriub_indexed_cdnPt_V4; - - case Hexagon::LDriub_indexed_cNotPt_V4 : - return Hexagon::LDriub_indexed_cdnNotPt_V4; - case Hexagon::LDriub_indexed_shl_cPt_V4 : return Hexagon::LDriub_indexed_shl_cdnPt_V4; case Hexagon::LDriub_indexed_shl_cNotPt_V4 : return Hexagon::LDriub_indexed_shl_cdnNotPt_V4; - case Hexagon::LDrih_indexed_cPt_V4 : - return Hexagon::LDrih_indexed_cdnPt_V4; - - case Hexagon::LDrih_indexed_cNotPt_V4 : - return Hexagon::LDrih_indexed_cdnNotPt_V4; - case Hexagon::LDrih_indexed_shl_cPt_V4 : return Hexagon::LDrih_indexed_shl_cdnPt_V4; case Hexagon::LDrih_indexed_shl_cNotPt_V4 : return Hexagon::LDrih_indexed_shl_cdnNotPt_V4; - case Hexagon::LDriuh_indexed_cPt_V4 : - return Hexagon::LDriuh_indexed_cdnPt_V4; - - case Hexagon::LDriuh_indexed_cNotPt_V4 : - return Hexagon::LDriuh_indexed_cdnNotPt_V4; - case Hexagon::LDriuh_indexed_shl_cPt_V4 : return Hexagon::LDriuh_indexed_shl_cdnPt_V4; case Hexagon::LDriuh_indexed_shl_cNotPt_V4 : return Hexagon::LDriuh_indexed_shl_cdnNotPt_V4; - case Hexagon::LDriw_indexed_cPt_V4 : - return Hexagon::LDriw_indexed_cdnPt_V4; - - case Hexagon::LDriw_indexed_cNotPt_V4 : - return Hexagon::LDriw_indexed_cdnNotPt_V4; - case Hexagon::LDriw_indexed_shl_cPt_V4 : return Hexagon::LDriw_indexed_shl_cdnPt_V4; @@ -1202,42 +1083,6 @@ static int GetDotNewPredOp(const int opc) { case Hexagon::LDw_GP_cNotPt_V4: return Hexagon::LDw_GP_cdnNotPt_V4; - case Hexagon::LDrid_GP_cPt_V4: - return Hexagon::LDrid_GP_cdnPt_V4; - - case Hexagon::LDrid_GP_cNotPt_V4: - return Hexagon::LDrid_GP_cdnNotPt_V4; - - case Hexagon::LDrib_GP_cPt_V4: - return Hexagon::LDrib_GP_cdnPt_V4; - - case Hexagon::LDrib_GP_cNotPt_V4: - return Hexagon::LDrib_GP_cdnNotPt_V4; - - case Hexagon::LDriub_GP_cPt_V4: - return Hexagon::LDriub_GP_cdnPt_V4; - - case Hexagon::LDriub_GP_cNotPt_V4: - return Hexagon::LDriub_GP_cdnNotPt_V4; - - case Hexagon::LDrih_GP_cPt_V4: - return Hexagon::LDrih_GP_cdnPt_V4; - - case Hexagon::LDrih_GP_cNotPt_V4: - return Hexagon::LDrih_GP_cdnNotPt_V4; -
- case Hexagon::LDriuh_GP_cPt_V4: - return Hexagon::LDriuh_GP_cdnPt_V4; - - case Hexagon::LDriuh_GP_cNotPt_V4: - return Hexagon::LDriuh_GP_cdnNotPt_V4; - - case Hexagon::LDriw_GP_cPt_V4: - return Hexagon::LDriw_GP_cdnPt_V4; - - case Hexagon::LDriw_GP_cNotPt_V4: - return Hexagon::LDriw_GP_cdnNotPt_V4; - // Conditional store new-value byte case Hexagon::STrib_cPt_nv_V4 : return Hexagon::STrib_cdnPt_nv_V4; @@ -1265,12 +1110,6 @@ static int GetDotNewPredOp(const int opc) { case Hexagon::STb_GP_cNotPt_nv_V4 : return Hexagon::STb_GP_cdnNotPt_nv_V4; - case Hexagon::STrib_GP_cPt_nv_V4 : - return Hexagon::STrib_GP_cdnPt_nv_V4; - - case Hexagon::STrib_GP_cNotPt_nv_V4 : - return Hexagon::STrib_GP_cdnNotPt_nv_V4; - // Conditional store new-value halfword case Hexagon::STrih_cPt_nv_V4 : return Hexagon::STrih_cdnPt_nv_V4; @@ -1298,12 +1137,6 @@ static int GetDotNewPredOp(const int opc) { case Hexagon::STh_GP_cNotPt_nv_V4 : return Hexagon::STh_GP_cdnNotPt_nv_V4; - case Hexagon::STrih_GP_cPt_nv_V4 : - return Hexagon::STrih_GP_cdnPt_nv_V4; - - case Hexagon::STrih_GP_cNotPt_nv_V4 : - return Hexagon::STrih_GP_cdnNotPt_nv_V4; - // Conditional store new-value word case Hexagon::STriw_cPt_nv_V4 : return Hexagon::STriw_cdnPt_nv_V4; @@ -1331,12 +1164,6 @@ static int GetDotNewPredOp(const int opc) { case Hexagon::STw_GP_cNotPt_nv_V4 : return Hexagon::STw_GP_cdnNotPt_nv_V4; - case Hexagon::STriw_GP_cPt_nv_V4 : - return Hexagon::STriw_GP_cdnPt_nv_V4; - - case Hexagon::STriw_GP_cNotPt_nv_V4 : - return Hexagon::STriw_GP_cdnNotPt_nv_V4; - // Conditional add case Hexagon::ADD_ri_cPt : return Hexagon::ADD_ri_cdnPt; @@ -1623,72 +1450,36 @@ static int GetDotOldOp(const int opc) { // V4 indexed+scaled Load - case Hexagon::LDrid_indexed_cdnPt_V4 : - return Hexagon::LDrid_indexed_cPt_V4; - - case Hexagon::LDrid_indexed_cdnNotPt_V4 : - return Hexagon::LDrid_indexed_cNotPt_V4; - case Hexagon::LDrid_indexed_shl_cdnPt_V4 : return Hexagon::LDrid_indexed_shl_cPt_V4; case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 : return Hexagon::LDrid_indexed_shl_cNotPt_V4; - case Hexagon::LDrib_indexed_cdnPt_V4 : - return Hexagon::LDrib_indexed_cPt_V4; - - case Hexagon::LDrib_indexed_cdnNotPt_V4 : - return Hexagon::LDrib_indexed_cNotPt_V4; - case Hexagon::LDrib_indexed_shl_cdnPt_V4 : return Hexagon::LDrib_indexed_shl_cPt_V4; case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 : return Hexagon::LDrib_indexed_shl_cNotPt_V4; - case Hexagon::LDriub_indexed_cdnPt_V4 : - return Hexagon::LDriub_indexed_cPt_V4; - - case Hexagon::LDriub_indexed_cdnNotPt_V4 : - return Hexagon::LDriub_indexed_cNotPt_V4; - case Hexagon::LDriub_indexed_shl_cdnPt_V4 : return Hexagon::LDriub_indexed_shl_cPt_V4; case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 : return Hexagon::LDriub_indexed_shl_cNotPt_V4; - case Hexagon::LDrih_indexed_cdnPt_V4 : - return Hexagon::LDrih_indexed_cPt_V4; - - case Hexagon::LDrih_indexed_cdnNotPt_V4 : - return Hexagon::LDrih_indexed_cNotPt_V4; - case Hexagon::LDrih_indexed_shl_cdnPt_V4 : return Hexagon::LDrih_indexed_shl_cPt_V4; case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 : return Hexagon::LDrih_indexed_shl_cNotPt_V4; - case Hexagon::LDriuh_indexed_cdnPt_V4 : - return Hexagon::LDriuh_indexed_cPt_V4; - - case Hexagon::LDriuh_indexed_cdnNotPt_V4 : - return Hexagon::LDriuh_indexed_cNotPt_V4; - case Hexagon::LDriuh_indexed_shl_cdnPt_V4 : return Hexagon::LDriuh_indexed_shl_cPt_V4; case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 : return Hexagon::LDriuh_indexed_shl_cNotPt_V4; - case Hexagon::LDriw_indexed_cdnPt_V4 : - return Hexagon::LDriw_indexed_cPt_V4; - - case 
Hexagon::LDriw_indexed_cdnNotPt_V4 : - return Hexagon::LDriw_indexed_cNotPt_V4; - case Hexagon::LDriw_indexed_shl_cdnPt_V4 : return Hexagon::LDriw_indexed_shl_cPt_V4; @@ -1733,42 +1524,6 @@ static int GetDotOldOp(const int opc) { case Hexagon::LDw_GP_cdnNotPt_V4: return Hexagon::LDw_GP_cNotPt_V4; - case Hexagon::LDrid_GP_cdnPt_V4: - return Hexagon::LDrid_GP_cPt_V4; - - case Hexagon::LDrid_GP_cdnNotPt_V4: - return Hexagon::LDrid_GP_cNotPt_V4; - - case Hexagon::LDrib_GP_cdnPt_V4: - return Hexagon::LDrib_GP_cPt_V4; - - case Hexagon::LDrib_GP_cdnNotPt_V4: - return Hexagon::LDrib_GP_cNotPt_V4; - - case Hexagon::LDriub_GP_cdnPt_V4: - return Hexagon::LDriub_GP_cPt_V4; - - case Hexagon::LDriub_GP_cdnNotPt_V4: - return Hexagon::LDriub_GP_cNotPt_V4; - - case Hexagon::LDrih_GP_cdnPt_V4: - return Hexagon::LDrih_GP_cPt_V4; - - case Hexagon::LDrih_GP_cdnNotPt_V4: - return Hexagon::LDrih_GP_cNotPt_V4; - - case Hexagon::LDriuh_GP_cdnPt_V4: - return Hexagon::LDriuh_GP_cPt_V4; - - case Hexagon::LDriuh_GP_cdnNotPt_V4: - return Hexagon::LDriuh_GP_cNotPt_V4; - - case Hexagon::LDriw_GP_cdnPt_V4: - return Hexagon::LDriw_GP_cPt_V4; - - case Hexagon::LDriw_GP_cdnNotPt_V4: - return Hexagon::LDriw_GP_cNotPt_V4; - // Conditional add case Hexagon::ADD_ri_cdnPt : @@ -1902,16 +1657,6 @@ static int GetDotOldOp(const int opc) { case Hexagon::STb_GP_cNotPt_nv_V4: return Hexagon::STb_GP_cNotPt_V4; - case Hexagon::STrib_GP_cdnPt_nv_V4: - case Hexagon::STrib_GP_cdnPt_V4: - case Hexagon::STrib_GP_cPt_nv_V4: - return Hexagon::STrib_GP_cPt_V4; - - case Hexagon::STrib_GP_cdnNotPt_nv_V4: - case Hexagon::STrib_GP_cdnNotPt_V4: - case Hexagon::STrib_GP_cNotPt_nv_V4: - return Hexagon::STrib_GP_cNotPt_V4; - // Store new-value byte - unconditional case Hexagon::STrib_nv_V4: return Hexagon::STrib; @@ -1925,9 +1670,6 @@ static int GetDotOldOp(const int opc) { case Hexagon::STrib_shl_nv_V4: return Hexagon::STrib_shl_V4; - case Hexagon::STrib_GP_nv_V4: - return Hexagon::STrib_GP_V4; - case Hexagon::STb_GP_nv_V4: return Hexagon::STb_GP_V4; @@ -1991,16 +1733,6 @@ static int GetDotOldOp(const int opc) { case Hexagon::STh_GP_cNotPt_nv_V4: return Hexagon::STh_GP_cNotPt_V4; - case Hexagon::STrih_GP_cdnPt_nv_V4: - case Hexagon::STrih_GP_cdnPt_V4: - case Hexagon::STrih_GP_cPt_nv_V4: - return Hexagon::STrih_GP_cPt_V4; - - case Hexagon::STrih_GP_cdnNotPt_nv_V4: - case Hexagon::STrih_GP_cdnNotPt_V4: - case Hexagon::STrih_GP_cNotPt_nv_V4: - return Hexagon::STrih_GP_cNotPt_V4; - // Store new-value halfword - unconditional case Hexagon::STrih_nv_V4: @@ -2015,9 +1747,6 @@ static int GetDotOldOp(const int opc) { case Hexagon::STrih_shl_nv_V4: return Hexagon::STrih_shl_V4; - case Hexagon::STrih_GP_nv_V4: - return Hexagon::STrih_GP_V4; - case Hexagon::STh_GP_nv_V4: return Hexagon::STh_GP_V4; @@ -2082,16 +1811,6 @@ static int GetDotOldOp(const int opc) { case Hexagon::STw_GP_cNotPt_nv_V4: return Hexagon::STw_GP_cNotPt_V4; - case Hexagon::STriw_GP_cdnPt_nv_V4: - case Hexagon::STriw_GP_cdnPt_V4: - case Hexagon::STriw_GP_cPt_nv_V4: - return Hexagon::STriw_GP_cPt_V4; - - case Hexagon::STriw_GP_cdnNotPt_nv_V4: - case Hexagon::STriw_GP_cdnNotPt_V4: - case Hexagon::STriw_GP_cNotPt_nv_V4: - return Hexagon::STriw_GP_cNotPt_V4; - // Store new-value word - unconditional case Hexagon::STriw_nv_V4: @@ -2106,9 +1825,6 @@ static int GetDotOldOp(const int opc) { case Hexagon::STriw_shl_nv_V4: return Hexagon::STriw_shl_V4; - case Hexagon::STriw_GP_nv_V4: - return Hexagon::STriw_GP_V4; - case Hexagon::STw_GP_nv_V4: return Hexagon::STw_GP_V4; @@ -2147,11 +1863,6 @@ static int 
GetDotOldOp(const int opc) { case Hexagon::STd_GP_cdnNotPt_V4 : return Hexagon::STd_GP_cNotPt_V4; - case Hexagon::STrid_GP_cdnPt_V4 : - return Hexagon::STrid_GP_cPt_V4; - - case Hexagon::STrid_GP_cdnNotPt_V4 : - return Hexagon::STrid_GP_cNotPt_V4; } } @@ -2249,28 +1960,16 @@ static bool GetPredicateSense(MachineInstr* MI, case Hexagon::LDriub_indexed_cdnPt : case Hexagon::POST_LDriub_cPt : case Hexagon::POST_LDriub_cdnPt_V4 : - case Hexagon::LDrid_indexed_cPt_V4 : - case Hexagon::LDrid_indexed_cdnPt_V4 : case Hexagon::LDrid_indexed_shl_cPt_V4 : case Hexagon::LDrid_indexed_shl_cdnPt_V4 : - case Hexagon::LDrib_indexed_cPt_V4 : - case Hexagon::LDrib_indexed_cdnPt_V4 : case Hexagon::LDrib_indexed_shl_cPt_V4 : case Hexagon::LDrib_indexed_shl_cdnPt_V4 : - case Hexagon::LDriub_indexed_cPt_V4 : - case Hexagon::LDriub_indexed_cdnPt_V4 : case Hexagon::LDriub_indexed_shl_cPt_V4 : case Hexagon::LDriub_indexed_shl_cdnPt_V4 : - case Hexagon::LDrih_indexed_cPt_V4 : - case Hexagon::LDrih_indexed_cdnPt_V4 : case Hexagon::LDrih_indexed_shl_cPt_V4 : case Hexagon::LDrih_indexed_shl_cdnPt_V4 : - case Hexagon::LDriuh_indexed_cPt_V4 : - case Hexagon::LDriuh_indexed_cdnPt_V4 : case Hexagon::LDriuh_indexed_shl_cPt_V4 : case Hexagon::LDriuh_indexed_shl_cdnPt_V4 : - case Hexagon::LDriw_indexed_cPt_V4 : - case Hexagon::LDriw_indexed_cdnPt_V4 : case Hexagon::LDriw_indexed_shl_cPt_V4 : case Hexagon::LDriw_indexed_shl_cdnPt_V4 : case Hexagon::ADD_ri_cPt : @@ -2299,42 +1998,22 @@ static bool GetPredicateSense(MachineInstr* MI, case Hexagon::ZXTB_cdnPt_V4 : case Hexagon::ZXTH_cPt_V4 : case Hexagon::ZXTH_cdnPt_V4 : - case Hexagon::LDrid_GP_cPt_V4 : - case Hexagon::LDrib_GP_cPt_V4 : - case Hexagon::LDriub_GP_cPt_V4 : - case Hexagon::LDrih_GP_cPt_V4 : - case Hexagon::LDriuh_GP_cPt_V4 : - case Hexagon::LDriw_GP_cPt_V4 : case Hexagon::LDd_GP_cPt_V4 : case Hexagon::LDb_GP_cPt_V4 : case Hexagon::LDub_GP_cPt_V4 : case Hexagon::LDh_GP_cPt_V4 : case Hexagon::LDuh_GP_cPt_V4 : case Hexagon::LDw_GP_cPt_V4 : - case Hexagon::STrid_GP_cPt_V4 : - case Hexagon::STrib_GP_cPt_V4 : - case Hexagon::STrih_GP_cPt_V4 : - case Hexagon::STriw_GP_cPt_V4 : case Hexagon::STd_GP_cPt_V4 : case Hexagon::STb_GP_cPt_V4 : case Hexagon::STh_GP_cPt_V4 : case Hexagon::STw_GP_cPt_V4 : - case Hexagon::LDrid_GP_cdnPt_V4 : - case Hexagon::LDrib_GP_cdnPt_V4 : - case Hexagon::LDriub_GP_cdnPt_V4 : - case Hexagon::LDrih_GP_cdnPt_V4 : - case Hexagon::LDriuh_GP_cdnPt_V4 : - case Hexagon::LDriw_GP_cdnPt_V4 : case Hexagon::LDd_GP_cdnPt_V4 : case Hexagon::LDb_GP_cdnPt_V4 : case Hexagon::LDub_GP_cdnPt_V4 : case Hexagon::LDh_GP_cdnPt_V4 : case Hexagon::LDuh_GP_cdnPt_V4 : case Hexagon::LDw_GP_cdnPt_V4 : - case Hexagon::STrid_GP_cdnPt_V4 : - case Hexagon::STrib_GP_cdnPt_V4 : - case Hexagon::STrih_GP_cdnPt_V4 : - case Hexagon::STriw_GP_cdnPt_V4 : case Hexagon::STd_GP_cdnPt_V4 : case Hexagon::STb_GP_cdnPt_V4 : case Hexagon::STh_GP_cdnPt_V4 : @@ -2420,28 +2099,16 @@ static bool GetPredicateSense(MachineInstr* MI, case Hexagon::LDriub_indexed_cdnNotPt : case Hexagon::POST_LDriub_cNotPt : case Hexagon::POST_LDriub_cdnNotPt_V4 : - case Hexagon::LDrid_indexed_cNotPt_V4 : - case Hexagon::LDrid_indexed_cdnNotPt_V4 : case Hexagon::LDrid_indexed_shl_cNotPt_V4 : case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDrib_indexed_cNotPt_V4 : - case Hexagon::LDrib_indexed_cdnNotPt_V4 : case Hexagon::LDrib_indexed_shl_cNotPt_V4 : case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriub_indexed_cNotPt_V4 : - case Hexagon::LDriub_indexed_cdnNotPt_V4 : case 
Hexagon::LDriub_indexed_shl_cNotPt_V4 : case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDrih_indexed_cNotPt_V4 : - case Hexagon::LDrih_indexed_cdnNotPt_V4 : case Hexagon::LDrih_indexed_shl_cNotPt_V4 : case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriuh_indexed_cNotPt_V4 : - case Hexagon::LDriuh_indexed_cdnNotPt_V4 : case Hexagon::LDriuh_indexed_shl_cNotPt_V4 : case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriw_indexed_cNotPt_V4 : - case Hexagon::LDriw_indexed_cdnNotPt_V4 : case Hexagon::LDriw_indexed_shl_cNotPt_V4 : case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 : case Hexagon::ADD_ri_cNotPt : @@ -2471,42 +2138,22 @@ static bool GetPredicateSense(MachineInstr* MI, case Hexagon::ZXTH_cNotPt_V4 : case Hexagon::ZXTH_cdnNotPt_V4 : - case Hexagon::LDrid_GP_cNotPt_V4 : - case Hexagon::LDrib_GP_cNotPt_V4 : - case Hexagon::LDriub_GP_cNotPt_V4 : - case Hexagon::LDrih_GP_cNotPt_V4 : - case Hexagon::LDriuh_GP_cNotPt_V4 : - case Hexagon::LDriw_GP_cNotPt_V4 : case Hexagon::LDd_GP_cNotPt_V4 : case Hexagon::LDb_GP_cNotPt_V4 : case Hexagon::LDub_GP_cNotPt_V4 : case Hexagon::LDh_GP_cNotPt_V4 : case Hexagon::LDuh_GP_cNotPt_V4 : case Hexagon::LDw_GP_cNotPt_V4 : - case Hexagon::STrid_GP_cNotPt_V4 : - case Hexagon::STrib_GP_cNotPt_V4 : - case Hexagon::STrih_GP_cNotPt_V4 : - case Hexagon::STriw_GP_cNotPt_V4 : case Hexagon::STd_GP_cNotPt_V4 : case Hexagon::STb_GP_cNotPt_V4 : case Hexagon::STh_GP_cNotPt_V4 : case Hexagon::STw_GP_cNotPt_V4 : - case Hexagon::LDrid_GP_cdnNotPt_V4 : - case Hexagon::LDrib_GP_cdnNotPt_V4 : - case Hexagon::LDriub_GP_cdnNotPt_V4 : - case Hexagon::LDrih_GP_cdnNotPt_V4 : - case Hexagon::LDriuh_GP_cdnNotPt_V4 : - case Hexagon::LDriw_GP_cdnNotPt_V4 : case Hexagon::LDd_GP_cdnNotPt_V4 : case Hexagon::LDb_GP_cdnNotPt_V4 : case Hexagon::LDub_GP_cdnNotPt_V4 : case Hexagon::LDh_GP_cdnNotPt_V4 : case Hexagon::LDuh_GP_cdnNotPt_V4 : case Hexagon::LDw_GP_cdnNotPt_V4 : - case Hexagon::STrid_GP_cdnNotPt_V4 : - case Hexagon::STrib_GP_cdnNotPt_V4 : - case Hexagon::STrih_GP_cdnNotPt_V4 : - case Hexagon::STriw_GP_cdnNotPt_V4 : case Hexagon::STd_GP_cdnNotPt_V4 : case Hexagon::STb_GP_cdnNotPt_V4 : case Hexagon::STh_GP_cdnNotPt_V4 : @@ -2563,28 +2210,16 @@ bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) { case Hexagon::POST_LDriub_cdnPt_V4 : case Hexagon::POST_LDriub_cdnNotPt_V4 : - case Hexagon::LDrid_indexed_cdnPt_V4 : - case Hexagon::LDrid_indexed_cdnNotPt_V4 : case Hexagon::LDrid_indexed_shl_cdnPt_V4 : case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDrib_indexed_cdnPt_V4 : - case Hexagon::LDrib_indexed_cdnNotPt_V4 : case Hexagon::LDrib_indexed_shl_cdnPt_V4 : case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriub_indexed_cdnPt_V4 : - case Hexagon::LDriub_indexed_cdnNotPt_V4 : case Hexagon::LDriub_indexed_shl_cdnPt_V4 : case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDrih_indexed_cdnPt_V4 : - case Hexagon::LDrih_indexed_cdnNotPt_V4 : case Hexagon::LDrih_indexed_shl_cdnPt_V4 : case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriuh_indexed_cdnPt_V4 : - case Hexagon::LDriuh_indexed_cdnNotPt_V4 : case Hexagon::LDriuh_indexed_shl_cdnPt_V4 : case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriw_indexed_cdnPt_V4 : - case Hexagon::LDriw_indexed_cdnNotPt_V4 : case Hexagon::LDriw_indexed_shl_cdnPt_V4 : case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 : @@ -2680,27 +2315,7 @@ bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) { case Hexagon::LDuh_GP_cdnNotPt_V4: case 
Hexagon::LDw_GP_cdnPt_V4: case Hexagon::LDw_GP_cdnNotPt_V4: - case Hexagon::LDrid_GP_cdnPt_V4: - case Hexagon::LDrid_GP_cdnNotPt_V4: - case Hexagon::LDrib_GP_cdnPt_V4: - case Hexagon::LDrib_GP_cdnNotPt_V4: - case Hexagon::LDriub_GP_cdnPt_V4: - case Hexagon::LDriub_GP_cdnNotPt_V4: - case Hexagon::LDrih_GP_cdnPt_V4: - case Hexagon::LDrih_GP_cdnNotPt_V4: - case Hexagon::LDriuh_GP_cdnPt_V4: - case Hexagon::LDriuh_GP_cdnNotPt_V4: - case Hexagon::LDriw_GP_cdnPt_V4: - case Hexagon::LDriw_GP_cdnNotPt_V4: - - case Hexagon::STrid_GP_cdnPt_V4: - case Hexagon::STrid_GP_cdnNotPt_V4: - case Hexagon::STrib_GP_cdnPt_V4: - case Hexagon::STrib_GP_cdnNotPt_V4: - case Hexagon::STrih_GP_cdnPt_V4: - case Hexagon::STrih_GP_cdnNotPt_V4: - case Hexagon::STriw_GP_cdnPt_V4: - case Hexagon::STriw_GP_cdnNotPt_V4: + case Hexagon::STd_GP_cdnPt_V4: case Hexagon::STd_GP_cdnNotPt_V4: case Hexagon::STb_GP_cdnPt_V4: diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp index c700354..36da6df 100644 --- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp @@ -12,14 +12,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "asm-printer" -#include "HexagonInstPrinter.h" -#include "Hexagon.h" #include "HexagonAsmPrinter.h" -#include "HexagonMCInst.h" +#include "Hexagon.h" +#include "HexagonInstPrinter.h" +#include "MCTargetDesc/HexagonMCInst.h" +#include "llvm/MC/MCInst.h" #include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" #include <cstdio> @@ -28,6 +28,8 @@ using namespace llvm; #define GET_INSTRUCTION_NAME #include "HexagonGenAsmWriter.inc" +const char HexagonInstPrinter::PacketPadding = '\t'; + StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const { return MII.getName(Opcode); } @@ -43,43 +45,42 @@ void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O, void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot) { - const char packetPadding[] = " "; const char startPacket = '{', endPacket = '}'; // TODO: add outer HW loop when it's supported too. if (MI->getOpcode() == Hexagon::ENDLOOP0) { // Ending a harware loop is different from ending an regular packet. - assert(MI->isEndPacket() && "Loop end must also end the packet"); + assert(MI->isPacketEnd() && "Loop-end must also end the packet"); - if (MI->isStartPacket()) { + if (MI->isPacketStart()) { // There must be a packet to end a loop. // FIXME: when shuffling is always run, this shouldn't be needed. HexagonMCInst Nop; StringRef NoAnnot; Nop.setOpcode (Hexagon::NOP); - Nop.setStartPacket (MI->isStartPacket()); + Nop.setPacketStart (MI->isPacketStart()); printInst (&Nop, O, NoAnnot); } // Close the packet. - if (MI->isEndPacket()) - O << packetPadding << endPacket; + if (MI->isPacketEnd()) + O << PacketPadding << endPacket; printInstruction(MI, O); } else { // Prefix the insn opening the packet. - if (MI->isStartPacket()) - O << packetPadding << startPacket << '\n'; + if (MI->isPacketStart()) + O << PacketPadding << startPacket << '\n'; printInstruction(MI, O); // Suffix the insn closing the packet. - if (MI->isEndPacket()) + if (MI->isPacketEnd()) // Suffix the packet in a new line always, since the GNU assembler has // issues with a closing brace on the same line as CONST{32,64}. 
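The packet bracketing implemented above is easier to follow in isolation. A minimal standalone sketch of the same scheme, where the Insn struct and print() are illustrative stand-ins rather than the real printer:

#include <iostream>
#include <string>

// Illustrative stand-in for the packet markers carried by HexagonMCInst.
struct Insn {
  std::string Text;
  bool PacketStart = false, PacketEnd = false, IsEndLoop = false;
};

// '{' opens a packet on its own line; '}' always gets its own line (the GNU
// assembler mishandles a brace sharing a line with CONST{32,64}); a loop-end
// that would start a packet alone is padded with a NOP so there is a packet
// to close before the loop-end marker is printed.
void print(const Insn &MI, std::ostream &OS) {
  const char Pad = '\t';
  if (MI.IsEndLoop) {
    if (MI.PacketStart)
      print(Insn{"nop", true, false, false}, OS);  // emits '{' and the pad NOP
    if (MI.PacketEnd)
      OS << Pad << "}\n";
    OS << Pad << MI.Text << '\n';
  } else {
    if (MI.PacketStart)
      OS << Pad << "{\n";
    OS << Pad << MI.Text << '\n';
    if (MI.PacketEnd)
      OS << Pad << "}\n";
  }
}

Routing the padding NOP through the same print() call is what emits the opening brace for a loop-end that begins its own packet.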
- O << '\n' << packetPadding << endPacket; + O << '\n' << PacketPadding << endPacket; } printAnnotation(O, Annot); @@ -102,12 +103,23 @@ void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const { - O << MI->getOperand(OpNo).getImm(); + const MCOperand& MO = MI->getOperand(OpNo); + + if(MO.isExpr()) { + O << *MO.getExpr(); + } else if(MO.isImm()) { + O << MI->getOperand(OpNo).getImm(); + } else { + llvm_unreachable("Unknown operand"); + } } void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const { - O << MI->getOperand(OpNo).getImm(); + const HexagonMCInst *HMCI = static_cast<const HexagonMCInst*>(MI); + if (HMCI->isConstExtended()) + O << "#"; + printOperand(MI, OpNo, O); } void HexagonInstPrinter::printUnsignedImmOperand(const MCInst *MI, diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h index 902a323..d0cef68 100644 --- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h +++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h @@ -14,16 +14,18 @@ #ifndef HEXAGONINSTPRINTER_H #define HEXAGONINSTPRINTER_H -#include "HexagonMCInst.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" namespace llvm { + class HexagonMCInst; + class HexagonInstPrinter : public MCInstPrinter { public: explicit HexagonInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) - : MCInstPrinter(MAI, MII, MRI) {} + : MCInstPrinter(MAI, MII, MRI), MII(MII) {} virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot); @@ -65,10 +67,19 @@ namespace llvm { void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O) const { printSymbol(MI, OpNo, O, false); } - bool isConstExtended(const MCInst *MI) const; + const MCInstrInfo &getMII() const { + return MII; + } + protected: void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi) const; + + static const char PacketPadding; + + private: + const MCInstrInfo &MII; + }; } // end namespace llvm diff --git a/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt index 8678401..59849aa 100644 --- a/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt +++ b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = HexagonAsmPrinter parent = Hexagon -required_libraries = MC Support +required_libraries = HexagonDesc MC Support add_to_library_groups = Hexagon diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt index 8e3da99..62b9b60 100644 --- a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMHexagonDesc - HexagonMCTargetDesc.cpp HexagonMCAsmInfo.cpp + HexagonMCInst.cpp + HexagonMCTargetDesc.cpp ) add_dependencies(LLVMHexagonDesc HexagonCommonTableGen) diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index 9fc826f..5f9718b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -17,6 +17,9 @@ #ifndef HEXAGONBASEINFO_H #define HEXAGONBASEINFO_H +#include "HexagonMCTargetDesc.h" +#include "llvm/Support/ErrorHandling.h" + namespace llvm { /// HexagonII - This namespace holds all 
of the target specific flags that @@ -28,19 +31,19 @@ namespace HexagonII { // Insn types. // *** Must match HexagonInstrFormat*.td *** enum Type { - TypePSEUDO = 0, - TypeALU32 = 1, - TypeCR = 2, - TypeJR = 3, - TypeJ = 4, - TypeLD = 5, - TypeST = 6, - TypeSYSTEM = 7, - TypeXTYPE = 8, - TypeMEMOP = 9, - TypeNV = 10, - TypePREFIX = 30, // Such as extenders. - TypeMARKER = 31 // Such as end of a HW loop. + TypePSEUDO = 0, + TypeALU32 = 1, + TypeCR = 2, + TypeJR = 3, + TypeJ = 4, + TypeLD = 5, + TypeST = 6, + TypeSYSTEM = 7, + TypeXTYPE = 8, + TypeMEMOP = 9, + TypeNV = 10, + TypePREFIX = 30, // Such as extenders. + TypeENDLOOP = 31 // Such as end of a HW loop. }; enum SubTarget { @@ -65,6 +68,14 @@ namespace HexagonII { BaseRegOffset = 5 // Indirect with register offset }; + enum MemAccessSize { + NoMemAccess = 0, // Not a memory acces instruction. + ByteAccess = 1, // Byte access instruction (memb). + HalfWordAccess = 2, // Half word access instruction (memh). + WordAccess = 3, // Word access instrution (memw). + DoubleWordAccess = 4 // Double word access instruction (memd) + }; + // MCInstrDesc TSFlags // *** Must match HexagonInstrFormat*.td *** enum { @@ -79,46 +90,67 @@ namespace HexagonII { // Predicated instructions. PredicatedPos = 6, PredicatedMask = 0x1, - PredicatedNewPos = 7, + PredicatedFalsePos = 7, + PredicatedFalseMask = 0x1, + PredicatedNewPos = 8, PredicatedNewMask = 0x1, - // Stores that can be newified. - mayNVStorePos = 8, + // New-Value consumer instructions. + NewValuePos = 9, + NewValueMask = 0x1, + + // New-Value producer instructions. + hasNewValuePos = 10, + hasNewValueMask = 0x1, + + // Which operand consumes or produces a new value. + NewValueOpPos = 11, + NewValueOpMask = 0x7, + + // Which bits encode the new value. + NewValueBitsPos = 14, + NewValueBitsMask = 0x3, + + // Stores that can become new-value stores. + mayNVStorePos = 16, mayNVStoreMask = 0x1, - // Dot new value store instructions. - NVStorePos = 9, + // New-value store instructions. + NVStorePos = 17, NVStoreMask = 0x1, // Extendable insns. - ExtendablePos = 10, + ExtendablePos = 18, ExtendableMask = 0x1, // Insns must be extended. - ExtendedPos = 11, + ExtendedPos = 19, ExtendedMask = 0x1, // Which operand may be extended. - ExtendableOpPos = 12, + ExtendableOpPos = 20, ExtendableOpMask = 0x7, // Signed or unsigned range. - ExtentSignedPos = 15, + ExtentSignedPos = 23, ExtentSignedMask = 0x1, // Number of bits of range before extending operand. - ExtentBitsPos = 16, + ExtentBitsPos = 24, ExtentBitsMask = 0x1f, // Valid subtargets - validSubTargetPos = 21, + validSubTargetPos = 29, validSubTargetMask = 0xf, - // Addressing mode for load/store instructions - AddrModePos = 25, - AddrModeMask = 0xf + // Addressing mode for load/store instructions. + AddrModePos = 33, + AddrModeMask = 0x7, - }; + // Access size of memory access instructions (load/store). + MemAccessSizePos = 36, + MemAccesSizeMask = 0x7 + }; // *** The code above must match HexagonInstrFormat*.td *** // diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp new file mode 100644 index 0000000..9260b4a --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp @@ -0,0 +1,175 @@ +//===- HexagonMCInst.cpp - Hexagon sub-class of MCInst --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
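Every Pos/Mask pair in the renumbered TSFlags layout above describes one field packed into a single flags word, and every query reduces to the same shift-and-mask idiom. A short sketch using three of the positions defined above (the helper name getField is illustrative); note that AddrModePos (33) and MemAccessSizePos (36) now sit past bit 31, so the flags must be handled as a 64-bit value:

#include <cstdint>

// Generic shift-and-mask extraction used by every TSFlags accessor.
static inline uint64_t getField(uint64_t TSFlags, unsigned Pos, uint64_t Mask) {
  return (TSFlags >> Pos) & Mask;
}

// Using the renumbered positions from the enum above:
bool isPredicatedFalse(uint64_t F) { return getField(F, 7, 0x1); }      // PredicatedFalsePos
uint64_t getAddrMode(uint64_t F) { return getField(F, 33, 0x7); }       // AddrModePos
uint64_t getMemAccessSize(uint64_t F) { return getField(F, 36, 0x7); }  // MemAccessSizePos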
+// +//===----------------------------------------------------------------------===// +// +// This class extends MCInst to allow some Hexagon VLIW annotations. +// +//===----------------------------------------------------------------------===// + +#include "HexagonInstrInfo.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCInst.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" + +using namespace llvm; + +// Return the slots used by the insn. +unsigned HexagonMCInst::getUnits(const HexagonTargetMachine* TM) const { + const HexagonInstrInfo* QII = TM->getInstrInfo(); + const InstrItineraryData* II = TM->getInstrItineraryData(); + const InstrStage* + IS = II->beginStage(QII->get(this->getOpcode()).getSchedClass()); + + return (IS->getUnits()); +} + +// Return the Hexagon ISA class for the insn. +unsigned HexagonMCInst::getType() const { + const uint64_t F = MCID->TSFlags; + + return ((F >> HexagonII::TypePos) & HexagonII::TypeMask); +} + +// Return whether the insn is an actual insn. +bool HexagonMCInst::isCanon() const { + return (!MCID->isPseudo() && + !isPrefix() && + getType() != HexagonII::TypeENDLOOP); +} + +// Return whether the insn is a prefix. +bool HexagonMCInst::isPrefix() const { + return (getType() == HexagonII::TypePREFIX); +} + +// Return whether the insn is solo, i.e., cannot be in a packet. +bool HexagonMCInst::isSolo() const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask); +} + +// Return whether the insn is a new-value consumer. +bool HexagonMCInst::isNewValue() const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); +} + +// Return whether the instruction is a legal new-value producer. +bool HexagonMCInst::hasNewValue() const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask); +} + +// Return the operand that consumes or produces a new value. +const MCOperand& HexagonMCInst::getNewValue() const { + const uint64_t F = MCID->TSFlags; + const unsigned O = (F >> HexagonII::NewValueOpPos) & + HexagonII::NewValueOpMask; + const MCOperand& MCO = getOperand(O); + + assert ((isNewValue() || hasNewValue()) && MCO.isReg()); + return (MCO); +} + +// Return whether the instruction needs to be constant extended. +// 1) Always return true if the instruction has 'isExtended' flag set. +// +// isExtendable: +// 2) For immediate extended operands, return true only if the value is +// out-of-range. +// 3) For global address, always return true. + +bool HexagonMCInst::isConstExtended(void) const { + if (isExtended()) + return true; + + if (!isExtendable()) + return false; + + short ExtOpNum = getCExtOpNum(); + int MinValue = getMinValue(); + int MaxValue = getMaxValue(); + const MCOperand& MO = getOperand(ExtOpNum); + + // We could be using an instruction with an extendable immediate and shoehorn + // a global address into it. If it is a global address it will be constant + // extended. We do this for COMBINE. + // We currently only handle isGlobal() because it is the only kind of + // object we are going to end up with here for now. + // In the future we probably should add isSymbol(), etc. + if (MO.isExpr()) + return true; + + // If the extendable operand is not 'Immediate' type, the instruction should + // have 'isExtended' flag set. 
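Condensed, the decision the comments above spell out is a four-step chain of checks. A sketch of just that control flow, with all inputs passed in explicitly (the real method, continued below, reads them from TSFlags and the operand):

// Decision order for constant extension, as described above:
// 1. instructions marked isExtended are always extended;
// 2. instructions that are not extendable never are;
// 3. symbolic operands (e.g. a global address) are always extended;
// 4. plain immediates are extended only when outside the encodable range.
bool needsConstExtender(bool IsExtended, bool IsExtendable, bool OperandIsExpr,
                        int Imm, int MinValue, int MaxValue) {
  if (IsExtended)
    return true;
  if (!IsExtendable)
    return false;
  if (OperandIsExpr)
    return true;
  return Imm < MinValue || Imm > MaxValue;
}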
+ assert(MO.isImm() && "Extendable operand must be Immediate type"); + + int ImmValue = MO.getImm(); + return (ImmValue < MinValue || ImmValue > MaxValue); +} + +// Return whether the instruction must be always extended. +bool HexagonMCInst::isExtended(void) const { + const uint64_t F = MCID->TSFlags; + return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; +} + +// Return true if the instruction may be extended based on the operand value. +bool HexagonMCInst::isExtendable(void) const { + const uint64_t F = MCID->TSFlags; + return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; +} + +// Return number of bits in the constant extended operand. +unsigned HexagonMCInst::getBitCount(void) const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask); +} + +// Return constant extended operand number. +unsigned short HexagonMCInst::getCExtOpNum(void) const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); +} + +// Return whether the operand can be constant extended. +bool HexagonMCInst::isOperandExtended(const unsigned short OperandNum) const { + const uint64_t F = MCID->TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) + == OperandNum; +} + +// Return the min value that a constant extendable operand can have +// without being extended. +int HexagonMCInst::getMinValue(void) const { + const uint64_t F = MCID->TSFlags; + unsigned isSigned = (F >> HexagonII::ExtentSignedPos) + & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) + & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return -1 << (bits - 1); + else + return 0; +} + +// Return the max value that a constant extendable operand can have +// without being extended. +int HexagonMCInst::getMaxValue(void) const { + const uint64_t F = MCID->TSFlags; + unsigned isSigned = (F >> HexagonII::ExtentSignedPos) + & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) + & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return ~(-1 << (bits - 1)); + else + return ~(-1 << bits); +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h new file mode 100644 index 0000000..3ca71f0 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h @@ -0,0 +1,100 @@ +//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class extends MCInst to allow some VLIW annotations. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONMCINST_H +#define HEXAGONMCINST_H + +#include "HexagonTargetMachine.h" +#include "llvm/MC/MCInst.h" + +namespace llvm { + class MCOperand; + + class HexagonMCInst: public MCInst { + // MCID is set during instruction lowering. + // It is needed in order to access TSFlags for + // use in checking MC instruction properties. 
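The bit twiddling in getMinValue/getMaxValue in the .cpp above is worth checking with concrete numbers: a signed field of width bits encodes [-2^(bits-1), 2^(bits-1)-1], an unsigned one [0, 2^bits-1]. A self-contained check (left-shifting -1 mirrors the code above; it is implementation-defined in ISO C++ but behaves as expected on two's-complement targets):

#include <cassert>

int minValue(bool Signed, unsigned Bits) {
  return Signed ? -1 << (Bits - 1) : 0;
}

int maxValue(bool Signed, unsigned Bits) {
  return Signed ? ~(-1 << (Bits - 1)) : ~(-1 << Bits);
}

int main() {
  assert(minValue(true, 8) == -128 && maxValue(true, 8) == 127);  // signed 8-bit
  assert(minValue(false, 8) == 0 && maxValue(false, 8) == 255);   // unsigned 8-bit
  return 0;
}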
+ const MCInstrDesc *MCID; + + // Packet start and end markers + unsigned packetStart: 1, packetEnd: 1; + + public: + explicit HexagonMCInst(): + MCInst(), MCID(0), packetStart(0), packetEnd(0) {}; + HexagonMCInst(const MCInstrDesc& mcid): + MCInst(), MCID(&mcid), packetStart(0), packetEnd(0) {}; + + bool isPacketStart() const { return (packetStart); }; + bool isPacketEnd() const { return (packetEnd); }; + void setPacketStart(bool Y) { packetStart = Y; }; + void setPacketEnd(bool Y) { packetEnd = Y; }; + void resetPacket() { setPacketStart(false); setPacketEnd(false); }; + + // Return the slots used by the insn. + unsigned getUnits(const HexagonTargetMachine* TM) const; + + // Return the Hexagon ISA class for the insn. + unsigned getType() const; + + void setDesc(const MCInstrDesc& mcid) { MCID = &mcid; }; + const MCInstrDesc& getDesc(void) const { return *MCID; }; + + // Return whether the insn is an actual insn. + bool isCanon() const; + + // Return whether the insn is a prefix. + bool isPrefix() const; + + // Return whether the insn is solo, i.e., cannot be in a packet. + bool isSolo() const; + + // Return whether the instruction needs to be constant extended. + bool isConstExtended() const; + + // Return constant extended operand number. + unsigned short getCExtOpNum(void) const; + + // Return whether the insn is a new-value consumer. + bool isNewValue() const; + + // Return whether the instruction is a legal new-value producer. + bool hasNewValue() const; + + // Return the operand that consumes or produces a new value. + const MCOperand& getNewValue() const; + + // Return number of bits in the constant extended operand. + unsigned getBitCount(void) const; + + private: + // Return whether the instruction must be always extended. + bool isExtended() const; + + // Return true if the insn may be extended based on the operand value. + bool isExtendable() const; + + // Return true if the operand can be constant extended. + bool isOperandExtended(const unsigned short OperandNum) const; + + // Return the min value that a constant extendable operand can have + // without being extended. + int getMinValue() const; + + // Return the max value that a constant extendable operand can have + // without being extended. 
+ int getMaxValue() const; + }; +} + +#endif diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 737789b..6b1d2d1 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -13,11 +13,13 @@ #include "HexagonMCTargetDesc.h" #include "HexagonMCAsmInfo.h" +#include "InstPrinter/HexagonInstPrinter.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index f3a9c1c..c06e8bc 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore +subdirectories = AArch64 ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore ; This is a special group whose required libraries are extended (by llvm-build) ; with the best execution engine (the native JIT, if available, or the diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index 2ab163e..ad495ff 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -451,7 +451,7 @@ MBlazeOperand *MBlazeAsmParser::ParseImmediate() { case AsmToken::Minus: case AsmToken::Integer: case AsmToken::Identifier: - if (getParser().ParseExpression(EVal)) + if (getParser().parseExpression(EVal)) return 0; return MBlazeOperand::CreateImm(EVal, S, E); @@ -537,10 +537,10 @@ bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; - if (getParser().ParseExpression(Value)) + if (getParser().parseExpression(Value)) return true; - getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); + getParser().getStreamer().EmitValue(Value, Size); if (getLexer().is(AsmToken::EndOfStatement)) break; diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp index b6edbba..172304b 100644 --- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp +++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp @@ -426,6 +426,45 @@ void MBlazeFrameLowering::emitEpilogue(MachineFunction &MF, } } +// Eliminate ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions +void MBlazeFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const MBlazeInstrInfo &TII = + *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo()); + if (!hasReservedCallFrame(MF)) { + // If we have a frame pointer, turn the adjcallstackup instruction into a + // 'addi r1, r1, -<amt>' and the adjcallstackdown instruction into + // 'addi r1, r1, <amt>' + MachineInstr *Old = I; + int Amount = Old->getOperand(0).getImm() + 4; + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. 
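The rounding that follows, repeated in the MSP430 copy of this code later in the patch, is the usual integer round-up-to-multiple idiom:

// Round Amount up to the next multiple of Align using integer arithmetic.
// E.g. Align = 8: 13 -> (13 + 7) / 8 * 8 = 16, and 16 stays 16.
static unsigned alignTo(unsigned Amount, unsigned Align) {
  return (Amount + Align - 1) / Align * Align;
}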
+ unsigned Align = getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + MachineInstr *New; + if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) { + New = BuildMI(MF,Old->getDebugLoc(), TII.get(MBlaze::ADDIK),MBlaze::R1) + .addReg(MBlaze::R1).addImm(-Amount); + } else { + assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP); + New = BuildMI(MF,Old->getDebugLoc(), TII.get(MBlaze::ADDIK),MBlaze::R1) + .addReg(MBlaze::R1).addImm(Amount); + } + + // Replace the pseudo instruction with a new instruction... + MBB.insert(I, New); + } + } + + // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. + MBB.erase(I); +} + + void MBlazeFrameLowering:: processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.h b/lib/Target/MBlaze/MBlazeFrameLowering.h index 01e6578..f4228c5 100644 --- a/lib/Target/MBlaze/MBlazeFrameLowering.h +++ b/lib/Target/MBlaze/MBlazeFrameLowering.h @@ -39,6 +39,10 @@ public: void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + bool hasFP(const MachineFunction &MF) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const; diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 8a9f092..7664c60 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -81,6 +81,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FPOWI, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); setOperationAction(ISD::FLOG, MVT::f32, Expand); @@ -1027,15 +1028,17 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // Analize return values. CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze); - // If this is the first return lowered for this function, add - // the regs to the liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); + + // If this function is using the interrupt_handler calling convention + // then use "rtid r14, 0" otherwise use "rtsd r15, 8" + unsigned Ret = (CallConv == CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet + : MBlazeISD::Ret; + unsigned Reg = (CallConv == CallingConv::MBLAZE_INTR) ? MBlaze::R14 + : MBlaze::R15; + RetOps.push_back(DAG.getRegister(Reg, MVT::i32)); + // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -1048,20 +1051,16 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // guarantee that all emitted copies are // stuck together, avoiding something bad Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - // If this function is using the interrupt_handler calling convention - // then use "rtid r14, 0" otherwise use "rtsd r15, 8" - unsigned Ret = (CallConv == CallingConv::MBLAZE_INTR) ? 
MBlazeISD::IRet - : MBlazeISD::Ret; - unsigned Reg = (CallConv == CallingConv::MBLAZE_INTR) ? MBlaze::R14 - : MBlaze::R15; - SDValue DReg = DAG.getRegister(Reg, MVT::i32); + RetOps[0] = Chain; // Update chain. + // Add the flag if we have it. if (Flag.getNode()) - return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg, Flag); + RetOps.push_back(Flag); - return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg); + return DAG.getNode(Ret, dl, MVT::Other, &RetOps[0], RetOps.size()); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td index 139bf71..f86bc0b 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.td +++ b/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -28,9 +28,9 @@ def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; //===----------------------------------------------------------------------===// def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def MBlazeIRet : SDNode<"MBlazeISD::IRet", SDT_MBlazeIRet, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink, [SDNPHasChain,SDNPOptInGlue,SDNPOutGlue, diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index ed06cc4..d0fd7dc 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -83,67 +83,21 @@ getReservedRegs(const MachineFunction &MF) const { return Reserved; } -// This function eliminate ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions -void MBlazeRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (!TFI->hasReservedCallFrame(MF)) { - // If we have a frame pointer, turn the adjcallstackup instruction into a - // 'addi r1, r1, -<amt>' and the adjcallstackdown instruction into - // 'addi r1, r1, <amt>' - MachineInstr *Old = I; - int Amount = Old->getOperand(0).getImm() + 4; - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = TFI->getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; - - MachineInstr *New; - if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) { - New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1) - .addReg(MBlaze::R1).addImm(-Amount); - } else { - assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP); - New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1) - .addReg(MBlaze::R1).addImm(Amount); - } - - // Replace the pseudo instruction with a new instruction... - MBB.insert(I, New); - } - } - - // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. - MBB.erase(I); -} - // FrameIndex represent objects inside a abstract stack. // We must replace FrameIndex with an stack/frame pointer // direct reference. 
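The eliminateFrameIndex change that follows is the same mechanical API update applied to every target in this patch: the hook now receives FIOperandNum instead of scanning for the frame-index operand itself. A stand-in sketch of the new division of labor (Operand, Instr, and the caller shown are illustrative substitutes for the real machine-IR types and the common frame-index replacement loop):

#include <vector>

// Operand/Instr are illustrative stand-ins for MachineOperand/MachineInstr.
struct Operand { bool IsFI; };
struct Instr { std::vector<Operand> Ops; };

// Target hook: trusts FIOperandNum instead of re-scanning the operand list.
void eliminateFrameIndex(Instr &MI, int SPAdj, unsigned FIOperandNum) {
  Operand &FI = MI.Ops[FIOperandNum];
  (void)FI; (void)SPAdj;  // ...rewrite FI into base register + offset here...
}

// The scan now happens once, in the common caller, before dispatching.
void replaceFrameIndices(Instr &MI) {
  for (unsigned i = 0, e = MI.Ops.size(); i != e; ++i)
    if (MI.Ops[i].IsFI)
      eliminateFrameIndex(MI, /*SPAdj=*/0, /*FIOperandNum=*/i);
}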
void MBlazeRegisterInfo:: eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS) const { + unsigned FIOperandNum, RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); - - unsigned i = 0; - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - } - - unsigned oi = i == 2 ? 1 : 2; + unsigned OFIOperandNum = FIOperandNum == 2 ? 1 : 2; DEBUG(dbgs() << "\nFunction : " << MF.getName() << "\n"; dbgs() << "<--------->\n" << MI); - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); int stackSize = MFI->getStackSize(); int spOffset = MFI->getObjectOffset(FrameIndex); @@ -159,12 +113,12 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, // as explained on LowerFormalArguments, detect negative offsets // and adjust SPOffsets considering the final stack size. int Offset = (spOffset < 0) ? (stackSize - spOffset) : spOffset; - Offset += MI.getOperand(oi).getImm(); + Offset += MI.getOperand(OFIOperandNum).getImm(); DEBUG(dbgs() << "Offset : " << Offset << "\n" << "<--------->\n"); - MI.getOperand(oi).ChangeToImmediate(Offset); - MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false); + MI.getOperand(OFIOperandNum).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false); } void MBlazeRegisterInfo:: diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index 1d51162..99a2fac 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -50,13 +50,10 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - /// Stack Frame Processing Methods void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp index 2e328cb..3c95760 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp @@ -18,7 +18,7 @@ using namespace llvm; void MSP430MCAsmInfo::anchor() { } MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) { - PointerSize = 2; + PointerSize = CalleeSaveStackSlotSize = 2; PrivateGlobalPrefix = ".L"; WeakRefDirective ="\t.weak\t"; diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp index aef45d8..ae2e556 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -222,13 +222,73 @@ MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } +void MSP430FrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const MSP430InstrInfo &TII = + *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo()); + unsigned StackAlign = getStackAlignment(); + + if (!hasReservedCallFrame(MF)) { + // If the stack pointer can be changed after prologue, turn the + // adjcallstackup 
instruction into a 'sub SPW, <amt>' and the + // adjcallstackdown instruction into 'add SPW, <amt>' + // TODO: consider using push / pop instead of sub + store / add + MachineInstr *Old = I; + uint64_t Amount = Old->getOperand(0).getImm(); + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + Amount = (Amount+StackAlign-1)/StackAlign*StackAlign; + + MachineInstr *New = 0; + if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) { + New = BuildMI(MF, Old->getDebugLoc(), + TII.get(MSP430::SUB16ri), MSP430::SPW) + .addReg(MSP430::SPW).addImm(Amount); + } else { + assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode()); + // factor out the amount the callee already popped. + uint64_t CalleeAmt = Old->getOperand(1).getImm(); + Amount -= CalleeAmt; + if (Amount) + New = BuildMI(MF, Old->getDebugLoc(), + TII.get(MSP430::ADD16ri), MSP430::SPW) + .addReg(MSP430::SPW).addImm(Amount); + } + + if (New) { + // The SRW implicit def is dead. + New->getOperand(3).setIsDead(); + + // Replace the pseudo instruction with a new instruction... + MBB.insert(I, New); + } + } + } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) { + // If we are performing frame pointer elimination and if the callee pops + // something off the stack pointer, add it back. + if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { + MachineInstr *Old = I; + MachineInstr *New = + BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri), + MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt); + // The SRW implicit def is dead. + New->getOperand(3).setIsDead(); + + MBB.insert(I, New); + } + } + + MBB.erase(I); +} + void MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - // Create a frame entry for the FPW register that must be saved. - if (TFI->hasFP(MF)) { + if (hasFP(MF)) { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true); (void)FrameIdx; assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() && diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h index cb02545..a077dd7 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.h +++ b/lib/Target/MSP430/MSP430FrameLowering.h @@ -35,6 +35,10 @@ public: void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 5a156c1..09cdf32 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -423,15 +423,8 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, // Analize return values. CCInfo.AnalyzeReturn(Outs, RetCC_MSP430); - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. 
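The liveout bookkeeping being deleted here follows the same recipe as the MBlaze hunk earlier: collect the chain, one operand per result register, and the optional glue into a RetOps vector, then emit a single variadic return node (which is why the .td return nodes in this patch gain SDNPVariadic). The shape of that vector, with SDValue reduced to an illustrative stand-in:

#include <vector>

// SDValue reduced to a stand-in; only the operand layout matters here.
struct SDValue { int Id; };
SDValue getRegister(unsigned Reg) { return SDValue{(int)Reg}; }

std::vector<SDValue> buildRetOps(SDValue Chain,
                                 const std::vector<unsigned> &RetRegs,
                                 const SDValue *Glue) {
  std::vector<SDValue> RetOps(1, Chain);   // RetOps[0] is always the chain
  for (unsigned Reg : RetRegs)
    RetOps.push_back(getRegister(Reg));    // one operand per result register
  if (Glue)
    RetOps.push_back(*Glue);               // glue goes last, when present
  return RetOps;  // the node is built from &RetOps[0] and RetOps.size()
}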
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -444,16 +437,19 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, // Guarantee that all emitted copies are stuck together, // avoiding something bad. Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } unsigned Opc = (CallConv == CallingConv::MSP430_INTR ? MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG); + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. if (Flag.getNode()) - return DAG.getNode(Opc, dl, MVT::Other, Chain, Flag); + RetOps.push_back(Flag); - // Return Void - return DAG.getNode(Opc, dl, MVT::Other, Chain); + return DAG.getNode(Opc, dl, MVT::Other, &RetOps[0], RetOps.size()); } /// LowerCCCCallTo - functions arguments are copied from virtual regs to diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index f003574..e45780d 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -40,9 +40,9 @@ def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, // MSP430 Specific Node Definitions. //===----------------------------------------------------------------------===// def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>; def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>; diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 8f7813a..0b3e9e2 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -101,83 +101,18 @@ MSP430RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) return &MSP430::GR16RegClass; } -void MSP430RegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (!TFI->hasReservedCallFrame(MF)) { - // If the stack pointer can be changed after prologue, turn the - // adjcallstackup instruction into a 'sub SPW, <amt>' and the - // adjcallstackdown instruction into 'add SPW, <amt>' - // TODO: consider using push / pop instead of sub + store / add - MachineInstr *Old = I; - uint64_t Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - Amount = (Amount+StackAlign-1)/StackAlign*StackAlign; - - MachineInstr *New = 0; - if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) { - New = BuildMI(MF, Old->getDebugLoc(), - TII.get(MSP430::SUB16ri), MSP430::SPW) - .addReg(MSP430::SPW).addImm(Amount); - } else { - assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode()); - // factor out the amount the callee already popped. 
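The callee-popped adjustment being deleted below (and re-added in MSP430FrameLowering above) shrinks the caller-side pop by whatever the callee already popped. A small worked check of that arithmetic, with illustrative names:

#include <cassert>
#include <cstdint>

// Net caller-side pop at a call-frame-destroy: only what the callee left.
uint64_t callerPopBytes(uint64_t Amount, uint64_t CalleeAmt) {
  assert(CalleeAmt <= Amount && "callee cannot pop more than was pushed");
  return Amount - CalleeAmt;
}

int main() {
  // 6 bytes of outgoing arguments, callee pops 2: caller adds 4 back to SPW.
  assert(callerPopBytes(6, 2) == 4);
  return 0;
}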
- uint64_t CalleeAmt = Old->getOperand(1).getImm(); - Amount -= CalleeAmt; - if (Amount) - New = BuildMI(MF, Old->getDebugLoc(), - TII.get(MSP430::ADD16ri), MSP430::SPW) - .addReg(MSP430::SPW).addImm(Amount); - } - - if (New) { - // The SRW implicit def is dead. - New->getOperand(3).setIsDead(); - - // Replace the pseudo instruction with a new instruction... - MBB.insert(I, New); - } - } - } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) { - // If we are performing frame pointer elimination and if the callee pops - // something off the stack pointer, add it back. - if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { - MachineInstr *Old = I; - MachineInstr *New = - BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri), - MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt); - // The SRW implicit def is dead. - New->getOperand(3).setIsDead(); - - MBB.insert(I, New); - } - } - - MBB.erase(I); -} - void MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); - unsigned i = 0; MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); DebugLoc dl = MI.getDebugLoc(); - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex); @@ -191,7 +126,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset += 2; // Skip the saved FPW // Fold imm into offset - Offset += MI.getOperand(i+1).getImm(); + Offset += MI.getOperand(FIOperandNum + 1).getImm(); if (MI.getOpcode() == MSP430::ADD16ri) { // This is actually "load effective address" of the stack slot @@ -199,7 +134,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // expand it into mov + add MI.setDesc(TII.get(MSP430::MOV16rr)); - MI.getOperand(i).ChangeToRegister(BasePtr, false); + MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); if (Offset == 0) return; @@ -216,8 +151,8 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, return; } - MI.getOperand(i).ChangeToRegister(BasePtr, false); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); } unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index 64a43bc..69cccb2 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -42,12 +42,9 @@ public: const TargetRegisterClass* getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; // Debug information queries. 
unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 085503eb..ade6084 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -84,15 +84,30 @@ class MipsAsmParser : public MCTargetAsmParser { bool ParseDirective(AsmToken DirectiveID); MipsAsmParser::OperandMatchResultTy - parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&); + parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + MipsAsmParser::OperandMatchResultTy + parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + MipsAsmParser::OperandMatchResultTy + parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + MipsAsmParser::OperandMatchResultTy + parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + MipsAsmParser::OperandMatchResultTy + parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + MipsAsmParser::OperandMatchResultTy + parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands); bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic); - int tryParseRegister(StringRef Mnemonic); + int tryParseRegister(bool is64BitReg); bool tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - StringRef Mnemonic); + bool is64BitReg); bool needsExpansion(MCInst &Inst); @@ -107,7 +122,7 @@ class MipsAsmParser : public MCTargetAsmParser { bool reportParseError(StringRef ErrorMsg); bool parseMemOffset(const MCExpr *&Res); - bool parseRelocOperand(const MCExpr *&Res, SMLoc &E); + bool parseRelocOperand(const MCExpr *&Res); bool parseDirectiveSet(); @@ -118,6 +133,8 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseSetReorderDirective(); bool parseSetNoReorderDirective(); + bool parseDirectiveWord(unsigned Size, SMLoc L); + MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol); bool isMips64() const { @@ -128,9 +145,11 @@ class MipsAsmParser : public MCTargetAsmParser { return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0; } - int matchRegisterName(StringRef Symbol); + int matchRegisterName(StringRef Symbol, bool is64BitReg); - int matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic); + int matchCPURegisterName(StringRef Symbol); + + int matchRegisterByNumber(unsigned RegNum, unsigned RegClass); void setFpFormat(FpFormatTy Format) { FpFormat = Format; @@ -146,7 +165,7 @@ class MipsAsmParser : public MCTargetAsmParser { unsigned getReg(int RC,int RegNo); - unsigned getATReg(); + int getATReg(); public: MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) : MCTargetAsmParser(), STI(sti), Parser(parser) { @@ -166,6 +185,20 @@ namespace { /// instruction. 
class MipsOperand : public MCParsedAsmOperand { +public: + enum RegisterKind { + Kind_None, + Kind_CPURegs, + Kind_CPU64Regs, + Kind_HWRegs, + Kind_HW64Regs, + Kind_FGR32Regs, + Kind_FGR64Regs, + Kind_AFGR64Regs, + Kind_CCRRegs + }; + +private: enum KindTy { k_CondCode, k_CoprocNum, @@ -186,6 +219,7 @@ class MipsOperand : public MCParsedAsmOperand { struct { unsigned RegNum; + RegisterKind Kind; } Reg; struct { @@ -246,6 +280,11 @@ public: return Reg.RegNum; } + void setRegKind(RegisterKind RegKind) { + assert((Kind == k_Register) && "Invalid access!"); + Reg.Kind = RegKind; + } + const MCExpr *getImm() const { assert((Kind == k_Immediate) && "Invalid access!"); return Imm.Val; @@ -296,6 +335,45 @@ public: return Op; } + bool isCPURegsAsm() const { + return Kind == k_Register && Reg.Kind == Kind_CPURegs; + } + void addCPURegsAsmOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); + } + + bool isCPU64RegsAsm() const { + return Kind == k_Register && Reg.Kind == Kind_CPU64Regs; + } + void addCPU64RegsAsmOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); + } + + bool isHWRegsAsm() const { + assert((Kind == k_Register) && "Invalid access!"); + return Reg.Kind == Kind_HWRegs; + } + void addHWRegsAsmOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); + } + + bool isHW64RegsAsm() const { + assert((Kind == k_Register) && "Invalid access!"); + return Reg.Kind == Kind_HW64Regs; + } + void addHW64RegsAsmOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); + } + + void addCCRAsmOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); + } + + bool isCCRAsm() const { + assert((Kind == k_Register) && "Invalid access!"); + return Reg.Kind == Kind_CCRRegs; + } + /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. @@ -344,31 +422,31 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, if ( 0 <= ImmValue && ImmValue <= 65535) { // for 0 <= j <= 65535. // li d,j => ori d,$zero,j - tmpInst.setOpcode(isMips64() ? Mips::ORi64 : Mips::ORi); + tmpInst.setOpcode(Mips::ORi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand( - MCOperand::CreateReg(isMips64() ? Mips::ZERO_64 : Mips::ZERO)); + MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else if ( ImmValue < 0 && ImmValue >= -32768) { // for -32768 <= j < 0. // li d,j => addiu d,$zero,j - tmpInst.setOpcode(Mips::ADDiu); //TODO:no ADDiu64 in td files? + tmpInst.setOpcode(Mips::ADDiu); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand( - MCOperand::CreateReg(isMips64() ? Mips::ZERO_64 : Mips::ZERO)); + MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else { // for any other value of j that is representable as a 32-bit integer. // li d,j => lui d,hi16(j) // ori d,d,lo16(j) - tmpInst.setOpcode(isMips64() ? Mips::LUi64 : Mips::LUi); + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); Instructions.push_back(tmpInst); tmpInst.clear(); - tmpInst.setOpcode(isMips64() ? 
Mips::ORi64 : Mips::ORi); + tmpInst.setOpcode(Mips::ORi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); @@ -390,7 +468,7 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, if ( -32768 <= ImmValue && ImmValue <= 65535) { //for -32768 <= j <= 65535. //la d,j(s) => addiu d,s,j - tmpInst.setOpcode(Mips::ADDiu); //TODO:no ADDiu64 in td files? + tmpInst.setOpcode(Mips::ADDiu); tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); @@ -400,12 +478,12 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, //la d,j(s) => lui d,hi16(j) // ori d,d,lo16(j) // addu d,d,s - tmpInst.setOpcode(isMips64()?Mips::LUi64:Mips::LUi); + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); Instructions.push_back(tmpInst); tmpInst.clear(); - tmpInst.setOpcode(isMips64()?Mips::ORi64:Mips::ORi); + tmpInst.setOpcode(Mips::ORi); tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); @@ -433,19 +511,19 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, tmpInst.setOpcode(Mips::ADDiu); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand( - MCOperand::CreateReg(isMips64()?Mips::ZERO_64:Mips::ZERO)); + MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else { //for any other value of j that is representable as a 32-bit integer. 
//la d,j => lui d,hi16(j) // ori d,d,lo16(j) - tmpInst.setOpcode(isMips64()?Mips::LUi64:Mips::LUi); + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); Instructions.push_back(tmpInst); tmpInst.clear(); - tmpInst.setOpcode(isMips64()?Mips::ORi64:Mips::ORi); + tmpInst.setOpcode(Mips::ORi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); @@ -498,84 +576,72 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return true; } -int MipsAsmParser::matchRegisterName(StringRef Name) { - +int MipsAsmParser::matchCPURegisterName(StringRef Name) { int CC; - if (!isMips64()) + + if (Name == "at") + return getATReg(); + CC = StringSwitch<unsigned>(Name) - .Case("zero", Mips::ZERO) - .Case("a0", Mips::A0) - .Case("a1", Mips::A1) - .Case("a2", Mips::A2) - .Case("a3", Mips::A3) - .Case("v0", Mips::V0) - .Case("v1", Mips::V1) - .Case("s0", Mips::S0) - .Case("s1", Mips::S1) - .Case("s2", Mips::S2) - .Case("s3", Mips::S3) - .Case("s4", Mips::S4) - .Case("s5", Mips::S5) - .Case("s6", Mips::S6) - .Case("s7", Mips::S7) - .Case("k0", Mips::K0) - .Case("k1", Mips::K1) - .Case("sp", Mips::SP) - .Case("fp", Mips::FP) - .Case("gp", Mips::GP) - .Case("ra", Mips::RA) - .Case("t0", Mips::T0) - .Case("t1", Mips::T1) - .Case("t2", Mips::T2) - .Case("t3", Mips::T3) - .Case("t4", Mips::T4) - .Case("t5", Mips::T5) - .Case("t6", Mips::T6) - .Case("t7", Mips::T7) - .Case("t8", Mips::T8) - .Case("t9", Mips::T9) - .Case("at", Mips::AT) - .Case("fcc0", Mips::FCC0) - .Default(-1); - else + .Case("zero", 0) + .Case("a0", 4) + .Case("a1", 5) + .Case("a2", 6) + .Case("a3", 7) + .Case("v0", 2) + .Case("v1", 3) + .Case("s0", 16) + .Case("s1", 17) + .Case("s2", 18) + .Case("s3", 19) + .Case("s4", 20) + .Case("s5", 21) + .Case("s6", 22) + .Case("s7", 23) + .Case("k0", 26) + .Case("k1", 27) + .Case("sp", 29) + .Case("fp", 30) + .Case("gp", 28) + .Case("ra", 31) + .Case("t0", 8) + .Case("t1", 9) + .Case("t2", 10) + .Case("t3", 11) + .Case("t4", 12) + .Case("t5", 13) + .Case("t6", 14) + .Case("t7", 15) + .Case("t8", 24) + .Case("t9", 25) + .Default(-1); + + // Although SGI documentation just cut out t0-t3 for n32/n64, + // GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7 + // We are supporting both cases, so for t0-t3 we'll just push them to t4-t7. 
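// Illustrative mapping (not part of the patch): under o32, "t0".."t3" name
// $8..$11, but n32/n64 reuse those encodings for the extra argument
// registers "a4".."a7" and name $12..$15 "t0".."t3" instead. Assuming the
// StringSwitch above returned 8 for "t0":
//   o32:     "t0" -> CC = 8           (i.e. $8)
//   n32/n64: "t0" -> CC = 8 + 4 = 12  (i.e. $12)
// while "a4" resolves to 8 through the 64-bit fallback switch below.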
+ if (isMips64() && 8 <= CC && CC <= 11) + CC += 4; + + if (CC == -1 && isMips64()) CC = StringSwitch<unsigned>(Name) - .Case("zero", Mips::ZERO_64) - .Case("at", Mips::AT_64) - .Case("v0", Mips::V0_64) - .Case("v1", Mips::V1_64) - .Case("a0", Mips::A0_64) - .Case("a1", Mips::A1_64) - .Case("a2", Mips::A2_64) - .Case("a3", Mips::A3_64) - .Case("a4", Mips::T0_64) - .Case("a5", Mips::T1_64) - .Case("a6", Mips::T2_64) - .Case("a7", Mips::T3_64) - .Case("t4", Mips::T4_64) - .Case("t5", Mips::T5_64) - .Case("t6", Mips::T6_64) - .Case("t7", Mips::T7_64) - .Case("s0", Mips::S0_64) - .Case("s1", Mips::S1_64) - .Case("s2", Mips::S2_64) - .Case("s3", Mips::S3_64) - .Case("s4", Mips::S4_64) - .Case("s5", Mips::S5_64) - .Case("s6", Mips::S6_64) - .Case("s7", Mips::S7_64) - .Case("t8", Mips::T8_64) - .Case("t9", Mips::T9_64) - .Case("kt0", Mips::K0_64) - .Case("kt1", Mips::K1_64) - .Case("gp", Mips::GP_64) - .Case("sp", Mips::SP_64) - .Case("fp", Mips::FP_64) - .Case("s8", Mips::FP_64) - .Case("ra", Mips::RA_64) + .Case("a4", 8) + .Case("a5", 9) + .Case("a6", 10) + .Case("a7", 11) + .Case("kt0", 26) + .Case("kt1", 27) + .Case("s8", 30) .Default(-1); + return CC; +} +int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { + + int CC; + CC = matchCPURegisterName(Name); if (CC != -1) - return CC; + return matchRegisterByNumber(CC,is64BitReg?Mips::CPU64RegsRegClassID: + Mips::CPURegsRegClassID); if (Name[0] == 'f') { StringRef NumString = Name.substr(1); @@ -639,75 +705,49 @@ bool MipsAssemblerOptions::setATReg(unsigned Reg) { return true; } -unsigned MipsAsmParser::getATReg() { - unsigned Reg = Options.getATRegNum(); - if (isMips64()) - return getReg(Mips::CPU64RegsRegClassID,Reg); - - return getReg(Mips::CPURegsRegClassID,Reg); +int MipsAsmParser::getATReg() { + return Options.getATRegNum(); } unsigned MipsAsmParser::getReg(int RC,int RegNo) { return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo); } -int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic) { - - if (Mnemonic.lower() == "rdhwr") { - // at the moment only hwreg29 is supported - if (RegNum != 29) - return -1; - return Mips::HWR29; - } +int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) { if (RegNum > 31) return -1; - // MIPS64 registers are numbered 1 after the 32-bit equivalents - return getReg(Mips::CPURegsRegClassID, RegNum) + isMips64(); + return getReg(RegClass, RegNum); } -int MipsAsmParser::tryParseRegister(StringRef Mnemonic) { +int MipsAsmParser::tryParseRegister(bool is64BitReg) { const AsmToken &Tok = Parser.getTok(); int RegNum = -1; if (Tok.is(AsmToken::Identifier)) { std::string lowerCase = Tok.getString().lower(); - RegNum = matchRegisterName(lowerCase); + RegNum = matchRegisterName(lowerCase, is64BitReg); } else if (Tok.is(AsmToken::Integer)) RegNum = matchRegisterByNumber(static_cast<unsigned>(Tok.getIntVal()), - Mnemonic.lower()); - else - return RegNum; //error - // 64 bit div operations require Mips::ZERO instead of MIPS::ZERO_64 - if (isMips64() && RegNum == Mips::ZERO_64) { - if (Mnemonic.find("ddiv") != StringRef::npos) - RegNum = Mips::ZERO; - } + is64BitReg ? 
Mips::CPU64RegsRegClassID + : Mips::CPURegsRegClassID); return RegNum; } bool MipsAsmParser:: tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - StringRef Mnemonic){ + bool is64BitReg){ SMLoc S = Parser.getTok().getLoc(); - SMLoc E = Parser.getTok().getEndLoc(); int RegNo = -1; - // FIXME: we should make a more generic method for CCR - if ((Mnemonic == "cfc1" || Mnemonic == "ctc1") - && Operands.size() == 2 && Parser.getTok().is(AsmToken::Integer)){ - RegNo = Parser.getTok().getIntVal(); // get the int value - // at the moment only fcc0 is supported - if (RegNo == 0) - RegNo = Mips::FCC0; - } else - RegNo = tryParseRegister(Mnemonic); + RegNo = tryParseRegister(is64BitReg); if (RegNo == -1) return true; - Operands.push_back(MipsOperand::CreateReg(RegNo, S, E)); + Operands.push_back(MipsOperand::CreateReg(RegNo, S, + Parser.getTok().getLoc())); Parser.Lex(); // Eat register token. return false; } @@ -734,7 +774,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, SMLoc S = Parser.getTok().getLoc(); Parser.Lex(); // Eat dollar token. // parse register operand - if (!tryParseRegisterOperand(Operands, Mnemonic)) { + if (!tryParseRegisterOperand(Operands, isMips64())) { if (getLexer().is(AsmToken::LParen)) { // check if it is indexed addressing operand Operands.push_back(MipsOperand::CreateToken("(", S)); @@ -743,7 +783,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, return true; Parser.Lex(); // eat dollar - if (tryParseRegisterOperand(Operands, Mnemonic)) + if (tryParseRegisterOperand(Operands, isMips64())) return true; if (!getLexer().is(AsmToken::RParen)) @@ -757,10 +797,10 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, } // maybe it is a symbol reference StringRef Identifier; - if (Parser.ParseIdentifier(Identifier)) + if (Parser.parseIdentifier(Identifier)) return true; - SMLoc E = SMLoc::getFromPointer(Identifier.end()); + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier); @@ -780,9 +820,9 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, // quoted label names const MCExpr *IdVal; SMLoc S = Parser.getTok().getLoc(); - SMLoc E; - if (getParser().ParseExpression(IdVal, E)) + if (getParser().parseExpression(IdVal)) return true; + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); return false; } @@ -790,10 +830,11 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, // it is a symbol reference or constant expression const MCExpr *IdVal; SMLoc S = Parser.getTok().getLoc(); // start location of the operand - SMLoc E; - if (parseRelocOperand(IdVal, E)) + if (parseRelocOperand(IdVal)) return true; + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); return false; } // case AsmToken::Percent @@ -801,7 +842,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, return true; } -bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res, SMLoc &EndLoc) { +bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { Parser.Lex(); // eat % token const AsmToken &Tok = Parser.getTok(); // get next token, operation @@ -813,6 +854,7 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res, SMLoc &EndLoc) { Parser.Lex(); // eat 
identifier // now make expression from the rest of the operand const MCExpr *IdVal; + SMLoc EndLoc; if (getLexer().getKind() == AsmToken::LParen) { while (1) { @@ -830,13 +872,11 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res, SMLoc &EndLoc) { } else break; } - if (getParser().ParseParenExpression(IdVal,EndLoc)) + if (getParser().parseParenExpression(IdVal,EndLoc)) return true; - while (getLexer().getKind() == AsmToken::RParen) { - EndLoc = Parser.getTok().getEndLoc(); + while (getLexer().getKind() == AsmToken::RParen) Parser.Lex(); // eat ')' token - } } else return true; // parenthesis must follow reloc operand @@ -848,7 +888,12 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res, SMLoc &EndLoc) { if (Str == "lo") { Val = Val & 0xffff; } else if (Str == "hi") { + int LoSign = Val & 0x8000; Val = (Val & 0xffff0000) >> 16; + //lower part is treated as signed int, so if it is negative + //we must add 1 to hi part to compensate + if (LoSign) + Val++; } Res = MCConstantExpr::Create(Val, getContext()); return false; @@ -868,23 +913,24 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { StartLoc = Parser.getTok().getLoc(); - EndLoc = Parser.getTok().getEndLoc(); - RegNo = tryParseRegister(""); + RegNo = tryParseRegister(isMips64()); + EndLoc = Parser.getTok().getLoc(); return (RegNo == (unsigned)-1); } bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) { + + SMLoc S; + switch(getLexer().getKind()) { default: return true; case AsmToken::Integer: case AsmToken::Minus: case AsmToken::Plus: - return getParser().ParseExpression(Res); - case AsmToken::Percent: { - SMLoc E; - return parseRelocOperand(Res, E); - } + return (getParser().parseExpression(Res)); + case AsmToken::Percent: + return parseRelocOperand(Res); case AsmToken::LParen: return false; // it's probably assuming 0 } @@ -895,8 +941,9 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( SmallVectorImpl<MCParsedAsmOperand*>&Operands) { const MCExpr *IdVal = 0; - SMLoc S = Parser.getTok().getLoc(); - SMLoc E = Parser.getTok().getEndLoc(); + SMLoc S; + // first operand is the offset + S = Parser.getTok().getLoc(); if (parseMemOffset(IdVal)) return MatchOperand_ParseFail; @@ -905,6 +952,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( if (Tok.isNot(AsmToken::LParen)) { MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]); if (Mnemonic->getToken() == "la") { + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() -1); Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); return MatchOperand_Success; } @@ -917,7 +965,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( const AsmToken &Tok1 = Parser.getTok(); // get next token if (Tok1.is(AsmToken::Dollar)) { Parser.Lex(); // Eat '$' token. - if (tryParseRegisterOperand(Operands,"")) { + if (tryParseRegisterOperand(Operands, isMips64())) { Error(Parser.getTok().getLoc(), "unexpected token in operand"); return MatchOperand_ParseFail; } @@ -933,7 +981,8 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( return MatchOperand_ParseFail; } - E = Parser.getTok().getEndLoc(); + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Parser.Lex(); // Eat ')' token. 
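// Worked example (illustrative values, not from the patch) for the %hi/%lo
// carry adjustment in parseRelocOperand above: %lo(x) is later sign-extended
// to 32 bits, so when bit 15 of x is set, %hi(x) must be bumped by one.
//   x = 0x00018000:  %lo = 0x8000, which sign-extends to -0x8000;
//                    %hi = ((x & 0xffff0000) >> 16) + 1 = 2;
//   check: (2 << 16) + (-0x8000) = 0x18000 == x.
// A memory offset parsed here may be exactly such an expression, e.g.
//   lw $2, %lo(sym)($3)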
if (IdVal == 0) @@ -950,6 +999,132 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( return MatchOperand_Success; } +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + + if (!isMips64()) + return MatchOperand_NoMatch; + // if the first token is not '$' we have an error + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat $ + if(!tryParseRegisterOperand(Operands, true)) { + // set the proper register kind + MipsOperand* op = static_cast<MipsOperand*>(Operands.back()); + op->setRegKind(MipsOperand::Kind_CPU64Regs); + return MatchOperand_Success; + } + return MatchOperand_NoMatch; +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + + // if the first token is not '$' we have an error + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat $ + if(!tryParseRegisterOperand(Operands, false)) { + // set the proper register kind + MipsOperand* op = static_cast<MipsOperand*>(Operands.back()); + op->setRegKind(MipsOperand::Kind_CPURegs); + return MatchOperand_Success; + } + return MatchOperand_NoMatch; +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + + if (isMips64()) + return MatchOperand_NoMatch; + + // if the first token is not '$' we have an error + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat $ + + const AsmToken &Tok = Parser.getTok(); // get next token + if (Tok.isNot(AsmToken::Integer)) + return MatchOperand_NoMatch; + + unsigned RegNum = Tok.getIntVal(); + // at the moment only hwreg29 is supported + if (RegNum != 29) + return MatchOperand_ParseFail; + + MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29, S, + Parser.getTok().getLoc()); + op->setRegKind(MipsOperand::Kind_HWRegs); + Operands.push_back(op); + + Parser.Lex(); // Eat reg number + return MatchOperand_Success; +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + + if (!isMips64()) + return MatchOperand_NoMatch; + // if the first token is not '$' we have an error + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat $ + + const AsmToken &Tok = Parser.getTok(); // get next token + if (Tok.isNot(AsmToken::Integer)) + return MatchOperand_NoMatch; + + unsigned RegNum = Tok.getIntVal(); + // at the moment only hwreg29 is supported + if (RegNum != 29) + return MatchOperand_ParseFail; + + MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S, + Parser.getTok().getLoc()); + op->setRegKind(MipsOperand::Kind_HW64Regs); + Operands.push_back(op); + + Parser.Lex(); // Eat reg number + return MatchOperand_Success; +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + unsigned RegNum; + // if the first token is not '$' we have an error + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat $ + + const AsmToken &Tok = Parser.getTok(); // get next token + if (Tok.is(AsmToken::Integer)) { + RegNum = Tok.getIntVal(); + // at the moment only fcc0 is supported + if (RegNum != 0) + return MatchOperand_ParseFail; + } else if
(Tok.is(AsmToken::Identifier)) { + // at the moment only fcc0 is supported + if (Tok.getIdentifier() != "fcc0") + return MatchOperand_ParseFail; + } else + return MatchOperand_NoMatch; + + MipsOperand *op = MipsOperand::CreateReg(Mips::FCC0, S, + Parser.getTok().getLoc()); + op->setRegKind(MipsOperand::Kind_CCRRegs); + Operands.push_back(op); + + Parser.Lex(); // Eat reg number + return MatchOperand_Success; +} + MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) { MCSymbolRefExpr::VariantKind VK @@ -1019,13 +1194,13 @@ parseMathOperation(StringRef Name, SMLoc NameLoc, // Read the first operand. if (ParseOperand(Operands, Name)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } if (getLexer().isNot(AsmToken::Comma)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } @@ -1037,14 +1212,14 @@ parseMathOperation(StringRef Name, SMLoc NameLoc, // Parse and remember the operand. if (ParseOperand(Operands, Name)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } } if (getLexer().isNot(AsmToken::EndOfStatement)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } @@ -1055,16 +1230,18 @@ parseMathOperation(StringRef Name, SMLoc NameLoc, bool MipsAsmParser:: ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + StringRef Mnemonic; // floating point instructions: should register be treated as double? if (requestsDoubleOperand(Name)) { setFpFormat(FP_FORMAT_D); Operands.push_back(MipsOperand::CreateToken(Name, NameLoc)); + Mnemonic = Name; } else { setDefaultFpFormat(); // Create the leading tokens for the mnemonic, split by '.' characters. size_t Start = 0, Next = Name.find('.'); - StringRef Mnemonic = Name.slice(Start, Next); + Mnemonic = Name.slice(Start, Next); Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc)); @@ -1083,8 +1260,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, if (Cc == -1) { return Error(NameLoc, "Invalid conditional code"); } - // FIXME: May include trailing whitespace... - SMLoc E = Parser.getTok().getLoc(); + SMLoc E = SMLoc::getFromPointer( + Parser.getTok().getLoc().getPointer() -1 ); Operands.push_back(MipsOperand::CreateImm( MCConstantExpr::Create(Cc, getContext()), NameLoc, E)); } else { @@ -1104,9 +1281,9 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, // Read the remaining operands. if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. - if (ParseOperand(Operands, Name)) { + if (ParseOperand(Operands, Mnemonic)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } @@ -1116,7 +1293,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, // Parse and remember the operand. 
if (ParseOperand(Operands, Name)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } } @@ -1124,7 +1301,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, if (getLexer().isNot(AsmToken::EndOfStatement)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } @@ -1134,7 +1311,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, bool MipsAsmParser::reportParseError(StringRef ErrorMsg) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, ErrorMsg); } @@ -1157,6 +1334,7 @@ bool MipsAsmParser::parseSetAtDirective() { // line can be // .set at - defaults to $1 // or .set at=$reg + int AtRegNo; getParser().Lex(); if (getLexer().is(AsmToken::EndOfStatement)) { Options.setATReg(1); @@ -1169,12 +1347,22 @@ bool MipsAsmParser::parseSetAtDirective() { return false; } Parser.Lex(); // eat '$' - if (getLexer().isNot(AsmToken::Integer)) { + const AsmToken &Reg = Parser.getTok(); + if (Reg.is(AsmToken::Identifier)) { + AtRegNo = matchCPURegisterName(Reg.getIdentifier()); + } else if (Reg.is(AsmToken::Integer)) { + AtRegNo = Reg.getIntVal(); + } else { reportParseError("unexpected token in statement"); return false; } - const AsmToken &Reg = Parser.getTok(); - if (!Options.setATReg(Reg.getIntVal())) { + + if ( AtRegNo < 1 || AtRegNo > 31) { + reportParseError("unexpected token in statement"); + return false; + } + + if (!Options.setATReg(AtRegNo)) { reportParseError("unexpected token in statement"); return false; } @@ -1262,55 +1450,88 @@ bool MipsAsmParser::parseDirectiveSet() { return parseSetNoMacroDirective(); } else if (Tok.getString() == "nomips16") { // ignore this directive for now - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return false; } else if (Tok.getString() == "nomicromips") { // ignore this directive for now - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return false; } + return true; } +/// parseDirectiveWord +/// ::= .word [ expression (, expression)* ] +bool MipsAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + const MCExpr *Value; + if (getParser().parseExpression(Value)) + return true; + + getParser().getStreamer().EmitValue(Value, Size); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + // FIXME: Improve diagnostic. 
+ if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + } + } + + Parser.Lex(); + return false; +} + bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { - if (DirectiveID.getString() == ".ent") { + StringRef IDVal = DirectiveID.getString(); + + if ( IDVal == ".ent") { // ignore this directive for now Parser.Lex(); return false; } - if (DirectiveID.getString() == ".end") { + if (IDVal == ".end") { // ignore this directive for now Parser.Lex(); return false; } - if (DirectiveID.getString() == ".frame") { + if (IDVal == ".frame") { // ignore this directive for now - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return false; } - if (DirectiveID.getString() == ".set") { + if (IDVal == ".set") { return parseDirectiveSet(); } - if (DirectiveID.getString() == ".fmask") { + if (IDVal == ".fmask") { // ignore this directive for now - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return false; } - if (DirectiveID.getString() == ".mask") { + if (IDVal == ".mask") { // ignore this directive for now - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return false; } - if (DirectiveID.getString() == ".gpword") { + if (IDVal == ".gpword") { // ignore this directive for now - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); + return false; + } + + if (IDVal == ".word") { + parseDirectiveWord(4, DirectiveID.getLoc()); return false; } diff --git a/lib/Target/Mips/Disassembler/LLVMBuild.txt b/lib/Target/Mips/Disassembler/LLVMBuild.txt index 048ad0d..7101c06 100644 --- a/lib/Target/Mips/Disassembler/LLVMBuild.txt +++ b/lib/Target/Mips/Disassembler/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/Mips/Disassembler/LLVMBuild.txt --------------*- Conf -*--===; +;===- ./lib/Target/Mips/Disassembler/LLVMBuild.txt -------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; diff --git a/lib/Target/Mips/Disassembler/Makefile b/lib/Target/Mips/Disassembler/Makefile index a78feba..7900373 100644 --- a/lib/Target/Mips/Disassembler/Makefile +++ b/lib/Target/Mips/Disassembler/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/Mips/Disassembler/Makefile ----------------*- Makefile -*-===## +##===- lib/Target/Mips/Disassembler/Makefile ---------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 9560f3f..025a783 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -93,6 +93,11 @@ static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -322,6 +327,15 @@ static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo); } +static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + + return MCDisassembler::Fail; + +} + static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 68d3ac5..fc23cd3 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ 
b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define PRINT_ALIAS_INSTR #include "MipsGenAsmWriter.inc" const char* Mips::MipsFCCToString(Mips::CondCode CC) { @@ -78,7 +79,9 @@ void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O, O << "\t.set\tmips32r2\n"; } - printInstruction(MI, O); + // Try to print any aliases first. + if (!printAliasInstr(MI, O)) + printInstruction(MI, O); printAnnotation(O, Annot); switch (MI->getOpcode()) { @@ -149,6 +152,11 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { OS << ')'; } +void MipsInstPrinter::printCPURegs(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printRegName(O, MI->getOperand(OpNo).getReg()); +} + void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index 3d8a6f9..d1b561f 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -87,6 +87,9 @@ public: virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + void printCPURegs(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + bool printAliasInstr(const MCInst *MI, raw_ostream &OS); private: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt index be5d7e4..4212c94 100644 --- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt @@ -5,6 +5,8 @@ add_llvm_library(LLVMMipsDesc MipsMCCodeEmitter.cpp MipsMCTargetDesc.cpp MipsELFObjectWriter.cpp + MipsReginfo.cpp + MipsELFStreamer.cpp ) add_dependencies(LLVMMipsDesc MipsCommonTableGen) diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index f82e203..6471b51 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -42,7 +42,6 @@ namespace { virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, bool IsRelocWithSymbol, int64_t Addend) const; - virtual unsigned getEFlags() const; virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, const MCValue &Target, const MCFragment &F, @@ -61,19 +60,6 @@ MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, MipsELFObjectWriter::~MipsELFObjectWriter() {} -// FIXME: get the real EABI Version from the Subtarget class. 
-unsigned MipsELFObjectWriter::getEFlags() const { - - // FIXME: We can't tell if we are PIC (dynamic) or CPIC (static) - unsigned Flag = ELF::EF_MIPS_NOREORDER; - - if (is64Bit()) - Flag |= ELF::EF_MIPS_ARCH_64R2; - else - Flag |= ELF::EF_MIPS_ARCH_32R2; - return Flag; -} - const MCSymbol *MipsELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, const MCValue &Target, const MCFragment &F, @@ -108,7 +94,13 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, Type = ELF::R_MIPS_64; break; case FK_GPRel_4: - Type = ELF::R_MIPS_GPREL32; + if (isN64()) { + Type = setRType((unsigned)ELF::R_MIPS_GPREL32, Type); + Type = setRType2((unsigned)ELF::R_MIPS_64, Type); + Type = setRType3((unsigned)ELF::R_MIPS_NONE, Type); + } + else + Type = ELF::R_MIPS_GPREL32; break; case Mips::fixup_Mips_GPREL16: Type = ELF::R_MIPS_GPREL16; diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp new file mode 100644 index 0000000..c33bc9a --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -0,0 +1,89 @@ +//===-- MipsELFStreamer.cpp - MipsELFStreamer ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===-------------------------------------------------------------------===// +#include "MCTargetDesc/MipsELFStreamer.h" +#include "MipsSubtarget.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELF.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + + MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack) { + MipsELFStreamer *S = new MipsELFStreamer(Context, TAB, OS, Emitter, + RelaxAll, NoExecStack); + return S; + } + + // For llc. Set a group of ELF header flags + void + MipsELFStreamer::emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget) { + + if (hasRawTextSupport()) + return; + + // Update e_header flags + MCAssembler& MCA = getAssembler(); + unsigned EFlags = MCA.getELFHeaderEFlags(); + + if (Subtarget.inMips16Mode()) + EFlags |= ELF::EF_MIPS_ARCH_ASE_M16; + else + EFlags |= ELF::EF_MIPS_NOREORDER; + + // Architecture + if (Subtarget.hasMips64r2()) + EFlags |= ELF::EF_MIPS_ARCH_64R2; + else if (Subtarget.hasMips64()) + EFlags |= ELF::EF_MIPS_ARCH_64; + else if (Subtarget.hasMips32r2()) + EFlags |= ELF::EF_MIPS_ARCH_32R2; + else + EFlags |= ELF::EF_MIPS_ARCH_32; + + if (Subtarget.inMicroMipsMode()) + EFlags |= ELF::EF_MIPS_MICROMIPS; + + // ABI + if (Subtarget.isABI_O32()) + EFlags |= ELF::EF_MIPS_ABI_O32; + + // Relocation Model + Reloc::Model RM = Subtarget.getRelocationModel(); + if (RM == Reloc::PIC_ || RM == Reloc::Default) + EFlags |= ELF::EF_MIPS_PIC; + else if (RM == Reloc::Static) + ; // Do nothing for Reloc::Static + else + llvm_unreachable("Unsupported relocation model for e_flags"); + + MCA.setELFHeaderEFlags(EFlags); + } + + // For llc. Set a symbol's STO flags + void + MipsELFStreamer::emitMipsSTOCG(const MipsSubtarget &Subtarget, + MCSymbol *Sym, + unsigned Val) { + + if (hasRawTextSupport()) + return; + + MCSymbolData &Data = getOrCreateSymbolData(Sym); + // The "other" values are stored in the last 6 bits of the second byte + // The traditional defines for STO values assume the full byte and thus + // the shift to pack it. 
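// Illustrative example (STO value assumed from the ELF gABI/MIPS psABI, not
// this patch): st_other keeps symbol visibility in its low 2 bits, leaving
// only the upper 6 bits for processor-specific STO flags, so a full-byte
// constant such as STO_MIPS_MICROMIPS (0x80) is stored pre-shifted:
//   Val = 0x80  ->  Val >> 2 = 0x20 in the 6-bit "other" field,
// and the object writer shifts it back when writing st_other.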
+ MCELF::setOther(Data, Val >> 2); + } + +} // namespace llvm diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h new file mode 100644 index 0000000..b10ccc7 --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -0,0 +1,43 @@ +//=== MipsELFStreamer.h - MipsELFStreamer ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENCE.TXT for details. +// +//===-------------------------------------------------------------------===// +#ifndef MIPSELFSTREAMER_H_ +#define MIPSELFSTREAMER_H_ + +#include "llvm/MC/MCELFStreamer.h" + +namespace llvm { +class MipsAsmPrinter; +class MipsSubtarget; +class MCSymbol; + +class MipsELFStreamer : public MCELFStreamer { +public: + MipsELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack) + : MCELFStreamer(SK_MipsELFStreamer, Context, TAB, OS, Emitter) { + } + + ~MipsELFStreamer() {} + void emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget); + void emitMipsSTOCG(const MipsSubtarget &Subtarget, + MCSymbol *Sym, + unsigned Val); + + static bool classof(const MCStreamer *S) { + return S->getKind() == SK_MipsELFStreamer; + } +}; + + MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack); +} + +#endif /* MIPSELFSTREAMER_H_ */ diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp index a679749..5d4b32d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp @@ -25,8 +25,9 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) { IsLittleEndian = false; if ((TheTriple.getArch() == Triple::mips64el) || - (TheTriple.getArch() == Triple::mips64)) - PointerSize = 8; + (TheTriple.getArch() == Triple::mips64)) { + PointerSize = CalleeSaveStackSlotSize = 8; + } AlignmentIsInBytes = false; Data16bitsDirective = "\t.2byte\t"; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 4b68b7e..96f93a0 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -38,7 +38,8 @@ class MipsMCCodeEmitter : public MCCodeEmitter { bool IsLittleEndian; public: - MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, bool IsLittle) : + MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, + const MCSubtargetInfo &sti, bool IsLittle) : MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {} ~MipsMCCodeEmitter() {} @@ -95,7 +96,7 @@ MCCodeEmitter *llvm::createMipsMCCodeEmitterEB(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new MipsMCCodeEmitter(MCII, Ctx, false); + return new MipsMCCodeEmitter(MCII, Ctx, STI, false); } MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, @@ -103,7 +104,7 @@ MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new MipsMCCodeEmitter(MCII, Ctx, true); + return new MipsMCCodeEmitter(MCII, Ctx, STI, true); } /// EncodeInstruction - Emit the instruction. 
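// Rough sketch of what the emitter below does for one fixed-width MIPS word
// (encoding chosen for illustration, not taken from the patch): the 32-bit
// value 0x24840001 (addiu $4, $4, 1) is written byte-by-byte according to
// IsLittleEndian:
//   little-endian: 01 00 84 24
//   big-endian:    24 84 00 01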
@@ -141,12 +142,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, llvm_unreachable("unimplemented opcode in EncodeInstruction()"); const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode()); - uint64_t TSFlags = Desc.TSFlags; - - // Pseudo instructions don't get encoded and shouldn't be here - // in the first place! - if ((TSFlags & MipsII::FormMask) == MipsII::Pseudo) - llvm_unreachable("Pseudo opcode found in EncodeInstruction()"); // Get byte count of instruction unsigned Size = Desc.getSize(); diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 9360971..be83b54 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/MipsELFStreamer.h" #include "MipsMCTargetDesc.h" #include "InstPrinter/MipsInstPrinter.h" #include "MipsMCAsmInfo.h" @@ -131,7 +132,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, bool NoExecStack) { Triple TheTriple(TT); - return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack); + return createMipsELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack); } extern "C" void LLVMInitializeMipsTargetMC() { diff --git a/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp new file mode 100644 index 0000000..1dc9bcb --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp @@ -0,0 +1,80 @@ +//===-- MipsReginfo.cpp - Registerinfo handling --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +// .reginfo +// Elf32_Word ri_gprmask +// Elf32_Word ri_cprmask[4] +// Elf32_Word ri_gp_value +// +// .MIPS.options - N64 +// Elf64_Byte kind (ODK_REGINFO) +// Elf64_Byte size (40 bytes) +// Elf64_Section section (0) +// Elf64_Word info (unused) +// Elf64_Word ri_gprmask () +// Elf64_Word ri_pad () +// Elf64_Word[4] ri_cprmask () +// Elf64_Addr ri_gp_value () +// +// .MIPS.options - N32 +// Elf32_Byte kind (ODK_REGINFO) +// Elf32_Byte size (36 bytes) +// Elf32_Section section (0) +// Elf32_Word info (unused) +// Elf32_Word ri_gprmask () +// Elf32_Word ri_pad () +// Elf32_Word[4] ri_cprmask () +// Elf32_Addr ri_gp_value () +// +//===----------------------------------------------------------------------===// +#include "MCTargetDesc/MipsReginfo.h" +#include "MipsSubtarget.h" +#include "MipsTargetObjectFile.h" +#include "llvm/MC/MCStreamer.h" + +using namespace llvm; + +// Integrated assembler version +void +MipsReginfo::emitMipsReginfoSectionCG(MCStreamer &OS, + const TargetLoweringObjectFile &TLOF, + const MipsSubtarget &MST) const +{ + + if (OS.hasRawTextSupport()) + return; + + const MipsTargetObjectFile &TLOFELF = + static_cast<const MipsTargetObjectFile &>(TLOF); + OS.SwitchSection(TLOFELF.getReginfoSection()); + + // .reginfo + if (MST.isABI_O32()) { + OS.EmitIntValue(0, 4); // ri_gprmask + OS.EmitIntValue(0, 4); // ri_cpr[0]mask + OS.EmitIntValue(0, 4); // ri_cpr[1]mask + OS.EmitIntValue(0, 4); // ri_cpr[2]mask + OS.EmitIntValue(0, 4); // ri_cpr[3]mask + OS.EmitIntValue(0, 4); // ri_gp_value + } + // .MIPS.options + else if (MST.isABI_N64()) { + OS.EmitIntValue(1, 1); // kind + OS.EmitIntValue(40, 1); // size + OS.EmitIntValue(0, 2); // section + OS.EmitIntValue(0, 4); // info + OS.EmitIntValue(0, 4); // ri_gprmask + OS.EmitIntValue(0, 4); // pad + OS.EmitIntValue(0, 4); // ri_cpr[0]mask + OS.EmitIntValue(0, 4); // ri_cpr[1]mask + OS.EmitIntValue(0, 4); // ri_cpr[2]mask + OS.EmitIntValue(0, 4); // ri_cpr[3]mask + OS.EmitIntValue(0, 8); // ri_gp_value + } + else llvm_unreachable("Unsupported abi for reginfo"); +} + diff --git a/lib/Target/Mips/MCTargetDesc/MipsReginfo.h b/lib/Target/Mips/MCTargetDesc/MipsReginfo.h new file mode 100644 index 0000000..039b8ea --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsReginfo.h @@ -0,0 +1,31 @@ +//=== MipsReginfo.h - MipsReginfo -----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENCE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSREGINFO_H +#define MIPSREGINFO_H + +namespace llvm { + class MCStreamer; + class TargetLoweringObjectFile; + class MipsSubtarget; + + class MipsReginfo { + void anchor(); + public: + MipsReginfo() {} + + void emitMipsReginfoSectionCG(MCStreamer &OS, + const TargetLoweringObjectFile &TLOF, + const MipsSubtarget &MST) const; + }; + +} // namespace llvm + +#endif + diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 23e2a94..1326623 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -80,6 +80,9 @@ def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Mips DSP ASE">; def FeatureDSPR2 : SubtargetFeature<"dspr2", "HasDSPR2", "true", "Mips DSP-R2 ASE", [FeatureDSP]>; +def FeatureMicroMips : SubtargetFeature<"micromips", "InMicroMipsMode", "true", + "microMips mode">; + //===----------------------------------------------------------------------===// // Mips processors supported. 
//===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index 127fcf2..1bb6fe4 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -139,6 +139,25 @@ bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } +// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions +void Mips16FrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + if (!hasReservedCallFrame(MF)) { + int64_t Amount = I->getOperand(0).getImm(); + + if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) + Amount = -Amount; + + const Mips16InstrInfo &TII = + *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo()); + + TII.adjustStackPtr(Mips::SP, Amount, MBB, I); + } + + MBB.erase(I); +} + bool Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h index 01db71e..25f4ffb 100644 --- a/lib/Target/Mips/Mips16FrameLowering.h +++ b/lib/Target/Mips/Mips16FrameLowering.h @@ -27,6 +27,10 @@ public: void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, diff --git a/lib/Target/Mips/Mips16InstrFormats.td b/lib/Target/Mips/Mips16InstrFormats.td index 61602b6..4ff62ef 100644 --- a/lib/Target/Mips/Mips16InstrFormats.td +++ b/lib/Target/Mips/Mips16InstrFormats.td @@ -29,45 +29,13 @@ // //===----------------------------------------------------------------------===// -// Format specifies the encoding used by the instruction. This is part of the -// ad-hoc solution used to emit machine instruction encodings by our machine -// code emitter. -// -class Format16<bits<5> val> { - bits<5> Value = val; -} - -def Pseudo16 : Format16<0>; -def FrmI16 : Format16<1>; -def FrmRI16 : Format16<2>; -def FrmRR16 : Format16<3>; -def FrmRRI16 : Format16<4>; -def FrmRRR16 : Format16<5>; -def FrmRRI_A16 : Format16<6>; -def FrmSHIFT16 : Format16<7>; -def FrmI8_TYPE16 : Format16<8>; -def FrmI8_MOVR3216 : Format16<9>; -def FrmI8_MOV32R16 : Format16<10>; -def FrmI8_SVRS16 : Format16<11>; -def FrmJAL16 : Format16<12>; -def FrmJALX16 : Format16<13>; -def FrmEXT_I16 : Format16<14>; -def FrmASMACRO16 : Format16<15>; -def FrmEXT_RI16 : Format16<16>; -def FrmEXT_RRI16 : Format16<17>; -def FrmEXT_RRI_A16 : Format16<18>; -def FrmEXT_SHIFT16 : Format16<19>; -def FrmEXT_I816 : Format16<20>; -def FrmEXT_I8_SVRS16 : Format16<21>; -def FrmOther16 : Format16<22>; // Instruction w/ a custom format // Base class for Mips 16 Format // This class does not depend on the instruction size // class MipsInst16_Base<dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin, Format16 f>: Instruction + InstrItinClass itin>: Instruction { - Format16 Form = f; let Namespace = "Mips"; @@ -78,14 +46,6 @@ class MipsInst16_Base<dag outs, dag ins, string asmstr, list<dag> pattern, let Pattern = pattern; let Itinerary = itin; - // - // Attributes specific to Mips instructions... 
- // - bits<5> FormBits = Form.Value; - - // TSFlags layout should be kept in sync with MipsInstrInfo.h. - let TSFlags{4-0} = FormBits; - let Predicates = [InMips16Mode]; } @@ -93,30 +53,35 @@ class MipsInst16_Base<dag outs, dag ins, string asmstr, list<dag> pattern, // Generic Mips 16 Format // class MipsInst16<dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin, Format16 f>: - MipsInst16_Base<outs, ins, asmstr, pattern, itin, f> + InstrItinClass itin>: + MipsInst16_Base<outs, ins, asmstr, pattern, itin> { field bits<16> Inst; bits<5> Opcode = 0; // Top 5 bits are the 'opcode' field let Inst{15-11} = Opcode; + + let Size=2; + field bits<16> SoftFail = 0; } // // For 32 bit extended instruction forms. // class MipsInst16_32<dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin, Format16 f>: - MipsInst16_Base<outs, ins, asmstr, pattern, itin, f> + InstrItinClass itin>: + MipsInst16_Base<outs, ins, asmstr, pattern, itin> { field bits<32> Inst; - + + let Size=4; + field bits<32> SoftFail = 0; } class MipsInst16_EXTEND<dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin, Format16 f>: - MipsInst16_32<outs, ins, asmstr, pattern, itin, f> + InstrItinClass itin>: + MipsInst16_32<outs, ins, asmstr, pattern, itin> { let Inst{31-27} = 0b11110; } @@ -125,7 +90,7 @@ class MipsInst16_EXTEND<dag outs, dag ins, string asmstr, list<dag> pattern, // Mips Pseudo Instructions Format class MipsPseudo16<dag outs, dag ins, string asmstr, list<dag> pattern>: - MipsInst16<outs, ins, asmstr, pattern, IIPseudo, Pseudo16> { + MipsInst16<outs, ins, asmstr, pattern, IIPseudo> { let isCodeGenOnly = 1; let isPseudo = 1; } @@ -137,7 +102,7 @@ class MipsPseudo16<dag outs, dag ins, string asmstr, list<dag> pattern>: class FI16<bits<5> op, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16<outs, ins, asmstr, pattern, itin, FrmI16> + MipsInst16<outs, ins, asmstr, pattern, itin> { bits<11> imm11; @@ -152,7 +117,7 @@ class FI16<bits<5> op, dag outs, dag ins, string asmstr, list<dag> pattern, class FRI16<bits<5> op, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16<outs, ins, asmstr, pattern, itin, FrmRI16> + MipsInst16<outs, ins, asmstr, pattern, itin> { bits<3> rx; bits<8> imm8; @@ -169,7 +134,7 @@ class FRI16<bits<5> op, dag outs, dag ins, string asmstr, class FRR16<bits<5> _funct, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16> + MipsInst16<outs, ins, asmstr, pattern, itin> { bits<3> rx; bits<3> ry; @@ -188,7 +153,7 @@ class FRR16<bits<5> _funct, dag outs, dag ins, string asmstr, // class FRR_SF16<bits<5> _funct, bits<3> _subfunct, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16> + MipsInst16<outs, ins, asmstr, pattern, itin> { bits<3> rx; bits<3> subfunct; @@ -208,7 +173,7 @@ class FRR_SF16<bits<5> _funct, bits<3> _subfunct, dag outs, dag ins, // class FC16<bits<5> _funct, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16> + MipsInst16<outs, ins, asmstr, pattern, itin> { bits<6> _code; // code is a keyword in tablegen bits<5> funct; @@ -226,7 +191,7 @@ class FC16<bits<5> _funct, dag outs, dag ins, string asmstr, class FRR16_JALRC<bits<1> _nd, bits<1> _l, bits<1> r_a, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - 
MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16> + MipsInst16<outs, ins, asmstr, pattern, itin> { bits<3> rx; bits<1> nd; @@ -252,7 +217,7 @@ class FRR16_JALRC<bits<1> _nd, bits<1> _l, bits<1> r_a, class FRRI16<bits<5> op, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16<outs, ins, asmstr, pattern, itin, FrmRRI16> + MipsInst16<outs, ins, asmstr, pattern, itin> { bits<3> rx; bits<3> ry; @@ -272,7 +237,7 @@ class FRRI16<bits<5> op, dag outs, dag ins, string asmstr, class FRRR16<bits<2> _f, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16<outs, ins, asmstr, pattern, itin, FrmRRR16> + MipsInst16<outs, ins, asmstr, pattern, itin> { bits<3> rx; bits<3> ry; @@ -294,7 +259,7 @@ class FRRR16<bits<2> _f, dag outs, dag ins, string asmstr, class FRRI_A16<bits<1> _f, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16<outs, ins, asmstr, pattern, itin, FrmRRI_A16> + MipsInst16<outs, ins, asmstr, pattern, itin> { bits<3> rx; bits<3> ry; @@ -316,7 +281,7 @@ class FRRI_A16<bits<1> _f, dag outs, dag ins, string asmstr, class FSHIFT16<bits<2> _f, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16<outs, ins, asmstr, pattern, itin, FrmSHIFT16> + MipsInst16<outs, ins, asmstr, pattern, itin> { bits<3> rx; bits<3> ry; @@ -338,7 +303,7 @@ class FSHIFT16<bits<2> _f, dag outs, dag ins, string asmstr, class FI816<bits<3> _func, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_TYPE16> + MipsInst16<outs, ins, asmstr, pattern, itin> { bits<3> func; bits<8> imm8; @@ -356,7 +321,7 @@ class FI816<bits<3> _func, dag outs, dag ins, string asmstr, class FI8_MOVR3216<dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_MOVR3216> + MipsInst16<outs, ins, asmstr, pattern, itin> { bits<4> ry; @@ -378,7 +343,7 @@ class FI8_MOVR3216<dag outs, dag ins, string asmstr, class FI8_MOV32R16<dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_MOV32R16> + MipsInst16<outs, ins, asmstr, pattern, itin> { bits<3> func; @@ -402,7 +367,7 @@ class FI8_MOV32R16<dag outs, dag ins, string asmstr, class FI8_SVRS16<bits<1> _s, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_SVRS16> + MipsInst16<outs, ins, asmstr, pattern, itin> { bits<1> s; bits<1> ra = 0; @@ -429,7 +394,7 @@ class FI8_SVRS16<bits<1> _s, dag outs, dag ins, string asmstr, class FJAL16<bits<1> _X, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16_32<outs, ins, asmstr, pattern, itin, FrmJAL16> + MipsInst16_32<outs, ins, asmstr, pattern, itin> { bits<1> X; bits<26> imm26; @@ -452,7 +417,7 @@ class FJAL16<bits<1> _X, dag outs, dag ins, string asmstr, class FEXT_I16<bits<5> _eop, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_I16> + MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin> { bits<16> imm16; bits<5> eop; @@ -474,7 +439,7 @@ class FEXT_I16<bits<5> _eop, dag outs, dag ins, string asmstr, class FASMACRO16<dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmASMACRO16> + MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin> { 
bits<3> select; bits<3> p4; @@ -503,7 +468,7 @@ class FASMACRO16<dag outs, dag ins, string asmstr, class FEXT_RI16<bits<5> _op, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_RI16> + MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin> { bits<16> imm16; bits<5> op; @@ -527,7 +492,7 @@ class FEXT_RI16<bits<5> _op, dag outs, dag ins, string asmstr, class FEXT_RRI16<bits<5> _op, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_RRI16> + MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin> { bits<5> op; bits<16> imm16; @@ -552,7 +517,7 @@ class FEXT_RRI16<bits<5> _op, dag outs, dag ins, string asmstr, class FEXT_RRI_A16<bits<1> _f, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_RRI_A16> + MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin> { bits<15> imm15; bits<3> rx; @@ -578,7 +543,7 @@ class FEXT_RRI_A16<bits<1> _f, dag outs, dag ins, string asmstr, class FEXT_SHIFT16<bits<2> _f, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_SHIFT16> + MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin> { bits<6> sa6; bits<3> rx; @@ -605,7 +570,7 @@ class FEXT_SHIFT16<bits<2> _f, dag outs, dag ins, string asmstr, class FEXT_I816<bits<3> _funct, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_I816> + MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin> { bits<16> imm16; bits<5> I8; @@ -630,7 +595,7 @@ class FEXT_I816<bits<3> _funct, dag outs, dag ins, string asmstr, class FEXT_I8_SVRS16<bits<1> s_, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmI8_SVRS16> + MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin> { bits<3> xsregs =0; bits<8> framesize =0; @@ -659,5 +624,3 @@ class FEXT_I8_SVRS16<bits<1> s_, dag outs, dag ins, string asmstr, } - - diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 91b5ba0..fd3cc8f 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -19,7 +19,9 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -28,7 +30,8 @@ using namespace llvm; static cl::opt<bool> NeverUseSaveRestore( "mips16-never-use-save-restore", cl::init(false), - cl::desc("For testing ability to adjust stack pointer without save/restore instruction"), + cl::desc("For testing ability to adjust stack pointer " + "without save/restore instruction"), cl::Hidden); @@ -129,7 +132,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { MachineBasicBlock &MBB = *MI->getParent(); - switch(MI->getDesc().getOpcode()) { default: return false; @@ -169,19 +171,20 @@ unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const { } // Adjust SP by FrameSize bytes. 
Save RA, S0, S1 -void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB, +void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, + MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); if (!NeverUseSaveRestore) { if (isUInt<11>(FrameSize)) BuildMI(MBB, I, DL, get(Mips::SaveRaF16)).addImm(FrameSize); else { - int Base = 2040; // should create template function like isUInt that returns largest - // possible n bit unsigned integer + int Base = 2040; // should create template function like isUInt that + // returns largest possible n bit unsigned integer int64_t Remainder = FrameSize - Base; BuildMI(MBB, I, DL, get(Mips::SaveRaF16)). addImm(Base); if (isInt<16>(-Remainder)) - BuildMI(MBB, I, DL, get(Mips::AddiuSpImmX16)). addImm(-Remainder); + BuildAddiuSpImm(MBB, I, -Remainder); else adjustStackPtrBig(SP, -Remainder, MBB, I, Mips::V0, Mips::V1); } @@ -193,13 +196,16 @@ void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBloc // sw s1, -8[sp] // sw s0, -12[sp] - MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), Mips::RA); + MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), + Mips::RA); MIB1.addReg(Mips::SP); MIB1.addImm(-4); - MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), Mips::S1); + MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), + Mips::S1); MIB2.addReg(Mips::SP); MIB2.addImm(-8); - MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), Mips::S0); + MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), + Mips::S0); MIB3.addReg(Mips::SP); MIB3.addImm(-12); adjustStackPtrBig(SP, -FrameSize, MBB, I, Mips::V0, Mips::V1); @@ -207,18 +213,19 @@ void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBloc } // Adjust SP by FrameSize bytes. Restore RA, S0, S1 -void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { +void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); if (!NeverUseSaveRestore) { if (isUInt<11>(FrameSize)) BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)).addImm(FrameSize); else { - int Base = 2040; // should create template function like isUInt that returns largest - // possible n bit unsigned integer + int Base = 2040; // should create template function like isUInt that + // returns largest possible n bit unsigned integer int64_t Remainder = FrameSize - Base; if (isInt<16>(Remainder)) - BuildMI(MBB, I, DL, get(Mips::AddiuSpImmX16)). addImm(Remainder); + BuildAddiuSpImm(MBB, I, Remainder); else adjustStackPtrBig(SP, Remainder, MBB, I, Mips::A0, Mips::A1); BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)). 
addImm(Base); @@ -229,15 +236,19 @@ void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicB // lw ra, -4[sp] // lw s1, -8[sp] // lw s0, -12[sp] - MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), Mips::A0); + MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), + Mips::A0); MIB1.addReg(Mips::SP); MIB1.addImm(-4); - MachineInstrBuilder MIB0 = BuildMI(MBB, I, DL, get(Mips::Move32R16), Mips::RA); + MachineInstrBuilder MIB0 = BuildMI(MBB, I, DL, get(Mips::Move32R16), + Mips::RA); MIB0.addReg(Mips::A0); - MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), Mips::S1); + MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), + Mips::S1); MIB2.addReg(Mips::SP); MIB2.addImm(-8); - MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), Mips::S0); + MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), + Mips::S0); MIB3.addReg(Mips::SP); MIB3.addImm(-12); } @@ -245,10 +256,12 @@ void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicB } // Adjust SP by Amount bytes where bytes can be up to 32bit number. -// This can only be called at times that we know that there is at least one free register. +// This can only be called at times that we know that there is at least one free +// register. // This is clearly safe at prologue and epilogue. // -void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, +void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount, + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Reg1, unsigned Reg2) const { DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); @@ -269,11 +282,13 @@ void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount, MachineBasi MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::AdduRxRyRz16), Reg1); MIB3.addReg(Reg1); MIB3.addReg(Reg2, RegState::Kill); - MachineInstrBuilder MIB4 = BuildMI(MBB, I, DL, get(Mips::Move32R16), Mips::SP); + MachineInstrBuilder MIB4 = BuildMI(MBB, I, DL, get(Mips::Move32R16), + Mips::SP); MIB4.addReg(Reg1, RegState::Kill); } -void Mips16InstrInfo::adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, +void Mips16InstrInfo::adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount, + MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { assert(false && "adjust stack pointer amount exceeded"); } @@ -282,9 +297,8 @@ void Mips16InstrInfo::adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount, void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16> - BuildMI(MBB, I, DL, get(Mips::AddiuSpImmX16)). addImm(Amount); + BuildAddiuSpImm(MBB, I, Amount); else adjustStackPtrBigUnrestricted(SP, Amount, MBB, I); } @@ -292,11 +306,79 @@ void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, /// This function generates the sequence of instructions needed to get the /// result of adding register REG and immediate IMM. 
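The rewritten loadImmediate that follows materializes an out-of-range offset as hi/lo halves, with a carry folded into hi because the low half is later sign-extended. A minimal standalone check of that arithmetic (plain C++ for illustration, not LLVM code):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t Imm = 0x12348765;   // an offset too big for a 16-bit field
      int32_t lo = Imm & 0xFFFF;  // 0x8765
      // lo is consumed by an instruction that sign-extends it, so when
      // bit 15 of lo is set the high half must absorb a carry of 1;
      // (lo >> 15) is exactly that carry.
      int32_t hi = ((Imm >> 16) + (lo >> 15)) & 0xFFFF;  // 0x1235
      int64_t Rebuilt = ((int64_t)hi << 16) + (int16_t)lo;
      assert(Rebuilt == Imm);
      printf("hi=0x%x lo=0x%x\n", hi, lo);
      return 0;
    }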
unsigned -Mips16InstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, +Mips16InstrInfo::loadImmediate(unsigned FrameReg, + int64_t Imm, MachineBasicBlock &MBB, MachineBasicBlock::iterator II, DebugLoc DL, - unsigned *NewImm) const { + unsigned &NewImm) const { + // + // given original instruction is: + // Instr rx, T[offset] where offset is too big. + // + // lo = offset & 0xFFFF + // hi = ((offset >> 16) + (lo >> 15)) & 0xFFFF; + // + // let T = temporary register + // li T, hi + // shl T, 16 + // add T, Rx, T + // + RegScavenger rs; + int32_t lo = Imm & 0xFFFF; + int32_t hi = ((Imm >> 16) + (lo >> 15)) & 0xFFFF; + NewImm = lo; + unsigned Reg =0; + unsigned SpReg = 0; + rs.enterBasicBlock(&MBB); + rs.forward(II); + // + // we use T0 for the first register, if we need to save something away. + // we use T1 for the second register, if we need to save something away. + // + unsigned FirstRegSaved =0, SecondRegSaved=0; + unsigned FirstRegSavedTo = 0, SecondRegSavedTo = 0; + + Reg = rs.FindUnusedReg(&Mips::CPU16RegsRegClass); + if (Reg == 0) { + FirstRegSaved = Reg = Mips::V0; + FirstRegSavedTo = Mips::T0; + copyPhysReg(MBB, II, DL, FirstRegSavedTo, FirstRegSaved, true); + } + else + rs.setUsed(Reg); + BuildMI(MBB, II, DL, get(Mips::LiRxImmX16), Reg).addImm(hi); + BuildMI(MBB, II, DL, get(Mips::SllX16), Reg).addReg(Reg). + addImm(16); + if (FrameReg == Mips::SP) { + SpReg = rs.FindUnusedReg(&Mips::CPU16RegsRegClass); + if (SpReg == 0) { + if (Reg != Mips::V1) { + SecondRegSaved = SpReg = Mips::V1; + SecondRegSavedTo = Mips::T1; + } + else { + SecondRegSaved = SpReg = Mips::V0; + SecondRegSavedTo = Mips::T0; + } + copyPhysReg(MBB, II, DL, SecondRegSavedTo, SecondRegSaved, true); + } + else + rs.setUsed(SpReg); - return 0; + copyPhysReg(MBB, II, DL, SpReg, Mips::SP, false); + BuildMI(MBB, II, DL, get(Mips:: AdduRxRyRz16), Reg).addReg(SpReg) + .addReg(Reg); + } + else + BuildMI(MBB, II, DL, get(Mips:: AdduRxRyRz16), Reg).addReg(FrameReg) + .addReg(Reg, RegState::Kill); + if (FirstRegSaved || SecondRegSaved) { + II = llvm::next(II); + if (FirstRegSaved) + copyPhysReg(MBB, II, DL, FirstRegSaved, FirstRegSavedTo, true); + if (SecondRegSaved) + copyPhysReg(MBB, II, DL, SecondRegSaved, SecondRegSavedTo, true); + } + return Reg; } unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { @@ -317,6 +399,20 @@ void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB, BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); } + +const MCInstrDesc &Mips16InstrInfo::AddiuSpImm(int64_t Imm) const { + if (validSpImm8(Imm)) + return get(Mips::AddiuSpImm16); + else + return get(Mips::AddiuSpImmX16); +} + +void Mips16InstrInfo::BuildAddiuSpImm + (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const { + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + BuildMI(MBB, I, DL, AddiuSpImm(Imm)).addImm(Imm); +} + const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) { return new Mips16InstrInfo(TM); } diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index 3704e25..1cb1dfe 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -77,12 +77,27 @@ public: void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - /// Emit a series of instructions to load an immediate. If NewImm is a - /// non-NULL parameter, the last instruction is not emitted, but instead - /// its immediate operand is returned in NewImm. 
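When the new loadImmediate above finds no free CPU16 register, it parks V0 (and possibly V1) in T0/T1 around the sequence and restores them afterwards; T0/T1 are usable as parking spots because they are not allocatable in MIPS16 mode. A toy simulation of that save/use/restore discipline (hypothetical plain C++, not LLVM's RegScavenger):

    #include <cstdio>

    enum Reg { V0, V1, T0, T1, NREGS };

    int main() {
      int RegFile[NREGS] = {111, 222, 0, 0};
      bool Live[NREGS]   = {true, true, false, false}; // T0/T1 never allocated
      int Saved = -1, Scratch = -1;
      for (int R = V0; R <= V1; ++R)   // scavenge among allocatable regs
        if (!Live[R]) { Scratch = R; break; }
      if (Scratch < 0) {               // none free: park V0 in T0
        RegFile[T0] = RegFile[V0];
        Saved = Scratch = V0;
      }
      RegFile[Scratch] = 0x1235 << 16; // the li/sll result lives here
      printf("scratch r%d holds 0x%x\n", Scratch, RegFile[Scratch]);
      if (Saved >= 0)
        RegFile[Saved] = RegFile[T0];  // restore after the use
      printf("v0 back to %d\n", RegFile[V0]);
      return 0;
    }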
- unsigned loadImmediate(int64_t Imm, MachineBasicBlock &MBB, + /// Emit a series of instructions to load an immediate. + // This is to adjust some FrameReg. We return the new register to be used + // in place of FrameReg and the adjusted immediate field (&NewImm) + // + unsigned loadImmediate(unsigned FrameReg, + int64_t Imm, MachineBasicBlock &MBB, MachineBasicBlock::iterator II, DebugLoc DL, - unsigned *NewImm) const; + unsigned &NewImm) const; + + static bool validSpImm8(int offset) { + return ((offset & 7) == 0) && isInt<11>(offset); + } + + // + // build the proper one based on the Imm field + // + + const MCInstrDesc& AddiuSpImm(int64_t Imm) const; + + void BuildAddiuSpImm + (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const; private: virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; @@ -100,7 +115,6 @@ private: MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - }; } diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index e8e2f3c..a9e9c52 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -32,18 +32,76 @@ def mem16_ea : Operand<i32> { } // +// +// I8 instruction format +// + +class FI816_ins_base<bits<3> _func, string asmstr, + string asmstr2, InstrItinClass itin>: + FI816<_func, (outs), (ins simm16:$imm), !strconcat(asmstr, asmstr2), + [], itin>; + + +class FI816_SP_ins<bits<3> _func, string asmstr, + InstrItinClass itin>: + FI816_ins_base<_func, asmstr, "\t$$sp, $imm # 16 bit inst", itin>; + +// +// RI instruction format +// + + +class FRI16_ins_base<bits<5> op, string asmstr, string asmstr2, + InstrItinClass itin>: + FRI16<op, (outs CPU16Regs:$rx), (ins simm16:$imm), + !strconcat(asmstr, asmstr2), [], itin>; + +class FRI16_ins<bits<5> op, string asmstr, + InstrItinClass itin>: + FRI16_ins_base<op, asmstr, "\t$rx, $imm \t# 16 bit inst", itin>; + +class FRI16R_ins_base<bits<5> op, string asmstr, string asmstr2, + InstrItinClass itin>: + FRI16<op, (outs), (ins CPU16Regs:$rx, simm16:$imm), + !strconcat(asmstr, asmstr2), [], itin>; + +class FRI16R_ins<bits<5> op, string asmstr, + InstrItinClass itin>: + FRI16R_ins_base<op, asmstr, "\t$rx, $imm \t# 16 bit inst", itin>; + +class F2RI16_ins<bits<5> _op, string asmstr, + InstrItinClass itin>: + FRI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm), + !strconcat(asmstr, "\t$rx, $imm\t# 16 bit inst"), [], itin> { + let Constraints = "$rx_ = $rx"; +} + +class FRI16_B_ins<bits<5> _op, string asmstr, + InstrItinClass itin>: + FRI16<_op, (outs), (ins CPU16Regs:$rx, brtarget:$imm), + !strconcat(asmstr, "\t$rx, $imm # 16 bit inst"), [], itin>; +// // Compare a register and immediate and place result in CC // Implicit use of T8 // // EXT-CCRR Instruction format // -class FEXT_CCRXI16_ins<bits<5> _op, string asmstr, - InstrItinClass itin>: - FEXT_RI16<_op, (outs CPU16Regs:$cc), (ins CPU16Regs:$rx, simm16:$imm), - !strconcat(asmstr, "\t$rx, $imm\n\tmove\t$cc, $$t8"), [], itin> { +class FEXT_CCRXI16_ins<string asmstr>: + MipsPseudo16<(outs CPU16Regs:$cc), (ins CPU16Regs:$rx, simm16:$imm), + !strconcat(asmstr, "\t$rx, $imm\n\tmove\t$cc, $$t8"), []> { let isCodeGenOnly=1; + let usesCustomInserter = 1; } +// JAL and JALX instruction format +// +class FJAL16_ins<bits<1> _X, string asmstr, + InstrItinClass itin>: + FJAL16<_X, (outs), (ins simm20:$imm), + !strconcat(asmstr, "\t$imm\n\tnop"),[], + itin> { + let isCodeGenOnly=1; +} // // EXT-I instruction format // @@ -77,10 +135,11 @@ class FEXT_I816_SP_ins<bits<3> _func, 
string asmstr, // // CC-RR Instruction format // -class FCCRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> : - FRR16<f, (outs CPU16Regs:$cc), (ins CPU16Regs:$rx, CPU16Regs:$ry), - !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$cc, $$t8"), [], itin> { +class FCCRR16_ins<string asmstr> : + MipsPseudo16<(outs CPU16Regs:$cc), (ins CPU16Regs:$rx, CPU16Regs:$ry), + !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$cc, $$t8"), []> { let isCodeGenOnly=1; + let usesCustomInserter = 1; } // @@ -96,6 +155,15 @@ class FEXT_RI16_ins<bits<5> _op, string asmstr, InstrItinClass itin>: FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $imm", itin>; +class FEXT_RI16R_ins_base<bits<5> _op, string asmstr, string asmstr2, + InstrItinClass itin>: + FEXT_RI16<_op, (outs ), (ins CPU16Regs:$rx, simm16:$imm), + !strconcat(asmstr, asmstr2), [], itin>; + +class FEXT_RI16R_ins<bits<5> _op, string asmstr, + InstrItinClass itin>: + FEXT_RI16R_ins_base<_op, asmstr, "\t$rx, $imm", itin>; + class FEXT_RI16_PC_ins<bits<5> _op, string asmstr, InstrItinClass itin>: FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>; @@ -153,25 +221,25 @@ class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>: // // EXT-T8I8 // -class FEXT_T8I816_ins<bits<3> _func, string asmstr, string asmstr2, - InstrItinClass itin>: - FEXT_I816<_func, (outs), - (ins CPU16Regs:$rx, CPU16Regs:$ry, brtarget:$imm), - !strconcat(asmstr2, !strconcat("\t$rx, $ry\n\t", - !strconcat(asmstr, "\t$imm"))),[], itin> { +class FEXT_T8I816_ins<string asmstr, string asmstr2>: + MipsPseudo16<(outs), + (ins CPU16Regs:$rx, CPU16Regs:$ry, brtarget:$imm), + !strconcat(asmstr2, !strconcat("\t$rx, $ry\n\t", + !strconcat(asmstr, "\t$imm"))),[]> { let isCodeGenOnly=1; + let usesCustomInserter = 1; } // // EXT-T8I8I // -class FEXT_T8I8I16_ins<bits<3> _func, string asmstr, string asmstr2, - InstrItinClass itin>: - FEXT_I816<_func, (outs), - (ins CPU16Regs:$rx, simm16:$imm, brtarget:$targ), - !strconcat(asmstr2, !strconcat("\t$rx, $imm\n\t", - !strconcat(asmstr, "\t$targ"))), [], itin> { +class FEXT_T8I8I16_ins<string asmstr, string asmstr2>: + MipsPseudo16<(outs), + (ins CPU16Regs:$rx, simm16:$imm, brtarget:$targ), + !strconcat(asmstr2, !strconcat("\t$rx, $imm\n\t", + !strconcat(asmstr, "\t$targ"))), []> { let isCodeGenOnly=1; + let usesCustomInserter = 1; } // @@ -219,9 +287,14 @@ class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> : !strconcat(asmstr, "\t$rx, $ry"), [], itin> { } -class FRRTR16_ins<bits<5> f, string asmstr, InstrItinClass itin> : - FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry), - !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$rz, $$t8"), [], itin> ; +class FRR16R_ins<bits<5> f, string asmstr, InstrItinClass itin> : + FRR16<f, (outs), (ins CPU16Regs:$rx, CPU16Regs:$ry), + !strconcat(asmstr, "\t$rx, $ry"), [], itin> { +} + +class FRRTR16_ins<string asmstr> : + MipsPseudo16<(outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry), + !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$rz, $$t8"), []> ; // // maybe refactor but need a $zero as a dummy first parameter @@ -257,7 +330,7 @@ class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_, class FRR16_JALRC_ins<bits<1> nd, bits<1> l, bits<1> ra, string asmstr, InstrItinClass itin>: - FRR16_JALRC<nd, l, ra, (outs), (ins CPU16Regs:$rx), + FRR16_JALRC<nd, l, ra, (outs), (ins CPU16Regs:$rx), !strconcat(asmstr, "\t $rx"), [], itin> ; // @@ -296,13 +369,13 @@ class FRRR16_ins<bits<2> _f, string asmstr, InstrItinClass itin> : // // So this pseudo class only has one operand, i.e. 
op // -class Sel<bits<5> f1, string op, InstrItinClass itin>: - MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs, - CPU16Regs:$rt), - !strconcat(op, "\t$rt, .+4\n\t\n\tmove $rd, $rs"), [], itin, - Pseudo16> { - let isCodeGenOnly=1; +class Sel<string op>: + MipsPseudo16<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs, + CPU16Regs:$rt), + !strconcat(op, "\t$rt, .+4\n\t\n\tmove $rd, $rs"), []> { + //let isCodeGenOnly=1; let Constraints = "$rd = $rd_"; + let usesCustomInserter = 1; } // @@ -320,16 +393,15 @@ class Sel<bits<5> f1, string op, InstrItinClass itin>: // move $rd, $rs // // -class SeliT<bits<5> f1, string op1, bits<5> f2, string op2, - InstrItinClass itin>: - MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs, - CPU16Regs:$rl, simm16:$imm), - !strconcat(op2, - !strconcat("\t$rl, $imm\n\t", - !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin, - Pseudo16> { +class SeliT<string op1, string op2>: + MipsPseudo16<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs, + CPU16Regs:$rl, simm16:$imm), + !strconcat(op2, + !strconcat("\t$rl, $imm\n\t", + !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), []> { let isCodeGenOnly=1; let Constraints = "$rd = $rd_"; + let usesCustomInserter = 1; } // @@ -344,16 +416,16 @@ class SeliT<bits<5> f1, string op1, bits<5> f2, string op2, // move $rd, $rs // // -class SelT<bits<5> f1, string op1, bits<5> f2, string op2, - InstrItinClass itin>: - MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs, +class SelT<string op1, string op2>: + MipsPseudo16<(outs CPU16Regs:$rd_), + (ins CPU16Regs:$rd, CPU16Regs:$rs, CPU16Regs:$rl, CPU16Regs:$rr), - !strconcat(op2, - !strconcat("\t$rl, $rr\n\t", - !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin, - Pseudo16> { + !strconcat(op2, + !strconcat("\t$rl, $rr\n\t", + !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), []> { let isCodeGenOnly=1; let Constraints = "$rd = $rd_"; + let usesCustomInserter = 1; } // @@ -363,7 +435,7 @@ def imm32: Operand<i32>; def Constant32: MipsPseudo16<(outs), (ins imm32:$imm), "\t.word $imm", []>; - + def LwConstant32: MipsPseudo16<(outs), (ins CPU16Regs:$rx, imm32:$imm), "lw\t$rx, 1f\n\tb\t2f\n\t.align\t2\n1: \t.word\t$imm\n2:", []>; @@ -401,14 +473,21 @@ class MayStore { } // + // Format: ADDIU rx, immediate MIPS16e // Purpose: Add Immediate Unsigned Word (2-Operand, Extended) // To add a constant to a 32-bit integer. // def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>; +def AddiuRxRxImm16: F2RI16_ins<0b01001, "addiu", IIAlu>, + ArithLogic16Defs<0> { + let AddedComplexity = 5; +} def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>, - ArithLogic16Defs<0>; + ArithLogic16Defs<0> { + let isCodeGenOnly = 1; +} def AddiuRxRyOffMemX16: FEXT_RRI_A16_mem_ins<0, "addiu", mem16_ea, IIAlu>; @@ -426,11 +505,18 @@ def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>; // Purpose: Add Immediate Unsigned Word (2-Operand, SP-Relative, Extended) // To add a constant to the stack pointer. // +def AddiuSpImm16 + : FI816_SP_ins<0b011, "addiu", IIAlu> { + let Defs = [SP]; + let Uses = [SP]; + let AddedComplexity = 5; +} + def AddiuSpImmX16 : FEXT_I816_SP_ins<0b011, "addiu", IIAlu> { let Defs = [SP]; let Uses = [SP]; -} +} // // Format: ADDU rz, rx, ry MIPS16e @@ -450,6 +536,14 @@ def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>; // // Format: BEQZ rx, offset MIPS16e +// Purpose: Branch on Equal to Zero +// To test a GPR then do a PC-relative conditional branch. 
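The AddiuSpImm16/AddiuSpImmX16 pair defined just above is chosen by the validSpImm8 predicate introduced earlier in this patch: the short MIPS16e encoding stores the immediate divided by 8 in an 8-bit field, so it only reaches 8-byte-aligned offsets within 11 signed bits. A plain C++ restatement of that check (illustrative, not the LLVM helper):

    #include <cstdint>
    #include <cstdio>

    static bool isIntN(unsigned N, int64_t X) {
      return X >= -(INT64_C(1) << (N - 1)) && X < (INT64_C(1) << (N - 1));
    }

    // Short "addiu sp, imm" stores imm/8 in an 8-bit field, hence the
    // alignment and 11-bit range requirements.
    static bool validSpImm8(int Offset) {
      return (Offset & 7) == 0 && isIntN(11, Offset);
    }

    int main() {
      printf("%d %d %d\n", validSpImm8(1016),   // 1: short form fits
                           validSpImm8(1020),   // 0: not 8-aligned
                           validSpImm8(4096));  // 0: needs AddiuSpImmX16
      return 0;
    }

The AddedComplexity = 5 on the short forms appears intended to bias instruction selection toward them whenever both the short and extended patterns match.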
+// +def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16; + + +// +// Format: BEQZ rx, offset MIPS16e // Purpose: Branch on Equal to Zero (Extended) // To test a GPR then do a PC-relative conditional branch. // @@ -463,6 +557,13 @@ def BimmX16: FEXT_I16_ins<0b00010, "b", IIAlu>, branch16; // // Format: BNEZ rx, offset MIPS16e +// Purpose: Branch on Not Equal to Zero +// To test a GPR then do a PC-relative conditional branch. +// +def BnezRxImm16: FRI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16; + +// +// Format: BNEZ rx, offset MIPS16e // Purpose: Branch on Not Equal to Zero (Extended) // To test a GPR then do a PC-relative conditional branch. // @@ -473,20 +574,22 @@ def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16; // Purpose: Branch on T Equal to Zero (Extended) // To test special register T then do a PC-relative conditional branch. // -def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16; +def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16 { + let Uses = [T8]; +} -def BteqzT8CmpX16: FEXT_T8I816_ins<0b000, "bteqz", "cmp", IIAlu>, cbranch16; +def BteqzT8CmpX16: FEXT_T8I816_ins<"bteqz", "cmp">, cbranch16; -def BteqzT8CmpiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "cmpi", IIAlu>, +def BteqzT8CmpiX16: FEXT_T8I8I16_ins<"bteqz", "cmpi">, cbranch16; -def BteqzT8SltX16: FEXT_T8I816_ins<0b000, "bteqz", "slt", IIAlu>, cbranch16; +def BteqzT8SltX16: FEXT_T8I816_ins<"bteqz", "slt">, cbranch16; -def BteqzT8SltuX16: FEXT_T8I816_ins<0b000, "bteqz", "sltu", IIAlu>, cbranch16; +def BteqzT8SltuX16: FEXT_T8I816_ins<"bteqz", "sltu">, cbranch16; -def BteqzT8SltiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "slti", IIAlu>, cbranch16; +def BteqzT8SltiX16: FEXT_T8I8I16_ins<"bteqz", "slti">, cbranch16; -def BteqzT8SltiuX16: FEXT_T8I8I16_ins<0b000, "bteqz", "sltiu", IIAlu>, +def BteqzT8SltiuX16: FEXT_T8I8I16_ins<"bteqz", "sltiu">, cbranch16; // @@ -494,22 +597,52 @@ def BteqzT8SltiuX16: FEXT_T8I8I16_ins<0b000, "bteqz", "sltiu", IIAlu>, // Purpose: Branch on T Not Equal to Zero (Extended) // To test special register T then do a PC-relative conditional branch. // -def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16; +def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16 { + let Uses = [T8]; +} -def BtnezT8CmpX16: FEXT_T8I816_ins<0b000, "btnez", "cmp", IIAlu>, cbranch16; +def BtnezT8CmpX16: FEXT_T8I816_ins<"btnez", "cmp">, cbranch16; -def BtnezT8CmpiX16: FEXT_T8I8I16_ins<0b000, "btnez", "cmpi", IIAlu>, cbranch16; +def BtnezT8CmpiX16: FEXT_T8I8I16_ins<"btnez", "cmpi">, cbranch16; -def BtnezT8SltX16: FEXT_T8I816_ins<0b000, "btnez", "slt", IIAlu>, cbranch16; +def BtnezT8SltX16: FEXT_T8I816_ins<"btnez", "slt">, cbranch16; -def BtnezT8SltuX16: FEXT_T8I816_ins<0b000, "btnez", "sltu", IIAlu>, cbranch16; +def BtnezT8SltuX16: FEXT_T8I816_ins<"btnez", "sltu">, cbranch16; -def BtnezT8SltiX16: FEXT_T8I8I16_ins<0b000, "btnez", "slti", IIAlu>, cbranch16; +def BtnezT8SltiX16: FEXT_T8I8I16_ins<"btnez", "slti">, cbranch16; -def BtnezT8SltiuX16: FEXT_T8I8I16_ins<0b000, "btnez", "sltiu", IIAlu>, +def BtnezT8SltiuX16: FEXT_T8I8I16_ins<"btnez", "sltiu">, cbranch16; // +// Format: CMP rx, ry MIPS16e +// Purpose: Compare +// To compare the contents of two GPRs. +// +def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIAlu> { + let Defs = [T8]; +} + +// +// Format: CMPI rx, immediate MIPS16e +// Purpose: Compare Immediate +// To compare a constant with the contents of a GPR. 
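CMP above now carries an explicit Defs = [T8], matching the MIPS16e model in which compares deposit their result in the implicit T8 register and bteqz/btnez branch on it (hence the matching Uses = [T8] added to BteqzX16/BtnezX16, presumably so later passes see the dependence). A toy simulation of the idiom, assuming the MIPS16e semantics of cmp writing rx XOR ry to T8 (plain C++, not LLVM code):

    #include <cstdint>
    #include <cstdio>

    static uint32_t T8; // the implicit condition register

    static void cmp(uint32_t Rx, uint32_t Ry) { T8 = Rx ^ Ry; } // 0 iff equal
    static bool bteqz() { return T8 == 0; }  // branch on T equal to zero
    static bool btnez() { return T8 != 0; }  // branch on T not equal to zero

    int main() {
      cmp(5, 5);
      printf("bteqz: %d  btnez: %d\n", bteqz(), btnez()); // 1 0
      cmp(5, 7);
      printf("bteqz: %d  btnez: %d\n", bteqz(), btnez()); // 0 1
      return 0;
    }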
+// +def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIAlu> { + let Defs = [T8]; +} + +// +// Format: CMPI rx, immediate MIPS16e +// Purpose: Compare Immediate (Extended) +// To compare a constant with the contents of a GPR. +// +def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> { + let Defs = [T8]; +} + + +// // Format: DIV rx, ry MIPS16e // Purpose: Divide Word // To divide 32-bit signed integers. @@ -526,7 +659,19 @@ def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> { def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> { let Defs = [HI, LO]; } +// +// Format: JAL target MIPS16e +// Purpose: Jump and Link +// To execute a procedure call within the current 256 MB-aligned +// region and preserve the current ISA. +// +def Jal16 : FJAL16_ins<0b0, "jal", IIAlu> { + let isBranch = 1; + let hasDelaySlot = 0; // not true, but we add the nop for now + let isTerminator=1; + let isBarrier=1; +} // // Format: JR ra MIPS16e @@ -543,7 +688,7 @@ def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu> { let isBarrier=1; } -def JrcRa16: FRR16_JALRC_RA_only_ins<0, 0, "jrc", IIAlu> { +def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIAlu> { let isBranch = 1; let isIndirectBranch = 1; let isTerminator=1; @@ -561,7 +706,9 @@ def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIAlu> { // Purpose: Load Byte (Extended) // To load a byte from memory as a signed value. // -def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad; +def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad{ + let isCodeGenOnly = 1; +} // // Format: LBU ry, offset(rx) MIPS16e @@ -569,14 +716,18 @@ def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad; // To load a byte from memory as a unsigned value. // def LbuRxRyOffMemX16: - FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IILoad>, MayLoad; + FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IILoad>, MayLoad { + let isCodeGenOnly = 1; +} // // Format: LH ry, offset(rx) MIPS16e // Purpose: Load Halfword signed (Extended) // To load a halfword from memory as a signed value. // -def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad; +def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad{ + let isCodeGenOnly = 1; +} // // Format: LHU ry, offset(rx) MIPS16e @@ -584,7 +735,16 @@ def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad; // To load a halfword from memory as an unsigned value. // def LhuRxRyOffMemX16: - FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IILoad>, MayLoad; + FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IILoad>, MayLoad { + let isCodeGenOnly = 1; +} + +// +// Format: LI rx, immediate MIPS16e +// Purpose: Load Immediate +// To load a constant into a GPR. +// +def LiRxImm16: FRI16_ins<0b01101, "li", IIAlu>; // // Format: LI rx, immediate MIPS16e @@ -598,7 +758,9 @@ def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>; // Purpose: Load Word (Extended) // To load a word from memory as a signed value. 
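LB and LBU in the block above (and likewise LH/LHU) differ only in how the loaded value is widened to 32 bits; a one-screen illustration of signed versus unsigned extension (plain C++):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint8_t MemByte = 0x80;            // byte sitting in memory
      int32_t lb  = (int8_t)MemByte;     // LB:  sign-extend  -> -128
      int32_t lbu = MemByte;             // LBU: zero-extend  ->  128
      uint16_t MemHalf = 0x8000;
      int32_t lh  = (int16_t)MemHalf;    // LH:  -32768
      int32_t lhu = MemHalf;             // LHU:  32768
      printf("lb=%d lbu=%d lh=%d lhu=%d\n", lb, lbu, lh, lhu);
      return 0;
    }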
// -def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad; +def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad{ + let isCodeGenOnly = 1; +} // Format: LW rx, offset(sp) MIPS16e // Purpose: Load Word (SP-Relative, Extended) @@ -779,7 +941,7 @@ def SbRxRyOffMemX16: // Purpose: if rt==0, do nothing // else rs = rt // -def SelBeqZ: Sel<0b00100, "beqz", IIAlu>; +def SelBeqZ: Sel<"beqz">; // // Format: SelTBteqZCmp rd, rs, rl, rr @@ -787,7 +949,7 @@ def SelBeqZ: Sel<0b00100, "beqz", IIAlu>; // If b==0 then do nothing. // if b!=0 then rd = rs // -def SelTBteqZCmp: SelT<0b000, "bteqz", 0b01010, "cmp", IIAlu>; +def SelTBteqZCmp: SelT<"bteqz", "cmp">; // // Format: SelTBteqZCmpi rd, rs, rl, rr @@ -795,7 +957,7 @@ def SelTBteqZCmp: SelT<0b000, "bteqz", 0b01010, "cmp", IIAlu>; // If b==0 then do nothing. // if b!=0 then rd = rs // -def SelTBteqZCmpi: SeliT<0b000, "bteqz", 0b01110, "cmpi", IIAlu>; +def SelTBteqZCmpi: SeliT<"bteqz", "cmpi">; // // Format: SelTBteqZSlt rd, rs, rl, rr @@ -803,7 +965,7 @@ def SelTBteqZCmpi: SeliT<0b000, "bteqz", 0b01110, "cmpi", IIAlu>; // If b==0 then do nothing. // if b!=0 then rd = rs // -def SelTBteqZSlt: SelT<0b000, "bteqz", 0b00010, "slt", IIAlu>; +def SelTBteqZSlt: SelT<"bteqz", "slt">; // // Format: SelTBteqZSlti rd, rs, rl, rr @@ -811,7 +973,7 @@ def SelTBteqZSlt: SelT<0b000, "bteqz", 0b00010, "slt", IIAlu>; // If b==0 then do nothing. // if b!=0 then rd = rs // -def SelTBteqZSlti: SeliT<0b000, "bteqz", 0b01010, "slti", IIAlu>; +def SelTBteqZSlti: SeliT<"bteqz", "slti">; // // Format: SelTBteqZSltu rd, rs, rl, rr @@ -819,7 +981,7 @@ def SelTBteqZSlti: SeliT<0b000, "bteqz", 0b01010, "slti", IIAlu>; // If b==0 then do nothing. // if b!=0 then rd = rs // -def SelTBteqZSltu: SelT<0b000, "bteqz", 0b00011, "sltu", IIAlu>; +def SelTBteqZSltu: SelT<"bteqz", "sltu">; // // Format: SelTBteqZSltiu rd, rs, rl, rr @@ -827,14 +989,14 @@ def SelTBteqZSltu: SelT<0b000, "bteqz", 0b00011, "sltu", IIAlu>; // If b==0 then do nothing. // if b!=0 then rd = rs // -def SelTBteqZSltiu: SeliT<0b000, "bteqz", 0b01011, "sltiu", IIAlu>; +def SelTBteqZSltiu: SeliT<"bteqz", "sltiu">; // // Format: SelBnez rd, rs, rt // Purpose: if rt!=0, do nothing // else rs = rt // -def SelBneZ: Sel<0b00101, "bnez", IIAlu>; +def SelBneZ: Sel<"bnez">; // // Format: SelTBtneZCmp rd, rs, rl, rr @@ -842,7 +1004,7 @@ def SelBneZ: Sel<0b00101, "bnez", IIAlu>; // If b!=0 then do nothing. // if b0=0 then rd = rs // -def SelTBtneZCmp: SelT<0b001, "btnez", 0b01010, "cmp", IIAlu>; +def SelTBtneZCmp: SelT<"btnez", "cmp">; // // Format: SelTBtnezCmpi rd, rs, rl, rr @@ -850,7 +1012,7 @@ def SelTBtneZCmp: SelT<0b001, "btnez", 0b01010, "cmp", IIAlu>; // If b!=0 then do nothing. // if b==0 then rd = rs // -def SelTBtneZCmpi: SeliT<0b000, "btnez", 0b01110, "cmpi", IIAlu>; +def SelTBtneZCmpi: SeliT<"btnez", "cmpi">; // // Format: SelTBtneZSlt rd, rs, rl, rr @@ -858,7 +1020,7 @@ def SelTBtneZCmpi: SeliT<0b000, "btnez", 0b01110, "cmpi", IIAlu>; // If b!=0 then do nothing. // if b==0 then rd = rs // -def SelTBtneZSlt: SelT<0b001, "btnez", 0b00010, "slt", IIAlu>; +def SelTBtneZSlt: SelT<"btnez", "slt">; // // Format: SelTBtneZSlti rd, rs, rl, rr @@ -866,7 +1028,7 @@ def SelTBtneZSlt: SelT<0b001, "btnez", 0b00010, "slt", IIAlu>; // If b!=0 then do nothing. 
// if b==0 then rd = rs // -def SelTBtneZSlti: SeliT<0b001, "btnez", 0b01010, "slti", IIAlu>; +def SelTBtneZSlti: SeliT<"btnez", "slti">; // // Format: SelTBtneZSltu rd, rs, rl, rr @@ -874,7 +1036,7 @@ def SelTBtneZSlti: SeliT<0b001, "btnez", 0b01010, "slti", IIAlu>; // If b!=0 then do nothing. // if b==0 then rd = rs // -def SelTBtneZSltu: SelT<0b001, "btnez", 0b00011, "sltu", IIAlu>; +def SelTBtneZSltu: SelT<"btnez", "sltu">; // // Format: SelTBtneZSltiu rd, rs, rl, rr @@ -882,7 +1044,7 @@ def SelTBtneZSltu: SelT<0b001, "btnez", 0b00011, "sltu", IIAlu>; // If b!=0 then do nothing. // if b==0 then rd = rs // -def SelTBtneZSltiu: SeliT<0b001, "btnez", 0b01011, "sltiu", IIAlu>; +def SelTBtneZSltiu: SeliT<"btnez", "sltiu">; // // // Format: SH ry, offset(rx) MIPS16e @@ -906,39 +1068,78 @@ def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>; // def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>; +// Format: SLTI rx, immediate MIPS16e +// Purpose: Set on Less Than Immediate +// To record the result of a less-than comparison with a constant. +// +// +def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIAlu> { + let Defs = [T8]; +} + // // Format: SLTI rx, immediate MIPS16e // Purpose: Set on Less Than Immediate (Extended) // To record the result of a less-than comparison with a constant. // -def SltiCCRxImmX16: FEXT_CCRXI16_ins<0b01010, "slti", IIAlu>; +// +def SltiRxImmX16: FEXT_RI16R_ins<0b01010, "slti", IIAlu> { + let Defs = [T8]; +} + +def SltiCCRxImmX16: FEXT_CCRXI16_ins<"slti">; +// Format: SLTIU rx, immediate MIPS16e +// Purpose: Set on Less Than Immediate Unsigned +// To record the result of a less-than comparison with a constant. +// +// +def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIAlu> { + let Defs = [T8]; +} + +// +// Format: SLTI rx, immediate MIPS16e +// Purpose: Set on Less Than Immediate Unsigned (Extended) +// To record the result of a less-than comparison with a constant. +// +// +def SltiuRxImmX16: FEXT_RI16R_ins<0b01011, "sltiu", IIAlu> { + let Defs = [T8]; +} // // Format: SLTIU rx, immediate MIPS16e // Purpose: Set on Less Than Immediate Unsigned (Extended) // To record the result of a less-than comparison with a constant. // -def SltiuCCRxImmX16: FEXT_CCRXI16_ins<0b01011, "sltiu", IIAlu>; +def SltiuCCRxImmX16: FEXT_CCRXI16_ins<"sltiu">; // // Format: SLT rx, ry MIPS16e // Purpose: Set on Less Than // To record the result of a less-than comparison. // -def SltRxRy16: FRR16_ins<0b00010, "slt", IIAlu>; +def SltRxRy16: FRR16R_ins<0b00010, "slt", IIAlu>{ + let Defs = [T8]; +} -def SltCCRxRy16: FCCRR16_ins<0b00010, "slt", IIAlu>; +def SltCCRxRy16: FCCRR16_ins<"slt">; // Format: SLTU rx, ry MIPS16e // Purpose: Set on Less Than Unsigned // To record the result of an unsigned less-than comparison. 
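These SLT/SLTI forms write T8 and underpin the comparison patterns further below; for instance the updated setle pattern computes a <= b as (b < a) xor 1. A quick exhaustive check of that identity (plain C++):

    #include <cstdio>

    int main() {
      int Bad = 0;
      for (int A = -4; A <= 4; ++A)
        for (int B = -4; B <= 4; ++B) {
          int Slt = (B < A);   // SltCCRxRy16 rhs, lhs
          int Le  = Slt ^ 1;   // XorRxRxRy16 ..., (LiRxImm16 1)
          if (Le != (A <= B))
            ++Bad;
        }
      printf("mismatches: %d\n", Bad); // 0: setle == not (b < a)
      return 0;
    }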
// -def SltuRxRyRz16: FRRTR16_ins<0b00011, "sltu", IIAlu> { +def SltuRxRy16: FRR16R_ins<0b00011, "sltu", IIAlu>{ + let Defs = [T8]; +} + +def SltuRxRyRz16: FRRTR16_ins<"sltu"> { let isCodeGenOnly=1; + let Defs = [T8]; } -def SltuCCRxRy16: FCCRR16_ins<0b00011, "sltu", IIAlu>; +def SltuCCRxRy16: FCCRR16_ins<"sltu">; // // Format: SRAV ry, rx MIPS16e // Purpose: Shift Word Right Arithmetic Variable @@ -1034,6 +1235,7 @@ class ArithLogicI16_pat<SDNode OpNode, PatFrag imm_type, Instruction I> : Mips16Pat<(OpNode CPU16Regs:$in, imm_type:$imm), (I CPU16Regs:$in, imm_type:$imm)>; +def: ArithLogicI16_pat<add, immSExt8, AddiuRxRxImm16>; def: ArithLogicI16_pat<add, immSExt16, AddiuRxRxImmX16>; def: ArithLogicI16_pat<shl, immZExt5, SllX16>; def: ArithLogicI16_pat<srl, immZExt5, SrlX16>; @@ -1067,14 +1269,19 @@ def: StoreM16_pat<store, SwRxRyOffMemX16>; // Unconditional branch class UncondBranch16_pat<SDNode OpNode, Instruction I>: Mips16Pat<(OpNode bb:$imm16), (I bb:$imm16)> { - let Predicates = [RelocPIC, InMips16Mode]; + let Predicates = [InMips16Mode]; } +def : Mips16Pat<(MipsJmpLink (i32 tglobaladdr:$dst)), + (Jal16 tglobaladdr:$dst)>; + +def : Mips16Pat<(MipsJmpLink (i32 texternalsym:$dst)), + (Jal16 texternalsym:$dst)>; + // Indirect branch def: Mips16Pat< - (brind CPU16Regs:$rs), - (JrcRx16 CPU16Regs:$rs)>; - + (brind CPU16Regs:$rs), + (JrcRx16 CPU16Regs:$rs)>; // Jump and Link (Call) let isCall=1, hasDelaySlot=0 in @@ -1502,7 +1709,7 @@ def: Mips16Pat // def: Mips16Pat <(setle CPU16Regs:$lhs, CPU16Regs:$rhs), - (XorRxRxRy16 (SltCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs), (LiRxImmX16 1))>; + (XorRxRxRy16 (SltCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs), (LiRxImm16 1))>; // // setlt @@ -1562,7 +1769,11 @@ def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)), // hi/lo relocs -def : Mips16Pat<(MipsHi tglobaltlsaddr:$in), +def : Mips16Pat<(MipsHi tglobaladdr:$in), + (SllX16 (LiRxImmX16 tglobaladdr:$in), 16)>; +def : Mips16Pat<(MipsHi tjumptable:$in), + (SllX16 (LiRxImmX16 tjumptable:$in), 16)>; +def : Mips16Pat<(MipsHi tglobaltlsaddr:$in), (SllX16 (LiRxImmX16 tglobaltlsaddr:$in), 16)>; // wrapper_pic diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp index c2e09a7..0ea9368 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -1,3 +1,4 @@ + //===-- Mips16RegisterInfo.cpp - MIPS16 Register Information -== ----------===// // // The LLVM Compiler Infrastructure @@ -12,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "Mips16RegisterInfo.h" +#include "Mips16InstrInfo.h" #include "Mips.h" #include "Mips16InstrInfo.h" #include "MipsAnalyzeImmediate.h" @@ -23,6 +25,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" @@ -69,27 +72,6 @@ bool Mips16RegisterInfo::saveScavengerRegister return true; } -// This function eliminate ADJCALLSTACKDOWN, -// ADJCALLSTACKUP pseudo instructions -void Mips16RegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (!TFI->hasReservedCallFrame(MF)) { - int64_t Amount = I->getOperand(0).getImm(); - - if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) - Amount = -Amount; 
- - const Mips16InstrInfo *II = static_cast<const Mips16InstrInfo*>(&TII); - - II->adjustStackPtr(Mips::SP, Amount, MBB, I); - } - - MBB.erase(I); -} - void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, @@ -140,6 +122,7 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // by adding the size of the stack: // incoming argument, callee-saved register location or local variable. int64_t Offset; + bool IsKill = false; Offset = SPOffset + (int64_t)StackSize; Offset += MI.getOperand(OpNo + 1).getImm(); @@ -148,9 +131,14 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, if (!MI.isDebugValue() && ( ((FrameReg != Mips::SP) && !isInt<16>(Offset)) || ((FrameReg == Mips::SP) && !isInt<15>(Offset)) )) { - llvm_unreachable("frame offset does not fit in instruction"); + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = II->getDebugLoc(); + unsigned NewImm; + FrameReg = TII.loadImmediate(FrameReg, Offset, MBB, II, DL, NewImm); + Offset = SignExtend64<16>(NewImm); + IsKill = true; } - MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); + MI.getOperand(OpNo).ChangeToRegister(FrameReg, false, false, IsKill); MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h index 6101739..b8f818a 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.h +++ b/lib/Target/Mips/Mips16RegisterInfo.h @@ -25,10 +25,6 @@ public: Mips16RegisterInfo(const MipsSubtarget &Subtarget, const Mips16InstrInfo &TII); - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - bool requiresRegisterScavenging(const MachineFunction &MF) const; bool requiresFrameIndexScavenging(const MachineFunction &MF) const; diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index cdf12c8..494ba87 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -71,52 +71,55 @@ let usesCustomInserter = 1, Predicates = [HasStdEnc], //===----------------------------------------------------------------------===// let DecoderNamespace = "Mips64" in { /// Arithmetic Instructions (ALU Immediate) -def DADDi : ArithLogicI<"daddi", simm16_64, CPU64Regs>, ADDI_FM<0x18>; -def DADDiu : ArithLogicI<"daddiu", simm16_64, CPU64Regs, immSExt16, add>, +def DADDi : ArithLogicI<"daddi", simm16_64, CPU64RegsOpnd>, ADDI_FM<0x18>; +def DADDiu : ArithLogicI<"daddiu", simm16_64, CPU64RegsOpnd, immSExt16, add>, ADDI_FM<0x19>, IsAsCheapAsAMove; -def DANDi : ArithLogicI<"andi", uimm16_64, CPU64Regs, immZExt16, and>, +def DANDi : ArithLogicI<"andi", uimm16_64, CPU64RegsOpnd, immZExt16, and>, ADDI_FM<0xc>; def SLTi64 : SetCC_I<"slti", setlt, simm16_64, immSExt16, CPU64Regs>, SLTI_FM<0xa>; def SLTiu64 : SetCC_I<"sltiu", setult, simm16_64, immSExt16, CPU64Regs>, SLTI_FM<0xb>; -def ORi64 : ArithLogicI<"ori", uimm16_64, CPU64Regs, immZExt16, or>, +def ORi64 : ArithLogicI<"ori", uimm16_64, CPU64RegsOpnd, immZExt16, or>, ADDI_FM<0xd>; -def XORi64 : ArithLogicI<"xori", uimm16_64, CPU64Regs, immZExt16, xor>, +def XORi64 : ArithLogicI<"xori", uimm16_64, CPU64RegsOpnd, immZExt16, xor>, ADDI_FM<0xe>; def LUi64 : LoadUpper<"lui", CPU64Regs, uimm16_64>, LUI_FM; /// Arithmetic Instructions (3-Operand, R-Type) -def DADD : ArithLogicR<"dadd", CPU64Regs>, ADD_FM<0, 0x2c>; -def DADDu : ArithLogicR<"daddu", CPU64Regs, 1, IIAlu, add>, ADD_FM<0, 0x2d>; -def DSUBu : 
ArithLogicR<"dsubu", CPU64Regs, 0, IIAlu, sub>, ADD_FM<0, 0x2f>; +def DADD : ArithLogicR<"dadd", CPU64RegsOpnd>, ADD_FM<0, 0x2c>; +def DADDu : ArithLogicR<"daddu", CPU64RegsOpnd, 1, IIAlu, add>, + ADD_FM<0, 0x2d>; +def DSUBu : ArithLogicR<"dsubu", CPU64RegsOpnd, 0, IIAlu, sub>, + ADD_FM<0, 0x2f>; def SLT64 : SetCC_R<"slt", setlt, CPU64Regs>, ADD_FM<0, 0x2a>; def SLTu64 : SetCC_R<"sltu", setult, CPU64Regs>, ADD_FM<0, 0x2b>; -def AND64 : ArithLogicR<"and", CPU64Regs, 1, IIAlu, and>, ADD_FM<0, 0x24>; -def OR64 : ArithLogicR<"or", CPU64Regs, 1, IIAlu, or>, ADD_FM<0, 0x25>; -def XOR64 : ArithLogicR<"xor", CPU64Regs, 1, IIAlu, xor>, ADD_FM<0, 0x26>; -def NOR64 : LogicNOR<"nor", CPU64Regs>, ADD_FM<0, 0x27>; +def AND64 : ArithLogicR<"and", CPU64RegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>; +def OR64 : ArithLogicR<"or", CPU64RegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>; +def XOR64 : ArithLogicR<"xor", CPU64RegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>; +def NOR64 : LogicNOR<"nor", CPU64RegsOpnd>, ADD_FM<0, 0x27>; /// Shift Instructions -def DSLL : shift_rotate_imm<"dsll", shamt, CPU64Regs, shl, immZExt6>, +def DSLL : shift_rotate_imm<"dsll", shamt, CPU64RegsOpnd, shl, immZExt6>, SRA_FM<0x38, 0>; -def DSRL : shift_rotate_imm<"dsrl", shamt, CPU64Regs, srl, immZExt6>, +def DSRL : shift_rotate_imm<"dsrl", shamt, CPU64RegsOpnd, srl, immZExt6>, SRA_FM<0x3a, 0>; -def DSRA : shift_rotate_imm<"dsra", shamt, CPU64Regs, sra, immZExt6>, +def DSRA : shift_rotate_imm<"dsra", shamt, CPU64RegsOpnd, sra, immZExt6>, SRA_FM<0x3b, 0>; -def DSLLV : shift_rotate_reg<"dsllv", CPU64Regs, shl>, SRLV_FM<0x14, 0>; -def DSRLV : shift_rotate_reg<"dsrlv", CPU64Regs, srl>, SRLV_FM<0x16, 0>; -def DSRAV : shift_rotate_reg<"dsrav", CPU64Regs, sra>, SRLV_FM<0x17, 0>; -def DSLL32 : shift_rotate_imm<"dsll32", shamt, CPU64Regs>, SRA_FM<0x3c, 0>; -def DSRL32 : shift_rotate_imm<"dsrl32", shamt, CPU64Regs>, SRA_FM<0x3e, 0>; -def DSRA32 : shift_rotate_imm<"dsra32", shamt, CPU64Regs>, SRA_FM<0x3f, 0>; +def DSLLV : shift_rotate_reg<"dsllv", CPU64RegsOpnd, shl>, SRLV_FM<0x14, 0>; +def DSRLV : shift_rotate_reg<"dsrlv", CPU64RegsOpnd, srl>, SRLV_FM<0x16, 0>; +def DSRAV : shift_rotate_reg<"dsrav", CPU64RegsOpnd, sra>, SRLV_FM<0x17, 0>; +def DSLL32 : shift_rotate_imm<"dsll32", shamt, CPU64RegsOpnd>, SRA_FM<0x3c, 0>; +def DSRL32 : shift_rotate_imm<"dsrl32", shamt, CPU64RegsOpnd>, SRA_FM<0x3e, 0>; +def DSRA32 : shift_rotate_imm<"dsra32", shamt, CPU64RegsOpnd>, SRA_FM<0x3f, 0>; } // Rotate Instructions let Predicates = [HasMips64r2, HasStdEnc], DecoderNamespace = "Mips64" in { - def DROTR : shift_rotate_imm<"drotr", shamt, CPU64Regs, rotr, immZExt6>, - SRA_FM<0x3a, 1>; - def DROTRV : shift_rotate_reg<"drotrv", CPU64Regs, rotr>, SRLV_FM<0x16, 1>; + def DROTR : shift_rotate_imm<"drotr", shamt, CPU64RegsOpnd, rotr, immZExt6>, + SRA_FM<0x3a, 1>; + def DROTRV : shift_rotate_reg<"drotrv", CPU64RegsOpnd, rotr>, + SRLV_FM<0x16, 1>; } let DecoderNamespace = "Mips64" in { @@ -135,12 +138,11 @@ defm LD : LoadM<"ld", CPU64Regs, load>, LW_FM<0x37>; defm SD : StoreM<"sd", CPU64Regs, store>, LW_FM<0x3f>; /// load/store left/right -let isCodeGenOnly = 1 in { - defm LWL64 : LoadLeftRightM<"lwl", MipsLWL, CPU64Regs>, LW_FM<0x22>; - defm LWR64 : LoadLeftRightM<"lwr", MipsLWR, CPU64Regs>, LW_FM<0x26>; - defm SWL64 : StoreLeftRightM<"swl", MipsSWL, CPU64Regs>, LW_FM<0x2a>; - defm SWR64 : StoreLeftRightM<"swr", MipsSWR, CPU64Regs>, LW_FM<0x2e>; -} +defm LWL64 : LoadLeftRightM<"lwl", MipsLWL, CPU64Regs>, LW_FM<0x22>; +defm LWR64 : LoadLeftRightM<"lwr", MipsLWR, CPU64Regs>, 
LW_FM<0x26>; +defm SWL64 : StoreLeftRightM<"swl", MipsSWL, CPU64Regs>, LW_FM<0x2a>; +defm SWR64 : StoreLeftRightM<"swr", MipsSWR, CPU64Regs>, LW_FM<0x2e>; + defm LDL : LoadLeftRightM<"ldl", MipsLDL, CPU64Regs>, LW_FM<0x1a>; defm LDR : LoadLeftRightM<"ldr", MipsLDR, CPU64Regs>, LW_FM<0x1b>; defm SDL : StoreLeftRightM<"sdl", MipsSDL, CPU64Regs>, LW_FM<0x2c>; @@ -148,13 +150,13 @@ defm SDR : StoreLeftRightM<"sdr", MipsSDR, CPU64Regs>, LW_FM<0x2d>; /// Load-linked, Store-conditional let Predicates = [NotN64, HasStdEnc] in { - def LLD : LLBase<"lld", CPU64Regs, mem>, LW_FM<0x34>; - def SCD : SCBase<"scd", CPU64Regs, mem>, LW_FM<0x3c>; + def LLD : LLBase<"lld", CPU64RegsOpnd, mem>, LW_FM<0x34>; + def SCD : SCBase<"scd", CPU64RegsOpnd, mem>, LW_FM<0x3c>; } let Predicates = [IsN64, HasStdEnc], isCodeGenOnly = 1 in { - def LLD_P8 : LLBase<"lld", CPU64Regs, mem64>, LW_FM<0x34>; - def SCD_P8 : SCBase<"scd", CPU64Regs, mem64>, LW_FM<0x3c>; + def LLD_P8 : LLBase<"lld", CPU64RegsOpnd, mem64>, LW_FM<0x34>; + def SCD_P8 : SCBase<"scd", CPU64RegsOpnd, mem64>, LW_FM<0x3c>; } /// Jump and Branch Instructions @@ -168,15 +170,18 @@ def BLTZ64 : CBranchZero<"bltz", setlt, CPU64Regs>, BGEZ_FM<1, 0>; } let DecoderNamespace = "Mips64" in def JALR64 : JumpLinkReg<"jalr", CPU64Regs>, JALR_FM; +def JALR64Pseudo : JumpLinkRegPseudo<CPU64Regs, JALR64, RA_64>; def TAILCALL64_R : JumpFR<CPU64Regs, MipsTailCall>, MTLO_FM<8>, IsTailCall; let DecoderNamespace = "Mips64" in { /// Multiply and Divide Instructions. -def DMULT : Mult<"dmult", IIImul, CPU64Regs, [HI64, LO64]>, MULT_FM<0, 0x1c>; -def DMULTu : Mult<"dmultu", IIImul, CPU64Regs, [HI64, LO64]>, MULT_FM<0, 0x1d>; -def DSDIV : Div<MipsDivRem, "ddiv", IIIdiv, CPU64Regs, [HI64, LO64]>, +def DMULT : Mult<"dmult", IIImul, CPU64RegsOpnd, [HI64, LO64]>, + MULT_FM<0, 0x1c>; +def DMULTu : Mult<"dmultu", IIImul, CPU64RegsOpnd, [HI64, LO64]>, + MULT_FM<0, 0x1d>; +def DSDIV : Div<MipsDivRem, "ddiv", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1e>; -def DUDIV : Div<MipsDivRemU, "ddivu", IIIdiv, CPU64Regs, [HI64, LO64]>, +def DUDIV : Div<MipsDivRemU, "ddivu", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1f>; def MTHI64 : MoveToLOHI<"mthi", CPU64Regs, [HI64]>, MTLO_FM<0x11>; @@ -189,28 +194,28 @@ def SEB64 : SignExtInReg<"seb", i8, CPU64Regs>, SEB_FM<0x10, 0x20>; def SEH64 : SignExtInReg<"seh", i16, CPU64Regs>, SEB_FM<0x18, 0x20>; /// Count Leading -def DCLZ : CountLeading0<"dclz", CPU64Regs>, CLO_FM<0x24>; -def DCLO : CountLeading1<"dclo", CPU64Regs>, CLO_FM<0x25>; +def DCLZ : CountLeading0<"dclz", CPU64RegsOpnd>, CLO_FM<0x24>; +def DCLO : CountLeading1<"dclo", CPU64RegsOpnd>, CLO_FM<0x25>; /// Double Word Swap Bytes/HalfWords -def DSBH : SubwordSwap<"dsbh", CPU64Regs>, SEB_FM<2, 0x24>; -def DSHD : SubwordSwap<"dshd", CPU64Regs>, SEB_FM<5, 0x24>; +def DSBH : SubwordSwap<"dsbh", CPU64RegsOpnd>, SEB_FM<2, 0x24>; +def DSHD : SubwordSwap<"dshd", CPU64RegsOpnd>, SEB_FM<5, 0x24>; def LEA_ADDiu64 : EffectiveAddress<"daddiu", CPU64Regs, mem_ea_64>, LW_FM<0x19>; } let DecoderNamespace = "Mips64" in { -def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>, RDHWR_FM; +def RDHWR64 : ReadHardware<CPU64Regs, HW64RegsOpnd>, RDHWR_FM; -def DEXT : ExtBase<"dext", CPU64Regs>, EXT_FM<3>; +def DEXT : ExtBase<"dext", CPU64RegsOpnd>, EXT_FM<3>; let Pattern = []<dag> in { - def DEXTU : ExtBase<"dextu", CPU64Regs>, EXT_FM<2>; - def DEXTM : ExtBase<"dextm", CPU64Regs>, EXT_FM<1>; + def DEXTU : ExtBase<"dextu", CPU64RegsOpnd>, EXT_FM<2>; + def DEXTM : ExtBase<"dextm", CPU64RegsOpnd>, EXT_FM<1>; } 
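DEXT above and DINS just below are bit-field extract/insert; a reference model of their pos/size semantics (plain C++, not the LLVM definitions):

    #include <cstdint>
    #include <cstdio>

    static uint64_t fieldMask(unsigned Size) {
      return Size >= 64 ? ~0ULL : ((1ULL << Size) - 1);
    }

    // DEXT rt, rs, pos, size : rt = rs<pos+size-1:pos>, zero-extended.
    static uint64_t dext(uint64_t Rs, unsigned Pos, unsigned Size) {
      return (Rs >> Pos) & fieldMask(Size);
    }

    // DINS rt, rs, pos, size : replace rt<pos+size-1:pos> with rs<size-1:0>.
    static uint64_t dins(uint64_t Rt, uint64_t Rs, unsigned Pos,
                         unsigned Size) {
      uint64_t M = fieldMask(Size) << Pos;
      return (Rt & ~M) | ((Rs << Pos) & M);
    }

    int main() {
      printf("0x%llx\n", (unsigned long long)dext(0xABCD1234, 8, 8)); // 0x12
      printf("0x%llx\n",
             (unsigned long long)dins(0xFFFF0000ULL, 0xAB, 8, 8)); // 0xffffab00
      return 0;
    }

The DEXTM/DEXTU and DINSM/DINSU variants kept pattern-less here extend the reachable pos/size ranges; the arithmetic is the same.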
-def DINS : InsBase<"dins", CPU64Regs>, EXT_FM<7>; +def DINS : InsBase<"dins", CPU64RegsOpnd>, EXT_FM<7>; let Pattern = []<dag> in { - def DINSU : InsBase<"dinsu", CPU64Regs>, EXT_FM<6>; - def DINSM : InsBase<"dinsm", CPU64Regs>, EXT_FM<5>; + def DINSU : InsBase<"dinsu", CPU64RegsOpnd>, EXT_FM<6>; + def DINSM : InsBase<"dinsm", CPU64RegsOpnd>, EXT_FM<5>; } let isCodeGenOnly = 1, rs = 0, shamt = 0 in { @@ -304,38 +309,60 @@ def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>; //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// -def : InstAlias<"move $dst,$src", (DADD CPU64Regs:$dst,CPU64Regs:$src,ZERO_64)>; +def : InstAlias<"move $dst, $src", + (DADDu CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 1>, + Requires<[HasMips64]>; +def : InstAlias<"move $dst, $src", + (OR64 CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 0>, + Requires<[HasMips64]>; +def : InstAlias<"and $rs, $rt, $imm", + (DANDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm), + 1>, + Requires<[HasMips64]>; +def : InstAlias<"slt $rs, $rt, $imm", + (SLTi64 CPURegsOpnd:$rs, CPU64Regs:$rt, simm16_64:$imm), 1>, + Requires<[HasMips64]>; +def : InstAlias<"xor $rs, $rt, $imm", + (XORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm), + 1>, + Requires<[HasMips64]>; +def : InstAlias<"not $rt, $rs", + (NOR64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rs, ZERO_64), 1>, + Requires<[HasMips64]>; +def : InstAlias<"j $rs", (JR64 CPU64Regs:$rs), 0>, Requires<[HasMips64]>; +def : InstAlias<"jalr $rs", (JALR64 RA_64, CPU64Regs:$rs)>, + Requires<[HasMips64]>; +def : InstAlias<"daddu $rs, $rt, $imm", + (DADDiu CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm), + 1>; +def : InstAlias<"dadd $rs, $rt, $imm", + (DADDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm), + 1>; /// Move between CPU and coprocessor registers + let DecoderNamespace = "Mips64" in { -def MFC0_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel), - "mfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 0>; -def MTC0_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt), - "mtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 4>; -def MFC2_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel), - "mfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 0>; -def MTC2_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt), - "mtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 4>; -def DMFC0_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel), +def DMFC0_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rt), + (ins CPU64RegsOpnd:$rd, uimm16:$sel), "dmfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 1>; -def DMTC0_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt), +def DMTC0_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rd, uimm16:$sel), + (ins CPU64RegsOpnd:$rt), "dmtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 5>; -def DMFC2_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel), +def DMFC2_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rt), + (ins CPU64RegsOpnd:$rd, uimm16:$sel), "dmfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 1>; -def DMTC2_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt), +def DMTC2_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rd, uimm16:$sel), + (ins CPU64RegsOpnd:$rt), "dmtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 5>; } + // Two operand (implicit 0 selector) versions: -def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; -def : InstAlias<"mtc0 $rt, $rd", 
(MTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; -def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; -def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; def : InstAlias<"dmfc0 $rt, $rd", - (DMFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; + (DMFC0_3OP64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rd, 0), 0>; def : InstAlias<"dmtc0 $rt, $rd", - (DMTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; + (DMTC0_3OP64 CPU64RegsOpnd:$rd, 0, CPU64RegsOpnd:$rt), 0>; def : InstAlias<"dmfc2 $rt, $rd", - (DMFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; + (DMFC2_3OP64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rd, 0), 0>; def : InstAlias<"dmtc2 $rt, $rd", - (DMTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; + (DMTC2_3OP64 CPU64RegsOpnd:$rd, 0, CPU64RegsOpnd:$rt), 0>; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 6ad7e96..1876cb6 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -13,10 +13,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-asm-printer" -#include "MipsAsmPrinter.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" +#include "MCTargetDesc/MipsELFStreamer.h" #include "Mips.h" +#include "MipsAsmPrinter.h" #include "MipsInstrInfo.h" #include "MipsMCInstLower.h" #include "llvm/ADT/SmallString.h" @@ -35,6 +36,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/Mangler.h" @@ -65,19 +67,28 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } - // Do any auto-generated pseudo lowerings. - if (emitPseudoExpansionLowering(OutStreamer, MI)) - return; - MachineBasicBlock::const_instr_iterator I = MI; MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); do { - MCInst TmpInst0; - MCInstLowering.Lower(I++, TmpInst0); + // Do any auto-generated pseudo lowerings. + if (emitPseudoExpansionLowering(OutStreamer, &*I)) + continue; + + // The inMips16Mode() test is not permanent. + // Some instructions are marked as pseudo right now which + // would make the test fail for the wrong reason but + // that will be fixed soon. We need this here because we are + // removing another test for this situation downstream in the + // callchain. 
+ // + if (I->isPseudo() && !Subtarget->inMips16Mode()) + llvm_unreachable("Pseudo opcode found in EmitInstruction()"); + MCInst TmpInst0; + MCInstLowering.Lower(I, TmpInst0); OutStreamer.EmitInstruction(TmpInst0); - } while ((I != E) && I->isInsideBundle()); // Delay slot check + } while ((++I != E) && I->isInsideBundle()); // Delay slot check } //===----------------------------------------------------------------------===// @@ -221,6 +232,11 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() { // OutStreamer.EmitRawText(StringRef("\t.set\tnomicromips")); OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName())); } + + if (Subtarget->inMicroMipsMode()) + if (MipsELFStreamer *MES = dyn_cast<MipsELFStreamer>(&OutStreamer)) + MES->emitMipsSTOCG(*Subtarget, CurrentFnSym, + (unsigned)ELF::STO_MIPS_MICROMIPS); OutStreamer.EmitLabel(CurrentFnSym); } @@ -236,10 +252,11 @@ void MipsAsmPrinter::EmitFunctionBodyStart() { raw_svector_ostream OS(Str); printSavedRegsBitmask(OS); OutStreamer.EmitRawText(OS.str()); - - OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder")); - OutStreamer.EmitRawText(StringRef("\t.set\tnomacro")); - OutStreamer.EmitRawText(StringRef("\t.set\tnoat")); + if (!Subtarget->inMips16Mode()) { + OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder")); + OutStreamer.EmitRawText(StringRef("\t.set\tnomacro")); + OutStreamer.EmitRawText(StringRef("\t.set\tnoat")); + } } } @@ -250,9 +267,11 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() { // always be at the function end, and we can't emit and // break with BB logic. if (OutStreamer.hasRawTextSupport()) { - OutStreamer.EmitRawText(StringRef("\t.set\tat")); - OutStreamer.EmitRawText(StringRef("\t.set\tmacro")); - OutStreamer.EmitRawText(StringRef("\t.set\treorder")); + if (!Subtarget->inMips16Mode()) { + OutStreamer.EmitRawText(StringRef("\t.set\tat")); + OutStreamer.EmitRawText(StringRef("\t.set\tmacro")); + OutStreamer.EmitRawText(StringRef("\t.set\treorder")); + } OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName())); } } @@ -540,6 +559,18 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // return to previous section if (OutStreamer.hasRawTextSupport()) OutStreamer.EmitRawText(StringRef("\t.previous")); + +} + +void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) { + + if (OutStreamer.hasRawTextSupport()) return; + + // Emit Mips ELF register info + Subtarget->getMReginfo().emitMipsReginfoSectionCG( + OutStreamer, getObjFileLowering(), *Subtarget); + if (MipsELFStreamer *MES = dyn_cast<MipsELFStreamer>(&OutStreamer)) + MES->emitELFHeaderFlagsCG(*Subtarget); } MachineLocation diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index d8fbeeb..dbdaf26 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -80,6 +80,7 @@ public: void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier = 0); void EmitStartOfAsmFile(Module &M); + void EmitEndOfAsmFile(Module &M); virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const; void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); }; diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index 52fa95b..df877b6 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include 
"llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/Passes.h" @@ -62,67 +63,73 @@ class MipsCodeEmitter : public MachineFunctionPass { static char ID; - public: - MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) : - MachineFunctionPass(ID), JTI(0), - II((const MipsInstrInfo *) tm.getInstrInfo()), - TD(tm.getDataLayout()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), - IsPIC(TM.getRelocationModel() == Reloc::PIC_) { - } +public: + MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) + : MachineFunctionPass(ID), JTI(0), + II((const MipsInstrInfo *) tm.getInstrInfo()), TD(tm.getDataLayout()), + TM(tm), MCE(mce), MCPEs(0), MJTEs(0), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} - bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { - return "Mips Machine Code Emitter"; - } + virtual const char *getPassName() const { + return "Mips Machine Code Emitter"; + } - /// getBinaryCodeForInstr - This function, generated by the - /// CodeEmitterGenerator using TableGen, produces the binary encoding for - /// machine instructions. - uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; - void emitInstruction(const MachineInstr &MI); + void emitInstruction(MachineBasicBlock::instr_iterator MI, + MachineBasicBlock &MBB); - private: +private: - void emitWord(unsigned Word); + void emitWord(unsigned Word); - /// Routines that handle operands which add machine relocations which are - /// fixed up by the relocation stage. - void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, - bool MayNeedFarStub) const; - void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; - void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; - void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; - void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const; + /// Routines that handle operands which add machine relocations which are + /// fixed up by the relocation stage. + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, + bool MayNeedFarStub) const; + void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; + void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; + void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; + void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const; - /// getMachineOpValue - Return binary encoding of operand. If the machine - /// operand requires relocation, record the relocation and return zero. - unsigned getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) const; + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. 
+ unsigned getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const; - unsigned getRelocation(const MachineInstr &MI, - const MachineOperand &MO) const; + unsigned getRelocation(const MachineInstr &MI, + const MachineOperand &MO) const; - unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; + unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; - unsigned getBranchTargetOpValue(const MachineInstr &MI, - unsigned OpNo) const; - unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const; - unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const; - unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; + unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const; - void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc, - int Offset) const; - }; + void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc, + int Offset) const; + + /// \brief Expand pseudo instruction. Return true if MI was expanded. + bool expandPseudos(MachineBasicBlock::instr_iterator &MI, + MachineBasicBlock &MBB) const; +}; } char MipsCodeEmitter::ID = 0; bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) { - JTI = ((MipsTargetMachine&) MF.getTarget()).getJITInfo(); - II = ((const MipsTargetMachine&) MF.getTarget()).getInstrInfo(); - TD = ((const MipsTargetMachine&) MF.getTarget()).getDataLayout(); + MipsTargetMachine &Target = static_cast<MipsTargetMachine &>( + const_cast<TargetMachine &>(MF.getTarget())); + + JTI = Target.getJITInfo(); + II = Target.getInstrInfo(); + TD = Target.getDataLayout(); Subtarget = &TM.getSubtarget<MipsSubtarget> (); MCPEs = &MF.getConstantPool()->getConstants(); MJTEs = 0; @@ -139,8 +146,8 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) { MBB != E; ++MBB){ MCE.StartMachineBasicBlock(MBB); for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(), - E = MBB->instr_end(); I != E; ++I) - emitInstruction(*I); + E = MBB->instr_end(); I != E;) + emitInstruction(*I++, *MBB); } } while (MCE.finishFunction(MF)); @@ -265,19 +272,21 @@ void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, Reloc, BB)); } -void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) { - DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI); - - MCE.processDebugLoc(MI.getDebugLoc(), true); +void MipsCodeEmitter::emitInstruction(MachineBasicBlock::instr_iterator MI, + MachineBasicBlock &MBB) { + DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << *MI); - // Skip pseudo instructions. - if ((MI.getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo) + // Expand pseudo instruction. Skip if MI was not expanded. 
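// A brief sketch of the control flow just below, using only names this patch
// introduces: a pseudo either expands in place via expandPseudos() (which
// leaves MI pointing at the first real replacement instruction) or is
// skipped outright, so getBinaryCodeForInstr() never sees a pseudo:
//
//   if (isPseudoForm(*MI) && !expandPseudos(MI, MBB))
//     return;                             // unexpandable pseudo: emit nothing
//   emitWord(getBinaryCodeForInstr(*MI)); // real instruction: encode it
//
// (isPseudoForm is shorthand here for the MipsII::FormMask TSFlags test in
// the code below, not an actual helper in this patch.)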
+ if (((MI->getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo) && + !expandPseudos(MI, MBB)) return; - emitWord(getBinaryCodeForInstr(MI)); + MCE.processDebugLoc(MI->getDebugLoc(), true); + + emitWord(getBinaryCodeForInstr(*MI)); ++NumEmitted; // Keep track of the # of mi's emitted - MCE.processDebugLoc(MI.getDebugLoc(), false); + MCE.processDebugLoc(MI->getDebugLoc(), false); } void MipsCodeEmitter::emitWord(unsigned Word) { @@ -289,6 +298,25 @@ void MipsCodeEmitter::emitWord(unsigned Word) { MCE.emitWordBE(Word); } +bool MipsCodeEmitter::expandPseudos(MachineBasicBlock::instr_iterator &MI, + MachineBasicBlock &MBB) const { + switch (MI->getOpcode()) { + case Mips::NOP: + BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::SLL), Mips::ZERO) + .addReg(Mips::ZERO).addImm(0); + break; + case Mips::JALRPseudo: + BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::JALR), Mips::RA) + .addReg(MI->getOperand(0).getReg()); + break; + default: + return false; + } + + (MI--)->eraseFromBundle(); + return true; +} + /// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips /// code to the specified MCE object. FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM, diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index 041a9d0..d62b166 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -1,4 +1,4 @@ -//===-- DelaySlotFiller.cpp - Mips Delay Slot Filler ----------------------===// +//===-- MipsDelaySlotFiller.cpp - Mips Delay Slot Filler ------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// Simple pass to fills delay slots with useful instructions. +// Simple pass to fill delay slots with useful instructions. // //===----------------------------------------------------------------------===// @@ -15,7 +15,7 @@ #include "Mips.h" #include "MipsTargetMachine.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -33,8 +33,7 @@ STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that" static cl::opt<bool> DisableDelaySlotFiller( "disable-mips-delay-filler", cl::init(false), - cl::desc("Disable the delay slot filler, which attempts to fill the Mips" - "delay slots with useful instructions."), + cl::desc("Fill all delay slots with NOPs."), cl::Hidden); // This option can be used to silence complaints by machine verifier passes. @@ -45,15 +44,25 @@ static cl::opt<bool> SkipDelaySlotFiller( cl::Hidden); namespace { - struct Filler : public MachineFunctionPass { - typedef MachineBasicBlock::instr_iterator InstrIter; - typedef MachineBasicBlock::reverse_instr_iterator ReverseInstrIter; + class RegDefsUses { + public: + RegDefsUses(TargetMachine &TM); + void init(const MachineInstr &MI); + bool update(const MachineInstr &MI, unsigned Begin, unsigned End); - TargetMachine &TM; - const TargetInstrInfo *TII; - InstrIter LastFiller; + private: + bool checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, unsigned Reg, + bool IsDef) const; - static char ID; + /// Returns true if Reg or its alias is in RegSet. 
+ bool isRegInSet(const BitVector &RegSet, unsigned Reg) const; + + const TargetRegisterInfo &TRI; + BitVector Defs, Uses; + }; + + class Filler : public MachineFunctionPass { + public: Filler(TargetMachine &tm) : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { } @@ -61,7 +70,6 @@ namespace { return "Mips Delay Slot Filler"; } - bool runOnMachineBasicBlock(MachineBasicBlock &MBB); bool runOnMachineFunction(MachineFunction &F) { if (SkipDelaySlotFiller) return false; @@ -73,66 +81,115 @@ namespace { return Changed; } - bool isDelayFiller(MachineBasicBlock &MBB, - InstrIter candidate); + private: + typedef MachineBasicBlock::iterator Iter; + typedef MachineBasicBlock::reverse_iterator ReverseIter; - void insertCallUses(InstrIter MI, - SmallSet<unsigned, 32> &RegDefs, - SmallSet<unsigned, 32> &RegUses); - - void insertDefsUses(InstrIter MI, - SmallSet<unsigned, 32> &RegDefs, - SmallSet<unsigned, 32> &RegUses); + bool runOnMachineBasicBlock(MachineBasicBlock &MBB); - bool IsRegInSet(SmallSet<unsigned, 32> &RegSet, - unsigned Reg); + /// This function checks if it is valid to move Candidate to the delay slot + /// and returns true if it isn't. It also updates load and store flags and + /// register defs and uses. + bool delayHasHazard(const MachineInstr &Candidate, bool &SawLoad, + bool &SawStore, RegDefsUses &RegDU) const; - bool delayHasHazard(InstrIter candidate, - bool &sawLoad, bool &sawStore, - SmallSet<unsigned, 32> &RegDefs, - SmallSet<unsigned, 32> &RegUses); + bool findDelayInstr(MachineBasicBlock &MBB, Iter slot, Iter &Filler) const; - bool - findDelayInstr(MachineBasicBlock &MBB, InstrIter slot, - InstrIter &Filler); + bool terminateSearch(const MachineInstr &Candidate) const; + TargetMachine &TM; + const TargetInstrInfo *TII; + static char ID; }; char Filler::ID = 0; } // end of anonymous namespace +RegDefsUses::RegDefsUses(TargetMachine &TM) + : TRI(*TM.getRegisterInfo()), Defs(TRI.getNumRegs(), false), + Uses(TRI.getNumRegs(), false) {} + +void RegDefsUses::init(const MachineInstr &MI) { + // Add all register operands which are explicit and non-variadic. + update(MI, 0, MI.getDesc().getNumOperands()); + + // If MI is a call, add RA to Defs to prevent users of RA from going into + // delay slot. + if (MI.isCall()) + Defs.set(Mips::RA); + + // Add all implicit register operands of branch instructions except + // register AT. + if (MI.isBranch()) { + update(MI, MI.getDesc().getNumOperands(), MI.getNumOperands()); + Defs.reset(Mips::AT); + } +} + +bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) { + BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs()); + bool HasHazard = false; + + for (unsigned I = Begin; I != End; ++I) { + const MachineOperand &MO = MI.getOperand(I); + + if (MO.isReg() && MO.getReg()) + HasHazard |= checkRegDefsUses(NewDefs, NewUses, MO.getReg(), MO.isDef()); + } + + Defs |= NewDefs; + Uses |= NewUses; + + return HasHazard; +} + +bool RegDefsUses::checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, + unsigned Reg, bool IsDef) const { + if (IsDef) { + NewDefs.set(Reg); + // check whether Reg has already been defined or used. + return (isRegInSet(Defs, Reg) || isRegInSet(Uses, Reg)); + } + + NewUses.set(Reg); + // check whether Reg has already been defined. + return isRegInSet(Defs, Reg); +} + +bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const { + // Check Reg and all aliased Registers. 
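// A worked example of the hazard rule encoded above (register names are
// illustrative): for the branch "beq $a0, $a1, L", init() records $a0 and
// $a1 in Uses. A candidate "addu $a0, $t0, $t1" defines $a0, which is
// already in Uses, so checkRegDefsUses() flags a hazard and the instruction
// stays out of the delay slot; "addu $t2, $t0, $t1" touches neither Defs nor
// Uses (nor any alias, thanks to MCRegAliasIterator below) and may be moved
// into the slot.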
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) + if (RegSet.test(*AI)) + return true; + return false; +} + /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. /// We assume there is only one delay slot per delayed instruction. -bool Filler:: -runOnMachineBasicBlock(MachineBasicBlock &MBB) { +bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; - LastFiller = MBB.instr_end(); - - for (InstrIter I = MBB.instr_begin(); I != MBB.instr_end(); ++I) - if (I->hasDelaySlot()) { - ++FilledSlots; - Changed = true; - InstrIter InstrWithSlot = I; - InstrIter D; - - // Delay slot filling is disabled at -O0. - if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) && - findDelayInstr(MBB, I, D)) { - MBB.splice(llvm::next(I), &MBB, D); - ++UsefulSlots; - } else - BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP)); - - // Record the filler instruction that filled the delay slot. - // The instruction after it will be visited in the next iteration. - LastFiller = ++I; - - // Bundle the delay slot filler to InstrWithSlot so that the machine - // verifier doesn't expect this instruction to be a terminator. - MIBundleBuilder(MBB, InstrWithSlot, llvm::next(LastFiller)); - } - return Changed; + for (Iter I = MBB.begin(); I != MBB.end(); ++I) { + if (!I->hasDelaySlot()) + continue; + + ++FilledSlots; + Changed = true; + Iter D; + + // Delay slot filling is disabled at -O0. + if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) && + findDelayInstr(MBB, I, D)) { + MBB.splice(llvm::next(I), &MBB, D); + ++UsefulSlots; + } else + BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP)); + + // Bundle the delay slot filler to the instruction with the delay slot. + MIBundleBuilder(MBB, I, llvm::next(llvm::next(I))); + } + + return Changed; } /// createMipsDelaySlotFillerPass - Returns a pass that fills in delay @@ -141,146 +198,57 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) { return new Filler(tm); } -bool Filler::findDelayInstr(MachineBasicBlock &MBB, - InstrIter slot, - InstrIter &Filler) { - SmallSet<unsigned, 32> RegDefs; - SmallSet<unsigned, 32> RegUses; +bool Filler::findDelayInstr(MachineBasicBlock &MBB, Iter Slot, + Iter &Filler) const { + RegDefsUses RegDU(TM); - insertDefsUses(slot, RegDefs, RegUses); + RegDU.init(*Slot); - bool sawLoad = false; - bool sawStore = false; + bool SawLoad = false; + bool SawStore = false; - for (ReverseInstrIter I(slot); I != MBB.instr_rend(); ++I) { + for (ReverseIter I(Slot); I != MBB.rend(); ++I) { // skip debug value if (I->isDebugValue()) continue; - // Convert to forward iterator. - InstrIter FI(llvm::next(I).base()); - - if (I->hasUnmodeledSideEffects() - || I->isInlineAsm() - || I->isLabel() - || FI == LastFiller - || I->isPseudo() - // - // Should not allow: - // ERET, DERET or WAIT, PAUSE. Need to add these to instruction - // list. TBD. 
- ) + if (terminateSearch(*I)) break; - if (delayHasHazard(FI, sawLoad, sawStore, RegDefs, RegUses)) { - insertDefsUses(FI, RegDefs, RegUses); + if (delayHasHazard(*I, SawLoad, SawStore, RegDU)) continue; - } - Filler = FI; + Filler = llvm::next(I).base(); return true; } return false; } -bool Filler::delayHasHazard(InstrIter candidate, - bool &sawLoad, bool &sawStore, - SmallSet<unsigned, 32> &RegDefs, - SmallSet<unsigned, 32> &RegUses) { - if (candidate->isImplicitDef() || candidate->isKill()) - return true; +bool Filler::delayHasHazard(const MachineInstr &Candidate, bool &SawLoad, + bool &SawStore, RegDefsUses &RegDU) const { + bool HasHazard = (Candidate.isImplicitDef() || Candidate.isKill()); // Loads or stores cannot be moved past a store to the delay slot // and stores cannot be moved past a load. - if (candidate->mayLoad()) { - if (sawStore) - return true; - sawLoad = true; - } - - if (candidate->mayStore()) { - if (sawStore) - return true; - sawStore = true; - if (sawLoad) - return true; + if (Candidate.mayStore() || Candidate.hasOrderedMemoryRef()) { + HasHazard |= SawStore | SawLoad; + SawStore = true; + } else if (Candidate.mayLoad()) { + HasHazard |= SawStore; + SawLoad = true; } - assert((!candidate->isCall() && !candidate->isReturn()) && + assert((!Candidate.isCall() && !Candidate.isReturn()) && "Cannot put calls or returns in delay slot."); - for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) { - const MachineOperand &MO = candidate->getOperand(i); - unsigned Reg; + HasHazard |= RegDU.update(Candidate, 0, Candidate.getNumOperands()); - if (!MO.isReg() || !(Reg = MO.getReg())) - continue; // skip - - if (MO.isDef()) { - // check whether Reg is defined or used before delay slot. - if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg)) - return true; - } - if (MO.isUse()) { - // check whether Reg is defined before delay slot. - if (IsRegInSet(RegDefs, Reg)) - return true; - } - } - return false; -} - -// Helper function for getting a MachineOperand's register number and adding it -// to RegDefs or RegUses. -static void insertDefUse(const MachineOperand &MO, - SmallSet<unsigned, 32> &RegDefs, - SmallSet<unsigned, 32> &RegUses, - unsigned ExcludedReg = 0) { - unsigned Reg; - - if (!MO.isReg() || !(Reg = MO.getReg()) || (Reg == ExcludedReg)) - return; - - if (MO.isDef()) - RegDefs.insert(Reg); - else if (MO.isUse()) - RegUses.insert(Reg); -} - -// Insert Defs and Uses of MI into the sets RegDefs and RegUses. -void Filler::insertDefsUses(InstrIter MI, - SmallSet<unsigned, 32> &RegDefs, - SmallSet<unsigned, 32> &RegUses) { - unsigned I, E = MI->getDesc().getNumOperands(); - - for (I = 0; I != E; ++I) - insertDefUse(MI->getOperand(I), RegDefs, RegUses); - - // If MI is a call, add RA to RegDefs to prevent users of RA from going into - // delay slot. - if (MI->isCall()) { - RegDefs.insert(Mips::RA); - return; - } - - // Return if MI is a return. - if (MI->isReturn()) - return; - - // Examine the implicit operands. Exclude register AT which is in the list of - // clobbered registers of branch instructions. - E = MI->getNumOperands(); - for (; I != E; ++I) - insertDefUse(MI->getOperand(I), RegDefs, RegUses, Mips::AT); + return HasHazard; } -//returns true if the Reg or its alias is in the RegSet. -bool Filler::IsRegInSet(SmallSet<unsigned, 32> &RegSet, unsigned Reg) { - // Check Reg and all aliased Registers. 
- for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true); - AI.isValid(); ++AI) - if (RegSet.count(*AI)) - return true; - return false; +bool Filler::terminateSearch(const MachineInstr &Candidate) const { + return (Candidate.isTerminator() || Candidate.isCall() || + Candidate.isLabel() || Candidate.isInlineAsm() || + Candidate.hasUnmodeledSideEffects()); } diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index c5f1290..78c74ef 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -96,7 +96,14 @@ private: SDNode *Select(SDNode *N); // Complex Pattern. - bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset); + /// (reg + imm). + bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + /// Fall back on this function if all else fails. + bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + /// Match integer address pattern. + bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const; bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Alias); @@ -323,8 +330,8 @@ SDValue MipsDAGToDAGISel::getMips16SPAliasReg() { /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions -bool MipsDAGToDAGISel:: -SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { +bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const { EVT ValTy = Addr.getValueType(); // if Address is FI, get the TargetFrameIndex. @@ -384,21 +391,24 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { return true; } } - - // If an indexed floating point load/store can be emitted, return false. - const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); - - if (LS && - (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && - Subtarget.hasFPIdx()) - return false; } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, ValTy); + return false; +} + +bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType()); return true; } +bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + return selectAddrRegImm(Addr, Base, Offset) || + selectAddrDefault(Addr, Base, Offset); +} + void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy()); if (Parent) { diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index a309040..36e1a15 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -11,8 +11,8 @@ // selection DAG. 
// //===----------------------------------------------------------------------===// - #define DEBUG_TYPE "mips-lower" +#include <set> #include "MipsISelLowering.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" @@ -55,6 +55,12 @@ Mips16HardFloat("mips16-hard-float", cl::NotHidden, cl::desc("MIPS: mips16 hard float enable."), cl::init(false)); +static cl::opt<bool> DontExpandCondPseudos16( + "mips16-dont-expand-cond-pseudo", + cl::init(false), + cl::desc("Dont expand conditional move related " + "pseudos for Mips 16"), + cl::Hidden); static const uint16_t O32IntRegs[4] = { @@ -162,6 +168,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::GPRel: return "MipsISD::GPRel"; case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer"; case MipsISD::Ret: return "MipsISD::Ret"; + case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN"; case MipsISD::FPBrcond: return "MipsISD::FPBrcond"; case MipsISD::FPCmp: return "MipsISD::FPCmp"; case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T"; @@ -205,39 +212,56 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { } } +namespace { + struct ltstr { + bool operator()(const char *s1, const char *s2) const + { + return strcmp(s1, s2) < 0; + } + }; + + std::set<const char*, ltstr> noHelperNeeded; +} + +void MipsTargetLowering::SetMips16LibcallName + (RTLIB::Libcall l, const char *Name) { + setLibcallName(l, Name); + noHelperNeeded.insert(Name); +} + void MipsTargetLowering::setMips16HardFloatLibCalls() { - setLibcallName(RTLIB::ADD_F32, "__mips16_addsf3"); - setLibcallName(RTLIB::ADD_F64, "__mips16_adddf3"); - setLibcallName(RTLIB::SUB_F32, "__mips16_subsf3"); - setLibcallName(RTLIB::SUB_F64, "__mips16_subdf3"); - setLibcallName(RTLIB::MUL_F32, "__mips16_mulsf3"); - setLibcallName(RTLIB::MUL_F64, "__mips16_muldf3"); - setLibcallName(RTLIB::DIV_F32, "__mips16_divsf3"); - setLibcallName(RTLIB::DIV_F64, "__mips16_divdf3"); - setLibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2"); - setLibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2"); - setLibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi"); - setLibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi"); - setLibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf"); - setLibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf"); - setLibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf"); - setLibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf"); - setLibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2"); - setLibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2"); - setLibcallName(RTLIB::UNE_F32, "__mips16_nesf2"); - setLibcallName(RTLIB::UNE_F64, "__mips16_nedf2"); - setLibcallName(RTLIB::OGE_F32, "__mips16_gesf2"); - setLibcallName(RTLIB::OGE_F64, "__mips16_gedf2"); - setLibcallName(RTLIB::OLT_F32, "__mips16_ltsf2"); - setLibcallName(RTLIB::OLT_F64, "__mips16_ltdf2"); - setLibcallName(RTLIB::OLE_F32, "__mips16_lesf2"); - setLibcallName(RTLIB::OLE_F64, "__mips16_ledf2"); - setLibcallName(RTLIB::OGT_F32, "__mips16_gtsf2"); - setLibcallName(RTLIB::OGT_F64, "__mips16_gtdf2"); - setLibcallName(RTLIB::UO_F32, "__mips16_unordsf2"); - setLibcallName(RTLIB::UO_F64, "__mips16_unorddf2"); - setLibcallName(RTLIB::O_F32, "__mips16_unordsf2"); - setLibcallName(RTLIB::O_F64, "__mips16_unorddf2"); + SetMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3"); + SetMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3"); + SetMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3"); + 
SetMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3"); + SetMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3"); + SetMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3"); + SetMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3"); + SetMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3"); + SetMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2"); + SetMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2"); + SetMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi"); + SetMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi"); + SetMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf"); + SetMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf"); + SetMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf"); + SetMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf"); + SetMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2"); + SetMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2"); + SetMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2"); + SetMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2"); + SetMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2"); + SetMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2"); + SetMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2"); + SetMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2"); + SetMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2"); + SetMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2"); + SetMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2"); + SetMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2"); + SetMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2"); + SetMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2"); + SetMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2"); + SetMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2"); } MipsTargetLowering:: @@ -404,6 +428,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FPOWI, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); @@ -426,6 +452,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); + setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Expand); setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand); @@ -498,7 +526,7 @@ MipsTargetLowering(MipsTargetMachine &TM) setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0); setExceptionSelectorRegister(IsN64 ? 
Mips::A1_64 : Mips::A1); - maxStoresPerMemcpy = 16; + MaxStoresPerMemcpy = 16; } bool @@ -1026,6 +1054,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::FABS: return LowerFABS(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); + case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); @@ -1207,11 +1236,290 @@ MipsTargetLowering::EmitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ return Sink; } +MachineBasicBlock *MipsTargetLowering::EmitSel16(unsigned Opc, MachineInstr *MI, + MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(3).getReg()) + .addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), dl, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *MipsTargetLowering::EmitSelT16 + (unsigned Opc1, unsigned Opc2, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... 
+ // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, dl, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) + .addReg(MI->getOperand(4).getReg()); + BuildMI(BB, dl, TII->get(Opc1)).addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), dl, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + +} + + +MachineBasicBlock *MipsTargetLowering::EmitSeliT16 + (unsigned Opc1, unsigned Opc2, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, dl, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) + .addImm(MI->getOperand(4).getImm()); + BuildMI(BB, dl, TII->get(Opc1)).addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), dl, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. 
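// Net effect of the diamond built above (a sketch; operand order is taken
// from the BuildMI calls): a pseudo such as "SelTBteqZSlti dst, t, f, $rx, imm"
// ends up as real Mips16 control flow:
//
//   thisMBB:  slti  $rx, imm       ; compare, result lands in T8
//             bteqz sinkMBB        ; take the TrueVal path when T8 == 0
//   copy0MBB:                      ; fallthrough, FalseValue path
//   sinkMBB:  dst = PHI [t, thisMBB], [f, copy0MBB]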
+ return BB; + +} + + +MachineBasicBlock + *MipsTargetLowering::EmitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, + MachineInstr *MI, + MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned regX = MI->getOperand(0).getReg(); + unsigned regY = MI->getOperand(1).getReg(); + MachineBasicBlock *target = MI->getOperand(2).getMBB(); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + + +MachineBasicBlock *MipsTargetLowering::EmitFEXT_T8I8I16_ins( + unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned regX = MI->getOperand(0).getReg(); + int64_t imm = MI->getOperand(1).getImm(); + MachineBasicBlock *target = MI->getOperand(2).getMBB(); + unsigned CmpOpc; + if (isUInt<8>(imm)) + CmpOpc = CmpiOpc; + else if (isUInt<16>(imm)) + CmpOpc = CmpiXOpc; + else + llvm_unreachable("immediate field not usable"); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + + +static unsigned Mips16WhichOp8uOr16simm + (unsigned shortOp, unsigned longOp, int64_t Imm) { + if (isUInt<8>(Imm)) + return shortOp; + else if (isInt<16>(Imm)) + return longOp; + else + llvm_unreachable("immediate field not usable"); +} + +MachineBasicBlock *MipsTargetLowering::EmitFEXT_CCRX16_ins( + unsigned SltOpc, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned CC = MI->getOperand(0).getReg(); + unsigned regX = MI->getOperand(1).getReg(); + unsigned regY = MI->getOperand(2).getReg(); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(SltOpc)).addReg(regX).addReg(regY); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} +MachineBasicBlock *MipsTargetLowering::EmitFEXT_CCRXI16_ins( + unsigned SltiOpc, unsigned SltiXOpc, + MachineInstr *MI, MachineBasicBlock *BB )const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned CC = MI->getOperand(0).getReg(); + unsigned regX = MI->getOperand(1).getReg(); + int64_t Imm = MI->getOperand(2).getImm(); + unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(SltOpc)).addReg(regX).addImm(Imm); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); + MI->eraseFromParent(); // The pseudo instruction is gone now. 
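// Net effect (sketch): the condition-code pseudo
//   SltiuCCRxImmX16 $cc, $rx, imm
// becomes two real instructions, since the Mips16 slt family writes the
// implicit T8 register rather than a named destination:
//   sltiu $rx, imm        ; sets T8
//   move  $cc, $t8        ; MoveR3216: copy T8 into the CC vreg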
+ return BB; + +} MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { - default: llvm_unreachable("Unexpected instr type to insert"); + default: + llvm_unreachable("Unexpected instr type to insert"); case Mips::ATOMIC_LOAD_ADD_I8: case Mips::ATOMIC_LOAD_ADD_I8_P8: return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu); @@ -1317,6 +1625,75 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitAtomicCmpSwap(MI, BB, 8); case Mips::BPOSGE32_PSEUDO: return EmitBPOSGE32(MI, BB); + case Mips::SelBeqZ: + return EmitSel16(Mips::BeqzRxImm16, MI, BB); + case Mips::SelBneZ: + return EmitSel16(Mips::BnezRxImm16, MI, BB); + case Mips::SelTBteqZCmpi: + return EmitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB); + case Mips::SelTBteqZSlti: + return EmitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB); + case Mips::SelTBteqZSltiu: + return EmitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SelTBtneZCmpi: + return EmitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB); + case Mips::SelTBtneZSlti: + return EmitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB); + case Mips::SelTBtneZSltiu: + return EmitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SelTBteqZCmp: + return EmitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); + case Mips::SelTBteqZSlt: + return EmitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); + case Mips::SelTBteqZSltu: + return EmitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); + case Mips::SelTBtneZCmp: + return EmitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); + case Mips::SelTBtneZSlt: + return EmitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); + case Mips::SelTBtneZSltu: + return EmitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); + case Mips::BteqzT8CmpX16: + return EmitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); + case Mips::BteqzT8SltX16: + return EmitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); + case Mips::BteqzT8SltuX16: + // TBD: figure out a way to get this or remove the instruction + // altogether. + return EmitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); + case Mips::BtnezT8CmpX16: + return EmitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); + case Mips::BtnezT8SltX16: + return EmitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); + case Mips::BtnezT8SltuX16: + // TBD: figure out a way to get this or remove the instruction + // altogether. 
+      return EmitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+  case Mips::BteqzT8CmpiX16: return EmitFEXT_T8I8I16_ins(
+    Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+  case Mips::BteqzT8SltiX16: return EmitFEXT_T8I8I16_ins(
+    Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+  case Mips::BteqzT8SltiuX16: return EmitFEXT_T8I8I16_ins(
+    Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+  case Mips::BtnezT8CmpiX16: return EmitFEXT_T8I8I16_ins(
+    Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+  case Mips::BtnezT8SltiX16: return EmitFEXT_T8I8I16_ins(
+    Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+  case Mips::BtnezT8SltiuX16: return EmitFEXT_T8I8I16_ins(
+    Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+    break;
+  case Mips::SltCCRxRy16:
+    return EmitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB);
+    break;
+  case Mips::SltiCCRxImmX16:
+    return EmitFEXT_CCRXI16_ins
+      (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+  case Mips::SltiuCCRxImmX16:
+    return EmitFEXT_CCRXI16_ins
+      (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+  case Mips::SltuCCRxRy16:
+    return EmitFEXT_CCRX16_ins
+      (Mips::SltuRxRy16, MI, BB);
  }
}
@@ -2209,6 +2586,34 @@ SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op,
  return DAG.getCopyFromReg(DAG.getEntryNode(), Op.getDebugLoc(), Reg, VT);
}
+// An EH_RETURN is the result of lowering llvm.eh.return which in turn is
+// generated from __builtin_eh_return (offset, handler).
+// The effect of this is to adjust the stack pointer by "offset"
+// and then branch to "handler".
+SDValue MipsTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
+    const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+  MipsFI->setCallsEhReturn();
+  SDValue Chain = Op.getOperand(0);
+  SDValue Offset = Op.getOperand(1);
+  SDValue Handler = Op.getOperand(2);
+  DebugLoc DL = Op.getDebugLoc();
+  EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
+
+  // Store stack offset in V1, store jump target in V0. Glue CopyToReg and
+  // EH_RETURN nodes, so that instructions are emitted back-to-back.
+  unsigned OffsetReg = IsN64 ? Mips::V1_64 : Mips::V1;
+  unsigned AddrReg = IsN64 ? Mips::V0_64 : Mips::V0;
+  Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue());
+  Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1));
+  return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain,
+                     DAG.getRegister(OffsetReg, Ty),
+                     DAG.getRegister(AddrReg, getPointerTy()),
+                     Chain.getValue(1));
+}
+
// TODO: set SType according to the desired memory barrier behavior.
SDValue MipsTargetLowering::LowerMEMBARRIER(SDValue Op,
                                            SelectionDAG &DAG) const {
@@ -2754,6 +3159,163 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
                      /*isVolatile=*/ true, false, 0);
}
+//
+// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much
+// cleaner way to do all of this but it will have to wait until the traditional
+// gcc mechanism is completed.
+//
+// For PIC, in order for Mips16 code to call Mips32 code which, according to
+// the ABI, has either arguments or returned values placed in floating point
+// registers, we use a set of helper functions. (This includes functions whose
+// return type is complex, which on Mips is returned in a pair of floating
+// point registers.)
+//
+// This is an encoding that we inherited from gcc.
+// In Mips traditional O32, N32 ABI, floating point numbers are passed in
+// floating point argument registers 1,2 only when the first and optionally
+// the second arguments are float (sf) or double (df).
+// For Mips16 we are only concerned with the situations where floating point
+// arguments are being passed in floating point registers by the ABI, because
+// Mips16 mode code cannot execute floating point instructions to load those
+// values and hence helper functions are needed.
+// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df);
+// the helper function suffixes for these are:
+// 0, 1, 5, 9, 2, 6, 10
+// This suffix can then be calculated as follows:
+// for a given argument Arg:
+//   Arg1x, Arg2x = 1 : Arg is sf
+//                  2 : Arg is df
+//                  0 : Arg is neither sf nor df
+// So this stub is the string for number Arg1x + Arg2x*4.
+// However, not all numbers between 0 and 10 are possible; we check anyway
+// and assert if an impossible one occurs.
+//
+
+unsigned int MipsTargetLowering::getMips16HelperFunctionStubNumber
+  (ArgListTy &Args) const {
+  unsigned int resultNum = 0;
+  if (Args.size() >= 1) {
+    Type *t = Args[0].Ty;
+    if (t->isFloatTy()) {
+      resultNum = 1;
+    }
+    else if (t->isDoubleTy()) {
+      resultNum = 2;
+    }
+  }
+  if (resultNum) {
+    if (Args.size() >=2) {
+      Type *t = Args[1].Ty;
+      if (t->isFloatTy()) {
+        resultNum += 4;
+      }
+      else if (t->isDoubleTy()) {
+        resultNum += 8;
+      }
+    }
+  }
+  return resultNum;
+}
+
+//
+// Prefixes are attached to stub numbers depending on the return type:
+// return type: float           sf_
+//              double          df_
+//              single complex  sc_
+//              double complex  dc_
+//              others          NO PREFIX
+//
+//
+// The full name of a helper function is __mips16_call_stub +
+// return type dependent prefix + stub number
+//
+//
+// This is something that probably should be in a different source file and
+// perhaps done differently but my main purpose is to not waste runtime
+// on something that we can enumerate in the source. Another possibility is
+// to have a Python script to generate these mapping tables. This will do
+// for now. There are a whole series of helper function mapping arrays, one
+// for each return type class as outlined above. There are 11 possible
+// entries; the ones holding 0 should never be selected.
+//
+// All the arrays are similar except for ones which return neither
+// sf, df, sc, dc, in which case we only care about ones which have sf or df
+// as a first parameter.
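// Worked example (a sketch of the rule above): for a callee
// "float f(double, float)", the first argument gives Arg1x = 2 (df) and the
// second gives Arg2x = 1 (sf), so the stub number is 2 + 1*4 = 6; the float
// return selects the "sf_" prefix, and the helper called is
// "__mips16_call_stub_sf_6".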
+// +#define P_ "__mips16_call_stub_" +#define MAX_STUB_NUMBER 10 +#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10" +#define T P "0" , T1 +#define P P_ +static char const * vMips16Helper[MAX_STUB_NUMBER+1] = + {0, T1 }; +#undef P +#define P P_ "sf_" +static char const * sfMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "df_" +static char const * dfMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "sc_" +static char const * scMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "dc_" +static char const * dcMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#undef P_ + + +const char* MipsTargetLowering:: + getMips16HelperFunction + (Type* RetTy, ArgListTy &Args, bool &needHelper) const { + const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args); +#ifndef NDEBUG + const unsigned int maxStubNum = 10; + assert(stubNum <= maxStubNum); + const bool validStubNum[maxStubNum+1] = + {true, true, true, false, false, true, true, false, false, true, true}; + assert(validStubNum[stubNum]); +#endif + const char *result; + if (RetTy->isFloatTy()) { + result = sfMips16Helper[stubNum]; + } + else if (RetTy ->isDoubleTy()) { + result = dfMips16Helper[stubNum]; + } + else if (RetTy->isStructTy()) { + // check if it's complex + if (RetTy->getNumContainedTypes() == 2) { + if ((RetTy->getContainedType(0)->isFloatTy()) && + (RetTy->getContainedType(1)->isFloatTy())) { + result = scMips16Helper[stubNum]; + } + else if ((RetTy->getContainedType(0)->isDoubleTy()) && + (RetTy->getContainedType(1)->isDoubleTy())) { + result = dcMips16Helper[stubNum]; + } + else { + llvm_unreachable("Uncovered condition"); + } + } + else { + llvm_unreachable("Uncovered condition"); + } + } + else { + if (stubNum == 0) { + needHelper = false; + return ""; + } + result = vMips16Helper[stubNum]; + } + needHelper = true; + return result; +} + /// LowerCall - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. SDValue @@ -2770,6 +3332,26 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; + const char* mips16HelperFunction = 0; + bool needMips16Helper = false; + + if (Subtarget->inMips16Mode() && getTargetMachine().Options.UseSoftFloat && + Mips16HardFloat) { + // + // currently we don't have symbols tagged with the mips16 or mips32 + // qualifier so we will assume that we don't know what kind it is. + // and generate the helper + // + bool lookupHelper = true; + if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + if (noHelperNeeded.find(S->getSymbol()) != noHelperNeeded.end()) { + lookupHelper = false; + } + } + if (lookupHelper) mips16HelperFunction = + getMips16HelperFunction(CLI.RetTy, CLI.Args, needMips16Helper); + + } MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering(); @@ -2779,9 +3361,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); - MipsCC MipsCCInfo(CallConv, isVarArg, IsO32, CCInfo); + MipsCC MipsCCInfo(CallConv, IsO32, CCInfo); - MipsCCInfo.analyzeCallOperands(Outs); + MipsCCInfo.analyzeCallOperands(Outs, isVarArg); // Get a count of how many bytes are to be pushed on the stack. 
unsigned NextStackOffset = CCInfo.getNextStackOffset(); @@ -2810,7 +3392,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, getPointerTy()); // With EABI is it possible to have 16 args on registers. - SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; + std::deque< std::pair<unsigned, SDValue> > RegsToPass; SmallVector<SDValue, 8> MemOpChains; MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin(); @@ -2920,31 +3502,31 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, else if (LargeGOT) Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16); - else if (HasMips64) - Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_DISP); - else // O32 & PIC + else // N64 || PIC Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL); GlobalOrExternal = true; } - SDValue InFlag; - - // T9 register operand. - SDValue T9; + SDValue JumpTarget = Callee; // T9 should contain the address of the callee function if // -reloction-model=pic or it is an indirect call. if (IsPICCall || !GlobalOrExternal) { - // copy to T9 unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; - Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0)); - InFlag = Chain.getValue(1); + unsigned V0Reg = Mips::V0; + if (needMips16Helper) { + RegsToPass.push_front(std::make_pair(V0Reg, Callee)); + JumpTarget = DAG.getExternalSymbol( + mips16HelperFunction, getPointerTy()); + JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT); + } + else { + RegsToPass.push_front(std::make_pair(T9Reg, Callee)); - if (Subtarget->inMips16Mode()) - T9 = DAG.getRegister(T9Reg, getPointerTy()); - else - Callee = DAG.getRegister(T9Reg, getPointerTy()); + if (!Subtarget->inMips16Mode()) + JumpTarget = SDValue(); + } } // Insert node "GP copy globalreg" before call to function. @@ -2962,6 +3544,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // chain and flag operands which copy the outgoing args into registers. // The InFlag in necessary since all emitted instructions must be // stuck together. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); @@ -2973,9 +3557,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - SmallVector<SDValue, 8> Ops; - Ops.push_back(Chain); - Ops.push_back(Callee); + SmallVector<SDValue, 8> Ops(1, Chain); + + if (JumpTarget.getNode()) + Ops.push_back(JumpTarget); // Add argument registers to the end of the list so that they are // known live into the call. @@ -2983,10 +3568,6 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); - // Add T9 register operand. - if (T9.getNode()) - Ops.push_back(T9); - // Add a register mask operand representing the call-preserved registers. 
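// Shape of the call node's operand list after this rework (a sketch; the
// glue flag is appended at the very end when present):
//   Ops = { Chain, [JumpTarget], Reg(arg regs...), RegMask }
// JumpTarget survives only when the target remains an explicit operand
// (direct non-PIC calls); for PIC or indirect calls the callee address now
// rides in RegsToPass via T9 and the target operand is dropped, while on the
// Mips16 hard-float path the real callee rides in V0 and the helper stub
// becomes the jump target.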
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); @@ -3065,7 +3646,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); - MipsCC MipsCCInfo(CallConv, isVarArg, IsO32, CCInfo); + MipsCC MipsCCInfo(CallConv, IsO32, CCInfo); MipsCCInfo.analyzeFormalArguments(Ins); MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(), @@ -3225,15 +3806,8 @@ MipsTargetLowering::LowerReturn(SDValue Chain, // Analize return values. CCInfo.AnalyzeReturn(Outs, RetCC_Mips); - // If this is the first return lowered for this function, add - // the regs to the liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -3242,9 +3816,9 @@ MipsTargetLowering::LowerReturn(SDValue Chain, Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); - // guarantee that all emitted copies are - // stuck together, avoiding something bad + // Guarantee that all emitted copies are stuck together with flags. Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } // The mips ABIs for returning structs by value requires that we copy @@ -3263,15 +3837,17 @@ MipsTargetLowering::LowerReturn(SDValue Chain, Chain = DAG.getCopyToReg(Chain, dl, V0, Val, Flag); Flag = Chain.getValue(1); - MF.getRegInfo().addLiveOut(V0); + RetOps.push_back(DAG.getRegister(V0, getPointerTy())); } - // Return on Mips is always a "jr $ra" + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. if (Flag.getNode()) - return DAG.getNode(MipsISD::Ret, dl, MVT::Other, Chain, Flag); + RetOps.push_back(Flag); - // Return Void - return DAG.getNode(MipsISD::Ret, dl, MVT::Other, Chain); + // Return on Mips is always a "jr $ra" + return DAG.getNode(MipsISD::Ret, dl, MVT::Other, &RetOps[0], RetOps.size()); } //===----------------------------------------------------------------------===// @@ -3552,40 +4128,21 @@ unsigned MipsTargetLowering::getJumpTableEncoding() const { return TargetLowering::getJumpTableEncoding(); } -MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CallConv, bool IsVarArg, - bool IsO32, CCState &Info) : CCInfo(Info) { - UseRegsForByval = true; - - if (IsO32) { - RegSize = 4; - NumIntArgRegs = array_lengthof(O32IntRegs); - ReservedArgArea = 16; - IntArgRegs = ShadowRegs = O32IntRegs; - FixedFn = VarFn = CC_MipsO32; - } else { - RegSize = 8; - NumIntArgRegs = array_lengthof(Mips64IntRegs); - ReservedArgArea = 0; - IntArgRegs = Mips64IntRegs; - ShadowRegs = Mips64DPRegs; - FixedFn = CC_MipsN; - VarFn = CC_MipsN_VarArg; - } - - if (CallConv == CallingConv::Fast) { - assert(!IsVarArg); - UseRegsForByval = false; - ReservedArgArea = 0; - FixedFn = VarFn = CC_Mips_FastCC; - } - +MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CC, bool IsO32_, + CCState &Info) + : CCInfo(Info), CallConv(CC), IsO32(IsO32_) { // Pre-allocate reserved argument area. 
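// The MipsCC rework below replaces cached per-ABI fields with accessors
// derived on demand from just (CallConv, IsO32); a condensed sketch of the
// rules, matching the accessor bodies added in this patch:
//   regSize()         -> IsO32 ? 4 : 8
//   reservedArgArea() -> (IsO32 && CallConv != CallingConv::Fast) ? 16 : 0
//   fixedArgFn()      -> Fast calls use CC_Mips_FastCC, otherwise
//                        IsO32 ? CC_MipsO32 : CC_MipsN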
- CCInfo.AllocateStack(ReservedArgArea, 1); + CCInfo.AllocateStack(reservedArgArea(), 1); } void MipsTargetLowering::MipsCC:: -analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args) { +analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args, + bool IsVarArg) { + assert((CallConv != CallingConv::Fast || !IsVarArg) && + "CallingConv::Fast shouldn't be used for vararg functions."); + unsigned NumOpnds = Args.size(); + llvm::CCAssignFn *FixedFn = fixedArgFn(), *VarFn = varArgFn(); for (unsigned I = 0; I != NumOpnds; ++I) { MVT ArgVT = Args[I].VT; @@ -3597,10 +4154,10 @@ analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args) { continue; } - if (Args[I].IsFixed) - R = FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); - else + if (IsVarArg && !Args[I].IsFixed) R = VarFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); + else + R = FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); if (R) { #ifndef NDEBUG @@ -3615,6 +4172,7 @@ analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args) { void MipsTargetLowering::MipsCC:: analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args) { unsigned NumArgs = Args.size(); + llvm::CCAssignFn *FixedFn = fixedArgFn(); for (unsigned I = 0; I != NumArgs; ++I) { MVT ArgVT = Args[I].VT; @@ -3644,11 +4202,12 @@ MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT, assert(ArgFlags.getByValSize() && "Byval argument's size shouldn't be 0."); struct ByValArgInfo ByVal; + unsigned RegSize = regSize(); unsigned ByValSize = RoundUpToAlignment(ArgFlags.getByValSize(), RegSize); unsigned Align = std::min(std::max(ArgFlags.getByValAlign(), RegSize), RegSize * 2); - if (UseRegsForByval) + if (useRegsForByval()) allocateRegs(ByVal, ByValSize, Align); // Allocate space on caller's stack. @@ -3659,9 +4218,38 @@ MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT, ByValArgs.push_back(ByVal); } +unsigned MipsTargetLowering::MipsCC::numIntArgRegs() const { + return IsO32 ? array_lengthof(O32IntRegs) : array_lengthof(Mips64IntRegs); +} + +unsigned MipsTargetLowering::MipsCC::reservedArgArea() const { + return (IsO32 && (CallConv != CallingConv::Fast)) ? 16 : 0; +} + +const uint16_t *MipsTargetLowering::MipsCC::intArgRegs() const { + return IsO32 ? O32IntRegs : Mips64IntRegs; +} + +llvm::CCAssignFn *MipsTargetLowering::MipsCC::fixedArgFn() const { + if (CallConv == CallingConv::Fast) + return CC_Mips_FastCC; + + return IsO32 ? CC_MipsO32 : CC_MipsN; +} + +llvm::CCAssignFn *MipsTargetLowering::MipsCC::varArgFn() const { + return IsO32 ? CC_MipsO32 : CC_MipsN_VarArg; +} + +const uint16_t *MipsTargetLowering::MipsCC::shadowRegs() const { + return IsO32 ? O32IntRegs : Mips64DPRegs; +} + void MipsTargetLowering::MipsCC::allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize, unsigned Align) { + unsigned RegSize = regSize(), NumIntArgRegs = numIntArgRegs(); + const uint16_t *IntArgRegs = intArgRegs(), *ShadowRegs = shadowRegs(); assert(!(ByValSize % RegSize) && !(Align % RegSize) && "Byval argument's size and alignment should be a multiple of" "RegSize."); @@ -3726,7 +4314,7 @@ copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains, // Copy byVal arg to registers and stack. 
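// Why RegsToPass changes to std::deque here (judging from LowerCall above):
// the callee address is now queued like any other register argument, via
//   RegsToPass.push_front(std::make_pair(T9Reg, Callee));
// and SmallVector has no push_front, while the copy into T9 (or V0 on the
// Mips16 helper path) must be emitted together with the argument copies.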
void MipsTargetLowering:: passByValArg(SDValue Chain, DebugLoc DL, - SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const MipsCC &CC, const ByValArgInfo &ByVal, diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index c4b38c6..f0f3782 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -20,6 +20,8 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" +#include <deque> +#include <string> namespace llvm { namespace MipsISD { @@ -63,6 +65,8 @@ namespace llvm { // Return Ret, + EH_RETURN, + // MAdd/Sub nodes MAdd, MAddu, @@ -174,8 +178,16 @@ namespace llvm { virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; private: + void SetMips16LibcallName(RTLIB::Libcall, const char *Name); + void setMips16HardFloatLibCalls(); + unsigned int + getMips16HelperFunctionStubNumber(ArgListTy &Args) const; + + const char *getMips16HelperFunction + (Type* RetTy, ArgListTy &Args, bool &needHelper) const; + /// ByValArgInfo - Byval argument information. struct ByValArgInfo { unsigned FirstIdx; // Index of the first register used. @@ -189,53 +201,57 @@ namespace llvm { /// arguments and inquire about calling convention information. class MipsCC { public: - MipsCC(CallingConv::ID CallConv, bool IsVarArg, bool IsO32, - CCState &Info); + MipsCC(CallingConv::ID CallConv, bool IsO32, CCState &Info); - void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs); + void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, + bool IsVarArg); void analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins); - void handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags); - const CCState &getCCInfo() const { return CCInfo; } /// hasByValArg - Returns true if function has byval arguments. bool hasByValArg() const { return !ByValArgs.empty(); } - /// useRegsForByval - Returns true if the calling convention allows the - /// use of registers to pass byval arguments. - bool useRegsForByval() const { return UseRegsForByval; } - /// regSize - Size (in number of bits) of integer registers. - unsigned regSize() const { return RegSize; } + unsigned regSize() const { return IsO32 ? 4 : 8; } /// numIntArgRegs - Number of integer registers available for calls. - unsigned numIntArgRegs() const { return NumIntArgRegs; } + unsigned numIntArgRegs() const; /// reservedArgArea - The size of the area the caller reserves for /// register arguments. This is 16-byte if ABI is O32. - unsigned reservedArgArea() const { return ReservedArgArea; } + unsigned reservedArgArea() const; - /// intArgRegs - Pointer to array of integer registers. - const uint16_t *intArgRegs() const { return IntArgRegs; } + /// Return pointer to array of integer argument registers. 
+ const uint16_t *intArgRegs() const; typedef SmallVector<ByValArgInfo, 2>::const_iterator byval_iterator; byval_iterator byval_begin() const { return ByValArgs.begin(); } byval_iterator byval_end() const { return ByValArgs.end(); } private: + void handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags); + + /// useRegsForByval - Returns true if the calling convention allows the + /// use of registers to pass byval arguments. + bool useRegsForByval() const { return CallConv != CallingConv::Fast; } + + /// Return the function that analyzes fixed argument list functions. + llvm::CCAssignFn *fixedArgFn() const; + + /// Return the function that analyzes variable argument list functions. + llvm::CCAssignFn *varArgFn() const; + + const uint16_t *shadowRegs() const; + void allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize, unsigned Align); CCState &CCInfo; - bool UseRegsForByval; - unsigned RegSize; - unsigned NumIntArgRegs; - unsigned ReservedArgArea; - const uint16_t *IntArgRegs, *ShadowRegs; + CallingConv::ID CallConv; + bool IsO32; SmallVector<ByValArgInfo, 2> ByValArgs; - llvm::CCAssignFn *FixedFn, *VarFn; }; // Subtarget Info @@ -265,6 +281,7 @@ namespace llvm { SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const; @@ -294,7 +311,7 @@ namespace llvm { /// passByValArg - Pass a byval argument in registers or on stack. void passByValArg(SDValue Chain, DebugLoc DL, - SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const MipsCC &CC, const ByValArgInfo &ByVal, @@ -387,6 +404,28 @@ namespace llvm { MachineBasicBlock *BB, unsigned Size) const; MachineBasicBlock *EmitAtomicCmpSwapPartword(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size) const; + MachineBasicBlock *EmitSel16(unsigned Opc, MachineInstr *MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitSeliT16(unsigned Opc1, unsigned Opc2, + MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *EmitSelT16(unsigned Opc1, unsigned Opc2, + MachineInstr *MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, + MachineInstr *MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitFEXT_T8I8I16_ins( + unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, + MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitFEXT_CCRX16_ins( + unsigned SltOpc, + MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitFEXT_CCRXI16_ins( + unsigned SltiOpc, unsigned SltiXOpc, + MachineInstr *MI, MachineBasicBlock *BB )const; + }; } diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index ab6f8ab..891bdc1 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -107,7 +107,8 @@ multiclass ADDS_M<string opstr, InstrItinClass Itin, bit IsComm, class ABSS_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC, InstrItinClass Itin, SDPatternOperator OpNode= null_frag> : InstSE<(outs 
DstRC:$fd), (ins SrcRC:$fs), !strconcat(opstr, "\t$fd, $fs"), - [(set DstRC:$fd, (OpNode SrcRC:$fs))], Itin, FrmFR>; + [(set DstRC:$fd, (OpNode SrcRC:$fs))], Itin, FrmFR>, + NeverHasSideEffects; multiclass ABSS_M<string opstr, InstrItinClass Itin, SDPatternOperator OpNode= null_frag> { @@ -138,17 +139,27 @@ class MTC1_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC, InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"), [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>; +class MFC1_FT_CCR<string opstr, RegisterClass DstRC, RegisterOperand SrcRC, + InstrItinClass Itin, SDPatternOperator OpNode= null_frag> : + InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"), + [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>; + +class MTC1_FT_CCR<string opstr, RegisterOperand DstRC, RegisterClass SrcRC, + InstrItinClass Itin, SDPatternOperator OpNode= null_frag> : + InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"), + [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>; + class LW_FT<string opstr, RegisterClass RC, InstrItinClass Itin, Operand MemOpnd, SDPatternOperator OpNode= null_frag> : InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(set RC:$rt, (OpNode addr:$addr))], Itin, FrmFI> { + [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI> { let DecoderMethod = "DecodeFMem"; } class SW_FT<string opstr, RegisterClass RC, InstrItinClass Itin, Operand MemOpnd, SDPatternOperator OpNode= null_frag> : InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(OpNode RC:$rt, addr:$addr)], Itin, FrmFI> { + [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI> { let DecoderMethod = "DecodeFMem"; } @@ -169,13 +180,17 @@ class LWXC1_FT<string opstr, RegisterClass DRC, RegisterClass PRC, InstrItinClass Itin, SDPatternOperator OpNode = null_frag> : InstSE<(outs DRC:$fd), (ins PRC:$base, PRC:$index), !strconcat(opstr, "\t$fd, ${index}(${base})"), - [(set DRC:$fd, (OpNode (add PRC:$base, PRC:$index)))], Itin, FrmFI>; + [(set DRC:$fd, (OpNode (add PRC:$base, PRC:$index)))], Itin, FrmFI> { + let AddedComplexity = 20; +} class SWXC1_FT<string opstr, RegisterClass DRC, RegisterClass PRC, InstrItinClass Itin, SDPatternOperator OpNode = null_frag> : InstSE<(outs), (ins DRC:$fs, PRC:$base, PRC:$index), !strconcat(opstr, "\t$fs, ${index}(${base})"), - [(OpNode DRC:$fs, (add PRC:$base, PRC:$index))], Itin, FrmFI>; + [(OpNode DRC:$fs, (add PRC:$base, PRC:$index))], Itin, FrmFI> { + let AddedComplexity = 20; +} class BC1F_FT<string opstr, InstrItinClass Itin, SDPatternOperator Op = null_frag> : @@ -203,15 +218,13 @@ def ROUND_W_S : ABSS_FT<"round.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xc, 16>; def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xd, 16>; def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xe, 16>; def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xf, 16>; -def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0x24, 16>, - NeverHasSideEffects; +def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0x24, 16>; defm ROUND_W : ROUND_M<"round.w.d", IIFcvt>, ABSS_FM<0xc, 17>; defm TRUNC_W : ROUND_M<"trunc.w.d", IIFcvt>, ABSS_FM<0xd, 17>; defm CEIL_W : ROUND_M<"ceil.w.d", IIFcvt>, ABSS_FM<0xe, 17>; defm FLOOR_W : ROUND_M<"floor.w.d", IIFcvt>, ABSS_FM<0xf, 17>; -defm CVT_W : ROUND_M<"cvt.w.d", IIFcvt>, ABSS_FM<0x24, 17>, - NeverHasSideEffects; +defm CVT_W : ROUND_M<"cvt.w.d", IIFcvt>, ABSS_FM<0x24, 17>; let 
Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { def ROUND_L_S : ABSS_FT<"round.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x8, 16>; @@ -228,19 +241,16 @@ let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { } def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32, FGR32, IIFcvt>, ABSS_FM<0x20, 20>; -def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x25, 16>, - NeverHasSideEffects; -def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0x25, 17>, - NeverHasSideEffects; +def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x25, 16>; +def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0x25, 17>; -let Predicates = [NotFP64bit, HasStdEnc], neverHasSideEffects = 1 in { +let Predicates = [NotFP64bit, HasStdEnc] in { def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32, AFGR64, IIFcvt>, ABSS_FM<0x20, 17>; def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>; def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 16>; } -let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64", - neverHasSideEffects = 1 in { +let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 17>; def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 21>; def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>; @@ -265,8 +275,8 @@ defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>; // regardless of register aliasing. /// Move Control Registers From/To CPU Registers -def CFC1 : MFC1_FT<"cfc1", CPURegs, CCR, IIFmove>, MFC1_FM<2>; -def CTC1 : MTC1_FT<"ctc1", CCR, CPURegs, IIFmove>, MFC1_FM<6>; +def CFC1 : MFC1_FT_CCR<"cfc1", CPURegs, CCROpnd, IIFmove>, MFC1_FM<2>; +def CTC1 : MTC1_FT_CCR<"ctc1", CCROpnd, CPURegs, IIFmove>, MFC1_FM<6>; def MFC1 : MFC1_FT<"mfc1", CPURegs, FGR32, IIFmove, bitconvert>, MFC1_FM<0>; def MTC1 : MTC1_FT<"mtc1", FGR32, CPURegs, IIFmove, bitconvert>, MFC1_FM<4>; def DMFC1 : MFC1_FT<"dmfc1", CPU64Regs, FGR64, IIFmove, bitconvert>, MFC1_FM<1>; @@ -437,7 +447,7 @@ def FCMP_D64 : CEQS_FT<"d", FGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>, //===----------------------------------------------------------------------===// // Floating Point Pseudo-Instructions //===----------------------------------------------------------------------===// -def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCR:$src), []>; +def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCROpnd:$src), []>; // This pseudo instr gets expanded into 2 mtc1 instrs after register // allocation. @@ -492,3 +502,33 @@ let Predicates = [IsFP64bit, HasStdEnc] in { def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>; def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; } + +// Load/Store patterns. 
+let AddedComplexity = 40 in { + let Predicates = [IsN64, HasStdEnc] in { + def : MipsPat<(f32 (load addrRegImm:$a)), (LWC1_P8 addrRegImm:$a)>; + def : MipsPat<(store FGR32:$v, addrRegImm:$a), + (SWC1_P8 FGR32:$v, addrRegImm:$a)>; + def : MipsPat<(f64 (load addrRegImm:$a)), (LDC164_P8 addrRegImm:$a)>; + def : MipsPat<(store FGR64:$v, addrRegImm:$a), + (SDC164_P8 FGR64:$v, addrRegImm:$a)>; + } + + let Predicates = [NotN64, HasStdEnc] in { + def : MipsPat<(f32 (load addrRegImm:$a)), (LWC1 addrRegImm:$a)>; + def : MipsPat<(store FGR32:$v, addrRegImm:$a), + (SWC1 FGR32:$v, addrRegImm:$a)>; + } + + let Predicates = [NotN64, HasMips64, HasStdEnc] in { + def : MipsPat<(f64 (load addrRegImm:$a)), (LDC164 addrRegImm:$a)>; + def : MipsPat<(store FGR64:$v, addrRegImm:$a), + (SDC164 FGR64:$v, addrRegImm:$a)>; + } + + let Predicates = [NotN64, NotMips64, HasStdEnc] in { + def : MipsPat<(f64 (load addrRegImm:$a)), (LDC1 addrRegImm:$a)>; + def : MipsPat<(store AFGR64:$v, addrRegImm:$a), + (SDC1 AFGR64:$v, addrRegImm:$a)>; + } +} diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index c026b5d..ee432c8 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -366,13 +366,8 @@ class LUI_FM { let Inst{15-0} = imm16; } -class NOP_FM { - bits<32> Inst; - - let Inst{31-0} = 0; -} - class JALR_FM { + bits<5> rd; bits<5> rs; bits<32> Inst; @@ -380,7 +375,7 @@ class JALR_FM { let Inst{31-26} = 0; let Inst{25-21} = rs; let Inst{20-16} = 0; - let Inst{15-11} = 31; + let Inst{15-11} = rd; let Inst{10-6} = 0; let Inst{5-0} = 9; } diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 8f2ce6f..de09c9e 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -72,7 +72,8 @@ def MipsTprelLo : SDNode<"MipsISD::TprelLo", SDTIntUnaryOp>; def MipsThreadPointer: SDNode<"MipsISD::ThreadPointer", SDT_MipsThreadPointer>; // Return -def MipsRet : SDNode<"MipsISD::Ret", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; +def MipsRet : SDNode<"MipsISD::Ret", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; // These are target-independent nodes, but have target-specific formats. def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart, @@ -232,6 +233,10 @@ def calltarget64: Operand<i64>; def simm16 : Operand<i32> { let DecoderMethod= "DecodeSimm16"; } + +def simm20 : Operand<i32> { +} + def simm16_64 : Operand<i64>; def shamt : Operand<i32>; @@ -296,6 +301,10 @@ def HI16 : SDNodeXForm<imm, [{ // Node immediate fits as 16-bit sign extended on target immediate. // e.g. addi, andi +def immSExt8 : PatLeaf<(imm), [{ return isInt<8>(N->getSExtValue()); }]>; + +// Node immediate fits as 16-bit sign extended on target immediate. +// e.g. addi, andi def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>; // Node immediate fits as 15-bit sign extended on target immediate. @@ -325,19 +334,25 @@ def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>; // Mips Address Mode! SDNode frameindex could possibly be a match // since load and store instructions from the stack use it.
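The next hunk splits the old "SelectAddr" ComplexPattern into three selectors (selectIntAddr, selectAddrRegImm, selectAddrDefault), and the floating-point load/store patterns above are wired to addrRegImm under AddedComplexity = 40 so instruction selection tries the reg+imm form first. A rough sketch of the decision a reg+imm selector makes -- plain values stand in for the SDValue operands here; this is not the actual MipsDAGToDAGISel code:

#include <cstdint>

struct RegImmAddr {
  unsigned BaseReg;
  int16_t Offset; // MIPS loads/stores encode a signed 16-bit offset
};

// Accept base+offset addressing only when the displacement fits the
// instruction's 16-bit immediate field; otherwise report failure so a
// more general ("default") selector can take over.
bool selectAddrRegImm(unsigned BaseReg, int64_t Offset, RegImmAddr &Out) {
  if (Offset < INT16_MIN || Offset > INT16_MAX)
    return false;
  Out.BaseReg = BaseReg;
  Out.Offset = static_cast<int16_t>(Offset);
  return true;
}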
def addr : - ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], [SDNPWantParent]>; + ComplexPattern<iPTR, 2, "selectIntAddr", [frameindex]>; + +def addrRegImm : + ComplexPattern<iPTR, 2, "selectAddrRegImm", [frameindex]>; + +def addrDefault : + ComplexPattern<iPTR, 2, "selectAddrDefault", [frameindex]>; //===----------------------------------------------------------------------===// // Instructions specific format //===----------------------------------------------------------------------===// // Arithmetic and logical instructions with 3 register operands. -class ArithLogicR<string opstr, RegisterClass RC, bit isComm = 0, +class ArithLogicR<string opstr, RegisterOperand RO, bit isComm = 0, InstrItinClass Itin = NoItinerary, SDPatternOperator OpNode = null_frag>: - InstSE<(outs RC:$rd), (ins RC:$rs, RC:$rt), + InstSE<(outs RO:$rd), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rd, $rs, $rt"), - [(set RC:$rd, (OpNode RC:$rs, RC:$rt))], Itin, FrmR> { + [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> { let isCommutable = isComm; let isReMaterializable = 1; string BaseOpcode; @@ -345,27 +360,27 @@ class ArithLogicR<string opstr, RegisterClass RC, bit isComm = 0, } // Arithmetic and logical instructions with 2 register operands. -class ArithLogicI<string opstr, Operand Od, RegisterClass RC, +class ArithLogicI<string opstr, Operand Od, RegisterOperand RO, SDPatternOperator imm_type = null_frag, SDPatternOperator OpNode = null_frag> : - InstSE<(outs RC:$rt), (ins RC:$rs, Od:$imm16), + InstSE<(outs RO:$rt), (ins RO:$rs, Od:$imm16), !strconcat(opstr, "\t$rt, $rs, $imm16"), - [(set RC:$rt, (OpNode RC:$rs, imm_type:$imm16))], IIAlu, FrmI> { + [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))], IIAlu, FrmI> { let isReMaterializable = 1; } // Arithmetic Multiply ADD/SUB class MArithR<string opstr, SDPatternOperator op = null_frag, bit isComm = 0> : - InstSE<(outs), (ins CPURegs:$rs, CPURegs:$rt), + InstSE<(outs), (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt), !strconcat(opstr, "\t$rs, $rt"), - [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul, FrmR> { + [(op CPURegsOpnd:$rs, CPURegsOpnd:$rt, LO, HI)], IIImul, FrmR> { let Defs = [HI, LO]; let Uses = [HI, LO]; let isCommutable = isComm; } // Logical -class LogicNOR<string opstr, RegisterClass RC>: +class LogicNOR<string opstr, RegisterOperand RC>: InstSE<(outs RC:$rd), (ins RC:$rs, RC:$rt), !strconcat(opstr, "\t$rd, $rs, $rt"), [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR> { @@ -374,17 +389,17 @@ class LogicNOR<string opstr, RegisterClass RC>: // Shifts class shift_rotate_imm<string opstr, Operand ImmOpnd, - RegisterClass RC, SDPatternOperator OpNode = null_frag, + RegisterOperand RC, SDPatternOperator OpNode = null_frag, SDPatternOperator PF = null_frag> : InstSE<(outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt), !strconcat(opstr, "\t$rd, $rt, $shamt"), [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu, FrmR>; -class shift_rotate_reg<string opstr, RegisterClass RC, +class shift_rotate_reg<string opstr, RegisterOperand RC, SDPatternOperator OpNode = null_frag>: - InstSE<(outs RC:$rd), (ins CPURegs:$rs, RC:$rt), + InstSE<(outs RC:$rd), (ins CPURegsOpnd:$rs, RC:$rt), !strconcat(opstr, "\t$rd, $rt, $rs"), - [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs))], IIAlu, FrmR>; + [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR>; // Load Upper Immediate class LoadUpper<string opstr, RegisterClass RC, Operand Imm>: @@ -498,15 +513,16 @@ class CBranchZero<string opstr, PatFrag cond_op, RegisterClass RC> : // SetCC class SetCC_R<string opstr, PatFrag cond_op,
RegisterClass RC> : - InstSE<(outs CPURegs:$rd), (ins RC:$rs, RC:$rt), + InstSE<(outs CPURegsOpnd:$rd), (ins RC:$rs, RC:$rt), !strconcat(opstr, "\t$rd, $rs, $rt"), - [(set CPURegs:$rd, (cond_op RC:$rs, RC:$rt))], IIAlu, FrmR>; + [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))], IIAlu, FrmR>; class SetCC_I<string opstr, PatFrag cond_op, Operand Od, PatLeaf imm_type, RegisterClass RC>: - InstSE<(outs CPURegs:$rt), (ins RC:$rs, Od:$imm16), + InstSE<(outs CPURegsOpnd:$rt), (ins RC:$rs, Od:$imm16), !strconcat(opstr, "\t$rt, $rs, $imm16"), - [(set CPURegs:$rt, (cond_op RC:$rs, imm_type:$imm16))], IIAlu, FrmI>; + [(set CPURegsOpnd:$rt, (cond_op RC:$rs, imm_type:$imm16))], + IIAlu, FrmI>; // Jump class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator, @@ -559,12 +575,17 @@ let isCall=1, hasDelaySlot=1, Defs = [RA] in { let DecoderMethod = "DecodeJumpTarget"; } + class JumpLinkRegPseudo<RegisterClass RC, Instruction JALRInst, + Register RetReg>: + PseudoSE<(outs), (ins RC:$rs), [(MipsJmpLink RC:$rs)], IIBranch>, + PseudoInstExpansion<(JALRInst RetReg, RC:$rs)>; + class JumpLinkReg<string opstr, RegisterClass RC>: - InstSE<(outs), (ins RC:$rs), !strconcat(opstr, "\t$rs"), - [(MipsJmpLink RC:$rs)], IIBranch, FrmR>; + InstSE<(outs RC:$rd), (ins RC:$rs), !strconcat(opstr, "\t$rd, $rs"), + [], IIBranch, FrmR>; - class BGEZAL_FT<string opstr, RegisterClass RC> : - InstSE<(outs), (ins RC:$rs, brtarget:$offset), + class BGEZAL_FT<string opstr, RegisterOperand RO> : + InstSE<(outs), (ins RO:$rs, brtarget:$offset), !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI>; } @@ -585,19 +606,19 @@ class SYNC_FT : NoItinerary, FrmOther>; // Mul, Div -class Mult<string opstr, InstrItinClass itin, RegisterClass RC, +class Mult<string opstr, InstrItinClass itin, RegisterOperand RO, list<Register> DefRegs> : - InstSE<(outs), (ins RC:$rs, RC:$rt), !strconcat(opstr, "\t$rs, $rt"), [], + InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rs, $rt"), [], itin, FrmR> { let isCommutable = 1; let Defs = DefRegs; let neverHasSideEffects = 1; } -class Div<SDNode op, string opstr, InstrItinClass itin, RegisterClass RC, +class Div<SDNode op, string opstr, InstrItinClass itin, RegisterOperand RO, list<Register> DefRegs> : - InstSE<(outs), (ins RC:$rs, RC:$rt), - !strconcat(opstr, "\t$$zero, $rs, $rt"), [(op RC:$rs, RC:$rt)], itin, + InstSE<(outs), (ins RO:$rs, RO:$rt), + !strconcat(opstr, "\t$$zero, $rs, $rt"), [(op RO:$rs, RO:$rt)], itin, FrmR> { let Defs = DefRegs; } @@ -623,14 +644,14 @@ class EffectiveAddress<string opstr, RegisterClass RC, Operand Mem> : } // Count Leading Ones/Zeros in Word -class CountLeading0<string opstr, RegisterClass RC>: - InstSE<(outs RC:$rd), (ins RC:$rs), !strconcat(opstr, "\t$rd, $rs"), - [(set RC:$rd, (ctlz RC:$rs))], IIAlu, FrmR>, +class CountLeading0<string opstr, RegisterOperand RO>: + InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), + [(set RO:$rd, (ctlz RO:$rs))], IIAlu, FrmR>, Requires<[HasBitCount, HasStdEnc]>; -class CountLeading1<string opstr, RegisterClass RC>: - InstSE<(outs RC:$rd), (ins RC:$rs), !strconcat(opstr, "\t$rd, $rs"), - [(set RC:$rd, (ctlz (not RC:$rs)))], IIAlu, FrmR>, +class CountLeading1<string opstr, RegisterOperand RO>: + InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), + [(set RO:$rd, (ctlz (not RO:$rs)))], IIAlu, FrmR>, Requires<[HasBitCount, HasStdEnc]>; @@ -642,31 +663,31 @@ class SignExtInReg<string opstr, ValueType vt, RegisterClass RC> : } // Subword Swap -class SubwordSwap<string opstr, 
RegisterClass RC>: - InstSE<(outs RC:$rd), (ins RC:$rt), !strconcat(opstr, "\t$rd, $rt"), [], +class SubwordSwap<string opstr, RegisterOperand RO>: + InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [], NoItinerary, FrmR> { let Predicates = [HasSwap, HasStdEnc]; let neverHasSideEffects = 1; } // Read Hardware -class ReadHardware<RegisterClass CPURegClass, RegisterClass HWRegClass> : - InstSE<(outs CPURegClass:$rt), (ins HWRegClass:$rd), "rdhwr\t$rt, $rd", [], +class ReadHardware<RegisterClass CPURegClass, RegisterOperand RO> : + InstSE<(outs CPURegClass:$rt), (ins RO:$rd), "rdhwr\t$rt, $rd", [], IIAlu, FrmR>; // Ext and Ins -class ExtBase<string opstr, RegisterClass RC>: - InstSE<(outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ext:$size), +class ExtBase<string opstr, RegisterOperand RO>: + InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ext:$size), !strconcat(opstr, " $rt, $rs, $pos, $size"), - [(set RC:$rt, (MipsExt RC:$rs, imm:$pos, imm:$size))], NoItinerary, + [(set RO:$rt, (MipsExt RO:$rs, imm:$pos, imm:$size))], NoItinerary, FrmR> { let Predicates = [HasMips32r2, HasStdEnc]; } -class InsBase<string opstr, RegisterClass RC>: - InstSE<(outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ins:$size, RC:$src), +class InsBase<string opstr, RegisterOperand RO>: + InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ins:$size, RO:$src), !strconcat(opstr, " $rt, $rs, $pos, $size"), - [(set RC:$rt, (MipsIns RC:$rs, imm:$pos, imm:$size, RC:$src))], + [(set RO:$rt, (MipsIns RO:$rs, imm:$pos, imm:$size, RO:$src))], NoItinerary, FrmR> { let Predicates = [HasMips32r2, HasStdEnc]; let Constraints = "$src = $rt"; @@ -699,15 +720,15 @@ multiclass AtomicCmpSwap32<PatFrag Op> { } } -class LLBase<string opstr, RegisterClass RC, Operand Mem> : - InstSE<(outs RC:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"), +class LLBase<string opstr, RegisterOperand RO, Operand Mem> : + InstSE<(outs RO:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> { let DecoderMethod = "DecodeMem"; let mayLoad = 1; } -class SCBase<string opstr, RegisterClass RC, Operand Mem> : - InstSE<(outs RC:$dst), (ins RC:$rt, Mem:$addr), +class SCBase<string opstr, RegisterOperand RO, Operand Mem> : + InstSE<(outs RO:$dst), (ins RO:$rt, Mem:$addr), !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> { let DecoderMethod = "DecodeMem"; let mayStore = 1; @@ -769,42 +790,48 @@ let usesCustomInserter = 1 in { //===----------------------------------------------------------------------===// /// Arithmetic Instructions (ALU Immediate) -def ADDiu : ArithLogicI<"addiu", simm16, CPURegs, immSExt16, add>, +def ADDiu : ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>, ADDI_FM<0x9>, IsAsCheapAsAMove; -def ADDi : ArithLogicI<"addi", simm16, CPURegs>, ADDI_FM<0x8>; +def ADDi : ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>; def SLTi : SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, SLTI_FM<0xa>; def SLTiu : SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, SLTI_FM<0xb>; -def ANDi : ArithLogicI<"andi", uimm16, CPURegs, immZExt16, and>, ADDI_FM<0xc>; -def ORi : ArithLogicI<"ori", uimm16, CPURegs, immZExt16, or>, ADDI_FM<0xd>; -def XORi : ArithLogicI<"xori", uimm16, CPURegs, immZExt16, xor>, ADDI_FM<0xe>; +def ANDi : ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>, + ADDI_FM<0xc>; +def ORi : ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>, + ADDI_FM<0xd>; +def XORi : ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>, + ADDI_FM<0xe>; def LUi : 
LoadUpper<"lui", CPURegs, uimm16>, LUI_FM; /// Arithmetic Instructions (3-Operand, R-Type) -def ADDu : ArithLogicR<"addu", CPURegs, 1, IIAlu, add>, ADD_FM<0, 0x21>; -def SUBu : ArithLogicR<"subu", CPURegs, 0, IIAlu, sub>, ADD_FM<0, 0x23>; -def MUL : ArithLogicR<"mul", CPURegs, 1, IIImul, mul>, ADD_FM<0x1c, 2>; -def ADD : ArithLogicR<"add", CPURegs>, ADD_FM<0, 0x20>; -def SUB : ArithLogicR<"sub", CPURegs>, ADD_FM<0, 0x22>; +def ADDu : ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>, ADD_FM<0, 0x21>; +def SUBu : ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>, ADD_FM<0, 0x23>; +def MUL : ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>, ADD_FM<0x1c, 2>; +def ADD : ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>; +def SUB : ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>; def SLT : SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>; def SLTu : SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>; -def AND : ArithLogicR<"and", CPURegs, 1, IIAlu, and>, ADD_FM<0, 0x24>; -def OR : ArithLogicR<"or", CPURegs, 1, IIAlu, or>, ADD_FM<0, 0x25>; -def XOR : ArithLogicR<"xor", CPURegs, 1, IIAlu, xor>, ADD_FM<0, 0x26>; -def NOR : LogicNOR<"nor", CPURegs>, ADD_FM<0, 0x27>; +def AND : ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>; +def OR : ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>; +def XOR : ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>; +def NOR : LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>; /// Shift Instructions -def SLL : shift_rotate_imm<"sll", shamt, CPURegs, shl, immZExt5>, SRA_FM<0, 0>; -def SRL : shift_rotate_imm<"srl", shamt, CPURegs, srl, immZExt5>, SRA_FM<2, 0>; -def SRA : shift_rotate_imm<"sra", shamt, CPURegs, sra, immZExt5>, SRA_FM<3, 0>; -def SLLV : shift_rotate_reg<"sllv", CPURegs, shl>, SRLV_FM<4, 0>; -def SRLV : shift_rotate_reg<"srlv", CPURegs, srl>, SRLV_FM<6, 0>; -def SRAV : shift_rotate_reg<"srav", CPURegs, sra>, SRLV_FM<7, 0>; +def SLL : shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>, + SRA_FM<0, 0>; +def SRL : shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>, + SRA_FM<2, 0>; +def SRA : shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>, + SRA_FM<3, 0>; +def SLLV : shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>; +def SRLV : shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>; +def SRAV : shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>; // Rotate Instructions let Predicates = [HasMips32r2, HasStdEnc] in { - def ROTR : shift_rotate_imm<"rotr", shamt, CPURegs, rotr, immZExt5>, + def ROTR : shift_rotate_imm<"rotr", shamt, CPURegsOpnd, rotr, immZExt5>, SRA_FM<2, 1>; - def ROTRV : shift_rotate_reg<"rotrv", CPURegs, rotr>, SRLV_FM<6, 1>; + def ROTRV : shift_rotate_reg<"rotrv", CPURegsOpnd, rotr>, SRLV_FM<6, 1>; } /// Load and Store Instructions @@ -828,13 +855,13 @@ def SYNC : SYNC_FT, SYNC_FM; /// Load-linked, Store-conditional let Predicates = [NotN64, HasStdEnc] in { - def LL : LLBase<"ll", CPURegs, mem>, LW_FM<0x30>; - def SC : SCBase<"sc", CPURegs, mem>, LW_FM<0x38>; + def LL : LLBase<"ll", CPURegsOpnd, mem>, LW_FM<0x30>; + def SC : SCBase<"sc", CPURegsOpnd, mem>, LW_FM<0x38>; } let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in { - def LL_P8 : LLBase<"ll", CPURegs, mem64>, LW_FM<0x30>; - def SC_P8 : SCBase<"sc", CPURegs, mem64>, LW_FM<0x38>; + def LL_P8 : LLBase<"ll", CPURegsOpnd, mem64>, LW_FM<0x30>; + def SC_P8 : SCBase<"sc", CPURegsOpnd, mem64>, LW_FM<0x38>; } /// Jump and Branch Instructions @@ -853,18 +880,41 @@ def BAL_BR: BAL_FT, BAL_FM; def JAL : 
JumpLink<"jal">, FJ<3>; def JALR : JumpLinkReg<"jalr", CPURegs>, JALR_FM; -def BGEZAL : BGEZAL_FT<"bgezal", CPURegs>, BGEZAL_FM<0x11>; -def BLTZAL : BGEZAL_FT<"bltzal", CPURegs>, BGEZAL_FM<0x10>; +def JALRPseudo : JumpLinkRegPseudo<CPURegs, JALR, RA>; +def BGEZAL : BGEZAL_FT<"bgezal", CPURegsOpnd>, BGEZAL_FM<0x11>; +def BLTZAL : BGEZAL_FT<"bltzal", CPURegsOpnd>, BGEZAL_FM<0x10>; def TAILCALL : JumpFJ<calltarget, "j", MipsTailCall, imm>, FJ<2>, IsTailCall; def TAILCALL_R : JumpFR<CPURegs, MipsTailCall>, MTLO_FM<8>, IsTailCall; def RET : RetBase<CPURegs>, MTLO_FM<8>; +// Exception handling related node and instructions. +// The conversion sequence is: +// ISD::EH_RETURN -> MipsISD::EH_RETURN -> +// MIPSeh_return -> (stack change + indirect branch) +// +// MIPSeh_return takes the place of regular return instruction +// but takes two arguments (V1, V0) which are used for storing +// the offset and return address respectively. +def SDT_MipsEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>; + +def MIPSehret : SDNode<"MipsISD::EH_RETURN", SDT_MipsEHRET, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in { + def MIPSeh_return32 : MipsPseudo<(outs), (ins CPURegs:$spoff, CPURegs:$dst), + [(MIPSehret CPURegs:$spoff, CPURegs:$dst)]>; + def MIPSeh_return64 : MipsPseudo<(outs), (ins CPU64Regs:$spoff, + CPU64Regs:$dst), + [(MIPSehret CPU64Regs:$spoff, CPU64Regs:$dst)]>; +} + /// Multiply and Divide Instructions. -def MULT : Mult<"mult", IIImul, CPURegs, [HI, LO]>, MULT_FM<0, 0x18>; -def MULTu : Mult<"multu", IIImul, CPURegs, [HI, LO]>, MULT_FM<0, 0x19>; -def SDIV : Div<MipsDivRem, "div", IIIdiv, CPURegs, [HI, LO]>, MULT_FM<0, 0x1a>; -def UDIV : Div<MipsDivRemU, "divu", IIIdiv, CPURegs, [HI, LO]>, +def MULT : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>; +def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>; +def SDIV : Div<MipsDivRem, "div", IIIdiv, CPURegsOpnd, [HI, LO]>, + MULT_FM<0, 0x1a>; +def UDIV : Div<MipsDivRemU, "divu", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1b>; def MTHI : MoveToLOHI<"mthi", CPURegs, [HI]>, MTLO_FM<0x11>; @@ -877,15 +927,14 @@ def SEB : SignExtInReg<"seb", i8, CPURegs>, SEB_FM<0x10, 0x20>; def SEH : SignExtInReg<"seh", i16, CPURegs>, SEB_FM<0x18, 0x20>; /// Count Leading -def CLZ : CountLeading0<"clz", CPURegs>, CLO_FM<0x20>; -def CLO : CountLeading1<"clo", CPURegs>, CLO_FM<0x21>; +def CLZ : CountLeading0<"clz", CPURegsOpnd>, CLO_FM<0x20>; +def CLO : CountLeading1<"clo", CPURegsOpnd>, CLO_FM<0x21>; /// Word Swap Bytes Within Halfwords -def WSBH : SubwordSwap<"wsbh", CPURegs>, SEB_FM<2, 0x20>; +def WSBH : SubwordSwap<"wsbh", CPURegsOpnd>, SEB_FM<2, 0x20>; /// No operation. -/// FIXME: NOP should be an alias of "sll $0, $0, 0". -def NOP : InstSE<(outs), (ins), "nop", [], IIAlu, FrmJ>, NOP_FM; +def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>; // FrameIndexes are legalized when they are operands from load/store // instructions. 
The same does not happen for stack address copies, so an @@ -899,66 +948,86 @@ def MADDU : MArithR<"maddu", MipsMAddu, 1>, MULT_FM<0x1c, 1>; def MSUB : MArithR<"msub", MipsMSub>, MULT_FM<0x1c, 4>; def MSUBU : MArithR<"msubu", MipsMSubu>, MULT_FM<0x1c, 5>; -def RDHWR : ReadHardware<CPURegs, HWRegs>, RDHWR_FM; +def RDHWR : ReadHardware<CPURegs, HWRegsOpnd>, RDHWR_FM; -def EXT : ExtBase<"ext", CPURegs>, EXT_FM<0>; -def INS : InsBase<"ins", CPURegs>, EXT_FM<4>; +def EXT : ExtBase<"ext", CPURegsOpnd>, EXT_FM<0>; +def INS : InsBase<"ins", CPURegsOpnd>, EXT_FM<4>; /// Move Control Registers From/To CPU Registers -def MFC0_3OP : MFC3OP<(outs CPURegs:$rt), (ins CPURegs:$rd, uimm16:$sel), +def MFC0_3OP : MFC3OP<(outs CPURegsOpnd:$rt), + (ins CPURegsOpnd:$rd, uimm16:$sel), "mfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 0>; -def MTC0_3OP : MFC3OP<(outs CPURegs:$rd, uimm16:$sel), (ins CPURegs:$rt), +def MTC0_3OP : MFC3OP<(outs CPURegsOpnd:$rd, uimm16:$sel), + (ins CPURegsOpnd:$rt), "mtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 4>; -def MFC2_3OP : MFC3OP<(outs CPURegs:$rt), (ins CPURegs:$rd, uimm16:$sel), +def MFC2_3OP : MFC3OP<(outs CPURegsOpnd:$rt), + (ins CPURegsOpnd:$rd, uimm16:$sel), "mfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 0>; -def MTC2_3OP : MFC3OP<(outs CPURegs:$rd, uimm16:$sel), (ins CPURegs:$rt), +def MTC2_3OP : MFC3OP<(outs CPURegsOpnd:$rd, uimm16:$sel), + (ins CPURegsOpnd:$rt), "mtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 4>; //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// -def : InstAlias<"move $dst,$src", (ADD CPURegs:$dst,CPURegs:$src,ZERO)>; -def : InstAlias<"bal $offset", (BGEZAL RA,brtarget:$offset)>; -def : InstAlias<"addu $rs,$rt,$imm", - (ADDiu CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; -def : InstAlias<"add $rs,$rt,$imm", - (ADDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; -def : InstAlias<"and $rs,$rt,$imm", - (ANDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; -def : InstAlias<"j $rs", (JR CPURegs:$rs)>; -def : InstAlias<"not $rt,$rs", (NOR CPURegs:$rt,CPURegs:$rs,ZERO)>; -def : InstAlias<"neg $rt,$rs", (SUB CPURegs:$rt,ZERO,CPURegs:$rs)>; -def : InstAlias<"negu $rt,$rs", (SUBu CPURegs:$rt,ZERO,CPURegs:$rs)>; -def : InstAlias<"slt $rs,$rt,$imm", - (SLTi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; -def : InstAlias<"xor $rs,$rt,$imm", - (XORi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; -def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegs:$rt, CPURegs:$rd, 0)>; -def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP CPURegs:$rd, 0, CPURegs:$rt)>; -def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegs:$rt, CPURegs:$rd, 0)>; -def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegs:$rd, 0, CPURegs:$rt)>; +def : InstAlias<"move $dst, $src", + (ADDu CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 1>, + Requires<[NotMips64]>; +def : InstAlias<"move $dst, $src", + (OR CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 0>, + Requires<[NotMips64]>; +def : InstAlias<"bal $offset", (BGEZAL RA, brtarget:$offset), 1>; +def : InstAlias<"addu $rs, $rt, $imm", + (ADDiu CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>; +def : InstAlias<"add $rs, $rt, $imm", + (ADDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>; +def : InstAlias<"and $rs, $rt, $imm", + (ANDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>; +def : InstAlias<"j $rs", (JR CPURegs:$rs), 0>, + Requires<[NotMips64]>; +def : InstAlias<"jalr $rs", (JALR RA, CPURegs:$rs)>, Requires<[NotMips64]>; +def : InstAlias<"not $rt, $rs", + (NOR CPURegsOpnd:$rt, CPURegsOpnd:$rs, ZERO),
1>; +def : InstAlias<"neg $rt, $rs", + (SUB CPURegsOpnd:$rt, ZERO, CPURegsOpnd:$rs), 1>; +def : InstAlias<"negu $rt, $rs", + (SUBu CPURegsOpnd:$rt, ZERO, CPURegsOpnd:$rs), 1>; +def : InstAlias<"slt $rs, $rt, $imm", + (SLTi CPURegsOpnd:$rs, CPURegs:$rt, simm16:$imm), 0>; +def : InstAlias<"xor $rs, $rt, $imm", + (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>, + Requires<[NotMips64]>; +def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>; +def : InstAlias<"mfc0 $rt, $rd", + (MFC0_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>; +def : InstAlias<"mtc0 $rt, $rd", + (MTC0_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt), 0>; +def : InstAlias<"mfc2 $rt, $rd", + (MFC2_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>; +def : InstAlias<"mtc2 $rt, $rd", + (MTC2_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt), 0>; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// -class LoadImm32< string instr_asm, Operand Od, RegisterClass RC> : - MipsAsmPseudoInst<(outs RC:$rt), (ins Od:$imm32), +class LoadImm32< string instr_asm, Operand Od, RegisterOperand RO> : + MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32), !strconcat(instr_asm, "\t$rt, $imm32")> ; -def LoadImm32Reg : LoadImm32<"li", shamt,CPURegs>; +def LoadImm32Reg : LoadImm32<"li", shamt,CPURegsOpnd>; -class LoadAddress<string instr_asm, Operand MemOpnd, RegisterClass RC> : - MipsAsmPseudoInst<(outs RC:$rt), (ins MemOpnd:$addr), +class LoadAddress<string instr_asm, Operand MemOpnd, RegisterOperand RO> : + MipsAsmPseudoInst<(outs RO:$rt), (ins MemOpnd:$addr), !strconcat(instr_asm, "\t$rt, $addr")> ; -def LoadAddr32Reg : LoadAddress<"la", mem, CPURegs>; +def LoadAddr32Reg : LoadAddress<"la", mem, CPURegsOpnd>; -class LoadAddressImm<string instr_asm, Operand Od, RegisterClass RC> : - MipsAsmPseudoInst<(outs RC:$rt), (ins Od:$imm32), +class LoadAddressImm<string instr_asm, Operand Od, RegisterOperand RO> : + MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32), !strconcat(instr_asm, "\t$rt, $imm32")> ; -def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegs>; +def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegsOpnd>; @@ -1045,7 +1114,7 @@ def : WrapperPat<tglobaltlsaddr, ADDiu, CPURegs>; // Mips does not have "not", so we expand our way def : MipsPat<(not CPURegs:$in), - (NOR CPURegs:$in, ZERO)>; + (NOR CPURegsOpnd:$in, ZERO)>; // extended loads let Predicates = [NotN64, HasStdEnc] in { diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index 30f68b1..2efe534 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -10,10 +10,10 @@ // This pass expands a branch or jump instruction into a long branch if its // offset is too large to fit into its immediate field. // -// FIXME: -// 1. Fix pc-region jump instructions which cross 256MB segment boundaries. +// FIXME: +// 1. Fix pc-region jump instructions which cross 256MB segment boundaries. // 2. If program has inline assembly statements whose size cannot be -// determined accurately, load branch target addresses from the GOT. +// determined accurately, load branch target addresses from the GOT. 
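The MipsLongBranch header comment just above summarizes what the pass does: a branch whose displacement no longer fits its immediate field is rewritten into a longer sequence. For MIPS I-type branches the displacement is a signed 16-bit count of 4-byte words, measured from the instruction after the branch, so the reachability test reduces to a range check along these lines (a minimal sketch, not the pass's actual logic):

#include <cstdint>

// True when TargetAddr is reachable from a branch at BranchAddr with a
// signed 16-bit, word-scaled displacement (the classic MIPS encoding).
bool fitsShortBranch(int64_t BranchAddr, int64_t TargetAddr) {
  int64_t WordOffset = (TargetAddr - (BranchAddr + 4)) / 4;
  return WordOffset >= INT16_MIN && WordOffset <= INT16_MAX;
}

When this check fails, the pass emits the long form instead; the FIXME notes two remaining holes, pc-region jumps that cross 256MB segment boundaries and inline assembly whose size cannot be determined.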
//===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-long-branch" diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index 0c71596..59b23f7 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -56,4 +56,20 @@ unsigned MipsFunctionInfo::getMips16SPAliasReg() { return Mips16SPAliasReg = MF.getRegInfo().createVirtualRegister(RC); } +void MipsFunctionInfo::createEhDataRegsFI() { + for (int I = 0; I < 4; ++I) { + const MipsSubtarget &ST = MF.getTarget().getSubtarget<MipsSubtarget>(); + const TargetRegisterClass *RC = ST.isABI_N64() ? + &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass; + + EhDataRegFI[I] = MF.getFrameInfo()->CreateStackObject(RC->getSize(), + RC->getAlignment(), false); + } +} + +bool MipsFunctionInfo::isEhDataRegFI(int FI) const { + return CallsEhReturn && (FI == EhDataRegFI[0] || FI == EhDataRegFI[1] + || FI == EhDataRegFI[2] || FI == EhDataRegFI[3]); +} + void MipsFunctionInfo::anchor() { } diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index eb6e1cf..b05b348 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -53,10 +53,16 @@ class MipsFunctionInfo : public MachineFunctionInfo { /// Size of incoming argument area. unsigned IncomingArgSize; + /// CallsEhReturn - Whether the function calls llvm.eh.return. + bool CallsEhReturn; + + /// Frame objects for spilling eh data registers. + int EhDataRegFI[4]; + public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0), - VarArgsFrameIndex(0) + VarArgsFrameIndex(0), CallsEhReturn(false) {} unsigned getSRetReturnReg() const { return SRetReturnReg; } @@ -78,6 +84,14 @@ public: } unsigned getIncomingArgSize() const { return IncomingArgSize; } + + bool callsEhReturn() const { return CallsEhReturn; } + void setCallsEhReturn() { CallsEhReturn = true; } + + void createEhDataRegsFI(); + int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; } + bool isEhDataRegFI(int FI) const; + }; } // end of namespace llvm diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 70eb6f3..3250733 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -47,6 +47,28 @@ MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST) unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; } + +unsigned +MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + switch (RC->getID()) { + default: + return 0; + case Mips::CPURegsRegClassID: + case Mips::CPU64RegsRegClassID: + case Mips::DSPRegsRegClassID: { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + return 28 - TFI->hasFP(MF); + } + case Mips::FGR32RegClassID: + return 32; + case Mips::AFGR64RegClassID: + return 16; + case Mips::FGR64RegClassID: + return 32; + } +} + //===----------------------------------------------------------------------===// // Callee Saved Registers methods //===----------------------------------------------------------------------===// @@ -155,21 +177,14 @@ MipsRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { // direct reference. 
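The getRegPressureLimit hook added above tells the scheduler how many registers of a class it may assume are available; for the integer classes it returns 28 - TFI->hasFP(MF). One plausible reading of that constant: 32 GPRs minus four that are never allocatable (presumably $zero, $k0, $k1 and $sp), minus one more when the function keeps a dedicated frame pointer. An illustrative restatement of the arithmetic -- the reserved set named here is an assumption, the backend's register info is the authority:

// Recompute the integer pressure limit returned by getRegPressureLimit.
// AlwaysReserved = 4 assumes $zero, $k0, $k1 and $sp are never allocatable.
unsigned gprPressureLimit(bool HasFramePointer) {
  const unsigned NumGPRs = 32;
  const unsigned AlwaysReserved = 4;
  return NumGPRs - AlwaysReserved - (HasFramePointer ? 1 : 0); // 28 or 27
}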
void MipsRegisterInfo:: eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS) const { + unsigned FIOperandNum, RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); - unsigned i = 0; - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - } - DEBUG(errs() << "\nFunction : " << MF.getName() << "\n"; errs() << "<--------->\n" << MI); - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); uint64_t stackSize = MF.getFrameInfo()->getStackSize(); int64_t spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex); @@ -177,7 +192,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, << "spOffset : " << spOffset << "\n" << "stackSize : " << stackSize << "\n"); - eliminateFI(MI, i, FrameIndex, stackSize, spOffset); + eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset); } unsigned MipsRegisterInfo:: diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 78adf7f..13b2a6a 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -42,6 +42,8 @@ public: void adjustMipsStackFrame(MachineFunction &MF) const; /// Code Generation virtual methods... + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; @@ -53,7 +55,8 @@ public: /// Stack Frame Processing Methods void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index f07a10c..f93dd86 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -331,3 +331,48 @@ def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>; // Accumulator Registers def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>; + +def CPURegsAsmOperand : AsmOperandClass { + let Name = "CPURegsAsm"; + let ParserMethod = "parseCPURegs"; +} + +def CPU64RegsAsmOperand : AsmOperandClass { + let Name = "CPU64RegsAsm"; + let ParserMethod = "parseCPU64Regs"; +} + +def CCRAsmOperand : AsmOperandClass { + let Name = "CCRAsm"; + let ParserMethod = "parseCCRRegs"; +} + +def CPURegsOpnd : RegisterOperand<CPURegs, "printCPURegs"> { + let ParserMatchClass = CPURegsAsmOperand; +} + +def CPU64RegsOpnd : RegisterOperand<CPU64Regs, "printCPURegs"> { + let ParserMatchClass = CPU64RegsAsmOperand; +} + +def CCROpnd : RegisterOperand<CCR, "printCPURegs"> { + let ParserMatchClass = CCRAsmOperand; +} + +def HWRegsAsmOperand : AsmOperandClass { + let Name = "HWRegsAsm"; + let ParserMethod = "parseHWRegs"; +} + +def HW64RegsAsmOperand : AsmOperandClass { + let Name = "HW64RegsAsm"; + let ParserMethod = "parseHW64Regs"; +} + +def HWRegsOpnd : RegisterOperand<HWRegs, "printCPURegs"> { + let ParserMatchClass = HWRegsAsmOperand; +} + +def HW64RegsOpnd : RegisterOperand<HWRegs64, "printCPURegs"> { + let ParserMatchClass = HW64RegsAsmOperand; +} diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 60b1233..0dd6713 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ 
b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -29,9 +29,21 @@ using namespace llvm; +unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const { + static const unsigned EhDataReg[] = { + Mips::A0, Mips::A1, Mips::A2, Mips::A3 + }; + static const unsigned EhDataReg64[] = { + Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64 + }; + + return STI.isABI_N64() ? EhDataReg64[I] : EhDataReg[I]; +} + void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); const MipsRegisterInfo *RegInfo = static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo()); const MipsSEInstrInfo &TII = @@ -105,6 +117,30 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { } } + if (MipsFI->callsEhReturn()) { + const TargetRegisterClass *RC = STI.isABI_N64() ? + &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass; + + // Insert instructions that spill eh data registers. + for (int I = 0; I < 4; ++I) { + if (!MBB.isLiveIn(ehDataReg(I))) + MBB.addLiveIn(ehDataReg(I)); + TII.storeRegToStackSlot(MBB, MBBI, ehDataReg(I), false, + MipsFI->getEhDataRegFI(I), RC, RegInfo); + } + + // Emit .cfi_offset directives for eh data registers. + MCSymbol *CSLabel2 = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel2); + for (int I = 0; I < 4; ++I) { + int64_t Offset = MFI->getObjectOffset(MipsFI->getEhDataRegFI(I)); + DstML = MachineLocation(MachineLocation::VirtualFP, Offset); + SrcML = MachineLocation(ehDataReg(I)); + Moves.push_back(MachineMove(CSLabel2, DstML, SrcML)); + } + } + // if framepointer enabled, set it to point to the stack pointer. if (hasFP(MF)) { // Insert instruction "move $fp, $sp" at this location. @@ -124,6 +160,9 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + const MipsRegisterInfo *RegInfo = + static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo()); const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo()); DebugLoc dl = MBBI->getDebugLoc(); @@ -144,6 +183,22 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO); } + if (MipsFI->callsEhReturn()) { + const TargetRegisterClass *RC = STI.isABI_N64() ? + &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass; + + // Find first instruction that restores a callee-saved register. + MachineBasicBlock::iterator I = MBBI; + for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i) + --I; + + // Insert instructions that restore eh data registers. 
+ for (int J = 0; J < 4; ++J) { + TII.loadRegFromStackSlot(MBB, I, ehDataReg(J), MipsFI->getEhDataRegFI(J), + RC, RegInfo); + } + } + // Get the number of bytes from FrameInfo uint64_t StackSize = MFI->getStackSize(); @@ -194,16 +249,41 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { return isInt<16>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects(); } +// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions +void MipsSEFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const MipsSEInstrInfo &TII = + *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo()); + + if (!hasReservedCallFrame(MF)) { + int64_t Amount = I->getOperand(0).getImm(); + + if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) + Amount = -Amount; + + unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; + TII.adjustStackPtr(SP, Amount, MBB, I); + } + + MBB.erase(I); +} + void MipsSEFrameLowering:: processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { MachineRegisterInfo &MRI = MF.getRegInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; // Mark $fp as used if function has dedicated frame pointer. if (hasFP(MF)) MRI.setPhysRegUsed(FP); + // Create spill slots for eh data registers if function calls eh_return. + if (MipsFI->callsEhReturn()) + MipsFI->createEhDataRegsFI(); + // Set scavenging frame index if necessary. uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() + estimateStackSize(MF); diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h index 6481a0a..7becd25 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.h +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -28,6 +28,10 @@ public: void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, @@ -37,6 +41,7 @@ public: void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const; + unsigned ehDataReg(unsigned I) const; }; } // End llvm namespace diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index cd8f9f4..a9809ef 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -220,6 +220,10 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case Mips::ExtractElementF64: ExpandExtractElementF64(MBB, MI); break; + case Mips::MIPSeh_return32: + case Mips::MIPSeh_return64: + ExpandEhReturn(MBB, MI); + break; } MBB.erase(MI); @@ -356,6 +360,31 @@ void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB, .addReg(HiReg); } +void MipsSEInstrInfo::ExpandEhReturn(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + // This pseudo instruction is generated as part of the lowering of + // ISD::EH_RETURN. We convert it to a stack increment by OffsetReg, and + // indirect jump to TargetReg + const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>(); + unsigned ADDU = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned OR = STI.isABI_N64() ? Mips::OR64 : Mips::OR; + unsigned JR = STI.isABI_N64() ? 
Mips::JR64 : Mips::JR; + unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; + unsigned RA = STI.isABI_N64() ? Mips::RA_64 : Mips::RA; + unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + unsigned OffsetReg = I->getOperand(0).getReg(); + unsigned TargetReg = I->getOperand(1).getReg(); + + // or $ra, $v0, $zero + // addu $sp, $sp, $v1 + // jr $ra + BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(OR), RA) + .addReg(TargetReg).addReg(ZERO); + BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), SP) + .addReg(SP).addReg(OffsetReg); + BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(JR)).addReg(RA); +} + const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) { return new MipsSEInstrInfo(TM); } diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index 55b78b2..3e22b33 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -85,6 +85,8 @@ private: MachineBasicBlock::iterator I) const; void ExpandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; + void ExpandEhReturn(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; }; } diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index abeab7b..a39b393 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -54,28 +54,6 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const { return true; } -// This function eliminate ADJCALLSTACKDOWN, -// ADJCALLSTACKUP pseudo instructions -void MipsSERegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (!TFI->hasReservedCallFrame(MF)) { - int64_t Amount = I->getOperand(0).getImm(); - - if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) - Amount = -Amount; - - const MipsSEInstrInfo *II = static_cast<const MipsSEInstrInfo*>(&TII); - unsigned SP = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; - - II->adjustStackPtr(SP, Amount, MBB, I); - } - - MBB.erase(I); -} - void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, @@ -83,6 +61,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); int MinCSFI = 0; @@ -93,15 +72,18 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); } + bool EhDataRegFI = MipsFI->isEhDataRegFI(FrameIndex); + // The following stack frame objects are always referenced relative to $sp: // 1. Outgoing arguments. // 2. Pointer to dynamically allocated stack space. // 3. Locations for callee-saved registers. + // 4. Locations for eh data registers. // Everything else is referenced relative to whatever register // getFrameRegister() returns. unsigned FrameReg; - if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) + if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI) FrameReg = Subtarget.isABI_N64() ? 
Mips::SP_64 : Mips::SP; else FrameReg = getFrameRegister(MF); diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h index 7437bd3..f6827e9 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.h +++ b/lib/Target/Mips/MipsSERegisterInfo.h @@ -31,10 +31,6 @@ public: bool requiresFrameIndexScavenging(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - private: virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 30d377a..75b4c98 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -26,13 +26,14 @@ void MipsSubtarget::anchor() { } MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool little, - Reloc::Model RM) : + Reloc::Model _RM) : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false), HasBitCount(false), HasFPIdx(false), - InMips16Mode(false), HasDSP(false), HasDSPR2(false), IsAndroid(false) + InMips16Mode(false), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), + IsAndroid(false), RM(_RM) { std::string CPUName = CPU; if (CPUName.empty()) diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 6a20815..32baa3d 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -14,6 +14,7 @@ #ifndef MIPSSUBTARGET_H #define MIPSSUBTARGET_H +#include "MCTargetDesc/MipsReginfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -88,6 +89,9 @@ protected: // InMips16 -- can process Mips16 instructions bool InMips16Mode; + // InMicroMips -- can process MicroMips instructions + bool InMicroMipsMode; + // HasDSP, HasDSPR2 -- supports DSP ASE. 
bool HasDSP, HasDSPR2; @@ -96,6 +100,12 @@ protected: InstrItineraryData InstrItins; + // The register info section object + MipsReginfo MRI; + + // Relocation Model + Reloc::Model RM; + public: virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, AntiDepBreakMode& Mode, @@ -131,6 +141,7 @@ public: bool isNotSingleFloat() const { return !IsSingleFloat; } bool hasVFPU() const { return HasVFPU; } bool inMips16Mode() const { return InMips16Mode; } + bool inMicroMipsMode() const { return InMicroMipsMode; } bool hasDSP() const { return HasDSP; } bool hasDSPR2() const { return HasDSPR2; } bool isAndroid() const { return IsAndroid; } @@ -145,6 +156,12 @@ public: bool hasSwap() const { return HasSwap; } bool hasBitCount() const { return HasBitCount; } bool hasFPIdx() const { return HasFPIdx; } + + // Grab the MipsReginfo object + const MipsReginfo &getMReginfo() const { return MRI; } + + // Grab the relocation model + Reloc::Model getRelocationModel() const {return RM;} }; } // End llvm namespace diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index 9aea764..4c748c5 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -38,6 +38,20 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ ELF::SHF_WRITE |ELF::SHF_ALLOC, SectionKind::getBSS()); + // Register info section + const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>(); + if (Subtarget.isABI_N64() || Subtarget.isABI_N32()) + ReginfoSection = + getContext().getELFSection(".MIPS.options", + ELF::SHT_MIPS_OPTIONS, + ELF::SHF_ALLOC |ELF::SHF_MIPS_NOSTRIP, + SectionKind::getMetadata()); + else + ReginfoSection = + getContext().getELFSection(".reginfo", + ELF::SHT_MIPS_REGINFO, + ELF::SHF_ALLOC, + SectionKind::getMetadata()); } // A address must be loaded from a small section if its size is less than the diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h index c394a9d..c0e9140 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.h +++ b/lib/Target/Mips/MipsTargetObjectFile.h @@ -17,6 +17,7 @@ namespace llvm { class MipsTargetObjectFile : public TargetLoweringObjectFileELF { const MCSection *SmallDataSection; const MCSection *SmallBSSSection; + const MCSection *ReginfoSection; public: void Initialize(MCContext &Ctx, const TargetMachine &TM); @@ -35,6 +36,7 @@ namespace llvm { const TargetMachine &TM) const; // TODO: Classify globals as mips wishes. 
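+ // Returns the reginfo section set up in Initialize(): .MIPS.options for + // the N32/N64 ABIs, .reginfo otherwise.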
+ const MCSection *getReginfoSection() const { return ReginfoSection; } }; } // end namespace llvm diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index 7cb16b4..47baef6 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -22,7 +22,6 @@ set(NVPTXCodeGen_sources NVPTXAllocaHoisting.cpp NVPTXAsmPrinter.cpp NVPTXUtilities.cpp - VectorElementize.cpp ) add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index 1d41665..6191819 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -30,8 +30,9 @@ void NVPTXMCAsmInfo::anchor() { } NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) { Triple TheTriple(TT); - if (TheTriple.getArch() == Triple::nvptx64) - PointerSize = 8; + if (TheTriple.getArch() == Triple::nvptx64) { + PointerSize = CalleeSaveStackSlotSize = 8; + } CommentString = "//"; diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index 097b50a..b46ea88 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -53,7 +53,6 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) { FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel); -FunctionPass *createVectorElementizePass(NVPTXTargetMachine &); FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &); diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 22da8f3..0115e1f 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -503,21 +503,7 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, O << getNVPTXRegClassStr(RC) << mapped_vr; return; } - // Vector virtual register - if (getNVPTXVectorSize(RC) == 4) - O << "{" - << getNVPTXRegClassStr(RC) << mapped_vr << "_0, " - << getNVPTXRegClassStr(RC) << mapped_vr << "_1, " - << getNVPTXRegClassStr(RC) << mapped_vr << "_2, " - << getNVPTXRegClassStr(RC) << mapped_vr << "_3" - << "}"; - else if (getNVPTXVectorSize(RC) == 2) - O << "{" - << getNVPTXRegClassStr(RC) << mapped_vr << "_0, " - << getNVPTXRegClassStr(RC) << mapped_vr << "_1" - << "}"; - else - llvm_unreachable("Unsupported vector size"); + report_fatal_error("Bad register!"); } void @@ -1314,7 +1300,8 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, O << "shared" ; break; default: - llvm_unreachable("unexpected address space"); + report_fatal_error("Bad address space found while emitting PTX"); + break; } } @@ -2023,29 +2010,9 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) case NVPTX::StoreParamI64: case NVPTX::StoreParamI8: case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8: case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16: - case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64: - case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32: - case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8: - case NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16: - case NVPTX::StoreParamScalar4I32: case NVPTX::StoreParamScalar4I8: - case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64: - case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32: - case NVPTX::StoreParamV2I64: case 
NVPTX::StoreParamV2I8: - case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16: - case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8: case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64: case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32: case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8: - case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64: - case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32: - case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8: - case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16: - case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8: - case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64: - case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32: - case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8: - case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16: - case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8: case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64: case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32: case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64: @@ -2056,16 +2023,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64: case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32: case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8: - case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64: - case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32: - case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8: - case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16: - case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8: - case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64: - case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32: - case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8: - case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16: - case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8: case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE: return true; } diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp index 50072c5..bb2c55c 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -74,3 +74,14 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { } + +// This function eliminates ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void NVPTXFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + // Simply discard ADJCALLSTACKDOWN, + // ADJCALLSTACKUP instructions. 
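+ // The PTX output needs no explicit call-frame setup or teardown, so + // there is nothing to emit in their place.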
+ MBB.erase(I); +} + diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h index ee87b39..d34e7be 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.h +++ b/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -33,6 +33,10 @@ public: virtual void emitPrologue(MachineFunction &MF) const; virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; }; } // End llvm namespace diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 36ab7f5..481f13a 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -105,6 +105,21 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) { case ISD::STORE: ResNode = SelectStore(N); break; + case NVPTXISD::LoadV2: + case NVPTXISD::LoadV4: + ResNode = SelectLoadVector(N); + break; + case NVPTXISD::LDGV2: + case NVPTXISD::LDGV4: + case NVPTXISD::LDUV2: + case NVPTXISD::LDUV4: + ResNode = SelectLDGLDUVector(N); + break; + case NVPTXISD::StoreV2: + case NVPTXISD::StoreV4: + ResNode = SelectStoreVector(N); + break; + default: break; } if (ResNode) return ResNode; @@ -214,16 +229,6 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { case MVT::i64: Opcode = NVPTX::LD_i64_avar; break; case MVT::f32: Opcode = NVPTX::LD_f32_avar; break; case MVT::f64: Opcode = NVPTX::LD_f64_avar; break; - case MVT::v2i8: Opcode = NVPTX::LD_v2i8_avar; break; - case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break; - case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break; - case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break; - case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break; - case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break; - case MVT::v4i8: Opcode = NVPTX::LD_v4i8_avar; break; - case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break; - case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break; - case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break; default: return NULL; } SDValue Ops[] = { getI32Imm(isVolatile), @@ -244,16 +249,6 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { case MVT::i64: Opcode = NVPTX::LD_i64_asi; break; case MVT::f32: Opcode = NVPTX::LD_f32_asi; break; case MVT::f64: Opcode = NVPTX::LD_f64_asi; break; - case MVT::v2i8: Opcode = NVPTX::LD_v2i8_asi; break; - case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break; - case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break; - case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break; - case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break; - case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break; - case MVT::v4i8: Opcode = NVPTX::LD_v4i8_asi; break; - case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break; - case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break; - case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break; default: return NULL; } SDValue Ops[] = { getI32Imm(isVolatile), @@ -267,24 +262,26 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { } else if (Subtarget.is64Bit()? 
SelectADDRri64(N1.getNode(), N1, Base, Offset): SelectADDRri(N1.getNode(), N1, Base, Offset)) { - switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_ari; break; - case MVT::i16: Opcode = NVPTX::LD_i16_ari; break; - case MVT::i32: Opcode = NVPTX::LD_i32_ari; break; - case MVT::i64: Opcode = NVPTX::LD_i64_ari; break; - case MVT::f32: Opcode = NVPTX::LD_f32_ari; break; - case MVT::f64: Opcode = NVPTX::LD_f64_ari; break; - case MVT::v2i8: Opcode = NVPTX::LD_v2i8_ari; break; - case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break; - case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break; - case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break; - case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break; - case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break; - case MVT::v4i8: Opcode = NVPTX::LD_v4i8_ari; break; - case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break; - case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break; - case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break; - default: return NULL; + if (Subtarget.is64Bit()) { + switch (TargetVT) { + case MVT::i8: Opcode = NVPTX::LD_i8_ari_64; break; + case MVT::i16: Opcode = NVPTX::LD_i16_ari_64; break; + case MVT::i32: Opcode = NVPTX::LD_i32_ari_64; break; + case MVT::i64: Opcode = NVPTX::LD_i64_ari_64; break; + case MVT::f32: Opcode = NVPTX::LD_f32_ari_64; break; + case MVT::f64: Opcode = NVPTX::LD_f64_ari_64; break; + default: return NULL; + } + } else { + switch (TargetVT) { + case MVT::i8: Opcode = NVPTX::LD_i8_ari; break; + case MVT::i16: Opcode = NVPTX::LD_i16_ari; break; + case MVT::i32: Opcode = NVPTX::LD_i32_ari; break; + case MVT::i64: Opcode = NVPTX::LD_i64_ari; break; + case MVT::f32: Opcode = NVPTX::LD_f32_ari; break; + case MVT::f64: Opcode = NVPTX::LD_f64_ari; break; + default: return NULL; + } } SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), @@ -296,24 +293,26 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { MVT::Other, Ops, 8); } else { - switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_areg; break; - case MVT::i16: Opcode = NVPTX::LD_i16_areg; break; - case MVT::i32: Opcode = NVPTX::LD_i32_areg; break; - case MVT::i64: Opcode = NVPTX::LD_i64_areg; break; - case MVT::f32: Opcode = NVPTX::LD_f32_areg; break; - case MVT::f64: Opcode = NVPTX::LD_f64_areg; break; - case MVT::v2i8: Opcode = NVPTX::LD_v2i8_areg; break; - case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break; - case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break; - case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break; - case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break; - case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break; - case MVT::v4i8: Opcode = NVPTX::LD_v4i8_areg; break; - case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break; - case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break; - case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break; - default: return NULL; + if (Subtarget.is64Bit()) { + switch (TargetVT) { + case MVT::i8: Opcode = NVPTX::LD_i8_areg_64; break; + case MVT::i16: Opcode = NVPTX::LD_i16_areg_64; break; + case MVT::i32: Opcode = NVPTX::LD_i32_areg_64; break; + case MVT::i64: Opcode = NVPTX::LD_i64_areg_64; break; + case MVT::f32: Opcode = NVPTX::LD_f32_areg_64; break; + case MVT::f64: Opcode = NVPTX::LD_f64_areg_64; break; + default: return NULL; + } + } else { + switch (TargetVT) { + case MVT::i8: Opcode = NVPTX::LD_i8_areg; break; + case MVT::i16: Opcode = NVPTX::LD_i16_areg; break; + case MVT::i32: Opcode = NVPTX::LD_i32_areg; break; + case MVT::i64: Opcode = NVPTX::LD_i64_areg; break; + case 
MVT::f32: Opcode = NVPTX::LD_f32_areg; break; + case MVT::f64: Opcode = NVPTX::LD_f64_areg; break; + default: return NULL; + } } SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), @@ -334,6 +333,370 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { return NVPTXLD; } +SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { + + SDValue Chain = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDValue Addr, Offset, Base; + unsigned Opcode; + DebugLoc DL = N->getDebugLoc(); + SDNode *LD; + MemSDNode *MemSD = cast<MemSDNode>(N); + EVT LoadedVT = MemSD->getMemoryVT(); + + + if (!LoadedVT.isSimple()) + return NULL; + + // Address Space Setting + unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget); + + // Volatile Setting + // - .volatile is only available for .global and .shared + bool IsVolatile = MemSD->isVolatile(); + if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && + CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && + CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) + IsVolatile = false; + + // Vector Setting + MVT SimpleVT = LoadedVT.getSimpleVT(); + + // Type Setting: fromType + fromTypeWidth + // + // Signed : ISD::SEXTLOAD + // Unsigned : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the + // type is integer + // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float + MVT ScalarVT = SimpleVT.getScalarType(); + unsigned FromTypeWidth = ScalarVT.getSizeInBits(); + unsigned int FromType; + // The last operand holds the original LoadSDNode::getExtensionType() value + unsigned ExtensionType = + cast<ConstantSDNode>(N->getOperand(N->getNumOperands()-1))->getZExtValue(); + if (ExtensionType == ISD::SEXTLOAD) + FromType = NVPTX::PTXLdStInstCode::Signed; + else if (ScalarVT.isFloatingPoint()) + FromType = NVPTX::PTXLdStInstCode::Float; + else + FromType = NVPTX::PTXLdStInstCode::Unsigned; + + unsigned VecType; + + switch (N->getOpcode()) { + case NVPTXISD::LoadV2: VecType = NVPTX::PTXLdStInstCode::V2; break; + case NVPTXISD::LoadV4: VecType = NVPTX::PTXLdStInstCode::V4; break; + default: return NULL; + } + + EVT EltVT = N->getValueType(0); + + if (SelectDirectAddr(Op1, Addr)) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v2_avar; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v2_avar; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v2_avar; break; + case MVT::i64: Opcode = NVPTX::LDV_i64_v2_avar; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v2_avar; break; + case MVT::f64: Opcode = NVPTX::LDV_f64_v2_avar; break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v4_avar; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v4_avar; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v4_avar; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v4_avar; break; + } + break; + } + + SDValue Ops[] = { getI32Imm(IsVolatile), + getI32Imm(CodeAddrSpace), + getI32Imm(VecType), + getI32Imm(FromType), + getI32Imm(FromTypeWidth), + Addr, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); + } else if (Subtarget.is64Bit()? 
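+ // Next, try a symbol+offset (asi) address, using the 64-bit matcher + // when pointers are 64 bits wide.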
+ SelectADDRsi64(Op1.getNode(), Op1, Base, Offset): + SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v2_asi; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v2_asi; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v2_asi; break; + case MVT::i64: Opcode = NVPTX::LDV_i64_v2_asi; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v2_asi; break; + case MVT::f64: Opcode = NVPTX::LDV_f64_v2_asi; break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v4_asi; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v4_asi; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v4_asi; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v4_asi; break; + } + break; + } + + SDValue Ops[] = { getI32Imm(IsVolatile), + getI32Imm(CodeAddrSpace), + getI32Imm(VecType), + getI32Imm(FromType), + getI32Imm(FromTypeWidth), + Base, Offset, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); + } else if (Subtarget.is64Bit()? + SelectADDRri64(Op1.getNode(), Op1, Base, Offset): + SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari_64; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari_64; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari_64; break; + case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari_64; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari_64; break; + case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari_64; break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari_64; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari_64; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari_64; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari_64; break; + } + break; + } + } else { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari; break; + case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari; break; + case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari; break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari; break; + } + break; + } + } + + SDValue Ops[] = { getI32Imm(IsVolatile), + getI32Imm(CodeAddrSpace), + getI32Imm(VecType), + getI32Imm(FromType), + getI32Imm(FromTypeWidth), + Base, Offset, Chain }; + + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); + } else { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg_64; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg_64; break; 
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg_64; break; + case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg_64; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg_64; break; + case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg_64; break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg_64; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg_64; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg_64; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg_64; break; + } + break; + } + } else { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg; break; + case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg; break; + case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg; break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg; break; + case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg; break; + case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg; break; + case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg; break; + } + break; + } + } + + SDValue Ops[] = { getI32Imm(IsVolatile), + getI32Imm(CodeAddrSpace), + getI32Imm(VecType), + getI32Imm(FromType), + getI32Imm(FromTypeWidth), + Op1, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); + } + + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); + cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); + + return LD; +} + +SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { + + SDValue Chain = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + unsigned Opcode; + DebugLoc DL = N->getDebugLoc(); + SDNode *LD; + + EVT RetVT = N->getValueType(0); + + // Select opcode + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::LDGV2: + switch (RetVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; break; + case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; break; + case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; break; + case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; break; + case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; break; + case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; break; + } + break; + case NVPTXISD::LDGV4: + switch (RetVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; break; + case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; break; + case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; break; + case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; break; + } + break; + case NVPTXISD::LDUV2: + switch (RetVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; break; + case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; break; + case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; break; + case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; break; + case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; break; + case MVT::f64: 
Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; break; + } + break; + case NVPTXISD::LDUV4: + switch (RetVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; break; + case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; break; + case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; break; + case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; break; + } + break; + } + } else { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::LDGV2: + switch (RetVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; break; + case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; break; + case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; break; + case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; break; + case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; break; + case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; break; + } + break; + case NVPTXISD::LDGV4: + switch (RetVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; break; + case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; break; + case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; break; + case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; break; + } + break; + case NVPTXISD::LDUV2: + switch (RetVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; break; + case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; break; + case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; break; + case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; break; + case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; break; + case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; break; + } + break; + case NVPTXISD::LDUV4: + switch (RetVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; break; + case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; break; + case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; break; + case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; break; + } + break; + } + } + + SDValue Ops[] = { Op1, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2); + + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); + cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); + + return LD; +} + + SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { DebugLoc dl = N->getDebugLoc(); StoreSDNode *ST = cast<StoreSDNode>(N); @@ -400,16 +763,6 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { case MVT::i64: Opcode = NVPTX::ST_i64_avar; break; case MVT::f32: Opcode = NVPTX::ST_f32_avar; break; case MVT::f64: Opcode = NVPTX::ST_f64_avar; break; - case MVT::v2i8: Opcode = NVPTX::ST_v2i8_avar; break; - case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break; - case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break; - case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break; - case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break; - case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break; - case MVT::v4i8: Opcode = NVPTX::ST_v4i8_avar; break; - case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break; - case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break; - case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break; default: return NULL; } SDValue Ops[] = { N1, 
@@ -431,16 +784,6 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { case MVT::i64: Opcode = NVPTX::ST_i64_asi; break; case MVT::f32: Opcode = NVPTX::ST_f32_asi; break; case MVT::f64: Opcode = NVPTX::ST_f64_asi; break; - case MVT::v2i8: Opcode = NVPTX::ST_v2i8_asi; break; - case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break; - case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break; - case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break; - case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break; - case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break; - case MVT::v4i8: Opcode = NVPTX::ST_v4i8_asi; break; - case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break; - case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break; - case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break; default: return NULL; } SDValue Ops[] = { N1, @@ -455,24 +798,26 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { } else if (Subtarget.is64Bit()? SelectADDRri64(N2.getNode(), N2, Base, Offset): SelectADDRri(N2.getNode(), N2, Base, Offset)) { - switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_ari; break; - case MVT::i16: Opcode = NVPTX::ST_i16_ari; break; - case MVT::i32: Opcode = NVPTX::ST_i32_ari; break; - case MVT::i64: Opcode = NVPTX::ST_i64_ari; break; - case MVT::f32: Opcode = NVPTX::ST_f32_ari; break; - case MVT::f64: Opcode = NVPTX::ST_f64_ari; break; - case MVT::v2i8: Opcode = NVPTX::ST_v2i8_ari; break; - case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break; - case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break; - case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break; - case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break; - case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break; - case MVT::v4i8: Opcode = NVPTX::ST_v4i8_ari; break; - case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break; - case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break; - case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break; - default: return NULL; + if (Subtarget.is64Bit()) { + switch (SourceVT) { + case MVT::i8: Opcode = NVPTX::ST_i8_ari_64; break; + case MVT::i16: Opcode = NVPTX::ST_i16_ari_64; break; + case MVT::i32: Opcode = NVPTX::ST_i32_ari_64; break; + case MVT::i64: Opcode = NVPTX::ST_i64_ari_64; break; + case MVT::f32: Opcode = NVPTX::ST_f32_ari_64; break; + case MVT::f64: Opcode = NVPTX::ST_f64_ari_64; break; + default: return NULL; + } + } else { + switch (SourceVT) { + case MVT::i8: Opcode = NVPTX::ST_i8_ari; break; + case MVT::i16: Opcode = NVPTX::ST_i16_ari; break; + case MVT::i32: Opcode = NVPTX::ST_i32_ari; break; + case MVT::i64: Opcode = NVPTX::ST_i64_ari; break; + case MVT::f32: Opcode = NVPTX::ST_f32_ari; break; + case MVT::f64: Opcode = NVPTX::ST_f64_ari; break; + default: return NULL; + } } SDValue Ops[] = { N1, getI32Imm(isVolatile), @@ -484,24 +829,26 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9); } else { - switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_areg; break; - case MVT::i16: Opcode = NVPTX::ST_i16_areg; break; - case MVT::i32: Opcode = NVPTX::ST_i32_areg; break; - case MVT::i64: Opcode = NVPTX::ST_i64_areg; break; - case MVT::f32: Opcode = NVPTX::ST_f32_areg; break; - case MVT::f64: Opcode = NVPTX::ST_f64_areg; break; - case MVT::v2i8: Opcode = NVPTX::ST_v2i8_areg; break; - case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break; - case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break; - case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break; - case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break; - case MVT::v2f64: 
Opcode = NVPTX::ST_v2f64_areg; break; - case MVT::v4i8: Opcode = NVPTX::ST_v4i8_areg; break; - case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break; - case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break; - case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break; - default: return NULL; + if (Subtarget.is64Bit()) { + switch (SourceVT) { + case MVT::i8: Opcode = NVPTX::ST_i8_areg_64; break; + case MVT::i16: Opcode = NVPTX::ST_i16_areg_64; break; + case MVT::i32: Opcode = NVPTX::ST_i32_areg_64; break; + case MVT::i64: Opcode = NVPTX::ST_i64_areg_64; break; + case MVT::f32: Opcode = NVPTX::ST_f32_areg_64; break; + case MVT::f64: Opcode = NVPTX::ST_f64_areg_64; break; + default: return NULL; + } + } else { + switch (SourceVT) { + case MVT::i8: Opcode = NVPTX::ST_i8_areg; break; + case MVT::i16: Opcode = NVPTX::ST_i16_areg; break; + case MVT::i32: Opcode = NVPTX::ST_i32_areg; break; + case MVT::i64: Opcode = NVPTX::ST_i64_areg; break; + case MVT::f32: Opcode = NVPTX::ST_f32_areg; break; + case MVT::f64: Opcode = NVPTX::ST_f64_areg; break; + default: return NULL; + } } SDValue Ops[] = { N1, getI32Imm(isVolatile), @@ -523,6 +870,244 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { return NVPTXST; } +SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDValue Addr, Offset, Base; + unsigned Opcode; + DebugLoc DL = N->getDebugLoc(); + SDNode *ST; + EVT EltVT = Op1.getValueType(); + MemSDNode *MemSD = cast<MemSDNode>(N); + EVT StoreVT = MemSD->getMemoryVT(); + + // Address Space Setting + unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget); + + if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) { + report_fatal_error("Cannot store to pointer that points to constant " + "memory space"); + } + + // Volatile Setting + // - .volatile is only available for .global and .shared + bool IsVolatile = MemSD->isVolatile(); + if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && + CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && + CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) + IsVolatile = false; + + // Type Setting: toType + toTypeWidth + // - for integer type, always use 'u' + assert(StoreVT.isSimple() && "Store value is not simple"); + MVT ScalarVT = StoreVT.getSimpleVT().getScalarType(); + unsigned ToTypeWidth = ScalarVT.getSizeInBits(); + unsigned ToType; + if (ScalarVT.isFloatingPoint()) + ToType = NVPTX::PTXLdStInstCode::Float; + else + ToType = NVPTX::PTXLdStInstCode::Unsigned; + + + SmallVector<SDValue, 12> StOps; + SDValue N2; + unsigned VecType; + + switch (N->getOpcode()) { + case NVPTXISD::StoreV2: + VecType = NVPTX::PTXLdStInstCode::V2; + StOps.push_back(N->getOperand(1)); + StOps.push_back(N->getOperand(2)); + N2 = N->getOperand(3); + break; + case NVPTXISD::StoreV4: + VecType = NVPTX::PTXLdStInstCode::V4; + StOps.push_back(N->getOperand(1)); + StOps.push_back(N->getOperand(2)); + StOps.push_back(N->getOperand(3)); + StOps.push_back(N->getOperand(4)); + N2 = N->getOperand(5); + break; + default: return NULL; + } + + StOps.push_back(getI32Imm(IsVolatile)); + StOps.push_back(getI32Imm(CodeAddrSpace)); + StOps.push_back(getI32Imm(VecType)); + StOps.push_back(getI32Imm(ToType)); + StOps.push_back(getI32Imm(ToTypeWidth)); + + if (SelectDirectAddr(N2, Addr)) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v2_avar; break; + case MVT::i16: Opcode = 
NVPTX::STV_i16_v2_avar; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v2_avar; break; + case MVT::i64: Opcode = NVPTX::STV_i64_v2_avar; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v2_avar; break; + case MVT::f64: Opcode = NVPTX::STV_f64_v2_avar; break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v4_avar; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v4_avar; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v4_avar; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v4_avar; break; + } + break; + } + StOps.push_back(Addr); + } else if (Subtarget.is64Bit()? + SelectADDRsi64(N2.getNode(), N2, Base, Offset): + SelectADDRsi(N2.getNode(), N2, Base, Offset)) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v2_asi; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v2_asi; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v2_asi; break; + case MVT::i64: Opcode = NVPTX::STV_i64_v2_asi; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v2_asi; break; + case MVT::f64: Opcode = NVPTX::STV_f64_v2_asi; break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v4_asi; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v4_asi; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v4_asi; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v4_asi; break; + } + break; + } + StOps.push_back(Base); + StOps.push_back(Offset); + } else if (Subtarget.is64Bit()? + SelectADDRri64(N2.getNode(), N2, Base, Offset): + SelectADDRri(N2.getNode(), N2, Base, Offset)) { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari_64; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari_64; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari_64; break; + case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari_64; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari_64; break; + case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari_64; break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari_64; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari_64; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari_64; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari_64; break; + } + break; + } + } else { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari; break; + case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari; break; + case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari; break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari; break; + } + break; + } + } + StOps.push_back(Base); + StOps.push_back(Offset); + } else { 
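+ // No direct, symbol+offset, or register+offset form matched; fall back + // to plain register-indirect (areg) addressing.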
+ if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg_64; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg_64; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg_64; break; + case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg_64; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg_64; break; + case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg_64; break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg_64; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg_64; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg_64; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg_64; break; + } + break; + } + } else { + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg; break; + case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg; break; + case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg; break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg; break; + case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg; break; + case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg; break; + case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg; break; + } + break; + } + } + StOps.push_back(N2); + } + + StOps.push_back(Chain); + + ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size()); + + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); + cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1); + + return ST; +} + // SelectDirectAddr - Match a direct address for DAG. // A direct address could be a globaladdress or externalsymbol. 
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 14f2091..4ec9241 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -72,8 +72,11 @@ private: #include "NVPTXGenDAGISel.inc" SDNode *Select(SDNode *N); - SDNode* SelectLoad(SDNode *N); - SDNode* SelectStore(SDNode *N); + SDNode *SelectLoad(SDNode *N); + SDNode *SelectLoadVector(SDNode *N); + SDNode *SelectLDGLDUVector(SDNode *N); + SDNode *SelectStore(SDNode *N); + SDNode *SelectStoreVector(SDNode *N); inline SDValue getI32Imm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, MVT::i32); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index b3ab9fc..5ee747a 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -45,15 +45,27 @@ using namespace llvm; static unsigned int uniqueCallSite = 0; static cl::opt<bool> -RetainVectorOperands("nvptx-codegen-vectors", - cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"), - cl::init(true)); - -static cl::opt<bool> sched4reg("nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); +static bool IsPTXVectorType(MVT VT) { + switch (VT.SimpleTy) { + default: return false; + case MVT::v2i8: + case MVT::v4i8: + case MVT::v2i16: + case MVT::v4i16: + case MVT::v2i32: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v2f32: + case MVT::v4f32: + case MVT::v2f64: + return true; + } +} + // NVPTXTargetLowering Constructor. NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) : TargetLowering(TM, new NVPTXTargetObjectFile()), @@ -63,9 +75,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) // always lower memset, memcpy, and memmove intrinsics to load/store // instructions, rather // then generating calls to memset, mempcy or memmove. 
- maxStoresPerMemset = (unsigned)0xFFFFFFFF; - maxStoresPerMemcpy = (unsigned)0xFFFFFFFF; - maxStoresPerMemmove = (unsigned)0xFFFFFFFF; + MaxStoresPerMemset = (unsigned)0xFFFFFFFF; + MaxStoresPerMemcpy = (unsigned)0xFFFFFFFF; + MaxStoresPerMemmove = (unsigned)0xFFFFFFFF; setBooleanContents(ZeroOrNegativeOneBooleanContent); @@ -87,41 +99,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); - if (RetainVectorOperands) { - addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass); - addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass); - addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass); - addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass); - addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass); - addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass); - addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass); - addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass); - addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass); - addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass); - - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8 , Custom); - - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8 , Custom); - } - // Operations not directly supported by NVPTX. setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); setOperationAction(ISD::BR_CC, MVT::Other, Expand); @@ -191,42 +168,16 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) // TRAP can be lowered to PTX trap setOperationAction(ISD::TRAP, MVT::Other, Legal); - // By default, CONCAT_VECTORS is implemented via store/load - // through stack. It is slow and uses local memory. We need - // to custom-lowering them. 
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8 , Custom); - - // Expand vector int to float and float to int conversions - // - For SINT_TO_FP and UINT_TO_FP, the src type - // (Node->getOperand(0).getValueType()) - // is used to determine the action, while for FP_TO_UINT and FP_TO_SINT, - // the dest type (Node->getValueType(0)) is used. - // - // See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector - // case, and - // SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case. - // - // That is why v4i32 or v2i32 are used here. - // - // The expansion for vectors happens in VectorLegalizer::LegalizeOp() - // (LegalizeVectorOps.cpp). - setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand); - setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand); + // Register custom handling for vector loads/stores + for (int i = MVT::FIRST_VECTOR_VALUETYPE; + i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT VT = (MVT::SimpleValueType)i; + if (IsPTXVectorType(VT)) { + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom); + } + } // Now deduce the information based on the above mentioned // actions @@ -268,6 +219,14 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { case NVPTXISD::RETURN: return "NVPTXISD::RETURN"; case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin"; case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd"; + case NVPTXISD::LoadV2: return "NVPTXISD::LoadV2"; + case NVPTXISD::LoadV4: return "NVPTXISD::LoadV4"; + case NVPTXISD::LDGV2: return "NVPTXISD::LDGV2"; + case NVPTXISD::LDGV4: return "NVPTXISD::LDGV4"; + case NVPTXISD::LDUV2: return "NVPTXISD::LDUV2"; + case NVPTXISD::LDUV4: return "NVPTXISD::LDUV4"; + case NVPTXISD::StoreV2: return "NVPTXISD::StoreV2"; + case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4"; } } @@ -868,12 +827,19 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { } +SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + if (Op.getValueType() == MVT::i1) + return LowerLOADi1(Op, DAG); + else + return SDValue(); +} + // v = ld i1* addr // => // v1 = ld i8* addr // v = trunc v1 to i1 SDValue NVPTXTargetLowering:: -LowerLOAD(SDValue Op, SelectionDAG &DAG) const { +LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); LoadSDNode *LD = cast<LoadSDNode>(Node); DebugLoc dl = Node->getDebugLoc(); @@ -893,12 +859,109 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(Ops, 2, dl); } +SDValue 
NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + EVT ValVT = Op.getOperand(1).getValueType(); + if (ValVT == MVT::i1) + return LowerSTOREi1(Op, DAG); + else if (ValVT.isVector()) + return LowerSTOREVector(Op, DAG); + else + return SDValue(); +} + +SDValue +NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { + SDNode *N = Op.getNode(); + SDValue Val = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + EVT ValVT = Val.getValueType(); + + if (ValVT.isVector()) { + // We only handle "native" vector sizes for now, e.g. <4 x double> is not + // legal. We can (and should) split that into 2 stores of <2 x double> here + // but I'm leaving that as a TODO for now. + if (!ValVT.isSimple()) + return SDValue(); + switch (ValVT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v2i8: + case MVT::v2i16: + case MVT::v2i32: + case MVT::v2i64: + case MVT::v2f32: + case MVT::v2f64: + case MVT::v4i8: + case MVT::v4i16: + case MVT::v4i32: + case MVT::v4f32: + // This is a "native" vector type + break; + } + + unsigned Opcode = 0; + EVT EltVT = ValVT.getVectorElementType(); + unsigned NumElts = ValVT.getVectorNumElements(); + + // Since StoreV2 is a target node, we cannot rely on DAG type legalization. + // Therefore, we must ensure the type is legal. For i1 and i8, we set the + // stored type to i16 and propagate the "real" type as the memory type. + bool NeedExt = false; + if (EltVT.getSizeInBits() < 16) + NeedExt = true; + + switch (NumElts) { + default: return SDValue(); + case 2: + Opcode = NVPTXISD::StoreV2; + break; + case 4: { + Opcode = NVPTXISD::StoreV4; + break; + } + } + + SmallVector<SDValue, 8> Ops; + + // First is the chain + Ops.push_back(N->getOperand(0)); + + // Then the split values + for (unsigned i = 0; i < NumElts; ++i) { + SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, + DAG.getIntPtrConstant(i)); + if (NeedExt) + // ANY_EXTEND is correct here since the store will only look at the + // lower-order bits anyway. + ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); + Ops.push_back(ExtVal); + } + + // Then any remaining arguments + for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) { + Ops.push_back(N->getOperand(i)); + } + + MemSDNode *MemSD = cast<MemSDNode>(N); + + SDValue NewSt = DAG.getMemIntrinsicNode(Opcode, DL, + DAG.getVTList(MVT::Other), &Ops[0], + Ops.size(), MemSD->getMemoryVT(), + MemSD->getMemOperand()); + + + //return DCI.CombineTo(N, NewSt, true); + return NewSt; + } + + return SDValue(); +} + // st i1 v, addr // => // v1 = zxt v to i8 // st i8, addr SDValue NVPTXTargetLowering:: -LowerSTORE(SDValue Op, SelectionDAG &DAG) const { +LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); StoreSDNode *ST = cast<StoreSDNode>(Node); @@ -1027,9 +1090,11 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, if (isABI || isKernel) { // If ABI, load from the param symbol SDValue Arg = getParamSymbol(DAG, idx); - Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT( - F->getContext()), - llvm::ADDRESS_SPACE_PARAM)); + // Conjure up a value that we can get the address space from. + // FIXME: Using a constant here is a hack. 
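+ // A null pointer constant in the param address space is enough here, + // since only the address space is recovered from this value.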
+ Value *srcValue = Constant::getNullValue(PointerType::get( + ObjectVT.getTypeForEVT(F->getContext()), + llvm::ADDRESS_SPACE_PARAM)); SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false, false, @@ -1346,3 +1411,242 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { return 4; } + +/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads. +static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, + SmallVectorImpl<SDValue>& Results) { + EVT ResVT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + assert(ResVT.isVector() && "Vector load must have vector type"); + + // We only handle "native" vector sizes for now, e.g. <4 x double> is not + // legal. We can (and should) split that into 2 loads of <2 x double> here + // but I'm leaving that as a TODO for now. + assert(ResVT.isSimple() && "Can only handle simple types"); + switch (ResVT.getSimpleVT().SimpleTy) { + default: return; + case MVT::v2i8: + case MVT::v2i16: + case MVT::v2i32: + case MVT::v2i64: + case MVT::v2f32: + case MVT::v2f64: + case MVT::v4i8: + case MVT::v4i16: + case MVT::v4i32: + case MVT::v4f32: + // This is a "native" vector type + break; + } + + EVT EltVT = ResVT.getVectorElementType(); + unsigned NumElts = ResVT.getVectorNumElements(); + + // Since LoadV2 is a target node, we cannot rely on DAG type legalization. + // Therefore, we must ensure the type is legal. For i1 and i8, we set the + // loaded type to i16 and propagate the "real" type as the memory type. + bool NeedTrunc = false; + if (EltVT.getSizeInBits() < 16) { + EltVT = MVT::i16; + NeedTrunc = true; + } + + unsigned Opcode = 0; + SDVTList LdResVTs; + + switch (NumElts) { + default: return; + case 2: + Opcode = NVPTXISD::LoadV2; + LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); + break; + case 4: { + Opcode = NVPTXISD::LoadV4; + EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; + LdResVTs = DAG.getVTList(ListVTs, 5); + break; + } + } + + SmallVector<SDValue, 8> OtherOps; + + // Copy regular operands + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + OtherOps.push_back(N->getOperand(i)); + + LoadSDNode *LD = cast<LoadSDNode>(N); + + // The select routine does not have access to the LoadSDNode instance, so + // pass along the extension information + OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType())); + + SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0], + OtherOps.size(), LD->getMemoryVT(), + LD->getMemOperand()); + + SmallVector<SDValue, 4> ScalarRes; + + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Res = NewLD.getValue(i); + if (NeedTrunc) + Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); + ScalarRes.push_back(Res); + } + + SDValue LoadChain = NewLD.getValue(NumElts); + + SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); + + Results.push_back(BuildVec); + Results.push_back(LoadChain); +} + +static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, + SelectionDAG &DAG, + SmallVectorImpl<SDValue> &Results) { + SDValue Chain = N->getOperand(0); + SDValue Intrin = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + // Get the intrinsic ID + unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue(); + switch(IntrinNo) { + default: return; + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_f: + case Intrinsic::nvvm_ldg_global_p: 
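+ // The ldu variants below are handled identically; only the opcode + // selected later (LDGV* vs. LDUV*) differs.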
+ case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldu_global_p: { + EVT ResVT = N->getValueType(0); + + if (ResVT.isVector()) { + // Vector LDG/LDU + + unsigned NumElts = ResVT.getVectorNumElements(); + EVT EltVT = ResVT.getVectorElementType(); + + // Since LDU/LDG are target nodes, we cannot rely on DAG type legalization. + // Therefore, we must ensure the type is legal. For i1 and i8, we set the + // loaded type to i16 and propagate the "real" type as the memory type. + bool NeedTrunc = false; + if (EltVT.getSizeInBits() < 16) { + EltVT = MVT::i16; + NeedTrunc = true; + } + + unsigned Opcode = 0; + SDVTList LdResVTs; + + switch (NumElts) { + default: return; + case 2: + switch(IntrinNo) { + default: return; + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_f: + case Intrinsic::nvvm_ldg_global_p: + Opcode = NVPTXISD::LDGV2; + break; + case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldu_global_p: + Opcode = NVPTXISD::LDUV2; + break; + } + LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); + break; + case 4: { + switch(IntrinNo) { + default: return; + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_f: + case Intrinsic::nvvm_ldg_global_p: + Opcode = NVPTXISD::LDGV4; + break; + case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldu_global_p: + Opcode = NVPTXISD::LDUV4; + break; + } + EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; + LdResVTs = DAG.getVTList(ListVTs, 5); + break; + } + } + + SmallVector<SDValue, 8> OtherOps; + + // Copy regular operands + + OtherOps.push_back(Chain); // Chain + // Skip operand 1 (intrinsic ID) + // Others + for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) + OtherOps.push_back(N->getOperand(i)); + + MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); + + SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0], + OtherOps.size(), MemSD->getMemoryVT(), + MemSD->getMemOperand()); + + SmallVector<SDValue, 4> ScalarRes; + + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Res = NewLD.getValue(i); + if (NeedTrunc) + Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); + ScalarRes.push_back(Res); + } + + SDValue LoadChain = NewLD.getValue(NumElts); + + SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); + + Results.push_back(BuildVec); + Results.push_back(LoadChain); + } else { + // i8 LDG/LDU + assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && + "Custom handling of non-i8 ldu/ldg?"); + + // Just copy all operands as-is + SmallVector<SDValue, 4> Ops; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + Ops.push_back(N->getOperand(i)); + + // Force output to i16 + SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other); + + MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); + + // We make sure the memory type is i8, which will be used during isel + // to select the proper instruction.
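+ // (The replacement value stays i16; presumably the legalizer, which invoked + // this hook for the illegal i8 result, reconciles the widened value with the + // node's original result type.)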
+ SDValue NewLD = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, + LdResVTs, &Ops[0], + Ops.size(), MVT::i8, + MemSD->getMemOperand()); + + Results.push_back(NewLD.getValue(0)); + Results.push_back(NewLD.getValue(1)); + } + } + } +} + +void NVPTXTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const { + switch (N->getOpcode()) { + default: report_fatal_error("Unhandled custom legalization"); + case ISD::LOAD: + ReplaceLoadVector(N, DAG, Results); + return; + case ISD::INTRINSIC_W_CHAIN: + ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); + return; + } +} diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 0a1833a..95e7b55 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -58,7 +58,16 @@ enum NodeType { RETURN, CallSeqBegin, CallSeqEnd, - Dummy + Dummy, + + LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE, + LoadV4, + LDGV2, // LDG.v2 + LDGV4, // LDG.v4 + LDUV2, // LDU.v2 + LDUV4, // LDU.v4 + StoreV2, + StoreV4 }; } @@ -143,8 +152,16 @@ private: SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const; + + virtual void ReplaceNodeResults(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const; }; } // namespace llvm diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 6fe654cb..9e73d80 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -65,46 +65,6 @@ void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB, NVPTX::Float64RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V4F32RegsRegClass.contains(DestReg) && - NVPTX::V4F32RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V4I32RegsRegClass.contains(DestReg) && - NVPTX::V4I32RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2F32RegsRegClass.contains(DestReg) && - NVPTX::V2F32RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2I32RegsRegClass.contains(DestReg) && - NVPTX::V2I32RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V4I8RegsRegClass.contains(DestReg) && - NVPTX::V4I8RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2I8RegsRegClass.contains(DestReg) && - NVPTX::V2I8RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V4I16RegsRegClass.contains(DestReg) && - NVPTX::V4I16RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2I16RegsRegClass.contains(DestReg) && - 
NVPTX::V2I16RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2I64RegsRegClass.contains(DestReg) && - NVPTX::V2I64RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2F64RegsRegClass.contains(DestReg) && - NVPTX::V2F64RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); else { llvm_unreachable("Don't know how to copy a register"); } diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index 8a410b8..f43abe2 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -52,6 +52,7 @@ def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">; def hasVote : Predicate<"Subtarget.hasVote()">; def hasDouble : Predicate<"Subtarget.hasDouble()">; def reqPTX20 : Predicate<"Subtarget.reqPTX20()">; +def hasLDG : Predicate<"Subtarget.hasLDG()">; def hasLDU : Predicate<"Subtarget.hasLDU()">; def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">; @@ -2153,11 +2154,21 @@ multiclass LD<NVPTXRegClass regclass> { i32imm:$fromWidth, Int32Regs:$addr), !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t$dst, [$addr];"), []>; + def _areg_64 : NVPTXInst<(outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth", + " \t$dst, [$addr];"), []>; def _ari : NVPTXInst<(outs regclass:$dst), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t$dst, [$addr+$offset];"), []>; + def _ari_64 : NVPTXInst<(outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth", + " \t$dst, [$addr+$offset];"), []>; def _asi : NVPTXInst<(outs regclass:$dst), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset), @@ -2174,19 +2185,6 @@ defm LD_f32 : LD<Float32Regs>; defm LD_f64 : LD<Float64Regs>; } -let VecInstType=isVecLD.Value, mayLoad=1, neverHasSideEffects=1 in { -defm LD_v2i8 : LD<V2I8Regs>; -defm LD_v4i8 : LD<V4I8Regs>; -defm LD_v2i16 : LD<V2I16Regs>; -defm LD_v4i16 : LD<V4I16Regs>; -defm LD_v2i32 : LD<V2I32Regs>; -defm LD_v4i32 : LD<V4I32Regs>; -defm LD_v2f32 : LD<V2F32Regs>; -defm LD_v4f32 : LD<V4F32Regs>; -defm LD_v2i64 : LD<V2I64Regs>; -defm LD_v2f64 : LD<V2F64Regs>; -} - multiclass ST<NVPTXRegClass regclass> { def _avar : NVPTXInst<(outs), (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, @@ -2198,11 +2196,21 @@ multiclass ST<NVPTXRegClass regclass> { LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr), !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", " \t[$addr], $src;"), []>; + def _areg_64 : NVPTXInst<(outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ", + "\t[$addr], $src;"), []>; def _ari : NVPTXInst<(outs), (ins 
regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset), !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", " \t[$addr+$offset], $src;"), []>; + def _ari_64 : NVPTXInst<(outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ", + "\t[$addr+$offset], $src;"), []>; def _asi : NVPTXInst<(outs), (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset), @@ -2219,19 +2227,6 @@ defm ST_f32 : ST<Float32Regs>; defm ST_f64 : ST<Float64Regs>; } -let VecInstType=isVecST.Value, mayStore=1, neverHasSideEffects=1 in { -defm ST_v2i8 : ST<V2I8Regs>; -defm ST_v4i8 : ST<V4I8Regs>; -defm ST_v2i16 : ST<V2I16Regs>; -defm ST_v4i16 : ST<V4I16Regs>; -defm ST_v2i32 : ST<V2I32Regs>; -defm ST_v4i32 : ST<V4I32Regs>; -defm ST_v2f32 : ST<V2F32Regs>; -defm ST_v4f32 : ST<V4F32Regs>; -defm ST_v2i64 : ST<V2I64Regs>; -defm ST_v2f64 : ST<V2F64Regs>; -} - // The following is used only in and after vector elementizations. // Vector elementization happens at the machine instruction level, so the // following instruction @@ -2247,11 +2242,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> { i32imm:$fromWidth, Int32Regs:$addr), !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; + def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; + def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset), @@ -2269,6 +2274,12 @@ multiclass LD_VEC<NVPTXRegClass regclass> { i32imm:$fromWidth, Int32Regs:$addr), !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; + def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, + regclass:$dst3, regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, @@ -2276,6 +2287,13 @@ multiclass LD_VEC<NVPTXRegClass regclass> { 
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), []>; + def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, + regclass:$dst3, regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), + []>; def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, @@ -2304,12 +2322,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> { LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr), !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; + def _v2_areg_64 : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; def _v2_ari : NVPTXInst<(outs), (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; + def _v2_ari_64 : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, + i32imm:$offset), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; def _v2_asi : NVPTXInst<(outs), (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, @@ -2328,6 +2357,12 @@ multiclass ST_VEC<NVPTXRegClass regclass> { i32imm:$fromWidth, Int32Regs:$addr), !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; + def _v4_areg_64 : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; def _v4_ari : NVPTXInst<(outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, @@ -2335,6 +2370,13 @@ multiclass ST_VEC<NVPTXRegClass regclass> { !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), []>; + def _v4_ari_64 : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), + []>; def _v4_asi : NVPTXInst<(outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, 
LdStCode:$Sign, @@ -2822,8 +2864,6 @@ def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>; -include "NVPTXVector.td" - include "NVPTXIntrinsics.td" diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 028a94b..49e2568 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1343,52 +1343,113 @@ defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; -// Vector ldu -multiclass VLDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp, - NVPTXInst eleInst, NVPTXInst eleInst64> { - def _32: NVPTXVecInst<(outs regclass:$result), (ins Int32Regs:$src), - !strconcat("ldu.global.", TyStr), - [(set regclass:$result, (IntOp Int32Regs:$src))], eleInst>, - Requires<[hasLDU]>; - def _64: NVPTXVecInst<(outs regclass:$result), (ins Int64Regs:$src), - !strconcat("ldu.global.", TyStr), - [(set regclass:$result, (IntOp Int64Regs:$src))], eleInst64>, - Requires<[hasLDU]>; + +//----------------------------------- +// Support for ldg on sm_35 or later +//----------------------------------- + +def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{ + MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N); + return M->getMemoryVT() == MVT::i8; +}]>; + +multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> { + def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>; + def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>; + def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>, + Requires<[hasLDG]>; + def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>; + def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>; +} + +multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> { + def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>; + def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>; + def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>, + Requires<[hasLDG]>; + def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>; + def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>; +} + +defm INT_PTX_LDG_GLOBAL_i8 + : LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>; +defm INT_PTX_LDG_GLOBAL_i16 + : LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>; +defm INT_PTX_LDG_GLOBAL_i32 + : LDG_G<"u32 \t$result, [$src];", Int32Regs, 
int_nvvm_ldg_global_i>; +defm INT_PTX_LDG_GLOBAL_i64 + : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>; +defm INT_PTX_LDG_GLOBAL_f32 + : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>; +defm INT_PTX_LDG_GLOBAL_f64 + : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>; +defm INT_PTX_LDG_GLOBAL_p32 + : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>; +defm INT_PTX_LDG_GLOBAL_p64 + : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>; + +// vector + +// Elementized vector ldg +multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> { + def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins Int32Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins Int64Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; } -let VecInstType=isVecLD.Value in { -defm INT_PTX_LDU_G_v2i8 : VLDU_G<"v2.u8 \t${result:vecfull}, [$src];", - V2I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i8_ELE_32, - INT_PTX_LDU_G_v2i8_ELE_64>; -defm INT_PTX_LDU_G_v4i8 : VLDU_G<"v4.u8 \t${result:vecfull}, [$src];", - V4I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i8_ELE_32, - INT_PTX_LDU_G_v4i8_ELE_64>; -defm INT_PTX_LDU_G_v2i16 : VLDU_G<"v2.u16 \t${result:vecfull}, [$src];", - V2I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i16_ELE_32, - INT_PTX_LDU_G_v2i16_ELE_64>; -defm INT_PTX_LDU_G_v4i16 : VLDU_G<"v4.u16 \t${result:vecfull}, [$src];", - V4I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i16_ELE_32, - INT_PTX_LDU_G_v4i16_ELE_64>; -defm INT_PTX_LDU_G_v2i32 : VLDU_G<"v2.u32 \t${result:vecfull}, [$src];", - V2I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i32_ELE_32, - INT_PTX_LDU_G_v2i32_ELE_64>; -defm INT_PTX_LDU_G_v4i32 : VLDU_G<"v4.u32 \t${result:vecfull}, [$src];", - V4I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i32_ELE_32, - INT_PTX_LDU_G_v4i32_ELE_64>; -defm INT_PTX_LDU_G_v2f32 : VLDU_G<"v2.f32 \t${result:vecfull}, [$src];", - V2F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f32_ELE_32, - INT_PTX_LDU_G_v2f32_ELE_64>; -defm INT_PTX_LDU_G_v4f32 : VLDU_G<"v4.f32 \t${result:vecfull}, [$src];", - V4F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v4f32_ELE_32, - INT_PTX_LDU_G_v4f32_ELE_64>; -defm INT_PTX_LDU_G_v2i64 : VLDU_G<"v2.u64 \t${result:vecfull}, [$src];", - V2I64Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i64_ELE_32, - INT_PTX_LDU_G_v2i64_ELE_64>; -defm INT_PTX_LDU_G_v2f64 : VLDU_G<"v2.f64 \t${result:vecfull}, [$src];", - V2F64Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f64_ELE_32, - INT_PTX_LDU_G_v2f64_ELE_64>; +multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { + def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, + regclass:$dst3, regclass:$dst4), (ins Int32Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, + regclass:$dst3, regclass:$dst4), (ins Int64Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; } +// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads. 
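+// Until then, i8 elements are loaded with u8 semantics into 16-bit registers, hence Int16Regs in the v2i8/v4i8 defs below.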
+defm INT_PTX_LDG_G_v2i8_ELE + : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; +defm INT_PTX_LDG_G_v2i16_ELE + : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; +defm INT_PTX_LDG_G_v2i32_ELE + : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; +defm INT_PTX_LDG_G_v2f32_ELE + : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>; +defm INT_PTX_LDG_G_v2i64_ELE + : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>; +defm INT_PTX_LDG_G_v2f64_ELE + : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>; +defm INT_PTX_LDG_G_v4i8_ELE + : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; +defm INT_PTX_LDG_G_v4i16_ELE + : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; +defm INT_PTX_LDG_G_v4i32_ELE + : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>; +defm INT_PTX_LDG_G_v4f32_ELE + : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; multiclass NG_TO_G<string Str, Intrinsic Intrin> { diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 08be917..350a2c5 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -54,36 +54,6 @@ std::string getNVPTXRegClassName (TargetRegisterClass const *RC) { else if (RC == &NVPTX::SpecialRegsRegClass) { return "!Special!"; } - else if (RC == &NVPTX::V2F32RegsRegClass) { - return ".v2.f32"; - } - else if (RC == &NVPTX::V4F32RegsRegClass) { - return ".v4.f32"; - } - else if (RC == &NVPTX::V2I32RegsRegClass) { - return ".v2.s32"; - } - else if (RC == &NVPTX::V4I32RegsRegClass) { - return ".v4.s32"; - } - else if (RC == &NVPTX::V2F64RegsRegClass) { - return ".v2.f64"; - } - else if (RC == &NVPTX::V2I64RegsRegClass) { - return ".v2.s64"; - } - else if (RC == &NVPTX::V2I16RegsRegClass) { - return ".v2.s16"; - } - else if (RC == &NVPTX::V4I16RegsRegClass) { - return ".v4.s16"; - } - else if (RC == &NVPTX::V2I8RegsRegClass) { - return ".v2.s16"; - } - else if (RC == &NVPTX::V4I8RegsRegClass) { - return ".v4.s16"; - } else { return "INTERNAL"; } @@ -115,137 +85,11 @@ std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) { else if (RC == &NVPTX::SpecialRegsRegClass) { return "!Special!"; } - else if (RC == &NVPTX::V2F32RegsRegClass) { - return "%v2f"; - } - else if (RC == &NVPTX::V4F32RegsRegClass) { - return "%v4f"; - } - else if (RC == &NVPTX::V2I32RegsRegClass) { - return "%v2r"; - } - else if (RC == &NVPTX::V4I32RegsRegClass) { - return "%v4r"; - } - else if (RC == &NVPTX::V2F64RegsRegClass) { - return "%v2fd"; - } - else if (RC == &NVPTX::V2I64RegsRegClass) { - return "%v2rd"; - } - else if (RC == &NVPTX::V2I16RegsRegClass) { - return "%v2s"; - } - else if (RC == &NVPTX::V4I16RegsRegClass) { - return "%v4rs"; - } - else if (RC == &NVPTX::V2I8RegsRegClass) { - return "%v2rc"; - } - else if (RC == &NVPTX::V4I8RegsRegClass) { - return "%v4rc"; - } else { return "INTERNAL"; } return ""; } - -bool isNVPTXVectorRegClass(TargetRegisterClass const *RC) { - if (RC->getID() == NVPTX::V2F32RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V2F64RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V2I16RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V2I32RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V2I64RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V2I8RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V4F32RegsRegClassID) - 
return true; - if (RC->getID() == NVPTX::V4I16RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V4I32RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V4I8RegsRegClassID) - return true; - return false; -} - -std::string getNVPTXElemClassName(TargetRegisterClass const *RC) { - if (RC->getID() == NVPTX::V2F32RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass); - if (RC->getID() == NVPTX::V2F64RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Float64RegsRegClass); - if (RC->getID() == NVPTX::V2I16RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass); - if (RC->getID() == NVPTX::V2I32RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass); - if (RC->getID() == NVPTX::V2I64RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int64RegsRegClass); - if (RC->getID() == NVPTX::V2I8RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass); - if (RC->getID() == NVPTX::V4F32RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass); - if (RC->getID() == NVPTX::V4I16RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass); - if (RC->getID() == NVPTX::V4I32RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass); - if (RC->getID() == NVPTX::V4I8RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass); - llvm_unreachable("Not a vector register class"); -} - -const TargetRegisterClass *getNVPTXElemClass(TargetRegisterClass const *RC) { - if (RC->getID() == NVPTX::V2F32RegsRegClassID) - return (&NVPTX::Float32RegsRegClass); - if (RC->getID() == NVPTX::V2F64RegsRegClassID) - return (&NVPTX::Float64RegsRegClass); - if (RC->getID() == NVPTX::V2I16RegsRegClassID) - return (&NVPTX::Int16RegsRegClass); - if (RC->getID() == NVPTX::V2I32RegsRegClassID) - return (&NVPTX::Int32RegsRegClass); - if (RC->getID() == NVPTX::V2I64RegsRegClassID) - return (&NVPTX::Int64RegsRegClass); - if (RC->getID() == NVPTX::V2I8RegsRegClassID) - return (&NVPTX::Int8RegsRegClass); - if (RC->getID() == NVPTX::V4F32RegsRegClassID) - return (&NVPTX::Float32RegsRegClass); - if (RC->getID() == NVPTX::V4I16RegsRegClassID) - return (&NVPTX::Int16RegsRegClass); - if (RC->getID() == NVPTX::V4I32RegsRegClassID) - return (&NVPTX::Int32RegsRegClass); - if (RC->getID() == NVPTX::V4I8RegsRegClassID) - return (&NVPTX::Int8RegsRegClass); - llvm_unreachable("Not a vector register class"); -} - -int getNVPTXVectorSize(TargetRegisterClass const *RC) { - if (RC->getID() == NVPTX::V2F32RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V2F64RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V2I16RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V2I32RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V2I64RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V2I8RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V4F32RegsRegClassID) - return 4; - if (RC->getID() == NVPTX::V4I16RegsRegClassID) - return 4; - if (RC->getID() == NVPTX::V4I32RegsRegClassID) - return 4; - if (RC->getID() == NVPTX::V4I8RegsRegClassID) - return 4; - llvm_unreachable("Not a vector register class"); -} } NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii, @@ -277,30 +121,22 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const { void NVPTXRegisterInfo:: eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, + int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); - unsigned i = 0; MachineInstr &MI = *II; - while 
(!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); MachineFunction &MF = *MI.getParent()->getParent(); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MI.getOperand(i+1).getImm(); + MI.getOperand(FIOperandNum+1).getImm(); // Using I0 as the frame pointer - MI.getOperand(i).ChangeToRegister(NVPTX::VRFrame, false); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(NVPTX::VRFrame, false); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset); } - int NVPTXRegisterInfo:: getDwarfRegNum(unsigned RegNum, bool isEH) const { return 0; @@ -314,12 +150,3 @@ unsigned NVPTXRegisterInfo::getRARegister() const { return 0; } -// This function eliminates ADJCALLSTACKDOWN, -// ADJCALLSTACKUP pseudo instructions -void NVPTXRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - // Simply discard ADJCALLSTACKDOWN, - // ADJCALLSTACKUP instructions. - MBB.erase(I); -} diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h index 5951783..69f73f2 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -55,13 +55,9 @@ public: virtual BitVector getReservedRegs(const MachineFunction &MF) const; virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, + int SPAdj, unsigned FIOperandNum, RegScavenger *RS=NULL) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const; virtual unsigned getFrameRegister(const MachineFunction &MF) const; virtual unsigned getRARegister() const; @@ -81,10 +77,6 @@ public: std::string getNVPTXRegClassName (const TargetRegisterClass *RC); std::string getNVPTXRegClassStr (const TargetRegisterClass *RC); -bool isNVPTXVectorRegClass (const TargetRegisterClass *RC); -std::string getNVPTXElemClassName (const TargetRegisterClass *RC); -int getNVPTXVectorSize (const TargetRegisterClass *RC); -const TargetRegisterClass *getNVPTXElemClass(const TargetRegisterClass *RC); } // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td index ba15825..8d100d6 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.td +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -37,9 +37,6 @@ foreach i = 0-395 in { def RL#i : NVPTXReg<"%rl"#i>; // 64-bit def F#i : NVPTXReg<"%f"#i>; // 32-bit float def FL#i : NVPTXReg<"%fl"#i>; // 64-bit float - // Vectors - foreach s = [ "2b8", "2b16", "2b32", "2b64", "4b8", "4b16", "4b32" ] in - def v#s#_#i : NVPTXReg<"%v"#s#"_"#i>; // Arguments def ia#i : NVPTXReg<"%ia"#i>; @@ -65,44 +62,3 @@ def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 395))>; // Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used. 
def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>; - -class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList, - NVPTXRegClass sClass, - int e, - string n> - : NVPTXRegClass<regTypes, alignment, regList> -{ - NVPTXRegClass scalarClass=sClass; - int elems=e; - string name=n; -} -def V2F32Regs - : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%u", 0, 395)), - Float32Regs, 2, ".v2.f32">; -def V4F32Regs - : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%u", 0, 395)), - Float32Regs, 4, ".v4.f32">; -def V2I32Regs - : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%u", 0, 395)), - Int32Regs, 2, ".v2.u32">; -def V4I32Regs - : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%u", 0, 395)), - Int32Regs, 4, ".v4.u32">; -def V2F64Regs - : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%u", 0, 395)), - Float64Regs, 2, ".v2.f64">; -def V2I64Regs - : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%u", 0, 395)), - Int64Regs, 2, ".v2.u64">; -def V2I16Regs - : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%u", 0, 395)), - Int16Regs, 2, ".v2.u16">; -def V4I16Regs - : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%u", 0, 395)), - Int16Regs, 4, ".v4.u16">; -def V2I8Regs - : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%u", 0, 395)), - Int8Regs, 2, ".v2.u8">; -def V4I8Regs - : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%u", 0, 395)), - Int8Regs, 4, ".v4.u8">; diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h index e6cb7c2..beea77e 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/lib/Target/NVPTX/NVPTXSubtarget.h @@ -57,6 +57,7 @@ public: bool hasF32FTZ() const { return SmVersion >= 20; } bool hasFMAF32() const { return SmVersion >= 20; } bool hasFMAF64() const { return SmVersion >= 13; } + bool hasLDG() const { return SmVersion >= 32; } bool hasLDU() const { return SmVersion >= 20; } bool hasGenericLdSt() const { return SmVersion >= 20; } inline bool hasHWROT32() const { return false; } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index b4e049e..cd765fa 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -123,7 +123,6 @@ bool NVPTXPassConfig::addInstSelector() { addPass(createSplitBBatBarPass()); addPass(createAllocaHoisting()); addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); - addPass(createVectorElementizePass(getNVPTXTargetMachine())); return false; } diff --git a/lib/Target/NVPTX/VectorElementize.cpp b/lib/Target/NVPTX/VectorElementize.cpp deleted file mode 100644 index f1b285d..0000000 --- a/lib/Target/NVPTX/VectorElementize.cpp +++ /dev/null @@ -1,1239 +0,0 @@ -//===-- VectorElementize.cpp - Remove unreachable blocks for codegen --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass converts operations on vector types to operations on their -// element types. -// -// For generic binary and unary vector instructions, the conversion is simple. -// Suppose we have -// av = bv Vop cv -// where av, bv, and cv are vector virtual registers, and Vop is a vector op. -// This gets converted to the following : -// a1 = b1 Sop c1 -// a2 = b2 Sop c2 -// -// VectorToScalarMap maintains the vector vreg to scalar vreg mapping. 
-// For the above example, the map will look as follows: -// av => [a1, a2] -// bv => [b1, b2] -// -// In addition, initVectorInfo creates the following opcode->opcode map. -// Vop => Sop -// OtherVop => OtherSop -// ... -// -// For vector specific instructions like vecbuild, vecshuffle etc, the -// conversion is different. Look at comments near the functions with -// prefix createVec<...>. -// -//===----------------------------------------------------------------------===// - -#include "NVPTX.h" -#include "NVPTXTargetMachine.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Type.h" -#include "llvm/Pass.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Target/TargetInstrInfo.h" - -using namespace llvm; - -namespace { - -class LLVM_LIBRARY_VISIBILITY VectorElementize : public MachineFunctionPass { - virtual bool runOnMachineFunction(MachineFunction &F); - - NVPTXTargetMachine &TM; - MachineRegisterInfo *MRI; - const NVPTXRegisterInfo *RegInfo; - const NVPTXInstrInfo *InstrInfo; - - llvm::DenseMap<const TargetRegisterClass *, const TargetRegisterClass *> - RegClassMap; - llvm::DenseMap<unsigned, bool> SimpleMoveMap; - - llvm::DenseMap<unsigned, SmallVector<unsigned, 4> > VectorToScalarMap; - - bool isVectorInstr(MachineInstr *); - - SmallVector<unsigned, 4> getScalarRegisters(unsigned); - unsigned getScalarVersion(unsigned); - unsigned getScalarVersion(MachineInstr *); - - bool isVectorRegister(unsigned); - const TargetRegisterClass *getScalarRegClass(const TargetRegisterClass *RC); - unsigned numCopiesNeeded(MachineInstr *); - - void createLoadCopy(MachineFunction&, MachineInstr *, - std::vector<MachineInstr *>&); - void createStoreCopy(MachineFunction&, MachineInstr *, - std::vector<MachineInstr *>&); - - void createVecDest(MachineFunction&, MachineInstr *, - std::vector<MachineInstr *>&); - - void createCopies(MachineFunction&, MachineInstr *, - std::vector<MachineInstr *>&); - - unsigned copyProp(MachineFunction&); - unsigned removeDeadMoves(MachineFunction&); - - void elementize(MachineFunction&); - - bool isSimpleMove(MachineInstr *); - - void createVecShuffle(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies); - - void createVecExtract(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies); - - void createVecInsert(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies); - - void createVecBuild(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies); - -public: - - static char ID; // Pass identification, replacement for typeid - VectorElementize(NVPTXTargetMachine &tm) - : MachineFunctionPass(ID), TM(tm) {} - - virtual const char *getPassName() const { - return "Convert LLVM vector types to their element types"; - } -}; - -char VectorElementize::ID = 1; -} - -static cl::opt<bool> -RemoveRedundantMoves("nvptx-remove-redundant-moves", - cl::desc("NVPTX: Remove redundant moves introduced by vector lowering"), - cl::init(true)); - -#define VECINST(x) ((((x)->getDesc().TSFlags) & NVPTX::VecInstTypeMask) \ - >> NVPTX::VecInstTypeShift) -#define ISVECINST(x) 
(VECINST(x) != NVPTX::VecNOP) -#define ISVECLOAD(x) (VECINST(x) == NVPTX::VecLoad) -#define ISVECSTORE(x) (VECINST(x) == NVPTX::VecStore) -#define ISVECBUILD(x) (VECINST(x) == NVPTX::VecBuild) -#define ISVECSHUFFLE(x) (VECINST(x) == NVPTX::VecShuffle) -#define ISVECEXTRACT(x) (VECINST(x) == NVPTX::VecExtract) -#define ISVECINSERT(x) (VECINST(x) == NVPTX::VecInsert) -#define ISVECDEST(x) (VECINST(x) == NVPTX::VecDest) - -bool VectorElementize::isSimpleMove(MachineInstr *mi) { - if (mi->isCopy()) - return true; - unsigned TSFlags = (mi->getDesc().TSFlags & NVPTX::SimpleMoveMask) - >> NVPTX::SimpleMoveShift; - return (TSFlags == 1); -} - -bool VectorElementize::isVectorInstr(MachineInstr *mi) { - if ((mi->getOpcode() == NVPTX::PHI) || - (mi->getOpcode() == NVPTX::IMPLICIT_DEF) || mi->isCopy()) { - MachineOperand dest = mi->getOperand(0); - return isVectorRegister(dest.getReg()); - } - return ISVECINST(mi); -} - -unsigned VectorElementize::getScalarVersion(MachineInstr *mi) { - return getScalarVersion(mi->getOpcode()); -} - -///============================================================================= -///Instr is assumed to be a vector instruction. For most vector instructions, -///the size of the destination vector register gives the number of scalar copies -///needed. For VecStore, size of getOperand(1) gives the number of scalar copies -///needed. For VecExtract, the dest is a scalar. So getOperand(1) gives the -///number of scalar copies needed. -///============================================================================= -unsigned VectorElementize::numCopiesNeeded(MachineInstr *Instr) { - unsigned numDefs=0; - unsigned def; - for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { - MachineOperand oper = Instr->getOperand(i); - - if (!oper.isReg()) continue; - if (!oper.isDef()) continue; - def = i; - numDefs++; - } - assert((numDefs <= 1) && "Only 0 or 1 defs supported"); - - if (numDefs == 1) { - unsigned regnum = Instr->getOperand(def).getReg(); - if (ISVECEXTRACT(Instr)) - regnum = Instr->getOperand(1).getReg(); - return getNVPTXVectorSize(MRI->getRegClass(regnum)); - } - else if (numDefs == 0) { - assert(ISVECSTORE(Instr) - && "Only 0 def instruction supported is vector store"); - - unsigned regnum = Instr->getOperand(0).getReg(); - return getNVPTXVectorSize(MRI->getRegClass(regnum)); - } - return 1; -} - -const TargetRegisterClass *VectorElementize:: -getScalarRegClass(const TargetRegisterClass *RC) { - assert(isNVPTXVectorRegClass(RC) && - "Not a vector register class"); - return getNVPTXElemClass(RC); -} - -bool VectorElementize::isVectorRegister(unsigned reg) { - const TargetRegisterClass *RC=MRI->getRegClass(reg); - return isNVPTXVectorRegClass(RC); -} - -///============================================================================= -///For every vector register 'v' that is not already in the VectorToScalarMap, -///create n scalar registers of the corresponding element type, where n -///is 2 or 4 (getNVPTXVectorSize) and add it VectorToScalarMap. -///============================================================================= -SmallVector<unsigned, 4> VectorElementize::getScalarRegisters(unsigned regnum) { - assert(isVectorRegister(regnum) && "Expecting a vector register here"); - // Create the scalar registers and put them in the map, if not already there. 
- if (VectorToScalarMap.find(regnum) == VectorToScalarMap.end()) { - const TargetRegisterClass *vecClass = MRI->getRegClass(regnum); - const TargetRegisterClass *scalarClass = getScalarRegClass(vecClass); - - SmallVector<unsigned, 4> temp; - - for (unsigned i=0, e=getNVPTXVectorSize(vecClass); i!=e; ++i) - temp.push_back(MRI->createVirtualRegister(scalarClass)); - - VectorToScalarMap[regnum] = temp; - } - return VectorToScalarMap[regnum]; -} - -///============================================================================= -///For a vector load of the form -///va <= ldv2 [addr] -///the following multi output instruction is created : -///[v1, v2] <= LD [addr] -///Look at NVPTXVector.td for the definitions of multi output loads. -///============================================================================= -void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies) { - copies.push_back(F.CloneMachineInstr(Instr)); - - MachineInstrBuilder copy(F, copies[0]); - copy->setDesc(InstrInfo->get(getScalarVersion(copy))); - - // Remove the dest, that should be a vector operand. - MachineOperand dest = copy->getOperand(0); - unsigned regnum = dest.getReg(); - - SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); - copy->RemoveOperand(0); - - std::vector<MachineOperand> otherOperands; - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - otherOperands.push_back(copy->getOperand(i)); - - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - copy->RemoveOperand(0); - - for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) - copy.addReg(scalarRegs[i], RegState::Define); - - for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) - copy.addOperand(otherOperands[i]); - -} - -///============================================================================= -///For a vector store of the form -///stv2 va, [addr] -///the following multi input instruction is created : -///ST v1, v2, [addr] -///Look at NVPTXVector.td for the definitions of multi input stores. -///============================================================================= -void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies) { - copies.push_back(F.CloneMachineInstr(Instr)); - - MachineInstrBuilder copy(F, copies[0]); - copy->setDesc(InstrInfo->get(getScalarVersion(copy))); - - MachineOperand src = copy->getOperand(0); - unsigned regnum = src.getReg(); - - SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); - copy->RemoveOperand(0); - - std::vector<MachineOperand> otherOperands; - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - otherOperands.push_back(copy->getOperand(i)); - - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - copy->RemoveOperand(0); - - for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) - copy.addReg(scalarRegs[i]); - - for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) - copy.addOperand(otherOperands[i]); -} - -///============================================================================= -///va <= shufflev2 vb, vc, <i1>, <i2> -///gets converted to 2 moves into a1 and a2. The source of the moves depend on -///i1 and i2. i1, i2 can belong to the set {0, 1, 2, 3} for shufflev2. For -///shufflev4 the set is {0,..7}. 
For example, if i1=3, i2=0, the move -///instructions will be -///a1 <= c2 -///a2 <= b1 -///============================================================================= -void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies) { - unsigned numcopies=numCopiesNeeded(Instr); - - unsigned destregnum = Instr->getOperand(0).getReg(); - unsigned src1regnum = Instr->getOperand(1).getReg(); - unsigned src2regnum = Instr->getOperand(2).getReg(); - - SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum); - SmallVector<unsigned, 4> src1 = getScalarRegisters(src1regnum); - SmallVector<unsigned, 4> src2 = getScalarRegisters(src2regnum); - - DebugLoc DL = Instr->getDebugLoc(); - - for (unsigned i=0; i<numcopies; i++) { - MachineInstrBuilder copy = - BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), dest[i]); - MachineOperand which=Instr->getOperand(3+i); - assert(which.isImm() && "Shuffle operand not a constant"); - - int src=which.getImm(); - int elem=src%numcopies; - - if (which.getImm() < numcopies) - copy.addReg(src1[elem]); - else - copy.addReg(src2[elem]); - copies.push_back(copy); - } -} - -///============================================================================= -///a <= extractv2 va, <i1> -///gets turned into a simple move to the scalar register a. The source depends -///on i1. -///============================================================================= -void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies) { - unsigned srcregnum = Instr->getOperand(1).getReg(); - - SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum); - - MachineOperand which = Instr->getOperand(2); - assert(which.isImm() && "Extract operand not a constant"); - - DebugLoc DL = Instr->getDebugLoc(); - copies.push_back(BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), - Instr->getOperand(0).getReg()) - .addReg(src[which.getImm()])); -} - -///============================================================================= -///va <= vecinsertv2 vb, c, <i1> -///This instruction copies all elements of vb to va, except the 'i1'th element. -///The scalar value c becomes the 'i1'th element of va. -///This gets translated to 2 (4 for vecinsertv4) moves. 
-///============================================================================= -void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies) { - unsigned numcopies=numCopiesNeeded(Instr); - - unsigned destregnum = Instr->getOperand(0).getReg(); - unsigned srcregnum = Instr->getOperand(1).getReg(); - - SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum); - SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum); - - MachineOperand which=Instr->getOperand(3); - assert(which.isImm() && "Insert operand not a constant"); - unsigned int elem=which.getImm(); - - DebugLoc DL = Instr->getDebugLoc(); - - for (unsigned i=0; i<numcopies; i++) { - MachineInstrBuilder copy = - BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), dest[i]); - - if (i != elem) - copy.addReg(src[i]); - else - copy.addOperand(Instr->getOperand(2)); - - copies.push_back(copy); - } - -} - -///============================================================================= -///va <= buildv2 b1, b2 -///gets translated to -///a1 <= b1 -///a2 <= b2 -///============================================================================= -void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies) { - unsigned numcopies=numCopiesNeeded(Instr); - - unsigned destregnum = Instr->getOperand(0).getReg(); - - SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum); - - DebugLoc DL = Instr->getDebugLoc(); - - for (unsigned i=0; i<numcopies; i++) - copies.push_back(BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), - dest[i]) - .addOperand(Instr->getOperand(1+i))); -} - -///============================================================================= -///For a tex inst of the form -///va <= op [scalar operands] -///the following multi output instruction is created : -///[v1, v2] <= op' [scalar operands] -///============================================================================= -void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies) { - copies.push_back(F.CloneMachineInstr(Instr)); - - MachineInstrBuilder copy(F, copies[0]); - copy->setDesc(InstrInfo->get(getScalarVersion(copy))); - - // Remove the dest, that should be a vector operand. - MachineOperand dest = copy->getOperand(0); - unsigned regnum = dest.getReg(); - - SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); - copy->RemoveOperand(0); - - std::vector<MachineOperand> otherOperands; - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - otherOperands.push_back(copy->getOperand(i)); - - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - copy->RemoveOperand(0); - - for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) - copy.addReg(scalarRegs[i], RegState::Define); - - for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) - copy.addOperand(otherOperands[i]); -} - -///============================================================================= -///Look at the vector instruction type and dispatch to the createVec<...> -///function that creates the scalar copies. 
-///============================================================================= -void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies) { - if (ISVECLOAD(Instr)) { - createLoadCopy(F, Instr, copies); - return; - } - if (ISVECSTORE(Instr)) { - createStoreCopy(F, Instr, copies); - return; - } - if (ISVECSHUFFLE(Instr)) { - createVecShuffle(F, Instr, copies); - return; - } - if (ISVECEXTRACT(Instr)) { - createVecExtract(F, Instr, copies); - return; - } - if (ISVECINSERT(Instr)) { - createVecInsert(F, Instr, copies); - return; - } - if (ISVECDEST(Instr)) { - createVecDest(F, Instr, copies); - return; - } - if (ISVECBUILD(Instr)) { - createVecBuild(F, Instr, copies); - return; - } - - unsigned numcopies=numCopiesNeeded(Instr); - - for (unsigned i=0; i<numcopies; ++i) - copies.push_back(F.CloneMachineInstr(Instr)); - - for (unsigned i=0; i<numcopies; ++i) { - MachineInstrBuilder copy(F, copies[i]); - - std::vector<MachineOperand> allOperands; - std::vector<bool> isDef; - - for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) { - MachineOperand oper = copy->getOperand(j); - allOperands.push_back(oper); - if (oper.isReg()) - isDef.push_back(oper.isDef()); - else - isDef.push_back(false); - } - - for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) - copy->RemoveOperand(0); - - copy->setDesc(InstrInfo->get(getScalarVersion(Instr))); - - for (unsigned j=0, e=allOperands.size(); j!=e; ++j) { - MachineOperand oper=allOperands[j]; - if (oper.isReg()) { - unsigned regnum = oper.getReg(); - if (isVectorRegister(regnum)) { - - SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); - copy.addReg(scalarRegs[i], getDefRegState(isDef[j])); - } - else - copy.addOperand(oper); - } - else - copy.addOperand(oper); - } - } -} - -///============================================================================= -///Scan through all basic blocks, looking for vector instructions. -///For each vector instruction I, insert the scalar copies before I, and -///add I into toRemove vector. Finally remove all instructions in toRemove. -///============================================================================= -void VectorElementize::elementize(MachineFunction &F) { - for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); - BI!=BE; ++BI) { - MachineBasicBlock *BB = &*BI; - - std::vector<MachineInstr *> copies; - std::vector<MachineInstr *> toRemove; - - for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); - II!=IE; ++II) { - MachineInstr *Instr = &*II; - - if (!isVectorInstr(Instr)) - continue; - - copies.clear(); - createCopies(F, Instr, copies); - for (unsigned i=0, e=copies.size(); i!=e; ++i) - BB->insert(II, copies[i]); - - assert((copies.size() > 0) && "Problem in createCopies"); - toRemove.push_back(Instr); - } - for (unsigned i=0, e=toRemove.size(); i!=e; ++i) - F.DeleteMachineInstr(toRemove[i]->getParent()->remove(toRemove[i])); - } -} - -///============================================================================= -///a <= b -///... -///... -///x <= op(a, ...) -///gets converted to -/// -///x <= op(b, ...) -///The original move is still present. This works on SSA form machine code. -///Note that a <= b should be a simple vreg-to-vreg move instruction. -///TBD : I didn't find a function that can do replaceOperand, so I remove -///all operands and add all of them again, replacing the one while adding. 
-///============================================================================= -unsigned VectorElementize::copyProp(MachineFunction &F) { - unsigned numReplacements = 0; - - for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE; - ++BI) { - MachineBasicBlock *BB = &*BI; - - for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE; - ++II) { - MachineInstr *Instr = &*II; - - // Don't do copy propagation on PHI as it will cause unnecessary - // live range overlap. - if ((Instr->getOpcode() == TargetOpcode::PHI) || - (Instr->getOpcode() == TargetOpcode::DBG_VALUE)) - continue; - - bool needsReplacement = false; - - for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { - MachineOperand oper = Instr->getOperand(i); - if (!oper.isReg()) continue; - if (oper.isDef()) continue; - if (!RegInfo->isVirtualRegister(oper.getReg())) continue; - - MachineInstr *defInstr = MRI->getVRegDef(oper.getReg()); - - if (!defInstr) continue; - - if (!isSimpleMove(defInstr)) continue; - - MachineOperand defSrc = defInstr->getOperand(1); - if (!defSrc.isReg()) continue; - if (!RegInfo->isVirtualRegister(defSrc.getReg())) continue; - - needsReplacement = true; - - } - if (!needsReplacement) continue; - - numReplacements++; - - std::vector<MachineOperand> operands; - - for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { - MachineOperand oper = Instr->getOperand(i); - bool flag = false; - do { - if (!(oper.isReg())) - break; - if (oper.isDef()) - break; - if (!(RegInfo->isVirtualRegister(oper.getReg()))) - break; - MachineInstr *defInstr = MRI->getVRegDef(oper.getReg()); - if (!(isSimpleMove(defInstr))) - break; - MachineOperand defSrc = defInstr->getOperand(1); - if (!(defSrc.isReg())) - break; - if (!(RegInfo->isVirtualRegister(defSrc.getReg()))) - break; - operands.push_back(defSrc); - flag = true; - } while (0); - if (flag == false) - operands.push_back(oper); - } - - for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) - Instr->RemoveOperand(0); - for (unsigned i=0, e=operands.size(); i!=e; ++i) - Instr->addOperand(F, operands[i]); - - } - } - return numReplacements; -} - -///============================================================================= -///Look for simple vreg-to-vreg instructions whose use_empty() is true, add -///them to deadMoves vector. Then remove all instructions in deadMoves. -///============================================================================= -unsigned VectorElementize::removeDeadMoves(MachineFunction &F) { - std::vector<MachineInstr *> deadMoves; - for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE; - ++BI) { - MachineBasicBlock *BB = &*BI; - - for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE; - ++II) { - MachineInstr *Instr = &*II; - - if (!isSimpleMove(Instr)) continue; - - MachineOperand dest = Instr->getOperand(0); - assert(dest.isReg() && "dest of move not a register"); - assert(RegInfo->isVirtualRegister(dest.getReg()) && - "dest of move not a virtual register"); - - if (MRI->use_empty(dest.getReg())) { - deadMoves.push_back(Instr); - } - } - } - - for (unsigned i=0, e=deadMoves.size(); i!=e; ++i) - F.DeleteMachineInstr(deadMoves[i]->getParent()->remove(deadMoves[i])); - - return deadMoves.size(); -} - -///============================================================================= -///Main function for this pass. 
-///============================================================================= -bool VectorElementize::runOnMachineFunction(MachineFunction &F) { - MRI = &F.getRegInfo(); - - RegInfo = TM.getRegisterInfo(); - InstrInfo = TM.getInstrInfo(); - - VectorToScalarMap.clear(); - - elementize(F); - - if (RemoveRedundantMoves) - while (1) { - if (copyProp(F) == 0) break; - removeDeadMoves(F); - } - - return true; -} - -FunctionPass *llvm::createVectorElementizePass(NVPTXTargetMachine &tm) { - return new VectorElementize(tm); -} - -unsigned VectorElementize::getScalarVersion(unsigned opcode) { - if (opcode == NVPTX::PHI) - return opcode; - if (opcode == NVPTX::IMPLICIT_DEF) - return opcode; - switch(opcode) { - default: llvm_unreachable("Scalar version not set, fix NVPTXVector.td"); - case TargetOpcode::COPY: return TargetOpcode::COPY; - case NVPTX::AddCCCV2I32: return NVPTX::ADDCCCi32rr; - case NVPTX::AddCCCV4I32: return NVPTX::ADDCCCi32rr; - case NVPTX::AddCCV2I32: return NVPTX::ADDCCi32rr; - case NVPTX::AddCCV4I32: return NVPTX::ADDCCi32rr; - case NVPTX::Build_Vector2_f32: return NVPTX::FMOV32rr; - case NVPTX::Build_Vector2_f64: return NVPTX::FMOV64rr; - case NVPTX::Build_Vector2_i16: return NVPTX::IMOV16rr; - case NVPTX::Build_Vector2_i32: return NVPTX::IMOV32rr; - case NVPTX::Build_Vector2_i64: return NVPTX::IMOV64rr; - case NVPTX::Build_Vector2_i8: return NVPTX::IMOV8rr; - case NVPTX::Build_Vector4_f32: return NVPTX::FMOV32rr; - case NVPTX::Build_Vector4_i16: return NVPTX::IMOV16rr; - case NVPTX::Build_Vector4_i32: return NVPTX::IMOV32rr; - case NVPTX::Build_Vector4_i8: return NVPTX::IMOV8rr; - case NVPTX::CVTv2i16tov2i32: return NVPTX::Zint_extendext16to32; - case NVPTX::CVTv2i64tov2i32: return NVPTX::TRUNC_64to32; - case NVPTX::CVTv2i8tov2i32: return NVPTX::Zint_extendext8to32; - case NVPTX::CVTv4i16tov4i32: return NVPTX::Zint_extendext16to32; - case NVPTX::CVTv4i8tov4i32: return NVPTX::Zint_extendext8to32; - case NVPTX::F32MAD_ftzV2: return NVPTX::FMAD32_ftzrrr; - case NVPTX::F32MADV2: return NVPTX::FMAD32rrr; - case NVPTX::F32MAD_ftzV4: return NVPTX::FMAD32_ftzrrr; - case NVPTX::F32MADV4: return NVPTX::FMAD32rrr; - case NVPTX::F32FMA_ftzV2: return NVPTX::FMA32_ftzrrr; - case NVPTX::F32FMAV2: return NVPTX::FMA32rrr; - case NVPTX::F32FMA_ftzV4: return NVPTX::FMA32_ftzrrr; - case NVPTX::F32FMAV4: return NVPTX::FMA32rrr; - case NVPTX::F64FMAV2: return NVPTX::FMA64rrr; - case NVPTX::FVecEQV2F32: return NVPTX::FSetEQf32rr_toi32; - case NVPTX::FVecEQV2F64: return NVPTX::FSetEQf64rr_toi64; - case NVPTX::FVecEQV4F32: return NVPTX::FSetEQf32rr_toi32; - case NVPTX::FVecGEV2F32: return NVPTX::FSetGEf32rr_toi32; - case NVPTX::FVecGEV2F64: return NVPTX::FSetGEf64rr_toi64; - case NVPTX::FVecGEV4F32: return NVPTX::FSetGEf32rr_toi32; - case NVPTX::FVecGTV2F32: return NVPTX::FSetGTf32rr_toi32; - case NVPTX::FVecGTV2F64: return NVPTX::FSetGTf64rr_toi64; - case NVPTX::FVecGTV4F32: return NVPTX::FSetGTf32rr_toi32; - case NVPTX::FVecLEV2F32: return NVPTX::FSetLEf32rr_toi32; - case NVPTX::FVecLEV2F64: return NVPTX::FSetLEf64rr_toi64; - case NVPTX::FVecLEV4F32: return NVPTX::FSetLEf32rr_toi32; - case NVPTX::FVecLTV2F32: return NVPTX::FSetLTf32rr_toi32; - case NVPTX::FVecLTV2F64: return NVPTX::FSetLTf64rr_toi64; - case NVPTX::FVecLTV4F32: return NVPTX::FSetLTf32rr_toi32; - case NVPTX::FVecNANV2F32: return NVPTX::FSetNANf32rr_toi32; - case NVPTX::FVecNANV2F64: return NVPTX::FSetNANf64rr_toi64; - case NVPTX::FVecNANV4F32: return NVPTX::FSetNANf32rr_toi32; - case NVPTX::FVecNEV2F32: return 
NVPTX::FSetNEf32rr_toi32; - case NVPTX::FVecNEV2F64: return NVPTX::FSetNEf64rr_toi64; - case NVPTX::FVecNEV4F32: return NVPTX::FSetNEf32rr_toi32; - case NVPTX::FVecNUMV2F32: return NVPTX::FSetNUMf32rr_toi32; - case NVPTX::FVecNUMV2F64: return NVPTX::FSetNUMf64rr_toi64; - case NVPTX::FVecNUMV4F32: return NVPTX::FSetNUMf32rr_toi32; - case NVPTX::FVecUEQV2F32: return NVPTX::FSetUEQf32rr_toi32; - case NVPTX::FVecUEQV2F64: return NVPTX::FSetUEQf64rr_toi64; - case NVPTX::FVecUEQV4F32: return NVPTX::FSetUEQf32rr_toi32; - case NVPTX::FVecUGEV2F32: return NVPTX::FSetUGEf32rr_toi32; - case NVPTX::FVecUGEV2F64: return NVPTX::FSetUGEf64rr_toi64; - case NVPTX::FVecUGEV4F32: return NVPTX::FSetUGEf32rr_toi32; - case NVPTX::FVecUGTV2F32: return NVPTX::FSetUGTf32rr_toi32; - case NVPTX::FVecUGTV2F64: return NVPTX::FSetUGTf64rr_toi64; - case NVPTX::FVecUGTV4F32: return NVPTX::FSetUGTf32rr_toi32; - case NVPTX::FVecULEV2F32: return NVPTX::FSetULEf32rr_toi32; - case NVPTX::FVecULEV2F64: return NVPTX::FSetULEf64rr_toi64; - case NVPTX::FVecULEV4F32: return NVPTX::FSetULEf32rr_toi32; - case NVPTX::FVecULTV2F32: return NVPTX::FSetULTf32rr_toi32; - case NVPTX::FVecULTV2F64: return NVPTX::FSetULTf64rr_toi64; - case NVPTX::FVecULTV4F32: return NVPTX::FSetULTf32rr_toi32; - case NVPTX::FVecUNEV2F32: return NVPTX::FSetUNEf32rr_toi32; - case NVPTX::FVecUNEV2F64: return NVPTX::FSetUNEf64rr_toi64; - case NVPTX::FVecUNEV4F32: return NVPTX::FSetUNEf32rr_toi32; - case NVPTX::I16MADV2: return NVPTX::MAD16rrr; - case NVPTX::I16MADV4: return NVPTX::MAD16rrr; - case NVPTX::I32MADV2: return NVPTX::MAD32rrr; - case NVPTX::I32MADV4: return NVPTX::MAD32rrr; - case NVPTX::I64MADV2: return NVPTX::MAD64rrr; - case NVPTX::I8MADV2: return NVPTX::MAD8rrr; - case NVPTX::I8MADV4: return NVPTX::MAD8rrr; - case NVPTX::ShiftLV2I16: return NVPTX::SHLi16rr; - case NVPTX::ShiftLV2I32: return NVPTX::SHLi32rr; - case NVPTX::ShiftLV2I64: return NVPTX::SHLi64rr; - case NVPTX::ShiftLV2I8: return NVPTX::SHLi8rr; - case NVPTX::ShiftLV4I16: return NVPTX::SHLi16rr; - case NVPTX::ShiftLV4I32: return NVPTX::SHLi32rr; - case NVPTX::ShiftLV4I8: return NVPTX::SHLi8rr; - case NVPTX::ShiftRAV2I16: return NVPTX::SRAi16rr; - case NVPTX::ShiftRAV2I32: return NVPTX::SRAi32rr; - case NVPTX::ShiftRAV2I64: return NVPTX::SRAi64rr; - case NVPTX::ShiftRAV2I8: return NVPTX::SRAi8rr; - case NVPTX::ShiftRAV4I16: return NVPTX::SRAi16rr; - case NVPTX::ShiftRAV4I32: return NVPTX::SRAi32rr; - case NVPTX::ShiftRAV4I8: return NVPTX::SRAi8rr; - case NVPTX::ShiftRLV2I16: return NVPTX::SRLi16rr; - case NVPTX::ShiftRLV2I32: return NVPTX::SRLi32rr; - case NVPTX::ShiftRLV2I64: return NVPTX::SRLi64rr; - case NVPTX::ShiftRLV2I8: return NVPTX::SRLi8rr; - case NVPTX::ShiftRLV4I16: return NVPTX::SRLi16rr; - case NVPTX::ShiftRLV4I32: return NVPTX::SRLi32rr; - case NVPTX::ShiftRLV4I8: return NVPTX::SRLi8rr; - case NVPTX::SubCCCV2I32: return NVPTX::SUBCCCi32rr; - case NVPTX::SubCCCV4I32: return NVPTX::SUBCCCi32rr; - case NVPTX::SubCCV2I32: return NVPTX::SUBCCi32rr; - case NVPTX::SubCCV4I32: return NVPTX::SUBCCi32rr; - case NVPTX::V2F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz; - case NVPTX::V2F32Div_prec: return NVPTX::FDIV32rr_prec; - case NVPTX::V2F32Div_ftz: return NVPTX::FDIV32rr_ftz; - case NVPTX::V2F32Div: return NVPTX::FDIV32rr; - case NVPTX::V2F32_Select: return NVPTX::SELECTf32rr; - case NVPTX::V2F64Div: return NVPTX::FDIV64rr; - case NVPTX::V2F64_Select: return NVPTX::SELECTf64rr; - case NVPTX::V2I16_Select: return NVPTX::SELECTi16rr; - case NVPTX::V2I32_Select: return 
NVPTX::SELECTi32rr; - case NVPTX::V2I64_Select: return NVPTX::SELECTi64rr; - case NVPTX::V2I8_Select: return NVPTX::SELECTi8rr; - case NVPTX::V2f32Extract: return NVPTX::FMOV32rr; - case NVPTX::V2f32Insert: return NVPTX::FMOV32rr; - case NVPTX::V2f32Mov: return NVPTX::FMOV32rr; - case NVPTX::V2f64Extract: return NVPTX::FMOV64rr; - case NVPTX::V2f64Insert: return NVPTX::FMOV64rr; - case NVPTX::V2f64Mov: return NVPTX::FMOV64rr; - case NVPTX::V2i16Extract: return NVPTX::IMOV16rr; - case NVPTX::V2i16Insert: return NVPTX::IMOV16rr; - case NVPTX::V2i16Mov: return NVPTX::IMOV16rr; - case NVPTX::V2i32Extract: return NVPTX::IMOV32rr; - case NVPTX::V2i32Insert: return NVPTX::IMOV32rr; - case NVPTX::V2i32Mov: return NVPTX::IMOV32rr; - case NVPTX::V2i64Extract: return NVPTX::IMOV64rr; - case NVPTX::V2i64Insert: return NVPTX::IMOV64rr; - case NVPTX::V2i64Mov: return NVPTX::IMOV64rr; - case NVPTX::V2i8Extract: return NVPTX::IMOV8rr; - case NVPTX::V2i8Insert: return NVPTX::IMOV8rr; - case NVPTX::V2i8Mov: return NVPTX::IMOV8rr; - case NVPTX::V4F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz; - case NVPTX::V4F32Div_prec: return NVPTX::FDIV32rr_prec; - case NVPTX::V4F32Div_ftz: return NVPTX::FDIV32rr_ftz; - case NVPTX::V4F32Div: return NVPTX::FDIV32rr; - case NVPTX::V4F32_Select: return NVPTX::SELECTf32rr; - case NVPTX::V4I16_Select: return NVPTX::SELECTi16rr; - case NVPTX::V4I32_Select: return NVPTX::SELECTi32rr; - case NVPTX::V4I8_Select: return NVPTX::SELECTi8rr; - case NVPTX::V4f32Extract: return NVPTX::FMOV32rr; - case NVPTX::V4f32Insert: return NVPTX::FMOV32rr; - case NVPTX::V4f32Mov: return NVPTX::FMOV32rr; - case NVPTX::V4i16Extract: return NVPTX::IMOV16rr; - case NVPTX::V4i16Insert: return NVPTX::IMOV16rr; - case NVPTX::V4i16Mov: return NVPTX::IMOV16rr; - case NVPTX::V4i32Extract: return NVPTX::IMOV32rr; - case NVPTX::V4i32Insert: return NVPTX::IMOV32rr; - case NVPTX::V4i32Mov: return NVPTX::IMOV32rr; - case NVPTX::V4i8Extract: return NVPTX::IMOV8rr; - case NVPTX::V4i8Insert: return NVPTX::IMOV8rr; - case NVPTX::V4i8Mov: return NVPTX::IMOV8rr; - case NVPTX::VAddV2I16: return NVPTX::ADDi16rr; - case NVPTX::VAddV2I32: return NVPTX::ADDi32rr; - case NVPTX::VAddV2I64: return NVPTX::ADDi64rr; - case NVPTX::VAddV2I8: return NVPTX::ADDi8rr; - case NVPTX::VAddV4I16: return NVPTX::ADDi16rr; - case NVPTX::VAddV4I32: return NVPTX::ADDi32rr; - case NVPTX::VAddV4I8: return NVPTX::ADDi8rr; - case NVPTX::VAddfV2F32: return NVPTX::FADDf32rr; - case NVPTX::VAddfV2F32_ftz: return NVPTX::FADDf32rr_ftz; - case NVPTX::VAddfV2F64: return NVPTX::FADDf64rr; - case NVPTX::VAddfV4F32: return NVPTX::FADDf32rr; - case NVPTX::VAddfV4F32_ftz: return NVPTX::FADDf32rr_ftz; - case NVPTX::VAndV2I16: return NVPTX::ANDb16rr; - case NVPTX::VAndV2I32: return NVPTX::ANDb32rr; - case NVPTX::VAndV2I64: return NVPTX::ANDb64rr; - case NVPTX::VAndV2I8: return NVPTX::ANDb8rr; - case NVPTX::VAndV4I16: return NVPTX::ANDb16rr; - case NVPTX::VAndV4I32: return NVPTX::ANDb32rr; - case NVPTX::VAndV4I8: return NVPTX::ANDb8rr; - case NVPTX::VMulfV2F32_ftz: return NVPTX::FMULf32rr_ftz; - case NVPTX::VMulfV2F32: return NVPTX::FMULf32rr; - case NVPTX::VMulfV2F64: return NVPTX::FMULf64rr; - case NVPTX::VMulfV4F32_ftz: return NVPTX::FMULf32rr_ftz; - case NVPTX::VMulfV4F32: return NVPTX::FMULf32rr; - case NVPTX::VMultHSV2I16: return NVPTX::MULTHSi16rr; - case NVPTX::VMultHSV2I32: return NVPTX::MULTHSi32rr; - case NVPTX::VMultHSV2I64: return NVPTX::MULTHSi64rr; - case NVPTX::VMultHSV2I8: return NVPTX::MULTHSi8rr; - case NVPTX::VMultHSV4I16: return 
NVPTX::MULTHSi16rr; - case NVPTX::VMultHSV4I32: return NVPTX::MULTHSi32rr; - case NVPTX::VMultHSV4I8: return NVPTX::MULTHSi8rr; - case NVPTX::VMultHUV2I16: return NVPTX::MULTHUi16rr; - case NVPTX::VMultHUV2I32: return NVPTX::MULTHUi32rr; - case NVPTX::VMultHUV2I64: return NVPTX::MULTHUi64rr; - case NVPTX::VMultHUV2I8: return NVPTX::MULTHUi8rr; - case NVPTX::VMultHUV4I16: return NVPTX::MULTHUi16rr; - case NVPTX::VMultHUV4I32: return NVPTX::MULTHUi32rr; - case NVPTX::VMultHUV4I8: return NVPTX::MULTHUi8rr; - case NVPTX::VMultV2I16: return NVPTX::MULTi16rr; - case NVPTX::VMultV2I32: return NVPTX::MULTi32rr; - case NVPTX::VMultV2I64: return NVPTX::MULTi64rr; - case NVPTX::VMultV2I8: return NVPTX::MULTi8rr; - case NVPTX::VMultV4I16: return NVPTX::MULTi16rr; - case NVPTX::VMultV4I32: return NVPTX::MULTi32rr; - case NVPTX::VMultV4I8: return NVPTX::MULTi8rr; - case NVPTX::VNegV2I16: return NVPTX::INEG16; - case NVPTX::VNegV2I32: return NVPTX::INEG32; - case NVPTX::VNegV2I64: return NVPTX::INEG64; - case NVPTX::VNegV2I8: return NVPTX::INEG8; - case NVPTX::VNegV4I16: return NVPTX::INEG16; - case NVPTX::VNegV4I32: return NVPTX::INEG32; - case NVPTX::VNegV4I8: return NVPTX::INEG8; - case NVPTX::VNegv2f32: return NVPTX::FNEGf32; - case NVPTX::VNegv2f32_ftz: return NVPTX::FNEGf32_ftz; - case NVPTX::VNegv2f64: return NVPTX::FNEGf64; - case NVPTX::VNegv4f32: return NVPTX::FNEGf32; - case NVPTX::VNegv4f32_ftz: return NVPTX::FNEGf32_ftz; - case NVPTX::VNotV2I16: return NVPTX::NOT16; - case NVPTX::VNotV2I32: return NVPTX::NOT32; - case NVPTX::VNotV2I64: return NVPTX::NOT64; - case NVPTX::VNotV2I8: return NVPTX::NOT8; - case NVPTX::VNotV4I16: return NVPTX::NOT16; - case NVPTX::VNotV4I32: return NVPTX::NOT32; - case NVPTX::VNotV4I8: return NVPTX::NOT8; - case NVPTX::VOrV2I16: return NVPTX::ORb16rr; - case NVPTX::VOrV2I32: return NVPTX::ORb32rr; - case NVPTX::VOrV2I64: return NVPTX::ORb64rr; - case NVPTX::VOrV2I8: return NVPTX::ORb8rr; - case NVPTX::VOrV4I16: return NVPTX::ORb16rr; - case NVPTX::VOrV4I32: return NVPTX::ORb32rr; - case NVPTX::VOrV4I8: return NVPTX::ORb8rr; - case NVPTX::VSDivV2I16: return NVPTX::SDIVi16rr; - case NVPTX::VSDivV2I32: return NVPTX::SDIVi32rr; - case NVPTX::VSDivV2I64: return NVPTX::SDIVi64rr; - case NVPTX::VSDivV2I8: return NVPTX::SDIVi8rr; - case NVPTX::VSDivV4I16: return NVPTX::SDIVi16rr; - case NVPTX::VSDivV4I32: return NVPTX::SDIVi32rr; - case NVPTX::VSDivV4I8: return NVPTX::SDIVi8rr; - case NVPTX::VSRemV2I16: return NVPTX::SREMi16rr; - case NVPTX::VSRemV2I32: return NVPTX::SREMi32rr; - case NVPTX::VSRemV2I64: return NVPTX::SREMi64rr; - case NVPTX::VSRemV2I8: return NVPTX::SREMi8rr; - case NVPTX::VSRemV4I16: return NVPTX::SREMi16rr; - case NVPTX::VSRemV4I32: return NVPTX::SREMi32rr; - case NVPTX::VSRemV4I8: return NVPTX::SREMi8rr; - case NVPTX::VSubV2I16: return NVPTX::SUBi16rr; - case NVPTX::VSubV2I32: return NVPTX::SUBi32rr; - case NVPTX::VSubV2I64: return NVPTX::SUBi64rr; - case NVPTX::VSubV2I8: return NVPTX::SUBi8rr; - case NVPTX::VSubV4I16: return NVPTX::SUBi16rr; - case NVPTX::VSubV4I32: return NVPTX::SUBi32rr; - case NVPTX::VSubV4I8: return NVPTX::SUBi8rr; - case NVPTX::VSubfV2F32_ftz: return NVPTX::FSUBf32rr_ftz; - case NVPTX::VSubfV2F32: return NVPTX::FSUBf32rr; - case NVPTX::VSubfV2F64: return NVPTX::FSUBf64rr; - case NVPTX::VSubfV4F32_ftz: return NVPTX::FSUBf32rr_ftz; - case NVPTX::VSubfV4F32: return NVPTX::FSUBf32rr; - case NVPTX::VUDivV2I16: return NVPTX::UDIVi16rr; - case NVPTX::VUDivV2I32: return NVPTX::UDIVi32rr; - case NVPTX::VUDivV2I64: return NVPTX::UDIVi64rr; 
- case NVPTX::VUDivV2I8: return NVPTX::UDIVi8rr; - case NVPTX::VUDivV4I16: return NVPTX::UDIVi16rr; - case NVPTX::VUDivV4I32: return NVPTX::UDIVi32rr; - case NVPTX::VUDivV4I8: return NVPTX::UDIVi8rr; - case NVPTX::VURemV2I16: return NVPTX::UREMi16rr; - case NVPTX::VURemV2I32: return NVPTX::UREMi32rr; - case NVPTX::VURemV2I64: return NVPTX::UREMi64rr; - case NVPTX::VURemV2I8: return NVPTX::UREMi8rr; - case NVPTX::VURemV4I16: return NVPTX::UREMi16rr; - case NVPTX::VURemV4I32: return NVPTX::UREMi32rr; - case NVPTX::VURemV4I8: return NVPTX::UREMi8rr; - case NVPTX::VXorV2I16: return NVPTX::XORb16rr; - case NVPTX::VXorV2I32: return NVPTX::XORb32rr; - case NVPTX::VXorV2I64: return NVPTX::XORb64rr; - case NVPTX::VXorV2I8: return NVPTX::XORb8rr; - case NVPTX::VXorV4I16: return NVPTX::XORb16rr; - case NVPTX::VXorV4I32: return NVPTX::XORb32rr; - case NVPTX::VXorV4I8: return NVPTX::XORb8rr; - case NVPTX::VecSEQV2I16: return NVPTX::ISetSEQi16rr_toi16; - case NVPTX::VecSEQV2I32: return NVPTX::ISetSEQi32rr_toi32; - case NVPTX::VecSEQV2I64: return NVPTX::ISetSEQi64rr_toi64; - case NVPTX::VecSEQV2I8: return NVPTX::ISetSEQi8rr_toi8; - case NVPTX::VecSEQV4I16: return NVPTX::ISetSEQi16rr_toi16; - case NVPTX::VecSEQV4I32: return NVPTX::ISetSEQi32rr_toi32; - case NVPTX::VecSEQV4I8: return NVPTX::ISetSEQi8rr_toi8; - case NVPTX::VecSGEV2I16: return NVPTX::ISetSGEi16rr_toi16; - case NVPTX::VecSGEV2I32: return NVPTX::ISetSGEi32rr_toi32; - case NVPTX::VecSGEV2I64: return NVPTX::ISetSGEi64rr_toi64; - case NVPTX::VecSGEV2I8: return NVPTX::ISetSGEi8rr_toi8; - case NVPTX::VecSGEV4I16: return NVPTX::ISetSGEi16rr_toi16; - case NVPTX::VecSGEV4I32: return NVPTX::ISetSGEi32rr_toi32; - case NVPTX::VecSGEV4I8: return NVPTX::ISetSGEi8rr_toi8; - case NVPTX::VecSGTV2I16: return NVPTX::ISetSGTi16rr_toi16; - case NVPTX::VecSGTV2I32: return NVPTX::ISetSGTi32rr_toi32; - case NVPTX::VecSGTV2I64: return NVPTX::ISetSGTi64rr_toi64; - case NVPTX::VecSGTV2I8: return NVPTX::ISetSGTi8rr_toi8; - case NVPTX::VecSGTV4I16: return NVPTX::ISetSGTi16rr_toi16; - case NVPTX::VecSGTV4I32: return NVPTX::ISetSGTi32rr_toi32; - case NVPTX::VecSGTV4I8: return NVPTX::ISetSGTi8rr_toi8; - case NVPTX::VecSLEV2I16: return NVPTX::ISetSLEi16rr_toi16; - case NVPTX::VecSLEV2I32: return NVPTX::ISetSLEi32rr_toi32; - case NVPTX::VecSLEV2I64: return NVPTX::ISetSLEi64rr_toi64; - case NVPTX::VecSLEV2I8: return NVPTX::ISetSLEi8rr_toi8; - case NVPTX::VecSLEV4I16: return NVPTX::ISetSLEi16rr_toi16; - case NVPTX::VecSLEV4I32: return NVPTX::ISetSLEi32rr_toi32; - case NVPTX::VecSLEV4I8: return NVPTX::ISetSLEi8rr_toi8; - case NVPTX::VecSLTV2I16: return NVPTX::ISetSLTi16rr_toi16; - case NVPTX::VecSLTV2I32: return NVPTX::ISetSLTi32rr_toi32; - case NVPTX::VecSLTV2I64: return NVPTX::ISetSLTi64rr_toi64; - case NVPTX::VecSLTV2I8: return NVPTX::ISetSLTi8rr_toi8; - case NVPTX::VecSLTV4I16: return NVPTX::ISetSLTi16rr_toi16; - case NVPTX::VecSLTV4I32: return NVPTX::ISetSLTi32rr_toi32; - case NVPTX::VecSLTV4I8: return NVPTX::ISetSLTi8rr_toi8; - case NVPTX::VecSNEV2I16: return NVPTX::ISetSNEi16rr_toi16; - case NVPTX::VecSNEV2I32: return NVPTX::ISetSNEi32rr_toi32; - case NVPTX::VecSNEV2I64: return NVPTX::ISetSNEi64rr_toi64; - case NVPTX::VecSNEV2I8: return NVPTX::ISetSNEi8rr_toi8; - case NVPTX::VecSNEV4I16: return NVPTX::ISetSNEi16rr_toi16; - case NVPTX::VecSNEV4I32: return NVPTX::ISetSNEi32rr_toi32; - case NVPTX::VecSNEV4I8: return NVPTX::ISetSNEi8rr_toi8; - case NVPTX::VecShuffle_v2f32: return NVPTX::FMOV32rr; - case NVPTX::VecShuffle_v2f64: return NVPTX::FMOV64rr; - case 
NVPTX::VecShuffle_v2i16: return NVPTX::IMOV16rr; - case NVPTX::VecShuffle_v2i32: return NVPTX::IMOV32rr; - case NVPTX::VecShuffle_v2i64: return NVPTX::IMOV64rr; - case NVPTX::VecShuffle_v2i8: return NVPTX::IMOV8rr; - case NVPTX::VecShuffle_v4f32: return NVPTX::FMOV32rr; - case NVPTX::VecShuffle_v4i16: return NVPTX::IMOV16rr; - case NVPTX::VecShuffle_v4i32: return NVPTX::IMOV32rr; - case NVPTX::VecShuffle_v4i8: return NVPTX::IMOV8rr; - case NVPTX::VecUEQV2I16: return NVPTX::ISetUEQi16rr_toi16; - case NVPTX::VecUEQV2I32: return NVPTX::ISetUEQi32rr_toi32; - case NVPTX::VecUEQV2I64: return NVPTX::ISetUEQi64rr_toi64; - case NVPTX::VecUEQV2I8: return NVPTX::ISetUEQi8rr_toi8; - case NVPTX::VecUEQV4I16: return NVPTX::ISetUEQi16rr_toi16; - case NVPTX::VecUEQV4I32: return NVPTX::ISetUEQi32rr_toi32; - case NVPTX::VecUEQV4I8: return NVPTX::ISetUEQi8rr_toi8; - case NVPTX::VecUGEV2I16: return NVPTX::ISetUGEi16rr_toi16; - case NVPTX::VecUGEV2I32: return NVPTX::ISetUGEi32rr_toi32; - case NVPTX::VecUGEV2I64: return NVPTX::ISetUGEi64rr_toi64; - case NVPTX::VecUGEV2I8: return NVPTX::ISetUGEi8rr_toi8; - case NVPTX::VecUGEV4I16: return NVPTX::ISetUGEi16rr_toi16; - case NVPTX::VecUGEV4I32: return NVPTX::ISetUGEi32rr_toi32; - case NVPTX::VecUGEV4I8: return NVPTX::ISetUGEi8rr_toi8; - case NVPTX::VecUGTV2I16: return NVPTX::ISetUGTi16rr_toi16; - case NVPTX::VecUGTV2I32: return NVPTX::ISetUGTi32rr_toi32; - case NVPTX::VecUGTV2I64: return NVPTX::ISetUGTi64rr_toi64; - case NVPTX::VecUGTV2I8: return NVPTX::ISetUGTi8rr_toi8; - case NVPTX::VecUGTV4I16: return NVPTX::ISetUGTi16rr_toi16; - case NVPTX::VecUGTV4I32: return NVPTX::ISetUGTi32rr_toi32; - case NVPTX::VecUGTV4I8: return NVPTX::ISetUGTi8rr_toi8; - case NVPTX::VecULEV2I16: return NVPTX::ISetULEi16rr_toi16; - case NVPTX::VecULEV2I32: return NVPTX::ISetULEi32rr_toi32; - case NVPTX::VecULEV2I64: return NVPTX::ISetULEi64rr_toi64; - case NVPTX::VecULEV2I8: return NVPTX::ISetULEi8rr_toi8; - case NVPTX::VecULEV4I16: return NVPTX::ISetULEi16rr_toi16; - case NVPTX::VecULEV4I32: return NVPTX::ISetULEi32rr_toi32; - case NVPTX::VecULEV4I8: return NVPTX::ISetULEi8rr_toi8; - case NVPTX::VecULTV2I16: return NVPTX::ISetULTi16rr_toi16; - case NVPTX::VecULTV2I32: return NVPTX::ISetULTi32rr_toi32; - case NVPTX::VecULTV2I64: return NVPTX::ISetULTi64rr_toi64; - case NVPTX::VecULTV2I8: return NVPTX::ISetULTi8rr_toi8; - case NVPTX::VecULTV4I16: return NVPTX::ISetULTi16rr_toi16; - case NVPTX::VecULTV4I32: return NVPTX::ISetULTi32rr_toi32; - case NVPTX::VecULTV4I8: return NVPTX::ISetULTi8rr_toi8; - case NVPTX::VecUNEV2I16: return NVPTX::ISetUNEi16rr_toi16; - case NVPTX::VecUNEV2I32: return NVPTX::ISetUNEi32rr_toi32; - case NVPTX::VecUNEV2I64: return NVPTX::ISetUNEi64rr_toi64; - case NVPTX::VecUNEV2I8: return NVPTX::ISetUNEi8rr_toi8; - case NVPTX::VecUNEV4I16: return NVPTX::ISetUNEi16rr_toi16; - case NVPTX::VecUNEV4I32: return NVPTX::ISetUNEi32rr_toi32; - case NVPTX::VecUNEV4I8: return NVPTX::ISetUNEi8rr_toi8; - case NVPTX::INT_PTX_LDU_G_v2i8_32: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; - case NVPTX::INT_PTX_LDU_G_v4i8_32: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; - case NVPTX::INT_PTX_LDU_G_v2i16_32: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; - case NVPTX::INT_PTX_LDU_G_v4i16_32: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; - case NVPTX::INT_PTX_LDU_G_v2i32_32: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; - case NVPTX::INT_PTX_LDU_G_v4i32_32: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; - case NVPTX::INT_PTX_LDU_G_v2f32_32: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; - case 
NVPTX::INT_PTX_LDU_G_v4f32_32: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; - case NVPTX::INT_PTX_LDU_G_v2i64_32: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; - case NVPTX::INT_PTX_LDU_G_v2f64_32: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; - case NVPTX::INT_PTX_LDU_G_v2i8_64: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; - case NVPTX::INT_PTX_LDU_G_v4i8_64: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; - case NVPTX::INT_PTX_LDU_G_v2i16_64: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; - case NVPTX::INT_PTX_LDU_G_v4i16_64: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; - case NVPTX::INT_PTX_LDU_G_v2i32_64: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; - case NVPTX::INT_PTX_LDU_G_v4i32_64: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; - case NVPTX::INT_PTX_LDU_G_v2f32_64: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; - case NVPTX::INT_PTX_LDU_G_v4f32_64: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; - case NVPTX::INT_PTX_LDU_G_v2i64_64: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; - case NVPTX::INT_PTX_LDU_G_v2f64_64: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; - - case NVPTX::LoadParamV4I32: return NVPTX::LoadParamScalar4I32; - case NVPTX::LoadParamV4I16: return NVPTX::LoadParamScalar4I16; - case NVPTX::LoadParamV4I8: return NVPTX::LoadParamScalar4I8; - case NVPTX::LoadParamV2I64: return NVPTX::LoadParamScalar2I64; - case NVPTX::LoadParamV2I32: return NVPTX::LoadParamScalar2I32; - case NVPTX::LoadParamV2I16: return NVPTX::LoadParamScalar2I16; - case NVPTX::LoadParamV2I8: return NVPTX::LoadParamScalar2I8; - case NVPTX::LoadParamV4F32: return NVPTX::LoadParamScalar4F32; - case NVPTX::LoadParamV2F32: return NVPTX::LoadParamScalar2F32; - case NVPTX::LoadParamV2F64: return NVPTX::LoadParamScalar2F64; - case NVPTX::StoreParamV4I32: return NVPTX::StoreParamScalar4I32; - case NVPTX::StoreParamV4I16: return NVPTX::StoreParamScalar4I16; - case NVPTX::StoreParamV4I8: return NVPTX::StoreParamScalar4I8; - case NVPTX::StoreParamV2I64: return NVPTX::StoreParamScalar2I64; - case NVPTX::StoreParamV2I32: return NVPTX::StoreParamScalar2I32; - case NVPTX::StoreParamV2I16: return NVPTX::StoreParamScalar2I16; - case NVPTX::StoreParamV2I8: return NVPTX::StoreParamScalar2I8; - case NVPTX::StoreParamV4F32: return NVPTX::StoreParamScalar4F32; - case NVPTX::StoreParamV2F32: return NVPTX::StoreParamScalar2F32; - case NVPTX::StoreParamV2F64: return NVPTX::StoreParamScalar2F64; - case NVPTX::StoreRetvalV4I32: return NVPTX::StoreRetvalScalar4I32; - case NVPTX::StoreRetvalV4I16: return NVPTX::StoreRetvalScalar4I16; - case NVPTX::StoreRetvalV4I8: return NVPTX::StoreRetvalScalar4I8; - case NVPTX::StoreRetvalV2I64: return NVPTX::StoreRetvalScalar2I64; - case NVPTX::StoreRetvalV2I32: return NVPTX::StoreRetvalScalar2I32; - case NVPTX::StoreRetvalV2I16: return NVPTX::StoreRetvalScalar2I16; - case NVPTX::StoreRetvalV2I8: return NVPTX::StoreRetvalScalar2I8; - case NVPTX::StoreRetvalV4F32: return NVPTX::StoreRetvalScalar4F32; - case NVPTX::StoreRetvalV2F32: return NVPTX::StoreRetvalScalar2F32; - case NVPTX::StoreRetvalV2F64: return NVPTX::StoreRetvalScalar2F64; - case NVPTX::VecI32toV4I8: return NVPTX::I32toV4I8; - case NVPTX::VecI64toV4I16: return NVPTX::I64toV4I16; - case NVPTX::VecI16toV2I8: return NVPTX::I16toV2I8; - case NVPTX::VecI32toV2I16: return NVPTX::I32toV2I16; - case NVPTX::VecI64toV2I32: return NVPTX::I64toV2I32; - case NVPTX::VecF64toV2F32: return NVPTX::F64toV2F32; - - case NVPTX::LD_v2i8_avar: return NVPTX::LDV_i8_v2_avar; - case NVPTX::LD_v2i8_areg: return NVPTX::LDV_i8_v2_areg; - case NVPTX::LD_v2i8_ari: return NVPTX::LDV_i8_v2_ari; - case 
NVPTX::LD_v2i8_asi: return NVPTX::LDV_i8_v2_asi; - case NVPTX::LD_v4i8_avar: return NVPTX::LDV_i8_v4_avar; - case NVPTX::LD_v4i8_areg: return NVPTX::LDV_i8_v4_areg; - case NVPTX::LD_v4i8_ari: return NVPTX::LDV_i8_v4_ari; - case NVPTX::LD_v4i8_asi: return NVPTX::LDV_i8_v4_asi; - - case NVPTX::LD_v2i16_avar: return NVPTX::LDV_i16_v2_avar; - case NVPTX::LD_v2i16_areg: return NVPTX::LDV_i16_v2_areg; - case NVPTX::LD_v2i16_ari: return NVPTX::LDV_i16_v2_ari; - case NVPTX::LD_v2i16_asi: return NVPTX::LDV_i16_v2_asi; - case NVPTX::LD_v4i16_avar: return NVPTX::LDV_i16_v4_avar; - case NVPTX::LD_v4i16_areg: return NVPTX::LDV_i16_v4_areg; - case NVPTX::LD_v4i16_ari: return NVPTX::LDV_i16_v4_ari; - case NVPTX::LD_v4i16_asi: return NVPTX::LDV_i16_v4_asi; - - case NVPTX::LD_v2i32_avar: return NVPTX::LDV_i32_v2_avar; - case NVPTX::LD_v2i32_areg: return NVPTX::LDV_i32_v2_areg; - case NVPTX::LD_v2i32_ari: return NVPTX::LDV_i32_v2_ari; - case NVPTX::LD_v2i32_asi: return NVPTX::LDV_i32_v2_asi; - case NVPTX::LD_v4i32_avar: return NVPTX::LDV_i32_v4_avar; - case NVPTX::LD_v4i32_areg: return NVPTX::LDV_i32_v4_areg; - case NVPTX::LD_v4i32_ari: return NVPTX::LDV_i32_v4_ari; - case NVPTX::LD_v4i32_asi: return NVPTX::LDV_i32_v4_asi; - - case NVPTX::LD_v2f32_avar: return NVPTX::LDV_f32_v2_avar; - case NVPTX::LD_v2f32_areg: return NVPTX::LDV_f32_v2_areg; - case NVPTX::LD_v2f32_ari: return NVPTX::LDV_f32_v2_ari; - case NVPTX::LD_v2f32_asi: return NVPTX::LDV_f32_v2_asi; - case NVPTX::LD_v4f32_avar: return NVPTX::LDV_f32_v4_avar; - case NVPTX::LD_v4f32_areg: return NVPTX::LDV_f32_v4_areg; - case NVPTX::LD_v4f32_ari: return NVPTX::LDV_f32_v4_ari; - case NVPTX::LD_v4f32_asi: return NVPTX::LDV_f32_v4_asi; - - case NVPTX::LD_v2i64_avar: return NVPTX::LDV_i64_v2_avar; - case NVPTX::LD_v2i64_areg: return NVPTX::LDV_i64_v2_areg; - case NVPTX::LD_v2i64_ari: return NVPTX::LDV_i64_v2_ari; - case NVPTX::LD_v2i64_asi: return NVPTX::LDV_i64_v2_asi; - case NVPTX::LD_v2f64_avar: return NVPTX::LDV_f64_v2_avar; - case NVPTX::LD_v2f64_areg: return NVPTX::LDV_f64_v2_areg; - case NVPTX::LD_v2f64_ari: return NVPTX::LDV_f64_v2_ari; - case NVPTX::LD_v2f64_asi: return NVPTX::LDV_f64_v2_asi; - - case NVPTX::ST_v2i8_avar: return NVPTX::STV_i8_v2_avar; - case NVPTX::ST_v2i8_areg: return NVPTX::STV_i8_v2_areg; - case NVPTX::ST_v2i8_ari: return NVPTX::STV_i8_v2_ari; - case NVPTX::ST_v2i8_asi: return NVPTX::STV_i8_v2_asi; - case NVPTX::ST_v4i8_avar: return NVPTX::STV_i8_v4_avar; - case NVPTX::ST_v4i8_areg: return NVPTX::STV_i8_v4_areg; - case NVPTX::ST_v4i8_ari: return NVPTX::STV_i8_v4_ari; - case NVPTX::ST_v4i8_asi: return NVPTX::STV_i8_v4_asi; - - case NVPTX::ST_v2i16_avar: return NVPTX::STV_i16_v2_avar; - case NVPTX::ST_v2i16_areg: return NVPTX::STV_i16_v2_areg; - case NVPTX::ST_v2i16_ari: return NVPTX::STV_i16_v2_ari; - case NVPTX::ST_v2i16_asi: return NVPTX::STV_i16_v2_asi; - case NVPTX::ST_v4i16_avar: return NVPTX::STV_i16_v4_avar; - case NVPTX::ST_v4i16_areg: return NVPTX::STV_i16_v4_areg; - case NVPTX::ST_v4i16_ari: return NVPTX::STV_i16_v4_ari; - case NVPTX::ST_v4i16_asi: return NVPTX::STV_i16_v4_asi; - - case NVPTX::ST_v2i32_avar: return NVPTX::STV_i32_v2_avar; - case NVPTX::ST_v2i32_areg: return NVPTX::STV_i32_v2_areg; - case NVPTX::ST_v2i32_ari: return NVPTX::STV_i32_v2_ari; - case NVPTX::ST_v2i32_asi: return NVPTX::STV_i32_v2_asi; - case NVPTX::ST_v4i32_avar: return NVPTX::STV_i32_v4_avar; - case NVPTX::ST_v4i32_areg: return NVPTX::STV_i32_v4_areg; - case NVPTX::ST_v4i32_ari: return NVPTX::STV_i32_v4_ari; - case NVPTX::ST_v4i32_asi: 
return NVPTX::STV_i32_v4_asi; - - case NVPTX::ST_v2f32_avar: return NVPTX::STV_f32_v2_avar; - case NVPTX::ST_v2f32_areg: return NVPTX::STV_f32_v2_areg; - case NVPTX::ST_v2f32_ari: return NVPTX::STV_f32_v2_ari; - case NVPTX::ST_v2f32_asi: return NVPTX::STV_f32_v2_asi; - case NVPTX::ST_v4f32_avar: return NVPTX::STV_f32_v4_avar; - case NVPTX::ST_v4f32_areg: return NVPTX::STV_f32_v4_areg; - case NVPTX::ST_v4f32_ari: return NVPTX::STV_f32_v4_ari; - case NVPTX::ST_v4f32_asi: return NVPTX::STV_f32_v4_asi; - - case NVPTX::ST_v2i64_avar: return NVPTX::STV_i64_v2_avar; - case NVPTX::ST_v2i64_areg: return NVPTX::STV_i64_v2_areg; - case NVPTX::ST_v2i64_ari: return NVPTX::STV_i64_v2_ari; - case NVPTX::ST_v2i64_asi: return NVPTX::STV_i64_v2_asi; - case NVPTX::ST_v2f64_avar: return NVPTX::STV_f64_v2_avar; - case NVPTX::ST_v2f64_areg: return NVPTX::STV_f64_v2_areg; - case NVPTX::ST_v2f64_ari: return NVPTX::STV_f64_v2_ari; - case NVPTX::ST_v2f64_asi: return NVPTX::STV_f64_v2_asi; - } - return 0; -} diff --git a/lib/Target/NVPTX/gen-register-defs.py b/lib/Target/NVPTX/gen-register-defs.py deleted file mode 100644 index ed06668..0000000 --- a/lib/Target/NVPTX/gen-register-defs.py +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/env python - -num_regs = 396 - -outFile = open('NVPTXRegisterInfo.td', 'w') - -outFile.write(''' -//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Declarations that describe the PTX register file -//===----------------------------------------------------------------------===// - -class NVPTXReg<string n> : Register<n> { - let Namespace = "NVPTX"; -} - -class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList> - : RegisterClass <"NVPTX", regTypes, alignment, regList>; - -//===----------------------------------------------------------------------===// -// Registers -//===----------------------------------------------------------------------===// - -// Special Registers used as stack pointer -def VRFrame : NVPTXReg<"%SP">; -def VRFrameLocal : NVPTXReg<"%SPL">; - -// Special Registers used as the stack -def VRDepot : NVPTXReg<"%Depot">; -''') - -# Predicates -outFile.write(''' -//===--- Predicate --------------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def P%d : NVPTXReg<"%%p%d">;\n' % (i, i)) - -# Int8 -outFile.write(''' -//===--- 8-bit ------------------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def RC%d : NVPTXReg<"%%rc%d">;\n' % (i, i)) - -# Int16 -outFile.write(''' -//===--- 16-bit -----------------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def RS%d : NVPTXReg<"%%rs%d">;\n' % (i, i)) - -# Int32 -outFile.write(''' -//===--- 32-bit -----------------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def R%d : NVPTXReg<"%%r%d">;\n' % (i, i)) - -# Int64 -outFile.write(''' -//===--- 64-bit -----------------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def RL%d : NVPTXReg<"%%rl%d">;\n' % (i, i)) - -# F32 -outFile.write(''' -//===--- 32-bit float 
-----------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def F%d : NVPTXReg<"%%f%d">;\n' % (i, i)) - -# F64 -outFile.write(''' -//===--- 64-bit float -----------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def FL%d : NVPTXReg<"%%fl%d">;\n' % (i, i)) - -# Vector registers -outFile.write(''' -//===--- Vector -----------------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def v2b8_%d : NVPTXReg<"%%v2b8_%d">;\n' % (i, i)) -for i in range(0, num_regs): - outFile.write('def v2b16_%d : NVPTXReg<"%%v2b16_%d">;\n' % (i, i)) -for i in range(0, num_regs): - outFile.write('def v2b32_%d : NVPTXReg<"%%v2b32_%d">;\n' % (i, i)) -for i in range(0, num_regs): - outFile.write('def v2b64_%d : NVPTXReg<"%%v2b64_%d">;\n' % (i, i)) - -for i in range(0, num_regs): - outFile.write('def v4b8_%d : NVPTXReg<"%%v4b8_%d">;\n' % (i, i)) -for i in range(0, num_regs): - outFile.write('def v4b16_%d : NVPTXReg<"%%v4b16_%d">;\n' % (i, i)) -for i in range(0, num_regs): - outFile.write('def v4b32_%d : NVPTXReg<"%%v4b32_%d">;\n' % (i, i)) - -# Argument registers -outFile.write(''' -//===--- Arguments --------------------------------------------------------===// -''') -for i in range(0, num_regs): - outFile.write('def ia%d : NVPTXReg<"%%ia%d">;\n' % (i, i)) -for i in range(0, num_regs): - outFile.write('def la%d : NVPTXReg<"%%la%d">;\n' % (i, i)) -for i in range(0, num_regs): - outFile.write('def fa%d : NVPTXReg<"%%fa%d">;\n' % (i, i)) -for i in range(0, num_regs): - outFile.write('def da%d : NVPTXReg<"%%da%d">;\n' % (i, i)) - -outFile.write(''' -//===----------------------------------------------------------------------===// -// Register classes -//===----------------------------------------------------------------------===// -''') - -outFile.write('def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%%u", 0, %d))>;\n' % (num_regs-1)) -outFile.write('def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%%u", 0, %d))>;\n' % (num_regs-1)) -outFile.write('def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%%u", 0, %d))>;\n' % (num_regs-1)) -outFile.write('def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%%u", 0, %d))>;\n' % (num_regs-1)) -outFile.write('def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%%u", 0, %d))>;\n' % (num_regs-1)) - -outFile.write('def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%%u", 0, %d))>;\n' % (num_regs-1)) -outFile.write('def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%%u", 0, %d))>;\n' % (num_regs-1)) - -outFile.write('def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%%u", 0, %d))>;\n' % (num_regs-1)) -outFile.write('def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%%u", 0, %d))>;\n' % (num_regs-1)) -outFile.write('def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%%u", 0, %d))>;\n' % (num_regs-1)) -outFile.write('def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%%u", 0, %d))>;\n' % (num_regs-1)) - -outFile.write(''' -// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used. 
-def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>; -''') - -outFile.write(''' -class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList, - NVPTXRegClass sClass, - int e, - string n> - : NVPTXRegClass<regTypes, alignment, regList> -{ - NVPTXRegClass scalarClass=sClass; - int elems=e; - string name=n; -} -''') - - -outFile.write('def V2F32Regs\n : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Float32Regs, 2, ".v2.f32">;\n' % (num_regs-1)) -outFile.write('def V4F32Regs\n : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Float32Regs, 4, ".v4.f32">;\n' % (num_regs-1)) - -outFile.write('def V2I32Regs\n : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Int32Regs, 2, ".v2.u32">;\n' % (num_regs-1)) -outFile.write('def V4I32Regs\n : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Int32Regs, 4, ".v4.u32">;\n' % (num_regs-1)) - -outFile.write('def V2F64Regs\n : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Float64Regs, 2, ".v2.f64">;\n' % (num_regs-1)) -outFile.write('def V2I64Regs\n : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Int64Regs, 2, ".v2.u64">;\n' % (num_regs-1)) - -outFile.write('def V2I16Regs\n : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%%u", 0, %d)),\n Int16Regs, 2, ".v2.u16">;\n' % (num_regs-1)) -outFile.write('def V4I16Regs\n : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%%u", 0, %d)),\n Int16Regs, 4, ".v4.u16">;\n' % (num_regs-1)) - -outFile.write('def V2I8Regs\n : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%%u", 0, %d)),\n Int8Regs, 2, ".v2.u8">;\n' % (num_regs-1)) -outFile.write('def V4I8Regs\n : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%%u", 0, %d)),\n Int8Regs, 4, ".v4.u8">;\n' % (num_regs-1)) - -outFile.close() - - -outFile = open('NVPTXNumRegisters.h', 'w') -outFile.write(''' -//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef NVPTX_NUM_REGISTERS_H -#define NVPTX_NUM_REGISTERS_H - -namespace llvm { - -const unsigned NVPTXNumRegisters = %d; - -} - -#endif -''' % num_regs) - -outFile.close() diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 192d18d..6036428 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -26,6 +26,7 @@ add_llvm_target(PowerPCCodeGen PPCRegisterInfo.cpp PPCSubtarget.cpp PPCTargetMachine.cpp + PPCTargetTransformInfo.cpp PPCSelectionDAGInfo.cpp ) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index d61e741..61868d4 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -151,7 +151,24 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, Type = ELF::R_PPC64_TOC; break; case PPC::fixup_ppc_toc16: - Type = ELF::R_PPC64_TOC16; + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_TPREL16_LO: + Type = ELF::R_PPC64_TPREL16_LO; + break; + case MCSymbolRefExpr::VK_PPC_DTPREL16_LO: + Type = ELF::R_PPC64_DTPREL16_LO; + break; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC64_TOC16; + break; + case MCSymbolRefExpr::VK_PPC_TOC16_LO: + Type = ELF::R_PPC64_TOC16_LO; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO: + Type = ELF::R_PPC64_GOT_TLSLD16_LO; + break; + } break; case PPC::fixup_ppc_toc16_ds: switch (Modifier) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 215aa40..a25d7fe 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -17,8 +17,9 @@ using namespace llvm; void PPCMCAsmInfoDarwin::anchor() { } PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { - if (is64Bit) - PointerSize = 8; + if (is64Bit) { + PointerSize = CalleeSaveStackSlotSize = 8; + } IsLittleEndian = false; PCSymbol = "."; @@ -35,8 +36,9 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { void PPCLinuxMCAsmInfo::anchor() { } PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { - if (is64Bit) - PointerSize = 8; + if (is64Bit) { + PointerSize = CalleeSaveStackSlotSize = 8; + } IsLittleEndian = false; // ".comm align is in bytes but .align is pow-2." diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index e6d38eb..f71979f 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -25,6 +25,7 @@ namespace llvm { class PPCTargetMachine; class FunctionPass; + class ImmutablePass; class JITCodeEmitter; class MachineInstr; class AsmPrinter; @@ -37,6 +38,9 @@ namespace llvm { JITCodeEmitter &MCE); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); + + /// \brief Creates a PPC-specific Target Transformation Info pass. + ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM); namespace PPCII { @@ -53,26 +57,32 @@ namespace llvm { /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to /// the function's picbase, e.g. lo16(symbol-picbase). - MO_PIC_FLAG = 4, + MO_PIC_FLAG = 2, /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
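For orientation on the renumbering just below: the boolean operand flags now occupy the low four bits, and the multi-valued access kind moves down to bits 4-7, which is why MO_ACCESS_MASK widens from 0xe0 to 0xf0 and why there is room for the three new TLS/TOC access values. A self-contained illustration of how the new layout packs and decodes (values mirror the hunk; the snippet itself is not part of the patch):

#include <cassert>

enum {
  MO_PIC_FLAG        = 2,        // boolean flags: low four bits
  MO_NLP_FLAG        = 4,
  MO_NLP_HIDDEN_FLAG = 8,
  MO_ACCESS_MASK     = 0xf0,     // multi-valued access kind: bits 4-7
  MO_LO16            = 1 << 4,
  MO_HA16            = 2 << 4,
  MO_TPREL16_HA      = 3 << 4,
  MO_TPREL16_LO      = 4 << 4,
  MO_DTPREL16_LO     = 5 << 4,
  MO_TLSLD16_LO      = 6 << 4,
  MO_TOC16_LO        = 7 << 4   // highest kind still inside the mask
};

int main() {
  unsigned Flags = MO_LO16 | MO_PIC_FLAG;        // lo16(symbol-picbase)
  assert((Flags & MO_ACCESS_MASK) == MO_LO16);   // access kind decodes cleanly
  assert(Flags & MO_PIC_FLAG);                   // boolean flag is unaffected
  assert((MO_TOC16_LO & ~MO_ACCESS_MASK) == 0);  // all kinds fit in the field
  return 0;
}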
- MO_NLP_FLAG = 8, + MO_NLP_FLAG = 4, /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a /// symbol with hidden visibility. This causes a different kind of /// non-lazy-pointer to be generated. - MO_NLP_HIDDEN_FLAG = 16, + MO_NLP_HIDDEN_FLAG = 8, /// The next are not flags but distinct values. - MO_ACCESS_MASK = 0xe0, + MO_ACCESS_MASK = 0xf0, /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol) - MO_LO16 = 1 << 5, - MO_HA16 = 2 << 5, + MO_LO16 = 1 << 4, + MO_HA16 = 2 << 4, + + MO_TPREL16_HA = 3 << 4, + MO_TPREL16_LO = 4 << 4, - MO_TPREL16_HA = 3 << 5, - MO_TPREL16_LO = 4 << 5 + /// These values identify relocations on immediates folded + /// into memory operations. + MO_DTPREL16_LO = 5 << 4, + MO_TLSLD16_LO = 6 << 4, + MO_TOC16_LO = 7 << 4 }; } // end namespace PPCII diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index cb15dad..9929136 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -39,7 +39,12 @@ def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_E500mc", "">; def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_E5500", "">; +def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">; +def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">; +def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">; +def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">; def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; +def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">; def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", @@ -58,6 +63,25 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", "Enable the isel instruction">; def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", "Enable Book E instructions">; +def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", + "Enable QPX instructions">; + +// Note: Future features to add when support is extended to more +// recent ISA levels: +// +// CMPB p6, p6x, p7 cmpb +// DFP p6, p6x, p7 decimal floating-point instructions +// FLT_CVT p7 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz +// FPRND p5x, p6, p6x, p7 frim, frin, frip, friz +// FRE p5 through p7 fre (vs. fres, available since p3) +// FRSQRTES p5 through p7 frsqrtes (vs. 
frsqrte, available since p3) +// LDBRX p7 load with byte reversal +// LFIWAX p6, p6x, p7 lfiwax +// LFIWZX p7 lfiwzx +// POPCNTB p5 through p7 popcntb and related instructions +// POPCNTD p7 popcntd and related instructions +// RECIP_PREC p6, p6x, p7 higher precision reciprocal estimates +// VSX p7 vector-scalar instruction set //===----------------------------------------------------------------------===// // Register File Description @@ -109,10 +133,30 @@ def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, FeatureSTFIWX, FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>; +def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE, + FeatureMFOCRF, FeatureFSqrt, + FeatureSTFIWX, FeatureISEL, + Feature64Bit /*, Feature64BitRegs */, + FeatureQPX]>; +def : Processor<"pwr3", G5Itineraries, + [DirectivePwr3, FeatureAltivec, FeatureMFOCRF, + FeatureSTFIWX, Feature64Bit]>; +def : Processor<"pwr4", G5Itineraries, + [DirectivePwr4, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; +def : Processor<"pwr5", G5Itineraries, + [DirectivePwr5, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; +def : Processor<"pwr5x", G5Itineraries, + [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; def : Processor<"pwr6", G5Itineraries, [DirectivePwr6, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; +def : Processor<"pwr6x", G5Itineraries, + [DirectivePwr6x, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; def : Processor<"pwr7", G5Itineraries, [DirectivePwr7, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index adb673b..eae9b7b 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -464,12 +464,15 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // associated TOC entry. Otherwise reference the symbol directly. TmpInst.setOpcode(PPC::LDrs); const MachineOperand &MO = MI->getOperand(1); - assert((MO.isGlobal() || MO.isJTI()) && "Invalid operand for LDtocL!"); + assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) && + "Invalid operand for LDtocL!"); MCSymbol *MOSymbol = 0; if (MO.isJTI()) MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex())); - else { + else if (MO.isCPI()) + MOSymbol = GetCPISymbol(MO.getIndex()); + else if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); const GlobalValue *RealGValue = GAlias ? @@ -732,14 +735,14 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { // Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function // entry point. OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), - 8/*size*/, 0/*addrspace*/); + 8 /*size*/); MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.")); // Generates a R_PPC64_TOC relocation for TOC base insertion. OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, MCSymbolRefExpr::VK_PPC_TOC, OutContext), - 8/*size*/, 0/*addrspace*/); + 8/*size*/); // Emit a null environment pointer.
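The three emissions here (entry point, .TOC., and the null word just below) together form a 64-bit ELF function descriptor, the .opd entry that callers and the dynamic linker dereference. Roughly this shape (a sketch for orientation only, not code from the patch):

#include <cstdint>

// Layout of the descriptor EmitFunctionEntryLabel lays down for 64-bit ELF:
struct PPC64FunctionDescriptor {
  uint64_t EntryPoint; // local entry symbol -> R_PPC64_ADDR64 relocation
  uint64_t TOCBase;    // .TOC.              -> R_PPC64_TOC relocation
  uint64_t Environ;    // environment pointer, emitted here as 0
};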
- OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */); + OutStreamer.EmitIntValue(0, 8 /* size */); OutStreamer.SwitchSection(Current); MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( @@ -768,6 +771,25 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { } } + MachineModuleInfoELF &MMIELF = + MMI->getObjFileInfo<MachineModuleInfoELF>(); + + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + // L_foo$stub: + OutStreamer.EmitLabel(Stubs[i].first); + // .long _foo + OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(), + OutContext), + isPPC64 ? 8 : 4/*size*/); + } + + Stubs.clear(); + OutStreamer.AddBlankLine(); + } + return AsmPrinter::doFinalization(M); } @@ -802,7 +824,12 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { "ppcA2", "ppce500mc", "ppce5500", + "power3", + "power4", + "power5", + "power5x", "power6", + "power6x", "power7", "ppc64" }; @@ -817,8 +844,11 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { assert(Directive <= PPC::DIR_64 && "Directive out of range."); // FIXME: This is a total hack, finish mc'izing the PPC backend. - if (OutStreamer.hasRawTextSupport()) + if (OutStreamer.hasRawTextSupport()) { + assert(Directive < sizeof(CPUDirectives) / sizeof(*CPUDirectives) && + "CPUDirectives[] might not be up-to-date!"); OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive])); + } // Prime text sections so they are adjacent. This reduces the likelihood a // large data or debug section causes a branch to exceed 16M limit. @@ -1031,7 +1061,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { if (MCSym.getInt()) // External to current translation unit. - OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/); else // Internal to current translation unit. // @@ -1041,7 +1071,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { // fill in the value for the NLP in those cases. OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), OutContext), - isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + isPPC64 ? 8 : 4/*size*/); } Stubs.clear(); @@ -1060,7 +1090,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { OutStreamer.EmitValue(MCSymbolRefExpr:: Create(Stubs[i].second.getPointer(), OutContext), - isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + isPPC64 ? 8 : 4/*size*/); } Stubs.clear(); diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 9911575..bd1c378 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -28,10 +28,16 @@ using namespace llvm; STATISTIC(NumExpanded, "Number of branches expanded to long format"); +namespace llvm { + void initializePPCBSelPass(PassRegistry&); +} + namespace { struct PPCBSel : public MachineFunctionPass { static char ID; - PPCBSel() : MachineFunctionPass(ID) {} + PPCBSel() : MachineFunctionPass(ID) { + initializePPCBSelPass(*PassRegistry::getPassRegistry()); + } /// BlockSizes - The sizes of the basic blocks in the function.
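BlockSizes, declared next, is what lets this pass answer its central question: after layout, does a conditional branch's byte displacement still fit the signed 16-bit immediate field? When it does not, the branch is expanded into an inverted short branch over an unconditional long one. A hedged sketch of that range check (the helper name is illustrative, not from the pass):

#include <cstdint>

// PPC conditional branches encode a signed 16-bit byte displacement.
// Out-of-range branches are expanded:
//   bcc  target        =>   b!cc  $+8
//                           b     target
static bool fitsConditionalBranch(int64_t Displacement) {
  return Displacement >= -32768 && Displacement <= 32767;
}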
std::vector<unsigned> BlockSizes; @@ -45,6 +51,9 @@ namespace { char PPCBSel::ID = 0; } +INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector", + false, false) + /// createPPCBranchSelectionPass - returns an instance of the Branch Selection /// Pass /// diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index a74932c..b98cc48 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -54,6 +54,10 @@ using namespace llvm; STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); +namespace llvm { + void initializePPCCTRLoopsPass(PassRegistry&); +} + namespace { class CountValue; struct PPCCTRLoops : public MachineFunctionPass { @@ -64,7 +68,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - PPCCTRLoops() : MachineFunctionPass(ID) {} + PPCCTRLoops() : MachineFunctionPass(ID) { + initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -174,6 +180,12 @@ namespace { }; } // end anonymous namespace +INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", + false, false) /// isCompareEquals - Returns true if the instruction is a compare equals /// instruction with an immediate operand. diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 3f87e88..caeb179 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -27,9 +27,10 @@ def RetCC_PPC : CallingConv<[ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, + CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, - CCIfType<[f32], CCAssignToReg<[F1]>>, - CCIfType<[f64], CCAssignToReg<[F1, F2]>>, + CCIfType<[f32], CCAssignToReg<[F1, F2]>>, + CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>, // Vector types are always returned in V2. CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>> @@ -37,49 +38,20 @@ def RetCC_PPC : CallingConv<[ //===----------------------------------------------------------------------===// -// PowerPC Argument Calling Conventions -//===----------------------------------------------------------------------===// -/* -def CC_PPC : CallingConv<[ - // The first 8 integer arguments are passed in integer registers. - CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, - CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>, - - // Common sub-targets passes FP values in F1 - F13 - CCIfType<[f32, f64], - CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>, - - // The first 12 Vector arguments are passed in altivec registers. - CCIfType<[v16i8, v8i16, v4i32, v4f32], - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>> - -/* - // Integer/FP values get stored in stack slots that are 8 bytes in size and - // 8-byte aligned if there are no more registers to hold them. - CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, - - // Vectors get 16-byte stack slots that are 16-byte aligned. 
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCAssignToStack<16, 16>>*/ -]>; - -*/ - -//===----------------------------------------------------------------------===// -// PowerPC System V Release 4 ABI +// PowerPC System V Release 4 32-bit ABI //===----------------------------------------------------------------------===// -def CC_PPC_SVR4_Common : CallingConv<[ +def CC_PPC32_SVR4_Common : CallingConv<[ // The ABI requires i64 to be passed in two adjacent registers with the first // register having an odd register number. - CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>, + CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>, // The first 8 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, // Make sure the i64 words from a long double are either both passed in // registers or both passed on the stack. - CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignFPArgRegs">>>, + CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignFPArgRegs">>>, // FP values are passed in F1 - F8. CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, @@ -100,18 +72,18 @@ def CC_PPC_SVR4_Common : CallingConv<[ // This calling convention puts vector arguments always on the stack. It is used // to assign vector arguments which belong to the variable portion of the // parameter list of a variable argument function. -def CC_PPC_SVR4_VarArg : CallingConv<[ - CCDelegateTo<CC_PPC_SVR4_Common> +def CC_PPC32_SVR4_VarArg : CallingConv<[ + CCDelegateTo<CC_PPC32_SVR4_Common> ]>; -// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put -// vector arguments in vector registers before putting them on the stack. -def CC_PPC_SVR4 : CallingConv<[ +// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to +// put vector arguments in vector registers before putting them on the stack. +def CC_PPC32_SVR4 : CallingConv<[ // The first 12 Vector arguments are passed in AltiVec registers. CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>, - CCDelegateTo<CC_PPC_SVR4_Common> + CCDelegateTo<CC_PPC32_SVR4_Common> ]>; // Helper "calling convention" to handle aggregate by value arguments. @@ -122,15 +94,15 @@ def CC_PPC_SVR4 : CallingConv<[ // Still, the address of the aggregate copy in the callers stack frame is passed // in a GPR (or in the parameter list area if all GPRs are allocated) from the // caller to the callee. The location for the address argument is assigned by -// the CC_PPC_SVR4 calling convention. +// the CC_PPC32_SVR4 calling convention. // -// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are +// The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are // not passed by value. -def CC_PPC_SVR4_ByVal : CallingConv<[ +def CC_PPC32_SVR4_ByVal : CallingConv<[ CCIfByVal<CCPassByVal<4, 4>>, - CCCustom<"CC_PPC_SVR4_Custom_Dummy"> + CCCustom<"CC_PPC32_SVR4_Custom_Dummy"> ]>; def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20, diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 5901f36..0a396e6 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -119,12 +119,21 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. 
UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. } - for (MachineRegisterInfo::liveout_iterator - I = MF->getRegInfo().liveout_begin(), - E = MF->getRegInfo().liveout_end(); I != E; ++I) { - unsigned RegNo = getPPCRegisterNumbering(*I); - if (VRRegNo[RegNo] == *I) // If this really is a vector reg. - UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. + + // Live out registers appear as use operands on return instructions. + for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); + UsedRegMask != 0 && BI != BE; ++BI) { + const MachineBasicBlock &MBB = *BI; + if (MBB.empty() || !MBB.back().isReturn()) + continue; + const MachineInstr &Ret = MBB.back(); + for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { + const MachineOperand &MO = Ret.getOperand(I); + if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) + continue; + unsigned RegNo = getPPCRegisterNumbering(MO.getReg()); + UsedRegMask &= ~(1 << (31-RegNo)); + } } // If no registers are used, turn this into a copy. @@ -198,13 +207,14 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { // to adjust the stack pointer (we fit in the Red Zone). For 64-bit // SVR4, we also require a stack frame if we need to spill the CR, // since this spill area is addressed relative to the stack pointer. + // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate + // stackless code if all local vars are reg-allocated. bool DisableRedZone = MF.getFunction()->getAttributes(). hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone); - // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can - // still generate stackless code if all local vars are reg-allocated. - // Try: (FrameSize <= 224 - // || (FrameSize == 0 && Subtarget.isPPC32 && Subtarget.isSVR4ABI())) if (!DisableRedZone && + (Subtarget.isPPC64() || // 32-bit SVR4, no stack- + !Subtarget.isSVR4ABI() || // allocated locals. + FrameSize == 0) && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. @@ -777,7 +787,8 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); unsigned LR = RegInfo->getRARegister(); FI->setMustSaveLR(MustSaveLR(MF, LR)); - MF.getRegInfo().setPhysRegUnused(LR); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MRI.setPhysRegUnused(LR); // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); @@ -802,6 +813,16 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); } + // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the + // function uses CR 2, 3, or 4. + if (!isPPC64 && !isDarwinABI && + (MRI.isPhysRegUsed(PPC::CR2) || + MRI.isPhysRegUsed(PPC::CR3) || + MRI.isPhysRegUsed(PPC::CR4))) { + int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); + FI->setCRSpillFrameIndex(FrameIdx); + } + // Reserve a slot closest to SP or frame pointer if we have a dynalloc or // a large stack, which will require scavenging a register to materialize a // large offset. 
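Condensing the determineFrameLayout change above: a function may stay stackless only when everything fits in the red zone, and since 32-bit SVR4 has no red zone at all, it only qualifies with a completely empty frame. A compact restatement of that predicate (the in-tree test also folds in CR spills and related constraints not repeated here):

#include <cstdint>

static bool canOmitStackFrame(bool IsPPC64, bool IsSVR4ABI, uint64_t FrameSize,
                              bool HasVarSizedObjects, bool AdjustsStack) {
  bool HasRedZone = IsPPC64 || !IsSVR4ABI; // 32-bit SVR4: no red zone
  return (HasRedZone || FrameSize == 0) &&
         FrameSize <= 224 &&      // fits in the red zone
         !HasVarSizedObjects &&   // no dynamic alloca
         !AdjustsStack;           // no calls
}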
@@ -1115,6 +1136,47 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, .addReg(MoveReg)); } +void PPCFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const PPCInstrInfo &TII = + *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo()); + if (MF.getTarget().Options.GuaranteedTailCallOpt && + I->getOpcode() == PPC::ADJCALLSTACKUP) { + // Add (actually subtract) back the amount the callee popped on return. + if (int CalleeAmt = I->getOperand(1).getImm()) { + bool is64Bit = Subtarget.isPPC64(); + CalleeAmt *= -1; + unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; + unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; + unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; + unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; + unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; + unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; + MachineInstr *MI = I; + DebugLoc dl = MI->getDebugLoc(); + + if (isInt<16>(CalleeAmt)) { + BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) + .addReg(StackReg, RegState::Kill) + .addImm(CalleeAmt); + } else { + MachineBasicBlock::iterator MBBI = I; + BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) + .addImm(CalleeAmt >> 16); + BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) + .addReg(TmpReg, RegState::Kill) + .addImm(CalleeAmt & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) + .addReg(StackReg, RegState::Kill) + .addReg(TmpReg); + } + } + } + // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. + MBB.erase(I); +} + bool PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index 3517d8c..d09e47f 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -27,7 +27,8 @@ class PPCFrameLowering: public TargetFrameLowering { public: PPCFrameLowering(const PPCSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0), + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, + (sti.hasQPX() || sti.isBGQ()) ? 
32 : 16, 0), Subtarget(sti) { } @@ -50,6 +51,10 @@ public: const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 762b346..17bea8a 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -23,9 +23,9 @@ #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -34,6 +34,10 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; +namespace llvm { + void initializePPCDAGToDAGISelPass(PassRegistry&); +} + namespace { //===--------------------------------------------------------------------===// /// PPCDAGToDAGISel - PPC specific code to select PPC machine @@ -48,7 +52,9 @@ namespace { explicit PPCDAGToDAGISel(PPCTargetMachine &tm) : SelectionDAGISel(tm), TM(tm), PPCLowering(*TM.getTargetLowering()), - PPCSubTarget(*TM.getSubtargetImpl()) {} + PPCSubTarget(*TM.getSubtargetImpl()) { + initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF) { // Make sure we re-emit a set of the global base reg if necessary @@ -61,6 +67,8 @@ namespace { return true; } + virtual void PostprocessISelDAG(); + /// getI32Imm - Return a target constant with the specified value, of type /// i32. inline SDValue getI32Imm(unsigned Imm) { @@ -1273,16 +1281,17 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { case PPCISD::TOC_ENTRY: { assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI"); - // For medium code model, we generate two instructions as described - // below. Otherwise we allow SelectCodeCommon to handle this, selecting - // one of LDtoc, LDtocJTI, and LDtocCPT. - if (TM.getCodeModel() != CodeModel::Medium) + // For medium and large code model, we generate two instructions as + // described below. Otherwise we allow SelectCodeCommon to handle this, + // selecting one of LDtoc, LDtocJTI, and LDtocCPT. + CodeModel::Model CModel = TM.getCodeModel(); + if (CModel != CodeModel::Medium && CModel != CodeModel::Large) break; // The first source operand is a TargetGlobalAddress or a // TargetJumpTable. 
If it is an externally defined symbol, a symbol // with common linkage, a function address, or a jump table address, - // we generate: + // or if we are generating code for large code model, we generate: // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>)) // Otherwise we generate: // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>) @@ -1291,7 +1300,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, TOCbase, GA); - if (isa<JumpTableSDNode>(GA)) + if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large) return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, SDValue(Tmp, 0)); @@ -1316,11 +1325,231 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, SDValue(Tmp, 0), GA); } + case PPCISD::VADD_SPLAT: { + // This expands into one of three sequences, depending on whether + // the first operand is odd or even, positive or negative. + assert(isa<ConstantSDNode>(N->getOperand(0)) && + isa<ConstantSDNode>(N->getOperand(1)) && + "Invalid operand on VADD_SPLAT!"); + + int Elt = N->getConstantOperandVal(0); + int EltSize = N->getConstantOperandVal(1); + unsigned Opc1, Opc2, Opc3; + EVT VT; + + if (EltSize == 1) { + Opc1 = PPC::VSPLTISB; + Opc2 = PPC::VADDUBM; + Opc3 = PPC::VSUBUBM; + VT = MVT::v16i8; + } else if (EltSize == 2) { + Opc1 = PPC::VSPLTISH; + Opc2 = PPC::VADDUHM; + Opc3 = PPC::VSUBUHM; + VT = MVT::v8i16; + } else { + assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!"); + Opc1 = PPC::VSPLTISW; + Opc2 = PPC::VADDUWM; + Opc3 = PPC::VSUBUWM; + VT = MVT::v4i32; + } + + if ((Elt & 1) == 0) { + // Elt is even, in the range [-32,-18] + [16,30]. + // + // Convert: VADD_SPLAT elt, size + // Into: tmp = VSPLTIS[BHW] elt + // VADDU[BHW]M tmp, tmp + // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4 + SDValue EltVal = getI32Imm(Elt >> 1); + SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + SDValue TmpVal = SDValue(Tmp, 0); + return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal); + + } else if (Elt > 0) { + // Elt is odd and positive, in the range [17,31]. + // + // Convert: VADD_SPLAT elt, size + // Into: tmp1 = VSPLTIS[BHW] elt-16 + // tmp2 = VSPLTIS[BHW] -16 + // VSUBU[BHW]M tmp1, tmp2 + SDValue EltVal = getI32Imm(Elt - 16); + SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + EltVal = getI32Imm(-16); + SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), + SDValue(Tmp2, 0)); + + } else { + // Elt is odd and negative, in the range [-31,-17]. + // + // Convert: VADD_SPLAT elt, size + // Into: tmp1 = VSPLTIS[BHW] elt+16 + // tmp2 = VSPLTIS[BHW] -16 + // VADDU[BHW]M tmp1, tmp2 + SDValue EltVal = getI32Imm(Elt + 16); + SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + EltVal = getI32Imm(-16); + SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), + SDValue(Tmp2, 0)); + } + } } return SelectCode(N); } +/// PostProcessISelDAG - Perform some late peephole optimizations +/// on the DAG representation. +void PPCDAGToDAGISel::PostprocessISelDAG() { + + // Skip peepholes at -O0. + if (TM.getOptLevel() == CodeGenOpt::None) + return; + + // These optimizations are currently supported only for 64-bit SVR4. 
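The three VADD_SPLAT forms above all come down to composing a splat value in
[-32,31] out of vspltis* immediates, which are limited to 5-bit signed values
in [-16,15]. The immediate arithmetic, restated as a self-contained sketch
(for illustration only, not LLVM code):

#include <cassert>
#include <cstdio>

// Returns the two vspltis* immediates for splat value Elt, plus whether the
// combining operation is an add (vaddu*m) or a subtract (vsubu*m).
static char vaddSplatImms(int Elt, int &Imm1, int &Imm2) {
  assert(Elt >= -32 && Elt <= 31 && "outside the handled range");
  if ((Elt & 1) == 0) {         // even: splat Elt/2 and add it to itself
    Imm1 = Imm2 = Elt >> 1;
    return '+';
  }
  if (Elt > 0) {                // odd positive: (Elt - 16) - (-16)
    Imm1 = Elt - 16; Imm2 = -16;
    return '-';
  }
  Imm1 = Elt + 16; Imm2 = -16;  // odd negative: (Elt + 16) + (-16)
  return '+';
}

int main() {
  int A, B;
  char Op = vaddSplatImms(27, A, B);   // 27 = 11 - (-16)
  std::printf("splat 27:  vspltisw %d, vspltisw %d, combine '%c'\n", A, B, Op);
  Op = vaddSplatImms(-20, A, B);       // -20 = -10 + -10
  std::printf("splat -20: vspltisw %d, vspltisw %d, combine '%c'\n", A, B, Op);
  return 0;
}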
+ if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64()) + return; + + SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); + ++Position; + + while (Position != CurDAG->allnodes_begin()) { + SDNode *N = --Position; + // Skip dead nodes and any non-machine opcodes. + if (N->use_empty() || !N->isMachineOpcode()) + continue; + + unsigned FirstOp; + unsigned StorageOpcode = N->getMachineOpcode(); + + switch (StorageOpcode) { + default: continue; + + case PPC::LBZ: + case PPC::LBZ8: + case PPC::LD: + case PPC::LFD: + case PPC::LFS: + case PPC::LHA: + case PPC::LHA8: + case PPC::LHZ: + case PPC::LHZ8: + case PPC::LWA: + case PPC::LWZ: + case PPC::LWZ8: + FirstOp = 0; + break; + + case PPC::STB: + case PPC::STB8: + case PPC::STD: + case PPC::STFD: + case PPC::STFS: + case PPC::STH: + case PPC::STH8: + case PPC::STW: + case PPC::STW8: + FirstOp = 1; + break; + } + + // If this is a load or store with a zero offset, we may be able to + // fold an add-immediate into the memory operation. + if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) || + N->getConstantOperandVal(FirstOp) != 0) + continue; + + SDValue Base = N->getOperand(FirstOp + 1); + if (!Base.isMachineOpcode()) + continue; + + unsigned Flags = 0; + bool ReplaceFlags = true; + + // When the feeding operation is an add-immediate of some sort, + // determine whether we need to add relocation information to the + // target flags on the immediate operand when we fold it into the + // load instruction. + // + // For something like ADDItocL, the relocation information is + // inferred from the opcode; when we process it in the AsmPrinter, + // we add the necessary relocation there. A load, though, can receive + // relocation from various flavors of ADDIxxx, so we need to carry + // the relocation information in the target flags. + switch (Base.getMachineOpcode()) { + default: continue; + + case PPC::ADDI8: + case PPC::ADDI8L: + case PPC::ADDIL: + // In some cases (such as TLS) the relocation information + // is already in place on the operand, so copying the operand + // is sufficient. + ReplaceFlags = false; + // For these cases, the immediate may not be divisible by 4, in + // which case the fold is illegal for DS-form instructions. (The + // other cases provide aligned addresses and are always safe.) + if ((StorageOpcode == PPC::LWA || + StorageOpcode == PPC::LD || + StorageOpcode == PPC::STD) && + (!isa<ConstantSDNode>(Base.getOperand(1)) || + Base.getConstantOperandVal(1) % 4 != 0)) + continue; + break; + case PPC::ADDIdtprelL: + Flags = PPCII::MO_DTPREL16_LO; + break; + case PPC::ADDItlsldL: + Flags = PPCII::MO_TLSLD16_LO; + break; + case PPC::ADDItocL: + Flags = PPCII::MO_TOC16_LO; + break; + } + + // We found an opportunity. Reverse the operands from the add + // immediate and substitute them into the load or store. If + // needed, update the target flags for the immediate operand to + // reflect the necessary relocation information. + DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); + DEBUG(Base->dump(CurDAG)); + DEBUG(dbgs() << "\nN: "); + DEBUG(N->dump(CurDAG)); + DEBUG(dbgs() << "\n"); + + SDValue ImmOpnd = Base.getOperand(1); + + // If the relocation information isn't already present on the + // immediate operand, add it now. 
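Concretely, the fold turns an add-immediate feeding a zero-offset memory
operation into a single memory operation carrying the add's base and
immediate. The DS-form restriction mentioned above can be restated as a
small self-contained check (a sketch, not the in-tree code):

#include <cstdint>

// Shape of the rewrite, in assembly terms:
//   addi 3, 2, sym@toc@l        becomes      ld 4, sym@toc@l(2)
//   ld   4, 0(3)
//
// DS-form ops (ld/std/lwa) encode a 14-bit signed word offset: the byte
// offset must be a multiple of 4 as well as fit in 16 bits. D-form ops
// (lwz, lbz, stb, ...) only need the 16-bit fit.
static bool offsetLegalForDSForm(int64_t Imm) {
  return Imm >= -32768 && Imm <= 32767 && (Imm & 3) == 0;
}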
+ if (ReplaceFlags) { + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { + DebugLoc dl = GA->getDebugLoc(); + const GlobalValue *GV = GA->getGlobal(); + ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags); + } else if (ConstantPoolSDNode *CP = + dyn_cast<ConstantPoolSDNode>(ImmOpnd)) { + const Constant *C = CP->getConstVal(); + ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, + CP->getAlignment(), + 0, Flags); + } + } + + if (FirstOp == 1) // Store + (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, + Base.getOperand(0), N->getOperand(3)); + else // Load + (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), + N->getOperand(2)); + + // The add-immediate may now be dead, in which case remove it. + if (Base.getNode()->use_empty()) + CurDAG->RemoveDeadNode(Base.getNode()); + } +} /// createPPCISelDag - This pass converts a legalized DAG into a @@ -1330,3 +1559,14 @@ FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) { return new PPCDAGToDAGISel(TM); } +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection"; + PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, 0, + false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce); +} + diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 9966b2c..cf1f459 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -36,20 +36,20 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); -static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); -static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, +static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); +static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); @@ -132,11 +132,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We don't support sin/cos/sqrt/fmod/pow setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); setOperationAction(ISD::FMA , MVT::f32, Legal); @@ -498,15 +500,15 @@ 
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // friends. Gcc uses same threshold of 128 bytes (= 32 word stores). if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc || Subtarget->getDarwinDirective() == PPC::DIR_E5500) { - maxStoresPerMemset = 32; - maxStoresPerMemsetOptSize = 16; - maxStoresPerMemcpy = 32; - maxStoresPerMemcpyOptSize = 8; - maxStoresPerMemmove = 32; - maxStoresPerMemmoveOptSize = 8; + MaxStoresPerMemset = 32; + MaxStoresPerMemsetOptSize = 16; + MaxStoresPerMemcpy = 32; + MaxStoresPerMemcpyOptSize = 8; + MaxStoresPerMemmove = 32; + MaxStoresPerMemmoveOptSize = 8; setPrefFunctionAlignment(4); - benefitFromCodePlacementOpt = true; + BenefitFromCodePlacementOpt = true; } } @@ -592,6 +594,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; + case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; } } @@ -1746,18 +1749,18 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, #include "PPCGenCallingConv.inc" -static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { return true; } -static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { static const uint16_t ArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, @@ -1780,11 +1783,11 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, return false; } -static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { static const uint16_t ArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 @@ -1907,7 +1910,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // Reserve space for the linkage area on the stack. CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); - CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4); + CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -1968,7 +1971,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); - CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal); + CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); // Area that is at least reserved in the caller of this function. 
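The CC_PPC32_SVR4_Custom_AlignArgRegs handler renamed above enforces the
pairing rule stated in the calling-convention file: an i64 occupies two
adjacent GPRs whose first register is odd-numbered (r3/r5/r7/r9). A toy
model of the assignment (illustrative only, not LLVM code):

#include <cstdio>

int main() {
  unsigned NextGPR = 3;                     // next free GPR among r3..r10
  unsigned A = NextGPR++;                   // i32 a  -> r3
  if ((NextGPR & 1) == 0)                   // the custom handler's job:
    ++NextGPR;                              // skip (and waste) r4
  unsigned Lo = NextGPR++, Hi = NextGPR++;  // i64 b  -> r5/r6
  std::printf("a in r%u; b in r%u/r%u; next free is r%u\n", A, Lo, Hi, NextGPR);
  return 0;
}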
unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); @@ -2160,13 +2163,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SmallVector<SDValue, 8> MemOps; unsigned nAltivecParamsAtEnd = 0; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); - for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { + unsigned CurArgIdx = 0; + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; + std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); + CurArgIdx = Ins[ArgNo].OrigArgIndex; unsigned CurArgOffset = ArgOffset; @@ -2501,6 +2507,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SmallVector<SDValue, 8> MemOps; unsigned nAltivecParamsAtEnd = 0; + // FIXME: FuncArg and Ins[ArgNo] must reference the same argument. + // When passing anonymous aggregates, this is currently not true. + // See LowerFormalArguments_64SVR4 for a fix. Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { SDValue ArgVal; @@ -3323,7 +3332,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // When performing tail call optimization the callee pops its arguments off // the stack. Account for this here so these bytes can be pushed back on in - // PPCRegisterInfo::eliminateCallFramePseudoInstr. + // PPCFrameLowering::eliminateCallFramePseudoInstr. int BytesCalleePops = (CallConv == CallingConv::Fast && getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; @@ -3339,17 +3348,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // Emit tail call. if (isTailCall) { - // If this is the first return lowered for this function, add the regs - // to the liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, RetCC_PPC); - for (unsigned i = 0; i != RVLocs.size(); ++i) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - assert(((Callee.getOpcode() == ISD::Register && cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || Callee.getOpcode() == ISD::TargetExternalSymbol || @@ -3493,11 +3491,11 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, bool Result; if (Outs[i].IsFixed) { - Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, - CCInfo); + Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, + CCInfo); } else { - Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, - ArgFlags, CCInfo); + Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, + ArgFlags, CCInfo); } if (Result) { @@ -3510,7 +3508,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, } } else { // All arguments are treated the same. - CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4); + CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); } // Assign locations to all of the outgoing aggregate by value arguments. @@ -3521,7 +3519,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, // Reserve stack space for the allocations in CCInfo. 
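The CurArgIdx bookkeeping added to LowerFormalArguments_64SVR4 above exists
because one IR argument may lower to several entries of Ins, so the IR
argument iterator must be advanced by the change in OrigArgIndex rather than
once per loop trip. A toy demonstration with hypothetical data:

#include <cstdio>
#include <vector>

int main() {
  // One IR argument can produce several lowered pieces; OrigArgIndex maps
  // each piece back to its IR argument (here the first argument splits into
  // two pieces and the third into three).
  std::vector<unsigned> OrigArgIndex = {0, 0, 1, 2, 2, 2};
  unsigned FuncArg = 0, CurArgIdx = 0;       // stand-ins for the arg_iterator
  for (std::size_t i = 0; i != OrigArgIndex.size(); ++i) {
    FuncArg += OrigArgIndex[i] - CurArgIdx;  // std::advance in the real code
    CurArgIdx = OrigArgIndex[i];
    std::printf("piece %zu -> IR argument %u\n", i, FuncArg);
  }
  return 0;
}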
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); - CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal); + CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); // Size of the linkage area, parameter list area and the part of the local // space variable where copies of aggregates which are passed by value are @@ -4415,14 +4413,8 @@ PPCTargetLowering::LowerReturn(SDValue Chain, getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_PPC); - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -4447,12 +4439,17 @@ PPCTargetLowering::LowerReturn(SDValue Chain, Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. if (Flag.getNode()) - return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag); - else - return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain); + RetOps.push_back(Flag); + + return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, + &RetOps[0], RetOps.size()); } SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, @@ -5028,11 +5025,21 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // Two instruction sequences. // If this value is in the range [-32,30] and is even, use: - // tmp = VSPLTI[bhw], result = add tmp, tmp - if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) { - SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl); - Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); + // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) + // If this value is in the range [17,31] and is odd, use: + // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) + // If this value is in the range [-31,-17] and is odd, use: + // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) + // Note the last two are three-instruction sequences. + if (SextVal >= -32 && SextVal <= 31) { + // To avoid having these optimizations undone by constant folding, + // we convert to a pseudo that will be expanded later into one of + // the above forms. + SDValue Elt = DAG.getConstant(SextVal, MVT::i32); + EVT VT = Op.getValueType(); + int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4); + SDValue EltSize = DAG.getConstant(Size, MVT::i32); + return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); } // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is @@ -5128,23 +5135,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } } - // Three instruction sequences. - - // Odd, in range [17,31]: (vsplti C)-(vsplti -16). - if (SextVal >= 0 && SextVal <= 31) { - SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl); - SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); - LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); - } - // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16). 
- if (SextVal >= -31 && SextVal <= 0) { - SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl); - SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); - LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); - } - return SDValue(); } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 12b3df7..f5d418c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -237,6 +237,12 @@ namespace llvm { /// sym@got@dtprel@l. ADDI_DTPREL_L, + /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded + /// during instruction selection to optimize a BUILD_VECTOR into + /// operations on splats. This is necessary to avoid losing these + /// optimizations due to constant folding. + VADD_SPLAT, + /// STD_32 - This is the STD instruction for use with "32-bit" registers. STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, @@ -252,13 +258,14 @@ namespace llvm { /// or i32. LBRX, - /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium code model, produces - /// an ADDIS8 instruction that adds the TOC base register to sym@toc@ha. + /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model, + /// produces an ADDIS8 instruction that adds the TOC base register to + /// sym@toc@ha. ADDIS_TOC_HA, - /// G8RC = LD_TOC_L Symbol, G8RReg - For medium code model, produces a - /// LD instruction with base register G8RReg and offset sym@toc@l. - /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. + /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model, + /// produces a LD instruction with base register G8RReg and offset + /// sym@toc@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. LD_TOC_L, /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 1dd5415..0120130 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -701,7 +701,7 @@ def : Pat<(PPCload ixaddr:$src), def : Pat<(PPCload xaddr:$src), (LDX xaddr:$src)>; -// Support for medium code model. +// Support for medium and large code model. def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp), "#ADDIStocHA", [(set G8RC:$rD, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 8c077b7..460e943 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -181,7 +181,7 @@ def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx, def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx, [SDNPHasChain, SDNPMayStore]>; -// Instructions to support medium code model +// Instructions to support medium and large code model def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>; def PPCldTocL : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>; def PPCaddiTocL : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>; @@ -346,7 +346,7 @@ def crbitm: Operand<i8> { // Address operands def memri : Operand<iPTR> { let PrintMethod = "printMemRegImm"; - let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg); let EncoderMethod = "getMemRIEncoding"; } def memrr : Operand<iPTR> { @@ -355,7 +355,7 @@ def memrr : Operand<iPTR> { } def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. 
let PrintMethod = "printMemRegImmShifted"; - let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg); let EncoderMethod = "getMemRIXEncoding"; } diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 851de17..cfcd749 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -115,7 +115,7 @@ asm( "lwz r2, 208(r1)\n" // stub's frame "lwz r4, 8(r2)\n" // stub's lr "li r5, 0\n" // 0 == 32 bit - "bl _PPCCompilationCallbackC\n" + "bl _LLVMPPCCompilationCallback\n" "mtctr r3\n" // Restore all int arg registers "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n" @@ -178,7 +178,7 @@ asm( "lwz 5, 104(1)\n" // stub's frame "lwz 4, 4(5)\n" // stub's lr "li 5, 0\n" // 0 == 32 bit - "bl PPCCompilationCallbackC\n" + "bl LLVMPPCCompilationCallback\n" "mtctr 3\n" // Restore all int arg registers "lwz 10, 100(1)\n" "lwz 9, 96(1)\n" @@ -259,10 +259,10 @@ asm( "ld 4, 16(5)\n" // stub's lr "li 5, 1\n" // 1 == 64 bit #ifdef __ELF__ - "bl PPCCompilationCallbackC\n" + "bl LLVMPPCCompilationCallback\n" "nop\n" #else - "bl _PPCCompilationCallbackC\n" + "bl _LLVMPPCCompilationCallback\n" #endif "mtctr 3\n" // Restore all int arg registers @@ -292,9 +292,10 @@ void PPC64CompilationCallback() { #endif extern "C" { -static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, - unsigned *OrigCallAddrPlus4, - bool is64Bit) { +LLVM_LIBRARY_VISIBILITY void * +LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4, + unsigned *OrigCallAddrPlus4, + bool is64Bit) { // Adjust the pointer to the address of the call instruction in the stub // emitted by emitFunctionStub, rather than the instruction after it. unsigned *StubCallAddr = StubCallAddrPlus4 - 1; diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 73f7a2c..9b0df3e 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -114,6 +115,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, break; case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO; break; + case PPCII::MO_DTPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO; + break; + case PPCII::MO_TLSLD16_LO: RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO; + break; + case PPCII::MO_TOC16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO; + break; } // FIXME: This isn't right, but we don't have a good way to express this in diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 24caffa..045b375 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -71,6 +71,9 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// register for parameter passing. unsigned VarArgsNumFPR; + /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4. 
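The new MO_* target flags lowered in GetSymbolRef above are what let the
PostprocessISelDAG fold keep its relocations: each flag selects the assembly
modifier printed on the folded immediate. A standalone restatement (the enum
here is an illustrative re-declaration; the real flags live in PPCII):

#include <cstdio>

enum TargetFlag { MO_DTPREL16_LO, MO_TLSLD16_LO, MO_TOC16_LO };

static const char *relocSuffix(TargetFlag F) {
  switch (F) {
  case MO_DTPREL16_LO: return "@dtprel@l";      // VK_PPC_DTPREL16_LO
  case MO_TLSLD16_LO:  return "@got@tlsld@l";   // VK_PPC_GOT_TLSLD16_LO
  case MO_TOC16_LO:    return "@toc@l";         // VK_PPC_TOC16_LO
  }
  return "";
}

int main() {
  // e.g. a folded TOC-relative load prints as: ld 3, x@toc@l(4)
  std::printf("ld 3, x%s(4)\n", relocSuffix(MO_TOC16_LO));
  return 0;
}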
+ int CRSpillFrameIndex; + public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), @@ -83,7 +86,8 @@ public: VarArgsFrameIndex(0), VarArgsStackOffset(0), VarArgsNumGPR(0), - VarArgsNumFPR(0) {} + VarArgsNumFPR(0), + CRSpillFrameIndex(0) {} int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } @@ -125,6 +129,9 @@ public: unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; } void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; } + + int getCRSpillFrameIndex() const { return CRSpillFrameIndex; } + void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; } }; } // end of namespace llvm diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 378c147..df245cc 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -71,7 +71,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, ST.isPPC64() ? 0 : 1, ST.isPPC64() ? 0 : 1), - Subtarget(ST), TII(tii), CRSpillFrameIdx(0) { + Subtarget(ST), TII(tii) { ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX; ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX; ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX; @@ -111,11 +111,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return Subtarget.isPPC64() ? CSR_Darwin64_SaveList : CSR_Darwin32_SaveList; - // For 32-bit SVR4, also initialize the frame index associated with - // the CR spill slot. - if (!Subtarget.isPPC64()) - CRSpillFrameIdx = 0; - return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList; } @@ -222,45 +217,6 @@ PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { // Stack Frame Processing methods //===----------------------------------------------------------------------===// -void PPCRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - if (MF.getTarget().Options.GuaranteedTailCallOpt && - I->getOpcode() == PPC::ADJCALLSTACKUP) { - // Add (actually subtract) back the amount the callee popped on return. - if (int CalleeAmt = I->getOperand(1).getImm()) { - bool is64Bit = Subtarget.isPPC64(); - CalleeAmt *= -1; - unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; - unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; - unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; - unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; - unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; - MachineInstr *MI = I; - DebugLoc dl = MI->getDebugLoc(); - - if (isInt<16>(CalleeAmt)) { - BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) - .addReg(StackReg, RegState::Kill) - .addImm(CalleeAmt); - } else { - MachineBasicBlock::iterator MBBI = I; - BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) - .addImm(CalleeAmt >> 16); - BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) - .addReg(TmpReg, RegState::Kill) - .addImm(CalleeAmt & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) - .addReg(StackReg, RegState::Kill) - .addReg(TmpReg); - } - } - } - // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. - MBB.erase(I); -} - /// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered /// register first and then a spilled callee-saved register if that fails. 
static @@ -489,19 +445,14 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4 // ABI, return true to prevent allocating an additional frame slot. // For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0 - // is arbitrary and will be subsequently ignored. For 32-bit, we must - // create exactly one stack slot and return its FrameIdx for all - // nonvolatiles. + // is arbitrary and will be subsequently ignored. For 32-bit, we have + // previously created the stack slot if needed, so return its FrameIdx. if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) { - if (Subtarget.isPPC64()) { + if (Subtarget.isPPC64()) FrameIdx = 0; - } else if (CRSpillFrameIdx) { - FrameIdx = CRSpillFrameIdx; - } else { - MachineFrameInfo *MFI = - (const_cast<MachineFunction &>(MF)).getFrameInfo(); - FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); - CRSpillFrameIdx = FrameIdx; + else { + const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + FrameIdx = FI->getCRSpillFrameIndex(); } return true; } @@ -510,7 +461,8 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); // Get the instruction. @@ -524,20 +476,13 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); DebugLoc dl = MI.getDebugLoc(); - // Find out which operand is the frame index. - unsigned FIOperandNo = 0; - while (!MI.getOperand(FIOperandNo).isFI()) { - ++FIOperandNo; - assert(FIOperandNo != MI.getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - } // Take into account whether it's an add or mem instruction - unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2; + unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2; if (MI.isInlineAsm()) - OffsetOperandNo = FIOperandNo-1; + OffsetOperandNo = FIOperandNum-1; // Get the frame index. - int FrameIndex = MI.getOperand(FIOperandNo).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); // Get the frame pointer save index. Users of this index are primarily // DYNALLOC instructions. @@ -567,7 +512,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). bool is64Bit = Subtarget.isPPC64(); - MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ? + MI.getOperand(FIOperandNum).ChangeToRegister(TFI->hasFP(MF) ? (is64Bit ? PPC::X31 : PPC::R31) : (is64Bit ? 
PPC::X1 : PPC::R1), false); @@ -649,7 +594,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, OperandBase = OffsetOperandNo; } - unsigned StackReg = MI.getOperand(FIOperandNo).getReg(); + unsigned StackReg = MI.getOperand(FIOperandNum).getReg(); MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true); } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index a8fd796..9840666 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -30,7 +30,6 @@ class PPCRegisterInfo : public PPCGenRegisterInfo { std::map<unsigned, unsigned> ImmToIdxMap; const PPCSubtarget &Subtarget; const TargetInstrInfo &TII; - mutable int CRSpillFrameIdx; public: PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii); @@ -56,10 +55,6 @@ public: bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - void lowerDynamicAlloc(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const; void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, @@ -69,7 +64,8 @@ public: bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 5ca3876..8ee9b1e 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -63,142 +63,28 @@ class CRBIT<bits<5> num, string n> : PPCReg<n> { field bits<5> Num = num; } - // General-purpose registers -def R0 : GPR< 0, "r0">, DwarfRegNum<[-2, 0]>; -def R1 : GPR< 1, "r1">, DwarfRegNum<[-2, 1]>; -def R2 : GPR< 2, "r2">, DwarfRegNum<[-2, 2]>; -def R3 : GPR< 3, "r3">, DwarfRegNum<[-2, 3]>; -def R4 : GPR< 4, "r4">, DwarfRegNum<[-2, 4]>; -def R5 : GPR< 5, "r5">, DwarfRegNum<[-2, 5]>; -def R6 : GPR< 6, "r6">, DwarfRegNum<[-2, 6]>; -def R7 : GPR< 7, "r7">, DwarfRegNum<[-2, 7]>; -def R8 : GPR< 8, "r8">, DwarfRegNum<[-2, 8]>; -def R9 : GPR< 9, "r9">, DwarfRegNum<[-2, 9]>; -def R10 : GPR<10, "r10">, DwarfRegNum<[-2, 10]>; -def R11 : GPR<11, "r11">, DwarfRegNum<[-2, 11]>; -def R12 : GPR<12, "r12">, DwarfRegNum<[-2, 12]>; -def R13 : GPR<13, "r13">, DwarfRegNum<[-2, 13]>; -def R14 : GPR<14, "r14">, DwarfRegNum<[-2, 14]>; -def R15 : GPR<15, "r15">, DwarfRegNum<[-2, 15]>; -def R16 : GPR<16, "r16">, DwarfRegNum<[-2, 16]>; -def R17 : GPR<17, "r17">, DwarfRegNum<[-2, 17]>; -def R18 : GPR<18, "r18">, DwarfRegNum<[-2, 18]>; -def R19 : GPR<19, "r19">, DwarfRegNum<[-2, 19]>; -def R20 : GPR<20, "r20">, DwarfRegNum<[-2, 20]>; -def R21 : GPR<21, "r21">, DwarfRegNum<[-2, 21]>; -def R22 : GPR<22, "r22">, DwarfRegNum<[-2, 22]>; -def R23 : GPR<23, "r23">, DwarfRegNum<[-2, 23]>; -def R24 : GPR<24, "r24">, DwarfRegNum<[-2, 24]>; -def R25 : GPR<25, "r25">, DwarfRegNum<[-2, 25]>; -def R26 : GPR<26, "r26">, DwarfRegNum<[-2, 26]>; -def R27 : GPR<27, "r27">, DwarfRegNum<[-2, 27]>; -def R28 : GPR<28, "r28">, DwarfRegNum<[-2, 28]>; -def R29 : GPR<29, "r29">, DwarfRegNum<[-2, 29]>; -def R30 : GPR<30, "r30">, DwarfRegNum<[-2, 30]>; -def R31 : GPR<31, "r31">, 
DwarfRegNum<[-2, 31]>; +foreach Index = 0-31 in { + def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>; +} // 64-bit General-purpose registers -def X0 : GP8< R0, "r0">, DwarfRegNum<[0, -2]>; -def X1 : GP8< R1, "r1">, DwarfRegNum<[1, -2]>; -def X2 : GP8< R2, "r2">, DwarfRegNum<[2, -2]>; -def X3 : GP8< R3, "r3">, DwarfRegNum<[3, -2]>; -def X4 : GP8< R4, "r4">, DwarfRegNum<[4, -2]>; -def X5 : GP8< R5, "r5">, DwarfRegNum<[5, -2]>; -def X6 : GP8< R6, "r6">, DwarfRegNum<[6, -2]>; -def X7 : GP8< R7, "r7">, DwarfRegNum<[7, -2]>; -def X8 : GP8< R8, "r8">, DwarfRegNum<[8, -2]>; -def X9 : GP8< R9, "r9">, DwarfRegNum<[9, -2]>; -def X10 : GP8<R10, "r10">, DwarfRegNum<[10, -2]>; -def X11 : GP8<R11, "r11">, DwarfRegNum<[11, -2]>; -def X12 : GP8<R12, "r12">, DwarfRegNum<[12, -2]>; -def X13 : GP8<R13, "r13">, DwarfRegNum<[13, -2]>; -def X14 : GP8<R14, "r14">, DwarfRegNum<[14, -2]>; -def X15 : GP8<R15, "r15">, DwarfRegNum<[15, -2]>; -def X16 : GP8<R16, "r16">, DwarfRegNum<[16, -2]>; -def X17 : GP8<R17, "r17">, DwarfRegNum<[17, -2]>; -def X18 : GP8<R18, "r18">, DwarfRegNum<[18, -2]>; -def X19 : GP8<R19, "r19">, DwarfRegNum<[19, -2]>; -def X20 : GP8<R20, "r20">, DwarfRegNum<[20, -2]>; -def X21 : GP8<R21, "r21">, DwarfRegNum<[21, -2]>; -def X22 : GP8<R22, "r22">, DwarfRegNum<[22, -2]>; -def X23 : GP8<R23, "r23">, DwarfRegNum<[23, -2]>; -def X24 : GP8<R24, "r24">, DwarfRegNum<[24, -2]>; -def X25 : GP8<R25, "r25">, DwarfRegNum<[25, -2]>; -def X26 : GP8<R26, "r26">, DwarfRegNum<[26, -2]>; -def X27 : GP8<R27, "r27">, DwarfRegNum<[27, -2]>; -def X28 : GP8<R28, "r28">, DwarfRegNum<[28, -2]>; -def X29 : GP8<R29, "r29">, DwarfRegNum<[29, -2]>; -def X30 : GP8<R30, "r30">, DwarfRegNum<[30, -2]>; -def X31 : GP8<R31, "r31">, DwarfRegNum<[31, -2]>; +foreach Index = 0-31 in { + def X#Index : GP8<!cast<GPR>("R"#Index), "r"#Index>, + DwarfRegNum<[Index, -2]>; +} // Floating-point registers -def F0 : FPR< 0, "f0">, DwarfRegNum<[32, 32]>; -def F1 : FPR< 1, "f1">, DwarfRegNum<[33, 33]>; -def F2 : FPR< 2, "f2">, DwarfRegNum<[34, 34]>; -def F3 : FPR< 3, "f3">, DwarfRegNum<[35, 35]>; -def F4 : FPR< 4, "f4">, DwarfRegNum<[36, 36]>; -def F5 : FPR< 5, "f5">, DwarfRegNum<[37, 37]>; -def F6 : FPR< 6, "f6">, DwarfRegNum<[38, 38]>; -def F7 : FPR< 7, "f7">, DwarfRegNum<[39, 39]>; -def F8 : FPR< 8, "f8">, DwarfRegNum<[40, 40]>; -def F9 : FPR< 9, "f9">, DwarfRegNum<[41, 41]>; -def F10 : FPR<10, "f10">, DwarfRegNum<[42, 42]>; -def F11 : FPR<11, "f11">, DwarfRegNum<[43, 43]>; -def F12 : FPR<12, "f12">, DwarfRegNum<[44, 44]>; -def F13 : FPR<13, "f13">, DwarfRegNum<[45, 45]>; -def F14 : FPR<14, "f14">, DwarfRegNum<[46, 46]>; -def F15 : FPR<15, "f15">, DwarfRegNum<[47, 47]>; -def F16 : FPR<16, "f16">, DwarfRegNum<[48, 48]>; -def F17 : FPR<17, "f17">, DwarfRegNum<[49, 49]>; -def F18 : FPR<18, "f18">, DwarfRegNum<[50, 50]>; -def F19 : FPR<19, "f19">, DwarfRegNum<[51, 51]>; -def F20 : FPR<20, "f20">, DwarfRegNum<[52, 52]>; -def F21 : FPR<21, "f21">, DwarfRegNum<[53, 53]>; -def F22 : FPR<22, "f22">, DwarfRegNum<[54, 54]>; -def F23 : FPR<23, "f23">, DwarfRegNum<[55, 55]>; -def F24 : FPR<24, "f24">, DwarfRegNum<[56, 56]>; -def F25 : FPR<25, "f25">, DwarfRegNum<[57, 57]>; -def F26 : FPR<26, "f26">, DwarfRegNum<[58, 58]>; -def F27 : FPR<27, "f27">, DwarfRegNum<[59, 59]>; -def F28 : FPR<28, "f28">, DwarfRegNum<[60, 60]>; -def F29 : FPR<29, "f29">, DwarfRegNum<[61, 61]>; -def F30 : FPR<30, "f30">, DwarfRegNum<[62, 62]>; -def F31 : FPR<31, "f31">, DwarfRegNum<[63, 63]>; +foreach Index = 0-31 in { + def F#Index : FPR<Index, "f"#Index>, + 
DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>; +} // Vector registers -def V0 : VR< 0, "v0">, DwarfRegNum<[77, 77]>; -def V1 : VR< 1, "v1">, DwarfRegNum<[78, 78]>; -def V2 : VR< 2, "v2">, DwarfRegNum<[79, 79]>; -def V3 : VR< 3, "v3">, DwarfRegNum<[80, 80]>; -def V4 : VR< 4, "v4">, DwarfRegNum<[81, 81]>; -def V5 : VR< 5, "v5">, DwarfRegNum<[82, 82]>; -def V6 : VR< 6, "v6">, DwarfRegNum<[83, 83]>; -def V7 : VR< 7, "v7">, DwarfRegNum<[84, 84]>; -def V8 : VR< 8, "v8">, DwarfRegNum<[85, 85]>; -def V9 : VR< 9, "v9">, DwarfRegNum<[86, 86]>; -def V10 : VR<10, "v10">, DwarfRegNum<[87, 87]>; -def V11 : VR<11, "v11">, DwarfRegNum<[88, 88]>; -def V12 : VR<12, "v12">, DwarfRegNum<[89, 89]>; -def V13 : VR<13, "v13">, DwarfRegNum<[90, 90]>; -def V14 : VR<14, "v14">, DwarfRegNum<[91, 91]>; -def V15 : VR<15, "v15">, DwarfRegNum<[92, 92]>; -def V16 : VR<16, "v16">, DwarfRegNum<[93, 93]>; -def V17 : VR<17, "v17">, DwarfRegNum<[94, 94]>; -def V18 : VR<18, "v18">, DwarfRegNum<[95, 95]>; -def V19 : VR<19, "v19">, DwarfRegNum<[96, 96]>; -def V20 : VR<20, "v20">, DwarfRegNum<[97, 97]>; -def V21 : VR<21, "v21">, DwarfRegNum<[98, 98]>; -def V22 : VR<22, "v22">, DwarfRegNum<[99, 99]>; -def V23 : VR<23, "v23">, DwarfRegNum<[100, 100]>; -def V24 : VR<24, "v24">, DwarfRegNum<[101, 101]>; -def V25 : VR<25, "v25">, DwarfRegNum<[102, 102]>; -def V26 : VR<26, "v26">, DwarfRegNum<[103, 103]>; -def V27 : VR<27, "v27">, DwarfRegNum<[104, 104]>; -def V28 : VR<28, "v28">, DwarfRegNum<[105, 105]>; -def V29 : VR<29, "v29">, DwarfRegNum<[106, 106]>; -def V30 : VR<30, "v30">, DwarfRegNum<[107, 107]>; -def V31 : VR<31, "v31">, DwarfRegNum<[108, 108]>; +foreach Index = 0-31 in { + def V#Index : VR<Index, "v"#Index>, + DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>; +} // Condition register bits def CR0LT : CRBIT< 0, "0">; diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index d9b4e30..18e4c07 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -36,6 +36,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, , Use64BitRegs(false) , IsPPC64(is64Bit) , HasAltivec(false) + , HasQPX(false) , HasFSQRT(false) , HasSTFIWX(false) , HasISEL(false) @@ -82,6 +83,12 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, // Set up darwin-specific properties. if (isDarwin()) HasLazyResolverStubs = true; + + // QPX requires a 32-byte aligned stack. Note that we need to do this if + // we're compiling for a BG/Q system regardless of whether or not QPX + // is enabled because external functions will assume this alignment. 
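Together with the PPCFrameLowering constructor change earlier in this patch,
the alignment rule amounts to the following (a minimal sketch):

#include <cstdio>

// BG/Q forces 32-byte stack alignment even with QPX disabled, so that calls
// into externally compiled code see the alignment those callees assume.
static unsigned ppcStackAlignment(bool HasQPX, bool IsBGQ) {
  return (HasQPX || IsBGQ) ? 32 : 16;
}

int main() {
  std::printf("%u %u %u\n", ppcStackAlignment(false, false),   // 16
              ppcStackAlignment(true, false),                  // 32
              ppcStackAlignment(false, true));                 // 32
  return 0;
}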
+ if (hasQPX() || isBGQ()) + StackAlignment = 32; } /// SetJITMode - This is called to inform the subtarget info that we are diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 416c0f3..15885bd 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -43,7 +43,12 @@ namespace PPC { DIR_A2, DIR_E500mc, DIR_E5500, + DIR_PWR3, + DIR_PWR4, + DIR_PWR5, + DIR_PWR5X, DIR_PWR6, + DIR_PWR6X, DIR_PWR7, DIR_64 }; @@ -70,6 +75,7 @@ protected: bool Use64BitRegs; bool IsPPC64; bool HasAltivec; + bool HasQPX; bool HasFSQRT; bool HasSTFIWX; bool HasISEL; @@ -150,6 +156,7 @@ public: bool hasFSQRT() const { return HasFSQRT; } bool hasSTFIWX() const { return HasSTFIWX; } bool hasAltivec() const { return HasAltivec; } + bool hasQPX() const { return HasQPX; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool isBookE() const { return IsBookE; } @@ -160,6 +167,8 @@ public: bool isDarwin() const { return TargetTriple.isMacOSX(); } /// isBGP - True if this is a BG/P platform. bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; } + /// isBGQ - True if this is a BG/Q platform. + bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; } bool isDarwinABI() const { return isDarwin(); } bool isSVR4ABI() const { return !isDarwin(); } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index b8b7882..fe851c1 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -126,3 +126,12 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, return false; } + +void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our PPC pass. This + // allows the PPC pass to delegate to the target independent layer when + // appropriate. + PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createPPCTargetTransformInfoPass(this)); +} + diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index d917d99..606ccb3 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -68,6 +68,9 @@ public: virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); + + /// \brief Register PPC analysis passes with a pass manager. + virtual void addAnalysisPasses(PassManagerBase &PM); }; /// PPC32TargetMachine - PowerPC 32-bit target machine. diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp new file mode 100644 index 0000000..5e9ad34 --- /dev/null +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -0,0 +1,236 @@ +//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// PPC target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. 
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppctti"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializePPCTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class PPCTTI : public ImmutablePass, public TargetTransformInfo {
+  const PPCTargetMachine *TM;
+  const PPCSubtarget *ST;
+  const PPCTargetLowering *TLI;
+
+  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+  /// are set if the result needs to be inserted and/or extracted from vectors.
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+  PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+    llvm_unreachable("This pass cannot be directly constructed");
+  }
+
+  PPCTTI(const PPCTargetMachine *TM)
+    : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+      TLI(TM->getTargetLowering()) {
+    initializePPCTTIPass(*PassRegistry::getPassRegistry());
+  }
+
+  virtual void initializePass() {
+    pushTTIStack(this);
+  }
+
+  virtual void finalizePass() {
+    popTTIStack();
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    TargetTransformInfo::getAnalysisUsage(AU);
+  }
+
+  /// Pass identification.
+  static char ID;
+
+  /// Provide necessary pointer adjustments for the two base classes.
+  virtual void *getAdjustedAnalysisPointer(const void *ID) {
+    if (ID == &TargetTransformInfo::ID)
+      return (TargetTransformInfo*)this;
+    return this;
+  }
+
+  /// \name Scalar TTI Implementations
+  /// @{
+  virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+
+  /// @}
+
+  /// \name Vector TTI Implementations
+  /// @{
+
+  virtual unsigned getNumberOfRegisters(bool Vector) const;
+  virtual unsigned getRegisterBitWidth(bool Vector) const;
+  virtual unsigned getMaximumUnrollFactor() const;
+  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+  virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+                                  int Index, Type *SubTp) const;
+  virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+                                    Type *Src) const;
+  virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                                      Type *CondTy) const;
+  virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+                                      unsigned Index) const;
+  virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+                                   unsigned Alignment,
+                                   unsigned AddressSpace) const;
+
+  /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
+                   "PPC Target Transform Info", true, true, false)
+char PPCTTI::ID = 0;
+
+ImmutablePass *
+llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
+  return new PPCTTI(TM);
+}
+
+
+//===----------------------------------------------------------------------===//
+//
+// PPC cost model.
+//
+//===----------------------------------------------------------------------===//
+
+PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
+  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+  // FIXME: PPC currently does not have custom popcnt lowering even though
+  // there is hardware support.
Once this is fixed, update this function + // to reflect the real capabilities of the hardware. + return PSK_Software; +} + +unsigned PPCTTI::getNumberOfRegisters(bool Vector) const { + if (Vector && !ST->hasAltivec()) + return 0; + return 32; +} + +unsigned PPCTTI::getRegisterBitWidth(bool Vector) const { + if (Vector) { + if (ST->hasAltivec()) return 128; + return 0; + } + + if (ST->isPPC64()) + return 64; + return 32; + +} + +unsigned PPCTTI::getMaximumUnrollFactor() const { + unsigned Directive = ST->getDarwinDirective(); + // The 440 has no SIMD support, but floating-point instructions + // have a 5-cycle latency, so unroll by 5x for latency hiding. + if (Directive == PPC::DIR_440) + return 5; + + // The A2 has no SIMD support, but floating-point instructions + // have a 6-cycle latency, so unroll by 6x for latency hiding. + if (Directive == PPC::DIR_A2) + return 6; + + // FIXME: For lack of any better information, do no harm... + if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) + return 1; + + // For most things, modern systems have two execution units (and + // out-of-order execution). + return 2; +} + +unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { + assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); + + // Fallback to the default implementation. + return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty); +} + +unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) const { + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); +} + +unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { + assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); + + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); +} + +unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); +} + +unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + assert(Val->isVectorTy() && "This must be a vector type"); + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + // Estimated cost of a load-hit-store delay. This was obtained + // experimentally as a minimum needed to prevent unprofitable + // vectorization for the paq8p benchmark. It may need to be + // raised further if other unprofitable cases remain. + unsigned LHSPenalty = 12; + + // Vector element insert/extract with Altivec is very expensive, + // because they require store and reload with the attendant + // processor stall for load-hit-store. Until VSX is available, + // these need to be estimated as very costly. + if (ISD == ISD::EXTRACT_VECTOR_ELT || + ISD == ISD::INSERT_VECTOR_ELT) + return LHSPenalty + + TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); + + return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); +} + +unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) const { + // Legalize the type. + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && + "Invalid Opcode"); + + // Each load/store unit costs 1. + unsigned Cost = LT.first * 1; + + // PPC in general does not support unaligned loads and stores. They'll need + // to be decomposed based on the alignment factor. 
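A worked example of the decomposition factor computed just below (the real
query goes through the TargetTransformInfo interface; this sketch only
mirrors the arithmetic): a v4i32 access stores 16 bytes, so at 4-byte
alignment it is charged as four operations.

#include <cstdio>

static unsigned memOpCost(unsigned LegalizedOps, unsigned StoreBytes,
                          unsigned Alignment) {
  unsigned Cost = LegalizedOps;        // each load/store unit costs 1
  if (StoreBytes && Alignment && Alignment < StoreBytes)
    Cost *= StoreBytes / Alignment;    // decompose unaligned accesses
  return Cost;
}

int main() {
  std::printf("%u\n", memOpCost(1, 16, 16)); // aligned v4i32    -> 1
  std::printf("%u\n", memOpCost(1, 16, 4));  // 4-byte alignment -> 4
  return 0;
}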
+ unsigned SrcBytes = LT.second.getStoreSize(); + if (SrcBytes && Alignment && Alignment < SrcBytes) + Cost *= (SrcBytes/Alignment); + + return Cost; +} + diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 0f5125d..ba87918 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -23,17 +23,19 @@ class AMDGPUTargetMachine; // R600 Passes FunctionPass* createR600KernelParametersPass(const DataLayout *TD); FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); +FunctionPass *createR600LowerConstCopy(TargetMachine &tm); // SI Passes FunctionPass *createSIAnnotateControlFlowPass(); FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); FunctionPass *createSILowerControlFlowPass(TargetMachine &tm); FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); -FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm); +FunctionPass *createSIInsertWaits(TargetMachine &tm); // Passes common to R600 and SI Pass *createAMDGPUStructurizeCFGPass(); FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); +FunctionPass* createAMDGPUIndirectAddressingPass(TargetMachine &tm); } // End namespace llvm diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index 754506c..c30dbe4 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -47,6 +47,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { #endif } SetupMachineFunction(MF); + if (OutStreamer.hasRawTextSupport()) { + OutStreamer.EmitRawText("@" + MF.getName() + ":"); + } OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { EmitProgramInfo(MF); @@ -88,8 +91,6 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { switch (reg) { default: break; case AMDGPU::EXEC: - case AMDGPU::SI_LITERAL_CONSTANT: - case AMDGPU::SREG_LIT_0: case AMDGPU::M0: continue; } @@ -115,10 +116,16 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { } else if (AMDGPU::SReg_256RegClass.contains(reg)) { isSGPR = true; width = 8; + } else if (AMDGPU::VReg_256RegClass.contains(reg)) { + isSGPR = false; + width = 8; + } else if (AMDGPU::VReg_512RegClass.contains(reg)) { + isSGPR = false; + width = 16; } else { assert(!"Unknown register class"); } - hwReg = RI->getEncodingValue(reg); + hwReg = RI->getEncodingValue(reg) & 0xff; maxUsed = hwReg + width - 1; if (isSGPR) { MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR; diff --git a/lib/Target/R600/AMDGPUCodeEmitter.h b/lib/Target/R600/AMDGPUCodeEmitter.h deleted file mode 100644 index 84f3588..0000000 --- a/lib/Target/R600/AMDGPUCodeEmitter.h +++ /dev/null @@ -1,49 +0,0 @@ -//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// \brief CodeEmitter interface for R600 and SI codegen. 
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPUCODEEMITTER_H
-#define AMDGPUCODEEMITTER_H
-
-namespace llvm {
-
-class AMDGPUCodeEmitter {
-public:
-  uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
-  virtual uint64_t getMachineOpValue(const MachineInstr &MI,
-                                     const MachineOperand &MO) const { return 0; }
-  virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
-                                   unsigned OpNo) const {
-    return 0;
-  }
-  virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
-                                   unsigned OpNo) const {
-    return 0;
-  }
-  virtual uint64_t VOPPostEncode(const MachineInstr &MI,
-                                 uint64_t Value) const {
-    return Value;
-  }
-  virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
-                                    unsigned OpNo) const {
-    return 0;
-  }
-  virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
-      const {
-    return 0;
-  }
-};
-
-} // End namespace llvm
-
-#endif // AMDGPUCODEEMITTER_H
diff --git a/lib/Target/R600/AMDGPUFrameLowering.cpp b/lib/Target/R600/AMDGPUFrameLowering.cpp
new file mode 100644
index 0000000..815d6f7
--- /dev/null
+++ b/lib/Target/R600/AMDGPUFrameLowering.cpp
@@ -0,0 +1,122 @@
+//===----------------------- AMDGPUFrameLowering.cpp ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface to describe a layout of a stack frame on an AMDIL target machine
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
+                                         int LAO, unsigned TransAl)
+  : TargetFrameLowering(D, StackAl, LAO, TransAl) { }
+
+AMDGPUFrameLowering::~AMDGPUFrameLowering() { }
+
+unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
+
+  // XXX: Hardcoding to 1 for now.
+  //
+  // I think the StackWidth should be stored as metadata associated with the
+  // MachineFunction.  This metadata can either be added by a frontend, or
+  // calculated by an R600-specific LLVM IR pass.
+  //
+  // The StackWidth determines how stack objects are laid out in memory.
+  // For a vector stack variable, like: int4 stack[2], the data will be stored
+  // in the following ways depending on the StackWidth.
+  //
+  // StackWidth = 1:
+  //
+  // T0.X = stack[0].x
+  // T1.X = stack[0].y
+  // T2.X = stack[0].z
+  // T3.X = stack[0].w
+  // T4.X = stack[1].x
+  // T5.X = stack[1].y
+  // T6.X = stack[1].z
+  // T7.X = stack[1].w
+  //
+  // StackWidth = 2:
+  //
+  // T0.X = stack[0].x
+  // T0.Y = stack[0].y
+  // T1.X = stack[0].z
+  // T1.Y = stack[0].w
+  // T2.X = stack[1].x
+  // T2.Y = stack[1].y
+  // T3.X = stack[1].z
+  // T3.Y = stack[1].w
+  //
+  // StackWidth = 4:
+  // T0.X = stack[0].x
+  // T0.Y = stack[0].y
+  // T0.Z = stack[0].z
+  // T0.W = stack[0].w
+  // T1.X = stack[1].x
+  // T1.Y = stack[1].y
+  // T1.Z = stack[1].z
+  // T1.W = stack[1].w
+  return 1;
+}
+
+/// \returns The offset, in registers, at which the object at frame index
+/// \p FI is placed. 
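+///
+/// Illustrative example, not part of the original patch: with the stack
+/// width hardcoded to 1 above, an 'int4 stack[2]' alloca contributes
+/// (4 / 1) * 2 = 8 registers, so the object at the next frame index starts
+/// 8 registers further along.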
+int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned Offset = 0; + int UpperBound = FI == -1 ? MFI->getNumObjects() : FI; + + for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) { + const AllocaInst *Alloca = MFI->getObjectAllocation(i); + unsigned ArrayElements; + const Type *AllocaType = Alloca->getAllocatedType(); + const Type *ElementType; + + if (AllocaType->isArrayTy()) { + ArrayElements = AllocaType->getArrayNumElements(); + ElementType = AllocaType->getArrayElementType(); + } else { + ArrayElements = 1; + ElementType = AllocaType; + } + + unsigned VectorElements; + if (ElementType->isVectorTy()) { + VectorElements = ElementType->getVectorNumElements(); + } else { + VectorElements = 1; + } + + Offset += (VectorElements / getStackWidth(MF)) * ArrayElements; + } + return Offset; +} + +const TargetFrameLowering::SpillSlot * +AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const { + NumEntries = 0; + return 0; +} +void +AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const { +} +void +AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { +} + +bool +AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const { + return false; +} diff --git a/lib/Target/R600/AMDILFrameLowering.h b/lib/Target/R600/AMDGPUFrameLowering.h index 51337c3..cf5742e 100644 --- a/lib/Target/R600/AMDILFrameLowering.h +++ b/lib/Target/R600/AMDGPUFrameLowering.h @@ -1,4 +1,4 @@ -//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===// +//===--------------------- AMDGPUFrameLowering.h ----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -30,6 +30,10 @@ public: AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned TransAl = 1); virtual ~AMDGPUFrameLowering(); + + /// \returns The number of 32-bit sub-registers that are used when storing + /// values to the stack. 
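+  ///
+  /// Illustrative, not part of the original patch: with a stack width of 2,
+  /// the components of a vector are packed two channels per register
+  /// (T0.X/T0.Y, T1.X/T1.Y, ...), so an int4 occupies two registers instead
+  /// of the four it needs at width 1.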
+ virtual unsigned getStackWidth(const MachineFunction &MF) const; virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const; virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const; virtual void emitPrologue(MachineFunction &MF) const; diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 473dac4..0a33264 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -127,9 +127,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return LowerIntrinsicLRP(Op, DAG); case AMDGPUIntrinsic::AMDIL_fraction: return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); - case AMDGPUIntrinsic::AMDIL_mad: - return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1), - Op.getOperand(2), Op.getOperand(3)); case AMDGPUIntrinsic::AMDIL_max: return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), Op.getOperand(2)); @@ -176,9 +173,9 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, Op.getOperand(1)); SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, Op.getOperand(3)); - return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1), - Op.getOperand(2), - OneSubAC); + return DAG.getNode(ISD::FADD, DL, VT, + DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)), + OneSubAC); } /// \brief Generate Min/Max node @@ -393,7 +390,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return 0; // AMDIL DAG nodes - NODE_NAME_CASE(MAD); NODE_NAME_CASE(CALL); NODE_NAME_CASE(UMUL); NODE_NAME_CASE(DIV_INF); @@ -410,8 +406,9 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(SMIN) NODE_NAME_CASE(UMIN) NODE_NAME_CASE(URECIP) - NODE_NAME_CASE(INTERP) - NODE_NAME_CASE(INTERP_P0) NODE_NAME_CASE(EXPORT) + NODE_NAME_CASE(CONST_ADDRESS) + NODE_NAME_CASE(REGISTER_LOAD) + NODE_NAME_CASE(REGISTER_STORE) } } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index c7abaf6..9e7d997 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -53,6 +53,11 @@ public: const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, DebugLoc DL, SelectionDAG &DAG) const; + virtual SDValue LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + CLI.Callee.dump(); + llvm_unreachable("Undefined function"); + } virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const; @@ -60,6 +65,10 @@ public: SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const; virtual const char* getTargetNodeName(unsigned Opcode) const; + virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const { + return N; + } + // Functions defined in AMDILISelLowering.cpp public: @@ -103,7 +112,6 @@ namespace AMDGPUISD { enum { // AMDIL ISD Opcodes FIRST_NUMBER = ISD::BUILTIN_OP_END, - MAD, // 32bit Fused Multiply Add instruction CALL, // Function call based on a single integer UMUL, // 32bit unsigned multiplication DIV_INF, // Divide with infinity returned on zero divisor @@ -120,25 +128,16 @@ enum { SMIN, UMIN, URECIP, - INTERP, - INTERP_P0, EXPORT, + CONST_ADDRESS, + REGISTER_LOAD, + REGISTER_STORE, LAST_AMDGPU_ISD_NUMBER }; } // End namespace AMDGPUISD -namespace SIISD { - -enum { - SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER, - VCC_AND, - VCC_BITCAST -}; - -} // End namespace SIISD - } // 
End namespace llvm

 #endif // AMDGPUISELLOWERING_H
diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
new file mode 100644
index 0000000..15840b3
--- /dev/null
+++ b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
@@ -0,0 +1,344 @@
+//===-- AMDGPUIndirectAddressing.cpp - Indirect Addressing Support --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// Instructions can use indirect addressing to index the register file as if it
+/// were memory.  This pass lowers RegisterLoad and RegisterStore instructions
+/// to either a COPY or a MOV that uses indirect addressing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUIndirectAddressingPass : public MachineFunctionPass {
+
+private:
+  static char ID;
+  const AMDGPUInstrInfo *TII;
+
+  bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const;
+
+public:
+  AMDGPUIndirectAddressingPass(TargetMachine &tm) :
+    MachineFunctionPass(ID),
+    TII(static_cast<const AMDGPUInstrInfo*>(tm.getInstrInfo()))
+    { }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  const char *getPassName() const { return "R600 Handle indirect addressing"; }
+
+};
+
+} // End anonymous namespace
+
+char AMDGPUIndirectAddressingPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) {
+  return new AMDGPUIndirectAddressingPass(tm);
+}
+
+bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  int IndirectBegin = TII->getIndirectIndexBegin(MF);
+  int IndirectEnd = TII->getIndirectIndexEnd(MF);
+
+  if (IndirectBegin == -1) {
+    // No indirect addressing, we can skip this pass
+    assert(IndirectEnd == -1);
+    return false;
+  }
+
+  // The map keeps track of the indirect address that is represented by
+  // each virtual register. The key is the register and the value is the
+  // indirect address it uses.
+  std::map<unsigned, unsigned> RegisterAddressMap;
+
+  // First pass - Lower all of the RegisterStore instructions and track which
+  // registers are live.
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                 BB != BB_E; ++BB) {
+    // This map keeps track of the current live indirect registers. 
+    // The key is the address and the value is the register
+    std::map<unsigned, unsigned> LiveAddressRegisterMap;
+    MachineBasicBlock &MBB = *BB;
+
+    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+                               I != MBB.end(); I = Next) {
+      Next = llvm::next(I);
+      MachineInstr &MI = *I;
+
+      if (!TII->isRegisterStore(MI)) {
+        continue;
+      }
+
+      // Lower RegisterStore
+
+      unsigned RegIndex = MI.getOperand(2).getImm();
+      unsigned Channel = MI.getOperand(3).getImm();
+      unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
+      const TargetRegisterClass *IndirectStoreRegClass =
+                   TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg());
+
+      if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
+        // Direct register access.
+        unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
+
+        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg)
+                .addOperand(MI.getOperand(0));
+
+        RegisterAddressMap[DstReg] = Address;
+        LiveAddressRegisterMap[Address] = DstReg;
+      } else {
+        // Indirect register access.
+        MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I,
+                                           MI.getOperand(0).getReg(), // Value
+                                           Address,
+                                           MI.getOperand(1).getReg()); // Offset
+        for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
+          unsigned Addr = TII->calculateIndirectAddress(i, Channel);
+          unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
+          MOV.addReg(DstReg, RegState::Define | RegState::Implicit);
+          RegisterAddressMap[DstReg] = Addr;
+          LiveAddressRegisterMap[Addr] = DstReg;
+        }
+      }
+      MI.eraseFromParent();
+    }
+
+    // Update the live-ins of the successor blocks
+    for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(),
+                                          SuccEnd = MBB.succ_end();
+                                          SuccEnd != Succ; ++Succ) {
+      std::map<unsigned, unsigned>::const_iterator Key, KeyEnd;
+      for (Key = LiveAddressRegisterMap.begin(),
+             KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) {
+        (*Succ)->addLiveIn(Key->second);
+      }
+    }
+  }
+
+  // Second pass - Lower the RegisterLoad instructions
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                 BB != BB_E; ++BB) {
+    // Key is the address and the value is the register
+    std::map<unsigned, unsigned> LiveAddressRegisterMap;
+    MachineBasicBlock &MBB = *BB;
+
+    MachineBasicBlock::livein_iterator LI = MBB.livein_begin();
+    while (LI != MBB.livein_end()) {
+      std::vector<unsigned> PhiRegisters;
+
+      // Make sure this live in is used for indirect addressing
+      if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) {
+        ++LI;
+        continue;
+      }
+
+      unsigned Address = RegisterAddressMap[*LI];
+      LiveAddressRegisterMap[Address] = *LI;
+      PhiRegisters.push_back(*LI);
+
+      // Check if there are other live in registers which map to the same
+      // indirect address.
+      for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI),
+                                              LE = MBB.livein_end();
+                                              LJ != LE; ++LJ) {
+        unsigned Reg = *LJ;
+        if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) {
+          continue;
+        }
+
+        if (RegisterAddressMap[Reg] == Address) {
+          PhiRegisters.push_back(Reg);
+        }
+      }
+
+      if (PhiRegisters.size() == 1) {
+        // We don't need to insert a Phi instruction, so we can just add the
+        // registers to the live list for the block.
+        LiveAddressRegisterMap[Address] = *LI;
+        MBB.removeLiveIn(*LI);
+      } else {
+        // We need to insert a PHI, because we have the same address being
+        // written in multiple predecessor blocks. 
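+        // Illustrative sketch, not part of the original patch (the virtual
+        // register names are hypothetical): if %vreg1 (from BB0) and %vreg2
+        // (from BB1) both map to indirect address A, the code below merges
+        // them with
+        //   PhiDstReg = PHI %vreg1, <BB0>, %vreg2, <BB1>
+        // so a single register represents address A from this block on.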
+        const TargetRegisterClass *PhiDstClass =
+                   TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin()));
+        unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass);
+        MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(),
+                                          MBB.findDebugLoc(MBB.begin()),
+                                          TII->get(AMDGPU::PHI), PhiDstReg);
+
+        for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(),
+                                                   RE = PhiRegisters.end();
+                                                   RI != RE; ++RI) {
+          unsigned Reg = *RI;
+          MachineInstr *DefInst = MRI.getVRegDef(Reg);
+          assert(DefInst);
+          MachineBasicBlock *RegBlock = DefInst->getParent();
+          Phi.addReg(Reg);
+          Phi.addMBB(RegBlock);
+          MBB.removeLiveIn(Reg);
+        }
+        RegisterAddressMap[PhiDstReg] = Address;
+        LiveAddressRegisterMap[Address] = PhiDstReg;
+      }
+      LI = MBB.livein_begin();
+    }
+
+    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+                               I != MBB.end(); I = Next) {
+      Next = llvm::next(I);
+      MachineInstr &MI = *I;
+
+      if (!TII->isRegisterLoad(MI)) {
+        if (MI.getOpcode() == AMDGPU::PHI) {
+          continue;
+        }
+        // Check for indirect register defs
+        for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands();
+                      OpIdx < NumOperands; ++OpIdx) {
+          MachineOperand &MO = MI.getOperand(OpIdx);
+          if (MO.isReg() && MO.isDef() &&
+              RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) {
+            unsigned Reg = MO.getReg();
+            unsigned LiveAddress = RegisterAddressMap[Reg];
+            // Chain the live-ins
+            if (LiveAddressRegisterMap.find(LiveAddress) !=
+                LiveAddressRegisterMap.end()) {
+              MI.addOperand(MachineOperand::CreateReg(
+                                  LiveAddressRegisterMap[LiveAddress],
+                                  false, // isDef
+                                  true,  // isImp
+                                  true)); // isKill
+            }
+            LiveAddressRegisterMap[LiveAddress] = Reg;
+          }
+        }
+        continue;
+      }
+
+      const TargetRegisterClass *SuperIndirectRegClass =
+                                                TII->getSuperIndirectRegClass();
+      const TargetRegisterClass *IndirectLoadRegClass =
+                                             TII->getIndirectAddrLoadRegClass();
+      unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass);
+
+      unsigned RegIndex = MI.getOperand(2).getImm();
+      unsigned Channel = MI.getOperand(3).getImm();
+      unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
+
+      if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
+        // Direct register access
+        unsigned Reg = LiveAddressRegisterMap[Address];
+        unsigned AddrReg = IndirectLoadRegClass->getRegister(Address);
+
+        if (regHasExplicitDef(MRI, Reg)) {
+          // If the register we are reading from has an explicit def, then that
+          // means it was written via a direct register access (i.e. COPY
+          // or other instruction that doesn't use indirect addressing).  In
+          // this case we know where the value has been stored, so we can just
+          // issue a copy.
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
+                  MI.getOperand(0).getReg())
+                  .addReg(Reg);
+        } else {
+          // If the register we are reading has an implicit def, then that
+          // means it was written by an indirect register access (i.e. an
+          // instruction that uses indirect addressing).
+          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
+                  MI.getOperand(0).getReg())
+                  .addReg(AddrReg)
+                  .addReg(Reg, RegState::Implicit);
+        }
+      } else {
+        // Indirect register access
+
+        // Note on REG_SEQUENCE instructions: You can't actually use the
+        // register it defines unless you have an instruction that takes the
+        // defined register class as an operand. 
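+        // Illustrative, not part of the original patch (register names are
+        // hypothetical): the loop below builds something like
+        //   IndirectReg = REG_SEQUENCE %vregA, sub0, %vregB, sub1, ...
+        // pairing each explicitly defined live register with the sub-register
+        // index of its indirect address, which forces the allocator to place
+        // each value in the matching physical register.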
+        MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I),
+                                               TII->get(AMDGPU::REG_SEQUENCE),
+                                               IndirectReg);
+        for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
+          unsigned Addr = TII->calculateIndirectAddress(i, Channel);
+          if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) {
+            continue;
+          }
+          unsigned Reg = LiveAddressRegisterMap[Addr];
+
+          // We only need to use REG_SEQUENCE for explicit defs, since the
+          // register coalescer won't do anything with the implicit defs.
+          MachineInstr *DefInstr = MRI.getVRegDef(Reg);
+          if (!regHasExplicitDef(MRI, Reg)) {
+            continue;
+          }
+
+          // Insert a REG_SEQUENCE instruction to force the register allocator
+          // to allocate the virtual register to the correct physical register.
+          Sequence.addReg(LiveAddressRegisterMap[Addr]);
+          Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr));
+        }
+        MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I,
+                                           MI.getOperand(0).getReg(), // Value
+                                           Address,
+                                           MI.getOperand(1).getReg()); // Offset
+
+        Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill);
+        Mov.addReg(LiveAddressRegisterMap[Address], RegState::Implicit);
+
+      }
+      MI.eraseFromParent();
+    }
+  }
+  return false;
+}
+
+bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI,
+                                                     unsigned Reg) const {
+  MachineInstr *DefInstr = MRI.getVRegDef(Reg);
+
+  if (!DefInstr) {
+    return false;
+  }
+
+  if (DefInstr->getOpcode() == AMDGPU::PHI) {
+    bool Explicit = false;
+    for (MachineInstr::const_mop_iterator I = DefInstr->operands_begin(),
+                                          E = DefInstr->operands_end();
+                                          I != E; ++I) {
+      const MachineOperand &MO = *I;
+      if (!MO.isReg() || MO.isDef()) {
+        continue;
+      }
+
+      Explicit = Explicit || regHasExplicitDef(MRI, MO.getReg());
+    }
+    return Explicit;
+  }
+
+  return DefInstr->getOperand(0).isReg() &&
+         DefInstr->getOperand(0).getReg() == Reg;
+}
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index e42a46d..30f736c 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 
 #define GET_INSTRINFO_CTOR
+#define GET_INSTRMAP_INFO
 #include "AMDGPUGenInstrInfo.inc"
 
 using namespace llvm;
@@ -234,7 +235,16 @@ AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
   // TODO: Implement this function
   return true;
 }
-
+
+bool AMDGPUInstrInfo::isRegisterStore(const MachineInstr &MI) const {
+  return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE;
+}
+
+bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
+  return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
+}
+
+
 void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
     DebugLoc DL) const {
   MachineRegisterInfo &MRI = MF.getRegInfo();
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index cb97af9..3909e4e 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -40,9 +40,10 @@ class MachineInstrBuilder;
 class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
 private:
   const AMDGPURegisterInfo RI;
-  TargetMachine &TM;
   bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
                           MachineBasicBlock &MBB) const;
+protected:
+  TargetMachine &TM;
 public:
   explicit AMDGPUInstrInfo(TargetMachine &tm);
 
@@ -130,12 +131,66 @@ public:
   bool isAExtLoadInst(llvm::MachineInstr *MI) const;
   bool isStoreInst(llvm::MachineInstr *MI) const;
   bool isTruncStoreInst(llvm::MachineInstr *MI) const;
+  bool 
isRegisterStore(const MachineInstr &MI) const;
+  bool isRegisterLoad(const MachineInstr &MI) const;
+
+//===---------------------------------------------------------------------===//
+// Pure virtual functions to be implemented by sub-classes.
+//===---------------------------------------------------------------------===//
 
   virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
                                        int64_t Imm) const = 0;
   virtual unsigned getIEQOpcode() const = 0;
   virtual bool isMov(unsigned opcode) const = 0;
 
+  /// \returns the smallest register index that will be accessed by an indirect
+  /// read or write or -1 if indirect addressing is not used by this program.
+  virtual int getIndirectIndexBegin(const MachineFunction &MF) const = 0;
+
+  /// \returns the largest register index that will be accessed by an indirect
+  /// read or write or -1 if indirect addressing is not used by this program.
+  virtual int getIndirectIndexEnd(const MachineFunction &MF) const = 0;
+
+  /// \brief Calculate the "Indirect Address" for the given \p RegIndex and
+  /// \p Channel
+  ///
+  /// We model indirect addressing using a virtual address space that can be
+  /// accessed with loads and stores.  The "Indirect Address" is the memory
+  /// address in this virtual address space that maps to the given \p RegIndex
+  /// and \p Channel.
+  virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+                                            unsigned Channel) const = 0;
+
+  /// \returns The register class to be used for storing values to an
+  /// "Indirect Address".
+  virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+                                                  unsigned SourceReg) const = 0;
+
+  /// \returns The register class to be used for loading values from
+  /// an "Indirect Address".
+  virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0;
+
+  /// \brief Build instruction(s) for an indirect register write.
+  ///
+  /// \returns The instruction that performs the indirect register write
+  virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+                                    MachineBasicBlock::iterator I,
+                                    unsigned ValueReg, unsigned Address,
+                                    unsigned OffsetReg) const = 0;
+
+  /// \brief Build instruction(s) for an indirect register read.
+  ///
+  /// \returns The instruction that performs the indirect register read
+  virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+                                    MachineBasicBlock::iterator I,
+                                    unsigned ValueReg, unsigned Address,
+                                    unsigned OffsetReg) const = 0;
+
+  /// \returns the register class whose sub-registers are the set of all
+  /// possible registers that can be used for indirect addressing. 
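+  ///
+  /// Illustrative, not part of the original patch: the sub-register indices
+  /// of this class are expected to line up with
+  /// AMDGPURegisterInfo::getIndirectSubReg (sub0...sub15), so indirect
+  /// address N corresponds to sub-register index subN.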
+ virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0; + + /// \brief Convert the AMDIL MachineInstr to a supported ISA /// MachineInstr virtual void convertToISA(MachineInstr & MI, MachineFunction &MF, @@ -145,4 +200,7 @@ public: } // End llvm namespace +#define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63) +#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62) + #endif // AMDGPUINSTRINFO_H diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 96368e8..b66ae87 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -72,3 +72,11 @@ def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp, def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>; def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>; + +def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD", + SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>, + [SDNPHasChain, SDNPMayLoad]>; + +def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE", + SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>, + [SDNPHasChain, SDNPMayStore]>; diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index e634d20..960f108 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction { - field bits<16> AMDILOp = 0; - field bits<3> Gen = 0; + field bit isRegisterLoad = 0; + field bit isRegisterStore = 0; let Namespace = "AMDGPU"; let OutOperandList = outs; @@ -22,8 +22,9 @@ class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instructio let AsmString = asm; let Pattern = pattern; let Itinerary = NullALU; - let TSFlags{42-40} = Gen; - let TSFlags{63-48} = AMDILOp; + + let TSFlags{63} = isRegisterLoad; + let TSFlags{62} = isRegisterStore; } class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern> @@ -76,6 +77,11 @@ def COND_LE : PatLeaf < case ISD::SETLE: return true;}}}] >; +def COND_NULL : PatLeaf < + (cond), + [{return false;}] +>; + //===----------------------------------------------------------------------===// // Load/Store Pattern Fragments //===----------------------------------------------------------------------===// @@ -101,7 +107,9 @@ def FP_ONE : PatLeaf < [{return N->isExactlyValue(1.0);}] >; -let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in { +let isCodeGenOnly = 1, isPseudo = 1 in { + +let usesCustomInserter = 1 in { class CLAMP <RegisterClass rc> : AMDGPUShaderInst < (outs rc:$dst), @@ -131,7 +139,31 @@ def SHADER_TYPE : AMDGPUShaderInst < [(int_AMDGPU_shader_type imm:$type)] >; -} // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1 +} // usesCustomInserter = 1 + +multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass, + ComplexPattern addrPat> { + def RegisterLoad : AMDGPUShaderInst < + (outs dstClass:$dst), + (ins addrClass:$addr, i32imm:$chan), + "RegisterLoad $dst, $addr", + [(set (i32 dstClass:$dst), (AMDGPUregister_load addrPat:$addr, + (i32 timm:$chan)))] + > { + let isRegisterLoad = 1; + } + + def RegisterStore : AMDGPUShaderInst < + (outs), + (ins dstClass:$val, addrClass:$addr, i32imm:$chan), + "RegisterStore $val, $addr", + [(AMDGPUregister_store (i32 dstClass:$val), addrPat:$addr, (i32 timm:$chan))] + > { + let isRegisterStore = 1; + } +} + +} // End isCodeGenOnly = 1, isPseudo = 1 /* Generic helper patterns 
for intrinsics */ /* -------------------------------------- */ @@ -164,13 +196,64 @@ class Insert_Element <ValueType elem_type, ValueType vec_type, >; // Vector Build pattern +class Vector1_Build <ValueType vecType, RegisterClass vectorClass, + ValueType elemType, RegisterClass elemClass> : Pat < + (vecType (build_vector (elemType elemClass:$src))), + (vecType elemClass:$src) +>; + +class Vector2_Build <ValueType vecType, RegisterClass vectorClass, + ValueType elemType, RegisterClass elemClass> : Pat < + (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1))), + (INSERT_SUBREG (INSERT_SUBREG + (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1) +>; + class Vector_Build <ValueType vecType, RegisterClass vectorClass, ValueType elemType, RegisterClass elemClass> : Pat < (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y), (elemType elemClass:$z), (elemType elemClass:$w))), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y), - elemClass:$z, sel_z), elemClass:$w, sel_w) + (vecType (IMPLICIT_DEF)), elemClass:$x, sub0), elemClass:$y, sub1), + elemClass:$z, sub2), elemClass:$w, sub3) +>; + +class Vector8_Build <ValueType vecType, RegisterClass vectorClass, + ValueType elemType, RegisterClass elemClass> : Pat < + (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1), + (elemType elemClass:$sub2), (elemType elemClass:$sub3), + (elemType elemClass:$sub4), (elemType elemClass:$sub5), + (elemType elemClass:$sub6), (elemType elemClass:$sub7))), + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1), + elemClass:$sub2, sub2), elemClass:$sub3, sub3), + elemClass:$sub4, sub4), elemClass:$sub5, sub5), + elemClass:$sub6, sub6), elemClass:$sub7, sub7) +>; + +class Vector16_Build <ValueType vecType, RegisterClass vectorClass, + ValueType elemType, RegisterClass elemClass> : Pat < + (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1), + (elemType elemClass:$sub2), (elemType elemClass:$sub3), + (elemType elemClass:$sub4), (elemType elemClass:$sub5), + (elemType elemClass:$sub6), (elemType elemClass:$sub7), + (elemType elemClass:$sub8), (elemType elemClass:$sub9), + (elemType elemClass:$sub10), (elemType elemClass:$sub11), + (elemType elemClass:$sub12), (elemType elemClass:$sub13), + (elemType elemClass:$sub14), (elemType elemClass:$sub15))), + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1), + elemClass:$sub2, sub2), elemClass:$sub3, sub3), + elemClass:$sub4, sub4), elemClass:$sub5, sub5), + elemClass:$sub6, sub6), elemClass:$sub7, sub7), + elemClass:$sub8, sub8), elemClass:$sub9, sub9), + elemClass:$sub10, sub10), elemClass:$sub11, sub11), + elemClass:$sub12, sub12), elemClass:$sub13, sub13), + elemClass:$sub14, sub14), elemClass:$sub15, sub15) >; // bitconvert pattern diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp index eeafec8..fe994d2 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.cpp +++ b/lib/Target/R600/AMDGPURegisterInfo.cpp @@ -38,6 +38,7 @@ const uint16_t* 
AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, RegScavenger *RS) const { assert(!"Subroutines not supported yet"); } @@ -47,5 +48,28 @@ unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const { return 0; } +unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const { + + switch(IndirectIndex) { + case 0: return AMDGPU::sub0; + case 1: return AMDGPU::sub1; + case 2: return AMDGPU::sub2; + case 3: return AMDGPU::sub3; + case 4: return AMDGPU::sub4; + case 5: return AMDGPU::sub5; + case 6: return AMDGPU::sub6; + case 7: return AMDGPU::sub7; + case 8: return AMDGPU::sub8; + case 9: return AMDGPU::sub9; + case 10: return AMDGPU::sub10; + case 11: return AMDGPU::sub11; + case 12: return AMDGPU::sub12; + case 13: return AMDGPU::sub13; + case 14: return AMDGPU::sub14; + case 15: return AMDGPU::sub15; + default: llvm_unreachable("indirect index out of range"); + } +} + #define GET_REGINFO_TARGET_DESC #include "AMDGPUGenRegisterInfo.inc" diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h index 76ee7ae..1fc88e7 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.h +++ b/lib/Target/R600/AMDGPURegisterInfo.h @@ -53,9 +53,12 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const; void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, RegScavenger *RS) const; unsigned getFrameRegister(const MachineFunction &MF) const; + unsigned getIndirectSubReg(unsigned IndirectIndex) const; + }; } // End namespace llvm diff --git a/lib/Target/R600/AMDGPURegisterInfo.td b/lib/Target/R600/AMDGPURegisterInfo.td index 8181e02..b5aca03 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.td +++ b/lib/Target/R600/AMDGPURegisterInfo.td @@ -12,10 +12,13 @@ //===----------------------------------------------------------------------===// let Namespace = "AMDGPU" in { - def sel_x : SubRegIndex; - def sel_y : SubRegIndex; - def sel_z : SubRegIndex; - def sel_w : SubRegIndex; + +foreach Index = 0-15 in { + def sub#Index : SubRegIndex; +} + +def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">; + } include "R600RegisterInfo.td" diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp index 8295efd..26f842e 100644 --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp @@ -22,30 +22,101 @@ #include "llvm/Analysis/RegionPass.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Support/PatternMatch.h" using namespace llvm; +using namespace llvm::PatternMatch; namespace { // Definition of the complex types used in this pass. 
typedef std::pair<BasicBlock *, Value *> BBValuePair; -typedef ArrayRef<BasicBlock*> BBVecRef; typedef SmallVector<RegionNode*, 8> RNVector; typedef SmallVector<BasicBlock*, 8> BBVector; +typedef SmallVector<BranchInst*, 8> BranchVector; typedef SmallVector<BBValuePair, 2> BBValueVector; +typedef SmallPtrSet<BasicBlock *, 8> BBSet; + typedef DenseMap<PHINode *, BBValueVector> PhiMap; +typedef DenseMap<DomTreeNode *, unsigned> DTN2UnsignedMap; typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap; typedef DenseMap<BasicBlock *, Value *> BBPredicates; typedef DenseMap<BasicBlock *, BBPredicates> PredMap; -typedef DenseMap<BasicBlock *, unsigned> VisitedMap; +typedef DenseMap<BasicBlock *, BasicBlock*> BB2BBMap; +typedef DenseMap<BasicBlock *, BBVector> BB2BBVecMap; // The name for newly created blocks. static const char *FlowBlockName = "Flow"; +/// @brief Find the nearest common dominator for multiple BasicBlocks +/// +/// Helper class for AMDGPUStructurizeCFG +/// TODO: Maybe move into common code +class NearestCommonDominator { + + DominatorTree *DT; + + DTN2UnsignedMap IndexMap; + + BasicBlock *Result; + unsigned ResultIndex; + bool ExplicitMentioned; + +public: + /// \brief Start a new query + NearestCommonDominator(DominatorTree *DomTree) { + DT = DomTree; + Result = 0; + } + + /// \brief Add BB to the resulting dominator + void addBlock(BasicBlock *BB, bool Remember = true) { + + DomTreeNode *Node = DT->getNode(BB); + + if (Result == 0) { + unsigned Numbering = 0; + for (;Node;Node = Node->getIDom()) + IndexMap[Node] = ++Numbering; + Result = BB; + ResultIndex = 1; + ExplicitMentioned = Remember; + return; + } + + for (;Node;Node = Node->getIDom()) + if (IndexMap.count(Node)) + break; + else + IndexMap[Node] = 0; + + assert(Node && "Dominator tree invalid!"); + + unsigned Numbering = IndexMap[Node]; + if (Numbering > ResultIndex) { + Result = Node->getBlock(); + ResultIndex = Numbering; + ExplicitMentioned = Remember && (Result == BB); + } else if (Numbering == ResultIndex) { + ExplicitMentioned |= Remember; + } + } + + /// \brief Is "Result" one of the BBs added with "Remember" = True? + bool wasResultExplicitMentioned() { + return ExplicitMentioned; + } + + /// \brief Get the query result + BasicBlock *getResult() { + return Result; + } +}; + /// @brief Transforms the control flow graph on one single entry/exit region /// at a time. 
///
@@ -106,45 +177,62 @@ class AMDGPUStructurizeCFG : public RegionPass {
   DominatorTree *DT;
 
   RNVector Order;
-  VisitedMap Visited;
-  PredMap Predicates;
+  BBSet Visited;
+
   BBPhiMap DeletedPhis;
-  BBVector FlowsInserted;
+  BB2BBVecMap AddedPhis;
+
+  PredMap Predicates;
+  BranchVector Conditions;
 
-  BasicBlock *LoopStart;
-  BasicBlock *LoopEnd;
-  BBPredicates LoopPred;
+  BB2BBMap Loops;
+  PredMap LoopPreds;
+  BranchVector LoopConds;
+
+  RegionNode *PrevNode;
 
   void orderNodes();
 
-  void buildPredicate(BranchInst *Term, unsigned Idx,
-                      BBPredicates &Pred, bool Invert);
+  void analyzeLoops(RegionNode *N);
+
+  Value *invert(Value *Condition);
 
-  void analyzeBlock(BasicBlock *BB);
+  Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
 
-  void analyzeLoop(BasicBlock *BB, unsigned &LoopIdx);
+  void gatherPredicates(RegionNode *N);
 
   void collectInfos();
 
-  bool dominatesPredicates(BasicBlock *A, BasicBlock *B);
+  void insertConditions(bool Loops);
+
+  void delPhiValues(BasicBlock *From, BasicBlock *To);
+
+  void addPhiValues(BasicBlock *From, BasicBlock *To);
+
+  void setPhiValues();
 
   void killTerminator(BasicBlock *BB);
 
-  RegionNode *skipChained(RegionNode *Node);
+  void changeExit(RegionNode *Node, BasicBlock *NewExit,
+                  bool IncludeDominator);
 
-  void delPhiValues(BasicBlock *From, BasicBlock *To);
+  BasicBlock *getNextFlow(BasicBlock *Dominator);
 
-  void addPhiValues(BasicBlock *From, BasicBlock *To);
+  BasicBlock *needPrefix(bool NeedEmpty);
 
-  BasicBlock *getNextFlow(BasicBlock *Prev);
+  BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
 
-  bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node);
+  void setPrevNode(BasicBlock *BB);
 
-  BasicBlock *wireFlowBlock(BasicBlock *Prev, RegionNode *Node);
+  bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
 
-  void createFlow();
+  bool isPredictableTrue(RegionNode *Node);
+
+  void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
 
-  void insertConditions();
+  void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
+
+  void createFlow();
 
   void rebuildSSA();
 
@@ -198,212 +286,214 @@ void AMDGPUStructurizeCFG::orderNodes() {
   }
 }
 
-/// \brief Build blocks and loop predicates
-void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx,
-                                          BBPredicates &Pred, bool Invert) {
-  Value *True = Invert ? BoolFalse : BoolTrue;
-  Value *False = Invert ? BoolTrue : BoolFalse;
+/// \brief Determine the end of the loops
+void AMDGPUStructurizeCFG::analyzeLoops(RegionNode *N) {
 
-  RegionInfo *RI = ParentRegion->getRegionInfo();
-  BasicBlock *BB = Term->getParent();
+  if (N->isSubRegion()) {
+    // Test for exit as back edge
+    BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
+    if (Visited.count(Exit))
+      Loops[Exit] = N->getEntry();
+
+  } else {
+    // Test for successors as back edge
+    BasicBlock *BB = N->getNodeAs<BasicBlock>();
+    BranchInst *Term = cast<BranchInst>(BB->getTerminator());
+
+    for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+      BasicBlock *Succ = Term->getSuccessor(i);
 
-  // Handle the case where multiple regions start at the same block
-  Region *R = BB != ParentRegion->getEntry() ? 
- RI->getRegionFor(BB) : ParentRegion; + if (Visited.count(Succ)) + Loops[Succ] = BB; + } + } +} - if (R == ParentRegion) { - // It's a top level block in our region - Value *Cond = True; - if (Term->isConditional()) { - BasicBlock *Other = Term->getSuccessor(!Idx); +/// \brief Invert the given condition +Value *AMDGPUStructurizeCFG::invert(Value *Condition) { - if (Visited.count(Other)) { - if (!Pred.count(Other)) - Pred[Other] = False; + // First: Check if it's a constant + if (Condition == BoolTrue) + return BoolFalse; - if (!Pred.count(BB)) - Pred[BB] = True; - return; - } - Cond = Term->getCondition(); + if (Condition == BoolFalse) + return BoolTrue; - if (Idx != Invert) - Cond = BinaryOperator::CreateNot(Cond, "", Term); - } + if (Condition == BoolUndef) + return BoolUndef; - Pred[BB] = Cond; + // Second: If the condition is already inverted, return the original value + if (match(Condition, m_Not(m_Value(Condition)))) + return Condition; - } else if (ParentRegion->contains(R)) { - // It's a block in a sub region - while(R->getParent() != ParentRegion) - R = R->getParent(); + // Third: Check all the users for an invert + BasicBlock *Parent = cast<Instruction>(Condition)->getParent(); + for (Value::use_iterator I = Condition->use_begin(), + E = Condition->use_end(); I != E; ++I) { - Pred[R->getEntry()] = True; + Instruction *User = dyn_cast<Instruction>(*I); + if (!User || User->getParent() != Parent) + continue; - } else { - // It's a branch from outside into our parent region - Pred[BB] = True; + if (match(*I, m_Not(m_Specific(Condition)))) + return *I; } -} -/// \brief Analyze the successors of each block and build up predicates -void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) { - pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - BBPredicates &Pred = Predicates[BB]; + // Last option: Create a new instruction + return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator()); +} - for (; PI != PE; ++PI) { - BranchInst *Term = cast<BranchInst>((*PI)->getTerminator()); +/// \brief Build the condition for one edge +Value *AMDGPUStructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, + bool Invert) { + Value *Cond = Invert ? 
BoolFalse : BoolTrue; + if (Term->isConditional()) { + Cond = Term->getCondition(); - for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { - BasicBlock *Succ = Term->getSuccessor(i); - if (Succ != BB) - continue; - buildPredicate(Term, i, Pred, false); - } + if (Idx != Invert) + Cond = invert(Cond); } + return Cond; } -/// \brief Analyze the conditions leading to loop to a previous block -void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned &LoopIdx) { - BranchInst *Term = cast<BranchInst>(BB->getTerminator()); +/// \brief Analyze the predecessors of each block and build up predicates +void AMDGPUStructurizeCFG::gatherPredicates(RegionNode *N) { - for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { - BasicBlock *Succ = Term->getSuccessor(i); + RegionInfo *RI = ParentRegion->getRegionInfo(); + BasicBlock *BB = N->getEntry(); + BBPredicates &Pred = Predicates[BB]; + BBPredicates &LPred = LoopPreds[BB]; + + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + PI != PE; ++PI) { - // Ignore it if it's not a back edge - if (!Visited.count(Succ)) + // Ignore it if it's a branch from outside into our region entry + if (!ParentRegion->contains(*PI)) continue; - buildPredicate(Term, i, LoopPred, true); + Region *R = RI->getRegionFor(*PI); + if (R == ParentRegion) { - LoopEnd = BB; - if (Visited[Succ] < LoopIdx) { - LoopIdx = Visited[Succ]; - LoopStart = Succ; + // It's a top level block in our region + BranchInst *Term = cast<BranchInst>((*PI)->getTerminator()); + for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { + BasicBlock *Succ = Term->getSuccessor(i); + if (Succ != BB) + continue; + + if (Visited.count(*PI)) { + // Normal forward edge + if (Term->isConditional()) { + // Try to treat it like an ELSE block + BasicBlock *Other = Term->getSuccessor(!i); + if (Visited.count(Other) && !Loops.count(Other) && + !Pred.count(Other) && !Pred.count(*PI)) { + + Pred[Other] = BoolFalse; + Pred[*PI] = BoolTrue; + continue; + } + } + Pred[*PI] = buildCondition(Term, i, false); + + } else { + // Back edge + LPred[*PI] = buildCondition(Term, i, true); + } + } + + } else { + + // It's an exit from a sub region + while(R->getParent() != ParentRegion) + R = R->getParent(); + + // Edge from inside a subregion to its entry, ignore it + if (R == N) + continue; + + BasicBlock *Entry = R->getEntry(); + if (Visited.count(Entry)) + Pred[Entry] = BoolTrue; + else + LPred[Entry] = BoolFalse; } } } /// \brief Collect various loop and predicate infos void AMDGPUStructurizeCFG::collectInfos() { - unsigned Number = 0, LoopIdx = ~0; // Reset predicate Predicates.clear(); // and loop infos - LoopStart = LoopEnd = 0; - LoopPred.clear(); + Loops.clear(); + LoopPreds.clear(); + + // Reset the visited nodes + Visited.clear(); - RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend(); - for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) { + for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend(); + OI != OE; ++OI) { // Analyze all the conditions leading to a node - analyzeBlock((*OI)->getEntry()); + gatherPredicates(*OI); - if ((*OI)->isSubRegion()) - continue; + // Remember that we've seen this node + Visited.insert((*OI)->getEntry()); - // Find the first/last loop nodes and loop predicates - analyzeLoop((*OI)->getNodeAs<BasicBlock>(), LoopIdx); + // Find the last back edges + analyzeLoops(*OI); } } -/// \brief Does A dominate all the predicates of B ? 
-bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) { - BBPredicates &Preds = Predicates[B]; - for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); - PI != PE; ++PI) { +/// \brief Insert the missing branch conditions +void AMDGPUStructurizeCFG::insertConditions(bool Loops) { + BranchVector &Conds = Loops ? LoopConds : Conditions; + Value *Default = Loops ? BoolTrue : BoolFalse; + SSAUpdater PhiInserter; - if (!DT->dominates(A, PI->first)) - return false; - } - return true; -} + for (BranchVector::iterator I = Conds.begin(), + E = Conds.end(); I != E; ++I) { -/// \brief Remove phi values from all successors and the remove the terminator. -void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) { - TerminatorInst *Term = BB->getTerminator(); - if (!Term) - return; + BranchInst *Term = *I; + assert(Term->isConditional()); - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); - SI != SE; ++SI) { + BasicBlock *Parent = Term->getParent(); + BasicBlock *SuccTrue = Term->getSuccessor(0); + BasicBlock *SuccFalse = Term->getSuccessor(1); - delPhiValues(BB, *SI); - } + PhiInserter.Initialize(Boolean, ""); + PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default); + PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); - Term->eraseFromParent(); -} + BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; -/// First: Skip forward to the first region node that either isn't a subregion or not -/// dominating it's exit, remove all the skipped nodes from the node order. -/// -/// Second: Handle the first successor directly if the resulting nodes successor -/// predicates are still dominated by the original entry -RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) { - BasicBlock *Entry = Node->getEntry(); + NearestCommonDominator Dominator(DT); + Dominator.addBlock(Parent, false); - // Skip forward as long as it is just a linear flow - while (true) { - BasicBlock *Entry = Node->getEntry(); - BasicBlock *Exit; + Value *ParentValue = 0; + for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); + PI != PE; ++PI) { - if (Node->isSubRegion()) { - Exit = Node->getNodeAs<Region>()->getExit(); - } else { - TerminatorInst *Term = Entry->getTerminator(); - if (Term->getNumSuccessors() != 1) + if (PI->first == Parent) { + ParentValue = PI->second; break; - Exit = Term->getSuccessor(0); + } + PhiInserter.AddAvailableValue(PI->first, PI->second); + Dominator.addBlock(PI->first); } - // It's a back edge, break here so we can insert a loop node - if (!Visited.count(Exit)) - return Node; - - // More than node edges are pointing to exit - if (!DT->dominates(Entry, Exit)) - return Node; - - RegionNode *Next = ParentRegion->getNode(Exit); - RNVector::iterator I = std::find(Order.begin(), Order.end(), Next); - assert(I != Order.end()); - - Visited.erase(Next->getEntry()); - Order.erase(I); - Node = Next; - } + if (ParentValue) { + Term->setCondition(ParentValue); + } else { + if (!Dominator.wasResultExplicitMentioned()) + PhiInserter.AddAvailableValue(Dominator.getResult(), Default); - BasicBlock *BB = Node->getEntry(); - TerminatorInst *Term = BB->getTerminator(); - if (Term->getNumSuccessors() != 2) - return Node; - - // Our node has exactly two succesors, check if we can handle - // any of them directly - BasicBlock *Succ = Term->getSuccessor(0); - if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) { - Succ = Term->getSuccessor(1); - if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) - 
return Node;
-  } else {
-    BasicBlock *Succ2 = Term->getSuccessor(1);
-    if (Visited.count(Succ2) && Visited[Succ] > Visited[Succ2] &&
-        dominatesPredicates(Entry, Succ2))
-      Succ = Succ2;
+      Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
+    }
   }
-
-  RegionNode *Next = ParentRegion->getNode(Succ);
-  RNVector::iterator E = Order.end();
-  RNVector::iterator I = std::find(Order.begin(), E, Next);
-  assert(I != E);
-
-  killTerminator(BB);
-  FlowsInserted.push_back(BB);
-  Visited.erase(Succ);
-  Order.erase(I);
-  return ParentRegion->getNode(wireFlowBlock(BB, Next));
 }
 
 /// \brief Remove all PHI values coming from "From" into "To" and remember
@@ -421,224 +511,306 @@ void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
   }
 }
 
-/// \brief Add the PHI values back once we knew the new predecessor
+/// \brief Add a dummy PHI value as soon as we know the new predecessor
 void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
-  if (!DeletedPhis.count(To))
-    return;
+  for (BasicBlock::iterator I = To->begin(), E = To->end();
+       I != E && isa<PHINode>(*I);) {
+
+    PHINode &Phi = cast<PHINode>(*I++);
+    Value *Undef = UndefValue::get(Phi.getType());
+    Phi.addIncoming(Undef, From);
+  }
+  AddedPhis[To].push_back(From);
+}
+
+/// \brief Add the real PHI value as soon as everything is set up
+void AMDGPUStructurizeCFG::setPhiValues() {
 
-  PhiMap &Map = DeletedPhis[To];
   SSAUpdater Updater;
+  for (BB2BBVecMap::iterator AI = AddedPhis.begin(), AE = AddedPhis.end();
+       AI != AE; ++AI) {
 
-  for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
+    BasicBlock *To = AI->first;
+    BBVector &From = AI->second;
 
-    PHINode *Phi = I->first;
-    Updater.Initialize(Phi->getType(), "");
-    BasicBlock *Fallback = To;
-    bool HaveFallback = false;
+    if (!DeletedPhis.count(To))
+      continue;
 
-    for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end();
-         VI != VE; ++VI) {
+    PhiMap &Map = DeletedPhis[To];
+    for (PhiMap::iterator PI = Map.begin(), PE = Map.end();
+         PI != PE; ++PI) {
 
-      Updater.AddAvailableValue(VI->first, VI->second);
-      BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first);
-      if (Dom == VI->first)
-        HaveFallback = true;
-      else if (Dom != Fallback)
-        HaveFallback = false;
-      Fallback = Dom;
-    }
-    if (!HaveFallback) {
+      PHINode *Phi = PI->first;
       Value *Undef = UndefValue::get(Phi->getType());
-      Updater.AddAvailableValue(Fallback, Undef);
+      Updater.Initialize(Phi->getType(), "");
+      Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
+      Updater.AddAvailableValue(To, Undef);
+
+      NearestCommonDominator Dominator(DT);
+      Dominator.addBlock(To, false);
+      for (BBValueVector::iterator VI = PI->second.begin(),
+           VE = PI->second.end(); VI != VE; ++VI) {
+
+        Updater.AddAvailableValue(VI->first, VI->second);
+        Dominator.addBlock(VI->first);
+      }
+
+      if (!Dominator.wasResultExplicitMentioned())
+        Updater.AddAvailableValue(Dominator.getResult(), Undef);
+
+      for (BBVector::iterator FI = From.begin(), FE = From.end();
+           FI != FE; ++FI) {
+
+        int Idx = Phi->getBasicBlockIndex(*FI);
+        assert(Idx != -1);
+        Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(*FI));
+      }
+    }
+
+    DeletedPhis.erase(To);
+  }
+  assert(DeletedPhis.empty());
+}
+
+/// \brief Remove phi values from all successors and then remove the terminator. 
+void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) { + TerminatorInst *Term = BB->getTerminator(); + if (!Term) + return; + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + + delPhiValues(BB, *SI); + } + + Term->eraseFromParent(); +} + +/// \brief Let node exit(s) point to NewExit +void AMDGPUStructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, + bool IncludeDominator) { + + if (Node->isSubRegion()) { + Region *SubRegion = Node->getNodeAs<Region>(); + BasicBlock *OldExit = SubRegion->getExit(); + BasicBlock *Dominator = 0; + + // Find all the edges from the sub region to the exit + for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit); + I != E;) { + + BasicBlock *BB = *I++; + if (!SubRegion->contains(BB)) + continue; + + // Modify the edges to point to the new exit + delPhiValues(BB, OldExit); + BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit); + addPhiValues(BB, NewExit); + + // Find the new dominator (if requested) + if (IncludeDominator) { + if (!Dominator) + Dominator = BB; + else + Dominator = DT->findNearestCommonDominator(Dominator, BB); + } } - Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From); + // Change the dominator (if requested) + if (Dominator) + DT->changeImmediateDominator(NewExit, Dominator); + + // Update the region info + SubRegion->replaceExit(NewExit); + + } else { + BasicBlock *BB = Node->getNodeAs<BasicBlock>(); + killTerminator(BB); + BranchInst::Create(NewExit, BB); + addPhiValues(BB, NewExit); + if (IncludeDominator) + DT->changeImmediateDominator(NewExit, BB); } - DeletedPhis.erase(To); } /// \brief Create a new flow node and update dominator tree and region info -BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) { +BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Dominator) { LLVMContext &Context = Func->getContext(); BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : Order.back()->getEntry(); BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, Func, Insert); - DT->addNewBlock(Flow, Prev); + DT->addNewBlock(Flow, Dominator); ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); - FlowsInserted.push_back(Flow); return Flow; } +/// \brief Create a new or reuse the previous node as flow node +BasicBlock *AMDGPUStructurizeCFG::needPrefix(bool NeedEmpty) { + + BasicBlock *Entry = PrevNode->getEntry(); + + if (!PrevNode->isSubRegion()) { + killTerminator(Entry); + if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end()) + return Entry; + + } + + // create a new flow node + BasicBlock *Flow = getNextFlow(Entry); + + // and wire it up + changeExit(PrevNode, Flow, true); + PrevNode = ParentRegion->getBBNode(Flow); + return Flow; +} + +/// \brief Returns the region exit if possible, otherwise just a new flow node +BasicBlock *AMDGPUStructurizeCFG::needPostfix(BasicBlock *Flow, + bool ExitUseAllowed) { + + if (Order.empty() && ExitUseAllowed) { + BasicBlock *Exit = ParentRegion->getExit(); + DT->changeImmediateDominator(Exit, Flow); + addPhiValues(Flow, Exit); + return Exit; + } + return getNextFlow(Flow); +} + +/// \brief Set the previous node +void AMDGPUStructurizeCFG::setPrevNode(BasicBlock *BB) { + PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0; +} + +/// \brief Does BB dominate all the predicates of Node ? 
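+///
+/// A sketch of the intent, not part of the original patch: if BB dominates
+/// every block that contributes a predicate for Node, then all the values
+/// that decide whether Node executes are known once control reaches BB, so
+/// wireFlow() can keep nesting further nodes under the current flow block.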
+bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { + BBPredicates &Preds = Predicates[Node->getEntry()]; + for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); + PI != PE; ++PI) { + + if (!DT->dominates(BB, PI->first)) + return false; + } + return true; +} + /// \brief Can we predict that this node will always be called? -bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev, - BasicBlock *Node) { - BBPredicates &Preds = Predicates[Node]; +bool AMDGPUStructurizeCFG::isPredictableTrue(RegionNode *Node) { + + BBPredicates &Preds = Predicates[Node->getEntry()]; bool Dominated = false; + // Regionentry is always true + if (PrevNode == 0) + return true; + for (BBPredicates::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { if (I->second != BoolTrue) return false; - if (!Dominated && DT->dominates(I->first, Prev)) + if (!Dominated && DT->dominates(I->first, PrevNode->getEntry())) Dominated = true; } + + // TODO: The dominator check is too strict return Dominated; } -/// \brief Wire up the new control flow by inserting or updating the branch -/// instructions at node exits -BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev, - RegionNode *Node) { - BasicBlock *Entry = Node->getEntry(); - - if (LoopStart == Entry) { - LoopStart = Prev; - LoopPred[Prev] = BoolTrue; - } +/// Take one node from the order vector and wire it up +void AMDGPUStructurizeCFG::wireFlow(bool ExitUseAllowed, + BasicBlock *LoopEnd) { - // Wire it up temporary, skipChained may recurse into us - BranchInst::Create(Entry, Prev); - DT->changeImmediateDominator(Entry, Prev); - addPhiValues(Prev, Entry); + RegionNode *Node = Order.pop_back_val(); + Visited.insert(Node->getEntry()); - Node = skipChained(Node); + if (isPredictableTrue(Node)) { + // Just a linear flow + if (PrevNode) { + changeExit(PrevNode, Node->getEntry(), true); + } + PrevNode = Node; - BasicBlock *Next = getNextFlow(Prev); - if (!isPredictableTrue(Prev, Entry)) { - // Let Prev point to entry and next block - Prev->getTerminator()->eraseFromParent(); - BranchInst::Create(Entry, Next, BoolUndef, Prev); } else { - DT->changeImmediateDominator(Next, Entry); - } + // Insert extra prefix node (or reuse last one) + BasicBlock *Flow = needPrefix(false); - // Let node exit(s) point to next block - if (Node->isSubRegion()) { - Region *SubRegion = Node->getNodeAs<Region>(); - BasicBlock *Exit = SubRegion->getExit(); + // Insert extra postfix node (or use exit instead) + BasicBlock *Entry = Node->getEntry(); + BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); - // Find all the edges from the sub region to the exit - BBVector ToDo; - for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { - if (SubRegion->contains(*I)) - ToDo.push_back(*I); - } + // let it point to entry and next block + Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow)); + addPhiValues(Flow, Entry); + DT->changeImmediateDominator(Entry, Flow); - // Modify the edges to point to the new flow block - for (BBVector::iterator I = ToDo.begin(), E = ToDo.end(); I != E; ++I) { - delPhiValues(*I, Exit); - TerminatorInst *Term = (*I)->getTerminator(); - Term->replaceUsesOfWith(Exit, Next); + PrevNode = Node; + while (!Order.empty() && !Visited.count(LoopEnd) && + dominatesPredicates(Entry, Order.back())) { + handleLoops(false, LoopEnd); } - // Update the region info - SubRegion->replaceExit(Next); - - } else { - BasicBlock *BB = Node->getNodeAs<BasicBlock>(); - killTerminator(BB); - 
BranchInst::Create(Next, BB); - - if (BB == LoopEnd) - LoopEnd = 0; + changeExit(PrevNode, Next, false); + setPrevNode(Next); } - - return Next; } -/// Destroy node order and visited map, build up flow order instead. -/// After this function control flow looks like it should be, but -/// branches only have undefined conditions. -void AMDGPUStructurizeCFG::createFlow() { - DeletedPhis.clear(); +void AMDGPUStructurizeCFG::handleLoops(bool ExitUseAllowed, + BasicBlock *LoopEnd) { + RegionNode *Node = Order.back(); + BasicBlock *LoopStart = Node->getEntry(); - BasicBlock *Prev = Order.pop_back_val()->getEntry(); - assert(Prev == ParentRegion->getEntry() && "Incorrect node order!"); - Visited.erase(Prev); - - if (LoopStart == Prev) { - // Loop starts at entry, split entry so that we can predicate it - BasicBlock::iterator Insert = Prev->getFirstInsertionPt(); - BasicBlock *Split = Prev->splitBasicBlock(Insert, FlowBlockName); - DT->addNewBlock(Split, Prev); - ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion); - Predicates[Split] = Predicates[Prev]; - Order.push_back(ParentRegion->getBBNode(Split)); - LoopPred[Prev] = BoolTrue; - - } else if (LoopStart == Order.back()->getEntry()) { - // Loop starts behind entry, split entry so that we can jump to it - Instruction *Term = Prev->getTerminator(); - BasicBlock *Split = Prev->splitBasicBlock(Term, FlowBlockName); - DT->addNewBlock(Split, Prev); - ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion); - Prev = Split; + if (!Loops.count(LoopStart)) { + wireFlow(ExitUseAllowed, LoopEnd); + return; } - killTerminator(Prev); - FlowsInserted.clear(); - FlowsInserted.push_back(Prev); + if (!isPredictableTrue(Node)) + LoopStart = needPrefix(true); - while (!Order.empty()) { - RegionNode *Node = Order.pop_back_val(); - Visited.erase(Node->getEntry()); - Prev = wireFlowBlock(Prev, Node); - if (LoopStart && !LoopEnd) { - // Create an extra loop end node - LoopEnd = Prev; - Prev = getNextFlow(LoopEnd); - BranchInst::Create(Prev, LoopStart, BoolUndef, LoopEnd); - addPhiValues(LoopEnd, LoopStart); - } + LoopEnd = Loops[Node->getEntry()]; + wireFlow(false, LoopEnd); + while (!Visited.count(LoopEnd)) { + handleLoops(false, LoopEnd); } - BasicBlock *Exit = ParentRegion->getExit(); - BranchInst::Create(Exit, Prev); - addPhiValues(Prev, Exit); - if (DT->dominates(ParentRegion->getEntry(), Exit)) - DT->changeImmediateDominator(Exit, Prev); - - if (LoopStart && LoopEnd) { - BBVector::iterator FI = std::find(FlowsInserted.begin(), - FlowsInserted.end(), - LoopStart); - for (; *FI != LoopEnd; ++FI) { - addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0)); - } - } - - assert(Order.empty()); - assert(Visited.empty()); - assert(DeletedPhis.empty()); + // Create an extra loop end node + LoopEnd = needPrefix(false); + BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); + LoopConds.push_back(BranchInst::Create(Next, LoopStart, + BoolUndef, LoopEnd)); + addPhiValues(LoopEnd, LoopStart); + setPrevNode(Next); } -/// \brief Insert the missing branch conditions -void AMDGPUStructurizeCFG::insertConditions() { - SSAUpdater PhiInserter; - - for (BBVector::iterator FI = FlowsInserted.begin(), FE = FlowsInserted.end(); - FI != FE; ++FI) { - - BranchInst *Term = cast<BranchInst>((*FI)->getTerminator()); - if (Term->isUnconditional()) - continue; +/// After this function control flow looks like it should be, but +/// branches and PHI nodes only have undefined conditions. 
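wireFlow and handleLoops above are mutually recursive: wireFlow consumes one node and, when it had to open a conditional flow block, keeps delegating back to handleLoops for every following node whose predicates the current entry dominates; handleLoops recognizes loop headers via the Loops map and, once the loop end has been visited, closes the loop with an undef-conditioned back edge that insertConditions fills in later. The control skeleton, stripped of the actual CFG surgery (types reduced for illustration):

#include <set>
#include <vector>

struct Node { int Id; bool IsLoopHeader; int LoopEnd; };

static std::vector<Node *> Order;   // region nodes in reverse scheduling order
static std::set<int> Visited;

static void handleLoops(bool ExitUseAllowed, int LoopEnd);

static void wireFlow(bool ExitUseAllowed, int LoopEnd) {
  Node *N = Order.back(); Order.pop_back();
  Visited.insert(N->Id);
  // ...either extend the flow linearly (the isPredictableTrue case) or open
  // a conditional flow block, then drain nodes dominated by this entry:
  while (!Order.empty() && !Visited.count(LoopEnd)
         /* && dominatesPredicates(Entry, Order.back()) */)
    handleLoops(false, LoopEnd);
}

static void handleLoops(bool ExitUseAllowed, int LoopEnd) {
  Node *N = Order.back();
  if (!N->IsLoopHeader)
    return wireFlow(ExitUseAllowed, LoopEnd);
  int End = N->LoopEnd;           // Loops[Node->getEntry()]
  wireFlow(false, End);           // wire the header itself
  while (!Visited.count(End))     // then the remainder of the loop body
    handleLoops(false, End);
  // ...finally emit the back edge: BranchInst(Next, LoopStart, BoolUndef)
}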
+void AMDGPUStructurizeCFG::createFlow() { - PhiInserter.Initialize(Boolean, ""); - PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse); + BasicBlock *Exit = ParentRegion->getExit(); + bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit); - BasicBlock *Succ = Term->getSuccessor(0); - BBPredicates &Preds = (*FI == LoopEnd) ? LoopPred : Predicates[Succ]; - for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); - PI != PE; ++PI) { + DeletedPhis.clear(); + AddedPhis.clear(); + Conditions.clear(); + LoopConds.clear(); - PhiInserter.AddAvailableValue(PI->first, PI->second); - } + PrevNode = 0; + Visited.clear(); - Term->setCondition(PhiInserter.GetValueAtEndOfBlock(*FI)); + while (!Order.empty()) { + handleLoops(EntryDominatesExit, 0); } + + if (PrevNode) + changeExit(PrevNode, Exit, EntryDominatesExit); + else + assert(EntryDominatesExit); } /// Handle a rare case where the disintegrated nodes instructions @@ -696,14 +868,21 @@ bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) { orderNodes(); collectInfos(); createFlow(); - insertConditions(); + insertConditions(false); + insertConditions(true); + setPhiValues(); rebuildSSA(); + // Cleanup Order.clear(); Visited.clear(); - Predicates.clear(); DeletedPhis.clear(); - FlowsInserted.clear(); + AddedPhis.clear(); + Predicates.clear(); + Conditions.clear(); + Loops.clear(); + LoopPreds.clear(); + LoopConds.clear(); return true; } diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index cab7884..1973fc6 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -44,7 +44,7 @@ public: virtual ~AMDGPUSubtarget(); const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } - virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS); + virtual void ParseSubtargetFeatures(StringRef CPU, StringRef FS); bool isOverride(AMDGPUDeviceInfo::Caps) const; bool is64bit() const; diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index d09dc2e..e2f00be 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -102,6 +102,12 @@ AMDGPUPassConfig::addPreISel() { bool AMDGPUPassConfig::addInstSelector() { addPass(createAMDGPUPeepholeOpt(*TM)); addPass(createAMDGPUISelDag(getAMDGPUTargetMachine())); + + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { + // This callbacks this pass uses are not implemented yet on SI. 
+ addPass(createAMDGPUIndirectAddressingPass(*TM)); + } return false; } @@ -116,6 +122,11 @@ bool AMDGPUPassConfig::addPreRegAlloc() { } bool AMDGPUPassConfig::addPostRegAlloc() { + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + + if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { + addPass(createSIInsertWaits(*TM)); + } return false; } @@ -132,8 +143,8 @@ bool AMDGPUPassConfig::addPreEmitPass() { addPass(createAMDGPUCFGStructurizerPass(*TM)); addPass(createR600ExpandSpecialInstrsPass(*TM)); addPass(&FinalizeMachineBundlesID); + addPass(createR600LowerConstCopy(*TM)); } else { - addPass(createSILowerLiteralConstantsPass(*TM)); addPass(createSILowerControlFlowPass(*TM)); } diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h index 91f9a83..2afe787 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.h +++ b/lib/Target/R600/AMDGPUTargetMachine.h @@ -15,9 +15,9 @@ #ifndef AMDGPU_TARGET_MACHINE_H #define AMDGPU_TARGET_MACHINE_H +#include "AMDGPUFrameLowering.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUSubtarget.h" -#include "AMDILFrameLowering.h" #include "AMDILIntrinsicInfo.h" #include "R600ISelLowering.h" #include "llvm/ADT/OwningPtr.h" diff --git a/lib/Target/R600/AMDIL.h b/lib/Target/R600/AMDIL.h index 4e577dc..b39fbdb 100644 --- a/lib/Target/R600/AMDIL.h +++ b/lib/Target/R600/AMDIL.h @@ -90,14 +90,30 @@ namespace AMDGPUAS { enum AddressSpaces { PRIVATE_ADDRESS = 0, ///< Address space for private memory. GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). - CONSTANT_ADDRESS = 2, ///< Address space for constant memory. + CONSTANT_ADDRESS = 2, ///< Address space for constant memory LOCAL_ADDRESS = 3, ///< Address space for local memory. REGION_ADDRESS = 4, ///< Address space for region memory. ADDRESS_NONE = 5, ///< Address space for unknown memory. PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0) PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1) USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI - LAST_ADDRESS = 9 + CONSTANT_BUFFER_0 = 9, + CONSTANT_BUFFER_1 = 10, + CONSTANT_BUFFER_2 = 11, + CONSTANT_BUFFER_3 = 12, + CONSTANT_BUFFER_4 = 13, + CONSTANT_BUFFER_5 = 14, + CONSTANT_BUFFER_6 = 15, + CONSTANT_BUFFER_7 = 16, + CONSTANT_BUFFER_8 = 17, + CONSTANT_BUFFER_9 = 18, + CONSTANT_BUFFER_10 = 19, + CONSTANT_BUFFER_11 = 20, + CONSTANT_BUFFER_12 = 21, + CONSTANT_BUFFER_13 = 22, + CONSTANT_BUFFER_14 = 23, + CONSTANT_BUFFER_15 = 24, + LAST_ADDRESS = 25 }; } // namespace AMDGPUAS diff --git a/lib/Target/R600/AMDILDevice.h b/lib/Target/R600/AMDILDevice.h index b9a1560..97df98c 100644 --- a/lib/Target/R600/AMDILDevice.h +++ b/lib/Target/R600/AMDILDevice.h @@ -104,7 +104,7 @@ public: static const unsigned int QuarterWavefrontSize = 16; protected: virtual void setCaps(); - llvm::BitVector mHWBits; + BitVector mHWBits; llvm::BitVector mSWBits; AMDGPUSubtarget *mSTM; uint32_t DeviceFlag; diff --git a/lib/Target/R600/AMDILFrameLowering.cpp b/lib/Target/R600/AMDILFrameLowering.cpp deleted file mode 100644 index 9ad495a..0000000 --- a/lib/Target/R600/AMDILFrameLowering.cpp +++ /dev/null @@ -1,47 +0,0 @@ -//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
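The widened AMDGPUAS enum above gives each of the sixteen R600/SI constant buffers its own address space, laid out contiguously from 9 (CONSTANT_BUFFER_0) through 24 (CONSTANT_BUFFER_15), with LAST_ADDRESS bumped to 25. That layout makes the mapping between a driver-visible constant-buffer index and its address space pure arithmetic; a hypothetical helper, not part of the patch, just to make the invariant explicit:

#include <cassert>

// Mirrors the enum layout: CONSTANT_BUFFER_N == CONSTANT_BUFFER_0 + N.
unsigned constantBufferAddrSpace(unsigned CBIndex) {
  const unsigned ConstantBuffer0 = 9;  // AMDGPUAS::CONSTANT_BUFFER_0
  assert(CBIndex < 16 && "R600/SI expose sixteen constant buffers");
  return ConstantBuffer0 + CBIndex;
}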
-// -//==-----------------------------------------------------------------------===// -// -/// \file -/// \brief Interface to describe a layout of a stack frame on a AMDGPU target -/// machine. -// -//===----------------------------------------------------------------------===// -#include "AMDILFrameLowering.h" -#include "llvm/CodeGen/MachineFrameInfo.h" - -using namespace llvm; -AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl, - int LAO, unsigned TransAl) - : TargetFrameLowering(D, StackAl, LAO, TransAl) { -} - -AMDGPUFrameLowering::~AMDGPUFrameLowering() { -} - -int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->getObjectOffset(FI); -} - -const TargetFrameLowering::SpillSlot * -AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const { - NumEntries = 0; - return 0; -} -void -AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const { -} -void -AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { -} -bool -AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const { - return false; -} diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index d15ed39..e77b9dc 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -16,10 +16,12 @@ #include "AMDGPURegisterInfo.h" #include "AMDILDevices.h" #include "R600InstrInfo.h" +#include "SIISelLowering.h" #include "llvm/ADT/ValueMap.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Compiler.h" +#include "llvm/CodeGen/SelectionDAG.h" #include <list> #include <queue> @@ -42,9 +44,11 @@ public: SDNode *Select(SDNode *N); virtual const char *getPassName() const; + virtual void PostprocessISelDAG(); private: inline SDValue getSmallIPtrImm(unsigned Imm); + bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &); // Complex pattern selectors bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2); @@ -67,9 +71,11 @@ private: static bool isLocalLoad(const LoadSDNode *N); static bool isRegionLoad(const LoadSDNode *N); - bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset); - bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset); + bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); + bool SelectGlobalValueVariableOffset(SDValue Addr, + SDValue &BaseReg, SDValue& Offset); bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); // Include the pieces autogenerated from the target description. 
#include "AMDGPUGenDAGISel.inc" @@ -156,16 +162,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { } switch (Opc) { default: break; - case ISD::FrameIndex: { - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) { - unsigned int FI = FIN->getIndex(); - EVT OpVT = N->getValueType(0); - unsigned int NewOpc = AMDGPU::COPY; - SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32); - return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI); - } - break; - } case ISD::ConstantFP: case ISD::Constant: { const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); @@ -224,7 +220,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { continue; } } else { - if (!TII->isALUInstr(Use->getMachineOpcode())) { + if (!TII->isALUInstr(Use->getMachineOpcode()) || + (TII->get(Use->getMachineOpcode()).TSFlags & + R600_InstFlag::VECTOR)) { continue; } @@ -259,7 +257,116 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { break; } } - return SelectCode(N); + SDNode *Result = SelectCode(N); + + // Fold operands of selected node + + const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { + const R600InstrInfo *TII = + static_cast<const R600InstrInfo*>(TM.getInstrInfo()); + if (Result && Result->isMachineOpcode() && + !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR) + && TII->isALUInstr(Result->getMachineOpcode())) { + // Fold FNEG/FABS/CONST_ADDRESS + // TODO: Isel can generate multiple MachineInst, we need to recursively + // parse Result + bool IsModified = false; + do { + std::vector<SDValue> Ops; + for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end(); + I != E; ++I) + Ops.push_back(*I); + IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops); + if (IsModified) { + Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size()); + } + } while (IsModified); + + // If node has a single use which is CLAMP_R600, folds it + if (Result->hasOneUse() && Result->isMachineOpcode()) { + SDNode *PotentialClamp = *Result->use_begin(); + if (PotentialClamp->isMachineOpcode() && + PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) { + unsigned ClampIdx = + TII->getOperandIdx(Result->getMachineOpcode(), R600Operands::CLAMP); + std::vector<SDValue> Ops; + unsigned NumOp = Result->getNumOperands(); + for (unsigned i = 0; i < NumOp; ++i) { + Ops.push_back(Result->getOperand(i)); + } + Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32); + Result = CurDAG->SelectNodeTo(PotentialClamp, + Result->getMachineOpcode(), PotentialClamp->getVTList(), + Ops.data(), NumOp); + } + } + } + } + + return Result; +} + +bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, + const R600InstrInfo *TII, std::vector<SDValue> &Ops) { + int OperandIdx[] = { + TII->getOperandIdx(Opcode, R600Operands::SRC0), + TII->getOperandIdx(Opcode, R600Operands::SRC1), + TII->getOperandIdx(Opcode, R600Operands::SRC2) + }; + int SelIdx[] = { + TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL), + TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL), + TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL) + }; + int NegIdx[] = { + TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG), + TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG), + TII->getOperandIdx(Opcode, R600Operands::SRC2_NEG) + }; + int AbsIdx[] = { + TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS), + TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS), + -1 + }; + + for (unsigned i = 0; i < 3; i++) { + if (OperandIdx[i] < 0) + return false; + SDValue Operand = 
Ops[OperandIdx[i] - 1]; + switch (Operand.getOpcode()) { + case AMDGPUISD::CONST_ADDRESS: { + if (i == 2) + break; + SDValue CstOffset; + if (!Operand.getValueType().isVector() && + SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) { + Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32); + Ops[SelIdx[i] - 1] = CstOffset; + return true; + } + } + break; + case ISD::FNEG: + if (NegIdx[i] < 0) + break; + Ops[OperandIdx[i] - 1] = Operand.getOperand(0); + Ops[NegIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32); + return true; + case ISD::FABS: + if (AbsIdx[i] < 0) + break; + Ops[OperandIdx[i] - 1] = Operand.getOperand(0); + Ops[AbsIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32); + return true; + case ISD::BITCAST: + Ops[OperandIdx[i] - 1] = Operand.getOperand(0); + return true; + default: + break; + } + } + return false; } bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) { @@ -406,41 +513,23 @@ const char *AMDGPUDAGToDAGISel::getPassName() const { ///==== AMDGPU Functions ====/// -bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base, - SDValue& Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress) { - return false; +bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, + SDValue& IntPtr) { + if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { + IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true); + return true; } + return false; +} - - if (Addr.getOpcode() == ISD::ADD) { - bool Match = false; - - // Find the base ptr and the offset - for (unsigned i = 0; i < Addr.getNumOperands(); i++) { - SDValue Arg = Addr.getOperand(i); - ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg); - // This arg isn't a constant so it must be the base PTR. - if (!OffsetNode) { - Base = Addr.getOperand(i); - continue; - } - // Check if the constant argument fits in 8-bits. The offset is in bytes - // so we need to convert it to dwords. 
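Note how Select drives FoldOperands to a fixpoint: each call performs at most one fold (it returns true as soon as it has changed Ops), so the caller re-runs UpdateNodeOperands and loops until nothing changes. The same one-step-then-retry pattern in the abstract (purely illustrative):

// Generic fixpoint driver for single-step rewrites.
template <typename State, typename StepFn>
void runToFixpoint(State &S, StepFn Step) {
  bool Changed;
  do {
    Changed = Step(S);  // performs at most one rewrite; true if it did
  } while (Changed);
}

Here FoldOperands plays the role of Step and the Ops vector is the State; folding one operand at a time keeps the operand-index bookkeeping simple at the cost of re-scanning.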
- if (isUInt<8>(OffsetNode->getZExtValue() >> 2)) { - Match = true; - Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2, - MVT::i32); - } - } - return Match; +bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, + SDValue& BaseReg, SDValue &Offset) { + if (!dyn_cast<ConstantSDNode>(Addr)) { + BaseReg = Addr; + Offset = CurDAG->getIntPtrConstant(0, true); + return true; } - - // Default case, no offset - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; + return false; } bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, @@ -470,16 +559,39 @@ bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, return true; } -bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base, - SDValue& Offset) { - if (Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress || - Addr.getOpcode() != ISD::ADD) { - return false; +bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, + SDValue &Offset) { + ConstantSDNode *C; + + if ((C = dyn_cast<ConstantSDNode>(Addr))) { + Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); + Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32); + } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && + (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { + Base = Addr.getOperand(0); + Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32); + } else { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); } - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; } + +void AMDGPUDAGToDAGISel::PostprocessISelDAG() { + + // Go over all selected nodes and try to fold them a bit more + const AMDGPUTargetLowering& Lowering = ((const AMDGPUTargetLowering&)TLI); + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); I != E; ++I) { + + MachineSDNode *Node = dyn_cast<MachineSDNode>(I); + if (!Node) + continue; + + SDNode *ResNode = Lowering.PostISelFolding(Node, *CurDAG); + if (ResNode != Node) + ReplaceUses(Node, ResNode); + } +} + diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp index 2e60adc..f65e1f3 100644 --- a/lib/Target/R600/AMDILISelLowering.cpp +++ b/lib/Target/R600/AMDILISelLowering.cpp @@ -220,9 +220,9 @@ void AMDGPUTargetLowering::InitAMDILLowering() { setSelectIsExpensive(true); setJumpIsExpensive(true); - maxStoresPerMemcpy = 4096; - maxStoresPerMemmove = 4096; - maxStoresPerMemset = 4096; + MaxStoresPerMemcpy = 4096; + MaxStoresPerMemmove = 4096; + MaxStoresPerMemset = 4096; } @@ -451,7 +451,8 @@ AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const { SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); // float fr = mad(fqneg, fb, fa); - SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa); + SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY, + DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa); // int iq = (int)fq; SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); diff --git a/lib/Target/R600/AMDILInstrInfo.td b/lib/Target/R600/AMDILInstrInfo.td index e969bbf..110f147 100644 --- a/lib/Target/R600/AMDILInstrInfo.td +++ b/lib/Target/R600/AMDILInstrInfo.td @@ -116,7 +116,6 @@ def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone, //===--------------------------------------------------------------------===// // Floating point math functions def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>; 
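The LowerSDIV24 change above (together with the deletion of IL_mad and the __amdil_mad builtins just below) removes the dedicated AMDGPUISD::MAD node; the fused operation is simply decomposed using the identity mad(a, b, c) == a*b + c. One detail worth noting when reading the DAG: the replacement builds the product with ISD::MUL even though the operands are floating point, where ISD::FMUL would be the conventional opcode. The scalar identity, for reference:

// mad(a, b, c) decomposes into a multiply followed by an add.
float mad(float a, float b, float c) { return a * b + c; }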
-def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>; //===----------------------------------------------------------------------===// // Integer functions diff --git a/lib/Target/R600/AMDILIntrinsics.td b/lib/Target/R600/AMDILIntrinsics.td index 3f9e20f..6ec3559 100644 --- a/lib/Target/R600/AMDILIntrinsics.td +++ b/lib/Target/R600/AMDILIntrinsics.td @@ -92,12 +92,6 @@ let TargetPrefix = "AMDIL", isTarget = 1 in { TernaryIntInt; def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">, BinaryIntInt; - def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">, - TernaryIntInt; - def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">, - TernaryIntInt; - def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">, - TernaryIntFloat; def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">, BinaryIntInt; def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">, @@ -110,10 +104,6 @@ let TargetPrefix = "AMDIL", isTarget = 1 in { BinaryIntInt; def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">, BinaryIntInt; - def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">, - TernaryIntInt; - def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">, - TernaryIntInt; def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">, BinaryIntInt; def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">, diff --git a/lib/Target/R600/AMDILPeepholeOptimizer.cpp b/lib/Target/R600/AMDILPeepholeOptimizer.cpp index a3d30af..3a28038 100644 --- a/lib/Target/R600/AMDILPeepholeOptimizer.cpp +++ b/lib/Target/R600/AMDILPeepholeOptimizer.cpp @@ -366,7 +366,7 @@ AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) { std::string buffer(F->getName().str() + "_noret"); F = dyn_cast<Function>( F->getParent()->getOrInsertFunction(buffer, F->getFunctionType())); - atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F)); + atomicFuncs.push_back(std::make_pair(CI, F)); } if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment) @@ -613,7 +613,7 @@ AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) { if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; } Function *Func = dyn_cast<Function>(inst->getParent()->getParent()->getParent()-> - getOrInsertFunction(llvm::StringRef(name), funcType)); + getOrInsertFunction(StringRef(name), funcType)); Value *Operands[4] = { width, offset, @@ -777,7 +777,7 @@ AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) { // Lets create the function. 
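The make_pair change in AMDILPeepholeOptimizer.cpp above is a C++11 build fix, not a cleanup: C++11's std::make_pair takes forwarding references (T1&&, T2&&), so spelling out the template arguments turns the parameters into plain rvalue references that cannot bind to the lvalues CI and F. Letting deduction run is the portable form; a reduced reproduction:

#include <utility>

void reproduce(int *CI, long *F) {
  // OK in C++03 and C++11: T1/T2 are deduced, yielding pair<int*, long*>.
  std::pair<int *, long *> P = std::make_pair(CI, F);
  (void)P;
  // Fails to compile under C++11: make_pair<int*, long*> declares its
  // parameters as int*&& and long*&&, which cannot bind to lvalues.
  // std::pair<int *, long *> Q = std::make_pair<int *, long *>(CI, F);
}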
Function *Func = dyn_cast<Function>(inst->getParent()->getParent()->getParent()-> - getOrInsertFunction(llvm::StringRef(name), funcType)); + getOrInsertFunction(StringRef(name), funcType)); Value *Operands[3] = { ShiftInst->getOperand(0), shiftValConst, @@ -967,7 +967,7 @@ AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) { } Function *Func = dyn_cast<Function>( CI->getParent()->getParent()->getParent()-> - getOrInsertFunction(llvm::StringRef(name), funcType)); + getOrInsertFunction(StringRef(name), funcType)); Value *Operands[3] = { CI->getOperand(0), CI->getOperand(1), @@ -999,7 +999,7 @@ AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) { } Function *Func = dyn_cast<Function>( CI->getParent()->getParent()->getParent()-> - getOrInsertFunction(llvm::StringRef(name), funcType)); + getOrInsertFunction(StringRef(name), funcType)); Value *Operands[2] = { CI->getOperand(0), CI->getOperand(1) diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index ce0b56b..00f8b10 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -17,7 +17,6 @@ add_llvm_target(R600CodeGen AMDILDevice.cpp AMDILDeviceInfo.cpp AMDILEvergreenDevice.cpp - AMDILFrameLowering.cpp AMDILIntrinsicInfo.cpp AMDILISelDAGToDAG.cpp AMDILISelLowering.cpp @@ -25,6 +24,8 @@ add_llvm_target(R600CodeGen AMDILPeepholeOptimizer.cpp AMDILSIDevice.cpp AMDGPUAsmPrinter.cpp + AMDGPUFrameLowering.cpp + AMDGPUIndirectAddressing.cpp AMDGPUMCInstLower.cpp AMDGPUSubtarget.cpp AMDGPUStructurizeCFG.cpp @@ -36,13 +37,14 @@ add_llvm_target(R600CodeGen R600ExpandSpecialInstrs.cpp R600InstrInfo.cpp R600ISelLowering.cpp + R600LowerConstCopy.cpp R600MachineFunctionInfo.cpp R600RegisterInfo.cpp SIAnnotateControlFlow.cpp SIAssignInterpRegs.cpp + SIInsertWaits.cpp SIInstrInfo.cpp SIISelLowering.cpp - SILowerLiteralConstants.cpp SILowerControlFlow.cpp SIMachineFunctionInfo.cpp SIRegisterInfo.cpp diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index e6c550b..10547a5 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -11,6 +11,7 @@ #include "AMDGPUInstPrinter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCExpr.h" using namespace llvm; @@ -35,11 +36,29 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << Op.getImm(); } else if (Op.isFPImm()) { O << Op.getFPImm(); + } else if (Op.isExpr()) { + const MCExpr *Exp = Op.getExpr(); + Exp->print(O); } else { assert(!"unknown operand type in printOperand"); } } +void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNum).getImm(); + + if (Imm == 2) { + O << "P0"; + } else if (Imm == 1) { + O << "P20"; + } else if (Imm == 0) { + O << "P10"; + } else { + assert(!"Invalid interpolation parameter slot"); + } +} + void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printOperand(MI, OpNo, O); @@ -105,10 +124,7 @@ void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.getImm() != 0) { - O << " + " << Op.getImm(); - } + printIfSet(MI, OpNo, O, "+"); } void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo, @@ -129,4 +145,28 @@ void AMDGPUInstPrinter::printWrite(const MCInst *MI, 
unsigned OpNo, } } +void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const char * chans = "XYZW"; + int sel = MI->getOperand(OpNo).getImm(); + + int chan = sel & 3; + sel >>= 2; + + if (sel >= 512) { + sel -= 512; + int cb = sel >> 12; + sel &= 4095; + O << cb << "[" << sel << "]"; + } else if (sel >= 448) { + sel -= 448; + O << sel; + } else if (sel >= 0){ + O << sel; + } + + if (sel >= 0) + O << "." << chans[chan]; +} + #include "AMDGPUGenAsmWriter.inc" diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h index 96e0e46..767a708 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h @@ -33,6 +33,7 @@ public: private: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm); void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O); @@ -45,6 +46,7 @@ private: void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O); }; } // End namespace llvm diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h index 9d0d6cf..8721f80 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -42,17 +42,6 @@ public: SmallVectorImpl<MCFixup> &Fixups) const { return 0; } - virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const { - return Value; - } - virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { - return 0; - } - virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { - return 0; - } }; } // End namespace llvm diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 36deae9..d207160 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -38,8 +38,8 @@ using namespace llvm; namespace { class R600MCCodeEmitter : public AMDGPUMCCodeEmitter { - R600MCCodeEmitter(const R600MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const R600MCCodeEmitter &); // DO NOT IMPLEMENT + R600MCCodeEmitter(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; const MCRegisterInfo &MRI; const MCSubtargetInfo &STI; @@ -63,8 +63,8 @@ private: void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, raw_ostream &OS) const; void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const; - void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value, - raw_ostream &OS) const; + void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx, + raw_ostream &OS) const; void EmitDst(const MCInst &MI, raw_ostream &OS) const; void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, raw_ostream &OS) const; @@ -161,9 +161,12 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, case 
AMDGPU::VTX_READ_PARAM_8_eg: case AMDGPU::VTX_READ_PARAM_16_eg: case AMDGPU::VTX_READ_PARAM_32_eg: + case AMDGPU::VTX_READ_PARAM_128_eg: case AMDGPU::VTX_READ_GLOBAL_8_eg: case AMDGPU::VTX_READ_GLOBAL_32_eg: - case AMDGPU::VTX_READ_GLOBAL_128_eg: { + case AMDGPU::VTX_READ_GLOBAL_128_eg: + case AMDGPU::TEX_VTX_CONSTBUF: + case AMDGPU::TEX_VTX_TEXBUF : { uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset @@ -193,7 +196,6 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, raw_ostream &OS) const { const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); - unsigned NumOperands = MI.getNumOperands(); // Emit instruction type EmitByte(INSTR_ALU, OS); @@ -209,19 +211,21 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, InstWord01 |= ISAOpCode << 1; } - unsigned SrcIdx = 0; - for (unsigned int OpIdx = 1; OpIdx < NumOperands; ++OpIdx) { - if (MI.getOperand(OpIdx).isImm() || MI.getOperand(OpIdx).isFPImm() || - OpIdx == (unsigned)MCDesc.findFirstPredOperandIdx()) { - continue; - } - EmitSrcISA(MI, OpIdx, InstWord01, OS); - SrcIdx++; - } + unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 : + MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1; - // Emit zeros for unused sources - for ( ; SrcIdx < 3; SrcIdx++) { - EmitNullBytes(SRC_BYTE_COUNT - 6, OS); + EmitByte(SrcNum, OS); + + const unsigned SrcOps[3][2] = { + {R600Operands::SRC0, R600Operands::SRC0_SEL}, + {R600Operands::SRC1, R600Operands::SRC1_SEL}, + {R600Operands::SRC2, R600Operands::SRC2_SEL} + }; + + for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) { + unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]]; + unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]]; + EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS); } Emit(InstWord01, OS); @@ -292,34 +296,37 @@ void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx, } -void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx, - uint64_t &Value, raw_ostream &OS) const { - const MCOperand &MO = MI.getOperand(OpIdx); +void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, + unsigned SelOpIdx, raw_ostream &OS) const { + const MCOperand &RegMO = MI.getOperand(RegOpIdx); + const MCOperand &SelMO = MI.getOperand(SelOpIdx); + union { float f; uint32_t i; } InlineConstant; InlineConstant.i = 0; - // Emit the source select (2 bytes). For GPRs, this is the register index. - // For other potential instruction operands, (e.g. constant registers) the - // value of the source select is defined in the r600isa docs. - if (MO.isReg()) { - unsigned Reg = MO.getReg(); - if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) { - EmitByte(1, OS); - } else { - EmitByte(0, OS); - } + // Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0 + // and select is 0 (GPR index is encoded in the instr encoding. For constants + // type is 1 and select is the original const select passed from the driver. 
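The new EmitSrcISA wire format (its body continues just below) is one type byte followed by a four-byte select: GPR sources emit (0, 0) because the register index already travels in the main instruction words, while ALU_CONST sources emit type 1 plus the driver's original constant select. A standalone model of that framing, with the byte order mirroring the emitter's least-significant-byte-first loop (illustrative only):

#include <cstdint>
#include <vector>

void emitSrcISA(std::vector<uint8_t> &OS, bool IsAluConst, uint32_t Sel) {
  OS.push_back(IsAluConst ? 1 : 0);            // source type byte
  uint32_t S = IsAluConst ? Sel : 0;           // GPRs: select is always 0
  for (unsigned i = 0; i < 4; ++i)
    OS.push_back(uint8_t((S >> (8 * i)) & 0xff));
}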
+ unsigned Reg = RegMO.getReg(); + if (Reg == AMDGPU::ALU_CONST) { + EmitByte(1, OS); + uint32_t Sel = SelMO.getImm(); + Emit(Sel, OS); + } else { + EmitByte(0, OS); + Emit((uint32_t)0, OS); + } - if (Reg == AMDGPU::ALU_LITERAL_X) { - unsigned ImmOpIndex = MI.getNumOperands() - 1; - MCOperand ImmOp = MI.getOperand(ImmOpIndex); - if (ImmOp.isFPImm()) { - InlineConstant.f = ImmOp.getFPImm(); - } else { - assert(ImmOp.isImm()); - InlineConstant.i = ImmOp.getImm(); - } + if (Reg == AMDGPU::ALU_LITERAL_X) { + unsigned ImmOpIndex = MI.getNumOperands() - 1; + MCOperand ImmOp = MI.getOperand(ImmOpIndex); + if (ImmOp.isFPImm()) { + InlineConstant.f = ImmOp.getFPImm(); + } else { + assert(ImmOp.isImm()); + InlineConstant.i = ImmOp.getImm(); } } diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp index b4bdb25..6cc0077 100644 --- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -24,46 +24,33 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/raw_ostream.h" -#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1)) -#define SI_INSTR_FLAGS_ENCODING_MASK 0xf - -// These must be kept in sync with SIInstructions.td and also the -// InstrEncodingInfo array in SIInstrInfo.cpp. -// -// NOTE: This enum is only used to identify the encoding type within LLVM, -// the actual encoding type that is part of the instruction format is different -namespace SIInstrEncodingType { - enum Encoding { - EXP = 0, - LDS = 1, - MIMG = 2, - MTBUF = 3, - MUBUF = 4, - SMRD = 5, - SOP1 = 6, - SOP2 = 7, - SOPC = 8, - SOPK = 9, - SOPP = 10, - VINTRP = 11, - VOP1 = 12, - VOP2 = 13, - VOP3 = 14, - VOPC = 15 - }; -} - using namespace llvm; namespace { + +/// \brief Helper type used in encoding +typedef union { + int32_t I; + float F; +} IntFloatUnion; + class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { - SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT + SIMCCodeEmitter(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; const MCRegisterInfo &MRI; const MCSubtargetInfo &STI; MCContext &Ctx; + /// \brief Encode a sequence of registers with the correct alignment. + unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const; + + /// \brief Can this operand also contain immediate values? + bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const; + + /// \brief Encode an fp or int literal + uint32_t getLitEncoding(const MCOperand &MO) const; + public: SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, const MCSubtargetInfo &sti, MCContext &ctx) @@ -79,11 +66,6 @@ public: virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups) const; -public: - - /// \brief Encode a sequence of registers with the correct alignment. 
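getLitEncoding, declared above and defined a little further down in SIMCCodeEmitter, compresses the common immediates into the SI inline-constant space: integers 0..64 map to 128+N, integers -1..-16 map to 192+|N|, the eight float constants ±0.5, ±1.0, ±2.0 and ±4.0 map to 240..247, and everything else yields 255, which tells the emitter to append a 32-bit literal word after the instruction. A restatement of that table as a hypothetical helper, mirroring the returns in getLitEncoding:

#include <cstdint>

// SI inline-constant encoding, restated; 255 means "literal word follows".
uint32_t inlineConstant(int32_t I, float F, bool IsFloat) {
  if (!IsFloat) {
    if (I >= 0 && I <= 64)   return 128 + I;  // small non-negative integers
    if (I >= -16 && I <= -1) return 192 - I;  // i.e. 192 + |I|
    return 255;
  }
  if (F == 0.5f)  return 240;
  if (F == -0.5f) return 241;
  if (F == 1.0f)  return 242;
  if (F == -1.0f) return 243;
  if (F == 2.0f)  return 244;
  if (F == -2.0f) return 245;
  if (F == 4.0f)  return 246;
  if (F == -4.0f) return 247;
  return 255;
}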
- unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const; - /// \brief Encoding for when 2 consecutive registers are used virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixup) const; @@ -91,73 +73,142 @@ public: /// \brief Encoding for when 4 consectuive registers are used virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixup) const; +}; - /// \brief Encoding for SMRD indexed loads - virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixup) const; +} // End anonymous namespace + +MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new SIMCCodeEmitter(MCII, MRI, STI, Ctx); +} - /// \brief Post-Encoder method for VOP instructions - virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const; +bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc, + unsigned OpNo) const { -private: + unsigned RegClass = Desc.OpInfo[OpNo].RegClass; + return (AMDGPU::SSrc_32RegClassID == RegClass) || + (AMDGPU::SSrc_64RegClassID == RegClass) || + (AMDGPU::VSrc_32RegClassID == RegClass) || + (AMDGPU::VSrc_64RegClassID == RegClass); +} - /// \returns this SIInstrEncodingType for this instruction. - unsigned getEncodingType(const MCInst &MI) const; +uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const { - /// \brief Get then size in bytes of this instructions encoding. - unsigned getEncodingBytes(const MCInst &MI) const; + IntFloatUnion Imm; + if (MO.isImm()) + Imm.I = MO.getImm(); + else if (MO.isFPImm()) + Imm.F = MO.getFPImm(); + else + return ~0; - /// \returns the hardware encoding for a register - unsigned getRegBinaryCode(unsigned reg) const; + if (Imm.I >= 0 && Imm.I <= 64) + return 128 + Imm.I; - /// \brief Generated function that returns the hardware encoding for - /// a register - unsigned getHWRegNum(unsigned reg) const; + if (Imm.I >= -16 && Imm.I <= -1) + return 192 + abs(Imm.I); -}; + if (Imm.F == 0.5f) + return 240; -} // End anonymous namespace + if (Imm.F == -0.5f) + return 241; -MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new SIMCCodeEmitter(MCII, MRI, STI, Ctx); + if (Imm.F == 1.0f) + return 242; + + if (Imm.F == -1.0f) + return 243; + + if (Imm.F == 2.0f) + return 244; + + if (Imm.F == -2.0f) + return 245; + + if (Imm.F == 4.0f) + return 246; + + if (Imm.F == -4.0f) + return 247; + + return 255; } void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups); - unsigned bytes = getEncodingBytes(MI); + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + unsigned bytes = Desc.getSize(); + for (unsigned i = 0; i < bytes; i++) { OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff)); } + + if (bytes > 4) + return; + + // Check for additional literals in SRC0/1/2 (Op 1/2/3) + for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) { + + // Check if this operand should be encoded as [SV]Src + if (!isSrcOperand(Desc, i)) + continue; + + // Is this operand a literal immediate? + const MCOperand &Op = MI.getOperand(i); + if (getLitEncoding(Op) != 255) + continue; + + // Yes! 
Encode it + IntFloatUnion Imm; + if (Op.isImm()) + Imm.I = Op.getImm(); + else + Imm.F = Op.getFPImm(); + + for (unsigned j = 0; j < 4; j++) { + OS.write((uint8_t) ((Imm.I >> (8 * j)) & 0xff)); + } + + // Only one literal value allowed + break; + } } uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups) const { - if (MO.isReg()) { - return getRegBinaryCode(MO.getReg()); - } else if (MO.isImm()) { - return MO.getImm(); - } else if (MO.isFPImm()) { - // XXX: Not all instructions can use inline literals - // XXX: We should make sure this is a 32-bit constant - union { - float F; - uint32_t I; - } Imm; - Imm.F = MO.getFPImm(); - return Imm.I; - } else if (MO.isExpr()) { + if (MO.isReg()) + return MRI.getEncodingValue(MO.getReg()); + + if (MO.isExpr()) { const MCExpr *Expr = MO.getExpr(); MCFixupKind Kind = MCFixupKind(FK_PCRel_4); Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); return 0; - } else{ - llvm_unreachable("Encoding of this operand type is not supported yet."); } + + // Figure out the operand number, needed for isSrcOperand check + unsigned OpNo = 0; + for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) { + if (&MO == &MI.getOperand(OpNo)) + break; + } + + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + if (isSrcOperand(Desc, OpNo)) { + uint32_t Enc = getLitEncoding(MO); + if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4)) + return Enc; + + } else if (MO.isImm()) + return MO.getImm(); + + llvm_unreachable("Encoding of this operand type is not supported yet."); return 0; } @@ -167,10 +218,10 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const { - unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg()); - return regCode >> shift; - return 0; + unsigned regCode = MRI.getEncodingValue(MI.getOperand(OpNo).getReg()); + return (regCode & 0xff) >> shift; } + unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI, unsigned OpNo , SmallVectorImpl<MCFixup> &Fixup) const { @@ -182,117 +233,3 @@ unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixup) const { return GPRAlign(MI, OpNo, 2); } - -#define SMRD_OFFSET_MASK 0xff -#define SMRD_IMM_SHIFT 8 -#define SMRD_SBASE_MASK 0x3f -#define SMRD_SBASE_SHIFT 9 -/// This function is responsibe for encoding the offset -/// and the base ptr for SMRD instructions it should return a bit string in -/// this format: -/// -/// OFFSET = bits{7-0} -/// IMM = bits{8} -/// SBASE = bits{14-9} -/// -uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixup) const { - uint32_t Encoding; - - const MCOperand &OffsetOp = MI.getOperand(OpNo + 1); - - //XXX: Use this function for SMRD loads with register offsets - assert(OffsetOp.isImm()); - - Encoding = - (getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK) - | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit - | ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT) - ; - - return Encoding; -} - -//===----------------------------------------------------------------------===// -// Post Encoder Callbacks -//===----------------------------------------------------------------------===// - -uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const{ - unsigned encodingType = getEncodingType(MI); - unsigned numSrcOps; - unsigned vgprBitOffset; - - if 
(encodingType == SIInstrEncodingType::VOP3) { - numSrcOps = 3; - vgprBitOffset = 32; - } else { - numSrcOps = 1; - vgprBitOffset = 0; - } - - // Add one to skip over the destination reg operand. - for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) { - const MCOperand &MO = MI.getOperand(opIdx); - if (MO.isReg()) { - unsigned reg = MI.getOperand(opIdx).getReg(); - if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) || - AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) { - Value |= (VGPR_BIT(opIdx)) << vgprBitOffset; - } - } else if (MO.isFPImm()) { - union { - float f; - uint32_t i; - } Imm; - // XXX: Not all instructions can use inline literals - // XXX: We should make sure this is a 32-bit constant - Imm.f = MO.getFPImm(); - Value |= ((uint64_t)Imm.i) << 32; - } - } - return Value; -} - -//===----------------------------------------------------------------------===// -// Encoding helper functions -//===----------------------------------------------------------------------===// - -unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const { - return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK; -} - -unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const { - - // These instructions aren't real instructions with an encoding type, so - // we need to manually specify their size. - switch (MI.getOpcode()) { - default: break; - case AMDGPU::SI_LOAD_LITERAL_I32: - case AMDGPU::SI_LOAD_LITERAL_F32: - return 4; - } - - unsigned encoding_type = getEncodingType(MI); - switch (encoding_type) { - case SIInstrEncodingType::EXP: - case SIInstrEncodingType::LDS: - case SIInstrEncodingType::MUBUF: - case SIInstrEncodingType::MTBUF: - case SIInstrEncodingType::MIMG: - case SIInstrEncodingType::VOP3: - return 8; - default: - return 4; - } -} - - -unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const { - switch (reg) { - case AMDGPU::M0: return 124; - case AMDGPU::SREG_LIT_0: return 128; - case AMDGPU::SI_LITERAL_CONSTANT: return 255; - default: return MRI.getEncodingValue(reg); - } -} - diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td index 3dc1ecd..868810c 100644 --- a/lib/Target/R600/Processors.td +++ b/lib/Target/R600/Processors.td @@ -13,6 +13,7 @@ class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features> : Processor<Name, itin, Features>; +def : Proc<"", R600_EG_Itin, [FeatureR600ALUInst]>; def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>; def : Proc<"rv710", R600_EG_Itin, []>; def : Proc<"rv730", R600_EG_Itin, []>; diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h index 7dea8e4..16cfcf5 100644 --- a/lib/Target/R600/R600Defines.h +++ b/lib/Target/R600/R600Defines.h @@ -49,6 +49,9 @@ namespace R600_InstFlag { #define HW_REG_MASK 0x1ff #define HW_CHAN_SHIFT 9 +#define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT) +#define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK) + namespace R600Operands { enum Ops { DST, @@ -62,18 +65,33 @@ namespace R600Operands { SRC0_NEG, SRC0_REL, SRC0_ABS, + SRC0_SEL, SRC1, SRC1_NEG, SRC1_REL, SRC1_ABS, + SRC1_SEL, SRC2, SRC2_NEG, SRC2_REL, + SRC2_SEL, LAST, PRED_SEL, IMM, COUNT }; + + const static int ALUOpTable[3][R600Operands::COUNT] = { +// W C S S S S S S S S S S S +// R O D L S R R R R S R R R R S R R R L P +// D U I M R A R C C C C R C C C C R C C C A R I +// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M +// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M + {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 
9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12}, + {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19}, + {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17} + }; + } #endif // R600DEFINES_H_ diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp index b903d4a..f8c900f 100644 --- a/lib/Target/R600/R600ExpandSpecialInstrs.cpp +++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp @@ -55,118 +55,6 @@ FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { return new R600ExpandSpecialInstrsPass(TM); } -bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) { - const R600RegisterInfo &TRI = TII->getRegisterInfo(); - if (MI.getOpcode() != AMDGPU::input_perspective) - return false; - - MachineBasicBlock::iterator I = &MI; - unsigned DstReg = MI.getOperand(0).getReg(); - R600MachineFunctionInfo *MFI = MI.getParent()->getParent() - ->getInfo<R600MachineFunctionInfo>(); - unsigned IJIndexBase; - - // In Evergreen ISA doc section 8.3.2 : - // We need to interpolate XY and ZW in two different instruction groups. - // An INTERP_* must occupy all 4 slots of an instruction group. - // Output of INTERP_XY is written in X,Y slots - // Output of INTERP_ZW is written in Z,W slots - // - // Thus interpolation requires the following sequences : - // - // AnyGPR.x = INTERP_ZW; (Write Masked Out) - // AnyGPR.y = INTERP_ZW; (Write Masked Out) - // DstGPR.z = INTERP_ZW; - // DstGPR.w = INTERP_ZW; (End of first IG) - // DstGPR.x = INTERP_XY; - // DstGPR.y = INTERP_XY; - // AnyGPR.z = INTERP_XY; (Write Masked Out) - // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG) - // - switch (MI.getOperand(1).getImm()) { - case 0: - IJIndexBase = MFI->GetIJPerspectiveIndex(); - break; - case 1: - IJIndexBase = MFI->GetIJLinearIndex(); - break; - default: - assert(0 && "Unknow ij index"); - } - - for (unsigned i = 0; i < 8; i++) { - unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister( - 2 * IJIndexBase + ((i + 1) % 2)); - unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( - MI.getOperand(2).getImm()); - - - unsigned Sel = AMDGPU::sel_x; - switch (i % 4) { - case 0:Sel = AMDGPU::sel_x;break; - case 1:Sel = AMDGPU::sel_y;break; - case 2:Sel = AMDGPU::sel_z;break; - case 3:Sel = AMDGPU::sel_w;break; - default:break; - } - - unsigned Res = TRI.getSubReg(DstReg, Sel); - - unsigned Opcode = (i < 4)?AMDGPU::INTERP_ZW:AMDGPU::INTERP_XY; - - MachineBasicBlock &MBB = *(MI.getParent()); - MachineInstr *NewMI = - TII->buildDefaultInstruction(MBB, I, Opcode, Res, IJIndex, ReadReg); - - if (!(i> 1 && i < 6)) { - TII->addFlag(NewMI, 0, MO_FLAG_MASK); - } - - if (i % 4 != 3) - TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); - } - - MI.eraseFromParent(); - - return true; -} - -bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) { - const R600RegisterInfo &TRI = TII->getRegisterInfo(); - if (MI.getOpcode() != AMDGPU::input_constant) - return false; - - MachineBasicBlock::iterator I = &MI; - unsigned DstReg = MI.getOperand(0).getReg(); - - for (unsigned i = 0; i < 4; i++) { - unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( - MI.getOperand(1).getImm()); - - unsigned Sel = AMDGPU::sel_x; - switch (i % 4) { - case 0:Sel = AMDGPU::sel_x;break; - case 1:Sel = AMDGPU::sel_y;break; - case 2:Sel = AMDGPU::sel_z;break; - case 3:Sel = AMDGPU::sel_w;break; - default:break; - } - - unsigned Res = TRI.getSubReg(DstReg, Sel); - - MachineBasicBlock &MBB = *(MI.getParent()); - MachineInstr 
*NewMI = TII->buildDefaultInstruction( - MBB, I, AMDGPU::INTERP_LOAD_P0, Res, ReadReg); - - if (i % 4 != 3) - TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); - } - - MI.eraseFromParent(); - - return true; -} - bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { const R600RegisterInfo &TRI = TII->getRegisterInfo(); @@ -200,7 +88,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { MI.eraseFromParent(); continue; } - case AMDGPU::BREAK: + case AMDGPU::BREAK: { MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I, AMDGPU::PRED_SETE_INT, AMDGPU::PREDICATE_BIT, @@ -214,12 +102,87 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { .addReg(AMDGPU::PREDICATE_BIT); MI.eraseFromParent(); continue; - } + } - if (ExpandInputPerspective(MI)) - continue; - if (ExpandInputConstant(MI)) - continue; + case AMDGPU::INTERP_PAIR_XY: { + MachineInstr *BMI; + unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( + MI.getOperand(2).getImm()); + + for (unsigned Chan = 0; Chan < 4; ++Chan) { + unsigned DstReg; + + if (Chan < 2) + DstReg = MI.getOperand(Chan).getReg(); + else + DstReg = Chan == 2 ? AMDGPU::T0_Z : AMDGPU::T0_W; + + BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_XY, + DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg); + + if (Chan > 0) { + BMI->bundleWithPred(); + } + if (Chan >= 2) + TII->addFlag(BMI, 0, MO_FLAG_MASK); + if (Chan != 3) + TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); + } + + MI.eraseFromParent(); + continue; + } + + case AMDGPU::INTERP_PAIR_ZW: { + MachineInstr *BMI; + unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( + MI.getOperand(2).getImm()); + + for (unsigned Chan = 0; Chan < 4; ++Chan) { + unsigned DstReg; + + if (Chan < 2) + DstReg = Chan == 0 ? 
AMDGPU::T0_X : AMDGPU::T0_Y; + else + DstReg = MI.getOperand(Chan-2).getReg(); + + BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_ZW, + DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg); + + if (Chan > 0) { + BMI->bundleWithPred(); + } + if (Chan < 2) + TII->addFlag(BMI, 0, MO_FLAG_MASK); + if (Chan != 3) + TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); + } + + MI.eraseFromParent(); + continue; + } + + case AMDGPU::INTERP_VEC_LOAD: { + const R600RegisterInfo &TRI = TII->getRegisterInfo(); + MachineInstr *BMI; + unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( + MI.getOperand(1).getImm()); + unsigned DstReg = MI.getOperand(0).getReg(); + + for (unsigned Chan = 0; Chan < 4; ++Chan) { + BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_LOAD_P0, + TRI.getSubReg(DstReg, TRI.getSubRegFromChannel(Chan)), PReg); + if (Chan > 0) { + BMI->bundleWithPred(); + } + if (Chan != 3) + TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); + } + + MI.eraseFromParent(); + continue; + } + } bool IsReduction = TII->isReductionOp(MI.getOpcode()); bool IsVector = TII->isVector(MI); diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index f0eece3..b5c2a93 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -16,6 +16,7 @@ #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -71,10 +72,27 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); + // Legalize loads and stores to the private address space. 
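The INTERP_PAIR_XY/ZW expansions just above follow the Evergreen ISA rule that an interpolation must occupy all four slots of an ALU instruction group: INTERP_XY writes live results to slots 0-1 and write-masks slots 2-3, INTERP_ZW does the reverse, every slot but the last carries MO_FLAG_NOT_LAST, and each slot after the first is bundled with its predecessor via bundleWithPred(). A compact model of the per-channel flags (illustrative types):

struct SlotFlags { bool WriteMasked; bool NotLast; bool Bundled; };

// Chan is the ALU slot (0..3); XY results occupy slots 0-1, ZW slots 2-3.
SlotFlags interpSlotFlags(unsigned Chan, bool IsXY) {
  SlotFlags F;
  F.WriteMasked = IsXY ? (Chan >= 2) : (Chan < 2); // dead halves are masked
  F.NotLast = (Chan != 3);  // only the last slot closes the group
  F.Bundled = (Chan > 0);   // bundleWithPred() chains the group together
  return F;
}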
+ setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::LOAD, MVT::v2i32, Custom); + setOperationAction(ISD::LOAD, MVT::v4i32, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom); + setOperationAction(ISD::STORE, MVT::i8, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::v2i32, Custom); setOperationAction(ISD::STORE, MVT::v4i32, Custom); + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::LOAD, MVT::v4i32, Custom); + setOperationAction(ISD::FrameIndex, MVT::i32, Custom); + setTargetDAGCombine(ISD::FP_ROUND); + setTargetDAGCombine(ISD::FP_TO_SINT); + setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); + setTargetDAGCombine(ISD::SELECT_CC); setSchedulingPreference(Sched::VLIW); } @@ -115,15 +133,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( break; } - case AMDGPU::R600_LOAD_CONST: { - int64_t RegIndex = MI->getOperand(1).getImm(); - unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY)) - .addOperand(MI->getOperand(0)) - .addReg(ConstantReg); - break; - } - case AMDGPU::MASK_WRITE: { unsigned maskedRegister = MI->getOperand(0).getReg(); assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); @@ -154,18 +163,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( break; } - case AMDGPU::RESERVE_REG: { - R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>(); - int64_t ReservedIndex = MI->getOperand(0).getImm(); - unsigned ReservedReg = - AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex); - MFI->ReservedRegs.push_back(ReservedReg); - unsigned SuperReg = - AMDGPU::R600_Reg128RegClass.getRegister(ReservedIndex / 4); - MFI->ReservedRegs.push_back(SuperReg); - break; - } - case AMDGPU::TXD: { unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); @@ -250,33 +247,26 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( break; } - case AMDGPU::input_perspective: { - R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>(); - - // XXX Be more fine about register reservation - for (unsigned i = 0; i < 4; i ++) { - unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i); - MFI->ReservedRegs.push_back(ReservedReg); - } - - switch (MI->getOperand(1).getImm()) { - case 0:// Perspective - MFI->HasPerspectiveInterpolation = true; - break; - case 1:// Linear - MFI->HasLinearInterpolation = true; - break; - default: - assert(0 && "Unknow ij index"); - } - - return BB; - } - case AMDGPU::EG_ExportSwz: case AMDGPU::R600_ExportSwz: { + // Instruction is left unmodified if it's not the last one of its type + bool isLastInstructionOfItsType = true; + unsigned InstExportType = MI->getOperand(1).getImm(); + for (MachineBasicBlock::iterator NextExportInst = llvm::next(I), + EndBlock = BB->end(); NextExportInst != EndBlock; + NextExportInst = llvm::next(NextExportInst)) { + if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz || + NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) { + unsigned CurrentInstExportType = NextExportInst->getOperand(1) + .getImm(); + if (CurrentInstExportType == InstExportType) { + isLastInstructionOfItsType = false; + break; + } + } + } bool EOP =
(llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0; - if (!EOP) + if (!EOP && !isLastInstructionOfItsType) return BB; unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40; BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) @@ -288,9 +278,18 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( .addOperand(MI->getOperand(5)) .addOperand(MI->getOperand(6)) .addImm(CfInst) - .addImm(1); + .addImm(EOP); break; } + case AMDGPU::RETURN: { + // RETURN instructions must have the live-out registers as implicit uses, + // otherwise they appear dead. + R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>(); + MachineInstrBuilder MIB(*MF, MI); + for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i) + MIB.addReg(MFI->LiveOuts[i], RegState::Implicit); + return BB; + } } MI->eraseFromParent(); @@ -304,57 +303,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( using namespace llvm::Intrinsic; using namespace llvm::AMDGPUIntrinsic; -static SDValue -InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap, - unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type, - SDValue Scalar, SDValue Chain) { - if (!ExportMap[Slot]) { - SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, - DL, MVT::v4f32, - DAG.getUNDEF(MVT::v4f32), - Scalar, - DAG.getConstant(Channel, MVT::i32)); - - unsigned Mask = 1 << Channel; - - const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32), - DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32), - DAG.getConstant(Mask, MVT::i32)}; - - SDValue Res = DAG.getNode( - AMDGPUISD::EXPORT, - DL, - MVT::Other, - Ops, 6); - ExportMap[Slot] = Res.getNode(); - return Res; - } - - SDNode *ExportInstruction = (SDNode *) ExportMap[Slot] ; - SDValue PreviousVector = ExportInstruction->getOperand(1); - SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, - DL, MVT::v4f32, - PreviousVector, - Scalar, - DAG.getConstant(Channel, MVT::i32)); - - unsigned Mask = dyn_cast<ConstantSDNode>(ExportInstruction->getOperand(5)) - ->getZExtValue(); - Mask |= (1 << Channel); - - const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector, - DAG.getConstant(Inst, MVT::i32), - DAG.getConstant(Type, MVT::i32), - DAG.getConstant(Slot, MVT::i32), - DAG.getConstant(Mask, MVT::i32)}; - - DAG.UpdateNodeOperands(ExportInstruction, - Ops, 6); - - return Chain; - -} - SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); @@ -364,7 +312,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::FPOW: return LowerFPOW(Op, DAG); + case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); unsigned IntrinsicID = @@ -372,58 +322,27 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const switch (IntrinsicID) { case AMDGPUIntrinsic::AMDGPU_store_output: { MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &MRI = MF.getRegInfo(); + R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); - if 
(!MRI.isLiveOut(Reg)) { - MRI.addLiveOut(Reg); - } + MFI->LiveOuts.push_back(Reg); return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2)); } - case AMDGPUIntrinsic::R600_store_pixel_color: { - MachineFunction &MF = DAG.getMachineFunction(); - R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); - int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); - - SDNode **OutputsMap = MFI->Outputs; - return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap, - RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2), - Chain); - + case AMDGPUIntrinsic::R600_store_swizzle: { + const SDValue Args[8] = { + Chain, + Op.getOperand(2), // Export Value + Op.getOperand(3), // ArrayBase + Op.getOperand(4), // Type + DAG.getConstant(0, MVT::i32), // SWZ_X + DAG.getConstant(1, MVT::i32), // SWZ_Y + DAG.getConstant(2, MVT::i32), // SWZ_Z + DAG.getConstant(3, MVT::i32) // SWZ_W + }; + return DAG.getNode(AMDGPUISD::EXPORT, Op.getDebugLoc(), Op.getValueType(), + Args, 8); } - case AMDGPUIntrinsic::R600_store_stream_output : { - MachineFunction &MF = DAG.getMachineFunction(); - R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); - int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); - int64_t BufIndex = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); - - SDNode **OutputsMap = MFI->StreamOutputs[BufIndex]; - unsigned Inst; - switch (cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue() ) { - // STREAM3 - case 3: - Inst = 4; - break; - // STREAM2 - case 2: - Inst = 3; - break; - // STREAM1 - case 1: - Inst = 2; - break; - // STREAM0 - case 0: - Inst = 1; - break; - default: - llvm_unreachable("Wrong buffer id for stream outputs !"); - } - return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap, - RegIndex / 4, RegIndex % 4, Inst, 0, Op.getOperand(2), - Chain); - } // default for switch(IntrinsicID) default: break; } @@ -442,38 +361,35 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT); } - case AMDGPUIntrinsic::R600_load_input_perspective: { - int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - if (slot < 0) - return DAG.getUNDEF(MVT::f32); - SDValue FullVector = DAG.getNode( - AMDGPUISD::INTERP, - DL, MVT::v4f32, - DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); - } - case AMDGPUIntrinsic::R600_load_input_linear: { - int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - if (slot < 0) - return DAG.getUNDEF(MVT::f32); - SDValue FullVector = DAG.getNode( - AMDGPUISD::INTERP, - DL, MVT::v4f32, - DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); - } - case AMDGPUIntrinsic::R600_load_input_constant: { + + case AMDGPUIntrinsic::R600_interp_input: { int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - if (slot < 0) - return DAG.getUNDEF(MVT::f32); - SDValue FullVector = DAG.getNode( - AMDGPUISD::INTERP_P0, - DL, MVT::v4f32, - DAG.getConstant(slot / 4 , MVT::i32)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); + int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue(); + 
MachineSDNode *interp; + if (ijb < 0) { + interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL, + MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32)); + return DAG.getTargetExtractSubreg( + TII->getRegisterInfo().getSubRegFromChannel(slot % 4), + DL, MVT::f32, SDValue(interp, 0)); + } + + if (slot % 4 < 2) + interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL, + MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32), + CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32), + CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32)); + else + interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL, + MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32), + CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32), + CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32)); + + return SDValue(interp, slot % 2); } case r600_read_ngroups_x: @@ -527,6 +443,20 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: return; case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); + return; + case ISD::LOAD: { + SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode(); + Results.push_back(SDValue(Node, 0)); + Results.push_back(SDValue(Node, 1)); + // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode + // function + DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1)); + return; + } + case ISD::STORE: + SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode(); + Results.push_back(SDValue(Node, 0)); + return; } } @@ -594,6 +524,20 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, false, false, false, 0); } +SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const { + + MachineFunction &MF = DAG.getMachineFunction(); + const AMDGPUFrameLowering *TFL = + static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering()); + + FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op); + assert(FIN); + + unsigned FrameIndex = FIN->getIndex(); + unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex); + return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32); +} + SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); EVT VT = Op.getValueType(); @@ -680,9 +624,12 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const } // Try to lower to a SET* instruction: - // We need all the operands of SELECT_CC to have the same value type, so if - // necessary we need to change True and False to be the same type as LHS and - // RHS, and then convert the result of the select_cc back to the correct type. + // + // CompareVT == MVT::f32 and VT == MVT::i32 is supported by the hardware, + // but for the other case where CompareVT != VT, all operands of + // SELECT_CC need to have the same value type, so we need to change True and + // False to be the same type as LHS and RHS, and then convert the result of + // the select_cc back to the correct type. // Move hardware True/False values to the correct operand. 
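The integer-compare branch below relies on a small bit trick to produce the f32 result: the hardware SELECT_CC yields all-ones or zero, and a mask plus UINT_TO_FP recovers 1.0f/0.0f. A standalone scalar model of that conversion (illustrative only, not code from this patch):

  #include <cstdint>
  // SELECT_CC on i32 yields -1 (all ones) for true and 0 for false; keeping
  // only the low bit and converting to float gives the 1.0f/0.0f pair that
  // the f32 consumer expects.
  float hwBoolToFloat(int32_t hwBool) {
    uint32_t lsb = static_cast<uint32_t>(hwBool) & 1u; // -1 -> 1, 0 -> 0
    return static_cast<float>(lsb);                    // true -> 1.0f, false -> 0.0f
  }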
if (isHWTrueValue(False) && isHWFalseValue(True)) { @@ -692,32 +639,17 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const } if (isHWTrueValue(True) && isHWFalseValue(False)) { - if (CompareVT != VT) { - if (VT == MVT::f32 && CompareVT == MVT::i32) { - SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, - LHS, RHS, - DAG.getConstant(-1, MVT::i32), - DAG.getConstant(0, MVT::i32), - CC); - // Convert integer values of true (-1) and false (0) to fp values of - // true (1.0f) and false (0.0f). - SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean, - DAG.getConstant(1, MVT::i32)); - return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB); - } else if (VT == MVT::i32 && CompareVT == MVT::f32) { - SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, - LHS, RHS, - DAG.getConstantFP(1.0f, MVT::f32), - DAG.getConstantFP(0.0f, MVT::f32), - CC); - // Convert fp values of true (1.0f) and false (0.0f) to integer values - // of true (-1) and false (0). - SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt); - return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg); - } else { - // I don't think there will be any other type pairings. - assert(!"Unhandled operand type parings in SELECT_CC"); - } + if (CompareVT != VT && VT == MVT::f32 && CompareVT == MVT::i32) { + SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, + LHS, RHS, + DAG.getConstant(-1, MVT::i32), + DAG.getConstant(0, MVT::i32), + CC); + // Convert integer values of true (-1) and false (0) to fp values of + // true (1.0f) and false (0.0f). + SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean, + DAG.getConstant(1, MVT::i32)); + return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB); } else { // This SELECT_CC is already legal. return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); @@ -808,6 +740,61 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return Cond; } +/// LLVM generates byte-addressed pointers. For indirect addressing, we need to +/// convert these pointers to a register index. Each register holds +/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the +/// \p StackWidth, which tells us how many of the 4 sub-registers will be used +/// for indirect addressing.
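Concretely, for the three widths the implementation below supports, the conversion is a single right shift. A standalone model of the same arithmetic, assuming the SRLPad values used in the function that follows:

  // Each stack "row" is StackWidth elements of 4 bytes, so the byte address
  // is divided by 4, 8, or 16 for widths 1, 2, and 4 respectively.
  unsigned byteAddrToRegIndex(unsigned ByteAddr, unsigned StackWidth) {
    switch (StackWidth) {
    case 1:  return ByteAddr >> 2; // 4-byte rows
    case 2:  return ByteAddr >> 3; // 8-byte rows
    default: return ByteAddr >> 4; // 16-byte rows (StackWidth == 4)
    }
  }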
+SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr, + unsigned StackWidth, + SelectionDAG &DAG) const { + unsigned SRLPad; + switch(StackWidth) { + case 1: + SRLPad = 2; + break; + case 2: + SRLPad = 3; + break; + case 4: + SRLPad = 4; + break; + default: llvm_unreachable("Invalid stack width"); + } + + return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr, + DAG.getConstant(SRLPad, MVT::i32)); +} + +void R600TargetLowering::getStackAddress(unsigned StackWidth, + unsigned ElemIdx, + unsigned &Channel, + unsigned &PtrIncr) const { + switch (StackWidth) { + default: + case 1: + Channel = 0; + if (ElemIdx > 0) { + PtrIncr = 1; + } else { + PtrIncr = 0; + } + break; + case 2: + Channel = ElemIdx % 2; + if (ElemIdx == 2) { + PtrIncr = 1; + } else { + PtrIncr = 0; + } + break; + case 4: + Channel = ElemIdx; + PtrIncr = 0; + break; + } +} + SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); StoreSDNode *StoreNode = cast<StoreSDNode>(Op); @@ -829,9 +816,188 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { } return Chain; } - return SDValue(); + + EVT ValueVT = Value.getValueType(); + + if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { + return SDValue(); + } + + // Lowering for indirect addressing + + const MachineFunction &MF = DAG.getMachineFunction(); + const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>( + getTargetMachine().getFrameLowering()); + unsigned StackWidth = TFL->getStackWidth(MF); + + Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG); + + if (ValueVT.isVector()) { + unsigned NumElemVT = ValueVT.getVectorNumElements(); + EVT ElemVT = ValueVT.getVectorElementType(); + SDValue Stores[4]; + + assert(NumElemVT >= StackWidth && "Stack width cannot be greater than " + "vector width in store"); + + for (unsigned i = 0; i < NumElemVT; ++i) { + unsigned Channel, PtrIncr; + getStackAddress(StackWidth, i, Channel, PtrIncr); + Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr, + DAG.getConstant(PtrIncr, MVT::i32)); + SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, + Value, DAG.getConstant(i, MVT::i32)); + + Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, + Chain, Elem, Ptr, + DAG.getTargetConstant(Channel, MVT::i32)); + } + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT); + } else { + if (ValueVT == MVT::i8) { + Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value); + } + Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr, + DAG.getTargetConstant(0, MVT::i32)); // Channel + } + + return Chain; +} + +// Returns 512 + (kc_bank << 12), i.e. the base slot of the given kcache bank +static int +ConstantAddressBlock(unsigned AddressSpace) { + switch (AddressSpace) { + case AMDGPUAS::CONSTANT_BUFFER_0: + return 512; + case AMDGPUAS::CONSTANT_BUFFER_1: + return 512 + 4096; + case AMDGPUAS::CONSTANT_BUFFER_2: + return 512 + 4096 * 2; + case AMDGPUAS::CONSTANT_BUFFER_3: + return 512 + 4096 * 3; + case AMDGPUAS::CONSTANT_BUFFER_4: + return 512 + 4096 * 4; + case AMDGPUAS::CONSTANT_BUFFER_5: + return 512 + 4096 * 5; + case AMDGPUAS::CONSTANT_BUFFER_6: + return 512 + 4096 * 6; + case AMDGPUAS::CONSTANT_BUFFER_7: + return 512 + 4096 * 7; + case AMDGPUAS::CONSTANT_BUFFER_8: + return 512 + 4096 * 8; + case AMDGPUAS::CONSTANT_BUFFER_9: + return 512 + 4096 * 9; + case AMDGPUAS::CONSTANT_BUFFER_10: + return 512 + 4096 * 10; + case AMDGPUAS::CONSTANT_BUFFER_11: + return 512 + 4096 * 11; + case AMDGPUAS::CONSTANT_BUFFER_12:
+ return 512 + 4096 * 12; + case AMDGPUAS::CONSTANT_BUFFER_13: + return 512 + 4096 * 13; + case AMDGPUAS::CONSTANT_BUFFER_14: + return 512 + 4096 * 14; + case AMDGPUAS::CONSTANT_BUFFER_15: + return 512 + 4096 * 15; + default: + return -1; + } } +SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const +{ + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + LoadSDNode *LoadNode = cast<LoadSDNode>(Op); + SDValue Chain = Op.getOperand(0); + SDValue Ptr = Op.getOperand(1); + SDValue LoweredLoad; + + int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); + if (ConstantBlock > -1) { + SDValue Result; + if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) || + dyn_cast<Constant>(LoadNode->getSrcValue())) { + SDValue Slots[4]; + for (unsigned i = 0; i < 4; i++) { + // We want the Const position encoded with the following formula: + // (((512 + (kc_bank << 12) + const_index) << 2) + chan) + // const_index is Ptr computed by llvm using an alignment of 16. + // Thus we add ((512 + (kc_bank << 12)) + chan) * 4 here and + // then divide by 4 at the ISel step + SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32)); + Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr); + } + Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4); + } else { + // A non-constant ptr can't be folded; keep it as a v4i32 load + Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32, + DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)) + ); + } + + if (!VT.isVector()) { + Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result, + DAG.getConstant(0, MVT::i32)); + } + + SDValue MergedValues[2] = { + Result, + Chain + }; + return DAG.getMergeValues(MergedValues, 2, DL); + } + + if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { + return SDValue(); + } + + // Lowering for indirect addressing + const MachineFunction &MF = DAG.getMachineFunction(); + const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>( + getTargetMachine().getFrameLowering()); + unsigned StackWidth = TFL->getStackWidth(MF); + + Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG); + + if (VT.isVector()) { + unsigned NumElemVT = VT.getVectorNumElements(); + EVT ElemVT = VT.getVectorElementType(); + SDValue Loads[4]; + + assert(NumElemVT >= StackWidth && "Stack width cannot be greater than " + "vector width in load"); + + for (unsigned i = 0; i < NumElemVT; ++i) { + unsigned Channel, PtrIncr; + getStackAddress(StackWidth, i, Channel, PtrIncr); + Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr, + DAG.getConstant(PtrIncr, MVT::i32)); + Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT, + Chain, Ptr, + DAG.getTargetConstant(Channel, MVT::i32), + Op.getOperand(2)); + } + for (unsigned i = NumElemVT; i < 4; ++i) { + Loads[i] = DAG.getUNDEF(ElemVT); + } + EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4); + LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4); + } else { + LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT, + Chain, Ptr, + DAG.getTargetConstant(0, MVT::i32), // Channel + Op.getOperand(2)); + } + + SDValue Ops[2]; + Ops[0] = LoweredLoad; + Ops[1] = Chain; + + return DAG.getMergeValues(Ops, 2, DL); +} SDValue R600TargetLowering::LowerFPOW(SDValue Op, SelectionDAG &DAG) const { @@ -873,7 +1039,7 @@ SDValue R600TargetLowering::LowerFormalArguments( AMDGPUAS::PARAM_I_ADDRESS); SDValue Arg =
DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(), DAG.getConstant(ParamOffsetBytes, MVT::i32), - MachinePointerInfo(new Argument(PtrTy)), + MachinePointerInfo(UndefValue::get(PtrTy)), ArgVT, false, false, ArgBytes); InVals.push_back(Arg); ParamOffsetBytes += ArgBytes; @@ -904,6 +1070,121 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, } break; } + + // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) -> + // (i32 select_cc f32, f32, -1, 0 cc) + // + // Mesa's GLSL frontend generates the above pattern a lot and we can lower + // this to one of the SET*_DX10 instructions. + case ISD::FP_TO_SINT: { + SDValue FNeg = N->getOperand(0); + if (FNeg.getOpcode() != ISD::FNEG) { + return SDValue(); + } + SDValue SelectCC = FNeg.getOperand(0); + if (SelectCC.getOpcode() != ISD::SELECT_CC || + SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS + SelectCC.getOperand(2).getValueType() != MVT::f32 || // True + !isHWTrueValue(SelectCC.getOperand(2)) || + !isHWFalseValue(SelectCC.getOperand(3))) { + return SDValue(); + } + + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N->getValueType(0), + SelectCC.getOperand(0), // LHS + SelectCC.getOperand(1), // RHS + DAG.getConstant(-1, MVT::i32), // True + DAG.getConstant(0, MVT::i32), // False + SelectCC.getOperand(4)); // CC + + break; + } + // An EXTRACT_VECTOR_ELT of a BUILD_VECTOR generated by custom lowering + // also needs to be combined here + case ISD::EXTRACT_VECTOR_ELT: { + SDValue Arg = N->getOperand(0); + if (Arg.getOpcode() == ISD::BUILD_VECTOR) { + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) { + unsigned Element = Const->getZExtValue(); + return Arg->getOperand(Element); + } + } + if (Arg.getOpcode() == ISD::BITCAST && + Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) { + unsigned Element = Const->getZExtValue(); + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(), + Arg->getOperand(0).getOperand(Element)); + } + } + } + + case ISD::SELECT_CC: { + // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq -> + // selectcc x, y, a, b, inv(cc) + SDValue LHS = N->getOperand(0); + if (LHS.getOpcode() != ISD::SELECT_CC) { + return SDValue(); + } + + SDValue RHS = N->getOperand(1); + SDValue True = N->getOperand(2); + SDValue False = N->getOperand(3); + + if (LHS.getOperand(2).getNode() != True.getNode() || + LHS.getOperand(3).getNode() != False.getNode() || + RHS.getNode() != False.getNode() || + cast<CondCodeSDNode>(N->getOperand(4))->get() != ISD::SETEQ) { + return SDValue(); + } + + ISD::CondCode CCOpcode = cast<CondCodeSDNode>(LHS->getOperand(4))->get(); + CCOpcode = ISD::getSetCCInverse( + CCOpcode, LHS.getOperand(0).getValueType().isInteger()); + return DAG.getSelectCC(N->getDebugLoc(), + LHS.getOperand(0), + LHS.getOperand(1), + LHS.getOperand(2), + LHS.getOperand(3), + CCOpcode); + } + case AMDGPUISD::EXPORT: { + SDValue Arg = N->getOperand(1); + if (Arg.getOpcode() != ISD::BUILD_VECTOR) + break; + SDValue NewBldVec[4] = { + DAG.getUNDEF(MVT::f32), + DAG.getUNDEF(MVT::f32), + DAG.getUNDEF(MVT::f32), + DAG.getUNDEF(MVT::f32) + }; + SDValue NewArgs[8] = { + N->getOperand(0), // Chain + SDValue(), + N->getOperand(2), // ArrayBase + N->getOperand(3), // Type + N->getOperand(4), // SWZ_X + N->getOperand(5), // SWZ_Y + N->getOperand(6), // SWZ_Z + N->getOperand(7) // SWZ_W + }; + for (unsigned i = 0; i < Arg.getNumOperands(); i++) { + if (ConstantFPSDNode *C =
dyn_cast<ConstantFPSDNode>(Arg.getOperand(i))) { + if (C->isZero()) { + NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0 + } else if (C->isExactlyValue(1.0)) { + NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_1 + } else { + NewBldVec[i] = Arg.getOperand(i); + } + } else { + NewBldVec[i] = Arg.getOperand(i); + } + } + DebugLoc DL = N->getDebugLoc(); + NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4); + return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8); + } } return SDValue(); } diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index 2b954da..afa3897 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -63,7 +63,13 @@ private: SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const; - + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; + + SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth, + SelectionDAG &DAG) const; + void getStackAddress(unsigned StackWidth, unsigned ElemIdx, + unsigned &Channel, unsigned &PtrIncr) const; bool isZero(SDValue Op) const; }; diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 06b78d0..7e3f005 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -16,8 +16,11 @@ #include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "R600Defines.h" +#include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #define GET_INSTRINFO_CTOR #include "AMDGPUGenDFAPacketizer.inc" @@ -104,7 +107,6 @@ bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const { switch (Opcode) { default: return false; case AMDGPU::RETURN: - case AMDGPU::RESERVE_REG: return true; } } @@ -466,6 +468,124 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return 2; } +int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + int Offset = 0; + + if (MFI->getNumObjects() == 0) { + return -1; + } + + if (MRI.livein_empty()) { + return 0; + } + + for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), + LE = MRI.livein_end(); + LI != LE; ++LI) { + Offset = std::max(Offset, + GET_REG_INDEX(RI.getEncodingValue(LI->first))); + } + + return Offset + 1; +} + +int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const { + int Offset = 0; + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Variable-sized objects are not supported + assert(!MFI->hasVarSizedObjects()); + + if (MFI->getNumObjects() == 0) { + return -1; + } + + Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1); + + return getIndirectIndexBegin(MF) + Offset; +} + +std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs( + const MachineFunction &MF) const { + const AMDGPUFrameLowering *TFL = + static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering()); + std::vector<unsigned> Regs; + + unsigned StackWidth = TFL->getStackWidth(MF); + int End = getIndirectIndexEnd(MF); + + if (End == -1) { + return Regs; + } + + for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) { + unsigned SuperReg =
AMDGPU::R600_Reg128RegClass.getRegister(Index); + Regs.push_back(SuperReg); + for (unsigned Chan = 0; Chan < StackWidth; ++Chan) { + unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan); + Regs.push_back(Reg); + } + } + return Regs; +} + +unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex, + unsigned Channel) const { + // XXX: Remove when we support a stack width > 2 + assert(Channel == 0); + return RegIndex; +} + +const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass( + unsigned SourceReg) const { + return &AMDGPU::R600_TReg32RegClass; +} + +const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const { + return &AMDGPU::TRegMemRegClass; +} + +MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg) const { + unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); + MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, + AMDGPU::AR_X, OffsetReg); + setImmOperand(MOVA, R600Operands::WRITE, 0); + + MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, + AddrReg, ValueReg) + .addReg(AMDGPU::AR_X, RegState::Implicit); + setImmOperand(Mov, R600Operands::DST_REL, 1); + return Mov; +} + +MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg) const { + unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); + MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, + AMDGPU::AR_X, + OffsetReg); + setImmOperand(MOVA, R600Operands::WRITE, 0); + MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, + ValueReg, + AddrReg) + .addReg(AMDGPU::AR_X, RegState::Implicit); + setImmOperand(Mov, R600Operands::SRC0_REL, 1); + + return Mov; +} + +const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const { + return &AMDGPU::IndirectRegRegClass; +} + + MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opcode, @@ -486,13 +606,15 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB .addReg(Src0Reg) // $src0 .addImm(0) // $src0_neg .addImm(0) // $src0_rel - .addImm(0); // $src0_abs + .addImm(0) // $src0_abs + .addImm(-1); // $src0_sel if (Src1Reg) { MIB.addReg(Src1Reg) // $src1 .addImm(0) // $src1_neg .addImm(0) // $src1_rel - .addImm(0); // $src1_abs + .addImm(0) // $src1_abs + .addImm(-1); // $src1_sel } //XXX: The r600g finalizer expects this to be 1, once we've moved the @@ -521,16 +643,6 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI, int R600InstrInfo::getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const { - const static int OpTable[3][R600Operands::COUNT] = { -// W C S S S S S S S S -// R O D L S R R R S R R R S R R L P -// D U I M R A R C C C C C C C R C C A R I -// S E U T O E M C 0 0 0 C 1 1 1 C 2 2 S E M -// T M P E D L P 0 N R A 1 N R A 2 N R T D M - {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8,-1,-1,-1,-1,-1,-1,-1, 9,10,11}, - {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17}, - {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14} - }; unsigned TargetFlags = get(Opcode).TSFlags; unsigned OpTableIdx; @@ -556,7 +668,7 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode, OpTableIdx = 2; } - return OpTable[OpTableIdx][Op]; + return 
R600Operands::ALUOpTable[OpTableIdx][Op]; } void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op, diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index 11685af..efe721c 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -113,6 +113,38 @@ namespace llvm { virtual int getInstrLatency(const InstrItineraryData *ItinData, SDNode *Node) const { return 1;} + /// \returns a list of all the registers that may be accessed using indirect + /// addressing. + std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const; + + virtual int getIndirectIndexBegin(const MachineFunction &MF) const; + + virtual int getIndirectIndexEnd(const MachineFunction &MF) const; + + + virtual unsigned calculateIndirectAddress(unsigned RegIndex, + unsigned Channel) const; + + virtual const TargetRegisterClass *getIndirectAddrStoreRegClass( + unsigned SourceReg) const; + + virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const; + + virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg) const; + + virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg) const; + + virtual const TargetRegisterClass *getSuperIndirectRegClass() const; + + + /// buildDefaultInstruction - This function returns a MachineInstr with + /// all the instruction modifiers initialized to their default values. /// You can use this function to avoid manually specifying each instruction /// modifier operand when building a new instruction. /// diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 64bab18..8242df9 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -70,6 +70,11 @@ class InstFlag<string PM = "printOperand", int Default = 0> let PrintMethod = PM; } +// src_sel for ALU src operands; see also the ALU_CONST and ALU_PARAM registers +def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> { + let PrintMethod = "printSel"; +} + def LITERAL : InstFlag<"printLiteral">; def WRITE : InstFlag <"printWrite", 1>; @@ -86,9 +91,16 @@ def UP : InstFlag <"printUpdatePred">; // default to 0.
def LAST : InstFlag<"printLast", 1>; +def FRAMEri : Operand<iPTR> { + let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index); +} + def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>; def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>; def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>; +def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>; +def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>; +def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>; class R600ALU_Word0 { field bits<32> Word0; @@ -173,6 +185,55 @@ class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{ let Word1{17-13} = alu_inst; } +class VTX_WORD0 { + field bits<32> Word0; + bits<7> SRC_GPR; + bits<5> VC_INST; + bits<2> FETCH_TYPE; + bits<1> FETCH_WHOLE_QUAD; + bits<8> BUFFER_ID; + bits<1> SRC_REL; + bits<2> SRC_SEL_X; + bits<6> MEGA_FETCH_COUNT; + + let Word0{4-0} = VC_INST; + let Word0{6-5} = FETCH_TYPE; + let Word0{7} = FETCH_WHOLE_QUAD; + let Word0{15-8} = BUFFER_ID; + let Word0{22-16} = SRC_GPR; + let Word0{23} = SRC_REL; + let Word0{25-24} = SRC_SEL_X; + let Word0{31-26} = MEGA_FETCH_COUNT; +} + +class VTX_WORD1_GPR { + field bits<32> Word1; + bits<7> DST_GPR; + bits<1> DST_REL; + bits<3> DST_SEL_X; + bits<3> DST_SEL_Y; + bits<3> DST_SEL_Z; + bits<3> DST_SEL_W; + bits<1> USE_CONST_FIELDS; + bits<6> DATA_FORMAT; + bits<2> NUM_FORMAT_ALL; + bits<1> FORMAT_COMP_ALL; + bits<1> SRF_MODE_ALL; + + let Word1{6-0} = DST_GPR; + let Word1{7} = DST_REL; + let Word1{8} = 0; // Reserved + let Word1{11-9} = DST_SEL_X; + let Word1{14-12} = DST_SEL_Y; + let Word1{17-15} = DST_SEL_Z; + let Word1{20-18} = DST_SEL_W; + let Word1{21} = USE_CONST_FIELDS; + let Word1{27-22} = DATA_FORMAT; + let Word1{29-28} = NUM_FORMAT_ALL; + let Word1{30} = FORMAT_COMP_ALL; + let Word1{31} = SRF_MODE_ALL; +} + /* XXX: R600 subtarget uses a slightly different encoding than the other subtargets. 
We currently handle this in R600MCCodeEmitter, but we may @@ -214,11 +275,11 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern, InstR600 <0, (outs R600_Reg32:$dst), (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), !strconcat(opName, "$clamp $dst$write$dst_rel$omod, " - "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " + "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, " "$literal $pred_sel$last"), pattern, itin>, @@ -254,13 +315,13 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern, (outs R600_Reg32:$dst), (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, - R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, + R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), !strconcat(opName, "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, " - "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " - "$src1_neg$src1_abs$src1$src1_abs$src1_rel, " + "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, " + "$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, " "$literal $pred_sel$last"), pattern, itin>, @@ -291,14 +352,14 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern, InstR600 <0, (outs R600_Reg32:$dst), (ins REL:$dst_rel, CLAMP:$clamp, - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, - R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, - R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel, + R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel, + R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), !strconcat(opName, "$clamp $dst$dst_rel, " - "$src0_neg$src0$src0_rel, " - "$src1_neg$src1$src1_rel, " - "$src2_neg$src2$src2_rel, " + "$src0_neg$src0$src0_sel$src0_rel, " + "$src1_neg$src1$src1_sel$src1_rel, " + "$src2_neg$src2$src2_sel$src2_rel, " "$literal $pred_sel$last"), pattern, itin>, @@ -342,6 +403,27 @@ def TEX_SHADOW : PatLeaf< }] >; +def TEX_RECT : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 5; + }] +>; + +def TEX_ARRAY : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 9 || TType == 10 || TType == 15 || TType == 16; + }] +>; + +def TEX_SHADOW_ARRAY : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 11 || TType == 12 || TType == 17; + }] +>; + class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs, dag ins, string asm, list<dag> pattern> : InstR600ISA <outs, ins, asm, pattern> { @@ -414,32 +496,35 @@ def isR600toCayman : Predicate< "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">; //===----------------------------------------------------------------------===// -// Interpolation Instructions +// R600 SDNodes //===----------------------------------------------------------------------===// -def INTERP: SDNode<"AMDGPUISD::INTERP", - SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]> - >; +def INTERP_PAIR_XY : AMDGPUShaderInst < + (outs R600_TReg32_X:$dst0, 
R600_TReg32_Y:$dst1), + (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2), + "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 dst1", + []>; -def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0", - SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]> - >; +def INTERP_PAIR_ZW : AMDGPUShaderInst < + (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1), + (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2), + "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1", + []>; -let usesCustomInserter = 1 in { -def input_perspective : AMDGPUShaderInst < - (outs R600_Reg128:$dst), - (ins i32imm:$src0, i32imm:$src1), - "input_perspective $src0 $src1 : dst", - [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>; -} // End usesCustomInserter = 1 - -def input_constant : AMDGPUShaderInst < - (outs R600_Reg128:$dst), - (ins i32imm:$src), - "input_perspective $src : dst", - [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>; +def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS", + SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, + [SDNPMayLoad] +>; +//===----------------------------------------------------------------------===// +// Interpolation Instructions +//===----------------------------------------------------------------------===// +def INTERP_VEC_LOAD : AMDGPUShaderInst < + (outs R600_Reg128:$dst), + (ins i32imm:$src0), + "INTERP_LOAD $src0 : $dst", + []>; def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> { let bank_swizzle = 5; @@ -455,7 +540,7 @@ def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>; // Export Instructions //===----------------------------------------------------------------------===// -def ExportType : SDTypeProfile<0, 5, [SDTCisFP<0>, SDTCisInt<1>]>; +def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType, [SDNPHasChain, SDNPSideEffect]>; @@ -507,53 +592,59 @@ class ExportBufWord1 { multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> { def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg), (ExportInst - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0), 0, 61, 0, 7, 7, 7, cf_inst, 0) >; def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg), (ExportInst - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0), 0, 61, 7, 0, 7, 7, cf_inst, 0) >; - def : Pat<(int_R600_store_pixel_dummy), + def : Pat<(int_R600_store_dummy (i32 imm:$type)), (ExportInst - (v4f32 (IMPLICIT_DEF)), 0, 0, 7, 7, 7, 7, cf_inst, 0) + (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0) >; - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 0), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, - 0, 1, 2, 3, cf_inst, 0) + def : Pat<(int_R600_store_dummy 1), + (ExportInst + (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0) + >; + + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), + (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)), + (ExportInst R600_Reg128:$src, imm:$type, imm:$base, + imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0) >; + } multiclass SteamOutputExportPattern<Instruction ExportInst, bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> { // Stream0 - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), - (ExportInst R600_Reg128:$src, imm:$type, 
imm:$arraybase, + def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), + (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 4095, imm:$mask, buf0inst, 0)>; // Stream1 - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 2), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), + (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 4095, imm:$mask, buf1inst, 0)>; // Stream2 - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 3), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), + (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 4095, imm:$mask, buf2inst, 0)>; // Stream3 - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 4), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), + (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 4095, imm:$mask, buf3inst, 0)>; } -let isTerminator = 1, usesCustomInserter = 1 in { +let usesCustomInserter = 1 in { class ExportSwzInst : InstR600ISA<( outs), @@ -567,7 +658,7 @@ class ExportSwzInst : InstR600ISA<( let Inst{63-32} = Word1; } -} // End isTerminator = 1, usesCustomInserter = 1 +} // End usesCustomInserter = 1 class ExportBufInst : InstR600ISA<( outs), @@ -580,7 +671,7 @@ class ExportBufInst : InstR600ISA<( let Inst{63-32} = Word1; } -let Predicates = [isR600toCayman] in { +let Predicates = [isR600toCayman] in { //===----------------------------------------------------------------------===// // Common Instructions R600, R700, Evergreen, Cayman @@ -624,6 +715,34 @@ def SNE : R600_2OP < COND_NE))] >; +def SETE_DX10 : R600_2OP < + 0xC, "SETE_DX10", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), + COND_EQ))] +>; + +def SETGT_DX10 : R600_2OP < + 0xD, "SETGT_DX10", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), + COND_GT))] +>; + +def SETGE_DX10 : R600_2OP < + 0xE, "SETGE_DX10", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), + COND_GE))] +>; + +def SETNE_DX10 : R600_2OP < + 0xF, "SETNE_DX10", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), + COND_NE))] +>; + def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>; def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>; @@ -684,7 +803,7 @@ def SETE_INT : R600_2OP < >; def SETGT_INT : R600_2OP < - 0x3B, "SGT_INT", + 0x3B, "SETGT_INT", [(set (i32 R600_Reg32:$dst), (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))] >; @@ -830,8 +949,13 @@ class MUL_LIT_Common <bits<5> inst> : R600_3OP < class MULADD_Common <bits<5> inst> : R600_3OP < inst, "MULADD", + [] +>; + +class MULADD_IEEE_Common <bits<5> inst> : R600_3OP < + inst, "MULADD_IEEE", [(set (f32 R600_Reg32:$dst), - (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))] + (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))] >; class CNDE_Common <bits<5> inst> : 
R600_3OP < @@ -988,6 +1112,7 @@ let Predicates = [isR600] in { def MUL_LIT_r600 : MUL_LIT_Common<0x0C>; def MULADD_r600 : MULADD_Common<0x10>; + def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>; def CNDE_r600 : CNDE_Common<0x18>; def CNDGT_r600 : CNDGT_Common<0x19>; def CNDGE_r600 : CNDGE_Common<0x1A>; @@ -1070,7 +1195,7 @@ let Predicates = [isR700] in { //===----------------------------------------------------------------------===// let Predicates = [isEG] in { - + def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>; defm DIV_eg : DIV_Common<RECIP_IEEE_eg>; @@ -1127,6 +1252,7 @@ let Predicates = [isEGorCayman] in { >; def MULADD_eg : MULADD_Common<0x14>; + def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>; def ASHR_eg : ASHR_Common<0x15>; def LSHR_eg : LSHR_Common<0x16>; def LSHL_eg : LSHL_Common<0x17>; @@ -1138,6 +1264,10 @@ let Predicates = [isEGorCayman] in { defm DOT4_eg : DOT4_Common<0xBE>; defm CUBE_eg : CUBE_Common<0xC0>; +let hasSideEffects = 1 in { + def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>; +} + def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>; def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { @@ -1228,37 +1358,30 @@ def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < >; class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> - : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern> { - - // Operands - bits<7> DST_GPR; - bits<7> SRC_GPR; + : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>, + VTX_WORD1_GPR, VTX_WORD0 { // Static fields - bits<5> VC_INST = 0; - bits<2> FETCH_TYPE = 2; - bits<1> FETCH_WHOLE_QUAD = 0; - bits<8> BUFFER_ID = buffer_id; - bits<1> SRC_REL = 0; + let VC_INST = 0; + let FETCH_TYPE = 2; + let FETCH_WHOLE_QUAD = 0; + let BUFFER_ID = buffer_id; + let SRC_REL = 0; // XXX: We can infer this field based on the SRC_GPR. This would allow us // to store vertex addresses in any channel, not just X. - bits<2> SRC_SEL_X = 0; - bits<6> MEGA_FETCH_COUNT; - bits<1> DST_REL = 0; - bits<3> DST_SEL_X; - bits<3> DST_SEL_Y; - bits<3> DST_SEL_Z; - bits<3> DST_SEL_W; + let SRC_SEL_X = 0; + let DST_REL = 0; // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, // however, based on my testing if USE_CONST_FIELDS is set, then all // these fields need to be set to 0. 
- bits<1> USE_CONST_FIELDS = 0; - bits<6> DATA_FORMAT; - bits<2> NUM_FORMAT_ALL = 1; - bits<1> FORMAT_COMP_ALL = 0; - bits<1> SRF_MODE_ALL = 0; + let USE_CONST_FIELDS = 0; + let NUM_FORMAT_ALL = 1; + let FORMAT_COMP_ALL = 0; + let SRF_MODE_ALL = 0; + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; // LLVM can only encode 64-bit instructions, so these fields are manually // encoded in R600CodeEmitter // // bits<16> OFFSET; // bits<2> ENDIAN_SWAP = 0; // bits<1> CONST_BUF_NO_STRIDE = 0; // bits<1> MEGA_FETCH = 0; // bits<1> ALT_CONST = 0; // bits<2> BUFFER_INDEX_MODE = 0; @@ -1269,29 +1392,7 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> - // VTX_WORD0 - let Inst{4-0} = VC_INST; - let Inst{6-5} = FETCH_TYPE; - let Inst{7} = FETCH_WHOLE_QUAD; - let Inst{15-8} = BUFFER_ID; - let Inst{22-16} = SRC_GPR; - let Inst{23} = SRC_REL; - let Inst{25-24} = SRC_SEL_X; - let Inst{31-26} = MEGA_FETCH_COUNT; - - // VTX_WORD1_GPR - let Inst{38-32} = DST_GPR; - let Inst{39} = DST_REL; - let Inst{40} = 0; // Reserved - let Inst{43-41} = DST_SEL_X; - let Inst{46-44} = DST_SEL_Y; - let Inst{49-47} = DST_SEL_Z; - let Inst{52-50} = DST_SEL_W; - let Inst{53} = USE_CONST_FIELDS; - let Inst{59-54} = DATA_FORMAT; - let Inst{61-60} = NUM_FORMAT_ALL; - let Inst{62} = FORMAT_COMP_ALL; - let Inst{63} = SRF_MODE_ALL; + // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding // is done in R600CodeEmitter @@ -1346,7 +1447,7 @@ class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> // This is not really necessary, but there were some GPU hangs that appeared // to be caused by ALU instructions in the next instruction group that wrote - // to the $ptr registers of the VTX_READ. + // to the $ptr registers of the VTX_READ. // e.g. // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24 // %T2_X<def> = MOV %ZERO @@ -1387,6 +1488,10 @@ def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] >; +def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, + [(set (v4i32 R600_Reg128:$dst), (load_param ADDRVTX_READ:$ptr))] >; + //===----------------------------------------------------------------------===// // VTX Read from global memory space //===----------------------------------------------------------------------===// @@ -1417,9 +1522,15 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, } +//===----------------------------------------------------------------------===// +// Register loads and stores - for indirect addressing +//===----------------------------------------------------------------------===// + +defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>; + let Predicates = [isCayman] in { -let isVector = 1 in { +let isVector = 1 in { def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; @@ -1476,6 +1587,7 @@ def PRED_X : InstR600 < (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags), "", [], NullALU> { let FlagOperandIdx = 3; + let isTerminator = 1; } let isTerminator = 1, isBranch = 1, isBarrier = 1 in { @@ -1502,19 +1614,6 @@ def MASK_WRITE : AMDGPUShaderInst < } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1 -def R600_LOAD_CONST : AMDGPUShaderInst < - (outs R600_Reg32:$dst), - (ins i32imm:$src0), - "R600_LOAD_CONST $dst, $src0", - [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))] ->; - -def RESERVE_REG : AMDGPUShaderInst < - (outs), - (ins i32imm:$src), - "RESERVE_REG $src", - [(int_AMDGPU_reserve_reg imm:$src)] ->; def TXD: AMDGPUShaderInst < (outs R600_Reg128:$dst), @@ -1540,11 +1639,138 @@ def FNEG_R600 : FNEG<R600_Reg32>; //===---------------------------------------------------------------------===// // Return
instruction //===---------------------------------------------------------------------===// -let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in { +let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, + usesCustomInserter = 1 in { def RETURN : ILFormat<(outs), (ins variable_ops), "RETURN", [(IL_retflag)]>; } + +//===----------------------------------------------------------------------===// +// Constant Buffer Addressing Support +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { +def CONST_COPY : Instruction { + let OutOperandList = (outs R600_Reg32:$dst); + let InOperandList = (ins i32imm:$src); + let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))]; + let AsmString = "CONST_COPY"; + let neverHasSideEffects = 1; + let isAsCheapAsAMove = 1; + let Itinerary = NullALU; +} +} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" + +def TEX_VTX_CONSTBUF : + InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", + [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>, + VTX_WORD1_GPR, VTX_WORD0 { + + let VC_INST = 0; + let FETCH_TYPE = 2; + let FETCH_WHOLE_QUAD = 0; + let BUFFER_ID = 0; + let SRC_REL = 0; + let SRC_SEL_X = 0; + let DST_REL = 0; + let USE_CONST_FIELDS = 0; + let NUM_FORMAT_ALL = 2; + let FORMAT_COMP_ALL = 1; + let SRF_MODE_ALL = 1; + let MEGA_FETCH_COUNT = 16; + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 2; + let DST_SEL_W = 3; + let DATA_FORMAT = 35; + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; + +// LLVM can only encode 64-bit instructions, so these fields are manually +// encoded in R600CodeEmitter +// +// bits<16> OFFSET; +// bits<2> ENDIAN_SWAP = 0; +// bits<1> CONST_BUF_NO_STRIDE = 0; +// bits<1> MEGA_FETCH = 0; +// bits<1> ALT_CONST = 0; +// bits<2> BUFFER_INDEX_MODE = 0; + + + +// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding +// is done in R600CodeEmitter +// +// Inst{79-64} = OFFSET; +// Inst{81-80} = ENDIAN_SWAP; +// Inst{82} = CONST_BUF_NO_STRIDE; +// Inst{83} = MEGA_FETCH; +// Inst{84} = ALT_CONST; +// Inst{86-85} = BUFFER_INDEX_MODE; +// Inst{95-86} = 0; Reserved + +// VTX_WORD3 (Padding) +// +// Inst{127-96} = 0; +} + +def TEX_VTX_TEXBUF: + InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr", + [(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>, +VTX_WORD1_GPR, VTX_WORD0 { + +let VC_INST = 0; +let FETCH_TYPE = 2; +let FETCH_WHOLE_QUAD = 0; +let SRC_REL = 0; +let SRC_SEL_X = 0; +let DST_REL = 0; +let USE_CONST_FIELDS = 1; +let NUM_FORMAT_ALL = 0; +let FORMAT_COMP_ALL = 0; +let SRF_MODE_ALL = 1; +let MEGA_FETCH_COUNT = 16; +let DST_SEL_X = 0; +let DST_SEL_Y = 1; +let DST_SEL_Z = 2; +let DST_SEL_W = 3; +let DATA_FORMAT = 0; + +let Inst{31-0} = Word0; +let Inst{63-32} = Word1; + +// LLVM can only encode 64-bit instructions, so these fields are manually +// encoded in R600CodeEmitter +// +// bits<16> OFFSET; +// bits<2> ENDIAN_SWAP = 0; +// bits<1> CONST_BUF_NO_STRIDE = 0; +// bits<1> MEGA_FETCH = 0; +// bits<1> ALT_CONST = 0; +// bits<2> BUFFER_INDEX_MODE = 0; + + + +// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding +// is done in R600CodeEmitter +// +// Inst{79-64} = OFFSET; +// Inst{81-80} = ENDIAN_SWAP; +// Inst{82} = CONST_BUF_NO_STRIDE; +// Inst{83} = MEGA_FETCH; +// Inst{84} = ALT_CONST; +// 
Inst{86-85} = BUFFER_INDEX_MODE; +// Inst{95-86} = 0; Reserved + +// VTX_WORD3 (Padding) +// +// Inst{127-96} = 0; +} + + + //===--------------------------------------------------------------------===// // Instructions support //===--------------------------------------------------------------------===// @@ -1641,7 +1867,19 @@ def : Pat < // SGE Reverse args def : Pat < (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE), - (SGE R600_Reg32:$src1, R600_Reg32:$src0) + (SGE R600_Reg32:$src1, R600_Reg32:$src0) >; + +// SETGT_DX10 reverse args +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT), + (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SETGE_DX10 reverse args +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE), + (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0) >; // SETGT_INT reverse args @@ -1682,31 +1920,43 @@ def : Pat < (SETE R600_Reg32:$src0, R600_Reg32:$src1) >; +//SETE_DX10 - 'true if ordered' +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO), + (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1) +>; + //SNE - 'true if unordered' def : Pat < (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO), (SNE R600_Reg32:$src0, R600_Reg32:$src1) >; -def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>; -def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>; -def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>; -def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>; +//SETNE_DX10 - 'true if unordered' +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO), + (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1) +>; + +def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>; +def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>; +def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>; +def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>; +def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>; +def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>; +def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>; +def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>; -def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>; -def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>; -def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>; -def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>; +def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>; +def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>; +def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>; +def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>; +def : Insert_Element <i32, v4i32, R600_Reg32,
R600_Reg128, 3, sub3>; def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>; def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>; diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td index 3825bc4..dc8980a 100644 --- a/lib/Target/R600/R600Intrinsics.td +++ b/lib/Target/R600/R600Intrinsics.td @@ -12,21 +12,20 @@ //===----------------------------------------------------------------------===// let TargetPrefix = "R600", isTarget = 1 in { - def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; - def int_R600_load_input_perspective : - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; - def int_R600_load_input_constant : - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; - def int_R600_load_input_linear : - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; + def int_R600_load_input : + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_R600_interp_input : + Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_R600_load_texbuf : + Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_R600_store_swizzle : + Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_R600_store_stream_output : - Intrinsic<[], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_R600_store_pixel_color : - Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; + Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_R600_store_pixel_depth : Intrinsic<[], [llvm_float_ty], []>; def int_R600_store_pixel_stencil : Intrinsic<[], [llvm_float_ty], []>; - def int_R600_store_pixel_dummy : - Intrinsic<[], [], []>; + def int_R600_store_dummy : + Intrinsic<[], [llvm_i32_ty], []>; } diff --git a/lib/Target/R600/R600LowerConstCopy.cpp b/lib/Target/R600/R600LowerConstCopy.cpp new file mode 100644 index 0000000..3ebe653 --- /dev/null +++ b/lib/Target/R600/R600LowerConstCopy.cpp @@ -0,0 +1,222 @@ +//===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to MOV---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass is intended to handle remaining ConstCopy pseudo MachineInstr. +/// ISel will fold each Const Buffer read inside scalar ALU. However it cannot +/// fold them inside vector instruction, like DOT4 or Cube ; ISel emits +/// ConstCopy instead. This pass (executed after ExpandingSpecialInstr) will try +/// to fold them if possible or replace them by MOV otherwise. 
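The constraint this pass enforces deserves one concrete illustration: within an ALU instruction group, constants are fetched through two read ports, one serving the X and Y channels and one serving Z and W, and each port can address only a single 128-bit constant pair per group. The sketch below mirrors the canFoldInBundle helper defined further down in this file; the standalone types and the bit convention (low two bits of the selection index pick the channel, the remaining bits pick the pair) are restated here purely for illustration, not as part of the patch.

// Sketch of the const-read-port rule (mirrors canFoldInBundle below).
struct ReadPorts {
  unsigned XYPair; // constant pair addressed by the X/Y port, 0 = unused
  unsigned ZWPair; // constant pair addressed by the Z/W port, 0 = unused
};

// ReadConst selects one constant channel: bits [1:0] give the channel
// (0..3 = X,Y,Z,W), the remaining bits give the 128-bit pair index.
static bool canFoldConstRead(ReadPorts &Used, unsigned ReadConst) {
  unsigned Chan = ReadConst & 3;
  unsigned Pair = ReadConst & ~3u;
  unsigned &Port = (Chan < 2) ? Used.XYPair : Used.ZWPair;
  if (!Port)
    Port = Pair;       // first constant read through this port
  return Port == Pair; // foldable only if the port already reads this pair
}

Two constant reads can therefore share an instruction group only when they agree on the pair for their port; otherwise the pass falls back to the MOV described above.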
+ // +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "R600InstrInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/GlobalValue.h" + +namespace llvm { + +class R600LowerConstCopy : public MachineFunctionPass { +private: + static char ID; + const R600InstrInfo *TII; + + struct ConstPairs { + unsigned XYPair; + unsigned ZWPair; + }; + + bool canFoldInBundle(ConstPairs &UsedConst, unsigned ReadConst) const; +public: + R600LowerConstCopy(TargetMachine &tm); + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { return "R600 Eliminate Symbolic Operand"; } +}; + +char R600LowerConstCopy::ID = 0; + +R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm) : + MachineFunctionPass(ID), + TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) +{ +} + +bool R600LowerConstCopy::canFoldInBundle(ConstPairs &UsedConst, + unsigned ReadConst) const { + unsigned ReadConstChan = ReadConst & 3; + unsigned ReadConstIndex = ReadConst & (~3); + if (ReadConstChan < 2) { + if (!UsedConst.XYPair) { + UsedConst.XYPair = ReadConstIndex; + } + return UsedConst.XYPair == ReadConstIndex; + } else { + if (!UsedConst.ZWPair) { + UsedConst.ZWPair = ReadConstIndex; + } + return UsedConst.ZWPair == ReadConstIndex; + } +} + +static bool isControlFlow(const MachineInstr &MI) { + return (MI.getOpcode() == AMDGPU::IF_PREDICATE_SET) || + (MI.getOpcode() == AMDGPU::ENDIF) || + (MI.getOpcode() == AMDGPU::ELSE) || + (MI.getOpcode() == AMDGPU::WHILELOOP) || + (MI.getOpcode() == AMDGPU::BREAK); +} + +bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) { + + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + DenseMap<unsigned, MachineInstr *> RegToConstIndex; + for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(), + E = MBB.instr_end(); I != E;) { + + if (I->getOpcode() == AMDGPU::CONST_COPY) { + MachineInstr &MI = *I; + I = llvm::next(I); + unsigned DstReg = MI.getOperand(0).getReg(); + DenseMap<unsigned, MachineInstr *>::iterator SrcMI = + RegToConstIndex.find(DstReg); + if (SrcMI != RegToConstIndex.end()) { + SrcMI->second->eraseFromParent(); + RegToConstIndex.erase(SrcMI); + } + MachineInstr *NewMI = + TII->buildDefaultInstruction(MBB, &MI, AMDGPU::MOV, + MI.getOperand(0).getReg(), AMDGPU::ALU_CONST); + TII->setImmOperand(NewMI, R600Operands::SRC0_SEL, + MI.getOperand(1).getImm()); + RegToConstIndex[DstReg] = NewMI; + MI.eraseFromParent(); + continue; + } + + std::vector<unsigned> Defs; + // We consider all instructions as bundled because the algorithm that + // handles const read port limitations inside an IG is still valid with + // single instructions.
+ std::vector<MachineInstr *> Bundle; + + if (I->isBundle()) { + unsigned BundleSize = I->getBundleSize(); + for (unsigned i = 0; i < BundleSize; i++) { + I = llvm::next(I); + Bundle.push_back(I); + } + } else if (TII->isALUInstr(I->getOpcode())){ + Bundle.push_back(I); + } else if (isControlFlow(*I)) { + RegToConstIndex.clear(); + I = llvm::next(I); + continue; + } else { + MachineInstr &MI = *I; + for (MachineInstr::mop_iterator MOp = MI.operands_begin(), + MOpE = MI.operands_end(); MOp != MOpE; ++MOp) { + MachineOperand &MO = *MOp; + if (!MO.isReg()) + continue; + if (MO.isDef()) { + Defs.push_back(MO.getReg()); + } else { + // Either a TEX or an Export inst, prevent from erasing def of used + // operand + RegToConstIndex.erase(MO.getReg()); + for (MCSubRegIterator SR(MO.getReg(), &TII->getRegisterInfo()); + SR.isValid(); ++SR) { + RegToConstIndex.erase(*SR); + } + } + } + } + + + R600Operands::Ops OpTable[3][2] = { + {R600Operands::SRC0, R600Operands::SRC0_SEL}, + {R600Operands::SRC1, R600Operands::SRC1_SEL}, + {R600Operands::SRC2, R600Operands::SRC2_SEL}, + }; + + for(std::vector<MachineInstr *>::iterator It = Bundle.begin(), + ItE = Bundle.end(); It != ItE; ++It) { + MachineInstr *MI = *It; + if (TII->isPredicated(MI)) { + // We don't want to erase previous assignment + RegToConstIndex.erase(MI->getOperand(0).getReg()); + } else { + int WriteIDX = TII->getOperandIdx(MI->getOpcode(), R600Operands::WRITE); + if (WriteIDX < 0 || MI->getOperand(WriteIDX).getImm()) + Defs.push_back(MI->getOperand(0).getReg()); + } + } + + ConstPairs CP = {0,0}; + for (unsigned SrcOp = 0; SrcOp < 3; SrcOp++) { + for(std::vector<MachineInstr *>::iterator It = Bundle.begin(), + ItE = Bundle.end(); It != ItE; ++It) { + MachineInstr *MI = *It; + int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[SrcOp][0]); + if (SrcIdx < 0) + continue; + MachineOperand &MO = MI->getOperand(SrcIdx); + DenseMap<unsigned, MachineInstr *>::iterator SrcMI = + RegToConstIndex.find(MO.getReg()); + if (SrcMI != RegToConstIndex.end()) { + MachineInstr *CstMov = SrcMI->second; + int ConstMovSel = + TII->getOperandIdx(CstMov->getOpcode(), R600Operands::SRC0_SEL); + unsigned ConstIndex = CstMov->getOperand(ConstMovSel).getImm(); + if (MI->isInsideBundle() && canFoldInBundle(CP, ConstIndex)) { + TII->setImmOperand(MI, OpTable[SrcOp][1], ConstIndex); + MI->getOperand(SrcIdx).setReg(AMDGPU::ALU_CONST); + } else { + RegToConstIndex.erase(SrcMI); + } + } + } + } + + for (std::vector<unsigned>::iterator It = Defs.begin(), ItE = Defs.end(); + It != ItE; ++It) { + DenseMap<unsigned, MachineInstr *>::iterator SrcMI = + RegToConstIndex.find(*It); + if (SrcMI != RegToConstIndex.end()) { + SrcMI->second->eraseFromParent(); + RegToConstIndex.erase(SrcMI); + } + } + I = llvm::next(I); + } + + if (MBB.succ_empty()) { + for (DenseMap<unsigned, MachineInstr *>::iterator + DI = RegToConstIndex.begin(), DE = RegToConstIndex.end(); + DI != DE; ++DI) { + DI->second->eraseFromParent(); + } + } + } + return false; +} + +FunctionPass *createR600LowerConstCopy(TargetMachine &tm) { + return new R600LowerConstCopy(tm); +} + +} + + diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp index 4eb5efa..40aec83 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.cpp +++ b/lib/Target/R600/R600MachineFunctionInfo.cpp @@ -13,22 +13,6 @@ using namespace llvm; R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) - : MachineFunctionInfo(), - HasLinearInterpolation(false), - 
HasPerspectiveInterpolation(false) { + : MachineFunctionInfo() { memset(Outputs, 0, sizeof(Outputs)); - memset(StreamOutputs, 0, sizeof(StreamOutputs)); } - -unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const { - assert(HasPerspectiveInterpolation); - return 0; -} - -unsigned R600MachineFunctionInfo::GetIJLinearIndex() const { - assert(HasLinearInterpolation); - if (HasPerspectiveInterpolation) - return 1; - else - return 0; -} diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h index e97fb5b..4b901f4 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.h +++ b/lib/Target/R600/R600MachineFunctionInfo.h @@ -13,6 +13,7 @@ #ifndef R600MACHINEFUNCTIONINFO_H #define R600MACHINEFUNCTIONINFO_H +#include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" #include <vector> @@ -23,15 +24,9 @@ class R600MachineFunctionInfo : public MachineFunctionInfo { public: R600MachineFunctionInfo(const MachineFunction &MF); - std::vector<unsigned> ReservedRegs; + SmallVector<unsigned, 4> LiveOuts; + std::vector<unsigned> IndirectRegs; SDNode *Outputs[16]; - SDNode *StreamOutputs[64][4]; - bool HasLinearInterpolation; - bool HasPerspectiveInterpolation; - - unsigned GetIJLinearIndex() const; - unsigned GetIJPerspectiveIndex() const; - }; } // End llvm namespace diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp index a39f83d..bbd7995 100644 --- a/lib/Target/R600/R600RegisterInfo.cpp +++ b/lib/Target/R600/R600RegisterInfo.cpp @@ -15,6 +15,7 @@ #include "R600RegisterInfo.h" #include "AMDGPUTargetMachine.h" #include "R600Defines.h" +#include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" using namespace llvm; @@ -28,7 +29,6 @@ R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm, BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>(); Reserved.set(AMDGPU::ZERO); Reserved.set(AMDGPU::HALF); @@ -38,21 +38,30 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AMDGPU::NEG_ONE); Reserved.set(AMDGPU::PV_X); Reserved.set(AMDGPU::ALU_LITERAL_X); + Reserved.set(AMDGPU::ALU_CONST); Reserved.set(AMDGPU::PREDICATE_BIT); Reserved.set(AMDGPU::PRED_SEL_OFF); Reserved.set(AMDGPU::PRED_SEL_ZERO); Reserved.set(AMDGPU::PRED_SEL_ONE); - for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(), - E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) { + for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(), + E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) { Reserved.set(*I); } - for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(), - E = MFI->ReservedRegs.end(); I != E; ++I) { + for (TargetRegisterClass::iterator I = AMDGPU::TRegMemRegClass.begin(), + E = AMDGPU::TRegMemRegClass.end(); + I != E; ++I) { Reserved.set(*I); } + const R600InstrInfo *RII = static_cast<const R600InstrInfo*>(&TII); + std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF); + for (std::vector<unsigned>::iterator I = IndirectRegs.begin(), + E = IndirectRegs.end(); + I != E; ++I) { + Reserved.set(*I); + } return Reserved; } @@ -81,9 +90,10 @@ const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass( unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const { switch (Channel) { default: assert(!"Invalid channel index"); return 
0; - case 0: return AMDGPU::sel_x; - case 1: return AMDGPU::sel_y; - case 2: return AMDGPU::sel_z; - case 3: return AMDGPU::sel_w; + case 0: return AMDGPU::sub0; + case 1: return AMDGPU::sub1; + case 2: return AMDGPU::sub2; + case 3: return AMDGPU::sub3; } } + diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index d3d6d25..ce5994c 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -19,7 +19,7 @@ class R600RegWithChan <string name, bits<9> sel, string chan> : class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> { let Namespace = "AMDGPU"; - let SubRegIndices = [sel_x, sel_y, sel_z, sel_w]; + let SubRegIndices = [sub0, sub1, sub2, sub3]; let HWEncoding = encoding; } @@ -28,9 +28,11 @@ foreach Index = 0-127 in { // 32-bit Temporary Registers def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>; - // 32-bit Constant Registers (There are more than 128, this the number - // that is currently supported. - def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>; + // Indirect addressing offset registers + def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan, + Index, Chan>; + def TRegMem#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, + Chan>; } // 128-bit Temporary Registers def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW", @@ -42,7 +44,7 @@ foreach Index = 0-127 in { } // Array Base Register holding input in FS -foreach Index = 448-464 in { +foreach Index = 448-480 in { def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>; } @@ -61,19 +63,25 @@ def PREDICATE_BIT : R600Reg<"PredicateBit", 0>; def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>; def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>; def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>; +def AR_X : R600Reg<"AR.x", 0>; def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32, - (add (sequence "ArrayBase%u", 448, 464))>; + (add (sequence "ArrayBase%u", 448, 480))>; +// special registers for ALU src operands +// const buffer reference, SRCx_SEL contains index +def ALU_CONST : R600Reg<"CBuf", 0>; +// interpolation param reference, SRCx_SEL contains index +def ALU_PARAM : R600Reg<"Param", 0>; -def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, - (add (interleave - (interleave (sequence "C%u_X", 0, 127), - (sequence "C%u_Z", 0, 127)), - (interleave (sequence "C%u_Y", 0, 127), - (sequence "C%u_W", 0, 127))))>; +let isAllocatable = 0 in { + +// XXX: Only use the X channel, until we support wider stack widths +def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>; + +} // End isAllocatable = 0 def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, - (add (sequence "T%u_X", 0, 127))>; + (add (sequence "T%u_X", 0, 127), AR_X)>; def R600_TReg32_Y : RegisterClass <"AMDGPU", [f32, i32], 32, (add (sequence "T%u_Y", 0, 127))>; @@ -85,15 +93,16 @@ def R600_TReg32_W : RegisterClass <"AMDGPU", [f32, i32], 32, (add (sequence "T%u_W", 0, 127))>; def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, - (add (interleave - (interleave R600_TReg32_X, R600_TReg32_Z), - (interleave R600_TReg32_Y, R600_TReg32_W)))>; + (interleave R600_TReg32_X, R600_TReg32_Y, + R600_TReg32_Z, R600_TReg32_W)>; def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add R600_TReg32, - R600_CReg32, R600_ArrayBase, - ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>; + R600_Addr, + ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, 
NEG_ONE, NEG_HALF, + ALU_CONST, ALU_PARAM + )>; def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>; @@ -105,3 +114,33 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add (sequence "T%u_XYZW", 0, 127))> { let CopyCost = -1; } + +//===----------------------------------------------------------------------===// +// Register classes for indirect addressing +//===----------------------------------------------------------------------===// + +// Super register for all the Indirect Registers. This register class is used +// by the REG_SEQUENCE instruction to specify the registers to use for direct +// reads / writes which may be written / read by an indirect address. +class IndirectSuper<string n, list<Register> subregs> : + RegisterWithSubRegs<n, subregs> { + let Namespace = "AMDGPU"; + let SubRegIndices = + [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, + sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15]; +} + +def IndirectSuperReg : IndirectSuper<"Indirect", + [TRegMem0_X, TRegMem1_X, TRegMem2_X, TRegMem3_X, TRegMem4_X, TRegMem5_X, + TRegMem6_X, TRegMem7_X, TRegMem8_X, TRegMem9_X, TRegMem10_X, TRegMem11_X, + TRegMem12_X, TRegMem13_X, TRegMem14_X, TRegMem15_X] +>; + +def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add IndirectSuperReg)>; + +// This register class defines the registers that are the storage units for +// the "Indirect Addressing" pseudo memory space. +// XXX: Only use the X channel, until we support wider stack widths +def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32, + (add (sequence "TRegMem%u_X", 0, 16)) +>; diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp index f580377..2477e2a 100644 --- a/lib/Target/R600/SIAnnotateControlFlow.cpp +++ b/lib/Target/R600/SIAnnotateControlFlow.cpp @@ -147,7 +147,7 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) { /// \brief Is BB the last block saved on the stack ? 
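A note on the one-line fix that follows: calling back() on an empty container is undefined behavior, so the emptiness test must short-circuit first. A reduced sketch of the guarded pattern with stand-in types (the real code uses the pass's SmallVector-based stack; the names here are illustrative only):

#include <utility>
#include <vector>

struct BasicBlock; // stand-in for llvm::BasicBlock, used only by pointer

static std::vector<std::pair<BasicBlock *, int>> Stack;

static bool isTopOfStack(BasicBlock *BB) {
  // The && short-circuits, so Stack.back() is never evaluated
  // when the stack is empty.
  return !Stack.empty() && Stack.back().first == BB;
}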
bool SIAnnotateControlFlow::isTopOfStack(BasicBlock *BB) { - return Stack.back().first == BB; + return !Stack.empty() && Stack.back().first == BB; } /// \brief Pop the last saved value from the control flow stack diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 4c672ca..0a0fbd9 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -26,21 +26,22 @@ using namespace llvm; SITargetLowering::SITargetLowering(TargetMachine &TM) : AMDGPUTargetLowering(TM), - TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())) { + TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())), + TRI(TM.getRegisterInfo()) { addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass); addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass); addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); - addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass); - addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass); + addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass); - addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); - addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass); + addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass); + addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass); + addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass); + addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass); + addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass); computeRegisterProperties(); - setOperationAction(ISD::AND, MVT::i1, Custom); - setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::ADD, MVT::i32, Legal); @@ -62,63 +63,13 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MachineInstr * MI, MachineBasicBlock * BB) const { - const TargetInstrInfo * TII = getTargetMachine().getInstrInfo(); MachineRegisterInfo & MRI = BB->getParent()->getRegInfo(); MachineBasicBlock::iterator I = MI; - if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) { - AppendS_WAITCNT(MI, *BB, llvm::next(I)); - return BB; - } - switch (MI->getOpcode()) { default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); case AMDGPU::BRANCH: return BB; - case AMDGPU::CLAMP_SI: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - // VSRC1-2 are unused, but we still need to fill all the - // operand slots, so we just reuse the VSRC0 operand - .addOperand(MI->getOperand(1)) - .addOperand(MI->getOperand(1)) - .addImm(0) // ABS - .addImm(1) // CLAMP - .addImm(0) // OMOD - .addImm(0); // NEG - MI->eraseFromParent(); - break; - - case AMDGPU::FABS_SI: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - // VSRC1-2 are unused, but we still need to fill all the - // operand slots, so we just reuse the VSRC0 operand - .addOperand(MI->getOperand(1)) - .addOperand(MI->getOperand(1)) - .addImm(1) // ABS - .addImm(0) // CLAMP - .addImm(0) // OMOD - .addImm(0); // NEG - MI->eraseFromParent(); - break; - - case AMDGPU::FNEG_SI: - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - // VSRC1-2 are unused, but we still need to fill all the - // operand slots, so we just reuse the VSRC0 operand - .addOperand(MI->getOperand(1)) - .addOperand(MI->getOperand(1)) - .addImm(0) // 
ABS - .addImm(0) // CLAMP - .addImm(0) // OMOD - .addImm(1); // NEG - MI->eraseFromParent(); - break; case AMDGPU::SHADER_TYPE: BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType = MI->getOperand(0).getImm(); @@ -128,29 +79,13 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( case AMDGPU::SI_INTERP: LowerSI_INTERP(MI, *BB, I, MRI); break; - case AMDGPU::SI_INTERP_CONST: - LowerSI_INTERP_CONST(MI, *BB, I, MRI); - break; - case AMDGPU::SI_KIL: - LowerSI_KIL(MI, *BB, I, MRI); - break; case AMDGPU::SI_WQM: LowerSI_WQM(MI, *BB, I, MRI); break; - case AMDGPU::SI_V_CNDLT: - LowerSI_V_CNDLT(MI, *BB, I, MRI); - break; } return BB; } -void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I) const { - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT)) - .addImm(0); -} - - void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC) @@ -190,57 +125,6 @@ void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, MI->eraseFromParent(); } -void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI, - MachineBasicBlock &BB, MachineBasicBlock::iterator I, - MachineRegisterInfo &MRI) const { - MachineOperand dst = MI->getOperand(0); - MachineOperand attr_chan = MI->getOperand(1); - MachineOperand attr = MI->getOperand(2); - MachineOperand params = MI->getOperand(3); - unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass); - - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0) - .addOperand(params); - - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32)) - .addOperand(dst) - .addOperand(attr_chan) - .addOperand(attr) - .addReg(M0); - - MI->eraseFromParent(); -} - -void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { - // Clear this pixel from the exec mask if the operand is negative - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32), - AMDGPU::VCC) - .addReg(AMDGPU::SREG_LIT_0) - .addOperand(MI->getOperand(0)); - - MI->eraseFromParent(); -} - -void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { - unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - - BuildMI(BB, I, BB.findDebugLoc(I), - TII->get(AMDGPU::V_CMP_GT_F32_e32), - VCC) - .addReg(AMDGPU::SREG_LIT_0) - .addOperand(MI->getOperand(1)); - - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32_e32)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(3)) - .addOperand(MI->getOperand(2)) - .addReg(VCC); - - MI->eraseFromParent(); -} - EVT SITargetLowering::getSetCCResultType(EVT VT) const { return MVT::i1; } @@ -255,7 +139,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND); case ISD::INTRINSIC_WO_CHAIN: { unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); @@ -272,30 +155,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -/// \brief The function is for lowering i1 
operations on the -/// VCC register. -/// -/// In the VALU context, VCC is a one bit register, but in the -/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only -/// the SALU can perform operations on the VCC register, we need to promote -/// the operand types from i1 to i64 in order for tablegen to be able to match -/// this operation to the correct SALU instruction. We do this promotion by -/// wrapping the operands in a CopyToReg node. -/// -SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op, - SelectionDAG &DAG, - unsigned VCCNode) const { - DebugLoc DL = Op.getDebugLoc(); - - SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64, - DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, - Op.getOperand(0)), - DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, - Op.getOperand(1))); - - return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode); -} - /// \brief Helper function for LowerBRCOND static SDNode *findUser(SDValue Value, unsigned Opcode) { @@ -500,12 +359,252 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); } -#define NODE_NAME_CASE(node) case SIISD::node: return #node; +/// \brief Test if RegClass is one of the VSrc classes +static bool isVSrc(unsigned RegClass) { + return AMDGPU::VSrc_32RegClassID == RegClass || + AMDGPU::VSrc_64RegClassID == RegClass; +} + +/// \brief Test if RegClass is one of the SSrc classes +static bool isSSrc(unsigned RegClass) { + return AMDGPU::SSrc_32RegClassID == RegClass || + AMDGPU::SSrc_64RegClassID == RegClass; +} + +/// \brief Analyze the possible immediate value Op +/// +/// Returns -1 if it isn't an immediate, 0 if it's an inline immediate +/// and the immediate value if it's a literal immediate +int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const { + + union { + int32_t I; + float F; + } Imm; + + if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) + Imm.I = Node->getSExtValue(); + else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) + Imm.F = Node->getValueAPF().convertToFloat(); + else + return -1; // It isn't an immediate + + if ((Imm.I >= -16 && Imm.I <= 64) || + Imm.F == 0.5f || Imm.F == -0.5f || + Imm.F == 1.0f || Imm.F == -1.0f || + Imm.F == 2.0f || Imm.F == -2.0f || + Imm.F == 4.0f || Imm.F == -4.0f) + return 0; // It's an inline immediate + + return Imm.I; // It's a literal immediate +} + +/// \brief Try to fold an immediate directly into an instruction +bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate, + bool &ScalarSlotUsed) const { + + MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand); + if (Mov == 0 || !TII->isMov(Mov->getMachineOpcode())) + return false; + + const SDValue &Op = Mov->getOperand(0); + int32_t Value = analyzeImmediate(Op.getNode()); + if (Value == -1) { + // Not an immediate at all + return false; + + } else if (Value == 0) { + // Inline immediates can always be folded + Operand = Op; + return true; + + } else if (Value == Immediate) { + // Already folded literal immediate + Operand = Op; + return true; + + } else if (!ScalarSlotUsed && !Immediate) { + // Fold this literal immediate + ScalarSlotUsed = true; + Immediate = Value; + Operand = Op; + return true; -const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: return AMDGPUTargetLowering::getTargetNodeName(Opcode); - NODE_NAME_CASE(VCC_AND) - NODE_NAME_CASE(VCC_BITCAST) } + + return false; +} + +/// \brief Does "Op" fit into register class "RegClass"?
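An aside on the classification implemented in analyzeImmediate above: SI source operands can encode an inline constant for free (small integers and a few selected float values), while any other 32-bit value must occupy the instruction's single literal slot. A self-contained sketch of that test over the raw 32 bits, omitting the SDNode dispatch and the -1 "not an immediate" sentinel of the patch (names illustrative, not part of the patch):

#include <cstdint>
#include <cstring>

// Returns 0 if Bits can be encoded as an inline immediate,
// otherwise returns Bits itself, i.e. a value for the literal slot.
static int32_t classifyImmediate(int32_t Bits) {
  float F;
  std::memcpy(&F, &Bits, sizeof(F)); // the same 32 bits viewed as a float
  if ((Bits >= -16 && Bits <= 64) || // small integers
      F == 0.5f || F == -0.5f ||     // selected float constants
      F == 1.0f || F == -1.0f ||
      F == 2.0f || F == -2.0f ||
      F == 4.0f || F == -4.0f)
    return 0;
  return Bits;
}

As in the patch, both interpretations of the same bits are tested at once, which is exactly what the int/float union in analyzeImmediate achieves.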
+bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, SDValue &Op, + unsigned RegClass) const { + + MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + SDNode *Node = Op.getNode(); + + int OpClass; + if (MachineSDNode *MN = dyn_cast<MachineSDNode>(Node)) { + const MCInstrDesc &Desc = TII->get(MN->getMachineOpcode()); + OpClass = Desc.OpInfo[Op.getResNo()].RegClass; + + } else if (Node->getOpcode() == ISD::CopyFromReg) { + RegisterSDNode *Reg = cast<RegisterSDNode>(Node->getOperand(1).getNode()); + OpClass = MRI.getRegClass(Reg->getReg())->getID(); + + } else + return false; + + if (OpClass == -1) + return false; + + return TRI->getRegClass(RegClass)->hasSubClassEq(TRI->getRegClass(OpClass)); +} + +/// \brief Make sure that we don't exceed the number of allowed scalars +void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand, + unsigned RegClass, + bool &ScalarSlotUsed) const { + + // First map the operand's register class to a destination class + if (RegClass == AMDGPU::VSrc_32RegClassID) + RegClass = AMDGPU::VReg_32RegClassID; + else if (RegClass == AMDGPU::VSrc_64RegClassID) + RegClass = AMDGPU::VReg_64RegClassID; + else + return; + + // Nothing to do if they fit naturally + if (fitsRegClass(DAG, Operand, RegClass)) + return; + + // If the scalar slot isn't used yet use it now + if (!ScalarSlotUsed) { + ScalarSlotUsed = true; + return; + } + + // This is a conservative approach; it is possible that we can't determine + // the correct register class and copy too often, but better safe than sorry. + SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32); + SDNode *Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DebugLoc(), + Operand.getValueType(), Operand, RC); + Operand = SDValue(Node, 0); +} + +SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, + SelectionDAG &DAG) const { + + // Original encoding (either e32 or e64) + int Opcode = Node->getMachineOpcode(); + const MCInstrDesc *Desc = &TII->get(Opcode); + + unsigned NumDefs = Desc->getNumDefs(); + unsigned NumOps = Desc->getNumOperands(); + + // e64 version if available, -1 otherwise + int OpcodeE64 = AMDGPU::getVOPe64(Opcode); + const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64); + + assert(!DescE64 || DescE64->getNumDefs() == NumDefs); + assert(!DescE64 || DescE64->getNumOperands() == (NumOps + 4)); + + int32_t Immediate = Desc->getSize() == 4 ? 0 : -1; + bool HaveVSrc = false, HaveSSrc = false; + + // First figure out what we already have in this instruction + for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs; + i != e && Op < NumOps; ++i, ++Op) { + + unsigned RegClass = Desc->OpInfo[Op].RegClass; + if (isVSrc(RegClass)) + HaveVSrc = true; + else if (isSSrc(RegClass)) + HaveSSrc = true; + else + continue; + + int32_t Imm = analyzeImmediate(Node->getOperand(i).getNode()); + if (Imm != -1 && Imm != 0) { + // Literal immediate + Immediate = Imm; + } + } + + // If we neither have VSrc nor SSrc it makes no sense to continue + if (!HaveVSrc && !HaveSSrc) + return Node; + + // No scalar allowed when we have both VSrc and SSrc + bool ScalarSlotUsed = HaveVSrc && HaveSSrc; + + // Second, go over the operands and try to fold them + std::vector<SDValue> Ops; + bool Promote2e64 = false; + for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs; + i != e && Op < NumOps; ++i, ++Op) { + + const SDValue &Operand = Node->getOperand(i); + Ops.push_back(Operand); + + // Already folded immediate?
+ if (isa<ConstantSDNode>(Operand.getNode()) || + isa<ConstantFPSDNode>(Operand.getNode())) + continue; + + // Is this a VSrc or SSrc operand? + unsigned RegClass = Desc->OpInfo[Op].RegClass; + if (!isVSrc(RegClass) && !isSSrc(RegClass)) { + + if (i == 1 && Desc->isCommutable() && + fitsRegClass(DAG, Ops[0], RegClass) && + foldImm(Ops[1], Immediate, ScalarSlotUsed)) { + + assert(isVSrc(Desc->OpInfo[NumDefs].RegClass) || + isSSrc(Desc->OpInfo[NumDefs].RegClass)); + + // Swap commutable operands + SDValue Tmp = Ops[1]; + Ops[1] = Ops[0]; + Ops[0] = Tmp; + + } else if (DescE64 && !Immediate) { + // Test if it makes sense to switch to e64 encoding + + RegClass = DescE64->OpInfo[Op].RegClass; + int32_t TmpImm = -1; + if ((isVSrc(RegClass) || isSSrc(RegClass)) && + foldImm(Ops[i], TmpImm, ScalarSlotUsed)) { + + Immediate = -1; + Promote2e64 = true; + Desc = DescE64; + DescE64 = 0; + } + } + continue; + } + + // Try to fold the immediates + if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) { + // Folding didn't work, make sure we don't hit the SReg limit + ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed); + } + } + + if (Promote2e64) { + // Add the modifier flags while promoting + for (unsigned i = 0; i < 4; ++i) + Ops.push_back(DAG.getTargetConstant(0, MVT::i32)); + } + + // Add optional chain and glue + for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i) + Ops.push_back(Node->getOperand(i)); + + // Either create a completely new instruction or update the current one + if (Promote2e64) + return DAG.getMachineNode(OpcodeE64, Node->getDebugLoc(), + Node->getVTList(), Ops.data(), Ops.size()); + else + return DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); } diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index c088112..737162f 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -22,31 +22,25 @@ namespace llvm { class SITargetLowering : public AMDGPUTargetLowering { const SIInstrInfo * TII; + const TargetRegisterInfo * TRI; - /// Memory reads and writes are syncronized using the S_WAITCNT instruction. - /// This function takes the most conservative approach and inserts an - /// S_WAITCNT instruction after every read and write.
- void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I) const; void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, unsigned Opocde) const; void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; - void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const; - void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; - void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; - SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG, - unsigned VCCNode) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + bool foldImm(SDValue &Operand, int32_t &Immediate, + bool &ScalarSlotUsed) const; + bool fitsRegClass(SelectionDAG &DAG, SDValue &Op, unsigned RegClass) const; + void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand, + unsigned RegClass, bool &ScalarSlotUsed) const; + public: SITargetLowering(TargetMachine &tm); virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, @@ -54,7 +48,9 @@ public: virtual EVT getSetCCResultType(EVT VT) const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - virtual const char* getTargetNodeName(unsigned Opcode) const; + virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const; + + int32_t analyzeImmediate(const SDNode *N) const; }; } // End namespace llvm diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp new file mode 100644 index 0000000..24fc929 --- /dev/null +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -0,0 +1,353 @@ +//===-- SIInsertWaits.cpp - Insert S_WAITCNT instructions -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Insert wait instructions for memory reads and writes. +/// +/// Memory reads and writes are issued asynchronously, so we need to insert +/// S_WAITCNT instructions when we want to access any of their results or +/// overwrite any register that's used asynchronously.
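The wait itself is a single S_WAITCNT whose 16-bit immediate packs all three counters: VM_CNT in bits 3:0, EXP_CNT in bits 6:4 and LGKM_CNT in bits 10:8. The hardware maxima (15, 7 and 7, the WaitCounts constants below) mean "do not wait on this counter". A standalone sketch of the packing performed by the BuildMI call in insertWait (illustrative helper, not part of the patch):

#include <cstdint>

// Pack counter targets into the S_WAITCNT immediate; the instruction
// blocks until each counter has drained to at most its field's value.
static uint16_t packWaitcnt(unsigned VM, unsigned EXP, unsigned LGKM) {
  return static_cast<uint16_t>((VM & 0xF) |
                               ((EXP & 0x7) << 4) |
                               ((LGKM & 0x7) << 8));
}

For example, packWaitcnt(0, 7, 7) waits for all outstanding vector memory operations while leaving the export and LGKM queues untouched.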
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +namespace { + +/// \brief One variable for each of the hardware counters +typedef union { + struct { + unsigned VM; + unsigned EXP; + unsigned LGKM; + } Named; + unsigned Array[3]; + +} Counters; + +typedef Counters RegCounters[512]; +typedef std::pair<unsigned, unsigned> RegInterval; + +class SIInsertWaits : public MachineFunctionPass { + +private: + static char ID; + const SIInstrInfo *TII; + const SIRegisterInfo &TRI; + const MachineRegisterInfo *MRI; + + /// \brief Constant hardware limits + static const Counters WaitCounts; + + /// \brief Constant zero value + static const Counters ZeroCounts; + + /// \brief Counter values we have already waited on. + Counters WaitedOn; + + /// \brief Counter values for last instruction issued. + Counters LastIssued; + + /// \brief Registers used by async instructions. + RegCounters UsedRegs; + + /// \brief Registers defined by async instructions. + RegCounters DefinedRegs; + + /// \brief Different export instruction types seen since last wait. + unsigned ExpInstrTypesSeen; + + /// \brief Get increment/decrement amount for this instruction. + Counters getHwCounts(MachineInstr &MI); + + /// \brief Is operand relevant for async execution? + bool isOpRelevant(MachineOperand &Op); + + /// \brief Get register interval an operand affects. + RegInterval getRegInterval(MachineOperand &Op); + + /// \brief Handle an instruction's async components + void pushInstruction(MachineInstr &MI); + + /// \brief Insert the actual wait instruction + bool insertWait(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const Counters &Counts); + + /// \brief Resolve all operand dependencies to counter requirements + Counters handleOperands(MachineInstr &MI); + +public: + SIInsertWaits(TargetMachine &tm) : + MachineFunctionPass(ID), + TII(static_cast<const SIInstrInfo*>(tm.getInstrInfo())), + TRI(TII->getRegisterInfo()) { } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { + return "SI insert wait instructions"; + } + +}; + +} // End anonymous namespace + +char SIInsertWaits::ID = 0; + +const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } }; +const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } }; + +FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) { + return new SIInsertWaits(tm); +} + +Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { + + uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags; + Counters Result; + + Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT); + + // Only consider stores or EXP for EXP_CNT + Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT && + (MI.getOpcode() == AMDGPU::EXP || !MI.getDesc().mayStore())); + + // LGKM may use larger values + if (TSFlags & SIInstrFlags::LGKM_CNT) { + + MachineOperand &Op = MI.getOperand(0); + assert(Op.isReg() && "First LGKM operand must be a register!"); + + unsigned Reg = Op.getReg(); + unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize(); + Result.Named.LGKM = Size > 4 ?
2 : 1; + + } else { + Result.Named.LGKM = 0; + } + + return Result; +} + +bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { + + // Constants are always irrelevant + if (!Op.isReg()) + return false; + + // Defines are always relevant + if (Op.isDef()) + return true; + + // For exports all registers are relevant + MachineInstr &MI = *Op.getParent(); + if (MI.getOpcode() == AMDGPU::EXP) + return true; + + // For stores the stored value is also relevant + if (!MI.getDesc().mayStore()) + return false; + + for (MachineInstr::mop_iterator I = MI.operands_begin(), + E = MI.operands_end(); I != E; ++I) { + + if (I->isReg() && I->isUse()) + return Op.isIdenticalTo(*I); + } + + return false; +} + +RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) { + + if (!Op.isReg()) + return std::make_pair(0, 0); + + unsigned Reg = Op.getReg(); + unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize(); + + assert(Size >= 4); + + RegInterval Result; + Result.first = TRI.getEncodingValue(Reg); + Result.second = Result.first + Size / 4; + + return Result; +} + +void SIInsertWaits::pushInstruction(MachineInstr &MI) { + + // Get the hardware counter increments and sum them up + Counters Increment = getHwCounts(MI); + unsigned Sum = 0; + + for (unsigned i = 0; i < 3; ++i) { + LastIssued.Array[i] += Increment.Array[i]; + Sum += Increment.Array[i]; + } + + // If we don't increase anything then that's it + if (Sum == 0) + return; + + // Remember which export instructions we have seen + if (Increment.Named.EXP) { + ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2; + } + + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + + MachineOperand &Op = MI.getOperand(i); + if (!isOpRelevant(Op)) + continue; + + RegInterval Interval = getRegInterval(Op); + for (unsigned j = Interval.first; j < Interval.second; ++j) { + + // Remember which registers we define + if (Op.isDef()) + DefinedRegs[j] = LastIssued; + + // and which one we are using + if (Op.isUse()) + UsedRegs[j] = LastIssued; + } + } +} + +bool SIInsertWaits::insertWait(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const Counters &Required) { + + // End of program? No need to wait on anything + if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM) + return false; + + // Figure out if the async instructions execute in order + bool Ordered[3]; + + // VM_CNT is always ordered + Ordered[0] = true; + + // EXP_CNT is unordered if we have both EXP & VM-writes + Ordered[1] = ExpInstrTypesSeen == 3; + + // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS + Ordered[2] = false; + + // The values we are going to put into the S_WAITCNT instruction + Counters Counts = WaitCounts; + + // Do we really need to wait? 
+ bool NeedWait = false; + + for (unsigned i = 0; i < 3; ++i) { + + if (Required.Array[i] <= WaitedOn.Array[i]) + continue; + + NeedWait = true; + + if (Ordered[i]) { + unsigned Value = LastIssued.Array[i] - Required.Array[i]; + + // Adjust the value to the real hardware possibilities + Counts.Array[i] = std::min(Value, WaitCounts.Array[i]); + + } else + Counts.Array[i] = 0; + + // Remember what we have waited on + WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i]; + } + + if (!NeedWait) + return false; + + // Reset EXP_CNT instruction types + if (Counts.Named.EXP == 0) + ExpInstrTypesSeen = 0; + + // Build the wait instruction + BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) + .addImm((Counts.Named.VM & 0xF) | + ((Counts.Named.EXP & 0x7) << 4) | + ((Counts.Named.LGKM & 0x7) << 8)); + + return true; +} + +/// \brief Helper function for handleOperands +static void increaseCounters(Counters &Dst, const Counters &Src) { + + for (unsigned i = 0; i < 3; ++i) + Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]); +} + +Counters SIInsertWaits::handleOperands(MachineInstr &MI) { + + Counters Result = ZeroCounts; + + // For each register affected by this + // instruction, increase the result sequence + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + + MachineOperand &Op = MI.getOperand(i); + RegInterval Interval = getRegInterval(Op); + for (unsigned j = Interval.first; j < Interval.second; ++j) { + + if (Op.isDef()) + increaseCounters(Result, UsedRegs[j]); + + if (Op.isUse()) + increaseCounters(Result, DefinedRegs[j]); + } + } + + return Result; +} + +bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { + + bool Changes = false; + + MRI = &MF.getRegInfo(); + + WaitedOn = ZeroCounts; + LastIssued = ZeroCounts; + + memset(&UsedRegs, 0, sizeof(UsedRegs)); + memset(&DefinedRegs, 0, sizeof(DefinedRegs)); + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); + BI != BE; ++BI) { + + MachineBasicBlock &MBB = *BI; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + + Changes |= insertWait(MBB, I, handleOperands(*I)); + pushInstruction(*I); + } + + // Wait for everything at the end of the MBB + Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued); + } + + return Changes; +} diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index aea3b5a..fe417d6 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -1,4 +1,4 @@ -//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===// +//===-- SIInstrFormats.td - SI Instruction Encodings ----------------------===// // // The LLVM Compiler Infrastructure // @@ -9,138 +9,418 @@ // // SI Instruction format definitions. // -// Instructions with _32 take 32-bit operands. -// Instructions with _64 take 64-bit operands. -// -// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit -// encoding is the standard encoding, but instruction that make use of -// any of the instruction modifiers must use the 64-bit encoding. -// -// Instructions with _e32 use the 32-bit encoding. -// Instructions with _e64 use the 64-bit encoding.
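The _e32/_e64 split survives the rewrite below: the 32-bit VOP1/VOP2/VOPC encodings have no room for source modifiers, so the 64-bit VOP3 form adds the ABS, CLAMP, OMOD and NEG fields, and PostISelFolding in SIISelLowering.cpp appends four zero modifier operands whenever it promotes an instruction to e64. A sketch of the 64-bit word following the VOP3 field layout defined below (an illustrative packer, not an encoder from the patch):

#include <cstdint>

// Pack a VOP3 (e64) instruction word; field positions follow the
// VOP3 class below.
static uint64_t packVOP3(unsigned Op, unsigned VDst, unsigned Src0,
                         unsigned Src1, unsigned Src2, unsigned Abs,
                         unsigned Clamp, unsigned OMod, unsigned Neg) {
  uint64_t Inst = 0;
  Inst |= (uint64_t)(VDst & 0xFF);        // Inst{7-0}   VDST
  Inst |= (uint64_t)(Abs & 0x7) << 8;     // Inst{10-8}  ABS
  Inst |= (uint64_t)(Clamp & 0x1) << 11;  // Inst{11}    CLAMP
  Inst |= (uint64_t)(Op & 0x1FF) << 17;   // Inst{25-17} opcode
  Inst |= (uint64_t)0x34 << 26;           // Inst{31-26} VOP3 encoding
  Inst |= (uint64_t)(Src0 & 0x1FF) << 32; // Inst{40-32} SRC0
  Inst |= (uint64_t)(Src1 & 0x1FF) << 41; // Inst{49-41} SRC1
  Inst |= (uint64_t)(Src2 & 0x1FF) << 50; // Inst{58-50} SRC2
  Inst |= (uint64_t)(OMod & 0x3) << 59;   // Inst{60-59} OMOD
  Inst |= (uint64_t)(Neg & 0x7) << 61;    // Inst{63-61} NEG
  return Inst;
}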
-// //===----------------------------------------------------------------------===// -class VOP3b_2IN <bits<9> op, string opName, RegisterClass dstClass, - RegisterClass src0Class, RegisterClass src1Class, - list<dag> pattern> - : VOP3b <op, (outs dstClass:$vdst), - (ins src0Class:$src0, src1Class:$src1, InstFlag:$src2, InstFlag:$sdst, - InstFlag:$omod, InstFlag:$neg), - opName, pattern ->; +class InstSI <dag outs, dag ins, string asm, list<dag> pattern> : + AMDGPUInst<outs, ins, asm, pattern> { + field bits<1> VM_CNT = 0; + field bits<1> EXP_CNT = 0; + field bits<1> LGKM_CNT = 0; -class VOP3_1_32 <bits<9> op, string opName, list<dag> pattern> - : VOP3b_2IN <op, opName, SReg_1, AllReg_32, VReg_32, pattern>; + let TSFlags{0} = VM_CNT; + let TSFlags{1} = EXP_CNT; + let TSFlags{2} = LGKM_CNT; +} -class VOP3_32 <bits<9> op, string opName, list<dag> pattern> - : VOP3 <op, (outs VReg_32:$dst), (ins AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>; +class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> : + InstSI <outs, ins, asm, pattern> { -class VOP3_64 <bits<9> op, string opName, list<dag> pattern> - : VOP3 <op, (outs VReg_64:$dst), (ins AllReg_64:$src0, VReg_64:$src1, VReg_64:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>; + field bits<32> Inst; + let Size = 4; +} +class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> : + InstSI <outs, ins, asm, pattern> { -class SOP1_32 <bits<8> op, string opName, list<dag> pattern> - : SOP1 <op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern>; + field bits<64> Inst; + let Size = 8; +} -class SOP1_64 <bits<8> op, string opName, list<dag> pattern> - : SOP1 <op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern>; +//===----------------------------------------------------------------------===// +// Scalar operations +//===----------------------------------------------------------------------===// -class SOP2_32 <bits<7> op, string opName, list<dag> pattern> - : SOP2 <op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>; +class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc32<outs, ins, asm, pattern> { -class SOP2_64 <bits<7> op, string opName, list<dag> pattern> - : SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>; + bits<7> SDST; + bits<8> SSRC0; -class SOP2_VCC <bits<7> op, string opName, list<dag> pattern> - : SOP2 <op, (outs SReg_1:$vcc), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>; + let Inst{7-0} = SSRC0; + let Inst{15-8} = op; + let Inst{22-16} = SDST; + let Inst{31-23} = 0x17d; //encoding; -class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc, - string opName, list<dag> pattern> : - VOP1 < - op, (outs vrc:$dst), (ins arc:$src0), opName, pattern - >; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} -multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> { - def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>; - def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; +class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc32 <outs, ins, asm, pattern> { + + bits<7> SDST; + bits<8> SSRC0; + bits<8> SSRC1; + + let Inst{7-0} = SSRC0; + let Inst{15-8} = SSRC1; + let Inst{22-16} = SDST; + let Inst{29-23} = op; + let Inst{31-30} = 0x2; // encoding + + let mayLoad = 0; + let mayStore = 0; + let 
hasSideEffects = 0; } -multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> { +class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc32<outs, ins, asm, pattern> { + + bits<8> SSRC0; + bits<8> SSRC1; - def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>; + let Inst{7-0} = SSRC0; + let Inst{15-8} = SSRC1; + let Inst{22-16} = op; + let Inst{31-23} = 0x17e; - def _e64 : VOP3_64 < - {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; + let DisableEncoding = "$dst"; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; } -class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc, - string opName, list<dag> pattern> : - VOP2 < - op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern - >; +class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc32 <outs, ins , asm, pattern> { -multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> { + bits <7> SDST; + bits <16> SIMM16; + + let Inst{15-0} = SIMM16; + let Inst{22-16} = SDST; + let Inst{27-23} = op; + let Inst{31-28} = 0xb; //encoding + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} - def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>; +class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 < + (outs), + ins, + asm, + pattern > { - def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; + bits <16> SIMM16; + + let Inst{15-0} = SIMM16; + let Inst{22-16} = op; + let Inst{31-23} = 0x17f; // encoding + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; } -multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> { - def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>; +class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm, + list<dag> pattern> : Enc32<outs, ins, asm, pattern> { + + bits<7> SDST; + bits<6> SBASE; + bits<8> OFFSET; + + let Inst{7-0} = OFFSET; + let Inst{8} = imm; + let Inst{14-9} = SBASE; + let Inst{21-15} = SDST; + let Inst{26-22} = op; + let Inst{31-27} = 0x18; //encoding + + let LGKM_CNT = 1; +} - def _e64 : VOP3_64 < - {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; +//===----------------------------------------------------------------------===// +// Vector ALU operations +//===----------------------------------------------------------------------===// + +let Uses = [EXEC] in { + +class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc32 <outs, ins, asm, pattern> { + + bits<8> VDST; + bits<9> SRC0; + + let Inst{8-0} = SRC0; + let Inst{16-9} = op; + let Inst{24-17} = VDST; + let Inst{31-25} = 0x3f; //encoding + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc32 <outs, ins, asm, pattern> { + + bits<8> VDST; + bits<9> SRC0; + bits<8> VSRC1; + + let Inst{8-0} = SRC0; + let Inst{16-9} = VSRC1; + let Inst{24-17} = VDST; + let Inst{30-25} = op; + let Inst{31} = 0x0; //encoding + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; } -class SOPK_32 <bits<5> op, string opName, list<dag> pattern> - : SOPK <op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern>; +class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc64 <outs, ins, asm, pattern> { + + bits<8> VDST; + bits<9> SRC0; + bits<9> SRC1; + bits<9> SRC2; + bits<3> ABS; + bits<1> CLAMP; + bits<2> OMOD; + bits<3> NEG; + + 
let Inst{7-0} = VDST; + let Inst{10-8} = ABS; + let Inst{11} = CLAMP; + let Inst{25-17} = op; + let Inst{31-26} = 0x34; //encoding + let Inst{40-32} = SRC0; + let Inst{49-41} = SRC1; + let Inst{58-50} = SRC2; + let Inst{60-59} = OMOD; + let Inst{63-61} = NEG; + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc64 <outs, ins, asm, pattern> { + + bits<8> VDST; + bits<9> SRC0; + bits<9> SRC1; + bits<9> SRC2; + bits<7> SDST; + bits<2> OMOD; + bits<3> NEG; + + let Inst{7-0} = VDST; + let Inst{14-8} = SDST; + let Inst{25-17} = op; + let Inst{31-26} = 0x34; //encoding + let Inst{40-32} = SRC0; + let Inst{49-41} = SRC1; + let Inst{58-50} = SRC2; + let Inst{60-59} = OMOD; + let Inst{63-61} = NEG; + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} -class SOPK_64 <bits<5> op, string opName, list<dag> pattern> - : SOPK <op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern>; +class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> : + Enc32 <(outs VCCReg:$dst), ins, asm, pattern> { + + bits<9> SRC0; + bits<8> VSRC1; + + let Inst{8-0} = SRC0; + let Inst{16-9} = VSRC1; + let Inst{24-17} = op; + let Inst{31-25} = 0x3e; + + let DisableEncoding = "$dst"; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} -class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc, - string opName, list<dag> pattern> : - VOPC < - op, (ins arc:$src0, vrc:$src1), opName, pattern - >; +class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc32 <outs, ins, asm, pattern> { -multiclass VOPC_32 <bits<9> op, string opName, list<dag> pattern> { + bits<8> VDST; + bits<8> VSRC; + bits<2> ATTRCHAN; + bits<6> ATTR; - def _e32 : VOPC_Helper < - {op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - VReg_32, AllReg_32, opName, pattern - >; + let Inst{7-0} = VSRC; + let Inst{9-8} = ATTRCHAN; + let Inst{15-10} = ATTR; + let Inst{17-16} = op; + let Inst{25-18} = VDST; + let Inst{31-26} = 0x32; // encoding - def _e64 : VOP3_1_32 < - op, - opName, pattern - >; + let neverHasSideEffects = 1; + let mayLoad = 1; + let mayStore = 0; } -multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> { +} // End Uses = [EXEC] - def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>; +//===----------------------------------------------------------------------===// +// Vector I/O operations +//===----------------------------------------------------------------------===// - def _e64 : VOP3_64 < - {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, - opName, [] - >; +let Uses = [EXEC] in { + +class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc64<outs, ins, asm, pattern> { + + bits<8> VDATA; + bits<12> OFFSET; + bits<1> OFFEN; + bits<1> IDXEN; + bits<1> GLC; + bits<1> ADDR64; + bits<1> LDS; + bits<8> VADDR; + bits<5> SRSRC; + bits<1> SLC; + bits<1> TFE; + bits<8> SOFFSET; + + let Inst{11-0} = OFFSET; + let Inst{12} = OFFEN; + let Inst{13} = IDXEN; + let Inst{14} = GLC; + let Inst{15} = ADDR64; + let Inst{16} = LDS; + let Inst{24-18} = op; + let Inst{31-26} = 0x38; //encoding + let Inst{39-32} = VADDR; + let Inst{47-40} = VDATA; + let Inst{52-48} = SRSRC; + let Inst{54} = SLC; + let Inst{55} = TFE; + let Inst{63-56} = SOFFSET; + + let VM_CNT = 1; + let EXP_CNT = 1; + + let neverHasSideEffects = 1; } -class SOPC_32 <bits<7> op, string opName, list<dag> pattern> - : SOPC <op, (outs SCCReg:$dst), (ins 
SReg_32:$src0, SReg_32:$src1), opName, pattern>; +class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc64<outs, ins, asm, pattern> { + + bits<8> VDATA; + bits<12> OFFSET; + bits<1> OFFEN; + bits<1> IDXEN; + bits<1> GLC; + bits<1> ADDR64; + bits<4> DFMT; + bits<3> NFMT; + bits<8> VADDR; + bits<5> SRSRC; + bits<1> SLC; + bits<1> TFE; + bits<8> SOFFSET; + + let Inst{11-0} = OFFSET; + let Inst{12} = OFFEN; + let Inst{13} = IDXEN; + let Inst{14} = GLC; + let Inst{15} = ADDR64; + let Inst{18-16} = op; + let Inst{22-19} = DFMT; + let Inst{25-23} = NFMT; + let Inst{31-26} = 0x3a; //encoding + let Inst{39-32} = VADDR; + let Inst{47-40} = VDATA; + let Inst{52-48} = SRSRC; + let Inst{54} = SLC; + let Inst{55} = TFE; + let Inst{63-56} = SOFFSET; + + let VM_CNT = 1; + let EXP_CNT = 1; + + let neverHasSideEffects = 1; +} -class SOPC_64 <bits<7> op, string opName, list<dag> pattern> - : SOPC <op, (outs SCCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>; +class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc64 <outs, ins, asm, pattern> { + + bits<8> VDATA; + bits<4> DMASK; + bits<1> UNORM; + bits<1> GLC; + bits<1> DA; + bits<1> R128; + bits<1> TFE; + bits<1> LWE; + bits<1> SLC; + bits<8> VADDR; + bits<5> SRSRC; + bits<5> SSAMP; + + let Inst{11-8} = DMASK; + let Inst{12} = UNORM; + let Inst{13} = GLC; + let Inst{14} = DA; + let Inst{15} = R128; + let Inst{16} = TFE; + let Inst{17} = LWE; + let Inst{24-18} = op; + let Inst{25} = SLC; + let Inst{31-26} = 0x3c; + let Inst{39-32} = VADDR; + let Inst{47-40} = VDATA; + let Inst{52-48} = SRSRC; + let Inst{57-53} = SSAMP; + + let VM_CNT = 1; + let EXP_CNT = 1; +} + +def EXP : Enc64< + (outs), + (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm, + VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3), + "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3", + [] > { + + bits<4> EN; + bits<6> TGT; + bits<1> COMPR; + bits<1> DONE; + bits<1> VM; + bits<8> VSRC0; + bits<8> VSRC1; + bits<8> VSRC2; + bits<8> VSRC3; + + let Inst{3-0} = EN; + let Inst{9-4} = TGT; + let Inst{10} = COMPR; + let Inst{11} = DONE; + let Inst{12} = VM; + let Inst{31-26} = 0x3e; + let Inst{39-32} = VSRC0; + let Inst{47-40} = VSRC1; + let Inst{55-48} = VSRC2; + let Inst{63-56} = VSRC3; + + let EXP_CNT = 1; +} +} // End Uses = [EXEC] diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index c6ad4d5..d9dbd6a 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -41,7 +41,15 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // never be necessary. 
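The MUBUF, MTBUF, MIMG and EXP formats above all raise the new VM_CNT/EXP_CNT counter fields that InstSI routes into TSFlags{0-2}, replacing the old catch-all NEED_WAIT bit (see the SIInstrFlags change further down). A minimal standalone sketch of how a wait-insertion pass could query such flags; FakeDesc is a hypothetical stand-in for llvm::MCInstrDesc, not the in-tree API:

    #include <cassert>
    #include <cstdint>

    namespace SIInstrFlags {
    enum Flags {
      VM_CNT   = 1 << 0, // vector memory operations outstanding
      EXP_CNT  = 1 << 1, // exports outstanding
      LGKM_CNT = 1 << 2  // LDS/GDS, constant and message operations outstanding
    };
    }

    // Hypothetical stand-in for llvm::MCInstrDesc; only TSFlags matters here.
    struct FakeDesc { uint64_t TSFlags; };

    // An instruction touches the vmcnt counter iff its VM_CNT flag is set.
    static bool affectsVMCnt(const FakeDesc &D) {
      return (D.TSFlags & SIInstrFlags::VM_CNT) != 0;
    }

    int main() {
      FakeDesc BufferLoad = { SIInstrFlags::VM_CNT | SIInstrFlags::EXP_CNT };
      FakeDesc SmemLoad   = { SIInstrFlags::LGKM_CNT };
      assert(affectsVMCnt(BufferLoad) && !affectsVMCnt(SmemLoad));
      return 0;
    }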
assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC); - if (AMDGPU::SReg_64RegClass.contains(DestReg)) { + if (AMDGPU::VReg_64RegClass.contains(DestReg)) { + assert(AMDGPU::VReg_64RegClass.contains(SrcReg) || + AMDGPU::SReg_64RegClass.contains(SrcReg)); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub0)) + .addReg(RI.getSubReg(SrcReg, AMDGPU::sub0), getKillRegState(KillSrc)) + .addReg(DestReg, RegState::Define | RegState::Implicit); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub1)) + .addReg(RI.getSubReg(SrcReg, AMDGPU::sub1), getKillRegState(KillSrc)); + } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) { assert(AMDGPU::SReg_64RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); @@ -58,9 +66,19 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } } +MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, + bool NewMI) const { + + if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg() || + !MI->getOperand(2).isReg()) + return 0; + + return TargetInstrInfo::commuteInstruction(MI, NewMI); +} + MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg, int64_t Imm) const { - MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc()); + MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_B32_e32), DebugLoc()); MachineInstrBuilder MIB(*MF, MI); MIB.addReg(DstReg, RegState::Define); MIB.addImm(Imm); @@ -76,9 +94,6 @@ bool SIInstrInfo::isMov(unsigned Opcode) const { case AMDGPU::S_MOV_B64: case AMDGPU::V_MOV_B32_e32: case AMDGPU::V_MOV_B32_e64: - case AMDGPU::V_MOV_IMM_F32: - case AMDGPU::V_MOV_IMM_I32: - case AMDGPU::S_MOV_IMM_I32: return true; } } @@ -87,3 +102,51 @@ bool SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { return RC != &AMDGPU::EXECRegRegClass; } + +//===----------------------------------------------------------------------===// +// Indirect addressing callbacks +//===----------------------------------------------------------------------===// + +unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex, + unsigned Channel) const { + assert(Channel == 0); + return RegIndex; +} + + +int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const { + llvm_unreachable("Unimplemented"); +} + +int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const { + llvm_unreachable("Unimplemented"); +} + +const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass( + unsigned SourceReg) const { + llvm_unreachable("Unimplemented"); +} + +const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const { + llvm_unreachable("Unimplemented"); +} + +MachineInstrBuilder SIInstrInfo::buildIndirectWrite( + MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, + unsigned Address, unsigned OffsetReg) const { + llvm_unreachable("Unimplemented"); +} + +MachineInstrBuilder SIInstrInfo::buildIndirectRead( + MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, + unsigned Address, unsigned OffsetReg) const { + llvm_unreachable("Unimplemented"); +} + +const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const { + llvm_unreachable("Unimplemented"); +} diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 631f6c0..5789af5 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -35,11 +35,8 @@ public: unsigned 
DestReg, unsigned SrcReg, bool KillSrc) const; - /// \returns the encoding type of this instruction. - unsigned getEncodingType(const MachineInstr &MI) const; - - /// \returns the size of this instructions encoding in number of bytes. - unsigned getEncodingBytes(const MachineInstr &MI) const; + virtual MachineInstr *commuteInstruction(MachineInstr *MI, + bool NewMI=false) const; virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg, int64_t Imm) const; @@ -48,14 +45,48 @@ public: virtual bool isMov(unsigned Opcode) const; virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; + + virtual int getIndirectIndexBegin(const MachineFunction &MF) const; + + virtual int getIndirectIndexEnd(const MachineFunction &MF) const; + + virtual unsigned calculateIndirectAddress(unsigned RegIndex, + unsigned Channel) const; + + virtual const TargetRegisterClass *getIndirectAddrStoreRegClass( + unsigned SourceReg) const; + + virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const; + + virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, + unsigned Address, + unsigned OffsetReg) const; + + virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, + unsigned Address, + unsigned OffsetReg) const; + + virtual const TargetRegisterClass *getSuperIndirectRegClass() const; }; +namespace AMDGPU { + + int getVOPe64(uint16_t Opcode); + +} // End namespace AMDGPU + } // End namespace llvm namespace SIInstrFlags { enum Flags { // First 4 bits are the instruction encoding - NEED_WAIT = 1 << 4 + VM_CNT = 1 << 0, + EXP_CNT = 1 << 1, + LGKM_CNT = 1 << 2 }; } diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 873a451..d6c3f06 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -1,4 +1,4 @@ -//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===// +//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===// // // The LLVM Compiler Infrastructure // @@ -8,521 +8,280 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// SI DAG Profiles -//===----------------------------------------------------------------------===// -def SDTVCCBinaryOp : SDTypeProfile<1, 2, [ - SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2> -]>; - -//===----------------------------------------------------------------------===// // SI DAG Nodes //===----------------------------------------------------------------------===// -// and operation on 64-bit wide vcc -def SIsreg1_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp, - [SDNPCommutative, SDNPAssociative] ->; - -// Special bitcast node for sharing VCC register between VALU and SALU -def SIsreg1_bitcast : SDNode<"SIISD::VCC_BITCAST", - SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]> +// SMRD takes a 64bit memory address and can only add a 32bit offset +def SIadd64bit32bit : SDNode<"ISD::ADD", + SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, i64>, SDTCisVT<2, i32>]> >; -// and operation on 64-bit wide vcc -def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp, - [SDNPCommutative, SDNPAssociative] +// Transformation function, extract the lower 32bit of a 64bit immediate +def LO32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32); +}]>; + +// 
Transformation function, extract the upper 32bit of a 64bit immediate +def HI32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32); +}]>; + +def IMM8bitDWORD : ImmLeaf < + i32, [{ + return (Imm & ~0x3FC) == 0; + }], SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant( + N->getZExtValue() >> 2, MVT::i32); + }]> >; -// Special bitcast node for sharing VCC register between VALU and SALU -def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST", - SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]> ->; - -class InstSI <dag outs, dag ins, string asm, list<dag> pattern> : - AMDGPUInst<outs, ins, asm, pattern> { - - field bits<4> EncodingType = 0; - field bits<1> NeedWait = 0; - - let TSFlags{3-0} = EncodingType; - let TSFlags{4} = NeedWait; - -} - -class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> : - InstSI <outs, ins, asm, pattern> { - - field bits<32> Inst; -} - -class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> : - InstSI <outs, ins, asm, pattern> { - - field bits<64> Inst; -} - -class SIOperand <ValueType vt, dag opInfo>: Operand <vt> { - let EncoderMethod = "encodeOperand"; - let MIOperandInfo = opInfo; -} - -def IMM16bit : ImmLeaf < +def IMM12bit : ImmLeaf < i16, - [{return isInt<16>(Imm);}] + [{return isUInt<12>(Imm);}] >; -def IMM8bit : ImmLeaf < - i32, - [{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}] ->; +class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{ + return ((const SITargetLowering &)TLI).analyzeImmediate(N) == 0; +}]>; -def IMM12bit : ImmLeaf < - i16, - [{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}] ->; +//===----------------------------------------------------------------------===// +// SI assembler operands +//===----------------------------------------------------------------------===// -def IMM32bitIn64bit : ImmLeaf < - i64, - [{return isInt<32>(Imm);}] ->; +def SIOperand { + int ZERO = 0x80; + int VCC = 0x6A; +} class GPR4Align <RegisterClass rc> : Operand <vAny> { let EncoderMethod = "GPR4AlignEncode"; let MIOperandInfo = (ops rc:$reg); } -class GPR2Align <RegisterClass rc, ValueType vt> : Operand <vt> { +class GPR2Align <RegisterClass rc> : Operand <iPTR> { let EncoderMethod = "GPR2AlignEncode"; let MIOperandInfo = (ops rc:$reg); } -def SMRDmemrr : Operand<iPTR> { - let MIOperandInfo = (ops SReg_64, SReg_32); - let EncoderMethod = "GPR2AlignEncode"; -} - -def SMRDmemri : Operand<iPTR> { - let MIOperandInfo = (ops SReg_64, i32imm); - let EncoderMethod = "SMRDmemriEncode"; -} - -def ADDR_Reg : ComplexPattern<i64, 2, "SelectADDRReg", [], []>; -def ADDR_Offset8 : ComplexPattern<i64, 2, "SelectADDR8BitOffset", [], []>; - -let Uses = [EXEC] in { - -def EXP : Enc64< - (outs), - (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm, - VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3), - "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3", - [] > { - - bits<4> EN; - bits<6> TGT; - bits<1> COMPR; - bits<1> DONE; - bits<1> VM; - bits<8> VSRC0; - bits<8> VSRC1; - bits<8> VSRC2; - bits<8> VSRC3; - - let Inst{3-0} = EN; - let Inst{9-4} = TGT; - let Inst{10} = COMPR; - let Inst{11} = DONE; - let Inst{12} = VM; - let Inst{31-26} = 0x3e; - let Inst{39-32} = VSRC0; - let Inst{47-40} = VSRC1; - let Inst{55-48} = VSRC2; - let Inst{63-56} = VSRC3; - let EncodingType = 0; //SIInstrEncodingType::EXP - - let NeedWait = 1; - let usesCustomInserter = 1; -} - -class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc64 <outs, ins, asm, pattern> { - - bits<8> 
VDATA; - bits<4> DMASK; - bits<1> UNORM; - bits<1> GLC; - bits<1> DA; - bits<1> R128; - bits<1> TFE; - bits<1> LWE; - bits<1> SLC; - bits<8> VADDR; - bits<5> SRSRC; - bits<5> SSAMP; - - let Inst{11-8} = DMASK; - let Inst{12} = UNORM; - let Inst{13} = GLC; - let Inst{14} = DA; - let Inst{15} = R128; - let Inst{16} = TFE; - let Inst{17} = LWE; - let Inst{24-18} = op; - let Inst{25} = SLC; - let Inst{31-26} = 0x3c; - let Inst{39-32} = VADDR; - let Inst{47-40} = VDATA; - let Inst{52-48} = SRSRC; - let Inst{57-53} = SSAMP; - - let EncodingType = 2; //SIInstrEncodingType::MIMG - - let NeedWait = 1; - let usesCustomInserter = 1; -} - -class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc64<outs, ins, asm, pattern> { - - bits<8> VDATA; - bits<12> OFFSET; - bits<1> OFFEN; - bits<1> IDXEN; - bits<1> GLC; - bits<1> ADDR64; - bits<4> DFMT; - bits<3> NFMT; - bits<8> VADDR; - bits<5> SRSRC; - bits<1> SLC; - bits<1> TFE; - bits<8> SOFFSET; - - let Inst{11-0} = OFFSET; - let Inst{12} = OFFEN; - let Inst{13} = IDXEN; - let Inst{14} = GLC; - let Inst{15} = ADDR64; - let Inst{18-16} = op; - let Inst{22-19} = DFMT; - let Inst{25-23} = NFMT; - let Inst{31-26} = 0x3a; //encoding - let Inst{39-32} = VADDR; - let Inst{47-40} = VDATA; - let Inst{52-48} = SRSRC; - let Inst{54} = SLC; - let Inst{55} = TFE; - let Inst{63-56} = SOFFSET; - let EncodingType = 3; //SIInstrEncodingType::MTBUF - - let NeedWait = 1; - let usesCustomInserter = 1; - let neverHasSideEffects = 1; -} - -class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc64<outs, ins, asm, pattern> { - - bits<8> VDATA; - bits<12> OFFSET; - bits<1> OFFEN; - bits<1> IDXEN; - bits<1> GLC; - bits<1> ADDR64; - bits<1> LDS; - bits<8> VADDR; - bits<5> SRSRC; - bits<1> SLC; - bits<1> TFE; - bits<8> SOFFSET; - - let Inst{11-0} = OFFSET; - let Inst{12} = OFFEN; - let Inst{13} = IDXEN; - let Inst{14} = GLC; - let Inst{15} = ADDR64; - let Inst{16} = LDS; - let Inst{24-18} = op; - let Inst{31-26} = 0x38; //encoding - let Inst{39-32} = VADDR; - let Inst{47-40} = VDATA; - let Inst{52-48} = SRSRC; - let Inst{54} = SLC; - let Inst{55} = TFE; - let Inst{63-56} = SOFFSET; - let EncodingType = 4; //SIInstrEncodingType::MUBUF - - let NeedWait = 1; - let usesCustomInserter = 1; - let neverHasSideEffects = 1; -} +include "SIInstrFormats.td" -} // End Uses = [EXEC] - -class SMRD <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc32<outs, ins, asm, pattern> { - - bits<7> SDST; - bits<15> PTR; - bits<8> OFFSET = PTR{7-0}; - bits<1> IMM = PTR{8}; - bits<6> SBASE = PTR{14-9}; - - let Inst{7-0} = OFFSET; - let Inst{8} = IMM; - let Inst{14-9} = SBASE; - let Inst{21-15} = SDST; - let Inst{26-22} = op; - let Inst{31-27} = 0x18; //encoding - let EncodingType = 5; //SIInstrEncodingType::SMRD - - let NeedWait = 1; - let usesCustomInserter = 1; -} +//===----------------------------------------------------------------------===// +// +// SI Instruction multiclass helpers. +// +// Instructions with _32 take 32-bit operands. +// Instructions with _64 take 64-bit operands. +// +// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit +// encoding is the standard encoding, but instructions that make use of +// any of the instruction modifiers must use the 64-bit encoding. +// +// Instructions with _e32 use the 32-bit encoding. +// Instructions with _e64 use the 64-bit encoding. 
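As a worked example of that split: the VOP multiclasses later in this file build each _e64 opcode by prefixing fixed bits to the _e32 opcode, {1,1,op{6-0}} for VOP1, {1,0,0,op{5-0}} for VOP2 and {0,op{7-0}} for VOPC. A standalone C++ sketch of the implied arithmetic (the opcode values in the asserts are illustrative, not tied to particular instructions):

    #include <cassert>
    #include <cstdint>

    // 9-bit VOP3 (e64) opcodes implied by the TableGen bit lists.
    static uint16_t vop1ToVOP3(uint16_t Op) { return 0x180 | (Op & 0x7F); } // {1,1,op{6-0}}
    static uint16_t vop2ToVOP3(uint16_t Op) { return 0x100 | (Op & 0x3F); } // {1,0,0,op{5-0}}
    static uint16_t vopcToVOP3(uint16_t Op) { return Op & 0xFF; }           // {0,op{7-0}}

    int main() {
      assert(vop1ToVOP3(0x01) == 0x181);
      assert(vop2ToVOP3(0x03) == 0x103);
      assert(vopcToVOP3(0x81) == 0x081);
      return 0;
    }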
+// +//===----------------------------------------------------------------------===// -class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc32<outs, ins, asm, pattern> { +//===----------------------------------------------------------------------===// +// Scalar classes +//===----------------------------------------------------------------------===// - bits<7> SDST; - bits<8> SSRC0; +class SOP1_32 <bits<8> op, string opName, list<dag> pattern> : SOP1 < + op, (outs SReg_32:$dst), (ins SSrc_32:$src0), + opName#" $dst, $src0", pattern +>; - let Inst{7-0} = SSRC0; - let Inst{15-8} = op; - let Inst{22-16} = SDST; - let Inst{31-23} = 0x17d; //encoding; - let EncodingType = 6; //SIInstrEncodingType::SOP1 +class SOP1_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 < + op, (outs SReg_64:$dst), (ins SSrc_64:$src0), + opName#" $dst, $src0", pattern +>; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; -} +class SOP2_32 <bits<7> op, string opName, list<dag> pattern> : SOP2 < + op, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1), + opName#" $dst, $src0, $src1", pattern +>; -class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc32 <outs, ins, asm, pattern> { - - bits<7> SDST; - bits<8> SSRC0; - bits<8> SSRC1; +class SOP2_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 < + op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_64:$src1), + opName#" $dst, $src0, $src1", pattern +>; - let Inst{7-0} = SSRC0; - let Inst{15-8} = SSRC1; - let Inst{22-16} = SDST; - let Inst{29-23} = op; - let Inst{31-30} = 0x2; // encoding - let EncodingType = 7; // SIInstrEncodingType::SOP2 +class SOPC_32 <bits<7> op, string opName, list<dag> pattern> : SOPC < + op, (outs SCCReg:$dst), (ins SSrc_32:$src0, SSrc_32:$src1), + opName#" $dst, $src0, $src1", pattern +>; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; -} +class SOPC_64 <bits<7> op, string opName, list<dag> pattern> : SOPC < + op, (outs SCCReg:$dst), (ins SSrc_64:$src0, SSrc_64:$src1), + opName#" $dst, $src0, $src1", pattern +>; -class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc32<outs, ins, asm, pattern> { +class SOPK_32 <bits<5> op, string opName, list<dag> pattern> : SOPK < + op, (outs SReg_32:$dst), (ins i16imm:$src0), + opName#" $dst, $src0", pattern +>; - bits<8> SSRC0; - bits<8> SSRC1; +class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK < + op, (outs SReg_64:$dst), (ins i16imm:$src0), + opName#" $dst, $src0", pattern +>; - let Inst{7-0} = SSRC0; - let Inst{15-8} = SSRC1; - let Inst{22-16} = op; - let Inst{31-23} = 0x17e; - let EncodingType = 8; // SIInstrEncodingType::SOPC +multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass> { + def _IMM : SMRD < + op, 1, (outs dstClass:$dst), + (ins GPR2Align<SReg_64>:$sbase, i32imm:$offset), + asm#" $dst, $sbase, $offset", [] + >; - let DisableEncoding = "$dst"; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; + def _SGPR : SMRD < + op, 0, (outs dstClass:$dst), + (ins GPR2Align<SReg_64>:$sbase, SReg_32:$soff), + asm#" $dst, $sbase, $soff", [] + >; } -class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc32 <outs, ins , asm, pattern> { - - bits <7> SDST; - bits <16> SIMM16; - - let Inst{15-0} = SIMM16; - let Inst{22-16} = SDST; - let Inst{27-23} = op; - let Inst{31-28} = 0xb; //encoding - let EncodingType = 9; // SIInstrEncodingType::SOPK 
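The _IMM form of SMRD_Helper above carries an immediate offset; the IMM8bitDWORD leaf defined near the top of this file only matches an offset that is dword-aligned and fits in 8 bits once divided by 4, which is presumably how such SMRD offsets get encoded. A standalone sketch of that predicate and transform (the helper names here are illustrative, not from the tree):

    #include <cassert>
    #include <cstdint>

    // Mirrors IMM8bitDWORD: no bits may be set outside 9..2, i.e. the value
    // is a multiple of 4 no larger than 0x3FC, and it is encoded in dwords.
    static bool isImm8bitDWORD(uint32_t Imm) { return (Imm & ~0x3FCu) == 0; }
    static uint32_t encodeImm8bitDWORD(uint32_t Imm) { return Imm >> 2; }

    int main() {
      assert(isImm8bitDWORD(0x3FC) && encodeImm8bitDWORD(0x3FC) == 0xFF);
      assert(!isImm8bitDWORD(0x400)); // too large once scaled
      assert(!isImm8bitDWORD(0x2));   // not dword aligned
      return 0;
    }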
+//===----------------------------------------------------------------------===// +// Vector ALU classes +//===----------------------------------------------------------------------===// - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; +class VOP <string opName> { + string OpName = opName; } -class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 < - (outs), - ins, - asm, - pattern > { - - bits <16> SIMM16; - - let Inst{15-0} = SIMM16; - let Inst{22-16} = op; - let Inst{31-23} = 0x17f; // encoding - let EncodingType = 10; // SIInstrEncodingType::SOPP - - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; -} - -let Uses = [EXEC] in { - -class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc32 <outs, ins, asm, pattern> { - - bits<8> VDST; - bits<8> VSRC; - bits<2> ATTRCHAN; - bits<6> ATTR; - - let Inst{7-0} = VSRC; - let Inst{9-8} = ATTRCHAN; - let Inst{15-10} = ATTR; - let Inst{17-16} = op; - let Inst{25-18} = VDST; - let Inst{31-26} = 0x32; // encoding - let EncodingType = 11; // SIInstrEncodingType::VINTRP - - let neverHasSideEffects = 1; - let mayLoad = 1; - let mayStore = 0; +multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src, + string opName, list<dag> pattern> { + + def _e32 : VOP1 < + op, (outs drc:$dst), (ins src:$src0), + opName#"_e32 $dst, $src0", pattern + >, VOP <opName>; + + def _e64 : VOP3 < + {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + (outs drc:$dst), + (ins src:$src0, + i32imm:$abs, i32imm:$clamp, + i32imm:$omod, i32imm:$neg), + opName#"_e64 $dst, $src0, $abs, $clamp, $omod, $neg", [] + >, VOP <opName> { + let SRC1 = SIOperand.ZERO; + let SRC2 = SIOperand.ZERO; + } } -class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc32 <outs, ins, asm, pattern> { - - bits<8> VDST; - bits<9> SRC0; - - let Inst{8-0} = SRC0; - let Inst{16-9} = op; - let Inst{24-17} = VDST; - let Inst{31-25} = 0x3f; //encoding - - let EncodingType = 12; // SIInstrEncodingType::VOP1 - let PostEncoderMethod = "VOPPostEncode"; - - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; +multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> + : VOP1_Helper <op, VReg_32, VSrc_32, opName, pattern>; + +multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> + : VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>; + +multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc, + string opName, list<dag> pattern> { + def _e32 : VOP2 < + op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), + opName#"_e32 $dst, $src0, $src1", pattern + >, VOP <opName>; + + def _e64 : VOP3 < + {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + (outs vrc:$dst), + (ins arc:$src0, arc:$src1, + i32imm:$abs, i32imm:$clamp, + i32imm:$omod, i32imm:$neg), + opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] + >, VOP <opName> { + let SRC2 = SIOperand.ZERO; + } } -class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc32 <outs, ins, asm, pattern> { - - bits<8> VDST; - bits<9> SRC0; - bits<8> VSRC1; - - let Inst{8-0} = SRC0; - let Inst{16-9} = VSRC1; - let Inst{24-17} = VDST; - let Inst{30-25} = op; - let Inst{31} = 0x0; //encoding - - let EncodingType = 13; // SIInstrEncodingType::VOP2 - let PostEncoderMethod = "VOPPostEncode"; - - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; +multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> + : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern>; + +multiclass 
VOP2_64 <bits<6> op, string opName, list<dag> pattern> + : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern>; + +multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern> { + + def _e32 : VOP2 < + op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1), + opName#"_e32 $dst, $src0, $src1", pattern + >, VOP <opName>; + + def _e64 : VOP3b < + {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + (outs VReg_32:$dst), + (ins VSrc_32:$src0, VSrc_32:$src1, + i32imm:$abs, i32imm:$clamp, + i32imm:$omod, i32imm:$neg), + opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] + >, VOP <opName> { + let SRC2 = SIOperand.ZERO; + /* The VOP2 variant puts the carry out into VCC; the VOP3 variant + can write it into any SGPR. We currently don't use the carry out, + so for now hardcode it to VCC as well. */ + let SDST = SIOperand.VCC; + } +} -class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc64 <outs, ins, asm, pattern> { - - bits<8> VDST; - bits<9> SRC0; - bits<9> SRC1; - bits<9> SRC2; - bits<3> ABS; - bits<1> CLAMP; - bits<2> OMOD; - bits<3> NEG; - - let Inst{7-0} = VDST; - let Inst{10-8} = ABS; - let Inst{11} = CLAMP; - let Inst{25-17} = op; - let Inst{31-26} = 0x34; //encoding - let Inst{40-32} = SRC0; - let Inst{49-41} = SRC1; - let Inst{58-50} = SRC2; - let Inst{60-59} = OMOD; - let Inst{63-61} = NEG; - - let EncodingType = 14; // SIInstrEncodingType::VOP3 - let PostEncoderMethod = "VOPPostEncode"; - - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; +multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc, + string opName, ValueType vt, PatLeaf cond> { + + def _e32 : VOPC < + op, (ins arc:$src0, vrc:$src1), + opName#"_e32 $dst, $src0, $src1", [] + >, VOP <opName>; + + def _e64 : VOP3 < + {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + (outs SReg_64:$dst), + (ins arc:$src0, arc:$src1, + InstFlag:$abs, InstFlag:$clamp, + InstFlag:$omod, InstFlag:$neg), + opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", + !if(!eq(!cast<string>(cond), "COND_NULL"), []<dag>, + [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))] + ) + >, VOP <opName> { + let SRC2 = SIOperand.ZERO; + } } -class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc64 <outs, ins, asm, pattern> { - - bits<8> VDST; - bits<9> SRC0; - bits<9> SRC1; - bits<9> SRC2; - bits<7> SDST; - bits<2> OMOD; - bits<3> NEG; - - let Inst{7-0} = VDST; - let Inst{14-8} = SDST; - let Inst{25-17} = op; - let Inst{31-26} = 0x34; //encoding - let Inst{40-32} = SRC0; - let Inst{49-41} = SRC1; - let Inst{58-50} = SRC2; - let Inst{60-59} = OMOD; - let Inst{63-61} = NEG; - - let EncodingType = 14; // SIInstrEncodingType::VOP3 - let PostEncoderMethod = "VOPPostEncode"; - - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; -} +multiclass VOPC_32 <bits<8> op, string opName, + ValueType vt = untyped, PatLeaf cond = COND_NULL> + : VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond>; -class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> : - Enc32 <(outs VCCReg:$dst), ins, asm, pattern> { +multiclass VOPC_64 <bits<8> op, string opName, + ValueType vt = untyped, PatLeaf cond = COND_NULL> + : VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond>; - bits<9> SRC0; - bits<8> VSRC1; +class VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3 < + op, (outs VReg_32:$dst), + (ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2, + i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), + opName#" $dst, $src0, 
$src1, $src2, $abs, $clamp, $omod, $neg", pattern +>, VOP <opName>; - let Inst{8-0} = SRC0; - let Inst{16-9} = VSRC1; - let Inst{24-17} = op; - let Inst{31-25} = 0x3e; - - let EncodingType = 15; //SIInstrEncodingType::VOPC - let PostEncoderMethod = "VOPPostEncode"; - let DisableEncoding = "$dst"; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; -} +class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 < + op, (outs VReg_64:$dst), + (ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2, + i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), + opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern +>, VOP <opName>; -} // End Uses = [EXEC] +//===----------------------------------------------------------------------===// +// Vector I/O classes +//===----------------------------------------------------------------------===// -class MIMG_Load_Helper <bits<7> op, string asm> : MIMG < +class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF < op, - (outs VReg_128:$vdata), - (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, - i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr, - GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp), - asm, + (outs), + (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, + i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, + GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), + asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt," + #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []> { - let mayLoad = 1; - let mayStore = 0; + let mayStore = 1; + let mayLoad = 0; } class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF < @@ -530,8 +289,9 @@ class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF (outs regClass:$dst), (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc, - i1imm:$tfe, SReg_32:$soffset), - asm, + i1imm:$tfe, SSrc_32:$soffset), + asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, " + #"$lds, $vaddr, $srsrc, $slc, $tfe, $soffset", []> { let mayLoad = 1; let mayStore = 0; @@ -542,48 +302,38 @@ class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF (outs regClass:$dst), (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, - i1imm:$slc, i1imm:$tfe, SReg_32:$soffset), - asm, + i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), + asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt," + #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []> { let mayLoad = 1; let mayStore = 0; } -class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF < +class MIMG_Load_Helper <bits<7> op, string asm> : MIMG < op, - (outs), - (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, - i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, - GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset), - asm, + (outs VReg_128:$vdata), + (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_32:$vaddr, + GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp), + asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," + #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp", []> { - let mayStore = 1; - let mayLoad = 0; + let mayLoad = 1; + let mayStore = 0; } -multiclass SMRD_Helper 
<bits<5> op, string asm, RegisterClass dstClass, - ValueType vt> { - def _IMM : SMRD < - op, - (outs dstClass:$dst), - (ins SMRDmemri:$src0), - asm, - [(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))] - >; - - def _SGPR : SMRD < - op, - (outs dstClass:$dst), - (ins SMRDmemrr:$src0), - asm, - [(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))] - >; -} +//===----------------------------------------------------------------------===// +// Vector instruction mappings +//===----------------------------------------------------------------------===// -multiclass SMRD_32 <bits<5> op, string asm, RegisterClass dstClass> { - defm _F32 : SMRD_Helper <op, asm, dstClass, f32>; - defm _I32 : SMRD_Helper <op, asm, dstClass, i32>; +// Maps an opcode in e32 form to its e64 equivalent +def getVOPe64 : InstrMapping { + let FilterClass = "VOP"; + let RowFields = ["OpName"]; + let ColFields = ["Size"]; + let KeyCol = ["4"]; + let ValueCols = [["8"]]; } -include "SIInstrFormats.td" include "SIInstructions.td" diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 005be96..af116f0 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -11,16 +11,31 @@ // that are not yet supported remain commented out. //===----------------------------------------------------------------------===// +class InterpSlots { +int P0 = 2; +int P10 = 0; +int P20 = 1; +} +def INTERP : InterpSlots; + +def InterpSlot : Operand<i32> { + let PrintMethod = "printInterpSlot"; +} + def isSI : Predicate<"Subtarget.device()" "->getGeneration() == AMDGPUDeviceInfo::HD7XXX">; let Predicates = [isSI] in { let neverHasSideEffects = 1 in { + +let isMoveImm = 1 in { def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>; def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>; def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>; def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>; +} // End isMoveImm = 1 + def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>; def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>; def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>; @@ -28,6 +43,7 @@ def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>; def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>; def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>; } // End neverHasSideEffects = 1 + ////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>; ////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>; ////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>; @@ -96,6 +112,7 @@ def S_CMPK_EQ_I32 : SOPK < >; */ +let isCompare = 1 in { def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>; def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>; def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>; @@ -107,6 +124,8 @@ def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>; def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>; def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>; def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>; +} // End isCompare = 1 + def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>; def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>; //def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>; @@ -116,286 +135,262 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>; //def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>; //def EXP : EXP_ <0x00000000, "EXP", []>; -defm 
V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32", []>; -defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", []>; -def : Pat < - (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LT)), - (V_CMP_LT_F32_e64 AllReg_32:$src0, VReg_32:$src1) ->; -defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", []>; -def : Pat < - (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_EQ)), - (V_CMP_EQ_F32_e64 AllReg_32:$src0, VReg_32:$src1) ->; -defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", []>; -def : Pat < - (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LE)), - (V_CMP_LE_F32_e64 AllReg_32:$src0, VReg_32:$src1) ->; -defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", []>; -def : Pat < - (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GT)), - (V_CMP_GT_F32_e64 AllReg_32:$src0, VReg_32:$src1) ->; -defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", []>; -def : Pat < - (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE)), - (V_CMP_LG_F32_e64 AllReg_32:$src0, VReg_32:$src1) ->; -defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", []>; -def : Pat < - (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GE)), - (V_CMP_GE_F32_e64 AllReg_32:$src0, VReg_32:$src1) ->; -defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", []>; -defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", []>; -defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32", []>; -defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32", []>; -defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32", []>; -defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32", []>; -defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", []>; -def : Pat < - (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE)), - (V_CMP_NEQ_F32_e64 AllReg_32:$src0, VReg_32:$src1) ->; -defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32", []>; -defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32", []>; - -//Side effect is writing to EXEC -let hasSideEffects = 1 in { - -defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32", []>; -defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32", []>; -defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32", []>; -defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32", []>; -defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32", []>; -defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32", []>; -defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32", []>; -defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32", []>; -defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32", []>; -defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32", []>; -defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32", []>; -defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32", []>; -defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32", []>; -defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32", []>; -defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32", []>; -defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32", []>; - -} // End hasSideEffects = 1 - -defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64", []>; -defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", []>; -defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", []>; -defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", []>; -defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", []>; -defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64", []>; -defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", []>; -defm V_CMP_O_F64 : VOPC_64 
<0x00000027, "V_CMP_O_F64", []>; -defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", []>; -defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64", []>; -defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64", []>; -defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64", []>; -defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64", []>; -defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", []>; -defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64", []>; -defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64", []>; - -//Side effect is writing to EXEC -let hasSideEffects = 1 in { - -defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64", []>; -defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64", []>; -defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64", []>; -defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64", []>; -defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64", []>; -defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64", []>; -defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64", []>; -defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64", []>; -defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64", []>; -defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64", []>; -defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64", []>; -defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64", []>; -defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64", []>; -defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64", []>; -defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64", []>; -defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64", []>; - -} // End hasSideEffects = 1 - -defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32", []>; -defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32", []>; -defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32", []>; -defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32", []>; -defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32", []>; -defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32", []>; -defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32", []>; -defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32", []>; -defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32", []>; -defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32", []>; -defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32", []>; -defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32", []>; -defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32", []>; -defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32", []>; -defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32", []>; -defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32", []>; -defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32", []>; -defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32", []>; -defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32", []>; -defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32", []>; -defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32", []>; -defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32", []>; -defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32", []>; -defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32", []>; -defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32", []>; -defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32", []>; -defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32", []>; -defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32", []>; -defm 
V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32", []>; -defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32", []>; -defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32", []>; -defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32", []>; -defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64", []>; -defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64", []>; -defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64", []>; -defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64", []>; -defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64", []>; -defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64", []>; -defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64", []>; -defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64", []>; -defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64", []>; -defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64", []>; -defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64", []>; -defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64", []>; -defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64", []>; -defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64", []>; -defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64", []>; -defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64", []>; -defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64", []>; -defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64", []>; -defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64", []>; -defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64", []>; -defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64", []>; -defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64", []>; -defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64", []>; -defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64", []>; -defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64", []>; -defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64", []>; -defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64", []>; -defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64", []>; -defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64", []>; -defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64", []>; -defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64", []>; -defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64", []>; -defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32", []>; -defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", []>; -def : Pat < - (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_LT)), - (V_CMP_LT_I32_e64 AllReg_32:$src0, VReg_32:$src1) ->; -defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", []>; -def : Pat < - (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_EQ)), - (V_CMP_EQ_I32_e64 AllReg_32:$src0, VReg_32:$src1) ->; -defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", []>; -def : Pat < - (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_LE)), - (V_CMP_LE_I32_e64 AllReg_32:$src0, VReg_32:$src1) ->; -defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", []>; -def : Pat < - (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_GT)), - (V_CMP_GT_I32_e64 AllReg_32:$src0, VReg_32:$src1) ->; -defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", []>; -def : Pat < - (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_NE)), - (V_CMP_NE_I32_e64 AllReg_32:$src0, VReg_32:$src1) ->; -defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", []>; -def : Pat < - (i1 (setcc (i32 AllReg_32:$src0), 
VReg_32:$src1, COND_GE)), - (V_CMP_GE_I32_e64 AllReg_32:$src0, VReg_32:$src1) ->; -defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32", []>; - -let hasSideEffects = 1 in { +let isCompare = 1 in { + +defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32">; +defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", f32, COND_LT>; +defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", f32, COND_EQ>; +defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", f32, COND_LE>; +defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", f32, COND_GT>; +defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", f32, COND_NE>; +defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", f32, COND_GE>; +defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32">; +defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32">; +defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32">; +defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32">; +defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32">; +defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32">; +defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_NE>; +defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">; +defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">; + +let hasSideEffects = 1, Defs = [EXEC] in { + +defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32">; +defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32">; +defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32">; +defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32">; +defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32">; +defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32">; +defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32">; +defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32">; +defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32">; +defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32">; +defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32">; +defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32">; +defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32">; +defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32">; +defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32">; +defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">; + +} // End hasSideEffects = 1, Defs = [EXEC] + +defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">; +defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64">; +defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64">; +defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64">; +defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64">; +defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64">; +defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64">; +defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64">; +defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64">; +defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64">; +defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64">; +defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64">; +defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64">; +defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64">; +defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">; +defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">; + +let hasSideEffects = 1, Defs = [EXEC] in { + +defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64">; +defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64">; +defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64">; +defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, 
"V_CMPX_LE_F64">; +defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64">; +defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64">; +defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64">; +defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64">; +defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64">; +defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64">; +defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64">; +defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64">; +defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64">; +defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64">; +defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64">; +defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64">; + +} // End hasSideEffects = 1, Defs = [EXEC] + +defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32">; +defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32">; +defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32">; +defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32">; +defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32">; +defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32">; +defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32">; +defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32">; +defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32">; +defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32">; +defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32">; +defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32">; +defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32">; +defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32">; +defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32">; +defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32">; + +let hasSideEffects = 1, Defs = [EXEC] in { + +defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32">; +defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32">; +defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32">; +defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32">; +defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32">; +defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32">; +defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32">; +defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32">; +defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32">; +defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32">; +defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32">; +defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32">; +defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32">; +defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32">; +defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32">; +defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32">; + +} // End hasSideEffects = 1, Defs = [EXEC] + +defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64">; +defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64">; +defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64">; +defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64">; +defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64">; +defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64">; +defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64">; +defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64">; +defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64">; +defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64">; +defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, 
"V_CMPS_NLG_F64">; +defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64">; +defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64">; +defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64">; +defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64">; +defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64">; + +let hasSideEffects = 1, Defs = [EXEC] in { + +defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64">; +defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64">; +defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64">; +defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64">; +defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64">; +defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64">; +defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64">; +defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64">; +defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64">; +defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64">; +defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64">; +defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64">; +defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64">; +defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64">; +defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64">; +defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64">; + +} // End hasSideEffects = 1, Defs = [EXEC] + +defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32">; +defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", i32, COND_LT>; +defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", i32, COND_EQ>; +defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", i32, COND_LE>; +defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", i32, COND_GT>; +defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>; +defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_GE>; +defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">; + +let hasSideEffects = 1, Defs = [EXEC] in { + +defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32">; +defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32">; +defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32">; +defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32">; +defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32">; +defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32">; +defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32">; +defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32">; + +} // End hasSideEffects = 1, Defs = [EXEC] + +defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">; +defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64">; +defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64">; +defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64">; +defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64">; +defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64">; +defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64">; +defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">; + +let hasSideEffects = 1, Defs = [EXEC] in { + +defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64">; +defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64">; +defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64">; +defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64">; +defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64">; +defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64">; +defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64">; +defm V_CMPX_T_I64 : VOPC_64 
<0x000000b7, "V_CMPX_T_I64">; + +} // End hasSideEffects = 1, Defs = [EXEC] + +defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">; +defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32">; +defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32">; +defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32">; +defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32">; +defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32">; +defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32">; +defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">; + +let hasSideEffects = 1, Defs = [EXEC] in { + +defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32">; +defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32">; +defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32">; +defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32">; +defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32">; +defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32">; +defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32">; +defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32">; + +} // End hasSideEffects = 1, Defs = [EXEC] + +defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">; +defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64">; +defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64">; +defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64">; +defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64">; +defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64">; +defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64">; +defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">; + +let hasSideEffects = 1, Defs = [EXEC] in { + +defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64">; +defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64">; +defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64">; +defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64">; +defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64">; +defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64">; +defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64">; +defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64">; + +} // End hasSideEffects = 1, Defs = [EXEC] + +defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32">; + +let hasSideEffects = 1, Defs = [EXEC] in { +defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32">; +} // End hasSideEffects = 1, Defs = [EXEC] + +defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64">; + +let hasSideEffects = 1, Defs = [EXEC] in { +defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; +} // End hasSideEffects = 1, Defs = [EXEC] + +} // End isCompare = 1 -defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32", []>; -defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32", []>; -defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32", []>; -defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32", []>; -defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32", []>; -defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32", []>; -defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32", []>; -defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32", []>; - -} // End hasSideEffects - -defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64", []>; -defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", []>; -defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", []>; -defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", []>; -defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", []>; -defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, 
"V_CMP_NE_I64", []>; -defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", []>; -defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64", []>; - -let hasSideEffects = 1 in { - -defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64", []>; -defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64", []>; -defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64", []>; -defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64", []>; -defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64", []>; -defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64", []>; -defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64", []>; -defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64", []>; - -} // End hasSideEffects - -defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32", []>; -defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", []>; -defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", []>; -defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", []>; -defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", []>; -defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", []>; -defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", []>; -defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32", []>; - -let hasSideEffects = 1 in { - -defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32", []>; -defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32", []>; -defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32", []>; -defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32", []>; -defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32", []>; -defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32", []>; -defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32", []>; -defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32", []>; - -} // End hasSideEffects - -defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64", []>; -defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", []>; -defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", []>; -defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", []>; -defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", []>; -defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", []>; -defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", []>; -defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64", []>; -defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64", []>; -defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64", []>; -defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64", []>; -defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64", []>; -defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64", []>; -defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64", []>; -defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64", []>; -defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64", []>; -defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32", []>; -defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32", []>; -defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64", []>; -defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64", []>; //def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>; //def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>; //def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>; @@ -461,11 +456,13 @@ def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORM //def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>; //def 
TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>; -defm S_LOAD_DWORD : SMRD_32 <0x00000000, "S_LOAD_DWORD", SReg_32>; +let mayLoad = 1 in { + +defm S_LOAD_DWORD : SMRD_Helper <0x00000000, "S_LOAD_DWORD", SReg_32>; //def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>; -defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128, v4i32>; -defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32>; +defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128>; +defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256>; //def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>; //def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>; //def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>; @@ -473,6 +470,8 @@ defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32 //def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>; //def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>; +} // mayLoad = 1 + //def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; //def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>; //def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>; @@ -511,12 +510,12 @@ def IMAGE_SAMPLE_L : MIMG_Load_Helper <0x00000024, "IMAGE_SAMPLE_L">; def IMAGE_SAMPLE_B : MIMG_Load_Helper <0x00000025, "IMAGE_SAMPLE_B">; //def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>; //def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>; -//def IMAGE_SAMPLE_C : MIMG_NoPattern_ <"IMAGE_SAMPLE_C", 0x00000028>; +def IMAGE_SAMPLE_C : MIMG_Load_Helper <0x00000028, "IMAGE_SAMPLE_C">; //def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>; //def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>; //def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>; -//def IMAGE_SAMPLE_C_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L", 0x0000002c>; -//def IMAGE_SAMPLE_C_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B", 0x0000002d>; +def IMAGE_SAMPLE_C_L : MIMG_Load_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">; +def IMAGE_SAMPLE_C_B : MIMG_Load_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">; //def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>; //def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>; //def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>; @@ -572,19 +571,21 @@ def IMAGE_SAMPLE_B : MIMG_Load_Helper <0x00000025, "IMAGE_SAMPLE_B">; //def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>; //def V_NOP : VOP1_ <0x00000000, "V_NOP", []>; -let neverHasSideEffects = 1 in { + +let neverHasSideEffects = 1, isMoveImm = 1 in { defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>; -} // End neverHasSideEffects +} // End neverHasSideEffects = 1, isMoveImm = 1 + defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>; //defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>; //defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>; defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32", - [(set VReg_32:$dst, (sint_to_fp AllReg_32:$src0))] + [(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))] >; //defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>; //defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>; defm V_CVT_I32_F32 : VOP1_32 <0x00000008, 
"V_CVT_I32_F32", - [(set VReg_32:$dst, (fp_to_sint AllReg_32:$src0))] + [(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))] >; defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; ////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>; @@ -601,31 +602,35 @@ defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; //defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>; //defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>; defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32", - [(set VReg_32:$dst, (AMDGPUfract AllReg_32:$src0))] + [(set VReg_32:$dst, (AMDGPUfract VSrc_32:$src0))] >; defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>; -defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", []>; +defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", + [(set VReg_32:$dst, (fceil VSrc_32:$src0))] +>; defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32", - [(set VReg_32:$dst, (frint AllReg_32:$src0))] + [(set VReg_32:$dst, (frint VSrc_32:$src0))] >; defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32", - [(set VReg_32:$dst, (ffloor AllReg_32:$src0))] + [(set VReg_32:$dst, (ffloor VSrc_32:$src0))] >; defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32", - [(set VReg_32:$dst, (fexp2 AllReg_32:$src0))] + [(set VReg_32:$dst, (fexp2 VSrc_32:$src0))] >; defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>; -defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", []>; +defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", + [(set VReg_32:$dst, (flog2 VSrc_32:$src0))] +>; defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>; defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>; defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32", - [(set VReg_32:$dst, (fdiv FP_ONE, AllReg_32:$src0))] + [(set VReg_32:$dst, (fdiv FP_ONE, VSrc_32:$src0))] >; defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>; defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>; defm V_RSQ_LEGACY_F32 : VOP1_32 < 0x0000002d, "V_RSQ_LEGACY_F32", - [(set VReg_32:$dst, (int_AMDGPU_rsq AllReg_32:$src0))] + [(set VReg_32:$dst, (int_AMDGPU_rsq VSrc_32:$src0))] >; defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>; defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>; @@ -655,7 +660,7 @@ def V_INTERP_P1_F32 : VINTRP < 0x00000000, (outs VReg_32:$dst), (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), - "V_INTERP_P1_F32", + "V_INTERP_P1_F32 $dst, $i, $attr_chan, $attr, [$m0]", []> { let DisableEncoding = "$m0"; } @@ -664,7 +669,7 @@ def V_INTERP_P2_F32 : VINTRP < 0x00000001, (outs VReg_32:$dst), (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), - "V_INTERP_P2_F32", + "V_INTERP_P2_F32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]", []> { let Constraints = "$src0 = $dst"; @@ -675,10 +680,9 @@ def V_INTERP_P2_F32 : VINTRP < def V_INTERP_MOV_F32 : VINTRP < 0x00000002, (outs VReg_32:$dst), - (ins i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), - "V_INTERP_MOV_F32", + (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), + "V_INTERP_MOV_F32 $dst, $src0, $attr_chan, $attr, [$m0]", []> { - let VSRC = 0; let DisableEncoding = "$m0"; } @@ -695,7 +699,7 @@ def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM", let isBranch = 1 in { def S_BRANCH : SOPP < - 0x00000002, (ins brtarget:$target), "S_BRANCH", + 0x00000002, (ins brtarget:$target), "S_BRANCH $target", [(br bb:$target)]> { let isBarrier = 1; } @@ -703,35 +707,35 @@ def S_BRANCH : SOPP < let DisableEncoding = "$scc" in { def 
S_CBRANCH_SCC0 : SOPP < 0x00000004, (ins brtarget:$target, SCCReg:$scc), - "S_CBRANCH_SCC0", [] + "S_CBRANCH_SCC0 $target", [] >; def S_CBRANCH_SCC1 : SOPP < 0x00000005, (ins brtarget:$target, SCCReg:$scc), - "S_CBRANCH_SCC1", + "S_CBRANCH_SCC1 $target", [] >; } // End DisableEncoding = "$scc" def S_CBRANCH_VCCZ : SOPP < 0x00000006, (ins brtarget:$target, VCCReg:$vcc), - "S_CBRANCH_VCCZ", + "S_CBRANCH_VCCZ $target", [] >; def S_CBRANCH_VCCNZ : SOPP < 0x00000007, (ins brtarget:$target, VCCReg:$vcc), - "S_CBRANCH_VCCNZ", + "S_CBRANCH_VCCNZ $target", [] >; let DisableEncoding = "$exec" in { def S_CBRANCH_EXECZ : SOPP < 0x00000008, (ins brtarget:$target, EXECReg:$exec), - "S_CBRANCH_EXECZ", + "S_CBRANCH_EXECZ $target", [] >; def S_CBRANCH_EXECNZ : SOPP < 0x00000009, (ins brtarget:$target, EXECReg:$exec), - "S_CBRANCH_EXECNZ", + "S_CBRANCH_EXECNZ $target", [] >; } // End DisableEncoding = "$exec" @@ -758,80 +762,101 @@ def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16", //def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>; def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst), - (ins AllReg_32:$src0, VReg_32:$src1, VCCReg:$vcc), "V_CNDMASK_B32_e32", + (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc), + "V_CNDMASK_B32_e32 $dst, $src0, $src1, [$vcc]", [] >{ let DisableEncoding = "$vcc"; } def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst), - (ins VReg_32:$src0, VReg_32:$src1, SReg_1:$src2, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), - "V_CNDMASK_B32_e64", - [(set (i32 VReg_32:$dst), (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0))] + (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2, + InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), + "V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", + [(set (i32 VReg_32:$dst), (select (i1 SSrc_64:$src2), + VSrc_32:$src1, VSrc_32:$src0))] >; //f32 pattern for V_CNDMASK_B32_e64 def : Pat < - (f32 (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0)), - (V_CNDMASK_B32_e64 VReg_32:$src0, VReg_32:$src1, SReg_1:$src2) + (f32 (select (i1 SSrc_64:$src2), VSrc_32:$src1, VSrc_32:$src0)), + (V_CNDMASK_B32_e64 VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2) >; defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>; defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>; -defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", []>; -def : Pat < - (f32 (fadd AllReg_32:$src0, VReg_32:$src1)), - (V_ADD_F32_e32 AllReg_32:$src0, VReg_32:$src1) +let isCommutable = 1 in { +defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", + [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))] >; +} // End isCommutable = 1 -defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", []>; -def : Pat < - (f32 (fsub AllReg_32:$src0, VReg_32:$src1)), - (V_SUB_F32_e32 AllReg_32:$src0, VReg_32:$src1) +defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", + [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))] >; + defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>; defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>; + +let isCommutable = 1 in { + defm V_MUL_LEGACY_F32 : VOP2_32 < 0x00000007, "V_MUL_LEGACY_F32", - [(set VReg_32:$dst, (int_AMDGPU_mul AllReg_32:$src0, VReg_32:$src1))] + [(set VReg_32:$dst, (int_AMDGPU_mul VSrc_32:$src0, VReg_32:$src1))] >; defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32", - [(set VReg_32:$dst, (fmul AllReg_32:$src0, VReg_32:$src1))] + [(set VReg_32:$dst, (fmul VSrc_32:$src0, VReg_32:$src1))] >; + +} // End isCommutable = 1 + 
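The isCommutable blocks above are worth pausing on: in the VOP2 encoding only the $src0 slot (class VSrc_32) may carry an SGPR or an inline constant, while $src1 (class VReg_32) must be a true vector register. Marking the float add/sub/mul family commutable lets generic CodeGen swap the operands when the scalar value lands on the wrong side. A minimal sketch of how a target pass could lean on that flag follows; the helper name and the passed-in register class are illustrative assumptions, not part of this patch:

    // Sketch (hypothetical helper): move a non-VGPR operand into the src0
    // slot of a commutable VOP2 instruction, relying on the isCommutable
    // flag set in the .td definitions above.
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/Target/TargetInstrInfo.h"
    using namespace llvm;

    static bool legalizeVOP2Operands(MachineInstr *MI, MachineRegisterInfo &MRI,
                                     const TargetInstrInfo *TII,
                                     const TargetRegisterClass *VReg32RC) {
      // For VOP2, operand 0 is $dst, operand 1 is $src0, operand 2 is $src1.
      MachineOperand &Src1 = MI->getOperand(2);
      // Nothing to do if src1 is already a plain VGPR.
      if (Src1.isReg() && MRI.getRegClass(Src1.getReg()) == VReg32RC)
        return true;
      // Otherwise try to swap src0 and src1; commuteInstruction() only
      // succeeds when the instruction was declared isCommutable.
      return MI->getDesc().isCommutable() && TII->commuteInstruction(MI) != 0;
    }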
//defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>; //defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>; //defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>; //defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>; + +let isCommutable = 1 in { + defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32", - [(set VReg_32:$dst, (AMDGPUfmin AllReg_32:$src0, VReg_32:$src1))] + [(set VReg_32:$dst, (AMDGPUfmin VSrc_32:$src0, VReg_32:$src1))] >; defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32", - [(set VReg_32:$dst, (AMDGPUfmax AllReg_32:$src0, VReg_32:$src1))] + [(set VReg_32:$dst, (AMDGPUfmax VSrc_32:$src0, VReg_32:$src1))] >; + defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>; defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>; defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>; defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>; defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>; defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>; + +} // End isCommutable = 1 + defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>; defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>; defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>; defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>; defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>; defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>; + +let isCommutable = 1 in { + defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", - [(set VReg_32:$dst, (and AllReg_32:$src0, VReg_32:$src1))] + [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))] >; defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", - [(set VReg_32:$dst, (or AllReg_32:$src0, VReg_32:$src1))] + [(set VReg_32:$dst, (or VSrc_32:$src0, VReg_32:$src1))] >; defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", - [(set VReg_32:$dst, (xor AllReg_32:$src0, VReg_32:$src1))] + [(set VReg_32:$dst, (xor VSrc_32:$src0, VReg_32:$src1))] >; + +} // End isCommutable = 1 + defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>; defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>; defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>; @@ -840,23 +865,30 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; //defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; //defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; let Defs = [VCC] in { // Carry-out goes to VCC -defm V_ADD_I32 : VOP2_32 <0x00000025, "V_ADD_I32", - [(set VReg_32:$dst, (add (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))] + +let isCommutable = 1 in { +defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", + [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; -defm V_SUB_I32 : VOP2_32 <0x00000026, "V_SUB_I32", - [(set VReg_32:$dst, (sub (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))] +} // End isCommutable = 1 + +defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", + [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; + +defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", []>; +let Uses = [VCC] in { // Carry-out comes from VCC +defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>; +defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>; +defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", []>; +} // End Uses = [VCC] } // End Defs = [VCC] -defm V_SUBREV_I32 : VOP2_32 <0x00000027, "V_SUBREV_I32", []>; -defm V_ADDC_U32 : VOP2_32 <0x00000028, "V_ADDC_U32", []>; -defm V_SUBB_U32 : VOP2_32 <0x00000029, 
"V_SUBB_U32", []>; -defm V_SUBBREV_U32 : VOP2_32 <0x0000002a, "V_SUBBREV_U32", []>; defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>; ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>; ////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>; defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32", - [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))] + [(set VReg_32:$dst, (int_SI_packf16 VSrc_32:$src0, VReg_32:$src1))] >; ////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>; ////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>; @@ -926,6 +958,10 @@ def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>; def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>; def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>; def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>; +def : Pat < + (mul VSrc_32:$src0, VReg_32:$src1), + (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) +>; def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; @@ -949,27 +985,35 @@ def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>; def S_CSELECT_B32 : SOP2 < 0x0000000a, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32", - [(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))] + [(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc), + SReg_32:$src0, SReg_32:$src1))] >; def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>; // f32 pattern for S_CSELECT_B32 def : Pat < - (f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)), + (f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)), (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc) >; def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>; def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", - [(set SReg_64:$dst, (and SReg_64:$src0, SReg_64:$src1))] + [(set SReg_64:$dst, (i64 (and SSrc_64:$src0, SSrc_64:$src1)))] >; -def S_AND_VCC : SOP2_VCC <0x0000000f, "S_AND_B64", - [(set SReg_1:$vcc, (SIvcc_and SReg_64:$src0, SReg_64:$src1))] + +def : Pat < + (i1 (and SSrc_64:$src0, SSrc_64:$src1)), + (S_AND_B64 SSrc_64:$src0, SSrc_64:$src1) >; + def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>; def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>; +def : Pat < + (i1 (or SSrc_64:$src0, SSrc_64:$src1)), + (S_OR_B64 SSrc_64:$src0, SSrc_64:$src1) +>; def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>; def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>; def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>; @@ -998,54 +1042,12 @@ def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>; //def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>; def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>; -class V_MOV_IMM <Operand immType, SDNode immNode> : InstSI < - (outs VReg_32:$dst), - (ins immType:$src0), - "V_MOV_IMM", - [(set VReg_32:$dst, (immNode:$src0))] ->; - -let isCodeGenOnly = 1, isPseudo = 1 in { - -def V_MOV_IMM_I32 : V_MOV_IMM<i32imm, imm>; -def V_MOV_IMM_F32 : V_MOV_IMM<f32imm, fpimm>; - -def S_MOV_IMM_I32 : InstSI < - (outs SReg_32:$dst), - (ins i32imm:$src0), - "S_MOV_IMM_I32", - [(set SReg_32:$dst, (imm:$src0))] ->; - -// i64 immediates aren't really supported in hardware, but LLVM will 
use the i64 -// type for indices on load and store instructions. The pattern for -// S_MOV_IMM_I64 will only match i64 immediates that can fit into 32-bits, -// which the hardware can handle. -def S_MOV_IMM_I64 : InstSI < - (outs SReg_64:$dst), - (ins i64imm:$src0), - "S_MOV_IMM_I64 $dst, $src0", - [(set SReg_64:$dst, (IMM32bitIn64bit:$src0))] ->; - -} // End isCodeGenOnly, isPseudo = 1 - -class SI_LOAD_LITERAL<Operand ImmType> : - Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> { - - bits<32> imm; - let Inst{31-0} = imm; -} - -def SI_LOAD_LITERAL_I32 : SI_LOAD_LITERAL<i32imm>; -def SI_LOAD_LITERAL_F32 : SI_LOAD_LITERAL<f32imm>; - let isCodeGenOnly = 1, isPseudo = 1 in { def SET_M0 : InstSI < (outs SReg_32:$dst), (ins i32imm:$src0), - "SET_M0", + "SET_M0 $dst, $src0", [(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))] >; @@ -1058,13 +1060,6 @@ def LOAD_CONST : AMDGPUShaderInst < let usesCustomInserter = 1 in { -def SI_V_CNDLT : InstSI < - (outs VReg_32:$dst), - (ins VReg_32:$src0, VReg_32:$src1, VReg_32:$src2), - "SI_V_CNDLT $dst, $src0, $src1, $src2", - [(set VReg_32:$dst, (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2))] ->; - def SI_INTERP : InstSI < (outs VReg_32:$dst), (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params), @@ -1072,21 +1067,6 @@ def SI_INTERP : InstSI < [] >; -def SI_INTERP_CONST : InstSI < - (outs VReg_32:$dst), - (ins i32imm:$attr_chan, i32imm:$attr, SReg_32:$params), - "SI_INTERP_CONST $dst, $attr_chan, $attr, $params", - [(set VReg_32:$dst, (int_SI_fs_interp_constant imm:$attr_chan, - imm:$attr, SReg_32:$params))] ->; - -def SI_KIL : InstSI < - (outs), - (ins VReg_32:$src), - "SI_KIL $src", - [(int_AMDGPU_kill VReg_32:$src)] ->; - def SI_WQM : InstSI < (outs), (ins), @@ -1106,15 +1086,15 @@ let isBranch = 1, isTerminator = 1 in { def SI_IF : InstSI < (outs SReg_64:$dst), - (ins SReg_1:$vcc, brtarget:$target), - "SI_IF", - [(set SReg_64:$dst, (int_SI_if SReg_1:$vcc, bb:$target))] + (ins SReg_64:$vcc, brtarget:$target), + "SI_IF $dst, $vcc, $target", + [(set SReg_64:$dst, (int_SI_if SReg_64:$vcc, bb:$target))] >; def SI_ELSE : InstSI < (outs SReg_64:$dst), (ins SReg_64:$src, brtarget:$target), - "SI_ELSE", + "SI_ELSE $dst, $src, $target", [(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]> { let Constraints = "$src = $dst", @@ -1123,7 +1103,7 @@ def SI_ELSE : InstSI < def SI_LOOP : InstSI < (outs), (ins SReg_64:$saved, brtarget:$target), - "SI_LOOP", + "SI_LOOP $saved, $target", [(int_SI_loop SReg_64:$saved, bb:$target)] >; @@ -1132,43 +1112,60 @@ def SI_LOOP : InstSI < def SI_BREAK : InstSI < (outs SReg_64:$dst), (ins SReg_64:$src), - "SI_ELSE", + "SI_BREAK $dst, $src", [(set SReg_64:$dst, (int_SI_break SReg_64:$src))] >; def SI_IF_BREAK : InstSI < (outs SReg_64:$dst), - (ins SReg_1:$vcc, SReg_64:$src), - "SI_IF_BREAK", - [(set SReg_64:$dst, (int_SI_if_break SReg_1:$vcc, SReg_64:$src))] + (ins SReg_64:$vcc, SReg_64:$src), + "SI_IF_BREAK $dst, $vcc, $src", + [(set SReg_64:$dst, (int_SI_if_break SReg_64:$vcc, SReg_64:$src))] >; def SI_ELSE_BREAK : InstSI < (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), - "SI_ELSE_BREAK", + "SI_ELSE_BREAK $dst, $src0, $src1", [(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))] >; def SI_END_CF : InstSI < (outs), (ins SReg_64:$saved), - "SI_END_CF", + "SI_END_CF $saved", [(int_SI_end_cf SReg_64:$saved)] >; +def SI_KILL : InstSI < + (outs), + (ins VReg_32:$src), + "SI_KILL $src", + [(int_AMDGPU_kill VReg_32:$src)] +>; + } // end mayLoad = 1, mayStore = 
1, hasSideEffects = 1 // Uses = [EXEC], Defs = [EXEC] } // end IsCodeGenOnly, isPseudo +def : Pat< + (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2), + (V_CNDMASK_B32_e64 VReg_32:$src2, VReg_32:$src1, (V_CMP_GT_F32_e64 0, VReg_32:$src0)) +>; + +def : Pat < + (int_AMDGPU_kilp), + (SI_KILL (V_MOV_B32_e32 0xbf800000)) +>; + /* int_SI_vs_load_input */ def : Pat< (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset, VReg_32:$buf_idx_vgpr), (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0, VReg_32:$buf_idx_vgpr, SReg_128:$tlst, - 0, 0, (i32 SREG_LIT_0)) + 0, 0, 0) >; /* int_SI_export */ @@ -1179,43 +1176,101 @@ def : Pat < VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3) >; -/* int_SI_sample */ -def : Pat < - (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler), - (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord, - SReg_256:$rsrc, SReg_128:$sampler) ->; -/* int_SI_sample_lod */ +/* int_SI_sample for simple 1D texture lookup */ def : Pat < - (int_SI_sample_lod imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler), - (IMAGE_SAMPLE_L imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord, - SReg_256:$rsrc, SReg_128:$sampler) + (int_SI_sample imm:$writemask, (v1i32 VReg_32:$addr), + SReg_256:$rsrc, SReg_128:$sampler, imm), + (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, + (i32 (COPY_TO_REGCLASS VReg_32:$addr, VReg_32)), + SReg_256:$rsrc, SReg_128:$sampler) >; -/* int_SI_sample_bias */ -def : Pat < - (int_SI_sample_bias imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler), - (IMAGE_SAMPLE_B imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord, - SReg_256:$rsrc, SReg_128:$sampler) ->; +class SamplePattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, + ValueType addr_type> : Pat < + (name imm:$writemask, (addr_type addr_class:$addr), + SReg_256:$rsrc, SReg_128:$sampler, imm), + (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, + (EXTRACT_SUBREG addr_class:$addr, sub0), + SReg_256:$rsrc, SReg_128:$sampler) +>; + +class SampleRectPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, + ValueType addr_type> : Pat < + (name imm:$writemask, (addr_type addr_class:$addr), + SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT), + (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0, + (EXTRACT_SUBREG addr_class:$addr, sub0), + SReg_256:$rsrc, SReg_128:$sampler) +>; + +class SampleArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, + ValueType addr_type> : Pat < + (name imm:$writemask, (addr_type addr_class:$addr), + SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY), + (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, + (EXTRACT_SUBREG addr_class:$addr, sub0), + SReg_256:$rsrc, SReg_128:$sampler) +>; + +class SampleShadowPattern<Intrinsic name, MIMG opcode, + RegisterClass addr_class, ValueType addr_type> : Pat < + (name imm:$writemask, (addr_type addr_class:$addr), + SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW), + (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, + (EXTRACT_SUBREG addr_class:$addr, sub0), + SReg_256:$rsrc, SReg_128:$sampler) +>; + +class SampleShadowArrayPattern<Intrinsic name, MIMG opcode, + RegisterClass addr_class, ValueType addr_type> : Pat < + (name imm:$writemask, (addr_type addr_class:$addr), + SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY), + (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, + (EXTRACT_SUBREG addr_class:$addr, sub0), + SReg_256:$rsrc, SReg_128:$sampler) +>; + +/* int_SI_sample* for texture lookups consuming more address parameters */ +multiclass 
SamplePatterns<RegisterClass addr_class, ValueType addr_type> { + def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>; + def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>; + def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>; + def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>; + def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>; + + def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>; + def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>; + def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>; + def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>; + + def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>; + def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>; + def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>; + def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>; +} -def CLAMP_SI : CLAMP<VReg_32>; -def FABS_SI : FABS<VReg_32>; -def FNEG_SI : FNEG<VReg_32>; +defm : SamplePatterns<VReg_64, v2i32>; +defm : SamplePatterns<VReg_128, v4i32>; +defm : SamplePatterns<VReg_256, v8i32>; +defm : SamplePatterns<VReg_512, v16i32>; -def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>; -def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>; -def : Extract_Element <f32, v4f32, VReg_128, 2, sel_z>; -def : Extract_Element <f32, v4f32, VReg_128, 3, sel_w>; +def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>; +def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>; +def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>; +def : Extract_Element <f32, v4f32, VReg_128, 3, sub3>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sel_w>; +def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sub0>; +def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sub1>; +def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sub2>; +def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sub3>; +def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>; +def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>; def : Vector_Build <v4f32, VReg_128, f32, VReg_32>; -def : Vector_Build <v4i32, SReg_128, i32, SReg_32>; +def : Vector_Build <v4i32, VReg_128, i32, VReg_32>; +def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>; +def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>; def : BitConvert <i32, f32, SReg_32>; def : BitConvert <i32, f32, VReg_32>; @@ -1223,24 +1278,68 @@ def : BitConvert <i32, f32, VReg_32>; def : BitConvert <f32, i32, SReg_32>; def : BitConvert <f32, i32, VReg_32>; +/********** =================== **********/ +/********** Src & Dst modifiers **********/ +/********** =================== **********/ + +def : Pat < + (int_AMDIL_clamp VReg_32:$src, (f32 FP_ZERO), (f32 FP_ONE)), + (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */), + 0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */) +>; + +def : Pat < + (fabs VReg_32:$src), + (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */), + 1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */) +>; + +def : Pat < + (fneg VReg_32:$src), + (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */), + 0 /* ABS 
*/, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */) +>; + +/********** ================== **********/ +/********** Immediate Patterns **********/ +/********** ================== **********/ + +def : Pat < + (i1 imm:$imm), + (S_MOV_B64 imm:$imm) +>; + def : Pat < - (i64 (SIsreg1_bitcast SReg_1:$vcc)), - (S_MOV_B64 (COPY_TO_REGCLASS SReg_1:$vcc, SReg_64)) + (i32 imm:$imm), + (V_MOV_B32_e32 imm:$imm) >; def : Pat < - (i1 (SIsreg1_bitcast SReg_64:$vcc)), - (COPY_TO_REGCLASS SReg_64:$vcc, SReg_1) + (f32 fpimm:$imm), + (V_MOV_B32_e32 fpimm:$imm) >; def : Pat < - (i64 (SIvcc_bitcast VCCReg:$vcc)), - (S_MOV_B64 (COPY_TO_REGCLASS VCCReg:$vcc, SReg_64)) + (i32 imm:$imm), + (S_MOV_B32 imm:$imm) >; def : Pat < - (i1 (SIvcc_bitcast SReg_64:$vcc)), - (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg) + (f32 fpimm:$imm), + (S_MOV_B32 fpimm:$imm) +>; + +def : Pat < + (i64 InlineImm<i64>:$imm), + (S_MOV_B64 InlineImm<i64>:$imm) +>; + +// i64 immediates aren't supported in hardware; split them into two 32-bit values +def : Pat < + (i64 imm:$imm), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (S_MOV_B32 (i32 (LO32 imm:$imm))), sub0), + (S_MOV_B32 (i32 (HI32 imm:$imm))), sub1) >; /********** ===================== **********/ @@ -1248,6 +1347,12 @@ def : Pat < /********** ===================== **********/ def : Pat < + (int_SI_fs_interp_constant imm:$attr_chan, imm:$attr, SReg_32:$params), + (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, + (S_MOV_B32 SReg_32:$params)) +>; + +def : Pat < (int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params), (SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan, imm:$attr, SReg_32:$params) @@ -1305,47 +1410,86 @@ def : Pat < def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_F32_e32, VReg_32>; def : Pat < - (int_AMDGPU_div AllReg_32:$src0, AllReg_32:$src1), - (V_MUL_LEGACY_F32_e32 AllReg_32:$src0, (V_RCP_LEGACY_F32_e32 AllReg_32:$src1)) + (int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1), + (V_MUL_LEGACY_F32_e32 VSrc_32:$src0, (V_RCP_LEGACY_F32_e32 VSrc_32:$src1)) >; def : Pat< - (fdiv AllReg_32:$src0, AllReg_32:$src1), - (V_MUL_F32_e32 AllReg_32:$src0, (V_RCP_F32_e32 AllReg_32:$src1)) + (fdiv VSrc_32:$src0, VSrc_32:$src1), + (V_MUL_F32_e32 VSrc_32:$src0, (V_RCP_F32_e32 VSrc_32:$src1)) >; def : Pat < - (int_AMDGPU_kilp), - (SI_KIL (V_MOV_IMM_I32 0xbf800000)) + (fcos VSrc_32:$src0), + (V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) +>; + +def : Pat < + (fsin VSrc_32:$src0), + (V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) >; def : Pat < (int_AMDGPU_cube VReg_128:$src), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), - (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x), - (EXTRACT_SUBREG VReg_128:$src, sel_y), - (EXTRACT_SUBREG VReg_128:$src, sel_z), - 0, 0, 0, 0), sel_x), - (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x), - (EXTRACT_SUBREG VReg_128:$src, sel_y), - (EXTRACT_SUBREG VReg_128:$src, sel_z), - 0, 0, 0, 0), sel_y), - (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x), - (EXTRACT_SUBREG VReg_128:$src, sel_y), - (EXTRACT_SUBREG VReg_128:$src, sel_z), - 0, 0, 0, 0), sel_z), - (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x), - (EXTRACT_SUBREG VReg_128:$src, sel_y), - (EXTRACT_SUBREG VReg_128:$src, sel_z), - 0, 0, 0, 0), sel_w) + (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0), + (EXTRACT_SUBREG VReg_128:$src, sub1), + (EXTRACT_SUBREG VReg_128:$src, sub2), + 0, 0, 0, 0), sub0), + (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, 
sub0), + (EXTRACT_SUBREG VReg_128:$src, sub1), + (EXTRACT_SUBREG VReg_128:$src, sub2), + 0, 0, 0, 0), sub1), + (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0), + (EXTRACT_SUBREG VReg_128:$src, sub1), + (EXTRACT_SUBREG VReg_128:$src, sub2), + 0, 0, 0, 0), sub2), + (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0), + (EXTRACT_SUBREG VReg_128:$src, sub1), + (EXTRACT_SUBREG VReg_128:$src, sub2), + 0, 0, 0, 0), sub3) +>; + +def : Pat < + (i32 (sext (i1 SReg_64:$src0))), + (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0) >; /********** ================== **********/ /********** VOP3 Patterns **********/ /********** ================== **********/ -def : Pat <(f32 (IL_mad AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2)), - (V_MAD_LEGACY_F32 AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2, +def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VSrc_32:$src1), VSrc_32:$src2)), + (V_MAD_F32 VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2, 0, 0, 0, 0)>; +/********** ================== **********/ +/********** SMRD Patterns **********/ +/********** ================== **********/ + +multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> { + // 1. Offset as an 8-bit DWORD immediate + def : Pat < + (constant_load (SIadd64bit32bit SReg_64:$sbase, IMM8bitDWORD:$offset)), + (vt (Instr_IMM SReg_64:$sbase, IMM8bitDWORD:$offset)) + >; + + // 2. Offset loaded into a 32-bit SGPR + def : Pat < + (constant_load (SIadd64bit32bit SReg_64:$sbase, imm:$offset)), + (vt (Instr_SGPR SReg_64:$sbase, (S_MOV_B32 imm:$offset))) + >; + + // 3. No offset at all + def : Pat < + (constant_load SReg_64:$sbase), + (vt (Instr_IMM SReg_64:$sbase, 0)) + >; +} + +defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>; +defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>; +defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>; +defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>; + } // End isSI predicate diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index c322fef..611b9c4 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -22,9 +22,11 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ; def int_SI_wqm : Intrinsic <[], [], []>; - def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>; - def int_SI_sample_bias : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>; - def int_SI_sample_lod : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>; + class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrReadMem]>; + + def int_SI_sample : Sample; + def int_SI_sampleb : Sample; + def int_SI_samplel : Sample; /* Interpolation Intrinsics */ diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index 3fbe653..b215aa2 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -68,7 +68,10 @@ private: static char ID; const TargetInstrInfo *TII; - void Skip(MachineInstr &MI, MachineOperand &To); + bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); + + void Skip(MachineInstr &From, MachineOperand &To); + + void SkipIfDead(MachineInstr &MI); void If(MachineInstr &MI); void Else(MachineInstr &MI); @@ 
-78,6 +81,7 @@ private: void Loop(MachineInstr &MI); void EndCf(MachineInstr &MI); + void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); public: @@ -100,22 +104,29 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) { return new SILowerControlFlowPass(tm); } -void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { +bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From, + MachineBasicBlock *To) { + unsigned NumInstr = 0; - for (MachineBasicBlock *MBB = *From.getParent()->succ_begin(); - NumInstr < SkipThreshold && MBB != To.getMBB() && !MBB->succ_empty(); + for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty(); MBB = *MBB->succ_begin()) { for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); NumInstr < SkipThreshold && I != E; ++I) { if (I->isBundle() || !I->isBundled()) - ++NumInstr; + if (++NumInstr >= SkipThreshold) + return true; } } - if (NumInstr < SkipThreshold) + return false; +} + +void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { + + if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB())) return; DebugLoc DL = From.getDebugLoc(); @@ -124,6 +135,38 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { .addReg(AMDGPU::EXEC); } +void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + if (!shouldSkip(&MBB, &MBB.getParent()->back())) + return; + + MachineBasicBlock::iterator Insert = &MI; + ++Insert; + + // If the exec mask is non-zero, skip the next two instructions + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addImm(3) + .addReg(AMDGPU::EXEC); + + // Exec mask is zero: Export to NULL target... + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP)) + .addImm(0) + .addImm(0x09) // V_008DFC_SQ_EXP_NULL + .addImm(0) + .addImm(1) + .addImm(1) + .addReg(AMDGPU::VGPR0) + .addReg(AMDGPU::VGPR0) + .addReg(AMDGPU::VGPR0) + .addReg(AMDGPU::VGPR0); + + // ... 
and terminate wavefront + BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)); +} + void SILowerControlFlowPass::If(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); @@ -242,8 +285,27 @@ void SILowerControlFlowPass::Branch(MachineInstr &MI) { assert(0); } +void SILowerControlFlowPass::Kill(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + // Kill is only allowed in pixel shaders + assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType == + ShaderType::PIXEL); + + // Clear this pixel from the exec mask if the operand is negative + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC) + .addImm(0) + .addOperand(MI.getOperand(0)); + + MI.eraseFromParent(); +} + bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { - bool HaveCf = false; + + bool HaveKill = false; + unsigned Depth = 0; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { @@ -257,6 +319,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { switch (MI.getOpcode()) { default: break; case AMDGPU::SI_IF: + ++Depth; If(MI); break; @@ -277,14 +340,26 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { break; case AMDGPU::SI_LOOP: + ++Depth; Loop(MI); break; case AMDGPU::SI_END_CF: - HaveCf = true; + if (--Depth == 0 && HaveKill) { + SkipIfDead(MI); + HaveKill = false; + } EndCf(MI); break; + case AMDGPU::SI_KILL: + if (Depth == 0) + SkipIfDead(MI); + else + HaveKill = true; + Kill(MI); + break; + case AMDGPU::S_BRANCH: Branch(MI); break; @@ -292,40 +367,5 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { } } - // TODO: What is this good for? - unsigned ShaderType = MF.getInfo<SIMachineFunctionInfo>()->ShaderType; - if (HaveCf && ShaderType == ShaderType::PIXEL) { - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); - BI != BE; ++BI) { - - MachineBasicBlock &MBB = *BI; - if (MBB.succ_empty()) { - - MachineInstr &MI = *MBB.getFirstNonPHI(); - DebugLoc DL = MI.getDebugLoc(); - - // If the exec mask is non-zero, skip the next two instructions - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addImm(3) - .addReg(AMDGPU::EXEC); - - // Exec mask is zero: Export to NULL target... - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::EXP)) - .addImm(0) - .addImm(0x09) // V_008DFC_SQ_EXP_NULL - .addImm(0) - .addImm(1) - .addImm(1) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0) - .addReg(AMDGPU::SREG_LIT_0); - - // ... and terminate wavefront - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ENDPGM)); - } - } - } - return true; } diff --git a/lib/Target/R600/SILowerLiteralConstants.cpp b/lib/Target/R600/SILowerLiteralConstants.cpp deleted file mode 100644 index c0411e9..0000000 --- a/lib/Target/R600/SILowerLiteralConstants.cpp +++ /dev/null @@ -1,108 +0,0 @@ -//===-- SILowerLiteralConstants.cpp - Lower intrs using literal constants--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -/// \file -/// \brief This pass performs the following transformation on instructions with -/// literal constants: -/// -/// %VGPR0 = V_MOV_IMM_I32 1 -/// -/// becomes: -/// -/// BUNDLE -/// * %VGPR = V_MOV_B32_32 SI_LITERAL_CONSTANT -/// * SI_LOAD_LITERAL 1 -/// -/// The resulting sequence matches exactly how the hardware handles immediate -/// operands, so this transformation greatly simplifies the code generator. -/// -/// Only the *_MOV_IMM_* support immediate operands at the moment, but when -/// support for immediate operands is added to other instructions, they -/// will be lowered here as well. -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineInstrBundle.h" - -using namespace llvm; - -namespace { - -class SILowerLiteralConstantsPass : public MachineFunctionPass { - -private: - static char ID; - const TargetInstrInfo *TII; - -public: - SILowerLiteralConstantsPass(TargetMachine &tm) : - MachineFunctionPass(ID), TII(tm.getInstrInfo()) { } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { - return "SI Lower literal constants pass"; - } -}; - -} // End anonymous namespace - -char SILowerLiteralConstantsPass::ID = 0; - -FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) { - return new SILowerLiteralConstantsPass(tm); -} - -bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) { - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); - I != MBB.end(); I = Next) { - Next = llvm::next(I); - MachineInstr &MI = *I; - switch (MI.getOpcode()) { - default: break; - case AMDGPU::S_MOV_IMM_I32: - case AMDGPU::S_MOV_IMM_I64: - case AMDGPU::V_MOV_IMM_F32: - case AMDGPU::V_MOV_IMM_I32: { - unsigned MovOpcode; - unsigned LoadLiteralOpcode; - MachineOperand LiteralOp = MI.getOperand(1); - if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) { - MovOpcode = AMDGPU::V_MOV_B32_e32; - } else { - MovOpcode = AMDGPU::S_MOV_B32; - } - if (LiteralOp.isImm()) { - LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32; - } else { - LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32; - } - MIBundleBuilder Bundle(MBB, I); - Bundle - .append(BuildMI(MF, MBB.findDebugLoc(I), TII->get(MovOpcode), - MI.getOperand(0).getReg()) - .addReg(AMDGPU::SI_LITERAL_CONSTANT)) - .append(BuildMI(MF, MBB.findDebugLoc(I), - TII->get(LoadLiteralOpcode)) - .addOperand(MI.getOperand(1))); - llvm::finalizeBundle(MBB, Bundle.begin()); - MI.eraseFromParent(); - break; - } - } - } - } - return false; -} diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index c3f1361..9e04e24 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -1,44 +1,40 @@ - -let Namespace = "AMDGPU" in { - def low : SubRegIndex; - def high : SubRegIndex; - - def sub0 : SubRegIndex; - def sub1 : SubRegIndex; - def sub2 : SubRegIndex; - def sub3 : SubRegIndex; - def sub4 : SubRegIndex; - def sub5 : SubRegIndex; - def sub6 : SubRegIndex; - def sub7 : SubRegIndex; -} +//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===// +// +// The LLVM 
Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the SI registers +//===----------------------------------------------------------------------===// class SIReg <string n, bits<16> encoding = 0> : Register<n> { let Namespace = "AMDGPU"; let HWEncoding = encoding; } -class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> { - let Namespace = "AMDGPU"; - let SubRegIndices = [low, high]; - let HWEncoding = encoding; -} - -class SGPR_32 <bits<16> num, string name> : SIReg<name, num>; - -class VGPR_32 <bits<16> num, string name> : SIReg<name, num>; - // Special Registers def VCC : SIReg<"VCC", 106>; -def EXEC_LO : SIReg <"EXEC LO", 126>; -def EXEC_HI : SIReg <"EXEC HI", 127>; -def EXEC : SI_64<"EXEC", [EXEC_LO, EXEC_HI], 126>; +def EXEC : SIReg<"EXEC", 126>; def SCC : SIReg<"SCC", 253>; -def SREG_LIT_0 : SIReg <"S LIT 0", 128>; -def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT", 255>; def M0 : SIReg <"M0", 124>; -//Interpolation registers +// SGPR registers +foreach Index = 0-101 in { + def SGPR#Index : SIReg <"SGPR"#Index, Index>; +} + +// VGPR registers +foreach Index = 0-255 in { + def VGPR#Index : SIReg <"VGPR"#Index, Index> { + let HWEncoding{8} = 1; + } +} + +// virtual Interpolation registers def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">; def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">; def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">; @@ -64,73 +60,150 @@ def ANCILLARY : SIReg <"ANCILLARY">; def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">; def POS_FIXED_PT : SIReg <"POS_FIXED_PT">; -// SGPR 32-bit registers -foreach Index = 0-101 in { - def SGPR#Index : SGPR_32 <Index, "SGPR"#Index>; -} +//===----------------------------------------------------------------------===// +// Groupings using register classes and tuples +//===----------------------------------------------------------------------===// +// SGPR 32-bit registers def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add (sequence "SGPR%u", 0, 101))>; // SGPR 64-bit registers -def SGPR_64 : RegisterTuples<[low, high], - [(add (decimate SGPR_32, 2)), - (add(decimate (rotl SGPR_32, 1), 2))]>; +def SGPR_64 : RegisterTuples<[sub0, sub1], + [(add (decimate (trunc SGPR_32, 101), 2)), + (add (decimate (shl SGPR_32, 1), 2))]>; // SGPR 128-bit registers -def SGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w], - [(add (decimate SGPR_32, 4)), - (add (decimate (rotl SGPR_32, 1), 4)), - (add (decimate (rotl SGPR_32, 2), 4)), - (add (decimate (rotl SGPR_32, 3), 4))]>; +def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], + [(add (decimate (trunc SGPR_32, 99), 4)), + (add (decimate (shl SGPR_32, 1), 4)), + (add (decimate (shl SGPR_32, 2), 4)), + (add (decimate (shl SGPR_32, 3), 4))]>; // SGPR 256-bit registers def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], - [(add (decimate SGPR_32, 8)), - (add (decimate (rotl SGPR_32, 1), 8)), - (add (decimate (rotl SGPR_32, 2), 8)), - (add (decimate (rotl SGPR_32, 3), 8)), - (add (decimate (rotl SGPR_32, 4), 8)), - (add (decimate (rotl SGPR_32, 5), 8)), - (add (decimate (rotl SGPR_32, 6), 8)), - (add (decimate (rotl SGPR_32, 7), 8))]>; + [(add (decimate (trunc SGPR_32, 95), 4)), + (add (decimate (shl SGPR_32, 1), 4)), + (add (decimate (shl 
SGPR_32, 2), 4)), + (add (decimate (shl SGPR_32, 3), 4)), + (add (decimate (shl SGPR_32, 4), 4)), + (add (decimate (shl SGPR_32, 5), 4)), + (add (decimate (shl SGPR_32, 6), 4)), + (add (decimate (shl SGPR_32, 7), 4))]>; + +// SGPR 512-bit registers +def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, + sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15], + [(add (decimate (trunc SGPR_32, 87), 4)), + (add (decimate (shl SGPR_32, 1), 4)), + (add (decimate (shl SGPR_32, 2), 4)), + (add (decimate (shl SGPR_32, 3), 4)), + (add (decimate (shl SGPR_32, 4), 4)), + (add (decimate (shl SGPR_32, 5), 4)), + (add (decimate (shl SGPR_32, 6), 4)), + (add (decimate (shl SGPR_32, 7), 4)), + (add (decimate (shl SGPR_32, 8), 4)), + (add (decimate (shl SGPR_32, 9), 4)), + (add (decimate (shl SGPR_32, 10), 4)), + (add (decimate (shl SGPR_32, 11), 4)), + (add (decimate (shl SGPR_32, 12), 4)), + (add (decimate (shl SGPR_32, 13), 4)), + (add (decimate (shl SGPR_32, 14), 4)), + (add (decimate (shl SGPR_32, 15), 4))]>; // VGPR 32-bit registers -foreach Index = 0-255 in { - def VGPR#Index : VGPR_32 <Index, "VGPR"#Index>; -} - def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add (sequence "VGPR%u", 0, 255))>; // VGPR 64-bit registers -def VGPR_64 : RegisterTuples<[low, high], - [(add VGPR_32), - (add (rotl VGPR_32, 1))]>; +def VGPR_64 : RegisterTuples<[sub0, sub1], + [(add (trunc VGPR_32, 255)), + (add (shl VGPR_32, 1))]>; // VGPR 128-bit registers -def VGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w], - [(add VGPR_32), - (add (rotl VGPR_32, 1)), - (add (rotl VGPR_32, 2)), - (add (rotl VGPR_32, 3))]>; +def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], + [(add (trunc VGPR_32, 253)), + (add (shl VGPR_32, 1)), + (add (shl VGPR_32, 2)), + (add (shl VGPR_32, 3))]>; + +// VGPR 256-bit registers +def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], + [(add (trunc VGPR_32, 249)), + (add (shl VGPR_32, 1)), + (add (shl VGPR_32, 2)), + (add (shl VGPR_32, 3)), + (add (shl VGPR_32, 4)), + (add (shl VGPR_32, 5)), + (add (shl VGPR_32, 6)), + (add (shl VGPR_32, 7))]>; + +// VGPR 512-bit registers +def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, + sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15], + [(add (trunc VGPR_32, 241)), + (add (shl VGPR_32, 1)), + (add (shl VGPR_32, 2)), + (add (shl VGPR_32, 3)), + (add (shl VGPR_32, 4)), + (add (shl VGPR_32, 5)), + (add (shl VGPR_32, 6)), + (add (shl VGPR_32, 7)), + (add (shl VGPR_32, 8)), + (add (shl VGPR_32, 9)), + (add (shl VGPR_32, 10)), + (add (shl VGPR_32, 11)), + (add (shl VGPR_32, 12)), + (add (shl VGPR_32, 13)), + (add (shl VGPR_32, 14)), + (add (shl VGPR_32, 15))]>; + +//===----------------------------------------------------------------------===// +// Register classes used as source and destination +//===----------------------------------------------------------------------===// + +// Special register classes for predicates and the M0 register +def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)>; +def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>; +def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>; +def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>; // Register class for all scalar registers (SGPRs + Special Registers) def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, - (add SGPR_32, SREG_LIT_0, M0, EXEC_LO, EXEC_HI) + (add SGPR_32, M0Reg) >; -def SReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add SGPR_64, VCC, EXEC)>; - -def 
SReg_1 : RegisterClass<"AMDGPU", [i1], 1, (add VCC, SGPR_64, EXEC)>; +def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64, + (add SGPR_64, VCCReg, EXECReg) +>; def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>; def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>; +def SReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add SGPR_512)>; + // Register class for all vector registers (VGPRs + Interpolation Registers) -def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, - (add VGPR_32, +def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>; + +def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>; + +def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>; + +def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>; + +def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; + +//===----------------------------------------------------------------------===// +// [SV]Src_* register classes, which can have either an immediate or a register +//===----------------------------------------------------------------------===// + +def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; + +def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>; + +def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, + (add VReg_32, SReg_32, PERSP_SAMPLE_I, PERSP_SAMPLE_J, PERSP_CENTER_I, PERSP_CENTER_J, PERSP_CENTROID_I, PERSP_CENTROID_J, @@ -147,21 +220,8 @@ def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, ANCILLARY, SAMPLE_COVERAGE, POS_FIXED_PT - ) + ) >; -def VReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add VGPR_64)>; - -def VReg_128 : RegisterClass<"AMDGPU", [v4f32], 128, (add VGPR_128)>; - -// AllReg_* - A set of all scalar and vector registers of a given width.
-def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add VReg_32, SReg_32)>; - -def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64, (add SReg_64, VReg_64)>; - -// Special register classes for predicates and the M0 register -def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>; -def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>; -def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>; -def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>; +def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index f5e10fc..3d4bfdc 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -21,8 +21,9 @@ void SparcELFMCAsmInfo::anchor() { } SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) { IsLittleEndian = false; Triple TheTriple(TT); - if (TheTriple.getArch() == Triple::sparcv9) - PointerSize = 8; + if (TheTriple.getArch() == Triple::sparcv9) { + PointerSize = CalleeSaveStackSlotSize = 8; + } Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index 6c47c70..a0dae6e 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -67,6 +67,22 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { } } +void SparcFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + MachineInstr &MI = *I; + DebugLoc dl = MI.getDebugLoc(); + int Size = MI.getOperand(0).getImm(); + if (MI.getOpcode() == SP::ADJCALLSTACKDOWN) + Size = -Size; + const SparcInstrInfo &TII = + *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo()); + if (Size) + BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size); + MBB.erase(I); +} + + void SparcFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h index 6b593c9..464233e 100644 --- a/lib/Target/Sparc/SparcFrameLowering.h +++ b/lib/Target/Sparc/SparcFrameLowering.h @@ -32,6 +32,10 @@ public: void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + bool hasFP(const MachineFunction &MF) const { return false; } }; diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 168640f..138b92d 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -95,15 +95,10 @@ SparcTargetLowering::LowerReturn(SDValue Chain, // Analyze return values. CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32); - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. - if (MF.getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); + // Make room for the return address offset. + RetOps.push_back(SDValue()); // Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -115,6 +110,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain, // Guarantee that all emitted copies are stuck together with flags. Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } unsigned RetAddrOffset = 8; //Call Inst + Delay Slot @@ -127,18 +123,19 @@ SparcTargetLowering::LowerReturn(SDValue Chain, SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag); Flag = Chain.getValue(1); - if (MF.getRegInfo().liveout_empty()) - MF.getRegInfo().addLiveOut(SP::I0); + RetOps.push_back(DAG.getRegister(SP::I0, getPointerTy())); RetAddrOffset = 12; // CallInst + Delay Slot + Unimp } - SDValue RetAddrOffsetNode = DAG.getConstant(RetAddrOffset, MVT::i32); + RetOps[0] = Chain; // Update chain. + RetOps[1] = DAG.getConstant(RetAddrOffset, MVT::i32); + // Add the flag if we have it. if (Flag.getNode()) - return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, - RetAddrOffsetNode, Flag); - return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, - RetAddrOffsetNode); + RetOps.push_back(Flag); + + return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, + &RetOps[0], RetOps.size()); } /// LowerFormalArguments - V8 uses a very simple ABI, where all values are @@ -759,10 +756,12 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FMA , MVT::f64, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FMA , MVT::f32, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index e64c140..90b698d 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -126,7 +126,7 @@ def call : SDNode<"SPISD::CALL", SDT_SPCall, def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def flushw : SDNode<"SPISD::FLUSHW", SDTNone, [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>; diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 9c1c30b..25e90b7 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -56,45 +56,27 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -void SparcRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - MachineInstr &MI = *I; - DebugLoc dl = MI.getDebugLoc(); - int Size = MI.getOperand(0).getImm(); - if (MI.getOpcode() == SP::ADJCALLSTACKDOWN) - Size = -Size; - if (Size) - BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size); - MBB.erase(I); -} - void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); - unsigned i = 0; MachineInstr &MI = *II; DebugLoc dl = MI.getDebugLoc(); - while 
(!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); // Addressable stack objects are accessed using neg. offsets from %fp MachineFunction &MF = *MI.getParent()->getParent(); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MI.getOperand(i+1).getImm(); + MI.getOperand(FIOperandNum + 1).getImm(); // Replace frame index with a frame pointer reference. if (Offset >= -4096 && Offset <= 4095) { // If the offset is small enough to fit in the immediate field, directly // encode it. - MI.getOperand(i).ChangeToRegister(SP::I6, false); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(SP::I6, false); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); } else { // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to // scavenge a register here instead of reserving G1 all of the time. @@ -104,8 +86,8 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1) .addReg(SP::I6); // Insert: G1+%lo(offset) into the user. - MI.getOperand(i).ChangeToRegister(SP::G1, false); - MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1)); + MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1)); } } diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 9515ad3..357879b 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -36,12 +36,9 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index ca438eb..b2c6d55 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -57,7 +57,7 @@ private: X86Operand *ParseATTOperand(); X86Operand *ParseIntelOperand(); X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc); - X86Operand *ParseIntelTypeOperator(SMLoc StartLoc); + X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind); X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc); X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size); X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); @@ -168,6 +168,7 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; SMLoc OffsetOfLoc; + bool AddressOf; union { struct { @@ -340,6 +341,10 @@ struct X86Operand : public MCParsedAsmOperand { return OffsetOfLoc.getPointer(); } + bool needAddressOf() const { + return AddressOf; + } + bool needSizeDirective() const { assert(Kind == Memory && "Invalid access!"); return Mem.NeedSizeDir; @@ -471,9 +476,11 @@ struct X86Operand : public MCParsedAsmOperand { } static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, + bool AddressOf = false, SMLoc OffsetOfLoc = SMLoc()) { X86Operand 
*Res = new X86Operand(Register, StartLoc, EndLoc); Res->Reg.RegNo = RegNo; + Res->AddressOf = AddressOf; Res->OffsetOfLoc = OffsetOfLoc; return Res; } @@ -488,7 +495,7 @@ struct X86Operand : public MCParsedAsmOperand { /// Create an absolute memory operand. static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, bool NeedSizeDir = false){ + unsigned Size = 0, bool NeedSizeDir = false) { X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -497,6 +504,7 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.Scale = 1; Res->Mem.Size = Size; Res->Mem.NeedSizeDir = NeedSizeDir; + Res->AddressOf = false; return Res; } @@ -520,6 +528,7 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.Scale = Scale; Res->Mem.Size = Size; Res->Mem.NeedSizeDir = NeedSizeDir; + Res->AddressOf = false; return Res; } }; @@ -675,115 +684,299 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) { return Size; } +enum IntelBracExprState { + IBES_START, + IBES_LBRAC, + IBES_RBRAC, + IBES_REGISTER, + IBES_REGISTER_STAR, + IBES_REGISTER_STAR_INTEGER, + IBES_INTEGER, + IBES_INTEGER_STAR, + IBES_INDEX_REGISTER, + IBES_IDENTIFIER, + IBES_DISP_EXPR, + IBES_MINUS, + IBES_ERROR +}; + +class IntelBracExprStateMachine { + IntelBracExprState State; + unsigned BaseReg, IndexReg, Scale; + int64_t Disp; + + unsigned TmpReg; + int64_t TmpInteger; + + bool isPlus; + +public: + IntelBracExprStateMachine(MCAsmParser &parser) : + State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(0), + TmpReg(0), TmpInteger(0), isPlus(true) {} + + unsigned getBaseReg() { return BaseReg; } + unsigned getIndexReg() { return IndexReg; } + unsigned getScale() { return Scale; } + int64_t getDisp() { return Disp; } + bool isValidEndState() { return State == IBES_RBRAC; } + + void onPlus() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_INTEGER: + State = IBES_START; + if (isPlus) + Disp += TmpInteger; + else + Disp -= TmpInteger; + break; + case IBES_REGISTER: + State = IBES_START; + // If we already have a BaseReg, then assume this is the IndexReg with a + // scale of 1. + if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + break; + case IBES_INDEX_REGISTER: + State = IBES_START; + break; + } + isPlus = true; + } + void onMinus() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_START: + State = IBES_MINUS; + break; + case IBES_INTEGER: + State = IBES_START; + if (isPlus) + Disp += TmpInteger; + else + Disp -= TmpInteger; + break; + case IBES_REGISTER: + State = IBES_START; + // If we already have a BaseReg, then assume this is the IndexReg with a + // scale of 1. 
+ if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + break; + case IBES_INDEX_REGISTER: + State = IBES_START; + break; + } + isPlus = false; + } + void onRegister(unsigned Reg) { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_START: + State = IBES_REGISTER; + TmpReg = Reg; + break; + case IBES_INTEGER_STAR: + assert (!IndexReg && "IndexReg already set!"); + State = IBES_INDEX_REGISTER; + IndexReg = Reg; + Scale = TmpInteger; + break; + } + } + void onDispExpr() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_START: + State = IBES_DISP_EXPR; + break; + } + } + void onInteger(int64_t TmpInt) { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_START: + State = IBES_INTEGER; + TmpInteger = TmpInt; + break; + case IBES_MINUS: + State = IBES_INTEGER; + TmpInteger = TmpInt; + break; + case IBES_REGISTER_STAR: + assert (!IndexReg && "IndexReg already set!"); + State = IBES_INDEX_REGISTER; + IndexReg = TmpReg; + Scale = TmpInt; + break; + } + } + void onStar() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_INTEGER: + State = IBES_INTEGER_STAR; + break; + case IBES_REGISTER: + State = IBES_REGISTER_STAR; + break; + } + } + void onLBrac() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_RBRAC: + State = IBES_START; + isPlus = true; + break; + } + } + void onRBrac() { + switch (State) { + default: + State = IBES_ERROR; + break; + case IBES_DISP_EXPR: + State = IBES_RBRAC; + break; + case IBES_INTEGER: + State = IBES_RBRAC; + if (isPlus) + Disp += TmpInteger; + else + Disp -= TmpInteger; + break; + case IBES_REGISTER: + State = IBES_RBRAC; + // If we already have a BaseReg, then assume this is the IndexReg with a + // scale of 1. + if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + break; + case IBES_INDEX_REGISTER: + State = IBES_RBRAC; + break; + } + } +}; + X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, unsigned Size) { - unsigned BaseReg = 0, IndexReg = 0, Scale = 1; const AsmToken &Tok = Parser.getTok(); SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc(); - const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); - // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ] - // Eat '[' if (getLexer().isNot(AsmToken::LBrac)) return ErrorOperand(Start, "Expected '[' token!"); Parser.Lex(); + unsigned TmpReg = 0; + + // Try to handle '[' 'symbol' ']' if (getLexer().is(AsmToken::Identifier)) { - // Parse BaseReg - if (ParseRegister(BaseReg, Start, End)) { - // Handle '[' 'symbol' ']' - if (getParser().ParseExpression(Disp, End)) return 0; + if (ParseRegister(TmpReg, Start, End)) { + const MCExpr *Disp; + if (getParser().parseExpression(Disp, End)) + return 0; + if (getLexer().isNot(AsmToken::RBrac)) return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!"); - End = Parser.getTok().getEndLoc(); + // Adjust the EndLoc due to the ']'. 
+ End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1); Parser.Lex(); return X86Operand::CreateMem(Disp, Start, End, Size); } - } else if (getLexer().is(AsmToken::Integer)) { - int64_t Val = Tok.getIntVal(); - Parser.Lex(); - SMLoc Loc = Tok.getLoc(); - if (getLexer().is(AsmToken::RBrac)) { - // Handle '[' number ']' - End = Parser.getTok().getEndLoc(); - Parser.Lex(); - const MCExpr *Disp = MCConstantExpr::Create(Val, getContext()); - if (SegReg) - return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale, - Start, End, Size); - return X86Operand::CreateMem(Disp, Start, End, Size); - } else if (getLexer().is(AsmToken::Star)) { - // Handle '[' Scale*IndexReg ']' - Parser.Lex(); - SMLoc IdxRegLoc = Tok.getLoc(); - if (ParseRegister(IndexReg, IdxRegLoc, End)) - return ErrorOperand(IdxRegLoc, "Expected register"); - Scale = Val; - } else - return ErrorOperand(Loc, "Unexpected token"); } - // Parse ][ as a plus. - bool ExpectRBrac = true; - if (getLexer().is(AsmToken::RBrac)) { - ExpectRBrac = false; - End = Parser.getTok().getEndLoc(); - Parser.Lex(); - } + // Parse [ BaseReg + Scale*IndexReg + Disp ]. + bool Done = false; + IntelBracExprStateMachine SM(Parser); + + // If we parsed a register, then the end loc has already been set and + // the identifier has already been lexed. We also need to update the + // state. + if (TmpReg) + SM.onRegister(TmpReg); + + const MCExpr *Disp = 0; + while (!Done) { + bool UpdateLocLex = true; - if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus) || - getLexer().is(AsmToken::LBrac)) { - ExpectRBrac = true; - bool isPlus = getLexer().is(AsmToken::Plus) || - getLexer().is(AsmToken::LBrac); - Parser.Lex(); - SMLoc PlusLoc = Tok.getLoc(); - if (getLexer().is(AsmToken::Integer)) { + // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an + // identifier. Don't try to parse it as a register. + if (Tok.getString().startswith(".")) + break; + + switch (getLexer().getKind()) { + default: { + if (SM.isValidEndState()) { + Done = true; + break; + } + return ErrorOperand(Tok.getLoc(), "Unexpected token!"); + } + case AsmToken::Identifier: { + // This could be a register or a displacement expression. + if(!ParseRegister(TmpReg, Start, End)) { + SM.onRegister(TmpReg); + UpdateLocLex = false; + break; + } else if (!getParser().parseExpression(Disp, End)) { + SM.onDispExpr(); + UpdateLocLex = false; + break; + } + return ErrorOperand(Tok.getLoc(), "Unexpected identifier!"); + } + case AsmToken::Integer: { int64_t Val = Tok.getIntVal(); - Parser.Lex(); - if (getLexer().is(AsmToken::Star)) { - Parser.Lex(); - SMLoc IdxRegLoc = Tok.getLoc(); - if (ParseRegister(IndexReg, IdxRegLoc, End)) - return ErrorOperand(IdxRegLoc, "Expected register"); - Scale = Val; - } else if (getLexer().is(AsmToken::RBrac)) { - const MCExpr *ValExpr = MCConstantExpr::Create(Val, getContext()); - Disp = isPlus ? ValExpr : MCConstantExpr::Create(0-Val, getContext()); - } else - return ErrorOperand(PlusLoc, "unexpected token after +"); - } else if (getLexer().is(AsmToken::Identifier)) { - // This could be an index register or a displacement expression. - if (!IndexReg) - ParseRegister(IndexReg, Start, End); - else if (getParser().ParseExpression(Disp, End)) - return 0; + SM.onInteger(Val); + break; } - } - - // Parse ][ as a plus.
- if (getLexer().is(AsmToken::RBrac)) { - ExpectRBrac = false; - End = Parser.getTok().getEndLoc(); - Parser.Lex(); - if (getLexer().is(AsmToken::LBrac)) { - ExpectRBrac = true; - Parser.Lex(); - if (getParser().ParseExpression(Disp, End)) - return 0; + case AsmToken::Plus: SM.onPlus(); break; + case AsmToken::Minus: SM.onMinus(); break; + case AsmToken::Star: SM.onStar(); break; + case AsmToken::LBrac: SM.onLBrac(); break; + case AsmToken::RBrac: SM.onRBrac(); break; + } + if (!Done && UpdateLocLex) { + End = Tok.getLoc(); + Parser.Lex(); // Consume the token. } - } else if (ExpectRBrac) { - if (getParser().ParseExpression(Disp, End)) - return 0; } - if (ExpectRBrac) { - if (getLexer().isNot(AsmToken::RBrac)) - return ErrorOperand(End, "expected ']' token!"); - End = Parser.getTok().getEndLoc(); - Parser.Lex(); - } + if (!Disp) + Disp = MCConstantExpr::Create(SM.getDisp(), getContext()); // Parse the dot operator (e.g., [ebx].foo.bar). if (Tok.getString().startswith(".")) { @@ -797,10 +990,18 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, Disp = NewDisp; } + int BaseReg = SM.getBaseReg(); + int IndexReg = SM.getIndexReg(); + // handle [-42] - if (!BaseReg && !IndexReg) - return X86Operand::CreateMem(Disp, Start, End, Size); + if (!BaseReg && !IndexReg) { + if (!SegReg) + return X86Operand::CreateMem(Disp, Start, End); + else + return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size); + } + int Scale = SM.getScale(); return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, End, Size); } @@ -832,28 +1033,43 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { } const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); - if (getParser().ParseExpression(Disp, End)) + if (getParser().parseExpression(Disp, End)) return 0; bool NeedSizeDir = false; - if (!Size && isParsingInlineAsm()) { + bool IsVarDecl = false; + if (isParsingInlineAsm()) { if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) { const MCSymbol &Sym = SymRef->getSymbol(); // FIXME: The SemaLookup will fail if the name is anything other than an // identifier. // FIXME: Pass a valid SMLoc. - SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size); + unsigned tLength, tSize, tType; + SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, + tSize, tType, IsVarDecl); + if (!Size) + Size = tType * 8; // Size is in terms of bits in this context. NeedSizeDir = Size > 0; } } if (!isParsingInlineAsm()) return X86Operand::CreateMem(Disp, Start, End, Size); - else + else { + // If this is not a VarDecl then assume it is a FuncDecl or some other label + // reference. We need an 'r' constraint here, so we need to create a register + // operand to ensure proper matching. Just pick a GPR based on the size of + // a pointer. + if (!IsVarDecl) { + unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; + return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); + } + // When parsing inline assembly we set the base register to a non-zero value // as we don't know the actual value at this time. This is necessary to // get the matching correct in some cases. return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, /*Scale*/1, Start, End, Size, NeedSizeDir); + } } /// Parse the '.' operator.
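The IntelBracExprStateMachine introduced above replaces the old ad-hoc lookahead: ParseIntelBracExpression now feeds one token at a time (onPlus, onMinus, onRegister, onInteger, onStar, onLBrac, onRBrac) into the machine, which accumulates BaseReg, IndexReg, Scale and Disp and reports through isValidEndState whether a well-formed [ BaseReg + Scale*IndexReg + Disp ] expression was seen. A minimal, self-contained C++ sketch of the same token-driven scheme follows; Tok and BracketSM are illustrative names rather than the classes in this patch, the state set is reduced, and error handling, bracket pairing and displacement identifiers are omitted:

// Token-driven sketch of parsing "Base + Scale*Index + Disp" terms.
#include <cstdint>
#include <cstdio>

struct Tok { enum Kind { Reg, Int, Plus, Minus, Star, End } K; const char *R; int64_t V; };

class BracketSM {
  enum State { Start, HasReg, HasInt, RegStar, IntStar, HasIndex } S = Start;
  const char *TmpReg = nullptr;
  int64_t TmpInt = 0;
  bool IsPlus = true;

  // Fold the pending term (a lone register or integer) into the result.
  void term() {
    if (S == HasInt)
      Disp += IsPlus ? TmpInt : -TmpInt;
    else if (S == HasReg) {
      if (!Base) Base = TmpReg;
      else { Index = TmpReg; Scale = 1; } // second bare register -> index
    }
  }

public:
  const char *Base = nullptr, *Index = nullptr;
  int64_t Scale = 1, Disp = 0;

  void feed(const Tok &T) {
    switch (T.K) {
    case Tok::Reg:                      // "4*reg" completes an index term
      if (S == IntStar) { Index = T.R; Scale = TmpInt; S = HasIndex; }
      else              { TmpReg = T.R; S = HasReg; }
      break;
    case Tok::Int:                      // "reg*4" completes an index term
      if (S == RegStar) { Index = TmpReg; Scale = T.V; S = HasIndex; }
      else              { TmpInt = T.V; S = HasInt; }
      break;
    case Tok::Star:  S = (S == HasReg) ? RegStar : IntStar; break;
    case Tok::Plus:  term(); S = Start; IsPlus = true;  break;
    case Tok::Minus: term(); S = Start; IsPlus = false; break;
    case Tok::End:   term(); break;
    }
  }
};

int main() {
  // [ebx + 4*ecx - 8] as a token stream.
  Tok Toks[] = {{Tok::Reg, "ebx", 0}, {Tok::Plus, nullptr, 0},
                {Tok::Int, nullptr, 4}, {Tok::Star, nullptr, 0},
                {Tok::Reg, "ecx", 0}, {Tok::Minus, nullptr, 0},
                {Tok::Int, nullptr, 8}, {Tok::End, nullptr, 0}};
  BracketSM SM;
  for (const Tok &T : Toks)
    SM.feed(T);
  std::printf("base=%s index=%s scale=%lld disp=%lld\n", SM.Base, SM.Index,
              (long long)SM.Scale, (long long)SM.Disp);
  return 0; // prints: base=ebx index=ecx scale=4 disp=-8
}

Feeding it the tokens of "ebx + 4*ecx - 8" yields base=ebx, index=ecx, scale=4, disp=-8, the same four values the patch's machine hands back through getBaseReg(), getIndexReg(), getScale() and getDisp().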
@@ -919,7 +1135,7 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { SMLoc End; const MCExpr *Val; - if (getParser().ParseExpression(Val, End)) + if (getParser().parseExpression(Val, End)) return ErrorOperand(Start, "Unable to parse expression!"); // Don't emit the offset operator. @@ -929,13 +1145,23 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { // register operand to ensure proper matching. Just pick a GPR based on // the size of a pointer. unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; - return X86Operand::CreateReg(RegNo, Start, End, OffsetOfLoc); + return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true, + OffsetOfLoc); } -/// Parse the 'TYPE' operator. The TYPE operator returns the size of a C or -/// C++ type or variable. If the variable is an array, TYPE returns the size of -/// a single element of the array. -X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) { +enum IntelOperatorKind { + IOK_LENGTH, + IOK_SIZE, + IOK_TYPE +}; + +/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator +/// returns the number of elements in an array. It returns the value 1 for +/// non-array variables. The SIZE operator returns the size of a C or C++ +/// variable. A variable's size is the product of its LENGTH and TYPE. The +/// TYPE operator returns the size of a C or C++ type or variable. If the +/// variable is an array, TYPE returns the size of a single element. +X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { SMLoc TypeLoc = Start; Parser.Lex(); // Eat the operator. Start = Parser.getTok().getLoc(); @@ -943,60 +1169,63 @@ X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) { SMLoc End; const MCExpr *Val; - if (getParser().ParseExpression(Val, End)) + if (getParser().parseExpression(Val, End)) return 0; - unsigned Size = 0; + unsigned Length = 0, Size = 0, Type = 0; if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) { const MCSymbol &Sym = SymRef->getSymbol(); // FIXME: The SemaLookup will fail if the name is anything other than an // identifier. // FIXME: Pass a valid SMLoc. - if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size)) - return ErrorOperand(Start, "Unable to lookup TYPE of expr!"); - - Size /= 8; // Size is in terms of bits, but we want bytes in the context. + bool IsVarDecl; + if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length, + Size, Type, IsVarDecl)) + return ErrorOperand(Start, "Unable to lookup expr!"); + } + unsigned CVal; + switch(OpKind) { + default: llvm_unreachable("Unexpected operand kind!"); + case IOK_LENGTH: CVal = Length; break; + case IOK_SIZE: CVal = Size; break; + case IOK_TYPE: CVal = Type; break; } // Rewrite the type operator and the C or C++ type or variable in terms of an // immediate. E.g. TYPE foo -> $$4 unsigned Len = End.getPointer() - TypeLoc.getPointer(); - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, Size)); + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal)); - const MCExpr *Imm = MCConstantExpr::Create(Size, getContext()); + const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext()); return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false); } X86Operand *X86AsmParser::ParseIntelOperand() { SMLoc Start = Parser.getTok().getLoc(), End; - - // offset operator.
StringRef AsmTokStr = Parser.getTok().getString(); - if ((AsmTokStr == "offset" || AsmTokStr == "OFFSET") && - isParsingInlineAsm()) - return ParseIntelOffsetOfOperator(Start); - - // Type directive. - if ((AsmTokStr == "type" || AsmTokStr == "TYPE") && - isParsingInlineAsm()) - return ParseIntelTypeOperator(Start); - - // Unsupported directives. - if (isParsingIntelSyntax() && - (AsmTokStr == "size" || AsmTokStr == "SIZE" || - AsmTokStr == "length" || AsmTokStr == "LENGTH")) - return ErrorOperand(Start, "Unsupported directive!"); - - // immediate. + + // Offset, length, type and size operators. + if (isParsingInlineAsm()) { + if (AsmTokStr == "offset" || AsmTokStr == "OFFSET") + return ParseIntelOffsetOfOperator(Start); + if (AsmTokStr == "length" || AsmTokStr == "LENGTH") + return ParseIntelOperator(Start, IOK_LENGTH); + if (AsmTokStr == "size" || AsmTokStr == "SIZE") + return ParseIntelOperator(Start, IOK_SIZE); + if (AsmTokStr == "type" || AsmTokStr == "TYPE") + return ParseIntelOperator(Start, IOK_TYPE); + } + + // Immediate. if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || getLexer().is(AsmToken::Minus)) { const MCExpr *Val; - if (!getParser().ParseExpression(Val, End)) { + if (!getParser().parseExpression(Val, End)) { return X86Operand::CreateImm(Val, Start, End); } } - // register + // Register. unsigned RegNo = 0; if (!ParseRegister(RegNo, Start, End)) { // If this is a segment register followed by a ':', then this is the start @@ -1008,7 +1237,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() { return ParseIntelMemOperand(RegNo, Start); } - // mem operand + // Memory operand. return ParseIntelMemOperand(0, Start); } @@ -1042,7 +1271,7 @@ X86Operand *X86AsmParser::ParseATTOperand() { SMLoc Start = Parser.getTok().getLoc(), End; Parser.Lex(); const MCExpr *Val; - if (getParser().ParseExpression(Val, End)) + if (getParser().parseExpression(Val, End)) return 0; return X86Operand::CreateImm(Val, Start, End); } @@ -1060,7 +1289,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); if (getLexer().isNot(AsmToken::LParen)) { SMLoc ExprEnd; - if (getParser().ParseExpression(Disp, ExprEnd)) return 0; + if (getParser().parseExpression(Disp, ExprEnd)) return 0; // After parsing the base expression we could either have a parenthesized // memory address or not. If not, return now. If so, eat the (. @@ -1086,7 +1315,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { SMLoc ExprEnd; // It must be a parenthesized expression, parse it now.
- if (getParser().ParseParenExpression(Disp, ExprEnd)) + if (getParser().parseParenExpression(Disp, ExprEnd)) return 0; // After parsing the base expression we could either have a parenthesized @@ -1146,7 +1375,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { SMLoc Loc = Parser.getTok().getLoc(); int64_t ScaleVal; - if (getParser().ParseAbsoluteExpression(ScaleVal)){ + if (getParser().parseAbsoluteExpression(ScaleVal)){ Error(Loc, "expected scale expression"); return 0; } @@ -1165,7 +1394,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { SMLoc Loc = Parser.getTok().getLoc(); int64_t Value; - if (getParser().ParseAbsoluteExpression(Value)) + if (getParser().parseAbsoluteExpression(Value)) return 0; if (Value != 1) @@ -1306,7 +1535,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, if (X86Operand *Op = ParseOperand()) Operands.push_back(Op); else { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return true; } @@ -1317,14 +1546,14 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, if (X86Operand *Op = ParseOperand()) Operands.push_back(Op); else { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return true; } } if (getLexer().isNot(AsmToken::EndOfStatement)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } } @@ -2014,10 +2243,10 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; - if (getParser().ParseExpression(Value)) + if (getParser().parseExpression(Value)) return true; - getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); + getParser().getStreamer().EmitValue(Value, Size); if (getLexer().is(AsmToken::EndOfStatement)) break; diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 64ac5e6..0f6eeb1 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -34,10 +34,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, switch (MI->getOpcode()) { case X86::INSERTPSrr: - Src1Name = getRegName(MI->getOperand(0).getReg()); - Src2Name = getRegName(MI->getOperand(2).getReg()); - DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask); - break; case X86::VINSERTPSrr: DestName = getRegName(MI->getOperand(0).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -46,10 +42,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::MOVLHPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeMOVLHPSMask(2, ShuffleMask); - break; case X86::VMOVLHPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -58,10 +50,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::MOVHLPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeMOVHLPSMask(2, ShuffleMask); - break; case X86::VMOVHLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -69,6 +57,29 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeMOVHLPSMask(2, ShuffleMask); break; + case X86::PALIGNR128rr: 
+ case X86::VPALIGNR128rr: + Src1Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PALIGNR128rm: + case X86::VPALIGNR128rm: + Src2Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePALIGNRMask(MVT::v16i8, + MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + case X86::VPALIGNR256rr: + Src1Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPALIGNR256rm: + Src2Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePALIGNRMask(MVT::v32i8, + MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + case X86::PSHUFDri: case X86::VPSHUFDri: Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -131,15 +142,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::PUNPCKHBWrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHBWrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v16i8, ShuffleMask); - break; case X86::VPUNPCKHBWrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKHBWrm: case X86::VPUNPCKHBWrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -154,15 +160,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKHMask(MVT::v32i8, ShuffleMask); break; case X86::PUNPCKHWDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHWDrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v8i16, ShuffleMask); - break; case X86::VPUNPCKHWDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKHWDrm: case X86::VPUNPCKHWDrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -177,15 +178,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKHMask(MVT::v16i16, ShuffleMask); break; case X86::PUNPCKHDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHDQrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v4i32, ShuffleMask); - break; case X86::VPUNPCKHDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKHDQrm: case X86::VPUNPCKHDQrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -200,15 +196,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKHMask(MVT::v8i32, ShuffleMask); break; case X86::PUNPCKHQDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHQDQrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v2i64, ShuffleMask); - break; case X86::VPUNPCKHQDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKHQDQrm: case X86::VPUNPCKHQDQrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -224,15 +215,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::PUNPCKLBWrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. 
- case X86::PUNPCKLBWrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v16i8, ShuffleMask); - break; case X86::VPUNPCKLBWrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKLBWrm: case X86::VPUNPCKLBWrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -247,15 +233,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKLMask(MVT::v32i8, ShuffleMask); break; case X86::PUNPCKLWDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLWDrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v8i16, ShuffleMask); - break; case X86::VPUNPCKLWDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKLWDrm: case X86::VPUNPCKLWDrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -270,15 +251,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKLMask(MVT::v16i16, ShuffleMask); break; case X86::PUNPCKLDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLDQrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v4i32, ShuffleMask); - break; case X86::VPUNPCKLDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKLDQrm: case X86::VPUNPCKLDQrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -293,15 +269,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKLMask(MVT::v8i32, ShuffleMask); break; case X86::PUNPCKLQDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLQDQrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v2i64, ShuffleMask); - break; case X86::VPUNPCKLQDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::PUNPCKLQDQrm: case X86::VPUNPCKLQDQrm: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -317,16 +288,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::SHUFPDrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::SHUFPDrmi: - DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VSHUFPDrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::SHUFPDrmi: case X86::VSHUFPDrmi: DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); @@ -344,16 +309,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::SHUFPSrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::SHUFPSrmi: - DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VSHUFPSrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
+ case X86::SHUFPSrmi: case X86::VSHUFPSrmi: DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); @@ -371,15 +330,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::UNPCKLPDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKLPDrm: - DecodeUNPCKLMask(MVT::v2f64, ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VUNPCKLPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::UNPCKLPDrm: case X86::VUNPCKLPDrm: DecodeUNPCKLMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -394,15 +348,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::UNPCKLPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKLPSrm: - DecodeUNPCKLMask(MVT::v4f32, ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VUNPCKLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::UNPCKLPSrm: case X86::VUNPCKLPSrm: DecodeUNPCKLMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -417,15 +366,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::UNPCKHPDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKHPDrm: - DecodeUNPCKHMask(MVT::v2f64, ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VUNPCKHPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::UNPCKHPDrm: case X86::VUNPCKHPDrm: DecodeUNPCKHMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -440,15 +384,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::UNPCKHPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKHPSrm: - DecodeUNPCKHMask(MVT::v4f32, ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; case X86::VUNPCKHPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. + case X86::UNPCKHPSrm: case X86::VUNPCKHPSrm: DecodeUNPCKHMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 7ea1961..9e68388 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -104,7 +104,7 @@ namespace X86II { /// MO_TLSLD - On a symbol operand this indicates that the immediate is /// the offset of the GOT entry with the TLS index for the module that - /// contains the symbol. When this index is passed to a call to to + /// contains the symbol. When this index is passed to a call to /// __tls_get_addr, the function will return the base address of the TLS /// block for the symbol. Used in the x86-64 local dynamic TLS access model. /// @@ -114,7 +114,7 @@ namespace X86II { /// MO_TLSLDM - On a symbol operand this indicates that the immediate is /// the offset of the GOT entry with the TLS index for the module that - /// contains the symbol. When this index is passed to a call to to + /// contains the symbol. 
When this index is passed to a call to /// ___tls_get_addr, the function will return the base address of the TLS /// block for the symbol. Used in the IA32 local dynamic TLS access model. /// diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 16488eb..7815ae9 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -44,7 +44,7 @@ void X86MCAsmInfoDarwin::anchor() { } X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { bool is64Bit = T.getArch() == Triple::x86_64; if (is64Bit) - PointerSize = 8; + PointerSize = CalleeSaveStackSlotSize = 8; AssemblerDialect = AsmWriterFlavor; @@ -76,8 +76,16 @@ X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple) void X86ELFMCAsmInfo::anchor() { } X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { - if (T.getArch() == Triple::x86_64) - PointerSize = 8; + bool is64Bit = T.getArch() == Triple::x86_64; + bool isX32 = T.getEnvironment() == Triple::GNUX32; + + // For ELF, x86-64 pointer size depends on the ABI. + // For x86-64 without the x32 ABI, pointer size is 8. For x86 and for x86-64 + // with the x32 ABI, pointer size remains the default 4. + PointerSize = (is64Bit && !isX32) ? 8 : 4; + + // OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI. + CalleeSaveStackSlotSize = is64Bit ? 8 : 4; AssemblerDialect = AsmWriterFlavor; diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 4011035..496b704 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -953,3 +953,12 @@ similarly, v[0]-v[1] should match to hsubpd, and {v[0]-v[1], w[0]-w[1]} should turn into hsubpd also. //===---------------------------------------------------------------------===// + +define <2 x i32> @foo(<2 x double> %in) { + %x = fptosi <2 x double> %in to <2 x i32> + ret <2 x i32> %x +} + +Should compile into cvttpd2dq instead of being scalarized into 2 cvttsd2si. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 8b87c1f..bbd4904 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -61,6 +61,24 @@ void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { ShuffleMask.push_back(NElts+i); } +void DecodePALIGNRMask(MVT VT, unsigned Imm, + SmallVectorImpl<int> &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); + unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8); + + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumLaneElts = NumElts / NumLanes; + + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + for (unsigned i = 0; i != NumLaneElts; ++i) { + unsigned Base = i + Offset; + // if i+offset is out of this lane then we actually need the other source + if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; + ShuffleMask.push_back(Base + l); + } + } +} + /// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. 
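The new DecodePALIGNRMask above converts PALIGNR's byte-shift immediate into a generic shuffle mask, one 128-bit lane at a time: element i of a lane reads element i+Offset of that lane, and anything past the lane end is redirected into the second source, which occupies indices [NumElts, 2*NumElts) in shuffle-mask numbering. A standalone sketch of the same computation, with the MVT argument replaced by plain width parameters purely for illustration (the in-tree function takes an MVT):

// Standalone version of the PALIGNR mask decode shown in the hunk above.
#include <cstdio>
#include <vector>

static void decodePALIGNRMask(unsigned NumElts, unsigned EltBits,
                              unsigned VecBits, unsigned Imm,
                              std::vector<int> &ShuffleMask) {
  // Byte offset; EltBits is 8 for the v16i8/v32i8 decodes used above.
  unsigned Offset = Imm * (EltBits / 8);
  unsigned NumLanes = VecBits / 128;       // VPALIGNR works per 128-bit lane
  unsigned NumLaneElts = NumElts / NumLanes;

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Base = i + Offset;
      // If i+Offset runs past this lane, the element comes from the second
      // source operand, i.e. from index space [NumElts, 2*NumElts).
      if (Base >= NumLaneElts)
        Base += NumElts - NumLaneElts;
      ShuffleMask.push_back(Base + l);
    }
  }
}

int main() {
  std::vector<int> Mask;
  decodePALIGNRMask(/*v16i8*/ 16, 8, 128, /*Imm=*/4, Mask);
  for (int M : Mask)
    std::printf("%d ", M);  // 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
  std::printf("\n");
  return 0;
}

For the 256-bit VPALIGNR cases the same loop simply runs once per 128-bit lane, which is why they are decoded above with MVT::v32i8.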
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 70d8171..017ab32 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -35,6 +35,8 @@ void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask); // <0,2> or <0,1,4,5> void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask); +void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); + void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 3ab2899..0216252 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -120,6 +120,8 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true", "Support BMI2 instructions">; def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true", "Support RTM instructions">; +def FeatureADX : SubtargetFeature<"adx", "HasADX", "true", + "Support ADX instructions">; def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 5b3e0ba..ac5daec 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -252,14 +252,15 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, } case MachineOperand::MO_Immediate: - O << '$' << MO.getImm(); + if (AsmVariant == 0) O << '$'; + O << MO.getImm(); return; case MachineOperand::MO_JumpTableIndex: case MachineOperand::MO_ConstantPoolIndex: case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: { - O << '$'; + if (AsmVariant == 0) O << '$'; printSymbolOperand(MO, O); break; } @@ -355,19 +356,23 @@ void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op, NeedPlus = true; } - assert (DispSpec.isImm() && "Displacement is not an immediate!"); - int64_t DispVal = DispSpec.getImm(); - if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) { - if (NeedPlus) { - if (DispVal > 0) - O << " + "; - else { - O << " - "; - DispVal = -DispVal; + if (!DispSpec.isImm()) { + if (NeedPlus) O << " + "; + printOperand(MI, Op+3, O, Modifier, AsmVariant); + } else { + int64_t DispVal = DispSpec.getImm(); + if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) { + if (NeedPlus) { + if (DispVal > 0) + O << " + "; + else { + O << " - "; + DispVal = -DispVal; + } } + O << DispVal; } - O << DispVal; - } + } O << ']'; } @@ -543,7 +548,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { MCSA_IndirectSymbol); // hlt; hlt; hlt; hlt; hlt hlt = 0xf4. const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4"; - OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/); + OutStreamer.EmitBytes(StringRef(HltInsts, 5)); } Stubs.clear(); @@ -569,7 +574,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // .long 0 if (MCSym.getInt()) // External to current translation unit. - OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); + OutStreamer.EmitIntValue(0, 4/*size*/); else // Internal to current translation unit. // @@ -578,8 +583,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // using NLPs. However, sometimes the types are local to the file. So // we need to fill in the value for the NLP in those cases. 
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), - OutContext), - 4/*size*/, 0/*addrspace*/); + OutContext), 4/*size*/); } Stubs.clear(); OutStreamer.AddBlankLine(); @@ -596,8 +600,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // .long _foo OutStreamer.EmitValue(MCSymbolRefExpr:: Create(Stubs[i].second.getPointer(), - OutContext), - 4/*size*/, 0/*addrspace*/); + OutContext), 4/*size*/); } Stubs.clear(); OutStreamer.AddBlankLine(); @@ -663,7 +666,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { name += ",DATA"; else name += ",data"; - OutStreamer.EmitBytes(name, 0); + OutStreamer.EmitBytes(name); } for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) { @@ -672,7 +675,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { else name = " -export:"; name += DLLExportedFns[i]->getName(); - OutStreamer.EmitBytes(name, 0); + OutStreamer.EmitBytes(name); } } } @@ -692,7 +695,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer.EmitLabel(Stubs[i].first); OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(), 0); + TD->getPointerSize()); } Stubs.clear(); } diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 61eb14e..bc7496b 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -1,4 +1,4 @@ -//===-- X86AsmPrinter.h - Convert X86 LLVM code to assembly -----*- C++ -*-===// +//===-- X86AsmPrinter.h - X86 implementation of AsmPrinter ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// AT&T assembly code printer class. -// -//===----------------------------------------------------------------------===// #ifndef X86ASMPRINTER_H #define X86ASMPRINTER_H @@ -35,7 +31,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { } virtual const char *getPassName() const LLVM_OVERRIDE { - return "X86 AT&T-Style Assembly Printer"; + return "X86 Assembly / Object Emitter"; } const X86Subtarget &getSubtarget() const { return *Subtarget; } diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 7ad2fdd..b516be0 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -519,6 +519,9 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>; def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15, (sequence "XMM%u", 6, 15))>; +def CSR_MostRegs_64 : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10, + R11, R12, R13, R14, R15, RBP, + (sequence "XMM%u", 0, 15))>; // Standard C + YMM6-15 def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index bc77334..ece38aa 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -124,7 +124,7 @@ template<class CodeEmitter> } // end anonymous namespace. /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code -/// to the specified templated MachineCodeEmitter object. +/// to the specified JITCodeEmitter object. 
FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE) { return new Emitter<JITCodeEmitter>(TM, JCE); diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm index f321778..69b4c71 100644 --- a/lib/Target/X86/X86CompilationCallback_Win64.asm +++ b/lib/Target/X86/X86CompilationCallback_Win64.asm @@ -11,7 +11,7 @@ ;; ;;===----------------------------------------------------------------------=== -extrn X86CompilationCallback2: PROC +extrn LLVMX86CompilationCallback2: PROC .code X86CompilationCallback proc @@ -42,7 +42,7 @@ X86CompilationCallback proc ; Pass prev frame and return address. mov rcx, rbp mov rdx, qword ptr [rbp+8] - call X86CompilationCallback2 + call LLVMX86CompilationCallback2 ; Restore all XMM arg registers. movaps xmm3, [rsp+48+32] diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 5facb7b..b5c3270 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -75,6 +75,8 @@ public: virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, const LoadInst *LI); + virtual bool FastLowerArguments(); + #include "X86GenFastISel.inc" private: @@ -326,12 +328,11 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned &ResultReg) { unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src, /*TODO: Kill=*/false); - - if (RR != 0) { - ResultReg = RR; - return true; - } else + if (RR == 0) return false; + + ResultReg = RR; + return true; } /// X86SelectAddress - Attempt to fill in an address from the given value. @@ -727,7 +728,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { // Don't handle popping bytes on return for now. if (X86MFInfo->getBytesToPopOnReturn() != 0) - return 0; + return false; // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. @@ -738,6 +739,9 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { if (F.isVarArg()) return false; + // Build a list of return value registers. + SmallVector<unsigned, 4> RetRegs; + if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); @@ -805,8 +809,8 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg); - // Mark the register as live out of the function. - MRI.addLiveOut(VA.getLocReg()); + // Add register to return instruction. + RetRegs.push_back(VA.getLocReg()); } // The x86-64 ABI for returning structs by value requires that we copy @@ -819,11 +823,14 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { "SRetReturnReg should have been set in LowerFormalArguments()!"); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), X86::RAX).addReg(Reg); - MRI.addLiveOut(X86::RAX); + RetRegs.push_back(X86::RAX); } // Now emit the RET. 
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET)); + MachineInstrBuilder MIB = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET)); + for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) + MIB.addReg(RetRegs[i], RegState::Implicit); return true; } @@ -1372,7 +1379,6 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM, else if (Len >= 2) VT = MVT::i16; else { - assert(Len == 1); VT = MVT::i8; } @@ -1516,6 +1522,78 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { } } +bool X86FastISel::FastLowerArguments() { + if (!FuncInfo.CanLowerReturn) + return false; + + const Function *F = FuncInfo.Fn; + if (F->isVarArg()) + return false; + + CallingConv::ID CC = F->getCallingConv(); + if (CC != CallingConv::C) + return false; + + if (!Subtarget->is64Bit()) + return false; + + // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. + unsigned Idx = 1; + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++Idx) { + if (Idx > 6) + return false; + + if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || + F->getAttributes().hasAttribute(Idx, Attribute::InReg) || + F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || + F->getAttributes().hasAttribute(Idx, Attribute::Nest)) + return false; + + Type *ArgTy = I->getType(); + if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) + return false; + + EVT ArgVT = TLI.getValueType(ArgTy); + if (!ArgVT.isSimple()) return false; + switch (ArgVT.getSimpleVT().SimpleTy) { + case MVT::i32: + case MVT::i64: + break; + default: + return false; + } + } + + static const uint16_t GPR32ArgRegs[] = { + X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D + }; + static const uint16_t GPR64ArgRegs[] = { + X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9 + }; + + Idx = 0; + const TargetRegisterClass *RC32 = TLI.getRegClassFor(MVT::i32); + const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64); + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++Idx) { + if (I->use_empty()) + continue; + bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32; + const TargetRegisterClass *RC = is32Bit ? RC32 : RC64; + unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx]; + unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); + // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. + // Without this, EmitLiveInCopies may eliminate the livein if its only + // use is a bitcast (which isn't turned into an instruction). 
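+ // Roughly, for a first i64 argument the code below emits + // %vreg1 = COPY %vreg0<kill>, where %vreg0 is the virtual register addLiveIn + // tied to %RDI and %vreg1 (illustrative numbering) is what UpdateValueMap + // records for the IR argument.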
+ unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(DstReg, getKillRegState(true)); + UpdateValueMap(I, ResultReg); + } + return true; +} + bool X86FastISel::X86SelectCall(const Instruction *I) { const CallInst *CI = cast<CallInst>(I); const Value *Callee = CI->getCalledValue(); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 420aeb8..a05cf5c 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -50,13 +50,13 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { return (MF.getTarget().Options.DisableFramePointerElim(MF) || RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken() || + MFI->isFrameAddressTaken() || MF.hasMSInlineAsm() || MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || MMI.callsUnwindInit() || MMI.callsEHReturn()); } -static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { - if (is64Bit) { +static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) { + if (IsLP64) { if (isInt<8>(Imm)) return X86::SUB64ri8; return X86::SUB64ri32; @@ -67,8 +67,8 @@ static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { } } -static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { - if (is64Bit) { +static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) { + if (IsLP64) { if (isInt<8>(Imm)) return X86::ADD64ri8; return X86::ADD64ri32; @@ -79,8 +79,8 @@ static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { } } -static unsigned getLEArOpcode(unsigned is64Bit) { - return is64Bit ? X86::LEA64r : X86::LEA32r; +static unsigned getLEArOpcode(unsigned IsLP64) { + return IsLP64 ? X86::LEA64r : X86::LEA32r; } /// findDeadCallerSavedReg - Return a caller-saved register that isn't live @@ -145,17 +145,17 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, static void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, unsigned StackPtr, int64_t NumBytes, - bool Is64Bit, bool UseLEA, + bool Is64Bit, bool IsLP64, bool UseLEA, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) { bool isSub = NumBytes < 0; uint64_t Offset = isSub ? -NumBytes : NumBytes; unsigned Opc; if (UseLEA) - Opc = getLEArOpcode(Is64Bit); + Opc = getLEArOpcode(IsLP64); else Opc = isSub - ? getSUBriOpcode(Is64Bit, Offset) - : getADDriOpcode(Is64Bit, Offset); + ? getSUBriOpcode(IsLP64, Offset) + : getADDriOpcode(IsLP64, Offset); uint64_t Chunk = (1LL << 31) - 1; DebugLoc DL = MBB.findDebugLoc(MBBI); @@ -660,6 +660,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. bool HasFP = hasFP(MF); bool Is64Bit = STI.is64Bit(); + bool IsLP64 = STI.isTarget64BitLP64(); bool IsWin64 = STI.isTargetWin64(); bool UseLEA = STI.useLeaForSP(); unsigned StackAlign = getStackAlignment(); @@ -711,7 +712,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (TailCallReturnAddrDelta < 0) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, - TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)), + TII.get(getSUBriOpcode(IsLP64, -TailCallReturnAddrDelta)), StackPtr) .addReg(StackPtr) .addImm(-TailCallReturnAddrDelta) @@ -927,7 +928,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // MSVC x64's __chkstk needs to adjust %rsp. // FIXME: %rax preserves the offset and should be available. 
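+ // (The MSVC-style __chkstk only probes the newly committed pages and leaves + // %rsp untouched, so the prologue still has to subtract NumBytes itself; + // that is what the emitSPUpdate below does.)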
if (isSPUpdateNeeded) - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64, UseLEA, TII, *RegInfo); if (isEAXAlive) { @@ -939,7 +940,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { MBB.insert(MBBI, MI); } } else if (NumBytes) - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64, UseLEA, TII, *RegInfo); // If we need a base pointer, set it up here. It's whatever the value @@ -996,6 +997,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, unsigned RetOpcode = MBBI->getOpcode(); DebugLoc DL = MBBI->getDebugLoc(); bool Is64Bit = STI.is64Bit(); + bool IsLP64 = STI.isTarget64BitLP64(); bool UseLEA = STI.useLeaForSP(); unsigned StackAlign = getStackAlignment(); unsigned SlotSize = RegInfo->getSlotSize(); @@ -1081,7 +1083,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, if (RegInfo->needsStackRealignment(MF)) MBBI = FirstCSPop; if (CSSize != 0) { - unsigned Opc = getLEArOpcode(Is64Bit); + unsigned Opc = getLEArOpcode(IsLP64); addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr, false, -CSSize); } else { @@ -1091,7 +1093,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } } else if (NumBytes) { // Adjust stack pointer back: ESP += numbytes. - emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII, *RegInfo); + emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, IsLP64, UseLEA, + TII, *RegInfo); } // We're returning from function via eh_return. @@ -1126,7 +1129,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, if (Offset) { // Check for possible merge with preceding ADD instruction. Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); - emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, UseLEA, TII, *RegInfo); + emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, IsLP64, + UseLEA, TII, *RegInfo); } // Jump to label or value in register. @@ -1169,7 +1173,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // Check for possible merge with preceding ADD instruction. delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); - emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, UseLEA, TII, *RegInfo); + emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, IsLP64, UseLEA, TII, + *RegInfo); } } @@ -1382,16 +1387,25 @@ HasNestArgument(const MachineFunction *MF) { } -/// GetScratchRegister - Get a register for performing work in the segmented -/// stack prologue. Depending on platform and the properties of the function -/// either one or two registers will be needed. Set primary to true for -/// the first register, false for the second. +/// GetScratchRegister - Get a temp register for performing work in the +/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform +/// and the properties of the function either one or two registers will be +/// needed. Set primary to true for the first register, false for the second. static unsigned GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) { + CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); + + // Erlang stuff. + if (CallingConvention == CallingConv::HiPE) { + if (Is64Bit) + return Primary ? X86::R14 : X86::R13; + else + return Primary ? X86::EBX : X86::EDI; + } + if (Is64Bit) return Primary ? 
X86::R11 : X86::R12; - CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); bool IsNested = HasNestArgument(&MF); if (CallingConvention == CallingConv::X86_FastCall || @@ -1419,7 +1433,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { bool Is64Bit = STI.is64Bit(); unsigned TlsReg, TlsOffset; DebugLoc DL; - const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>(); unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true); assert(!MF.getRegInfo().isLiveIn(ScratchReg) && @@ -1427,8 +1440,8 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { if (MF.getFunction()->isVarArg()) report_fatal_error("Segmented stacks do not support vararg functions."); - if (!ST->isTargetLinux() && !ST->isTargetDarwin() && - !ST->isTargetWin32() && !ST->isTargetFreeBSD()) + if (!STI.isTargetLinux() && !STI.isTargetDarwin() && + !STI.isTargetWin32() && !STI.isTargetFreeBSD()) report_fatal_error("Segmented stacks not supported on this platform."); MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); @@ -1466,13 +1479,13 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { // Read the limit off the current stacklet off the stack_guard location. if (Is64Bit) { - if (ST->isTargetLinux()) { + if (STI.isTargetLinux()) { TlsReg = X86::FS; TlsOffset = 0x70; - } else if (ST->isTargetDarwin()) { + } else if (STI.isTargetDarwin()) { TlsReg = X86::GS; TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90. - } else if (ST->isTargetFreeBSD()) { + } else if (STI.isTargetFreeBSD()) { TlsReg = X86::FS; TlsOffset = 0x18; } else { @@ -1488,16 +1501,16 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg) .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg); } else { - if (ST->isTargetLinux()) { + if (STI.isTargetLinux()) { TlsReg = X86::GS; TlsOffset = 0x30; - } else if (ST->isTargetDarwin()) { + } else if (STI.isTargetDarwin()) { TlsReg = X86::GS; TlsOffset = 0x48 + 90*4; - } else if (ST->isTargetWin32()) { + } else if (STI.isTargetWin32()) { TlsReg = X86::FS; TlsOffset = 0x14; // pvArbitrary, reserved for application use - } else if (ST->isTargetFreeBSD()) { + } else if (STI.isTargetFreeBSD()) { report_fatal_error("Segmented stacks not supported on FreeBSD i386."); } else { report_fatal_error("Segmented stacks not supported on this platform."); @@ -1509,10 +1522,10 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) .addImm(1).addReg(0).addImm(-StackSize).addReg(0); - if (ST->isTargetLinux() || ST->isTargetWin32()) { + if (STI.isTargetLinux() || STI.isTargetWin32()) { BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); - } else if (ST->isTargetDarwin()) { + } else if (STI.isTargetDarwin()) { // TlsOffset doesn't fit into a mod r/m byte so we need an extra register unsigned ScratchReg2; @@ -1598,3 +1611,229 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MF.verify(); #endif } + +// Erlang programs may need a special prologue to handle the stack size they +// might need at runtime. That is because Erlang/OTP does not implement a C +// stack but uses a custom implementation of hybrid stack/heap +// architecture. (for more information see Eric Stenman's Ph.D. 
thesis: +// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf) +// +// +// CheckStack: +// temp0 = sp - MaxStack +// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart +// OldStart: +// ... +// IncStack: +// call inc_stack # doubles the stack space +// temp0 = sp - MaxStack +// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart +void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { + const X86InstrInfo &TII = *TM.getInstrInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const unsigned SlotSize = TM.getRegisterInfo()->getSlotSize(); + const bool Is64Bit = STI.is64Bit(); + DebugLoc DL; + // HiPE-specific values + const unsigned HipeLeafWords = 24; + const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5; + const unsigned Guaranteed = HipeLeafWords * SlotSize; + unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ? + MF.getFunction()->arg_size() - CCRegisteredArgs : 0; + unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize; + + assert(STI.isTargetLinux() && + "HiPE prologue is only supported on Linux operating systems."); + + // Compute the largest caller's frame that is needed to fit the callees' + // frames. This 'MaxStack' is computed from: + // + // a) the fixed frame size, which is the space needed for all spilled temps, + // b) outgoing on-stack parameter areas, and + // c) the minimum stack space this function needs to make available for the + // functions it calls (a tunable ABI property). + if (MFI->hasCalls()) { + unsigned MoreStackForCalls = 0; + + for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end(); + MBBI != MBBE; ++MBBI) + for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end(); + MI != ME; ++MI) { + if (!MI->isCall()) + continue; + + // Get callee operand. + const MachineOperand &MO = MI->getOperand(0); + + // Only take into account global function calls (no closures etc.). + if (!MO.isGlobal()) + continue; + + const Function *F = dyn_cast<Function>(MO.getGlobal()); + if (!F) + continue; + + // Do not update 'MaxStack' for primitive and built-in functions + // (encoded with names either starting with "erlang."/"bif_" or not + // having a ".", such as a simple <Module>.<Function>.<Arity>, or an + // "_", such as the BIF "suspend_0") as they are executed on another + // stack. + if (F->getName().find("erlang.") != StringRef::npos || + F->getName().find("bif_") != StringRef::npos || + F->getName().find_first_of("._") == StringRef::npos) + continue; + + unsigned CalleeStkArity = + F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0; + if (HipeLeafWords - 1 > CalleeStkArity) + MoreStackForCalls = std::max(MoreStackForCalls, + (HipeLeafWords - 1 - CalleeStkArity) * SlotSize); + } + MaxStack += MoreStackForCalls; + } + + // If the stack frame needed is larger than the guaranteed size, then runtime + // checks and calls to the "inc_stack_0" BIF should be inserted in the + // assembly prologue.
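+ // For example, on x86-64 SlotSize is 8, so Guaranteed = 24 * 8 = 192 bytes; + // an Erlang function with a 256-byte frame and no on-stack arguments has + // MaxStack = 256 + 0 + 8 = 264 > 192 and therefore gets the runtime check.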
+ if (MaxStack > Guaranteed) { + MachineBasicBlock &prologueMBB = MF.front(); + MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock(); + MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock(); + + for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(), + E = prologueMBB.livein_end(); I != E; I++) { + stackCheckMBB->addLiveIn(*I); + incStackMBB->addLiveIn(*I); + } + + MF.push_front(incStackMBB); + MF.push_front(stackCheckMBB); + + unsigned ScratchReg, SPReg, PReg, SPLimitOffset; + unsigned LEAop, CMPop, CALLop; + if (Is64Bit) { + SPReg = X86::RSP; + PReg = X86::RBP; + LEAop = X86::LEA64r; + CMPop = X86::CMP64rm; + CALLop = X86::CALL64pcrel32; + SPLimitOffset = 0x90; + } else { + SPReg = X86::ESP; + PReg = X86::EBP; + LEAop = X86::LEA32r; + CMPop = X86::CMP32rm; + CALLop = X86::CALLpcrel32; + SPLimitOffset = 0x4c; + } + + ScratchReg = GetScratchRegister(Is64Bit, MF, true); + assert(!MF.getRegInfo().isLiveIn(ScratchReg) && + "HiPE prologue scratch register is live-in"); + + // Create new MBB for StackCheck: + addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), + SPReg, false, -MaxStack); + // SPLimitOffset is in a fixed heap location (pointed by BP). + addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)) + .addReg(ScratchReg), PReg, false, SPLimitOffset); + BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_4)).addMBB(&prologueMBB); + + // Create new MBB for IncStack: + BuildMI(incStackMBB, DL, TII.get(CALLop)). + addExternalSymbol("inc_stack_0"); + addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), + SPReg, false, -MaxStack); + addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)) + .addReg(ScratchReg), PReg, false, SPLimitOffset); + BuildMI(incStackMBB, DL, TII.get(X86::JLE_4)).addMBB(incStackMBB); + + stackCheckMBB->addSuccessor(&prologueMBB, 99); + stackCheckMBB->addSuccessor(incStackMBB, 1); + incStackMBB->addSuccessor(&prologueMBB, 99); + incStackMBB->addSuccessor(incStackMBB, 1); + } +#ifdef XDEBUG + MF.verify(); +#endif +} + +void X86FrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const X86InstrInfo &TII = *TM.getInstrInfo(); + const X86RegisterInfo &RegInfo = *TM.getRegisterInfo(); + unsigned StackPtr = RegInfo.getStackRegister(); + bool reserveCallFrame = hasReservedCallFrame(MF); + int Opcode = I->getOpcode(); + bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); + bool IsLP64 = STI.isTarget64BitLP64(); + DebugLoc DL = I->getDebugLoc(); + uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0; + uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; + I = MBB.erase(I); + + if (!reserveCallFrame) { + // If the stack pointer can be changed after prologue, turn the + // adjcallstackdown instruction into a 'sub ESP, <amt>' and the + // adjcallstackup instruction into 'add ESP, <amt>' + // TODO: consider using push / pop instead of sub + store / add + if (Amount == 0) + return; + + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary.
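+ // For example, with a 16-byte stack alignment an outgoing-argument area of + // 20 bytes is rounded up to 32: (20 + 16 - 1) / 16 * 16 == 32.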
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; + + MachineInstr *New = 0; + if (Opcode == TII.getCallFrameSetupOpcode()) { + New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), + StackPtr) + .addReg(StackPtr) + .addImm(Amount); + } else { + assert(Opcode == TII.getCallFrameDestroyOpcode()); + + // Factor out the amount the callee already popped. + Amount -= CalleeAmt; + + if (Amount) { + unsigned Opc = getADDriOpcode(IsLP64, Amount); + New = BuildMI(MF, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr).addImm(Amount); + } + } + + if (New) { + // The EFLAGS implicit def is dead. + New->getOperand(3).setIsDead(); + + // Replace the pseudo instruction with a new instruction. + MBB.insert(I, New); + } + + return; + } + + if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) { + // If we are performing frame pointer elimination and if the callee pops + // something off the stack pointer, add it back. We do this until we have + // more advanced stack pointer tracking ability. + unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt); + MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr).addImm(CalleeAmt); + + // The EFLAGS implicit def is dead. + New->getOperand(3).setIsDead(); + + // We are not tracking the stack pointer adjustment by the callee, so make + // sure we restore the stack pointer immediately after the call, there may + // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. + MachineBasicBlock::iterator B = MBB.begin(); + while (I != B && !llvm::prior(I)->isCall()) + --I; + MBB.insert(I, New); + } +} + diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index dc515dc..3f08b9a 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -43,6 +43,8 @@ public: void adjustForSegmentedStacks(MachineFunction &MF) const; + void adjustForHiPEPrologue(MachineFunction &MF) const; + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; @@ -63,6 +65,10 @@ public: int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const; uint32_t getCompactUnwindEncoding(MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 935f9bd..00fbe69 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -280,13 +280,13 @@ namespace { /// getTargetMachine - Return a reference to the TargetMachine, casted /// to the target-specific type. - const X86TargetMachine &getTargetMachine() { + const X86TargetMachine &getTargetMachine() const { return static_cast<const X86TargetMachine &>(TM); } /// getInstrInfo - Return a reference to the TargetInstrInfo, casted /// to the target-specific type. - const X86InstrInfo *getInstrInfo() { + const X86InstrInfo *getInstrInfo() const { return getTargetMachine().getInstrInfo(); } }; @@ -446,7 +446,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (OptLevel != CodeGenOpt::None && (N->getOpcode() == X86ISD::CALL || (N->getOpcode() == X86ISD::TC_RETURN && - // Only does this if load can be foled into TC_RETURN. + // Only does this if load can be folded into TC_RETURN. 
(Subtarget->is64Bit() || getTargetMachine().getRelocationModel() != Reloc::PIC_)))) { /// Also try moving call address load from outside callseq_start to just diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4ab92ad..1c3b9ae 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -605,10 +605,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FGETSIGN, MVT::i32, Custom); // We don't support sin/cos/fmod - setOperationAction(ISD::FSIN , MVT::f64, Expand); - setOperationAction(ISD::FCOS , MVT::f64, Expand); - setOperationAction(ISD::FSIN , MVT::f32, Expand); - setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); // Expand FP immediates into loads from the stack, except for the special // cases we handle. @@ -633,8 +635,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); // We don't support sin/cos/fmod - setOperationAction(ISD::FSIN , MVT::f32, Expand); - setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); // Special cases we handle for FP constants. addLegalFPImmediate(APFloat(+0.0f)); // xorps @@ -644,8 +647,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS if (!TM.Options.UnsafeFPMath) { - setOperationAction(ISD::FSIN , MVT::f64 , Expand); - setOperationAction(ISD::FCOS , MVT::f64 , Expand); + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); } } else if (!TM.Options.UseSoftFloat) { // f32 and f64 in x87. 
@@ -659,10 +663,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); if (!TM.Options.UnsafeFPMath) { - setOperationAction(ISD::FSIN , MVT::f32 , Expand); - setOperationAction(ISD::FSIN , MVT::f64 , Expand); - setOperationAction(ISD::FCOS , MVT::f32 , Expand); - setOperationAction(ISD::FCOS , MVT::f64 , Expand); + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); } addLegalFPImmediate(APFloat(+0.0)); // FLD0 addLegalFPImmediate(APFloat(+1.0)); // FLD1 @@ -699,8 +705,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } if (!TM.Options.UnsafeFPMath) { - setOperationAction(ISD::FSIN , MVT::f80 , Expand); - setOperationAction(ISD::FCOS , MVT::f80 , Expand); + setOperationAction(ISD::FSIN , MVT::f80, Expand); + setOperationAction(ISD::FCOS , MVT::f80, Expand); + setOperationAction(ISD::FSINCOS, MVT::f80, Expand); } setOperationAction(ISD::FFLOOR, MVT::f80, Expand); @@ -748,7 +755,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand); setOperationAction(ISD::FABS, VT, Expand); setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FMA, VT, Expand); setOperationAction(ISD::FPOWI, VT, Expand); @@ -1047,6 +1056,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v4i32, Custom); } + setOperationAction(ISD::SDIV, MVT::v8i16, Custom); + setOperationAction(ISD::SDIV, MVT::v4i32, Custom); } if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) { @@ -1111,6 +1122,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v16i16, Custom); setOperationAction(ISD::SRA, MVT::v32i8, Custom); + setOperationAction(ISD::SDIV, MVT::v16i16, Custom); + setOperationAction(ISD::SETCC, MVT::v32i8, Custom); setOperationAction(ISD::SETCC, MVT::v16i16, Custom); setOperationAction(ISD::SETCC, MVT::v8i32, Custom); @@ -1166,6 +1179,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SHL, MVT::v8i32, Legal); setOperationAction(ISD::SRA, MVT::v8i32, Legal); + + setOperationAction(ISD::SDIV, MVT::v8i32, Custom); } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); setOperationAction(ISD::ADD, MVT::v8i32, Custom); @@ -1275,6 +1290,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setLibcallName(RTLIB::SRA_I128, 0); } + // Combine sin / cos into one node or libcall if possible. + if (Subtarget->hasSinCos()) { + setLibcallName(RTLIB::SINCOS_F32, "sincosf"); + setLibcallName(RTLIB::SINCOS_F64, "sincos"); + if (Subtarget->isTargetDarwin()) { + // For MacOSX, we don't want to the normal expansion of a libcall to + // sincos. We want to issue a libcall to __sincos_stret to avoid memory + // traffic. 
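+ // (__sincos_stret computes both values in a single call and returns the + // pair in registers, on x86-64 roughly sin in %xmm0 and cos in %xmm1, so + // the results never take a round trip through a stack slot.)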
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom); + setOperationAction(ISD::FSINCOS, MVT::f32, Custom); + } + } + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); @@ -1295,6 +1323,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::SETCC); @@ -1306,17 +1335,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // On Darwin, -Os means optimize for size without hurting performance, // do not reduce the limit. - maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores - maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8; - maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores - maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4; - maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores - maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores + MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8; + MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores + MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores + MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4; setPrefLoopAlignment(4); // 2^4 bytes. - benefitFromCodePlacementOpt = true; + BenefitFromCodePlacementOpt = true; // Predictable cmov don't hurt on atom because it's in-order. - predictableSelectIsExpensive = !Subtarget->isAtom(); + PredictableSelectIsExpensive = !Subtarget->isAtom(); setPrefFunctionAlignment(4); // 2^4 bytes. } @@ -1562,14 +1591,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_X86); - // Add the regs to the liveout set for the function. - MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) - MRI.addLiveOut(RVLocs[i].getLocReg()); - SDValue Flag; - SmallVector<SDValue, 6> RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) // Operand #1 = Bytes To Pop @@ -1638,12 +1660,13 @@ X86TargetLowering::LowerReturn(SDValue Chain, Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - // The x86-64 ABI for returning structs by value requires that we copy - // the sret argument into %rax for the return. We saved the argument into - // a virtual register in the entry block, so now we copy the value out - // and into %rax. + // The x86-64 ABIs require that for returning structs by value we copy + // the sret argument into %rax/%eax (depending on ABI) for the return. + // We saved the argument into a virtual register in the entry block, + // so now we copy the value out and into %rax/%eax. 
if (Subtarget->is64Bit() && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) { MachineFunction &MF = DAG.getMachineFunction(); @@ -1653,11 +1676,12 @@ X86TargetLowering::LowerReturn(SDValue Chain, "SRetReturnReg should have been set in LowerFormalArguments()."); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); - Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag); + unsigned RetValReg = Subtarget->isTarget64BitILP32() ? X86::EAX : X86::RAX; + Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag); Flag = Chain.getValue(1); - // RAX now acts like a return value. - MRI.addLiveOut(X86::RAX); + // RAX/EAX now acts like a return value. + RetOps.push_back(DAG.getRegister(RetValReg, MVT::i64)); } RetOps[0] = Chain; // Update chain. @@ -2009,14 +2033,16 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, InVals.push_back(ArgValue); } - // The x86-64 ABI for returning structs by value requires that we copy - // the sret argument into %rax for the return. Save the argument into - // a virtual register so that we can access it from the return points. + // The x86-64 ABIs require that for returning structs by value we copy + // the sret argument into %rax/%eax (depending on ABI) for the return. + // Save the argument into a virtual register so that we can access it + // from the return points. if (Is64Bit && MF.getFunction()->hasStructRetAttr()) { X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); unsigned Reg = FuncInfo->getSRetReturnReg(); if (!Reg) { - Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); + MVT PtrTy = getPointerTy(); + Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); FuncInfo->setSRetReturnReg(Reg); } SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]); @@ -2630,8 +2656,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // This isn't right, although it's probably harmless on x86; liveouts // should be computed from returns not tail calls. Consider a void // function making a tail call to a function returning int. - return DAG.getNode(X86ISD::TC_RETURN, dl, - NodeTys, &Ops[0], Ops.size()); + return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); } Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); @@ -2789,7 +2814,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, - SelectionDAG& DAG) const { + SelectionDAG &DAG) const { if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C) return false; @@ -2828,7 +2853,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // An stdcall caller is expected to clean up its arguments; the callee // isn't going to do that. - if (!CCMatch && CallerCC==CallingConv::X86_StdCall) + if (!CCMatch && CallerCC == CallingConv::X86_StdCall) return false; // Do not sibcall optimize vararg calls unless all arguments are passed via @@ -2948,9 +2973,15 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // callee-saved registers are restored. These happen to be the same // registers used to pass 'inreg' arguments so watch out for those. 
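+ // (In 32-bit PIC code the callee's address is materialized through an + // extra register, typically %ebx holding the GOT base, which leaves only + // two of %eax/%ecx/%edx free for 'inreg' arguments; the MaxInRegs value + // below accounts for exactly that.)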
if (!Subtarget->is64Bit() && - !isa<GlobalAddressSDNode>(Callee) && - !isa<ExternalSymbolSDNode>(Callee)) { + ((!isa<GlobalAddressSDNode>(Callee) && + !isa<ExternalSymbolSDNode>(Callee)) || + getTargetMachine().getRelocationModel() == Reloc::PIC_)) { unsigned NumInRegs = 0; + // In PIC we need an extra register to formulate the address computation + // for the callee. + unsigned MaxInRegs = + (getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 2 : 3; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (!VA.isRegLoc()) @@ -2959,7 +2990,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, switch (Reg) { default: break; case X86::EAX: case X86::EDX: case X86::ECX: - if (++NumInRegs == 3) + if (++NumInRegs == MaxInRegs) return false; break; } @@ -2995,7 +3026,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: case X86ISD::SHUFP: - case X86ISD::PALIGN: + case X86ISD::PALIGNR: case X86ISD::MOVLHPS: case X86ISD::MOVLHPD: case X86ISD::MOVHLPS: @@ -3045,7 +3076,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, SelectionDAG &DAG) { switch(Opc) { default: llvm_unreachable("Unknown x86 shuffle node"); - case X86ISD::PALIGN: + case X86ISD::PALIGNR: case X86ISD::SHUFP: case X86ISD::VPERM2X128: return DAG.getNode(Opc, dl, VT, V1, V2, @@ -3355,8 +3386,8 @@ static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) { /// is suitable for input to PALIGNR. static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT, const X86Subtarget *Subtarget) { - if ((VT.getSizeInBits() == 128 && !Subtarget->hasSSSE3()) || - (VT.getSizeInBits() == 256 && !Subtarget->hasInt256())) + if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) || + (VT.is256BitVector() && !Subtarget->hasInt256())) return false; unsigned NumElts = VT.getVectorNumElements(); @@ -3445,7 +3476,7 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, /// reverse of what x86 shuffles want. static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256, bool Commuted = false) { - if (!HasFp256 && VT.getSizeInBits() == 256) + if (!HasFp256 && VT.is256BitVector()) return false; unsigned NumElems = VT.getVectorNumElements(); @@ -3580,7 +3611,7 @@ static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) { static SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); DebugLoc dl = SVOp->getDebugLoc(); if (VT != MVT::v8i32 && VT != MVT::v8f32) @@ -3630,7 +3661,7 @@ static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT, assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 && (!HasInt256 || (NumElts != 16 && NumElts != 32))) return false; @@ -3669,7 +3700,7 @@ static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT, assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 && (!HasInt256 || (NumElts != 16 && NumElts != 32))) return false; @@ -3700,14 +3731,14 @@ static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT, /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. 
vector_shuffle v, undef, /// <0, 0, 1, 1> -static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, - bool HasInt256) { +static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) { unsigned NumElts = VT.getVectorNumElements(); + bool Is256BitVec = VT.is256BitVector(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + if (Is256BitVec && NumElts != 4 && NumElts != 8 && (!HasInt256 || (NumElts != 16 && NumElts != 32))) return false; @@ -3715,7 +3746,7 @@ static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, // FIXME: Need a better way to get rid of this, there's no latency difference // between UNPCKLPD and MOVDDUP, the later should always be checked first and // the former later. We should also remove the "_undef" special mask. - if (NumElts == 4 && VT.getSizeInBits() == 256) + if (NumElts == 4 && Is256BitVec) return false; // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -3749,7 +3780,7 @@ static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) { assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 && (!HasInt256 || (NumElts != 16 && NumElts != 32))) return false; @@ -3831,7 +3862,7 @@ static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasFp256) { /// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle /// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions. static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) { - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); unsigned HalfSize = VT.getVectorNumElements()/2; @@ -3865,7 +3896,7 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) { unsigned NumElts = VT.getVectorNumElements(); // Only match 256-bit with 32/64-bit types - if (VT.getSizeInBits() != 256 || (NumElts != 4 && NumElts != 8)) + if (!VT.is256BitVector() || (NumElts != 4 && NumElts != 8)) return false; unsigned NumLanes = VT.getSizeInBits()/128; @@ -3921,8 +3952,8 @@ static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT, unsigned NumElems = VT.getVectorNumElements(); - if ((VT.getSizeInBits() == 128 && NumElems != 4) || - (VT.getSizeInBits() == 256 && NumElems != 8)) + if ((VT.is128BitVector() && NumElems != 4) || + (VT.is256BitVector() && NumElems != 8)) return false; // "i+1" is the value the indexed mask element must have @@ -3944,8 +3975,8 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT, unsigned NumElems = VT.getVectorNumElements(); - if ((VT.getSizeInBits() == 128 && NumElems != 4) || - (VT.getSizeInBits() == 256 && NumElems != 8)) + if ((VT.is128BitVector() && NumElems != 4) || + (VT.is256BitVector() && NumElems != 8)) return false; // "i" is the value the indexed mask element must have @@ -4005,9 +4036,8 @@ bool X86::isVEXTRACTF128Index(SDNode *N) { uint64_t Index = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); - unsigned VL = N->getValueType(0).getVectorNumElements(); - unsigned VBits = N->getValueType(0).getSizeInBits(); - unsigned ElSize = VBits / VL; + MVT VT = N->getValueType(0).getSimpleVT(); + unsigned ElSize = VT.getVectorElementType().getSizeInBits(); bool Result = (Index * ElSize) % 128 == 0; return Result; @@ -4024,9 +4054,8 @@ bool 
X86::isVINSERTF128Index(SDNode *N) { uint64_t Index = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); - unsigned VL = N->getValueType(0).getVectorNumElements(); - unsigned VBits = N->getValueType(0).getSizeInBits(); - unsigned ElSize = VBits / VL; + MVT VT = N->getValueType(0).getSimpleVT(); + unsigned ElSize = VT.getVectorElementType().getSizeInBits(); bool Result = (Index * ElSize) % 128 == 0; return Result; @@ -4036,7 +4065,7 @@ bool X86::isVINSERTF128Index(SDNode *N) { /// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions. /// Handles 128-bit and 256-bit. static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) { - EVT VT = N->getValueType(0); + MVT VT = N->getValueType(0).getSimpleVT(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for PSHUF/SHUFP"); @@ -4066,7 +4095,7 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) { /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction. static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) { - EVT VT = N->getValueType(0); + MVT VT = N->getValueType(0).getSimpleVT(); assert((VT == MVT::v8i16 || VT == MVT::v16i16) && "Unsupported vector type for PSHUFHW"); @@ -4090,7 +4119,7 @@ static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) { /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction. static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) { - EVT VT = N->getValueType(0); + MVT VT = N->getValueType(0).getSimpleVT(); assert((VT == MVT::v8i16 || VT == MVT::v16i16) && "Unsupported vector type for PSHUFHW"); @@ -4114,7 +4143,7 @@ static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) { /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction. static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) { - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3; unsigned NumElts = VT.getVectorNumElements(); @@ -4145,8 +4174,8 @@ unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) { uint64_t Index = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); - EVT VecVT = N->getOperand(0).getValueType(); - EVT ElVT = VecVT.getVectorElementType(); + MVT VecVT = N->getOperand(0).getValueType().getSimpleVT(); + MVT ElVT = VecVT.getVectorElementType(); unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits(); return Index / NumElemsPerChunk; @@ -4162,8 +4191,8 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) { uint64_t Index = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); - EVT VecVT = N->getValueType(0); - EVT ElVT = VecVT.getVectorElementType(); + MVT VecVT = N->getValueType(0).getSimpleVT(); + MVT ElVT = VecVT.getVectorElementType(); unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits(); return Index / NumElemsPerChunk; @@ -4173,7 +4202,7 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) { /// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions. /// Handles 256-bit. 
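+/// For example, fully reversing a v4i64 vector (mask <3,2,1,0>) packs two +/// bits per result element into the immediate: 3 | (2<<2) | (1<<4) | (0<<6) +/// == 0x1B, which is the immediate VPERMQ expects.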
static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) { - EVT VT = N->getValueType(0); + MVT VT = N->getValueType(0).getSimpleVT(); unsigned NumElts = VT.getVectorNumElements(); @@ -4193,17 +4222,18 @@ static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) { /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. bool X86::isZeroNode(SDValue Elt) { - return ((isa<ConstantSDNode>(Elt) && - cast<ConstantSDNode>(Elt)->isNullValue()) || - (isa<ConstantFPSDNode>(Elt) && - cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero())); + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Elt)) + return CN->isNullValue(); + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt)) + return CFP->getValueAPF().isPosZero(); + return false; } /// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in /// their permute mask. static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); unsigned NumElems = VT.getVectorNumElements(); SmallVector<int, 8> MaskVec; @@ -4352,12 +4382,11 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) { static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - unsigned Size = VT.getSizeInBits(); // Always build SSE zero vectors as <4 x i32> bitcasted // to their dest type. This ensures they get CSE'd. SDValue Vec; - if (Size == 128) { // SSE + if (VT.is128BitVector()) { // SSE if (Subtarget->hasSSE2()) { // SSE2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); @@ -4365,7 +4394,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst); } - } else if (Size == 256) { // AVX + } else if (VT.is256BitVector()) { // AVX if (Subtarget->hasInt256()) { // AVX2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; @@ -4387,14 +4416,13 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, /// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with /// no AVX2 supprt, use two <4 x i32> inserted in a <8 x i32> appropriately. /// Then bitcast to their original type, ensuring they get CSE'd. 
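+/// (An all-ones vector built this way typically lowers to a single +/// PCMPEQD/VPCMPEQD of a register with itself, which is why canonicalizing +/// to v4i32/v8i32 pays off.)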
-static SDValue getOnesVector(EVT VT, bool HasInt256, SelectionDAG &DAG, +static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - unsigned Size = VT.getSizeInBits(); SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); SDValue Vec; - if (Size == 256) { + if (VT.is256BitVector()) { if (HasInt256) { // AVX2 SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); @@ -4402,7 +4430,7 @@ static SDValue getOnesVector(EVT VT, bool HasInt256, SelectionDAG &DAG, Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl); } - } else if (Size == 128) { + } else if (VT.is128BitVector()) { Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); } else llvm_unreachable("Unexpected vector type"); @@ -4481,14 +4509,13 @@ static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) { static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) { EVT VT = V.getValueType(); DebugLoc dl = V.getDebugLoc(); - unsigned Size = VT.getSizeInBits(); - if (Size == 128) { + if (VT.is128BitVector()) { V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V); int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32), &SplatMask[0]); - } else if (Size == 256) { + } else if (VT.is256BitVector()) { // To use VPERMILPS to splat scalars, the second half of indicies must // refer to the higher part, which is a duplication of the lower one, // because VPERMILPS can only handle in-lane permutations. @@ -4512,14 +4539,14 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { int EltNo = SV->getSplatIndex(); int NumElems = SrcVT.getVectorNumElements(); - unsigned Size = SrcVT.getSizeInBits(); + bool Is256BitVec = SrcVT.is256BitVector(); - assert(((Size == 128 && NumElems > 4) || Size == 256) && - "Unknown how to promote splat for type"); + assert(((SrcVT.is128BitVector() && NumElems > 4) || Is256BitVec) && + "Unknown how to promote splat for type"); // Extract the 128-bit part containing the splat element and update // the splat element index when it refers to the higher register. - if (Size == 256) { + if (Is256BitVec) { V1 = Extract128BitVector(V1, EltNo, DAG, dl); if (EltNo >= NumElems/2) EltNo -= NumElems/2; @@ -4536,7 +4563,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { // Recreate the 256-bit vector and place the same 128-bit vector // into the low and high part. 
This is necessary because we want // to use VPERM* to shuffle the vectors - if (Size == 256) { + if (Is256BitVec) { V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1); } @@ -4588,6 +4615,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, case X86ISD::MOVLHPS: DecodeMOVLHPSMask(NumElems, Mask); break; + case X86ISD::PALIGNR: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + break; case X86ISD::PSHUFD: case X86ISD::VPERMILP: ImmN = N->getOperand(N->getNumOperands()-1); @@ -4631,7 +4662,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, case X86ISD::MOVLPS: case X86ISD::MOVSHDUP: case X86ISD::MOVSLDUP: - case X86ISD::PALIGN: // Not yet implemented return false; default: llvm_unreachable("unknown target shuffle node"); @@ -5099,7 +5129,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget->hasFp256()) return SDValue(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); assert((VT.is128BitVector() || VT.is256BitVector()) && @@ -5297,8 +5327,8 @@ SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - EVT ExtVT = VT.getVectorElementType(); + MVT VT = Op.getValueType().getSimpleVT(); + MVT ExtVT = VT.getVectorElementType(); unsigned NumElems = Op.getNumOperands(); // Vectors containing all zeros can be matched by pxor and xorps later @@ -5314,7 +5344,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // Vectors containing all ones can be matched by pcmpeqd on 128-bit width // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use // vpcmpeqd on 256-bit vectors. - if (ISD::isBuildVectorAllOnes(Op.getNode())) { + if (Subtarget->hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) { if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256())) return Op; @@ -5629,7 +5659,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // to create 256-bit vectors from two other 128-bit ones. static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); - EVT ResVT = Op.getValueType(); + MVT ResVT = Op.getValueType().getSimpleVT(); assert(ResVT.is256BitVector() && "Value type must be 256-bit wide"); @@ -5655,8 +5685,8 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp, SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); - EVT VT = SVOp->getValueType(0); - EVT EltVT = VT.getVectorElementType(); + MVT VT = SVOp->getValueType(0).getSimpleVT(); + MVT EltVT = VT.getVectorElementType(); unsigned NumElems = VT.getVectorNumElements(); if (!Subtarget->hasSSE41() || EltVT == MVT::i8) @@ -5667,41 +5697,40 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp, // Check the mask for BLEND and build the value. unsigned MaskValue = 0; // There are 2 lanes if (NumElems > 8), and 1 lane otherwise. - unsigned NumLanes = (NumElems-1)/8 + 1; + unsigned NumLanes = (NumElems-1)/8 + 1; unsigned NumElemsInLane = NumElems / NumLanes; // Blend for v16i16 should be symetric for the both lanes. for (unsigned i = 0; i < NumElemsInLane; ++i) { - int SndLaneEltIdx = (NumLanes == 2) ? + int SndLaneEltIdx = (NumLanes == 2) ? 
SVOp->getMaskElt(i + NumElemsInLane) : -1; int EltIdx = SVOp->getMaskElt(i); - if ((EltIdx == -1 || EltIdx == (int)i) && - (SndLaneEltIdx == -1 || SndLaneEltIdx == (int)(i + NumElemsInLane))) + if ((EltIdx < 0 || EltIdx == (int)i) && + (SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane))) continue; - if (((unsigned)EltIdx == (i + NumElems)) && - (SndLaneEltIdx == -1 || + if (((unsigned)EltIdx == (i + NumElems)) && + (SndLaneEltIdx < 0 || (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane)) MaskValue |= (1<<i); - else + else return SDValue(); } // Convert i32 vectors to floating point if it is not AVX2. // AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors. - EVT BlendVT = VT; + MVT BlendVT = VT; if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) { - BlendVT = EVT::getVectorVT(*DAG.getContext(), - EVT::getFloatingPointVT(EltVT.getSizeInBits()), - NumElems); + BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()), + NumElems); V1 = DAG.getNode(ISD::BITCAST, dl, VT, V1); V2 = DAG.getNode(ISD::BITCAST, dl, VT, V2); } - - SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2, - DAG.getConstant(MaskValue, MVT::i32)); + + SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2, + DAG.getConstant(MaskValue, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, VT, Ret); } @@ -5836,6 +5865,11 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget, } } + // Promote splats to a larger type which usually leads to more efficient code. + // FIXME: Is this true if pshufb is available? + if (SVOp->isSplat()) + return PromoteSplat(SVOp, DAG); + // If we have SSSE3, and all words of the result are from 1 input vector, // case 2 is generated, otherwise case 3 is generated. If no SSSE3 // is present, fall back to case 4. @@ -5851,7 +5885,7 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget, int EltIdx = MaskVals[i] * 2; int Idx0 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx; int Idx1 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx+1; - pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8)); pshufbMask.push_back(DAG.getConstant(Idx1, MVT::i8)); } V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1); @@ -5969,6 +6003,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, DebugLoc dl = SVOp->getDebugLoc(); ArrayRef<int> MaskVals = SVOp->getMask(); + // Promote splats to a larger type which usually leads to more efficient code. + // FIXME: Is this true if pshufb is available? + if (SVOp->isSplat()) + return PromoteSplat(SVOp, DAG); + // If we have SSSE3, case 1 is generated when all result bytes come from // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is // present, fall back to case 3. 
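+ // (A PSHUFB control byte with its high bit set, e.g. the 0x80 used above, + // writes a zero into the corresponding result byte; that is how lanes that + // come from the other input are cleared before the two halves are OR'd.)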
@@ -6087,7 +6126,7 @@ static SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); @@ -6134,8 +6173,9 @@ SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp, /// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15> static SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, DebugLoc dl) { + SelectionDAG &DAG) { MVT VT = SVOp->getValueType(0).getSimpleVT(); + DebugLoc dl = SVOp->getDebugLoc(); unsigned NumElems = VT.getVectorNumElements(); MVT NewVT; unsigned Scale; @@ -6171,7 +6211,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, /// getVZextMovL - Return a zero-extending vector move low node. /// -static SDValue getVZextMovL(EVT VT, EVT OpVT, +static SDValue getVZextMovL(MVT VT, EVT OpVT, SDValue SrcOp, SelectionDAG &DAG, const X86Subtarget *Subtarget, DebugLoc dl) { if (VT == MVT::v2f64 || VT == MVT::v4f32) { @@ -6213,14 +6253,14 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { if (NewOp.getNode()) return NewOp; - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); unsigned NumElems = VT.getVectorNumElements(); unsigned NumLaneElems = NumElems / 2; DebugLoc dl = SVOp->getDebugLoc(); - MVT EltVT = VT.getVectorElementType().getSimpleVT(); - EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems); + MVT EltVT = VT.getVectorElementType(); + MVT NVT = MVT::getVectorVT(EltVT, NumLaneElems); SDValue Output[2]; SmallVector<int, 16> Mask; @@ -6325,7 +6365,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); - EVT VT = SVOp->getValueType(0); + MVT VT = SVOp->getValueType(0).getSimpleVT(); assert(VT.is128BitVector() && "Unsupported vector size"); @@ -6579,7 +6619,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { // Reduce a vector shuffle to zext. SDValue -X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { +X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { // PMOVZX is only available from SSE41. if (!Subtarget->hasSSE41()) return SDValue(); @@ -6623,9 +6663,10 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } + LLVMContext *Context = DAG.getContext(); unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift; - EVT NeVT = EVT::getIntegerVT(*DAG.getContext(), NBits); - EVT NVT = EVT::getVectorVT(*DAG.getContext(), NeVT, NumElems >> Shift); + EVT NeVT = EVT::getIntegerVT(*Context, NBits); + EVT NVT = EVT::getVectorVT(*Context, NeVT, NumElems >> Shift); if (!isTypeLegal(NVT)) return SDValue(); @@ -6644,8 +6685,21 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { // If it's foldable, i.e. normal load with single use, we will let code // selection to fold it. Otherwise, we will short the conversion sequence. if (CIdx && CIdx->getZExtValue() == 0 && - (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) + (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) { + if (V.getValueSizeInBits() > V1.getValueSizeInBits()) { + // The "ext_vec_elt" node is wider than the result node. + // In this case we should extract subvector from V. 
+ // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)). + unsigned Ratio = V.getValueSizeInBits() / V1.getValueSizeInBits(); + EVT FullVT = V.getValueType(); + EVT SubVecVT = EVT::getVectorVT(*Context, + FullVT.getVectorElementType(), + FullVT.getVectorNumElements()/Ratio); + V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V, + DAG.getIntPtrConstant(0)); + } V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V); + } } return DAG.getNode(ISD::BITCAST, DL, VT, @@ -6655,7 +6709,7 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -6665,25 +6719,14 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { // Handle splat operations if (SVOp->isSplat()) { - unsigned NumElem = VT.getVectorNumElements(); - int Size = VT.getSizeInBits(); - // Use vbroadcast whenever the splat comes from a foldable load SDValue Broadcast = LowerVectorBroadcast(Op, DAG); if (Broadcast.getNode()) return Broadcast; - - // Handle splats by matching through known shuffle masks - if ((Size == 128 && NumElem <= 4) || - (Size == 256 && NumElem <= 8)) - return SDValue(); - - // All remaning splats are promoted to target supported vector shuffles. - return PromoteSplat(SVOp, DAG); } // Check integer expanding shuffles. - SDValue NewOp = lowerVectorIntExtend(Op, DAG); + SDValue NewOp = LowerVectorIntExtend(Op, DAG); if (NewOp.getNode()) return NewOp; @@ -6691,7 +6734,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { // do it! if (VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v16i16 || VT == MVT::v32i8) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG); if (NewOp.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, NewOp); } else if ((VT == MVT::v4i32 || @@ -6699,18 +6742,18 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { // FIXME: Figure out a cleaner way to do this. // Try to make use of movq to zero out the top part. 
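LowerVectorIntExtend (renamed from lowerVectorIntExtend above) matches the shuffle shape that PMOVZX computes directly: every 2^Shift-th lane carries the next source element and the lanes in between are zeros. A sketch of just the mask test, under the simplifying assumption that the zero-filler operand has already been verified:

#include <vector>

static bool isZExtShuffleMask(const std::vector<int> &Mask,
                              unsigned NumElems, unsigned Shift) {
  unsigned Stride = 1u << Shift;          // one live lane per Stride slots
  for (unsigned i = 0; i != NumElems; ++i) {
    int M = Mask[i];
    if (i % Stride == 0) {
      if (M != (int)(i / Stride))
        return false;                     // live lane: consecutive elements
    } else if (M >= 0 && M < (int)NumElems) {
      return false;                       // filler lane may only be undef or
    }                                     // a (zero) element of source 2
  }
  return true;
}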
if (ISD::isBuildVectorAllZeros(V2.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG); if (NewOp.getNode()) { - EVT NewVT = NewOp.getValueType(); + MVT NewVT = NewOp.getValueType().getSimpleVT(); if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT, true, false)) return getVZextMovL(VT, NewVT, NewOp.getOperand(0), DAG, Subtarget, dl); } } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { - SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); + SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG); if (NewOp.getNode()) { - EVT NewVT = NewOp.getValueType(); + MVT NewVT = NewOp.getValueType().getSimpleVT(); if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT)) return getVZextMovL(VT, NewVT, NewOp.getOperand(1), DAG, Subtarget, dl); @@ -6725,7 +6768,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); unsigned NumElems = VT.getVectorNumElements(); bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; @@ -6816,7 +6859,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. - EVT EltVT = VT.getVectorElementType(); + MVT EltVT = VT.getVectorElementType(); ShAmt *= EltVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } @@ -6855,7 +6898,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (isShift) { // No better options. Use a vshldq / vsrldq. - EVT EltVT = VT.getVectorElementType(); + MVT EltVT = VT.getVectorElementType(); ShAmt *= EltVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } @@ -6926,7 +6969,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // nodes, and remove one by one until they don't return Op anymore. if (isPALIGNRMask(M, VT, Subtarget)) - return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2, + return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2, getShufflePALIGNRImmediate(SVOp), DAG); @@ -7035,13 +7078,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -SDValue -X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); +static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); - if (!Op.getOperand(0).getValueType().is128BitVector()) + if (!Op.getOperand(0).getValueType().getSimpleVT().is128BitVector()) return SDValue(); if (VT.getSizeInBits() == 8) { @@ -7106,7 +7147,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, return SDValue(); SDValue Vec = Op.getOperand(0); - EVT VecVT = Vec.getValueType(); + MVT VecVT = Vec.getValueType().getSimpleVT(); // If this is a 256-bit vector result, first extract the 128-bit vector and // then extract the element from the 128-bit vector. @@ -7133,7 +7174,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, return Res; } - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); DebugLoc dl = Op.getDebugLoc(); // TODO: handle v16i8. 
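The f32 extract path above ("SHUFPS the element to the lowest double word, then movss") works because only lane 0 can be read out directly; arbitrary lanes are first shuffled down. The same idea with intrinsics, as a sketch (Idx must be a compile-time constant, hence the template parameter):

#include <immintrin.h>

template <int Idx>
static float extractLane(__m128 V) {
  // Broadcast lane Idx; only lane 0 of the result is actually read.
  __m128 Shuf = _mm_shuffle_ps(V, V, _MM_SHUFFLE(Idx, Idx, Idx, Idx));
  return _mm_cvtss_f32(Shuf);
}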
if (VT.getSizeInBits() == 16) { @@ -7146,7 +7187,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MVT::v4i32, Vec), Op.getOperand(1))); // Transform it so it match pextrw which produces a 32-bit result. - EVT EltVT = MVT::i32; + MVT EltVT = MVT::i32; SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT, Op.getOperand(0), Op.getOperand(1)); SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract, @@ -7161,7 +7202,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // SHUFPS the element to the lowest double word, then movss. int Mask[4] = { static_cast<int>(Idx), -1, -1, -1 }; - EVT VVT = Op.getOperand(0).getValueType(); + MVT VVT = Op.getOperand(0).getValueType().getSimpleVT(); SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, @@ -7180,7 +7221,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // Note if the lower 64 bits of the result of the UNPCKHPD is then stored // to a f64mem, the whole operation is folded into a single MOVHPDmr. int Mask[2] = { 1, -1 }; - EVT VVT = Op.getOperand(0).getValueType(); + MVT VVT = Op.getOperand(0).getValueType().getSimpleVT(); SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, @@ -7190,11 +7231,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, return SDValue(); } -SDValue -X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - EVT EltVT = VT.getVectorElementType(); +static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getValueType().getSimpleVT(); + MVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); SDValue N0 = Op.getOperand(0); @@ -7247,8 +7286,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - EVT EltVT = VT.getVectorElementType(); + MVT VT = Op.getValueType().getSimpleVT(); + MVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); SDValue N0 = Op.getOperand(0); @@ -7296,7 +7335,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); - EVT OpVT = Op.getValueType(); + MVT OpVT = Op.getValueType().getSimpleVT(); // If this is a 256-bit vector result, first insert into a 128-bit // vector and then insert into the 256-bit vector. @@ -7511,8 +7550,7 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, - int64_t Offset, - SelectionDAG &DAG) const { + int64_t Offset, SelectionDAG &DAG) const { // Create the TargetGlobalAddress node, folding in the constant // offset if it is legal. 
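For reference alongside LowerINSERT_VECTOR_ELT_SSE4 above: the SSE4.1 pinsrb/pinsrw/pinsrd family this path targets is directly reachable from intrinsics, e.g. (sketch; the lane index again has to be an immediate):

#include <immintrin.h>

template <int Idx>
static __m128i insertLane32(__m128i V, int X) {
  return _mm_insert_epi32(V, X, Idx);   // SSE4.1 pinsrd $Idx
}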
unsigned char OpFlags = @@ -7732,7 +7770,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { case TLSModel::LocalExec: return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, Subtarget->is64Bit(), - getTargetMachine().getRelocationModel() == Reloc::PIC_); + getTargetMachine().getRelocationModel() == Reloc::PIC_); } llvm_unreachable("Unknown TLS model."); } @@ -8015,9 +8053,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SmallVector<Constant*,2> CV1; CV1.push_back( - ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL)))); + ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble, + APInt(64, 0x4330000000000000ULL)))); CV1.push_back( - ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL)))); + ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble, + APInt(64, 0x4530000000000000ULL)))); Constant *C1 = ConstantVector::get(CV1); SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); @@ -8111,7 +8151,8 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, SVT == MVT::v8i8 || SVT == MVT::v8i16) && "Custom UINT_TO_FP is not supported!"); - EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, SVT.getVectorNumElements()); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, + SVT.getVectorNumElements()); return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0)); } @@ -8204,8 +8245,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0)); } -std::pair<SDValue,SDValue> X86TargetLowering:: -FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) const { +std::pair<SDValue,SDValue> +X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, + bool IsSigned, bool IsReplace) const { DebugLoc DL = Op.getDebugLoc(); EVT DstTy = Op.getValueType(); @@ -8299,9 +8341,9 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget) { - EVT VT = Op->getValueType(0); + MVT VT = Op->getValueType(0).getSimpleVT(); SDValue In = Op->getOperand(0); - EVT InVT = In.getValueType(); + MVT InVT = In.getValueType().getSimpleVT(); DebugLoc dl = Op->getDebugLoc(); // Optimize vectors in AVX mode: @@ -8330,7 +8372,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef); SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? 
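The 0x4330... and 0x4530... constants in LowerUINT_TO_FP_i64 above are the bit patterns of 2^52 and 2^84: planting the low and high 32-bit halves of a u64 in their mantissas and subtracting the bias reconstructs the value exactly. A scalar rendition of the same arithmetic (sketch; the vector code performs the subtraction and the final add with SSE):

#include <cstdint>
#include <cstring>

static double u64ToDouble(uint64_t X) {
  uint64_t LoBits = 0x4330000000000000ULL | (X & 0xffffffffULL); // 2^52 + lo
  uint64_t HiBits = 0x4530000000000000ULL | (X >> 32);     // 2^84 + hi*2^32
  double Lo, Hi;
  std::memcpy(&Lo, &LoBits, sizeof Lo);
  std::memcpy(&Hi, &HiBits, sizeof Hi);
  // Both subtractions are exact; only the final add rounds.
  return (Hi - 0x1p84) + (Lo - 0x1p52);
}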
ZeroVec : Undef); - EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + MVT HVT = MVT::getVectorVT(VT.getVectorElementType(), VT.getVectorNumElements()/2); OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo); @@ -8352,9 +8394,9 @@ SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op, SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); SDValue In = Op.getOperand(0); - EVT SVT = In.getValueType(); + MVT SVT = In.getValueType().getSimpleVT(); if (Subtarget->hasFp256()) { SDValue Res = LowerAVXExtend(Op, DAG, Subtarget); @@ -8382,11 +8424,11 @@ SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi); } -SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); SDValue In = Op.getOperand(0); - EVT SVT = In.getValueType(); + MVT SVT = In.getValueType().getSimpleVT(); if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) { // On AVX2, v4i64 -> v4i32 becomes VPERMD. @@ -8501,9 +8543,10 @@ SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { - if (Op.getValueType().isVector()) { - if (Op.getValueType() == MVT::v8i16) - return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), Op.getValueType(), + MVT VT = Op.getValueType().getSimpleVT(); + if (VT.isVector()) { + if (VT == MVT::v8i16) + return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), VT, DAG.getNode(ISD::FP_TO_SINT, Op.getDebugLoc(), MVT::v8i32, Op.getOperand(0))); return SDValue(); @@ -8542,12 +8585,11 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, return FIST; } -SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) { DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); SDValue In = Op.getOperand(0); - EVT SVT = In.getValueType(); + MVT SVT = In.getValueType().getSimpleVT(); assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); @@ -8559,8 +8601,8 @@ SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op, SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - EVT EltVT = VT; + MVT VT = Op.getValueType().getSimpleVT(); + MVT EltVT = VT; unsigned NumElts = VT == MVT::f64 ? 
2 : 4; if (VT.isVector()) { EltVT = VT.getVectorElementType(); @@ -8568,9 +8610,11 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { } Constant *C; if (EltVT == MVT::f64) - C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))); + C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble, + APInt(64, ~(1ULL << 63)))); else - C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))); + C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle, + APInt(32, ~(1U << 31)))); C = ConstantVector::getSplat(NumElts, C); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); @@ -8591,8 +8635,8 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - EVT EltVT = VT; + MVT VT = Op.getValueType().getSimpleVT(); + MVT EltVT = VT; unsigned NumElts = VT == MVT::f64 ? 2 : 4; if (VT.isVector()) { EltVT = VT.getVectorElementType(); @@ -8600,9 +8644,11 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { } Constant *C; if (EltVT == MVT::f64) - C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))); + C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble, + APInt(64, 1ULL << 63))); else - C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))); + C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle, + APInt(32, 1U << 31))); C = ConstantVector::getSplat(NumElts, C); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); @@ -8626,8 +8672,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - EVT SrcVT = Op1.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); + MVT SrcVT = Op1.getValueType().getSimpleVT(); // If second operand is smaller, extend it first. if (SrcVT.bitsLT(VT)) { @@ -8646,13 +8692,15 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // First get the sign bit of second operand. 
SmallVector<Constant*,4> CV; if (SrcVT == MVT::f64) { - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0)))); + const fltSemantics &Sem = APFloat::IEEEdouble; + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 1ULL << 63)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0)))); } else { - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); + const fltSemantics &Sem = APFloat::IEEEsingle; + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 1U << 31)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); } Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); @@ -8675,13 +8723,17 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Clear first operand sign bit. CV.clear(); if (VT == MVT::f64) { - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0)))); + const fltSemantics &Sem = APFloat::IEEEdouble; + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, + APInt(64, ~(1ULL << 63))))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0)))); } else { - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); + const fltSemantics &Sem = APFloat::IEEEsingle; + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, + APInt(32, ~(1U << 31))))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); } C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); @@ -8697,7 +8749,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { SDValue N0 = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); // Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1). SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0, @@ -8707,7 +8759,8 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { // LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able. 
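FABS, FNEG and FCOPYSIGN above all reduce to constant-pool masks built from 1<<63 and ~(1<<63) (and the f32 equivalents): clear the sign bit of the magnitude, isolate the sign bit of the sign source, OR them together. The scalar integer version of the whole dance, for clarity:

#include <cstdint>
#include <cstring>

static double copySignViaMasks(double Mag, double Sgn) {
  uint64_t M, S;
  std::memcpy(&M, &Mag, sizeof M);
  std::memcpy(&S, &Sgn, sizeof S);
  uint64_t R = (M & ~(1ULL << 63))      // the FABS half: drop the sign
             | (S &  (1ULL << 63));     // the sign bit of the second operand
  std::memcpy(&Mag, &R, sizeof Mag);
  return Mag;
}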
// -SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree."); if (!Subtarget->hasSSE41()) @@ -9139,65 +9192,10 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, return SDValue(); } -SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - - if (Op.getValueType().isVector()) return LowerVSETCC(Op, DAG); - - assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); - SDValue Op0 = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - - // Optimize to BT if possible. - // Lower (X & (1 << N)) == 0 to BT(X, N). - // Lower ((X >>u N) & 1) != 0 to BT(X, N). - // Lower ((X >>s N) & 1) != 0 to BT(X, N). - if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && - Op1.getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Op1)->isNullValue() && - (CC == ISD::SETEQ || CC == ISD::SETNE)) { - SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); - if (NewSetCC.getNode()) - return NewSetCC; - } - - // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of - // these. - if (Op1.getOpcode() == ISD::Constant && - (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || - cast<ConstantSDNode>(Op1)->isNullValue()) && - (CC == ISD::SETEQ || CC == ISD::SETNE)) { - - // If the input is a setcc, then reuse the input setcc or use a new one with - // the inverted condition. - if (Op0.getOpcode() == X86ISD::SETCC) { - X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); - bool Invert = (CC == ISD::SETNE) ^ - cast<ConstantSDNode>(Op1)->isNullValue(); - if (!Invert) return Op0; - - CCode = X86::GetOppositeBranchCondition(CCode); - return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); - } - } - - bool isFP = Op1.getValueType().isFloatingPoint(); - unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); - if (X86CC == X86::COND_INVALID) - return SDValue(); - - SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG); - EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); - return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), EFLAGS); -} - // Lower256IntVSETCC - Break a VSETCC 256-bit integer VSETCC into two new 128 // ones, and then concatenate the result back. 
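What Lower256IntVSETCC (following) does, written out with AVX intrinsics for one concrete case: compare the two 128-bit halves separately, then reassemble. This is a sketch; pre-AVX2 integer compares only exist at 128 bits, which is the reason for the split:

#include <immintrin.h>

static __m256i cmpeq32x8(__m256i A, __m256i B) {
  __m128i Lo = _mm_cmpeq_epi32(_mm256_castsi256_si128(A),
                               _mm256_castsi256_si128(B));
  __m128i Hi = _mm_cmpeq_epi32(_mm256_extractf128_si256(A, 1),
                               _mm256_extractf128_si256(B, 1));
  return _mm256_insertf128_si256(_mm256_castsi128_si256(Lo), Hi, 1);
}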
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC && "Unsupported value type for operation"); @@ -9217,26 +9215,27 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl); // Issue the operation on the smaller types and concatenate the result back - MVT EltVT = VT.getVectorElementType().getSimpleVT(); - EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + MVT EltVT = VT.getVectorElementType(); + MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC), DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC)); } -SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { SDValue Cond; SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); - bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); + bool isFP = Op.getOperand(1).getValueType().getSimpleVT().isFloatingPoint(); DebugLoc dl = Op.getDebugLoc(); if (isFP) { #ifndef NDEBUG - EVT EltVT = Op0.getValueType().getVectorElementType(); + MVT EltVT = Op0.getValueType().getVectorElementType().getSimpleVT(); assert(EltVT == MVT::f32 || EltVT == MVT::f64); #endif @@ -9377,6 +9376,63 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { return Result; } +SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + + MVT VT = Op.getValueType().getSimpleVT(); + + if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG); + + assert(VT == MVT::i8 && "SetCC type must be 8-bit integer"); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + + // Optimize to BT if possible. + // Lower (X & (1 << N)) == 0 to BT(X, N). + // Lower ((X >>u N) & 1) != 0 to BT(X, N). + // Lower ((X >>s N) & 1) != 0 to BT(X, N). + if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && + Op1.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Op1)->isNullValue() && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); + if (NewSetCC.getNode()) + return NewSetCC; + } + + // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of + // these. + if (Op1.getOpcode() == ISD::Constant && + (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || + cast<ConstantSDNode>(Op1)->isNullValue()) && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + + // If the input is a setcc, then reuse the input setcc or use a new one with + // the inverted condition. 
+ if (Op0.getOpcode() == X86ISD::SETCC) { + X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); + bool Invert = (CC == ISD::SETNE) ^ + cast<ConstantSDNode>(Op1)->isNullValue(); + if (!Invert) return Op0; + + CCode = X86::GetOppositeBranchCondition(CCode); + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); + } + } + + bool isFP = Op1.getValueType().getSimpleVT().isFloatingPoint(); + unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); + if (X86CC == X86::COND_INVALID) + return SDValue(); + + SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG); + EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), EFLAGS); +} + // isX86LogicalCmp - Return true if opcode is a X86 logical comparison. static bool isX86LogicalCmp(SDValue Op) { unsigned Opc = Op.getNode()->getOpcode(); @@ -9499,7 +9555,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp = Cond.getOperand(1); unsigned Opc = Cmp.getOpcode(); - EVT VT = Op.getValueType(); + MVT VT = Op.getValueType().getSimpleVT(); bool IllegalFPCMov = false; if (VT.isFloatingPoint() && !VT.isVector() && @@ -9610,9 +9666,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op->getValueType(0); + MVT VT = Op->getValueType(0).getSimpleVT(); SDValue In = Op->getOperand(0); - EVT InVT = In.getValueType(); + MVT InVT = In.getValueType().getSimpleVT(); DebugLoc dl = Op->getDebugLoc(); if ((VT != MVT::v4i64 || InVT != MVT::v4i32) && @@ -9646,7 +9702,7 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op, SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]); - EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), VT.getVectorNumElements()/2); OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo); @@ -10155,7 +10211,7 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } -// getTargetVShiftNOde - Handle vector element shifts where the shift amount +// getTargetVShiftNode - Handle vector element shifts where the shift amount // may or may not be a constant. Takes immediate version of shift as input. static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, SDValue SrcOp, SDValue ShAmt, @@ -11377,13 +11433,55 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo); } +SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + EVT EltTy = VT.getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + SDValue N0 = Op.getOperand(0); + DebugLoc dl = Op.getDebugLoc(); + + // Lower sdiv X, pow2-const. + BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(Op.getOperand(1)); + if (!C) + return SDValue(); + + APInt SplatValue, SplatUndef; + unsigned MinSplatBits; + bool HasAnyUndefs; + if (!C->isConstantSplat(SplatValue, SplatUndef, MinSplatBits, HasAnyUndefs)) + return SDValue(); + + if ((SplatValue != 0) && + (SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) { + unsigned lg2 = SplatValue.countTrailingZeros(); + // Splat the sign bit. 
+ SDValue Sz = DAG.getConstant(EltTy.getSizeInBits()-1, MVT::i32); + SDValue SGN = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, N0, Sz, DAG); + // Add (N0 < 0) ? abs2 - 1 : 0; + SDValue Amt = DAG.getConstant(EltTy.getSizeInBits() - lg2, MVT::i32); + SDValue SRL = getTargetVShiftNode(X86ISD::VSRLI, dl, VT, SGN, Amt, DAG); + SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL); + SDValue Lg2Amt = DAG.getConstant(lg2, MVT::i32); + SDValue SRA = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, ADD, Lg2Amt, DAG); + + // If we're dividing by a positive value, we're done. Otherwise, we must + // negate the result. + if (SplatValue.isNonNegative()) + return SRA; + + SmallVector<SDValue, 16> V(NumElts, DAG.getConstant(0, EltTy)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], NumElts); + return DAG.getNode(ISD::SUB, dl, VT, Zero, SRA); + } + return SDValue(); +} + SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); - LLVMContext *Context = DAG.getContext(); if (!Subtarget->hasSSE2()) return SDValue(); @@ -11500,17 +11598,9 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { // Lower SHL with variable shift amount. if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { - Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1), - DAG.getConstant(23, MVT::i32)); - - const uint32_t CV[] = { 0x3f800000U, 0x3f800000U, 0x3f800000U, 0x3f800000U}; - Constant *C = ConstantDataVector::get(*Context, CV); - SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); - SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, 16); + Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT)); - Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend); + Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT)); Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op); Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op); return DAG.getNode(ISD::MUL, dl, VT, Op, R); @@ -11519,8 +11609,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq."); // a = a << 5; - Op = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, Op.getOperand(1), - DAG.getConstant(5, MVT::i32)); + Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, VT)); Op = DAG.getNode(ISD::BITCAST, dl, VT, Op); // Turn 'a' into a mask suitable for VSELECT @@ -11952,6 +12041,43 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } +SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit()); + + // For MacOSX, we want to call an alternative entry point: __sincos_stret, + // which returns the values in two XMM registers. + DebugLoc dl = Op.getDebugLoc(); + SDValue Arg = Op.getOperand(0); + EVT ArgVT = Arg.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + + ArgListTy Args; + ArgListEntry Entry; + + Entry.Node = Arg; + Entry.Ty = ArgTy; + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + // Only optimize x86_64 for now. i386 is a bit messy. For f32, + // the small struct {f32, f32} is returned in (eax, edx). For f64, + // the results are returned via SRet in memory. + const char *LibcallName = (ArgVT == MVT::f64) + ? 
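Scalar rendition of the VSRAI/VSRLI/ADD/VSRAI chain that the new LowerSDIV emits: negative dividends are biased by 2^K - 1 before the arithmetic shift so the quotient truncates toward zero, and a divisor of -2^K just negates the result. Sketch, assuming i32 elements and 1 <= K <= 30:

#include <cstdint>

static int32_t sdivPow2(int32_t X, unsigned K, bool NegDivisor) {
  int32_t Sign = X >> 31;                               // splat sign (VSRAI)
  int32_t Bias = (int32_t)((uint32_t)Sign >> (32 - K)); // 2^K-1 if X<0 (VSRLI)
  int32_t Q = (X + Bias) >> K;                          // truncating shift (VSRAI)
  return NegDivisor ? -Q : Q;                           // divisor was -2^K
}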
"__sincos_stret" : "__sincosf_stret"; + SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); + + StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); + TargetLowering:: + CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, + false, false, false, false, 0, + CallingConv::C, /*isTaillCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed*/true, + Callee, Args, DAG, dl); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + return CallResult.first; +} + /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -11981,13 +12107,13 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); - case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG); + case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG); case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); - case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG); + case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); case ISD::FABS: return LowerFABS(Op, DAG); case ISD::FNEG: return LowerFNEG(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); @@ -12033,6 +12159,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ADD: return LowerADD(Op, DAG); case ISD::SUB: return LowerSUB(Op, DAG); + case ISD::SDIV: return LowerSDIV(Op, DAG); + case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); } } @@ -12372,7 +12500,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::TESTP: return "X86ISD::TESTP"; - case X86ISD::PALIGN: return "X86ISD::PALIGN"; + case X86ISD::PALIGNR: return "X86ISD::PALIGNR"; case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW"; @@ -12783,7 +12911,7 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, MachineFunction::iterator I = MBB; ++I; - assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 && + assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 && "Unexpected number of operands"); assert(MI->hasOneMemOperand() && @@ -13015,7 +13143,7 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, MachineFunction::iterator I = MBB; ++I; - assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 && + assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 && "Unexpected number of operands"); assert(MI->hasOneMemOperand() && @@ -15246,13 +15374,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD && isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) { APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue(); - if (CondRHS.getConstantOperandVal(0) == -A-1) { - SmallVector<SDValue, 32> V(VT.getVectorNumElements(), - DAG.getConstant(-A, VT.getScalarType())); + if (CondRHS.getConstantOperandVal(0) == -A-1) return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, - 
DAG.getNode(ISD::BUILD_VECTOR, DL, VT, - V.data(), V.size())); - } + DAG.getConstant(-A, VT)); } // Another special case: If C was a sign bit, the sub has been @@ -15552,7 +15676,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, ConstantSDNode *CmpAgainst = 0; if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) && (CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) && - dyn_cast<ConstantSDNode>(Cond.getOperand(0)) == 0) { + !isa<ConstantSDNode>(Cond.getOperand(0))) { if (CC == X86::COND_NE && CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) { @@ -15832,8 +15956,7 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, if (VT == MVT::f32 || VT == MVT::f64) { bool ExpectingFlags = false; // Check for any users that want flags: - for (SDNode::use_iterator UI = N->use_begin(), - UE = N->use_end(); + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); !ExpectingFlags && UI != UE; ++UI) switch (UI->getOpcode()) { default: @@ -15920,7 +16043,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { EVT VT = N->getValueType(0); - if (VT.getSizeInBits() != 256) + if (!VT.is256BitVector()) return SDValue(); assert((N->getOpcode() == ISD::ANY_EXTEND || @@ -15929,7 +16052,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, SDValue Narrow = N->getOperand(0); EVT NarrowVT = Narrow->getValueType(0); - if (NarrowVT.getSizeInBits() != 128) + if (!NarrowVT.is128BitVector()) return SDValue(); if (Narrow->getOpcode() != ISD::XOR && @@ -16125,11 +16248,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, DebugLoc DL = N->getDebugLoc(); - // We are going to replace the AND, OR, NAND with either BLEND - // or PSIGN, which only look at the MSB. The VSRAI instruction - // does not affect the highest bit, so we can get rid of it. - Mask = Mask.getOperand(0); - // Now we know we at least have a plendvb with the mask val. See if // we can form a psignb/w/d. // psign = x.type == y.type == mask.type && y = sub(0, x); @@ -16138,7 +16256,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, X.getValueType() == MaskVT && Y.getValueType() == MaskVT) { assert((EltBits == 8 || EltBits == 16 || EltBits == 32) && "Unsupported VT for PSIGN"); - Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask); + Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0)); return DAG.getNode(ISD::BITCAST, DL, VT, Mask); } // PBLENDVB only available on SSE 4.1 @@ -16296,8 +16414,42 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, EVT MemVT = Ld->getMemoryVT(); DebugLoc dl = Ld->getDebugLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned RegSz = RegVT.getSizeInBits(); ISD::LoadExtType Ext = Ld->getExtensionType(); + unsigned Alignment = Ld->getAlignment(); + bool IsAligned = Alignment == 0 || Alignment == MemVT.getSizeInBits()/8; + + // On Sandybridge unaligned 256bit loads are inefficient. 
+ if (RegVT.is256BitVector() && !Subtarget->hasInt256() && + !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) { + unsigned NumElems = RegVT.getVectorNumElements(); + if (NumElems < 2) + return SDValue(); + + SDValue Ptr = Ld->getBasePtr(); + SDValue Increment = DAG.getConstant(16, TLI.getPointerTy()); + + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), + NumElems/2); + SDValue Load1 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, + Ld->getPointerInfo(), Ld->isVolatile(), + Ld->isNonTemporal(), Ld->isInvariant(), + Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, + Ld->getPointerInfo(), Ld->isVolatile(), + Ld->isNonTemporal(), Ld->isInvariant(), + std::max(Alignment/2U, 1U)); + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Load1.getValue(1), + Load2.getValue(1)); + + SDValue NewVec = DAG.getUNDEF(RegVT); + NewVec = Insert128BitVector(NewVec, Load1, 0, DAG, dl); + NewVec = Insert128BitVector(NewVec, Load2, NumElems/2, DAG, dl); + return DCI.CombineTo(N, NewVec, TF, true); + } // If this is a vector EXT Load then attempt to optimize it using a // shuffle. If SSSE3 is not available we may emit an illegal shuffle but the @@ -16312,7 +16464,6 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, assert(MemVT.isVector() && "Must load a vector from memory"); unsigned NumElems = RegVT.getVectorNumElements(); - unsigned RegSz = RegVT.getSizeInBits(); unsigned MemSz = MemVT.getSizeInBits(); assert(RegSz > MemSz && "Register size must be greater than the mem size"); @@ -16356,8 +16507,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, // Represent the data using the same element type that is stored in // memory. In practice, we ''widen'' MemVT. - EVT WideVecVT = - EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), + EVT WideVecVT = + EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), loadRegZize/MemVT.getScalarType().getSizeInBits()); assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() && @@ -16426,10 +16577,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, // Build the arithmetic shift. unsigned Amt = RegVT.getVectorElementType().getSizeInBits() - MemVT.getVectorElementType().getSizeInBits(); - SmallVector<SDValue, 8> C(NumElems, - DAG.getConstant(Amt, RegVT.getScalarType())); - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, RegVT, &C[0], C.size()); - Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff, BV); + Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff, + DAG.getConstant(Amt, RegVT)); return DCI.CombineTo(N, Shuff, TF, true); } @@ -16462,16 +16611,21 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, DebugLoc dl = St->getDebugLoc(); SDValue StoredVal = St->getOperand(1); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned Alignment = St->getAlignment(); + bool IsAligned = Alignment == 0 || Alignment == VT.getSizeInBits()/8; // If we are saving a concatenation of two XMM registers, perform two stores. // On Sandy Bridge, 256-bit memory operations are executed by two // 128-bit ports. However, on Haswell it is better to issue a single 256-bit // memory operation. 
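Hand-written equivalent of the load split added above (the store combine just below mirrors it): on Sandy Bridge-class hardware an unaligned 32-byte access is slower than two 16-byte halves glued with vinsertf128. Sketch with intrinsics:

#include <immintrin.h>

static __m256 loadUnaligned256(const float *P) {
  __m128 Lo = _mm_loadu_ps(P);           // first 16 bytes
  __m128 Hi = _mm_loadu_ps(P + 4);       // second 16 bytes
  return _mm256_insertf128_ps(_mm256_castps128_ps256(Lo), Hi, 1);
}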
if (VT.is256BitVector() && !Subtarget->hasInt256() && - StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS && - StoredVal.getNumOperands() == 2) { - SDValue Value0 = StoredVal.getOperand(0); - SDValue Value1 = StoredVal.getOperand(1); + StVT == VT && !IsAligned) { + unsigned NumElems = VT.getVectorNumElements(); + if (NumElems < 2) + return SDValue(); + + SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl); + SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl); SDValue Stride = DAG.getConstant(16, TLI.getPointerTy()); SDValue Ptr0 = St->getBasePtr(); @@ -16479,10 +16633,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment()); + St->isNonTemporal(), Alignment); SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment()); + St->isNonTemporal(), + std::max(Alignment/2U, 1U)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); } @@ -16917,6 +17072,41 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + if (!VT.isVector()) + return SDValue(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT ExtraVT = cast<VTSDNode>(N1)->getVT(); + DebugLoc dl = N->getDebugLoc(); + + // The SIGN_EXTEND_INREG to v4i64 is expensive operation on the + // both SSE and AVX2 since there is no sign-extended shift right + // operation on a vector with 64-bit elements. + //(sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) -> + // (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT))) + if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND)) { + SDValue N00 = N0.getOperand(0); + + // EXTLOAD has a better solution on AVX2, + // it may be replaced with X86ISD::VSEXT node. + if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256()) + if (!ISD::isNormalLoad(N00.getNode())) + return SDValue(); + + if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) { + SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, + N00, N1); + return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp); + } + } + return SDValue(); +} + static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { @@ -17002,7 +17192,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, } } - if (VT.isVector() && VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget); if (R.getNode()) return R; @@ -17037,8 +17227,8 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// Helper function of PerformSETCCCombine. It is to materialize "setb reg" -// as "sbb reg,reg", since it can be extended without zext and produces +// Helper function of PerformSETCCCombine. It is to materialize "setb reg" +// as "sbb reg,reg", since it can be extended without zext and produces // an all-ones bit which is more useful than 0/1 in some cases. 
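The reason MaterializeSETB (defined next) prefers "sbb reg,reg" over "setb reg": subtract-with-borrow of a register from itself computes 0 - 0 - CF, which is already a sign-extended 0 or -1 and needs no further widening. A C model of the value it produces:

#include <cstdint>

static uint32_t materializeCarry(bool CF) {
  // setb %al         -> 0 or 1 in one byte, still needs extension
  // sbb  %eax, %eax  -> 0 or 0xffffffff, all 32 bits at once
  return CF ? 0xffffffffu : 0u;
}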
static SDValue MaterializeSETB(DebugLoc DL, SDValue EFLAGS, SelectionDAG &DAG) { return DAG.getNode(ISD::AND, DL, MVT::i8, @@ -17056,13 +17246,13 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG, SDValue EFLAGS = N->getOperand(1); if (CC == X86::COND_A) { - // Try to convert COND_A into COND_B in an attempt to facilitate + // Try to convert COND_A into COND_B in an attempt to facilitate // materializing "setb reg". // // Do not flip "e > c", where "c" is a constant, because Cmp instruction // cannot take an immediate as its first operand. // - if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && + if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && EFLAGS.getValueType().isInteger() && !isa<ConstantSDNode>(EFLAGS.getOperand(1))) { SDValue NewSub = DAG.getNode(X86ISD::SUB, EFLAGS.getDebugLoc(), @@ -17270,7 +17460,8 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG, if (In.getOpcode() != X86ISD::VZEXT) return SDValue(); - return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0), In.getOperand(0)); + return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0), + In.getOperand(0)); } SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, @@ -17308,13 +17499,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget); case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget); + case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget); case ISD::SETCC: return PerformISDSETCCCombine(N, DAG); case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget); case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget); case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles - case X86ISD::PALIGN: + case X86ISD::PALIGNR: case X86ISD::UNPCKH: case X86ISD::UNPCKL: case X86ISD::MOVHLPS: @@ -17497,7 +17689,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces.clear(); const std::string &ConstraintsStr = IA->getConstraintString(); SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); - std::sort(AsmPieces.begin(), AsmPieces.end()); + array_pod_sort(AsmPieces.begin(), AsmPieces.end()); if (AsmPieces.size() == 4 && AsmPieces[0] == "~{cc}" && AsmPieces[1] == "~{dirflag}" && @@ -17515,7 +17707,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces.clear(); const std::string &ConstraintsStr = IA->getConstraintString(); SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); - std::sort(AsmPieces.begin(), AsmPieces.end()); + array_pod_sort(AsmPieces.begin(), AsmPieces.end()); if (AsmPieces.size() == 4 && AsmPieces[0] == "~{cc}" && AsmPieces[1] == "~{dirflag}" && @@ -17995,7 +18187,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // really want an 8-bit or 32-bit register, map to the appropriate register // class and return the appropriate register. 
if (Res.second == &X86::GR16RegClass) { - if (VT == MVT::i8) { + if (VT == MVT::i8 || VT == MVT::i1) { unsigned DestReg = 0; switch (Res.first) { default: break; @@ -18008,7 +18200,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, Res.first = DestReg; Res.second = &X86::GR8RegClass; } - } else if (VT == MVT::i32) { + } else if (VT == MVT::i32 || VT == MVT::f32) { unsigned DestReg = 0; switch (Res.first) { default: break; @@ -18025,7 +18217,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, Res.first = DestReg; Res.second = &X86::GR32RegClass; } - } else if (VT == MVT::i64) { + } else if (VT == MVT::i64 || VT == MVT::f64) { unsigned DestReg = 0; switch (Res.first) { default: break; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 16ce364..958ceb0 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -234,11 +234,8 @@ namespace llvm { // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. EH_SJLJ_LONGJMP, - /// TC_RETURN - Tail call return. - /// operand #0 chain - /// operand #1 callee (register or absolute) - /// operand #2 stack adjustment - /// operand #3 optional in flag + /// TC_RETURN - Tail call return. See X86TargetLowering::LowerCall for + /// the list of operands. TC_RETURN, // VZEXT_MOVL - Vector move low and zero extend. @@ -294,7 +291,7 @@ namespace llvm { TESTP, // Several flavors of instructions with vector shuffle behaviors. - PALIGN, + PALIGNR, PSHUFD, PSHUFHW, PSHUFLW, @@ -794,9 +791,7 @@ namespace llvm { SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, @@ -811,20 +806,18 @@ namespace llvm { SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerToBT(SDValue And, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; @@ -841,8 +834,9 @@ namespace llvm { SDValue 
LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; - + SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; // Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const; @@ -851,7 +845,7 @@ namespace llvm { SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td index 54b91c3..bb362f5 100644 --- a/lib/Target/X86/X86Instr3DNow.td +++ b/lib/Target/X86/X86Instr3DNow.td @@ -87,12 +87,10 @@ defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">; def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>; def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr), - "prefetch $addr", []>; + "prefetch\t$addr", []>; -// FIXME: Diassembler gets a bogus decode conflict. -let isAsmParserOnly = 1 in def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr), - "prefetchw $addr", []>; + "prefetchw\t$addr", []>; // "3DNowA" instructions defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">; diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 0eecd5f..d86a406 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -29,11 +29,11 @@ def LEA32r : I<0x8D, MRMSrcMem, def LEA64_32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_32mem:$src), "lea{l}\t{$src|$dst}, {$dst|$src}", - [(set GR32:$dst, lea32addr:$src)], IIC_LEA>, + [(set GR32:$dst, lea64_32addr:$src)], IIC_LEA>, Requires<[In64BitMode]>; let isReMaterializable = 1 in -def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), +def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src), "lea{q}\t{$src|$dst}, {$dst|$src}", [(set GR64:$dst, lea64addr:$src)], IIC_LEA>; @@ -1256,3 +1256,49 @@ let Predicates = [HasBMI2] in { let Uses = [RDX] in defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem>, VEX_W; } + +//===----------------------------------------------------------------------===// +// ADCX Instruction +// +let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { + def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "adcx{l}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_NONMEM>, T8, OpSize; + + def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "adcx{q}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>; + + let mayLoad = 1 in { + def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "adcx{l}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_MEM>, T8, OpSize; + + def ADCX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "adcx{q}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_MEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>; + } +} + +//===----------------------------------------------------------------------===// +// ADOX Instruction +// +let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { + def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "adox{l}\t{$src, $dst|$dst, $src}", + [], 
IIC_BIN_NONMEM>, T8XS; + + def ADOX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "adox{q}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_NONMEM>, T8XS, REX_W, Requires<[In64BitMode]>; + + let mayLoad = 1 in { + def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "adox{l}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_MEM>, T8XS; + + def ADOX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "adox{q}\t{$src, $dst|$dst, $src}", + [], IIC_BIN_MEM>, T8XS, REX_W, Requires<[In64BitMode]>; + } +} diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 2a26a22..734e598 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -513,15 +513,19 @@ def CMOV_RFP80 : I<0, Pseudo, multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> { let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in { + let Defs = [EFLAGS, AL] in def NAME#8 : I<0, Pseudo, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val), !strconcat(mnemonic, "8 PSEUDO!"), []>; + let Defs = [EFLAGS, AX] in def NAME#16 : I<0, Pseudo,(outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), !strconcat(mnemonic, "16 PSEUDO!"), []>; + let Defs = [EFLAGS, EAX] in def NAME#32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), !strconcat(mnemonic, "32 PSEUDO!"), []>; + let Defs = [EFLAGS, RAX] in def NAME#64 : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val), !strconcat(mnemonic, "64 PSEUDO!"), []>; @@ -559,7 +563,8 @@ defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">; defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">; multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> { - let usesCustomInserter = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in + let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX], + mayLoad = 1, mayStore = 1, hasSideEffects = 0 in def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), (ins i64mem:$ptr, GR32:$val1, GR32:$val2), !strconcat(mnemonic, "6432 PSEUDO!"), []>; @@ -1076,12 +1081,14 @@ def : Pat<(X86cmp GR64:$src1, 0), // inverted. 
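Context for the ADCX/ADOX definitions above: adcx carries through CF and adox through OF, so two independent carry chains can be interleaved in multiprecision arithmetic without serializing on a single flag. A single-chain sketch using the matching intrinsic (requires an ADX-capable target; the pointer cast covers platforms where uint64_t is not unsigned long long):

#include <immintrin.h>
#include <cstdint>

// R = A + B over 192 bits, one carry chain of adcx-style adds.
static void add192(uint64_t R[3], const uint64_t A[3], const uint64_t B[3]) {
  unsigned char C = 0;
  C = _addcarryx_u64(C, A[0], B[0], (unsigned long long *)&R[0]);
  C = _addcarryx_u64(C, A[1], B[1], (unsigned long long *)&R[1]);
  (void)_addcarryx_u64(C, A[2], B[2], (unsigned long long *)&R[2]);
}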
multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32, Instruction Inst64> { - def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS), - (Inst16 GR16:$src2, addr:$src1)>; - def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS), - (Inst32 GR32:$src2, addr:$src1)>; - def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS), - (Inst64 GR64:$src2, addr:$src1)>; + let Predicates = [HasCMov] in { + def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS), + (Inst16 GR16:$src2, addr:$src1)>; + def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS), + (Inst32 GR32:$src2, addr:$src1)>; + def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS), + (Inst64 GR64:$src2, addr:$src1)>; + } } defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>; diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index f48f133..7759a8a 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -60,14 +60,14 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, PatFrag MemFrag128, PatFrag MemFrag256, SDNode Op, ValueType OpTy128, ValueType OpTy256> { defm r213 : fma3p_rm<opc213, - !strconcat(OpcodeStr, !strconcat("213", PackTy)), + !strconcat(OpcodeStr, "213", PackTy), MemFrag128, MemFrag256, OpTy128, OpTy256, Op>; let neverHasSideEffects = 1 in { defm r132 : fma3p_rm<opc132, - !strconcat(OpcodeStr, !strconcat("132", PackTy)), + !strconcat(OpcodeStr, "132", PackTy), MemFrag128, MemFrag256, OpTy128, OpTy256>; defm r231 : fma3p_rm<opc231, - !strconcat(OpcodeStr, !strconcat("231", PackTy)), + !strconcat(OpcodeStr, "231", PackTy), MemFrag128, MemFrag256, OpTy128, OpTy256>; } // neverHasSideEffects = 1 } @@ -160,15 +160,15 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, X86MemOperand x86memop, Operand memop, PatFrag mem_frag, ComplexPattern mem_cpat> { let neverHasSideEffects = 1 in { - defm r132 : fma3s_rm<opc132, !strconcat(OpStr, !strconcat("132", PackTy)), + defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy), x86memop, RC, OpVT, mem_frag>; - defm r231 : fma3s_rm<opc231, !strconcat(OpStr, !strconcat("231", PackTy)), + defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy), x86memop, RC, OpVT, mem_frag>; } -defm r213 : fma3s_rm<opc213, !strconcat(OpStr, !strconcat("213", PackTy)), +defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy), x86memop, RC, OpVT, mem_frag, OpNode>, - fma3s_rm_int<opc213, !strconcat(OpStr, !strconcat("213", PackTy)), + fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy), memop, mem_cpat, Int, RC>; } diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 6151d5c..44e574d 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -570,7 +570,7 @@ class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm, // FMA4 Instruction Templates class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> - : I<o, F, outs, ins, asm, pattern, itin>, TA, + : Ii8<o, F, outs, ins, asm, pattern, itin>, TA, OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>; // XOP 2, 3 and 4 Operand Instruction Template diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 7025e93..2a72fb6 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -160,7 +160,7 @@ 
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; -def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; +def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>; def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 9ecf5e2..d989ec7 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -525,6 +525,13 @@ def lea64_32mem : Operand<i32> { let ParserMatchClass = X86MemAsmOperand; } +// Memory operands that use 64-bit pointers in both ILP32 and LP64. +def lea64mem : Operand<i64> { + let PrintMethod = "printi64mem"; + let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; +} + //===----------------------------------------------------------------------===// // X86 Complex Pattern Definitions. @@ -535,6 +542,12 @@ def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>; def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr", [add, sub, mul, X86mul_imm, shl, or, frameindex], []>; +// In 64-bit mode 32-bit LEAs can use RIP-relative addressing. +def lea64_32addr : ComplexPattern<i32, 5, "SelectLEAAddr", + [add, sub, mul, X86mul_imm, shl, or, + frameindex, X86WrapperRIP], + []>; + def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr", [tglobaltlsaddr], []>; @@ -590,6 +603,7 @@ def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; def HasBMI : Predicate<"Subtarget->hasBMI()">; def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; def HasRTM : Predicate<"Subtarget->hasRTM()">; +def HasADX : Predicate<"Subtarget->hasADX()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; @@ -856,16 +870,14 @@ let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>, Requires<[In64BitMode]>; - - let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP], mayLoad=1, neverHasSideEffects=1 in { -def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", [], IIC_POP_A>, +def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l|d}", [], IIC_POP_A>, Requires<[In32BitMode]>; } let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], mayStore=1, neverHasSideEffects=1 in { -def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", [], IIC_PUSH_A>, +def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l|d}", [], IIC_PUSH_A>, Requires<[In32BitMode]>; } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 3175324..0979752 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -436,93 +436,69 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // in terms of a copy, and just mentioned, we don't use movss/movsd for copies. 
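A note on the X86PAlign to X86PAlignr rename above: palignr concatenates its two source vectors and extracts a byte-shifted 16-byte window, which is also why the selection patterns later in this patch emit the operands swapped. A standalone intrinsics sketch, illustrative only and not part of the patch:

#include <tmmintrin.h> // SSSE3

// _mm_alignr_epi8(Hi, Lo, N) shifts the 32-byte concatenation {Hi:Lo}
// right by N bytes and keeps the low 16 bytes, so operand order matters.
// That ordering is why the Pat<> rules emit (PALIGNR128rr $src2, $src1, imm).
__m128i take_window(__m128i Hi, __m128i Lo) {
  return _mm_alignr_epi8(Hi, Lo, 4); // bytes 4..19 of {Hi:Lo}
}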
//===----------------------------------------------------------------------===// -class sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, string asm> : - SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm, - [(set VR128:$dst, (vt (OpNode VR128:$src1, - (scalar_to_vector RC:$src2))))], - IIC_SSE_MOV_S_RR>; +multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, + X86MemOperand x86memop, string base_opc, + string asm_opr> { + def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, RC:$src2), + !strconcat(base_opc, asm_opr), + [(set VR128:$dst, (vt (OpNode VR128:$src1, + (scalar_to_vector RC:$src2))))], + IIC_SSE_MOV_S_RR>; -// Loading from memory automatically zeroing upper bits. -class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, - PatFrag mem_pat, string OpcodeStr> : - SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>; - -// AVX -def VMOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32, - "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V, - VEX_LIG; -def VMOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64, - "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V, - VEX_LIG; - -// For the disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { - def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_MOV_S_RR>, - XS, VEX_4V, VEX_LIG; - def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, FR64:$src2), - "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_MOV_S_RR>, - XD, VEX_4V, VEX_LIG; + // For the disassembler + let isCodeGenOnly = 1, hasSideEffects = 0 in + def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src1, RC:$src2), + !strconcat(base_opc, asm_opr), + [], IIC_SSE_MOV_S_RR>; } -let canFoldAsLoad = 1, isReMaterializable = 1 in { - def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX, - VEX_LIG; - let AddedComplexity = 20 in - def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX, - VEX_LIG; -} +multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, + X86MemOperand x86memop, string OpcodeStr> { + // AVX + defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, + VEX_4V, VEX_LIG; -def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), - "movss\t{$src, $dst|$dst, $src}", - [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - XS, VEX, VEX_LIG; -def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - XD, VEX, VEX_LIG; + def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, + VEX, VEX_LIG; + // SSE1 & 2 + let Constraints = "$src1 = $dst" in { + defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr, + "\t{$src2, $dst|$dst, $src2}">; + } -// SSE1 & 2 -let Constraints = "$src1 = $dst" in { - def MOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32, - "movss\t{$src2, $dst|$dst, $src2}">, XS; - def MOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64, - "movsd\t{$src2, $dst|$dst, $src2}">, XD; + def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), + !strconcat(OpcodeStr, 
"\t{$src, $dst|$dst, $src}"), + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; +} - // For the disassembler - let isCodeGenOnly = 1, hasSideEffects = 0 in { - def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $dst|$dst, $src2}", [], - IIC_SSE_MOV_S_RR>, XS; - def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, FR64:$src2), - "movsd\t{$src2, $dst|$dst, $src2}", [], - IIC_SSE_MOV_S_RR>, XD; - } +// Loading from memory automatically zeroing upper bits. +multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, + PatFrag mem_pat, string OpcodeStr> { + def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (mem_pat addr:$src))], + IIC_SSE_MOV_S_RM>, VEX, VEX_LIG; + def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (mem_pat addr:$src))], + IIC_SSE_MOV_S_RM>; } +defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS; +defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd">, XD; + let canFoldAsLoad = 1, isReMaterializable = 1 in { - def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS; + defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS; let AddedComplexity = 20 in - def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD; + defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD; } -def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), - "movss\t{$src, $dst|$dst, $src}", - [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; -def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; - // Patterns let Predicates = [HasAVX] in { let AddedComplexity = 15 in { @@ -1110,34 +1086,41 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), // SSE 1 & 2 - Move Low packed FP Instructions //===----------------------------------------------------------------------===// -multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC, - SDNode psnode, SDNode pdnode, string base_opc, - string asm_opr, InstrItinClass itin> { +multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode, + string base_opc, string asm_opr, + InstrItinClass itin> { def PSrm : PI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), !strconcat(base_opc, "s", asm_opr), - [(set RC:$dst, - (psnode RC:$src1, + [(set VR128:$dst, + (psnode VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))], itin, SSEPackedSingle>, TB; def PDrm : PI<opc, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, f64mem:$src2), + (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), !strconcat(base_opc, "d", asm_opr), - [(set RC:$dst, (v2f64 (pdnode RC:$src1, + [(set VR128:$dst, (v2f64 (pdnode VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))], itin, SSEPackedDouble>, TB, OpSize; + } -let AddedComplexity = 20 in { - defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp", - "\t{$src2, $src1, $dst|$dst, $src1, $src2}", - IIC_SSE_MOV_LH>, VEX_4V; +multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode, + string base_opc, InstrItinClass itin> { + defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}", + itin>, VEX_4V; + +let Constraints = 
"$src1 = $dst" in + defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc, + "\t{$src2, $dst|$dst, $src2}", + itin>; } -let Constraints = "$src1 = $dst", AddedComplexity = 20 in { - defm MOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp", - "\t{$src2, $dst|$dst, $src2}", - IIC_SSE_MOV_LH>; + +let AddedComplexity = 20 in { + defm MOVL : sse12_mov_hilo_packed<0x12, X86Movlps, X86Movlpd, "movlp", + IIC_SSE_MOV_LH>; } def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), @@ -1235,14 +1218,8 @@ let Predicates = [UseSSE2] in { //===----------------------------------------------------------------------===// let AddedComplexity = 20 in { - defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp", - "\t{$src2, $src1, $dst|$dst, $src1, $src2}", - IIC_SSE_MOV_LH>, VEX_4V; -} -let Constraints = "$src1 = $dst", AddedComplexity = 20 in { - defm MOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp", - "\t{$src2, $dst|$dst, $src2}", - IIC_SSE_MOV_LH>; + defm MOVH : sse12_mov_hilo_packed<0x16, X86Movlhps, X86Movlhpd, "movhp", + IIC_SSE_MOV_LH>; } // v2f64 extract element 1 is always custom lowered to unpack high to low @@ -3012,18 +2989,18 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, let Predicates = [HasAVX], hasSideEffects = 0 in { def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, VEX_4V, VEX_LIG; let mayLoad = 1 in { def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, VEX_4V, VEX_LIG; def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, VEX_4V, VEX_LIG; } @@ -3054,18 +3031,18 @@ multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasAVX], hasSideEffects = 0 in { def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, VEX_4V, VEX_LIG; let mayLoad = 1 in { def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, VEX_4V, VEX_LIG; def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, VEX_4V, VEX_LIG; } @@ -3100,22 +3077,22 @@ multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { let Predicates = [HasAVX] in { def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>, VEX; def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>, VEX; def 
V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))], itins.rr>, VEX, VEX_L; def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))], itins.rm>, VEX, VEX_L; @@ -3135,23 +3112,23 @@ multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr, OpndItins itins> { let Predicates = [HasAVX] in { def V#NAME#PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int VR128:$src))], itins.rr>, VEX; def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))], itins.rm>, VEX; def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V8F32Int VR256:$src))], itins.rr>, VEX, VEX_L; def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))], itins.rm>, VEX, VEX_L; @@ -3173,18 +3150,18 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, let Predicates = [HasAVX], hasSideEffects = 0 in { def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, VEX_4V, VEX_LIG; let mayLoad = 1 in { def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, VEX_4V, VEX_LIG; def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, VEX_4V, VEX_LIG; } @@ -3211,22 +3188,22 @@ multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { let Predicates = [HasAVX] in { def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>, VEX; def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>, VEX; def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))], itins.rr>, VEX, VEX_L; def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, 
(OpNode (memopv4f64 addr:$src)))], itins.rm>, VEX, VEX_L; @@ -3985,14 +3962,14 @@ multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256, let Predicates = [HasAVX] in { def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), @@ -4002,14 +3979,14 @@ let Predicates = [HasAVX] in { let Predicates = [HasAVX2] in { def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)), @@ -5190,7 +5167,7 @@ defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", // SSSE3 - Packed Align Instruction Patterns //===---------------------------------------------------------------------===// -multiclass ssse3_palign<string asm, bit Is2Addr = 1> { +multiclass ssse3_palignr<string asm, bit Is2Addr = 1> { let neverHasSideEffects = 1 in { def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), @@ -5210,7 +5187,7 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> { } } -multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> { +multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> { let neverHasSideEffects = 1 in { def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), @@ -5227,42 +5204,42 @@ multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> { } let Predicates = [HasAVX] in - defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; + defm VPALIGN : ssse3_palignr<"vpalignr", 0>, VEX_4V; let Predicates = [HasAVX2] in - defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V, VEX_L; + defm VPALIGN : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L; let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in - defm PALIGN : ssse3_palign<"palignr">; + defm PALIGN : ssse3_palignr<"palignr">; let Predicates = [HasAVX2] in { -def : Pat<(v8i32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; -def : Pat<(v8f32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; -def : Pat<(v16i16 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; -def : Pat<(v32i8 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; } let 
Predicates = [HasAVX] in { -def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; } let Predicates = [UseSSSE3] in { -def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; } @@ -5590,6 +5567,30 @@ defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; let Predicates = [HasAVX2] in { + def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>; + def : Pat<(v8i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDYrr VR128:$src)>; + def : Pat<(v4i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQYrr VR128:$src)>; + + def : Pat<(v8i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>; + def : Pat<(v4i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQYrr VR128:$src)>; + + def : Pat<(v4i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>; + + def : Pat<(v16i16 (X86vsext (v32i8 VR256:$src))), + (VPMOVSXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v8i32 (X86vsext (v32i8 VR256:$src))), + (VPMOVSXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v4i64 (X86vsext (v32i8 VR256:$src))), + (VPMOVSXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + + def : Pat<(v8i32 (X86vsext (v16i16 VR256:$src))), + (VPMOVSXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v4i64 (X86vsext (v16i16 VR256:$src))), + (VPMOVSXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + + def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))), + (VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))), (VPMOVSXWDYrm addr:$src)>; def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))), @@ -5628,6 +5629,15 @@ let Predicates = [HasAVX] in { } let Predicates = [UseSSE41] in { + def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>; + def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (PMOVSXBDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (PMOVSXBQrr VR128:$src)>; + + def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (PMOVSXWDrr 
VR128:$src)>; + def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (PMOVSXWQrr VR128:$src)>; + + def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>; + // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbq (bitconvert (v4i32 (X86vzmovl @@ -5727,6 +5737,15 @@ let Predicates = [HasAVX] in { def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))), (VPMOVZXDQrm addr:$src)>; + def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>; + def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQrr VR128:$src)>; + + def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQrr VR128:$src)>; + + def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>; + def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), (VPMOVSXWDrm addr:$src)>; diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index ea716bf..3caa1b5 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -352,11 +352,11 @@ def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), // Descriptor-table support instructions def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), - "sgdtw\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>; + "sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>; def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), "sgdt\t$dst", [], IIC_SGDT>, TB; def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), - "sidtw\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[In32BitMode]>; + "sidt{w}\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[In32BitMode]>; def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), "sidt\t$dst", []>, TB; def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins), @@ -374,11 +374,11 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), "sldt{q}\t$dst", [], IIC_SLDT>, TB; def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), - "lgdtw\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[In32BitMode]>; + "lgdt{w}\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[In32BitMode]>; def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), "lgdt\t$src", [], IIC_LGDT>, TB; def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), - "lidtw\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[In32BitMode]>; + "lidt{w}\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[In32BitMode]>; def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), "lidt\t$src", [], IIC_LIDT>, TB; def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index cca391f..44d8cce 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -79,7 +79,7 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction; # define CFI(x) #endif -// Provide a wrapper for X86CompilationCallback2 that saves non-traditional +// Provide a wrapper for LLVMX86CompilationCallback2 that saves non-traditional // callee saved registers, for the fastcc calling convention. 
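The callback renames in the hunks that follow rest on a common idiom: a function called by name from hand-written assembly cannot be static, because the assembler needs a real symbol to resolve, while LLVM_LIBRARY_VISIBILITY keeps that symbol out of the shared library's exported interface. A minimal sketch of the idiom (ExampleCallback2 is an invented name, not the patch's function):

#include <cstdint>

extern "C" {
// External linkage so inline asm can reference the symbol by name;
// hidden visibility so it is not exported from the DSO.
__attribute__((visibility("hidden")))
void ExampleCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
  (void)StackPtr; // a real callback would rewrite the return address here
  (void)RetAddr;
}
}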
extern "C" { #if defined(X86_64_JIT) @@ -131,12 +131,12 @@ extern "C" { "subq $32, %rsp\n" "movq %rbp, %rcx\n" // Pass prev frame and return address "movq 8(%rbp), %rdx\n" - "call " ASMPREFIX "X86CompilationCallback2\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" "addq $32, %rsp\n" #else "movq %rbp, %rdi\n" // Pass prev frame and return address "movq 8(%rbp), %rsi\n" - "call " ASMPREFIX "X86CompilationCallback2\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" #endif // Restore all XMM arg registers "movaps 112(%rsp), %xmm7\n" @@ -213,7 +213,7 @@ extern "C" { "movl 4(%ebp), %eax\n" // Pass prev frame and return address "movl %eax, 4(%esp)\n" "movl %ebp, (%esp)\n" - "call " ASMPREFIX "X86CompilationCallback2\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" "movl %ebp, %esp\n" // Restore ESP CFI(".cfi_def_cfa_register %esp\n") "subl $12, %esp\n" @@ -269,7 +269,7 @@ extern "C" { "movl 4(%ebp), %eax\n" // Pass prev frame and return address "movl %eax, 4(%esp)\n" "movl %ebp, (%esp)\n" - "call " ASMPREFIX "X86CompilationCallback2\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" "addl $16, %esp\n" "movaps 48(%esp), %xmm3\n" CFI(".cfi_restore %xmm3\n") @@ -300,10 +300,7 @@ extern "C" { SIZE(X86CompilationCallback_SSE) ); # else - // the following function is called only from this translation unit, - // unless we are under 64bit Windows with MSC, where there is - // no support for inline assembly - static void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr); + void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr); _declspec(naked) void X86CompilationCallback(void) { __asm { @@ -317,7 +314,7 @@ extern "C" { mov eax, dword ptr [ebp+4] mov dword ptr [esp+4], eax mov dword ptr [esp], ebp - call X86CompilationCallback2 + call LLVMX86CompilationCallback2 mov esp, ebp sub esp, 12 pop ecx @@ -337,20 +334,17 @@ extern "C" { #endif } -/// X86CompilationCallback2 - This is the target-specific function invoked by the +/// This is the target-specific function invoked by the /// function stub when we did not know the real target of a call. This function /// must locate the start of the stub or call site and pass it into the JIT /// compiler function. extern "C" { -#if !(defined (X86_64_JIT) && defined(_MSC_VER)) - // the following function is called only from this translation unit, - // unless we are under 64bit Windows with MSC, where there is - // no support for inline assembly -static -#endif -void LLVM_ATTRIBUTE_USED -X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { +LLVM_LIBRARY_VISIBILITY void LLVMX86CompilationCallback2(intptr_t *StackPtr, + intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; + // We are reading raw stack data here. Tell MemorySanitizer that it is + // sufficiently initialized. + __msan_unpoison(RetAddrLoc, sizeof(*RetAddrLoc)); assert(*RetAddrLoc == RetAddr && "Could not find return address on the stack!"); @@ -517,7 +511,7 @@ void *X86JITInfo::emitFunctionStub(const Function* F, void *Target, // This used to use 0xCD, but that value is used by JITMemoryManager to // initialize the buffer with garbage, which means it may follow a - // noreturn function call, confusing X86CompilationCallback2. PR 4929. + // noreturn function call, confusing LLVMX86CompilationCallback2. PR 4929. JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub! 
return Result; } diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 5a1e1b8..3af1b3e 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -239,7 +239,8 @@ static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) { if (!MI->getOperand(OpNo+i).isReg()) continue; unsigned Reg = MI->getOperand(OpNo+i).getReg(); - if (Reg == 0) continue; + // LEAs can use RIP-relative addressing, and RIP has no sub/super register. + if (Reg == 0 || Reg == X86::RIP) continue; MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64)); } diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp index c22872f..83e75ea 100644 --- a/lib/Target/X86/X86PadShortFunction.cpp +++ b/lib/Target/X86/X86PadShortFunction.cpp @@ -33,6 +33,19 @@ using namespace llvm; STATISTIC(NumBBsPadded, "Number of basic blocks padded"); namespace { + struct VisitedBBInfo { + // HasReturn - Whether the BB contains a return instruction + bool HasReturn; + + // Cycles - Number of cycles until return if HasReturn is true, otherwise + // number of cycles until end of the BB + unsigned int Cycles; + + VisitedBBInfo() : HasReturn(false), Cycles(0) {} + VisitedBBInfo(bool HasReturn, unsigned int Cycles) + : HasReturn(HasReturn), Cycles(Cycles) {} + }; + struct PadShortFunc : public MachineFunctionPass { static char ID; PadShortFunc() : MachineFunctionPass(ID) @@ -49,16 +62,21 @@ namespace { unsigned int Cycles = 0); bool cyclesUntilReturn(MachineBasicBlock *MBB, - unsigned int &Cycles, - MachineBasicBlock::iterator *Location = 0); + unsigned int &Cycles); void addPadding(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI, unsigned int NOOPsToAdd); const unsigned int Threshold; + + // ReturnBBs - Maps basic blocks that return to the minimum number of + // cycles until the return, starting from the entry block. DenseMap<MachineBasicBlock*, unsigned int> ReturnBBs; + // VisitedBBs - Cache of previously visited BBs. + DenseMap<MachineBasicBlock*, VisitedBBInfo> VisitedBBs; + const TargetMachine *TM; const TargetInstrInfo *TII; }; @@ -73,25 +91,26 @@ FunctionPass *llvm::createX86PadShortFunctions() { /// runOnMachineFunction - Loop over all of the basic blocks, inserting /// NOOP instructions before early exits. bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { - bool OptForSize = MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); - - if (OptForSize) + const AttributeSet &FnAttrs = MF.getFunction()->getAttributes(); + if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize) || + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::MinSize)) { return false; + } TM = &MF.getTarget(); TII = TM->getInstrInfo(); // Search through basic blocks and mark the ones that have early returns ReturnBBs.clear(); + VisitedBBs.clear(); findReturns(MF.begin()); bool MadeChange = false; - MachineBasicBlock::iterator ReturnLoc; MachineBasicBlock *MBB; unsigned int Cycles = 0; - unsigned int BBCycles; // Pad the identified basic blocks with NOOPs for (DenseMap<MachineBasicBlock*, unsigned int>::iterator I = ReturnBBs.begin(); @@ -100,8 +119,16 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { Cycles = I->second; if (Cycles < Threshold) { - if (!cyclesUntilReturn(MBB, BBCycles, &ReturnLoc)) - continue; + // BB ends in a return. Skip over any DBG_VALUE instructions + // trailing the terminator. 
+ assert(MBB->size() > 0 && + "Basic block should contain at least a RET but is empty"); + MachineBasicBlock::iterator ReturnLoc = --MBB->end(); + + while (ReturnLoc->isDebugValue()) + --ReturnLoc; + assert(ReturnLoc->isReturn() && !ReturnLoc->isCall() && + "Basic block does not end with RET"); addPadding(MBB, ReturnLoc, Threshold - Cycles); NumBBsPadded++; @@ -127,18 +154,30 @@ void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) { // Follow branches in BB and look for returns for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(); - I != MBB->succ_end(); ++I) { + I != MBB->succ_end(); ++I) { + if (*I == MBB) + continue; findReturns(*I, Cycles); } } -/// cyclesUntilReturn - if the MBB has a return instruction, set Location -/// to the instruction and return true. Return false otherwise. +/// cyclesUntilReturn - return true if the MBB has a return instruction, +/// and return false otherwise. /// Cycles will be incremented by the number of cycles taken to reach the /// return or the end of the BB, whichever occurs first. bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB, - unsigned int &Cycles, - MachineBasicBlock::iterator *Location) { + unsigned int &Cycles) { + // Return cached result if BB was previously visited + DenseMap<MachineBasicBlock*, VisitedBBInfo>::iterator it + = VisitedBBs.find(MBB); + if (it != VisitedBBs.end()) { + VisitedBBInfo BBInfo = it->second; + Cycles += BBInfo.Cycles; + return BBInfo.HasReturn; + } + + unsigned int CyclesToEnd = 0; + for (MachineBasicBlock::iterator MBBI = MBB->begin(); MBBI != MBB->end(); ++MBBI) { MachineInstr *MI = MBBI; @@ -146,14 +185,16 @@ bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB, // functions do not count because the called function will be padded, // if necessary. if (MI->isReturn() && !MI->isCall()) { - if (Location) - *Location = MBBI; + VisitedBBs[MBB] = VisitedBBInfo(true, CyclesToEnd); + Cycles += CyclesToEnd; return true; } - Cycles += TII->getInstrLatency(TM->getInstrItineraryData(), MI); + CyclesToEnd += TII->getInstrLatency(TM->getInstrItineraryData(), MI); } + VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd); + Cycles += CyclesToEnd; return false; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 58064b8..16886e4 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -50,7 +50,7 @@ ForceStackAlign("force-align-stack", " needed for the function."), cl::init(false), cl::Hidden); -cl::opt<bool> +static cl::opt<bool> EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), cl::desc("Enable use of a base pointer for complex stack frames")); @@ -177,20 +177,21 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{ const TargetRegisterClass * X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { + const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); switch (Kind) { default: llvm_unreachable("Unexpected Kind in getPointerRegClass!"); case 0: // Normal GPRs. - if (TM.getSubtarget<X86Subtarget>().is64Bit()) + if (Subtarget.isTarget64BitLP64()) return &X86::GR64RegClass; return &X86::GR32RegClass; case 1: // Normal GPRs except the stack pointer (for encoding reasons). - if (TM.getSubtarget<X86Subtarget>().is64Bit()) + if (Subtarget.isTarget64BitLP64()) return &X86::GR64_NOSPRegClass; return &X86::GR32_NOSPRegClass; case 2: // Available for tailcall (not callee-saved GPRs). 
-    if (TM.getSubtarget<X86Subtarget>().isTargetWin64())
+    if (Subtarget.isTargetWin64())
       return &X86::GR64_TCW64RegClass;
-    if (TM.getSubtarget<X86Subtarget>().is64Bit())
+    else if (Subtarget.is64Bit())
       return &X86::GR64_TCRegClass;
 
     const Function *F = MF.getFunction();
@@ -234,38 +235,40 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
 
 const uint16_t *
 X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-  bool callsEHReturn = false;
-  bool ghcCall = false;
-  bool oclBiCall = false;
-  bool hipeCall = false;
-  bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
-
-  if (MF) {
-    callsEHReturn = MF->getMMI().callsEHReturn();
-    const Function *F = MF->getFunction();
-    ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
-    oclBiCall = (F ? F->getCallingConv() == CallingConv::Intel_OCL_BI : false);
-    hipeCall = (F ? F->getCallingConv() == CallingConv::HiPE : false);
-  }
-
-  if (ghcCall || hipeCall)
+  switch (MF->getFunction()->getCallingConv()) {
+  case CallingConv::GHC:
+  case CallingConv::HiPE:
     return CSR_NoRegs_SaveList;
-  if (oclBiCall) {
+
+  case CallingConv::Intel_OCL_BI: {
+    bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
     if (HasAVX && IsWin64)
-        return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
+      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
     if (HasAVX && Is64Bit)
-        return CSR_64_Intel_OCL_BI_AVX_SaveList;
+      return CSR_64_Intel_OCL_BI_AVX_SaveList;
     if (!HasAVX && !IsWin64 && Is64Bit)
-        return CSR_64_Intel_OCL_BI_SaveList;
+      return CSR_64_Intel_OCL_BI_SaveList;
+    break;
   }
+
+  case CallingConv::Cold:
+    if (Is64Bit)
+      return CSR_MostRegs_64_SaveList;
+    break;
+
+  default:
+    break;
+  }
+
+  bool CallsEHReturn = MF->getMMI().callsEHReturn();
   if (Is64Bit) {
     if (IsWin64)
       return CSR_Win64_SaveList;
-    if (callsEHReturn)
+    if (CallsEHReturn)
       return CSR_64EHRet_SaveList;
     return CSR_64_SaveList;
   }
-  if (callsEHReturn)
+  if (CallsEHReturn)
     return CSR_32EHRet_SaveList;
   return CSR_32_SaveList;
 }
@@ -286,6 +289,8 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
     return CSR_NoRegs_RegMask;
   if (!Is64Bit)
     return CSR_32_RegMask;
+  if (CC == CallingConv::Cold)
+    return CSR_MostRegs_64_RegMask;
   if (IsWin64)
     return CSR_Win64_RegMask;
   return CSR_64_RegMask;
@@ -389,7 +394,13 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
 
   // When we need stack realignment and there are dynamic allocas, we can't
   // reference off of the stack pointer, so we reserve a base pointer.
-  if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+  //
+  // This is also true if the function contains MS-style inline assembly. We
+  // do this because if any stack changes occur in the inline assembly, e.g.,
+  // "pusha", then any C local variable or C argument references in the
+  // inline assembly will be wrong because the SP is not properly tracked.
+ if ((needsStackRealignment(MF) && MFI->hasVarSizedObjects()) || + MF.hasMSInlineAsm()) return true; return false; @@ -440,123 +451,16 @@ bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, return false; } -static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { - if (is64Bit) { - if (isInt<8>(Imm)) - return X86::SUB64ri8; - return X86::SUB64ri32; - } else { - if (isInt<8>(Imm)) - return X86::SUB32ri8; - return X86::SUB32ri; - } -} - -static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { - if (is64Bit) { - if (isInt<8>(Imm)) - return X86::ADD64ri8; - return X86::ADD64ri32; - } else { - if (isInt<8>(Imm)) - return X86::ADD32ri8; - return X86::ADD32ri; - } -} - -void X86RegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - bool reseveCallFrame = TFI->hasReservedCallFrame(MF); - int Opcode = I->getOpcode(); - bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); - DebugLoc DL = I->getDebugLoc(); - uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; - uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; - I = MBB.erase(I); - - if (!reseveCallFrame) { - // If the stack pointer can be changed after prologue, turn the - // adjcallstackup instruction into a 'sub ESP, <amt>' and the - // adjcallstackdown instruction into 'add ESP, <amt>' - // TODO: consider using push / pop instead of sub + store / add - if (Amount == 0) - return; - - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); - Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; - - MachineInstr *New = 0; - if (Opcode == TII.getCallFrameSetupOpcode()) { - New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)), - StackPtr) - .addReg(StackPtr) - .addImm(Amount); - } else { - assert(Opcode == TII.getCallFrameDestroyOpcode()); - - // Factor out the amount the callee already popped. - Amount -= CalleeAmt; - - if (Amount) { - unsigned Opc = getADDriOpcode(Is64Bit, Amount); - New = BuildMI(MF, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr).addImm(Amount); - } - } - - if (New) { - // The EFLAGS implicit def is dead. - New->getOperand(3).setIsDead(); - - // Replace the pseudo instruction with a new instruction. - MBB.insert(I, New); - } - - return; - } - - if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) { - // If we are performing frame pointer elimination and if the callee pops - // something off the stack pointer, add it back. We do this until we have - // more advanced stack pointer tracking ability. - unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt); - MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr).addImm(CalleeAmt); - - // The EFLAGS implicit def is dead. - New->getOperand(3).setIsDead(); - - // We are not tracking the stack pointer adjustment by the callee, so make - // sure we restore the stack pointer immediately after the call, there may - // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. 
- MachineBasicBlock::iterator B = MBB.begin(); - while (I != B && !llvm::prior(I)->isCall()) - --I; - MBB.insert(I, New); - } -} - void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); - unsigned i = 0; MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); unsigned BasePtr; unsigned Opc = MI.getOpcode(); @@ -572,7 +476,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // This must be part of a four operand memory reference. Replace the // FrameIndex with base register with EBP. Add an offset to the offset. - MI.getOperand(i).ChangeToRegister(BasePtr, false); + MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); // Now add the frame object offset to the offset from EBP. int FIOffset; @@ -583,17 +487,18 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } else FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex); - if (MI.getOperand(i+3).isImm()) { + if (MI.getOperand(FIOperandNum+3).isImm()) { // Offset is a 32-bit integer. - int Imm = (int)(MI.getOperand(i + 3).getImm()); + int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm()); int Offset = FIOffset + Imm; assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) && "Requesting 64-bit offset in 32-bit immediate!"); - MI.getOperand(i + 3).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset); } else { // Offset is symbolic. This is extremely rare. - uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset(); - MI.getOperand(i+3).setOffset(Offset); + uint64_t Offset = FIOffset + + (uint64_t)MI.getOperand(FIOperandNum+3).getOffset(); + MI.getOperand(FIOperandNum + 3).setOffset(Offset); } } @@ -618,7 +523,15 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, case MVT::i8: if (High) { switch (Reg) { - default: return getX86SubSuperRegister(Reg, MVT::i64, High); + default: return getX86SubSuperRegister(Reg, MVT::i64); + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SP; case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::AH; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -738,22 +651,6 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, return X86::R15D; } case MVT::i64: - // For 64-bit mode if we've requested a "high" register and the - // Q or r constraints we want one of these high registers or - // just the register name otherwise. - if (High) { - switch (Reg) { - case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: - return X86::SI; - case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: - return X86::DI; - case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: - return X86::BP; - case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: - return X86::SP; - // Fallthrough. 
-    }
-  }
   switch (Reg) {
   default: llvm_unreachable("Unexpected register");
   case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 7932ede..b9d7b8c 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -117,12 +117,9 @@ public:
   bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
                             int &FrameIdx) const;
 
-  void eliminateCallFramePseudoInstr(MachineFunction &MF,
-                                     MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator MI) const;
-
   void eliminateFrameIndex(MachineBasicBlock::iterator MI,
-                           int SPAdj, RegScavenger *RS = NULL) const;
+                           int SPAdj, unsigned FIOperandNum,
+                           RegScavenger *RS = NULL) const;
 
   // Debug information queries.
   unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index c14407f..d99d085 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -470,12 +470,17 @@ def IIC_NOP : InstrItinClass;
 // latencies. Since these latencies are not used for pipeline hazards,
 // they do not need to be exact.
 //
+// ILPWindow=10 is an arbitrary threshold that approximates cycles of
+// latency hidden by instruction buffers. The actual value is not very
+// important but should be zero for in-order and nonzero for out-of-order
+// processors.
+//
 // The GenericModel contains no instruction itineraries.
 def GenericModel : SchedMachineModel {
   let IssueWidth = 4;
   let MinLatency = 0;
   let LoadLatency = 4;
   let HighLatency = 10;
+  let ILPWindow = 10;
 }
 
 include "X86ScheduleAtom.td"
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 8710261..1e5f2d6 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -525,6 +525,7 @@ def AtomModel : SchedMachineModel {
   // OperandCycles may be used for expected latency.
   let LoadLatency = 3;  // Expected cycles, may be overridden by OperandCycles.
   let HighLatency = 30; // Expected, may be overridden by OperandCycles.
+  let ILPWindow = 0;    // Always try to hide expected latency.
   let Itineraries = AtomItineraries;
 }
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 757e8c7..f934fdd 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -202,6 +202,14 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
       SrcPtrInfo.getAddrSpace() >= 256)
     return SDValue();
 
+  // ESI might be used as a base pointer, in which case we can't simply
+  // overwrite the register. Fall back to generic code.
+ const X86RegisterInfo *TRI = + static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo()); + if (TRI->hasBasePointer(DAG.getMachineFunction()) && + TRI->getBaseRegister() == X86::ESI) + return SDValue(); + MVT AVT; if (Align & 1) AVT = MVT::i8; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 53c28f4..0f2c008 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -14,6 +14,8 @@ #define DEBUG_TYPE "subtarget" #include "X86Subtarget.h" #include "X86InstrInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -155,6 +157,12 @@ const char *X86Subtarget::getBZeroEntry() const { return 0; } +bool X86Subtarget::hasSinCos() const { + return getTargetTriple().isMacOSX() && + !getTargetTriple().isMacOSXVersionLT(10, 9) && + is64Bit(); +} + /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls /// to immediate address. bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { @@ -318,45 +326,23 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { } } -X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, - unsigned StackAlignOverride, bool is64Bit) - : X86GenSubtargetInfo(TT, CPU, FS) - , X86ProcFamily(Others) - , PICStyle(PICStyles::None) - , X86SSELevel(NoMMXSSE) - , X863DNowLevel(NoThreeDNow) - , HasCMov(false) - , HasX86_64(false) - , HasPOPCNT(false) - , HasSSE4A(false) - , HasAES(false) - , HasPCLMUL(false) - , HasFMA(false) - , HasFMA4(false) - , HasXOP(false) - , HasMOVBE(false) - , HasRDRAND(false) - , HasF16C(false) - , HasFSGSBase(false) - , HasLZCNT(false) - , HasBMI(false) - , HasBMI2(false) - , HasRTM(false) - , IsBTMemSlow(false) - , IsUAMemFast(false) - , HasVectorUAMem(false) - , HasCmpxchg16b(false) - , UseLeaForSP(false) - , HasSlowDivide(false) - , PostRAScheduler(false) - , PadShortFunctions(false) - , stackAlignment(4) - // FIXME: this is a known good value for Yonah. How about others? - , MaxInlineSizeThreshold(128) - , TargetTriple(TT) - , In64BitMode(is64Bit) { - // Determine default and user specified characteristics +void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { + AttributeSet FnAttrs = MF->getFunction()->getAttributes(); + Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, + "target-cpu"); + Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, + "target-features"); + std::string CPU = + !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; + std::string FS = + !FSAttr.hasAttribute(Attribute::None) ? 
FSAttr.getValueAsString() : ""; + if (!FS.empty()) { + initializeEnvironment(); + resetSubtargetFeatures(CPU, FS); + } +} + +void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { std::string CPUName = CPU; if (!FS.empty() || !CPU.empty()) { if (CPUName.empty()) { @@ -433,6 +419,53 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, stackAlignment = 16; } +void X86Subtarget::initializeEnvironment() { + X86SSELevel = NoMMXSSE; + X863DNowLevel = NoThreeDNow; + HasCMov = false; + HasX86_64 = false; + HasPOPCNT = false; + HasSSE4A = false; + HasAES = false; + HasPCLMUL = false; + HasFMA = false; + HasFMA4 = false; + HasXOP = false; + HasMOVBE = false; + HasRDRAND = false; + HasF16C = false; + HasFSGSBase = false; + HasLZCNT = false; + HasBMI = false; + HasBMI2 = false; + HasRTM = false; + HasADX = false; + IsBTMemSlow = false; + IsUAMemFast = false; + HasVectorUAMem = false; + HasCmpxchg16b = false; + UseLeaForSP = false; + HasSlowDivide = false; + PostRAScheduler = false; + PadShortFunctions = false; + stackAlignment = 4; + // FIXME: this is a known good value for Yonah. How about others? + MaxInlineSizeThreshold = 128; +} + +X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, + unsigned StackAlignOverride, bool is64Bit) + : X86GenSubtargetInfo(TT, CPU, FS) + , X86ProcFamily(Others) + , PICStyle(PICStyles::None) + , TargetTriple(TT) + , StackAlignOverride(StackAlignOverride) + , In64BitMode(is64Bit) { + initializeEnvironment(); + resetSubtargetFeatures(CPU, FS); +} + bool X86Subtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 080f4cf..e97da4b 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -121,6 +121,9 @@ protected: /// HasRTM - Processor has RTM instructions. bool HasRTM; + /// HasADX - Processor has ADX instructions. + bool HasADX; + /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; @@ -165,11 +168,13 @@ protected: InstrItineraryData InstrItins; private: + /// StackAlignOverride - Override the stack alignment. + unsigned StackAlignOverride; + /// In64BitMode - True if compiling for 64-bit, false for 32-bit. bool In64BitMode; public: - /// This constructor initializes the data members to match that /// of the specified triple. /// @@ -194,7 +199,26 @@ public: /// instruction. void AutoDetectSubtargetFeatures(); - bool is64Bit() const { return In64BitMode; } + /// \brief Reset the features for the X86 target. + virtual void resetSubtargetFeatures(const MachineFunction *MF); +private: + void initializeEnvironment(); + void resetSubtargetFeatures(StringRef CPU, StringRef FS); +public: + /// Is this x86_64? (disregarding specific ABI / programming model) + bool is64Bit() const { + return In64BitMode; + } + + /// Is this x86_64 with the ILP32 programming model (x32 ABI)? + bool isTarget64BitILP32() const { + return In64BitMode && (TargetTriple.getEnvironment() == Triple::GNUX32); + } + + /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)? 
+ bool isTarget64BitLP64() const { + return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32); + } PICStyles::Style getPICStyle() const { return PICStyle; } void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } @@ -229,6 +253,7 @@ public: bool hasBMI() const { return HasBMI; } bool hasBMI2() const { return HasBMI2; } bool hasRTM() const { return HasRTM; } + bool hasADX() const { return HasADX; } bool isBTMemSlow() const { return IsBTMemSlow; } bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } @@ -315,6 +340,10 @@ public: /// memset with zero passed as the second argument. Otherwise it /// returns null. const char *getBZeroEntry() const; + + /// This function returns true if the target has sincos() routine in its + /// compiler runtime or math libraries. + bool hasSinCos() const; /// enablePostRAScheduler - run for Atom optimization. bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 706e64a..8aa58a2 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -59,8 +59,12 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true), - DL("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" - "n8:16:32:64-S128"), + // The x32 ABI dictates the ILP32 programming model for x64. + DL(getSubtargetImpl()->isTarget64BitILP32() ? + "e-p:32:32-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" + "n8:16:32:64-S128" : + "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" + "n8:16:32:64-S128"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), @@ -151,6 +155,7 @@ public: } virtual bool addInstSelector(); + virtual bool addILPOpts(); virtual bool addPreRegAlloc(); virtual bool addPostRegAlloc(); virtual bool addPreEmitPass(); @@ -158,12 +163,7 @@ public: } // namespace TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) { - X86PassConfig *PC = new X86PassConfig(this, PM); - - if (X86EarlyIfConv && Subtarget.hasCMov()) - PC->enablePass(&EarlyIfConverterID); - - return PC; + return new X86PassConfig(this, PM); } bool X86PassConfig::addInstSelector() { @@ -181,6 +181,14 @@ bool X86PassConfig::addInstSelector() { return false; } +bool X86PassConfig::addILPOpts() { + if (X86EarlyIfConv && getX86Subtarget().hasCMov()) { + addPass(&EarlyIfConverterID); + return true; + } + return false; +} + bool X86PassConfig::addPreRegAlloc() { return false; // -print-machineinstr shouldn't print after this. 
} diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index b8ee319..871dacd 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -8,16 +8,12 @@ //===----------------------------------------------------------------------===// #include "X86TargetObjectFile.h" -#include "X86TargetMachine.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/Dwarf.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/Mangler.h" + using namespace llvm; using namespace dwarf; diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 9cc1b18..fefb479 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/CostTable.h" using namespace llvm; // Declare the pass initialization routine locally as target-specific passes @@ -75,7 +76,6 @@ public: /// \name Scalar TTI Implementations /// @{ - virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; /// @} @@ -84,6 +84,8 @@ public: /// @{ virtual unsigned getNumberOfRegisters(bool Vector) const; + virtual unsigned getRegisterBitWidth(bool Vector) const; + virtual unsigned getMaximumUnrollFactor() const; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const; @@ -118,45 +120,6 @@ llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) { // //===----------------------------------------------------------------------===// -namespace { -struct X86CostTblEntry { - int ISD; - MVT Type; - unsigned Cost; -}; -} - -static int -FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) { - for (unsigned int i = 0; i < len; ++i) - if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty) - return i; - - // Could not find an entry. - return -1; -} - -namespace { -struct X86TypeConversionCostTblEntry { - int ISD; - MVT Dst; - MVT Src; - unsigned Cost; -}; -} - -static int -FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len, - int ISD, MVT Dst, MVT Src) { - for (unsigned int i = 0; i < len; ++i) - if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst) - return i; - - // Could not find an entry. - return -1; -} - - X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); // TODO: Currently the __builtin_popcount() implementation using SSE3 @@ -166,11 +129,39 @@ X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const { } unsigned X86TTI::getNumberOfRegisters(bool Vector) const { + if (Vector && !ST->hasSSE1()) + return 0; + if (ST->is64Bit()) return 16; return 8; } +unsigned X86TTI::getRegisterBitWidth(bool Vector) const { + if (Vector) { + if (ST->hasAVX()) return 256; + if (ST->hasSSE1()) return 128; + return 0; + } + + if (ST->is64Bit()) + return 64; + return 32; + +} + +unsigned X86TTI::getMaximumUnrollFactor() const { + if (ST->isAtom()) + return 1; + + // Sandybridge and Haswell have multiple execution ports and pipelined + // vector units. 
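+  // Unrolling further exposes more independent operations to fill those
+  // ports, at the price of additional register pressure.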
+ if (ST->hasAVX()) + return 4; + + return 2; +} + unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { // Legalize the type. std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); @@ -178,7 +169,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - static const X86CostTblEntry AVX1CostTable[] = { + static const CostTblEntry<MVT> AVX1CostTable[] = { // We don't have to scalarize unsupported ops. We can issue two half-sized // operations and we only need to extract the upper YMM half. // Two ops + 1 extract + 1 insert = 4. @@ -192,7 +183,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { // Look for AVX1 lowering tricks. if (ST->hasAVX()) { - int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD, + int Idx = CostTableLookup<MVT>(AVX1CostTable, array_lengthof(AVX1CostTable), ISD, LT.second); if (Idx != -1) return LT.first * AVX1CostTable[Idx].Cost; @@ -226,7 +217,7 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { if (!SrcTy.isSimple() || !DstTy.isSimple()) return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); - static const X86TypeConversionCostTblEntry AVXConversionTbl[] = { + static const TypeConversionCostTblEntry<MVT> AVXConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, @@ -241,11 +232,14 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, }; if (ST->hasAVX()) { - int Idx = FindInConvertTable(AVXConversionTbl, + int Idx = ConvertCostTableLookup<MVT>(AVXConversionTbl, array_lengthof(AVXConversionTbl), ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); if (Idx != -1) @@ -265,7 +259,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - static const X86CostTblEntry SSE42CostTbl[] = { + static const CostTblEntry<MVT> SSE42CostTbl[] = { { ISD::SETCC, MVT::v2f64, 1 }, { ISD::SETCC, MVT::v4f32, 1 }, { ISD::SETCC, MVT::v2i64, 1 }, @@ -274,7 +268,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SETCC, MVT::v16i8, 1 }, }; - static const X86CostTblEntry AVX1CostTbl[] = { + static const CostTblEntry<MVT> AVX1CostTbl[] = { { ISD::SETCC, MVT::v4f64, 1 }, { ISD::SETCC, MVT::v8f32, 1 }, // AVX1 does not support 8-wide integer compare. 
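    // Such compares are instead split into two 128-bit compares plus an
    // extract and an insert, which is why the 256-bit integer entries below
    // cost 4.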
@@ -284,7 +278,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SETCC, MVT::v32i8, 4 }, }; - static const X86CostTblEntry AVX2CostTbl[] = { + static const CostTblEntry<MVT> AVX2CostTbl[] = { { ISD::SETCC, MVT::v4i64, 1 }, { ISD::SETCC, MVT::v8i32, 1 }, { ISD::SETCC, MVT::v16i16, 1 }, @@ -292,19 +286,19 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, }; if (ST->hasAVX2()) { - int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); + int Idx = CostTableLookup<MVT>(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); if (Idx != -1) return LT.first * AVX2CostTbl[Idx].Cost; } if (ST->hasAVX()) { - int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); + int Idx = CostTableLookup<MVT>(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); if (Idx != -1) return LT.first * AVX1CostTbl[Idx].Cost; } if (ST->hasSSE42()) { - int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); + int Idx = CostTableLookup<MVT>(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); if (Idx != -1) return LT.first * SSE42CostTbl[Idx].Cost; } diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index c4a5887..0f77948 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -120,9 +120,19 @@ static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) { return false; } +static bool clobbersAllYmmRegs(const MachineOperand &MO) { + for (unsigned reg = X86::YMM0; reg < X86::YMM15; ++reg) { + if (!MO.clobbersPhysReg(reg)) + return false; + } + return true; +} + static bool hasYmmReg(MachineInstr *MI) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); + if (MI->isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO)) + return true; if (!MO.isReg()) continue; if (MO.isDebug()) diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index 094f18c..7e7d396 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -92,11 +92,19 @@ static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); + static DecodeStatus Decode2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus Decode2RImmInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -132,6 +140,66 @@ static DecodeStatus DecodeLR2RInstruction(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus Decode3RInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus Decode3RImmInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus Decode2RUSInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus Decode2RUSBitpInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeL3RInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeL3RSrcDstInstruction(MCInst &Inst, + 
unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeL2RUSInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeL2RUSBitpInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeL6RInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeL5RInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeL4RSrcDstInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + #include "XCoreGenDisassemblerTables.inc" static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, @@ -157,13 +225,24 @@ static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } +static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(Val)); + Inst.addOperand(MCOperand::CreateImm(0)); + return MCDisassembler::Success; +} + static DecodeStatus Decode2OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2) { - unsigned Combined = fieldFromInstruction(Insn, 6, 5) + - fieldFromInstruction(Insn, 5, 1) * 5 - 27; - if (Combined >= 9) + unsigned Combined = fieldFromInstruction(Insn, 6, 5); + if (Combined < 27) return MCDisassembler::Fail; - + if (fieldFromInstruction(Insn, 5, 1)) { + if (Combined == 31) + return MCDisassembler::Fail; + Combined += 5; + } + Combined -= 27; unsigned Op1High = Combined % 3; unsigned Op2High = Combined / 3; Op1 = (Op1High << 2) | fieldFromInstruction(Insn, 2, 2); @@ -172,14 +251,114 @@ Decode2OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2) { } static DecodeStatus +Decode3OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2, + unsigned &Op3) { + unsigned Combined = fieldFromInstruction(Insn, 6, 5); + if (Combined >= 27) + return MCDisassembler::Fail; + + unsigned Op1High = Combined % 3; + unsigned Op2High = (Combined / 3) % 3; + unsigned Op3High = Combined / 9; + Op1 = (Op1High << 2) | fieldFromInstruction(Insn, 4, 2); + Op2 = (Op2High << 2) | fieldFromInstruction(Insn, 2, 2); + Op3 = (Op3High << 2) | fieldFromInstruction(Insn, 0, 2); + return MCDisassembler::Success; +} + +static DecodeStatus +Decode2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + // Try and decode as a 3R instruction. 
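+  // Short-form instructions keep their primary opcode in bits 15..11;
+  // dispatch on it to select the matching 3R / 2RUS variant.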
+ unsigned Opcode = fieldFromInstruction(Insn, 11, 5); + switch (Opcode) { + case 0x0: + Inst.setOpcode(XCore::STW_2rus); + return Decode2RUSInstruction(Inst, Insn, Address, Decoder); + case 0x1: + Inst.setOpcode(XCore::LDW_2rus); + return Decode2RUSInstruction(Inst, Insn, Address, Decoder); + case 0x2: + Inst.setOpcode(XCore::ADD_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x3: + Inst.setOpcode(XCore::SUB_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x4: + Inst.setOpcode(XCore::SHL_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x5: + Inst.setOpcode(XCore::SHR_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x6: + Inst.setOpcode(XCore::EQ_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x7: + Inst.setOpcode(XCore::AND_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x8: + Inst.setOpcode(XCore::OR_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x9: + Inst.setOpcode(XCore::LDW_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x10: + Inst.setOpcode(XCore::LD16S_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x11: + Inst.setOpcode(XCore::LD8U_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x12: + Inst.setOpcode(XCore::ADD_2rus); + return Decode2RUSInstruction(Inst, Insn, Address, Decoder); + case 0x13: + Inst.setOpcode(XCore::SUB_2rus); + return Decode2RUSInstruction(Inst, Insn, Address, Decoder); + case 0x14: + Inst.setOpcode(XCore::SHL_2rus); + return Decode2RUSBitpInstruction(Inst, Insn, Address, Decoder); + case 0x15: + Inst.setOpcode(XCore::SHR_2rus); + return Decode2RUSBitpInstruction(Inst, Insn, Address, Decoder); + case 0x16: + Inst.setOpcode(XCore::EQ_2rus); + return Decode2RUSInstruction(Inst, Insn, Address, Decoder); + case 0x17: + Inst.setOpcode(XCore::TSETR_3r); + return Decode3RImmInstruction(Inst, Insn, Address, Decoder); + case 0x18: + Inst.setOpcode(XCore::LSS_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + case 0x19: + Inst.setOpcode(XCore::LSU_3r); + return Decode3RInstruction(Inst, Insn, Address, Decoder); + } + return MCDisassembler::Fail; +} + +static DecodeStatus Decode2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); - if (S == MCDisassembler::Success) { - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); - } + if (S != MCDisassembler::Success) + return Decode2OpInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + return S; +} + +static DecodeStatus +Decode2RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2; + DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); + if (S != MCDisassembler::Success) + return Decode2OpInstructionFail(Inst, Insn, Address, Decoder); + + Inst.addOperand(MCOperand::CreateImm(Op1)); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); return S; } @@ -188,10 +367,11 @@ DecodeR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op2, Op1); - if (S == MCDisassembler::Success) { - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - 
DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); - } + if (S != MCDisassembler::Success) + return Decode2OpInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); return S; } @@ -200,11 +380,12 @@ Decode2RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); - if (S == MCDisassembler::Success) { - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); - } + if (S != MCDisassembler::Success) + return Decode2OpInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); return S; } @@ -213,10 +394,11 @@ DecodeRUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); - if (S == MCDisassembler::Success) { - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - Inst.addOperand(MCOperand::CreateImm(Op2)); - } + if (S != MCDisassembler::Success) + return Decode2OpInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + Inst.addOperand(MCOperand::CreateImm(Op2)); return S; } @@ -225,10 +407,11 @@ DecodeRUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); - if (S == MCDisassembler::Success) { - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - DecodeBitpOperand(Inst, Op2, Address, Decoder); - } + if (S != MCDisassembler::Success) + return Decode2OpInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeBitpOperand(Inst, Op2, Address, Decoder); return S; } @@ -237,24 +420,97 @@ DecodeRUSSrcDstBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); - if (S == MCDisassembler::Success) { - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - DecodeBitpOperand(Inst, Op2, Address, Decoder); - } + if (S != MCDisassembler::Success) + return Decode2OpInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeBitpOperand(Inst, Op2, Address, Decoder); return S; } static DecodeStatus +DecodeL2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + // Try and decode as a L3R / L2RUS instruction. 
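+  // Long-form instructions split the opcode across the word: bits 19..16
+  // and bits 31..27 are recombined into the value switched on below.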
+ unsigned Opcode = fieldFromInstruction(Insn, 16, 4) | + fieldFromInstruction(Insn, 27, 5) << 4; + switch (Opcode) { + case 0x0c: + Inst.setOpcode(XCore::STW_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x1c: + Inst.setOpcode(XCore::XOR_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x2c: + Inst.setOpcode(XCore::ASHR_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x3c: + Inst.setOpcode(XCore::LDAWF_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x4c: + Inst.setOpcode(XCore::LDAWB_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x5c: + Inst.setOpcode(XCore::LDA16F_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x6c: + Inst.setOpcode(XCore::LDA16B_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x7c: + Inst.setOpcode(XCore::MUL_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x8c: + Inst.setOpcode(XCore::DIVS_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x9c: + Inst.setOpcode(XCore::DIVU_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x10c: + Inst.setOpcode(XCore::ST16_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x11c: + Inst.setOpcode(XCore::ST8_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x12c: + Inst.setOpcode(XCore::ASHR_l2rus); + return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder); + case 0x12d: + Inst.setOpcode(XCore::OUTPW_l2rus); + return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder); + case 0x12e: + Inst.setOpcode(XCore::INPW_l2rus); + return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder); + case 0x13c: + Inst.setOpcode(XCore::LDAWF_l2rus); + return DecodeL2RUSInstruction(Inst, Insn, Address, Decoder); + case 0x14c: + Inst.setOpcode(XCore::LDAWB_l2rus); + return DecodeL2RUSInstruction(Inst, Insn, Address, Decoder); + case 0x15c: + Inst.setOpcode(XCore::CRC_l3r); + return DecodeL3RSrcDstInstruction(Inst, Insn, Address, Decoder); + case 0x18c: + Inst.setOpcode(XCore::REMS_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + case 0x19c: + Inst.setOpcode(XCore::REMU_l3r); + return DecodeL3RInstruction(Inst, Insn, Address, Decoder); + } + return MCDisassembler::Fail; +} + +static DecodeStatus DecodeL2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2); - if (S == MCDisassembler::Success) { - DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); - DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); - } + if (S != MCDisassembler::Success) + return DecodeL2OpInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); return S; } @@ -264,9 +520,212 @@ DecodeLR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, unsigned Op1, Op2; DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2); + if (S != MCDisassembler::Success) + return DecodeL2OpInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + return S; +} + +static DecodeStatus +Decode3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned 
Op1, Op2, Op3; + DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3); if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder); + } + return S; +} + +static DecodeStatus +Decode3RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + Inst.addOperand(MCOperand::CreateImm(Op1)); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder); + } + return S; +} + +static DecodeStatus +Decode2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3); + if (S == MCDisassembler::Success) { DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + Inst.addOperand(MCOperand::CreateImm(Op3)); + } + return S; +} + +static DecodeStatus +Decode2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeBitpOperand(Inst, Op3, Address, Decoder); + } + return S; +} + +static DecodeStatus +DecodeL3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + DecodeStatus S = + Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder); + } + return S; +} + +static DecodeStatus +DecodeL3RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + DecodeStatus S = + Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder); + } + return S; +} + +static DecodeStatus +DecodeL2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + DecodeStatus S = + Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + Inst.addOperand(MCOperand::CreateImm(Op3)); + } + return S; +} + +static DecodeStatus +DecodeL2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + DecodeStatus S = + Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeBitpOperand(Inst, Op3, Address, Decoder); + } + return S; +} + +static DecodeStatus 
+DecodeL6RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3, Op4, Op5, Op6; + DecodeStatus S = + Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); + if (S != MCDisassembler::Success) + return S; + S = Decode3OpInstruction(fieldFromInstruction(Insn, 16, 16), Op4, Op5, Op6); + if (S != MCDisassembler::Success) + return S; + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op5, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op6, Address, Decoder); + return S; +} + +static DecodeStatus +DecodeL5RInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + // Try and decode as a L6R instruction. + Inst.clear(); + unsigned Opcode = fieldFromInstruction(Insn, 27, 5); + switch (Opcode) { + case 0x00: + Inst.setOpcode(XCore::LMUL_l6r); + return DecodeL6RInstruction(Inst, Insn, Address, Decoder); + } + return MCDisassembler::Fail; +} + +static DecodeStatus +DecodeL5RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3, Op4, Op5; + DecodeStatus S = + Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); + if (S != MCDisassembler::Success) + return DecodeL5RInstructionFail(Inst, Insn, Address, Decoder); + S = Decode2OpInstruction(fieldFromInstruction(Insn, 16, 16), Op4, Op5); + if (S != MCDisassembler::Success) + return DecodeL5RInstructionFail(Inst, Insn, Address, Decoder); + + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op5, Address, Decoder); + return S; +} + +static DecodeStatus +DecodeL4RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + unsigned Op4 = fieldFromInstruction(Insn, 16, 4); + DecodeStatus S = + Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + S = DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder); + } + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder); + } + return S; +} + +static DecodeStatus +DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2, Op3; + unsigned Op4 = fieldFromInstruction(Insn, 16, 4); + DecodeStatus S = + Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + S = DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder); + } + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder); } return S; } diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index ea77d92..0d146ba 100644 --- 
a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -171,7 +171,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // The ABI requires that unsigned scalar types smaller than 32 bits // are padded to 32 bits. if (Size < 4) - OutStreamer.EmitZeros(4 - Size, 0); + OutStreamer.EmitZeros(4 - Size); // Mark the end of the global OutStreamer.EmitRawText("\t.cc_bottom " + Twine(GVSym->getName()) + ".data"); diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index bb9c77a..019c457 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -332,6 +332,58 @@ bool XCoreFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } +// This function eliminates ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void XCoreFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const XCoreInstrInfo &TII = + *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo()); + if (!hasReservedCallFrame(MF)) { + // Turn the adjcallstackdown instruction into 'extsp <amt>' and the + // adjcallstackup instruction into 'ldaw sp, sp[<amt>]' + MachineInstr *Old = I; + uint64_t Amount = Old->getOperand(0).getImm(); + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + assert(Amount%4 == 0); + Amount /= 4; + + bool isU6 = isImmU6(Amount); + if (!isU6 && !isImmU16(Amount)) { + // FIX could emit multiple instructions in this case. +#ifndef NDEBUG + errs() << "eliminateCallFramePseudoInstr size too big: " + << Amount << "\n"; +#endif + llvm_unreachable(0); + } + + MachineInstr *New; + if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) { + int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6; + New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode)) + .addImm(Amount); + } else { + assert(Old->getOpcode() == XCore::ADJCALLSTACKUP); + int Opcode = isU6 ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs; + New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP) + .addImm(Amount); + } + + // Replace the pseudo instruction with a new instruction... 
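+      // The EXTSP / LDAWSP built above takes its place; the pseudo itself
+      // is erased below.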
+ MBB.insert(I, New); + } + } + + MBB.erase(I); +} + void XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h index db1bbb6..ebad62f 100644 --- a/lib/Target/XCore/XCoreFrameLowering.h +++ b/lib/Target/XCore/XCoreFrameLowering.h @@ -39,6 +39,10 @@ namespace llvm { const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + bool hasFP(const MachineFunction &MF) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 472ce63..fbf86c5 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -211,15 +211,10 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32, Ops, 4); } - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); - switch (IntNo) { - case Intrinsic::xcore_crc8: - SDValue Ops[] = { N->getOperand(1), N->getOperand(2), N->getOperand(3) }; - return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32, - Ops, 3); - } - break; + case XCoreISD::CRC8: { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32, + Ops, 3); } case ISD::BRIND: if (SDNode *ResNode = SelectBRIND(N)) diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 6e894ac..f8a9125 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -54,6 +54,7 @@ getTargetNodeName(unsigned Opcode) const case XCoreISD::LMUL : return "XCoreISD::LMUL"; case XCoreISD::MACCU : return "XCoreISD::MACCU"; case XCoreISD::MACCS : return "XCoreISD::MACCS"; + case XCoreISD::CRC8 : return "XCoreISD::CRC8"; case XCoreISD::BR_JT : return "XCoreISD::BR_JT"; case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32"; default : return NULL; @@ -152,9 +153,12 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); - maxStoresPerMemset = maxStoresPerMemsetOptSize = 4; - maxStoresPerMemmove = maxStoresPerMemmoveOptSize - = maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 2; + // We want to custom lower some of our intrinsics. 
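+  // At present the only such intrinsic is xcore_crc8, which is handled in
+  // LowerINTRINSIC_WO_CHAIN below.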
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 4; + MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize + = MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 2; // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::STORE); @@ -167,24 +171,25 @@ SDValue XCoreTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); - case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); - case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::BR_JT: return LowerBR_JT(Op, DAG); - case ISD::LOAD: return LowerLOAD(Op, DAG); - case ISD::STORE: return LowerSTORE(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::VAARG: return LowerVAARG(Op, DAG); - case ISD::VASTART: return LowerVASTART(Op, DAG); - case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG); - case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, DAG); + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::BR_JT: return LowerBR_JT(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::VAARG: return LowerVAARG(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG); + case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG); + case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, DAG); // FIXME: Remove these when LegalizeDAGTypes lands. case ISD::ADD: - case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG); - case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); - case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); - case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); + case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG); + case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); default: llvm_unreachable("unimplemented operand"); } @@ -736,13 +741,13 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const unsigned Opcode = (N->getOpcode() == ISD::ADD) ? 
XCoreISD::LADD : XCoreISD::LSUB; SDValue Zero = DAG.getConstant(0, MVT::i32); - SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), - LHSL, RHSL, Zero); - SDValue Lo(Carry.getNode(), 1); + SDValue Lo = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), + LHSL, RHSL, Zero); + SDValue Carry(Lo.getNode(), 1); - SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), - LHSH, RHSH, Carry); - SDValue Hi(Ignored.getNode(), 1); + SDValue Hi = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), + LHSH, RHSH, Carry); + SDValue Ignored(Hi.getNode(), 1); // Merge the pieces return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); } @@ -858,6 +863,23 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5); } +SDValue XCoreTargetLowering:: +LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + switch (IntNo) { + case Intrinsic::xcore_crc8: + EVT VT = Op.getValueType(); + SDValue Data = + DAG.getNode(XCoreISD::CRC8, DL, DAG.getVTList(VT, VT), + Op.getOperand(1), Op.getOperand(2) , Op.getOperand(3)); + SDValue Crc(Data.getNode(), 1); + SDValue Results[] = { Crc, Data }; + return DAG.getMergeValues(Results, 2, DL); + } + return SDValue(); +} + //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -1227,15 +1249,11 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, // Analyze return values. CCInfo.AnalyzeReturn(Outs, RetCC_XCore); - // If this is the first return lowered for this function, add - // the regs to the liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); + + // Return on XCore is always a "retsp 0" + RetOps.push_back(DAG.getConstant(0, MVT::i32)); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -1248,15 +1266,17 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, // guarantee that all emitted copies are // stuck together, avoiding something bad Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - // Return on XCore is always a "retsp 0" + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. 
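+  // Glue is conventionally the last operand, keeping the register copies
+  // tied to the RETSP node.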
if (Flag.getNode()) - return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other, - Chain, DAG.getConstant(0, MVT::i32), Flag); - else // Return Void - return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other, - Chain, DAG.getConstant(0, MVT::i32)); + RetOps.push_back(Flag); + + return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other, + &RetOps[0], RetOps.size()); } //===----------------------------------------------------------------------===// @@ -1353,13 +1373,13 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, SDValue Carry = DAG.getConstant(0, VT); SDValue Result = DAG.getNode(ISD::AND, dl, VT, N2, DAG.getConstant(1, VT)); - SDValue Ops [] = { Carry, Result }; + SDValue Ops[] = { Result, Carry }; return DAG.getMergeValues(Ops, 2, dl); } // fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the // low bit set - if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { + if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); @@ -1367,7 +1387,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, if ((KnownZero & Mask) == Mask) { SDValue Carry = DAG.getConstant(0, VT); SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2); - SDValue Ops [] = { Carry, Result }; + SDValue Ops[] = { Result, Carry }; return DAG.getMergeValues(Ops, 2, dl); } } @@ -1391,14 +1411,14 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, SDValue Borrow = N2; SDValue Result = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), N2); - SDValue Ops [] = { Borrow, Result }; + SDValue Ops[] = { Result, Borrow }; return DAG.getMergeValues(Ops, 2, dl); } } // fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the // low bit set - if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { + if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); @@ -1406,7 +1426,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, if ((KnownZero & Mask) == Mask) { SDValue Borrow = DAG.getConstant(0, VT); SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2); - SDValue Ops [] = { Borrow, Result }; + SDValue Ops[] = { Result, Borrow }; return DAG.getMergeValues(Ops, 2, dl); } } @@ -1432,11 +1452,15 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // If the high result is unused fold to add(a, b) if (N->hasNUsesOfValue(0, 0)) { SDValue Lo = DAG.getNode(ISD::ADD, dl, VT, N2, N3); - SDValue Ops [] = { Lo, Lo }; + SDValue Ops[] = { Lo, Lo }; return DAG.getMergeValues(Ops, 2, dl); } // Otherwise fold to ladd(a, b, 0) - return DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1); + SDValue Result = + DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1); + SDValue Carry(Result.getNode(), 1); + SDValue Ops[] = { Carry, Result }; + return DAG.getMergeValues(Ops, 2, dl); } } break; @@ -1530,7 +1554,7 @@ void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, default: break; case XCoreISD::LADD: case XCoreISD::LSUB: - if (Op.getResNo() == 0) { + if (Op.getResNo() == 1) { // Top bits of carry / borrow are clear. 
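      // (The carry / borrow moved to result 1 when the LADD / LSUB result
      // order was changed to (value, carry).)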
KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), KnownZero.getBitWidth() - 1); diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 2874f00..6d430ef 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -63,6 +63,9 @@ namespace llvm { // Corresponds to MACCS instruction MACCS, + // Corresponds to CRC8 instruction + CRC8, + // Jumptable branch. BR_JT, @@ -147,6 +150,7 @@ namespace llvm { SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; // Inline asm support std::pair<unsigned, const TargetRegisterClass*> diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td index 44ac45c..379cc39 100644 --- a/lib/Target/XCore/XCoreInstrFormats.td +++ b/lib/Target/XCore/XCoreInstrFormats.td @@ -33,44 +33,122 @@ class PseudoInstXCore<dag outs, dag ins, string asmstr, list<dag> pattern> // Instruction formats //===----------------------------------------------------------------------===// -class _F3R<dag outs, dag ins, string asmstr, list<dag> pattern> +class _F3R<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern> : InstXCore<2, outs, ins, asmstr, pattern> { + let Inst{15-11} = opc; + let DecoderMethod = "Decode3RInstruction"; } -class _FL3R<dag outs, dag ins, string asmstr, list<dag> pattern> +// 3R with first operand as an immediate. Used for TSETR where the first +// operand is treated as an immediate since it refers to a register number in +// another thread. +class _F3RImm<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern> + : _F3R<opc, outs, ins, asmstr, pattern> { + let DecoderMethod = "Decode3RImmInstruction"; +} + +class _FL3R<bits<9> opc, dag outs, dag ins, string asmstr, list<dag> pattern> : InstXCore<4, outs, ins, asmstr, pattern> { + let Inst{31-27} = opc{8-4}; + let Inst{26-20} = 0b1111110; + let Inst{19-16} = opc{3-0}; + + let Inst{15-11} = 0b11111; + let DecoderMethod = "DecodeL3RInstruction"; } -class _F2RUS<dag outs, dag ins, string asmstr, list<dag> pattern> +// L3R with first operand as both a source and a destination. 
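+// The decoder emits that register twice, once as the def and once as a use.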
+class _FL3RSrcDst<bits<9> opc, dag outs, dag ins, string asmstr, + list<dag> pattern> : _FL3R<opc, outs, ins, asmstr, pattern> { + let DecoderMethod = "DecodeL3RSrcDstInstruction"; +} + +class _F2RUS<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern> : InstXCore<2, outs, ins, asmstr, pattern> { + let Inst{15-11} = opc; + let DecoderMethod = "Decode2RUSInstruction"; +} + +// 2RUS with bitp operand +class _F2RUSBitp<bits<5> opc, dag outs, dag ins, string asmstr, + list<dag> pattern> + : _F2RUS<opc, outs, ins, asmstr, pattern> { + let DecoderMethod = "Decode2RUSBitpInstruction"; } -class _FL2RUS<dag outs, dag ins, string asmstr, list<dag> pattern> +class _FL2RUS<bits<9> opc, dag outs, dag ins, string asmstr, list<dag> pattern> : InstXCore<4, outs, ins, asmstr, pattern> { + let Inst{31-27} = opc{8-4}; + let Inst{26-20} = 0b1111110; + let Inst{19-16} = opc{3-0}; + + let Inst{15-11} = 0b11111; + let DecoderMethod = "DecodeL2RUSInstruction"; +} + +// L2RUS with bitp operand +class _FL2RUSBitp<bits<9> opc, dag outs, dag ins, string asmstr, + list<dag> pattern> + : _FL2RUS<opc, outs, ins, asmstr, pattern> { + let DecoderMethod = "DecodeL2RUSBitpInstruction"; } -class _FRU6<dag outs, dag ins, string asmstr, list<dag> pattern> +class _FRU6<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> : InstXCore<2, outs, ins, asmstr, pattern> { + bits<4> a; + bits<6> b; + + let Inst{15-10} = opc; + let Inst{9-6} = a; + let Inst{5-0} = b; } -class _FLRU6<dag outs, dag ins, string asmstr, list<dag> pattern> +class _FLRU6<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> : InstXCore<4, outs, ins, asmstr, pattern> { + bits<4> a; + bits<16> b; + + let Inst{31-26} = opc; + let Inst{25-22} = a; + let Inst{21-16} = b{5-0}; + let Inst{15-10} = 0b111100; + let Inst{9-0} = b{15-6}; } -class _FU6<dag outs, dag ins, string asmstr, list<dag> pattern> +class _FU6<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern> : InstXCore<2, outs, ins, asmstr, pattern> { + bits<6> a; + + let Inst{15-6} = opc; + let Inst{5-0} = a; } -class _FLU6<dag outs, dag ins, string asmstr, list<dag> pattern> +class _FLU6<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern> : InstXCore<4, outs, ins, asmstr, pattern> { + bits<16> a; + + let Inst{31-22} = opc; + let Inst{21-16} = a{5-0}; + let Inst{15-10} = 0b111100; + let Inst{9-0} = a{15-6}; } -class _FU10<dag outs, dag ins, string asmstr, list<dag> pattern> +class _FU10<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> : InstXCore<2, outs, ins, asmstr, pattern> { + bits<10> a; + + let Inst{15-10} = opc; + let Inst{9-0} = a; } -class _FLU10<dag outs, dag ins, string asmstr, list<dag> pattern> +class _FLU10<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> : InstXCore<4, outs, ins, asmstr, pattern> { + bits<20> a; + + let Inst{31-26} = opc; + let Inst{25-16} = a{9-0}; + let Inst{15-10} = 0b111100; + let Inst{9-0} = a{19-10}; } class _F2R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> @@ -80,6 +158,14 @@ class _F2R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> let DecoderMethod = "Decode2RInstruction"; } +// 2R with first operand as an immediate. Used for TSETMR where the first +// operand is treated as an immediate since it refers to a register number in +// another thread. 
+class _F2RImm<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> + : _F2R<opc, outs, ins, asmstr, pattern> { + let DecoderMethod = "Decode2RImmInstruction"; +} + // 2R with first operand as both a source and a destination. class _F2RSrcDst<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> : _F2R<opc, outs, ins, asmstr, pattern> { @@ -148,14 +234,44 @@ class _F0R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern> let Inst{4-0} = opc{4-0}; } -class _L4R<dag outs, dag ins, string asmstr, list<dag> pattern> +class _FL4R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> : InstXCore<4, outs, ins, asmstr, pattern> { + bits<4> d; + + let Inst{31-27} = opc{5-1}; + let Inst{26-21} = 0b111111; + let Inst{20} = opc{0}; + let Inst{19-16} = d; + let Inst{15-11} = 0b11111; } -class _L5R<dag outs, dag ins, string asmstr, list<dag> pattern> +// L4R with 4th operand as both a source and a destination. +class _FL4RSrcDst<bits<6> opc, dag outs, dag ins, string asmstr, + list<dag> pattern> + : _FL4R<opc, outs, ins, asmstr, pattern> { + let DecoderMethod = "DecodeL4RSrcDstInstruction"; +} + +// L4R with 1st and 4th operand as both a source and a destination. +class _FL4RSrcDstSrcDst<bits<6> opc, dag outs, dag ins, string asmstr, + list<dag> pattern> + : _FL4R<opc, outs, ins, asmstr, pattern> { + let DecoderMethod = "DecodeL4RSrcDstSrcDstInstruction"; +} + +class _FL5R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> : InstXCore<4, outs, ins, asmstr, pattern> { + let Inst{31-27} = opc{5-1}; + let Inst{20} = opc{0}; + let Inst{15-11} = 0b11111; + + let DecoderMethod = "DecodeL5RInstruction"; } -class _L6R<dag outs, dag ins, string asmstr, list<dag> pattern> +class _FL6R<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern> : InstXCore<4, outs, ins, asmstr, pattern> { + let Inst{31-27} = opc; + let Inst{15-11} = 0b11111; + + let DecoderMethod = "DecodeL6RInstruction"; } diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 95b076f..e140ef2 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -32,8 +32,8 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind, - [SDNPHasChain, SDNPOptInGlue, SDNPMayLoad]>; +def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind, + [SDNPHasChain, SDNPOptInGlue, SDNPMayLoad, SDNPVariadic]>; def SDT_XCoreBR_JT : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; @@ -182,6 +182,7 @@ def ADDRcpii : ComplexPattern<i32, 2, "SelectADDRcpii", [add, cprelwrapper], // Address operands def MEMii : Operand<i32> { let PrintMethod = "printMemOperand"; + let DecoderMethod = "DecodeMEMiiOperand"; let MIOperandInfo = (ops i32imm, i32imm); } @@ -200,146 +201,110 @@ def InlineJT32 : Operand<i32> { // Three operand short -multiclass F3R_2RUS<string OpcStr, SDNode OpNode> { - def _3r: _F3R< - (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; - def _2rus : _F2RUS< - (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>; +multiclass F3R_2RUS<bits<5> opc1, bits<5> opc2, string OpcStr, SDNode OpNode> { + def _3r: _F3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), + !strconcat(OpcStr, " $dst, $b, $c"), + [(set GRRegs:$dst, (OpNode 
GRRegs:$b, GRRegs:$c))]>; + def _2rus : _F2RUS<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), + !strconcat(OpcStr, " $dst, $b, $c"), + [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>; } -multiclass F3R_2RUS_np<string OpcStr> { - def _3r: _F3R< - (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - []>; - def _2rus : _F2RUS< - (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - []>; +multiclass F3R_2RUS_np<bits<5> opc1, bits<5> opc2, string OpcStr> { + def _3r: _F3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), + !strconcat(OpcStr, " $dst, $b, $c"), []>; + def _2rus : _F2RUS<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), + !strconcat(OpcStr, " $dst, $b, $c"), []>; } -multiclass F3R_2RBITP<string OpcStr, SDNode OpNode> { - def _3r: _F3R< - (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; - def _2rus : _F2RUS< - (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>; +multiclass F3R_2RBITP<bits<5> opc1, bits<5> opc2, string OpcStr, + SDNode OpNode> { + def _3r: _F3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), + !strconcat(OpcStr, " $dst, $b, $c"), + [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; + def _2rus : _F2RUSBitp<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), + !strconcat(OpcStr, " $dst, $b, $c"), + [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>; } -class F3R<string OpcStr, SDNode OpNode> : _F3R< - (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; +class F3R<bits<5> opc, string OpcStr, SDNode OpNode> : + _F3R<opc, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), + !strconcat(OpcStr, " $dst, $b, $c"), + [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; -class F3R_np<string OpcStr> : _F3R< - (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - []>; +class F3R_np<bits<5> opc, string OpcStr> : + _F3R<opc, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), + !strconcat(OpcStr, " $dst, $b, $c"), []>; // Three operand long /// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot. -multiclass FL3R_L2RUS<string OpcStr, SDNode OpNode> { - def _l3r: _FL3R< - (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; - def _l2rus : _FL2RUS< - (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>; +multiclass FL3R_L2RUS<bits<9> opc1, bits<9> opc2, string OpcStr, + SDNode OpNode> { + def _l3r: _FL3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), + !strconcat(OpcStr, " $dst, $b, $c"), + [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; + def _l2rus : _FL2RUS<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), + !strconcat(OpcStr, " $dst, $b, $c"), + [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>; } /// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot. 
-multiclass FL3R_L2RBITP<string OpcStr, SDNode OpNode> { - def _l3r: _FL3R< - (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; - def _l2rus : _FL2RUS< - (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>; +multiclass FL3R_L2RBITP<bits<9> opc1, bits<9> opc2, string OpcStr, + SDNode OpNode> { + def _l3r: _FL3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), + !strconcat(OpcStr, " $dst, $b, $c"), + [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; + def _l2rus : _FL2RUSBitp<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c), + !strconcat(OpcStr, " $dst, $b, $c"), + [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>; } -class FL3R<string OpcStr, SDNode OpNode> : _FL3R< - (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), - !strconcat(OpcStr, " $dst, $b, $c"), - [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; +class FL3R<bits<9> opc, string OpcStr, SDNode OpNode> : + _FL3R<opc, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c), + !strconcat(OpcStr, " $dst, $b, $c"), + [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>; // Register - U6 // Operand register - U6 -multiclass FRU6_LRU6_branch<string OpcStr> { - def _ru6: _FRU6< - (outs), (ins GRRegs:$cond, brtarget:$dest), - !strconcat(OpcStr, " $cond, $dest"), - []>; - def _lru6: _FLRU6< - (outs), (ins GRRegs:$cond, brtarget:$dest), - !strconcat(OpcStr, " $cond, $dest"), - []>; +multiclass FRU6_LRU6_branch<bits<6> opc, string OpcStr> { + def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b), + !strconcat(OpcStr, " $a, $b"), []>; + def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b), + !strconcat(OpcStr, " $a, $b"), []>; } -multiclass FRU6_LRU6_cp<string OpcStr> { - def _ru6: _FRU6< - (outs GRRegs:$dst), (ins i32imm:$a), - !strconcat(OpcStr, " $dst, cp[$a]"), - []>; - def _lru6: _FLRU6< - (outs GRRegs:$dst), (ins i32imm:$a), - !strconcat(OpcStr, " $dst, cp[$a]"), - []>; +multiclass FRU6_LRU6_backwards_branch<bits<6> opc, string OpcStr> { + def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b), + !strconcat(OpcStr, " $a, -$b"), []>; + def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b), + !strconcat(OpcStr, " $a, -$b"), []>; } -// U6 -multiclass FU6_LU6<string OpcStr, SDNode OpNode> { - def _u6: _FU6< - (outs), (ins i32imm:$b), - !strconcat(OpcStr, " $b"), - [(OpNode immU6:$b)]>; - def _lu6: _FLU6< - (outs), (ins i32imm:$b), - !strconcat(OpcStr, " $b"), - [(OpNode immU16:$b)]>; +multiclass FRU6_LRU6_cp<bits<6> opc, string OpcStr> { + def _ru6: _FRU6<opc, (outs GRRegs:$a), (ins i32imm:$b), + !strconcat(OpcStr, " $a, cp[$b]"), []>; + def _lru6: _FLRU6<opc, (outs GRRegs:$a), (ins i32imm:$b), + !strconcat(OpcStr, " $a, cp[$b]"), []>; } -multiclass FU6_LU6_int<string OpcStr, Intrinsic Int> { - def _u6: _FU6< - (outs), (ins i32imm:$b), - !strconcat(OpcStr, " $b"), - [(Int immU6:$b)]>; - def _lu6: _FLU6< - (outs), (ins i32imm:$b), - !strconcat(OpcStr, " $b"), - [(Int immU16:$b)]>; + +// U6 +multiclass FU6_LU6<bits<10> opc, string OpcStr, SDNode OpNode> { + def _u6: _FU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"), + [(OpNode immU6:$a)]>; + def _lu6: _FLU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"), + [(OpNode immU16:$a)]>; } -multiclass FU6_LU6_np<string OpcStr> { - def _u6: _FU6< - (outs), (ins i32imm:$b), - !strconcat(OpcStr, " $b"), - []>; - def _lu6: _FLU6< - (outs), (ins i32imm:$b), - !strconcat(OpcStr, " 
$b"), - []>; +multiclass FU6_LU6_int<bits<10> opc, string OpcStr, Intrinsic Int> { + def _u6: _FU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"), + [(Int immU6:$a)]>; + def _lu6: _FLU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"), + [(Int immU16:$a)]>; } -// U10 -multiclass FU10_LU10_np<string OpcStr> { - def _u10: _FU10< - (outs), (ins i32imm:$b), - !strconcat(OpcStr, " $b"), - []>; - def _lu10: _FLU10< - (outs), (ins i32imm:$b), - !strconcat(OpcStr, " $b"), - []>; +multiclass FU6_LU6_np<bits<10> opc, string OpcStr> { + def _u6: _FU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"), []>; + def _lu6: _FLU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"), []>; } // Two operand short @@ -390,368 +355,351 @@ let usesCustomInserter = 1 in { //===----------------------------------------------------------------------===// // Three operand short -defm ADD : F3R_2RUS<"add", add>; -defm SUB : F3R_2RUS<"sub", sub>; +defm ADD : F3R_2RUS<0b00010, 0b10010, "add", add>; +defm SUB : F3R_2RUS<0b00011, 0b10011, "sub", sub>; let neverHasSideEffects = 1 in { -defm EQ : F3R_2RUS_np<"eq">; -def LSS_3r : F3R_np<"lss">; -def LSU_3r : F3R_np<"lsu">; +defm EQ : F3R_2RUS_np<0b00110, 0b10110, "eq">; +def LSS_3r : F3R_np<0b11000, "lss">; +def LSU_3r : F3R_np<0b11001, "lsu">; } -def AND_3r : F3R<"and", and>; -def OR_3r : F3R<"or", or>; +def AND_3r : F3R<0b00111, "and", and>; +def OR_3r : F3R<0b01000, "or", or>; let mayLoad=1 in { -def LDW_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), - "ldw $dst, $addr[$offset]", - []>; +def LDW_3r : _F3R<0b01001, (outs GRRegs:$dst), + (ins GRRegs:$addr, GRRegs:$offset), + "ldw $dst, $addr[$offset]", []>; -def LDW_2rus : _F2RUS<(outs GRRegs:$dst), (ins GRRegs:$addr, i32imm:$offset), - "ldw $dst, $addr[$offset]", - []>; +def LDW_2rus : _F2RUS<0b00001, (outs GRRegs:$dst), + (ins GRRegs:$addr, i32imm:$offset), + "ldw $dst, $addr[$offset]", []>; -def LD16S_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), - "ld16s $dst, $addr[$offset]", - []>; +def LD16S_3r : _F3R<0b10000, (outs GRRegs:$dst), + (ins GRRegs:$addr, GRRegs:$offset), + "ld16s $dst, $addr[$offset]", []>; -def LD8U_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), - "ld8u $dst, $addr[$offset]", - []>; +def LD8U_3r : _F3R<0b10001, (outs GRRegs:$dst), + (ins GRRegs:$addr, GRRegs:$offset), + "ld8u $dst, $addr[$offset]", []>; } let mayStore=1 in { -def STW_3r : _F3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), - "stw $val, $addr[$offset]", - []>; +def STW_l3r : _FL3R<0b000001100, (outs), + (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), + "stw $val, $addr[$offset]", []>; -def STW_2rus : _F2RUS<(outs), (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset), - "stw $val, $addr[$offset]", - []>; +def STW_2rus : _F2RUS<0b0000, (outs), + (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset), + "stw $val, $addr[$offset]", []>; } -defm SHL : F3R_2RBITP<"shl", shl>; -defm SHR : F3R_2RBITP<"shr", srl>; -// TODO tsetr +defm SHL : F3R_2RBITP<0b00100, 0b10100, "shl", shl>; +defm SHR : F3R_2RBITP<0b00101, 0b10101, "shr", srl>; + +// The first operand is treated as an immediate since it refers to a register +// number in another thread. 
+def TSETR_3r : _F3RImm<0b10111, (outs), (ins i32imm:$a, GRRegs:$b, GRRegs:$c), + "set t[$c]:r$a, $b", []>; // Three operand long -def LDAWF_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), - "ldaw $dst, $addr[$offset]", - [(set GRRegs:$dst, (ldawf GRRegs:$addr, GRRegs:$offset))]>; +def LDAWF_l3r : _FL3R<0b000111100, (outs GRRegs:$dst), + (ins GRRegs:$addr, GRRegs:$offset), + "ldaw $dst, $addr[$offset]", + [(set GRRegs:$dst, + (ldawf GRRegs:$addr, GRRegs:$offset))]>; let neverHasSideEffects = 1 in -def LDAWF_l2rus : _FL2RUS<(outs GRRegs:$dst), - (ins GRRegs:$addr, i32imm:$offset), - "ldaw $dst, $addr[$offset]", - []>; +def LDAWF_l2rus : _FL2RUS<0b100111100, (outs GRRegs:$dst), + (ins GRRegs:$addr, i32imm:$offset), + "ldaw $dst, $addr[$offset]", []>; -def LDAWB_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), - "ldaw $dst, $addr[-$offset]", - [(set GRRegs:$dst, (ldawb GRRegs:$addr, GRRegs:$offset))]>; +def LDAWB_l3r : _FL3R<0b001001100, (outs GRRegs:$dst), + (ins GRRegs:$addr, GRRegs:$offset), + "ldaw $dst, $addr[-$offset]", + [(set GRRegs:$dst, + (ldawb GRRegs:$addr, GRRegs:$offset))]>; let neverHasSideEffects = 1 in -def LDAWB_l2rus : _FL2RUS<(outs GRRegs:$dst), - (ins GRRegs:$addr, i32imm:$offset), - "ldaw $dst, $addr[-$offset]", - []>; - -def LDA16F_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), - "lda16 $dst, $addr[$offset]", - [(set GRRegs:$dst, (lda16f GRRegs:$addr, GRRegs:$offset))]>; - -def LDA16B_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset), - "lda16 $dst, $addr[-$offset]", - [(set GRRegs:$dst, (lda16b GRRegs:$addr, GRRegs:$offset))]>; - -def MUL_l3r : FL3R<"mul", mul>; +def LDAWB_l2rus : _FL2RUS<0b101001100, (outs GRRegs:$dst), + (ins GRRegs:$addr, i32imm:$offset), + "ldaw $dst, $addr[-$offset]", []>; + +def LDA16F_l3r : _FL3R<0b001011100, (outs GRRegs:$dst), + (ins GRRegs:$addr, GRRegs:$offset), + "lda16 $dst, $addr[$offset]", + [(set GRRegs:$dst, + (lda16f GRRegs:$addr, GRRegs:$offset))]>; + +def LDA16B_l3r : _FL3R<0b001101100, (outs GRRegs:$dst), + (ins GRRegs:$addr, GRRegs:$offset), + "lda16 $dst, $addr[-$offset]", + [(set GRRegs:$dst, + (lda16b GRRegs:$addr, GRRegs:$offset))]>; + +def MUL_l3r : FL3R<0b001111100, "mul", mul>; // Instructions which may trap are marked as side effecting. 
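// For example, DIVS, DIVU, REMS and REMU below may trap on a zero divisor, so
// hasSideEffects = 1 keeps them from being speculated or deleted as dead code.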
let hasSideEffects = 1 in { -def DIVS_l3r : FL3R<"divs", sdiv>; -def DIVU_l3r : FL3R<"divu", udiv>; -def REMS_l3r : FL3R<"rems", srem>; -def REMU_l3r : FL3R<"remu", urem>; +def DIVS_l3r : FL3R<0b010001100, "divs", sdiv>; +def DIVU_l3r : FL3R<0b010011100, "divu", udiv>; +def REMS_l3r : FL3R<0b110001100, "rems", srem>; +def REMU_l3r : FL3R<0b110011100, "remu", urem>; } -def XOR_l3r : FL3R<"xor", xor>; -defm ASHR : FL3R_L2RBITP<"ashr", sra>; +def XOR_l3r : FL3R<0b000011100, "xor", xor>; +defm ASHR : FL3R_L2RBITP<0b000101100, 0b100101100, "ashr", sra>; let Constraints = "$src1 = $dst" in -def CRC_l3r : _FL3R<(outs GRRegs:$dst), - (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), - "crc32 $dst, $src2, $src3", - [(set GRRegs:$dst, - (int_xcore_crc32 GRRegs:$src1, GRRegs:$src2, - GRRegs:$src3))]>; +def CRC_l3r : _FL3RSrcDst<0b101011100, (outs GRRegs:$dst), + (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), + "crc32 $dst, $src2, $src3", + [(set GRRegs:$dst, + (int_xcore_crc32 GRRegs:$src1, GRRegs:$src2, + GRRegs:$src3))]>; -// TODO inpw, outpw let mayStore=1 in { -def ST16_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), - "st16 $val, $addr[$offset]", - []>; +def ST16_l3r : _FL3R<0b100001100, (outs), + (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), + "st16 $val, $addr[$offset]", []>; -def ST8_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), - "st8 $val, $addr[$offset]", - []>; +def ST8_l3r : _FL3R<0b100011100, (outs), + (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), + "st8 $val, $addr[$offset]", []>; } -// Four operand long -let Constraints = "$src1 = $dst1,$src2 = $dst2" in { -def MACCU_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2), - (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, - GRRegs:$src4), - "maccu $dst1, $dst2, $src3, $src4", - []>; +def INPW_l2rus : _FL2RUSBitp<0b100101110, (outs GRRegs:$a), + (ins GRRegs:$b, i32imm:$c), "inpw $a, res[$b], $c", + []>; -def MACCS_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2), - (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, - GRRegs:$src4), - "maccs $dst1, $dst2, $src3, $src4", - []>; +def OUTPW_l2rus : _FL2RUSBitp<0b100101101, (outs), + (ins GRRegs:$a, GRRegs:$b, i32imm:$c), + "outpw res[$b], $a, $c", []>; + +// Four operand long +let Constraints = "$e = $a,$f = $b" in { +def MACCU_l4r : _FL4RSrcDstSrcDst< + 0b000001, (outs GRRegs:$a, GRRegs:$b), + (ins GRRegs:$e, GRRegs:$f, GRRegs:$c, GRRegs:$d), "maccu $a, $b, $c, $d", []>; + +def MACCS_l4r : _FL4RSrcDstSrcDst< + 0b000010, (outs GRRegs:$a, GRRegs:$b), + (ins GRRegs:$e, GRRegs:$f, GRRegs:$c, GRRegs:$d), "maccs $a, $b, $c, $d", []>; } -let Constraints = "$src1 = $dst1" in -def CRC8_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2), - (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), - "crc8 $dst1, $dst2, $src2, $src3", - []>; +let Constraints = "$e = $b" in +def CRC8_l4r : _FL4RSrcDst<0b000000, (outs GRRegs:$a, GRRegs:$b), + (ins GRRegs:$e, GRRegs:$c, GRRegs:$d), + "crc8 $b, $a, $c, $d", []>; // Five operand long -def LADD_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), - (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), - "ladd $dst1, $dst2, $src1, $src2, $src3", - []>; +def LADD_l5r : _FL5R<0b000001, (outs GRRegs:$dst1, GRRegs:$dst2), + (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), + "ladd $dst2, $dst1, $src1, $src2, $src3", + []>; -def LSUB_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), - (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), - "lsub $dst1, $dst2, $src1, $src2, $src3", - []>; +def LSUB_l5r : _FL5R<0b000010, (outs GRRegs:$dst1, GRRegs:$dst2), + (ins GRRegs:$src1, 
GRRegs:$src2, GRRegs:$src3), + "lsub $dst2, $dst1, $src1, $src2, $src3", []>; -def LDIV_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), - (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), - "ldiv $dst1, $dst2, $src1, $src2, $src3", - []>; +def LDIVU_l5r : _FL5R<0b000000, (outs GRRegs:$dst1, GRRegs:$dst2), + (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), + "ldivu $dst1, $dst2, $src3, $src1, $src2", []>; // Six operand long -def LMUL_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2), - (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, - GRRegs:$src4), - "lmul $dst1, $dst2, $src1, $src2, $src3, $src4", - []>; +def LMUL_l6r : _FL6R< + 0b00000, (outs GRRegs:$dst1, GRRegs:$dst2), + (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, GRRegs:$src4), + "lmul $dst1, $dst2, $src1, $src2, $src3, $src4", []>; // Register - U6 //let Uses = [DP] in ... let neverHasSideEffects = 1, isReMaterializable = 1 in -def LDAWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a), - "ldaw $dst, dp[$a]", - []>; +def LDAWDP_ru6: _FRU6<0b011000, (outs GRRegs:$a), (ins MEMii:$b), + "ldaw $a, dp[$b]", []>; let isReMaterializable = 1 in -def LDAWDP_lru6: _FLRU6< - (outs GRRegs:$dst), (ins MEMii:$a), - "ldaw $dst, dp[$a]", - [(set GRRegs:$dst, ADDRdpii:$a)]>; +def LDAWDP_lru6: _FLRU6<0b011000, (outs GRRegs:$a), (ins MEMii:$b), + "ldaw $a, dp[$b]", + [(set GRRegs:$a, ADDRdpii:$b)]>; let mayLoad=1 in -def LDWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a), - "ldw $dst, dp[$a]", - []>; - -def LDWDP_lru6: _FLRU6< - (outs GRRegs:$dst), (ins MEMii:$a), - "ldw $dst, dp[$a]", - [(set GRRegs:$dst, (load ADDRdpii:$a))]>; +def LDWDP_ru6: _FRU6<0b010110, (outs GRRegs:$a), (ins MEMii:$b), + "ldw $a, dp[$b]", []>; + +def LDWDP_lru6: _FLRU6<0b010110, (outs GRRegs:$a), (ins MEMii:$b), + "ldw $a, dp[$b]", + [(set GRRegs:$a, (load ADDRdpii:$b))]>; let mayStore=1 in -def STWDP_ru6 : _FRU6<(outs), (ins GRRegs:$val, MEMii:$addr), - "stw $val, dp[$addr]", - []>; +def STWDP_ru6 : _FRU6<0b010100, (outs), (ins GRRegs:$a, MEMii:$b), + "stw $a, dp[$b]", []>; -def STWDP_lru6 : _FLRU6<(outs), (ins GRRegs:$val, MEMii:$addr), - "stw $val, dp[$addr]", - [(store GRRegs:$val, ADDRdpii:$addr)]>; +def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins GRRegs:$a, MEMii:$b), + "stw $a, dp[$b]", + [(store GRRegs:$a, ADDRdpii:$b)]>; //let Uses = [CP] in .. 
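// Each *_ru6/*_lru6 pair that follows is a short/long form of one operation:
// the ru6 encoding carries a 6-bit immediate, while the lru6 form reaches a
// 16-bit range (compare the immU6 vs immU16 patterns below), presumably by
// way of an instruction prefix word.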
let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in -defm LDWCP : FRU6_LRU6_cp<"ldw">; +defm LDWCP : FRU6_LRU6_cp<0b011011, "ldw">; let Uses = [SP] in { let mayStore=1 in { -def STWSP_ru6 : _FRU6< - (outs), (ins GRRegs:$val, i32imm:$index), - "stw $val, sp[$index]", - [(XCoreStwsp GRRegs:$val, immU6:$index)]>; - -def STWSP_lru6 : _FLRU6< - (outs), (ins GRRegs:$val, i32imm:$index), - "stw $val, sp[$index]", - [(XCoreStwsp GRRegs:$val, immU16:$index)]>; +def STWSP_ru6 : _FRU6<0b010101, (outs), (ins GRRegs:$a, i32imm:$b), + "stw $a, sp[$b]", + [(XCoreStwsp GRRegs:$a, immU6:$b)]>; + +def STWSP_lru6 : _FLRU6<0b010101, (outs), (ins GRRegs:$a, i32imm:$b), + "stw $a, sp[$b]", + [(XCoreStwsp GRRegs:$a, immU16:$b)]>; } let mayLoad=1 in { -def LDWSP_ru6 : _FRU6< - (outs GRRegs:$dst), (ins i32imm:$b), - "ldw $dst, sp[$b]", - []>; +def LDWSP_ru6 : _FRU6<0b010111, (outs GRRegs:$a), (ins i32imm:$b), + "ldw $a, sp[$b]", []>; -def LDWSP_lru6 : _FLRU6< - (outs GRRegs:$dst), (ins i32imm:$b), - "ldw $dst, sp[$b]", - []>; +def LDWSP_lru6 : _FLRU6<0b010111, (outs GRRegs:$a), (ins i32imm:$b), + "ldw $a, sp[$b]", []>; } let neverHasSideEffects = 1 in { -def LDAWSP_ru6 : _FRU6< - (outs GRRegs:$dst), (ins i32imm:$b), - "ldaw $dst, sp[$b]", - []>; +def LDAWSP_ru6 : _FRU6<0b011001, (outs GRRegs:$a), (ins i32imm:$b), + "ldaw $a, sp[$b]", []>; -def LDAWSP_lru6 : _FLRU6< - (outs GRRegs:$dst), (ins i32imm:$b), - "ldaw $dst, sp[$b]", - []>; +def LDAWSP_lru6 : _FLRU6<0b011001, (outs GRRegs:$a), (ins i32imm:$b), + "ldaw $a, sp[$b]", []>; -def LDAWSP_ru6_RRegs : _FRU6< - (outs RRegs:$dst), (ins i32imm:$b), - "ldaw $dst, sp[$b]", - []>; +let isCodeGenOnly = 1 in +def LDAWSP_ru6_RRegs : _FRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b), + "ldaw $a, sp[$b]", []>; -def LDAWSP_lru6_RRegs : _FLRU6< - (outs RRegs:$dst), (ins i32imm:$b), - "ldaw $dst, sp[$b]", - []>; +let isCodeGenOnly = 1 in +def LDAWSP_lru6_RRegs : _FLRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b), + "ldaw $a, sp[$b]", []>; } } let isReMaterializable = 1 in { -def LDC_ru6 : _FRU6< - (outs GRRegs:$dst), (ins i32imm:$b), - "ldc $dst, $b", - [(set GRRegs:$dst, immU6:$b)]>; - -def LDC_lru6 : _FLRU6< - (outs GRRegs:$dst), (ins i32imm:$b), - "ldc $dst, $b", - [(set GRRegs:$dst, immU16:$b)]>; +def LDC_ru6 : _FRU6<0b011010, (outs GRRegs:$a), (ins i32imm:$b), + "ldc $a, $b", [(set GRRegs:$a, immU6:$b)]>; + +def LDC_lru6 : _FLRU6<0b011010, (outs GRRegs:$a), (ins i32imm:$b), + "ldc $a, $b", [(set GRRegs:$a, immU16:$b)]>; } -def SETC_ru6 : _FRU6<(outs), (ins GRRegs:$r, i32imm:$val), - "setc res[$r], $val", - [(int_xcore_setc GRRegs:$r, immU6:$val)]>; +def SETC_ru6 : _FRU6<0b111010, (outs), (ins GRRegs:$a, i32imm:$b), + "setc res[$a], $b", + [(int_xcore_setc GRRegs:$a, immU6:$b)]>; -def SETC_lru6 : _FLRU6<(outs), (ins GRRegs:$r, i32imm:$val), - "setc res[$r], $val", - [(int_xcore_setc GRRegs:$r, immU16:$val)]>; +def SETC_lru6 : _FLRU6<0b111010, (outs), (ins GRRegs:$a, i32imm:$b), + "setc res[$a], $b", + [(int_xcore_setc GRRegs:$a, immU16:$b)]>; // Operand register - U6 let isBranch = 1, isTerminator = 1 in { -defm BRFT: FRU6_LRU6_branch<"bt">; -defm BRBT: FRU6_LRU6_branch<"bt">; -defm BRFF: FRU6_LRU6_branch<"bf">; -defm BRBF: FRU6_LRU6_branch<"bf">; +defm BRFT: FRU6_LRU6_branch<0b011100, "bt">; +defm BRBT: FRU6_LRU6_backwards_branch<0b011101, "bt">; +defm BRFF: FRU6_LRU6_branch<0b011110, "bf">; +defm BRBF: FRU6_LRU6_backwards_branch<0b011111, "bf">; } // U6 let Defs = [SP], Uses = [SP] in { let neverHasSideEffects = 1 in -defm EXTSP : FU6_LU6_np<"extsp">; +defm 
EXTSP : FU6_LU6_np<0b0111011110, "extsp">; + let mayStore = 1 in -defm ENTSP : FU6_LU6_np<"entsp">; +defm ENTSP : FU6_LU6_np<0b0111011101, "entsp">; let isReturn = 1, isTerminator = 1, mayLoad = 1, isBarrier = 1 in { -defm RETSP : FU6_LU6<"retsp", XCoreRetsp>; +defm RETSP : FU6_LU6<0b0111011111, "retsp", XCoreRetsp>; } } -// TODO extdp, kentsp, krestsp, blat -// getsr, kalli +let neverHasSideEffects = 1 in +defm EXTDP : FU6_LU6_np<0b0111001110, "extdp">; + +let Uses = [R11], isCall=1 in +defm BLAT : FU6_LU6_np<0b0111001101, "blat">; + let isBranch = 1, isTerminator = 1, isBarrier = 1 in { -def BRBU_u6 : _FU6< - (outs), - (ins brtarget:$target), - "bu $target", - []>; +def BRBU_u6 : _FU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>; -def BRBU_lu6 : _FLU6< - (outs), - (ins brtarget:$target), - "bu $target", - []>; +def BRBU_lu6 : _FLU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>; -def BRFU_u6 : _FU6< - (outs), - (ins brtarget:$target), - "bu $target", - []>; +def BRFU_u6 : _FU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>; -def BRFU_lu6 : _FLU6< - (outs), - (ins brtarget:$target), - "bu $target", - []>; +def BRFU_lu6 : _FLU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>; } //let Uses = [CP] in ... let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in -def LDAWCP_u6: _FRU6<(outs), (ins MEMii:$a), - "ldaw r11, cp[$a]", +def LDAWCP_u6: _FU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]", []>; let Defs = [R11], isReMaterializable = 1 in -def LDAWCP_lu6: _FLRU6< - (outs), (ins MEMii:$a), - "ldaw r11, cp[$a]", - [(set R11, ADDRcpii:$a)]>; +def LDAWCP_lu6: _FLU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]", + [(set R11, ADDRcpii:$a)]>; -defm SETSR : FU6_LU6_int<"setsr", int_xcore_setsr>; +let Defs = [R11] in +defm GETSR : FU6_LU6_np<0b0111111100, "getsr r11,">; -defm CLRSR : FU6_LU6_int<"clrsr", int_xcore_clrsr>; +defm SETSR : FU6_LU6_int<0b0111101101, "setsr", int_xcore_setsr>; + +defm CLRSR : FU6_LU6_int<0b0111101100, "clrsr", int_xcore_clrsr>; // setsr may cause a branch if it is used to enable events. clrsr may // branch if it is executed while events are enabled. 
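// The codegen-only *_branch clones below model that possible branch
// explicitly (isBranch/isIndirectBranch/isBarrier) so that later passes do
// not move code across a point where events may become enabled.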
-let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in { -defm SETSR_branch : FU6_LU6_np<"setsr">; -defm CLRSR_branch : FU6_LU6_np<"clrsr">; +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1, + isCodeGenOnly = 1 in { +defm SETSR_branch : FU6_LU6_np<0b0111101101, "setsr">; +defm CLRSR_branch : FU6_LU6_np<0b0111101100, "clrsr">; } +defm KCALL : FU6_LU6_np<0b0111001111, "kcall">; + +let Uses = [SP], Defs = [SP], mayStore = 1 in +defm KENTSP : FU6_LU6_np<0b0111101110, "kentsp">; + +let Uses = [SP], Defs = [SP], mayLoad = 1 in +defm KRESTSP : FU6_LU6_np<0b0111101111, "krestsp">; + // U10 -// TODO ldwcpl, blacp let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in -def LDAP_u10 : _FU10< - (outs), - (ins i32imm:$addr), - "ldap r11, $addr", - []>; +def LDAPF_u10 : _FU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", []>; let Defs = [R11], isReMaterializable = 1 in -def LDAP_lu10 : _FLU10< - (outs), - (ins i32imm:$addr), - "ldap r11, $addr", - [(set R11, (pcrelwrapper tglobaladdr:$addr))]>; +def LDAPF_lu10 : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", + [(set R11, (pcrelwrapper tglobaladdr:$a))]>; -let Defs = [R11], isReMaterializable = 1 in -def LDAP_lu10_ba : _FLU10<(outs), - (ins i32imm:$addr), - "ldap r11, $addr", - [(set R11, (pcrelwrapper tblockaddress:$addr))]>; +let Defs = [R11], isReMaterializable = 1, isCodeGenOnly = 1 in +def LDAPF_lu10_ba : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", + [(set R11, (pcrelwrapper tblockaddress:$a))]>; let isCall=1, // All calls clobber the link register and the non-callee-saved registers: Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in { -def BL_u10 : _FU10< - (outs), (ins calltarget:$target), - "bl $target", - [(XCoreBranchLink immU10:$target)]>; - -def BL_lu10 : _FLU10< - (outs), (ins calltarget:$target), - "bl $target", - [(XCoreBranchLink immU20:$target)]>; +def BLACP_u10 : _FU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>; + +def BLACP_lu10 : _FLU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>; + +def BLRF_u10 : _FU10<0b110100, (outs), (ins calltarget:$a), "bl $a", + [(XCoreBranchLink immU10:$a)]>; + +def BLRF_lu10 : _FLU10<0b110100, (outs), (ins calltarget:$a), "bl $a", + [(XCoreBranchLink immU20:$a)]>; +} + +let Defs = [R11], mayLoad = 1, isReMaterializable = 1, + neverHasSideEffects = 1 in { +def LDWCP_u10 : _FU10<0b111001, (outs), (ins i32imm:$a), "ldw r11, cp[$a]", []>; + +def LDWCP_lu10 : _FLU10<0b111001, (outs), (ins i32imm:$a), "ldw r11, cp[$a]", + []>; } // Two operand short -// TODO eet, eef, tsetmr def NOT : _F2R<0b100010, (outs GRRegs:$dst), (ins GRRegs:$b), "not $dst, $b", [(set GRRegs:$dst, (not GRRegs:$b))]>; @@ -867,9 +815,9 @@ def SETD_2r : _FR2R<0b000101, (outs), (ins GRRegs:$r, GRRegs:$val), "setd res[$r], $val", [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>; -def SETPSC_l2r : _FR2R<0b110000, (outs), (ins GRRegs:$src1, GRRegs:$src2), - "setpsc res[$src1], $src2", - [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>; +def SETPSC_2r : _FR2R<0b110000, (outs), (ins GRRegs:$src1, GRRegs:$src2), + "setpsc res[$src1], $src2", + [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>; def GETST_2r : _F2R<0b000001, (outs GRRegs:$dst), (ins GRRegs:$r), "getst $dst, res[$r]", @@ -899,8 +847,16 @@ def ENDIN_2r : _F2R<0b100101, (outs GRRegs:$dst), (ins GRRegs:$src), "endin $dst, res[$src]", [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>; +def EEF_2r : _F2R<0b001011, (outs), (ins GRRegs:$a, GRRegs:$b), + "eef $a, res[$b]", []>; + +def EET_2r : _F2R<0b001001, 
(outs), (ins GRRegs:$a, GRRegs:$b), + "eet $a, res[$b]", []>; + +def TSETMR_2r : _F2RImm<0b000111, (outs), (ins i32imm:$a, GRRegs:$b), + "tsetmr r$a, $b", []>; + // Two operand long -// getd, testlcl def BITREV_l2r : _FL2R<0b0000011000, (outs GRRegs:$dst), (ins GRRegs:$src), "bitrev $dst, $src", [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>; @@ -913,6 +869,12 @@ def CLZ_l2r : _FL2R<0b000111000, (outs GRRegs:$dst), (ins GRRegs:$src), "clz $dst, $src", [(set GRRegs:$dst, (ctlz GRRegs:$src))]>; +def GETD_l2r : _FL2R<0b0001111001, (outs GRRegs:$dst), (ins GRRegs:$src), + "getd $dst, res[$src]", []>; + +def GETN_l2r : _FL2R<0b0011011001, (outs GRRegs:$dst), (ins GRRegs:$src), + "getn $dst, res[$src]", []>; + def SETC_l2r : _FL2R<0b0010111001, (outs), (ins GRRegs:$r, GRRegs:$val), "setc res[$r], $val", [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>; @@ -937,14 +899,17 @@ def SETCLK_l2r : _FLR2R<0b0000111001, (outs), (ins GRRegs:$src1, GRRegs:$src2), "setclk res[$src1], $src2", [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>; +def SETN_l2r : _FLR2R<0b0011011000, (outs), (ins GRRegs:$src1, GRRegs:$src2), + "setn res[$src1], $src2", []>; + def SETRDY_l2r : _FLR2R<0b0010111000, (outs), (ins GRRegs:$src1, GRRegs:$src2), "setrdy res[$src1], $src2", [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>; +def TESTLCL_l2r : _FL2R<0b0010011000, (outs GRRegs:$dst), (ins GRRegs:$src), + "testlcl $dst, res[$src]", []>; + // One operand short -// TODO edu, eeu, waitet, waitef, tstart, clrtp -// setdp, setcp, setev, kcall -// dgetreg def MSYNC_1r : _F1R<0b000111, (outs), (ins GRRegs:$a), "msync res[$a]", [(int_xcore_msync GRRegs:$a)]>; @@ -968,9 +933,13 @@ def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i), [(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>; let Defs=[SP], neverHasSideEffects=1 in -def SETSP_1r : _F1R<0b001011, (outs), (ins GRRegs:$a), - "set sp, $a", - []>; +def SETSP_1r : _F1R<0b001011, (outs), (ins GRRegs:$a), "set sp, $a", []>; + +let neverHasSideEffects=1 in +def SETDP_1r : _F1R<0b001100, (outs), (ins GRRegs:$a), "set dp, $a", []>; + +let neverHasSideEffects=1 in +def SETCP_1r : _F1R<0b001101, (outs), (ins GRRegs:$a), "set cp, $a", []>; let hasCtrlDep = 1 in def ECALLT_1r : _F1R<0b010011, (outs), (ins GRRegs:$a), @@ -1008,17 +977,40 @@ def SETEV_1r : _F1R<0b001111, (outs), (ins GRRegs:$a), [(int_xcore_setev GRRegs:$a, R11)]>; } +def DGETREG_1r : _F1R<0b001110, (outs GRRegs:$a), (ins), "dgetreg $a", []>; + +def EDU_1r : _F1R<0b000000, (outs), (ins GRRegs:$a), "edu res[$a]", []>; + def EEU_1r : _F1R<0b000001, (outs), (ins GRRegs:$a), "eeu res[$a]", [(int_xcore_eeu GRRegs:$a)]>; +def KCALL_1r : _F1R<0b010000, (outs), (ins GRRegs:$a), "kcall $a", []>; + +def WAITEF_1R : _F1R<0b000011, (outs), (ins GRRegs:$a), "waitef $a", []>; + +def WAITET_1R : _F1R<0b000010, (outs), (ins GRRegs:$a), "waitet $a", []>; + +def TSTART_1R : _F1R<0b000110, (outs), (ins GRRegs:$a), "start t[$a]", []>; + +def CLRPT_1R : _F1R<0b100000, (outs), (ins GRRegs:$a), "clrpt res[$a]", []>; + // Zero operand short -// TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, -// stet, getkep, getksp, setkep, getid, kret, dcall, dret, -// dentsp, drestsp def CLRE_0R : _F0R<0b0000001101, (outs), (ins), "clre", [(int_xcore_clre)]>; +def DCALL_0R : _F0R<0b0000011100, (outs), (ins), "dcall", []>; + +let Defs = [SP], Uses = [SP] in +def DENTSP_0R : _F0R<0b0001001100, (outs), (ins), "dentsp", []>; + +let Defs = [SP] in +def DRESTSP_0R : _F0R<0b0001001101, (outs), (ins), "drestsp", []>; + +def DRET_0R : _F0R<0b0000011110, 
(outs), (ins), "dret", []>; + +def FREET_0R : _F0R<0b0000001111, (outs), (ins), "freet", []>; + let Defs = [R11] in { def GETID_0R : _F0R<0b0001001110, (outs), (ins), "get r11, id", @@ -1031,12 +1023,44 @@ def GETED_0R : _F0R<0b0000111110, (outs), (ins), def GETET_0R : _F0R<0b0000111111, (outs), (ins), "get r11, et", [(set R11, (int_xcore_getet))]>; + +def GETKEP_0R : _F0R<0b0001001111, (outs), (ins), + "get r11, kep", []>; + +def GETKSP_0R : _F0R<0b0001011100, (outs), (ins), + "get r11, ksp", []>; } +let Defs = [SP] in +def KRET_0R : _F0R<0b0000011101, (outs), (ins), "kret", []>; + +let Uses = [SP], mayLoad = 1 in { +def LDET_0R : _F0R<0b0001011110, (outs), (ins), "ldw et, sp[4]", []>; + +def LDSED_0R : _F0R<0b0001011101, (outs), (ins), "ldw sed, sp[3]", []>; + +def LDSPC_0R : _F0R<0b0000101100, (outs), (ins), "ldw spc, sp[1]", []>; + +def LDSSR_0R : _F0R<0b0000101110, (outs), (ins), "ldw ssr, sp[2]", []>; +} + +let Uses=[R11] in +def SETKEP_0R : _F0R<0b0000011111, (outs), (ins), "set kep, r11", []>; + def SSYNC_0r : _F0R<0b0000001110, (outs), (ins), "ssync", [(int_xcore_ssync)]>; +let Uses = [SP], mayStore = 1 in { +def STET_0R : _F0R<0b0000111101, (outs), (ins), "stw et, sp[4]", []>; + +def STSED_0R : _F0R<0b0000111100, (outs), (ins), "stw sed, sp[3]", []>; + +def STSPC_0R : _F0R<0b0000101101, (outs), (ins), "stw spc, sp[1]", []>; + +def STSSR_0R : _F0R<0b0000101111, (outs), (ins), "stw ssr, sp[2]", []>; +} + let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1, hasSideEffects = 1 in def WAITEU_0R : _F0R<0b0000001100, (outs), (ins), @@ -1047,8 +1071,8 @@ def WAITEU_0R : _F0R<0b0000001100, (outs), (ins), // Non-Instruction Patterns //===----------------------------------------------------------------------===// -def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BL_lu10 tglobaladdr:$addr)>; -def : Pat<(XCoreBranchLink texternalsym:$addr), (BL_lu10 texternalsym:$addr)>; +def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BLRF_lu10 tglobaladdr:$addr)>; +def : Pat<(XCoreBranchLink texternalsym:$addr), (BLRF_lu10 texternalsym:$addr)>; /// sext_inreg def : Pat<(sext_inreg GRRegs:$b, i1), (SEXT_rus GRRegs:$b, 1)>; @@ -1090,7 +1114,7 @@ def : Pat<(truncstorei16 GRRegs:$val, GRRegs:$addr), (ST16_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>; def : Pat<(store GRRegs:$val, (ldawf GRRegs:$addr, GRRegs:$offset)), - (STW_3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>; + (STW_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>; def : Pat<(store GRRegs:$val, (add GRRegs:$addr, immUs4:$offset)), (STW_2rus GRRegs:$val, GRRegs:$addr, (div4_xform immUs4:$offset))>; def : Pat<(store GRRegs:$val, GRRegs:$addr), diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index e637d9a..49b5634 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -101,72 +101,14 @@ XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { return false; } -// This function eliminates ADJCALLSTACKDOWN, -// ADJCALLSTACKUP pseudo instructions -void XCoreRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (!TFI->hasReservedCallFrame(MF)) { - // Turn the adjcallstackdown instruction into 'extsp <amt>' and the - // adjcallstackup instruction into 'ldaw sp, sp[<amt>]' - MachineInstr *Old = I; - uint64_t Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to 
keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = TFI->getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; - - assert(Amount%4 == 0); - Amount /= 4; - - bool isU6 = isImmU6(Amount); - if (!isU6 && !isImmU16(Amount)) { - // FIX could emit multiple instructions in this case. -#ifndef NDEBUG - errs() << "eliminateCallFramePseudoInstr size too big: " - << Amount << "\n"; -#endif - llvm_unreachable(0); - } - - MachineInstr *New; - if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) { - int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6; - New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode)) - .addImm(Amount); - } else { - assert(Old->getOpcode() == XCore::ADJCALLSTACKUP); - int Opcode = isU6 ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs; - New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP) - .addImm(Amount); - } - - // Replace the pseudo instruction with a new instruction... - MBB.insert(I, New); - } - } - - MBB.erase(I); -} - void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; DebugLoc dl = MI.getDebugLoc(); - unsigned i = 0; - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - MachineOperand &FrameOp = MI.getOperand(i); + MachineOperand &FrameOp = MI.getOperand(FIOperandNum); int FrameIndex = FrameOp.getIndex(); MachineFunction &MF = *MI.getParent()->getParent(); @@ -190,14 +132,14 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Special handling of DBG_VALUE instructions. if (MI.isDebugValue()) { - MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); return; } // fold constant into offset. - Offset += MI.getOperand(i + 1).getImm(); - MI.getOperand(i + 1).ChangeToImmediate(0); + Offset += MI.getOperand(FIOperandNum + 1).getImm(); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0); assert(Offset%4 == 0 && "Misaligned stack offset"); @@ -231,7 +173,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, .addReg(ScratchReg, RegState::Kill); break; case XCore::STWFI: - BuildMI(MBB, II, dl, TII.get(XCore::STW_3r)) + BuildMI(MBB, II, dl, TII.get(XCore::STW_l3r)) .addReg(Reg, getKillRegState(isKill)) .addReg(FrameReg) .addReg(ScratchReg, RegState::Kill); diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index c4dcb6b..1db3248 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -54,12 +54,9 @@ public: bool useFPForScavengingIndex(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const;
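A note on the eliminateCallFramePseudoInstr body deleted above (this change removes the hook from XCoreRegisterInfo; the diff does not show where the logic lands): it rounded the outgoing-argument space up to the stack alignment, converted bytes to words, and chose the u6 or lu6 encoding by range. A minimal, self-contained C++ sketch of that arithmetic, where frameAdjustWords is a hypothetical name and the range checks are modelled on the isImmU6/isImmU16 helpers the deleted code called:

#include <cassert>
#include <cstdint>

// Assumed helpers mirroring the u6/u16 range checks used above.
static bool isImmU6(uint64_t V)  { return V < (1u << 6); }
static bool isImmU16(uint64_t V) { return V < (1u << 16); }

// Hypothetical sketch: round a call-frame adjustment up to the stack
// alignment, then convert bytes to the word count encoded by EXTSP/LDAWSP.
static uint64_t frameAdjustWords(uint64_t Amount, unsigned Align) {
  Amount = (Amount + Align - 1) / Align * Align; // next alignment boundary
  assert(Amount % 4 == 0 && "stack adjustment must be word-aligned");
  uint64_t Words = Amount / 4;
  // The short encoding takes a u6 operand; the long form extends it to u16.
  // The deleted code reported an error beyond that, rather than splitting.
  assert((isImmU6(Words) || isImmU16(Words)) && "adjustment too large");
  return Words;
}

int main() {
  // 10 bytes of outgoing arguments round up to 12 with 4-byte alignment,
  // which EXTSP/LDAWSP encode as 3 words.
  assert(frameAdjustWords(10, 4) == 3);
  return 0;
}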