diff options
Diffstat (limited to 'lib/Target')
273 files changed, 7754 insertions, 2749 deletions
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index bb3db4b..9a7d6c8 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -32,9 +32,6 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable ARMv8 CRC-32 checksum instructions">; -def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true", - "Enable ARMv8.1a extensions", [FeatureCRC]>; - /// Cyclone has register move instructions which are "free". def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; @@ -44,6 +41,13 @@ def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", "Has zero-cycle zeroing instructions">; //===----------------------------------------------------------------------===// +// Architectures. +// + +def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", + "Support ARM v8.1a instructions", [FeatureCRC]>; + +//===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -92,10 +96,6 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8, FeatureNEON, FeatureCRC]>; -def : ProcessorModel<"generic-armv8.1-a", NoSchedModel, [FeatureV8_1a, - FeatureNEON, - FeatureCrypto]>; - def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>; // FIXME: Cortex-A72 is currently modelled as an Cortex-A57. @@ -123,12 +123,14 @@ def AppleAsmParserVariant : AsmParserVariant { // AsmWriter bits get associated with the correct class. def GenericAsmWriter : AsmWriter { string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; int Variant = 0; bit isMCAsmWriter = 1; } def AppleAsmWriter : AsmWriter { let AsmWriterClassName = "AppleInstPrinter"; + int PassSubtarget = 1; int Variant = 1; int isMCAsmWriter = 1; } diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 1b4483a..0821cff 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -131,29 +131,6 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); SM.serializeToStackMapSection(); } - - // Emit a .data.rel section containing any stubs that were created. - if (TT.isOSBinFormatELF()) { - const TargetLoweringObjectFileELF &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Output stubs for external and common global variables. - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); - - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - OutStreamer.EmitLabel(Stubs[i].first); - OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(0)); - } - Stubs.clear(); - } - } - } MachineLocation @@ -371,8 +348,8 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, assert(NOps == 4); OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata())); - OS << V.getName(); + OS << cast<MDLocalVariable>(MI->getOperand(NOps - 2).getMetadata()) + ->getName(); OS << " <- "; // Frame address. Currently handles register +- offset only. assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp index 568f258..efdb2e3 100644 --- a/lib/Target/AArch64/AArch64CollectLOH.cpp +++ b/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -328,7 +328,7 @@ static void initReachingDef(const MachineFunction &MF, const uint32_t *PreservedRegs = MO.getRegMask(); // Set generated regs. - for (const auto Entry : RegToId) { + for (const auto &Entry : RegToId) { unsigned Reg = Entry.second; // Use the global register ID when querying APIs external to this // pass. diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 41b1132..c2470f7 100644 --- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -698,12 +698,15 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, return expandMOVImm(MBB, MBBI, 32); case AArch64::MOVi64imm: return expandMOVImm(MBB, MBBI, 64); - case AArch64::RET_ReallyLR: - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET)) - .addReg(AArch64::LR); + case AArch64::RET_ReallyLR: { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET)) + .addReg(AArch64::LR); + transferImpOps(MI, MIB, MIB); MI.eraseFromParent(); return true; } + } return false; } diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 99cb641..c3f6859 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -1917,7 +1917,8 @@ bool AArch64FastISel::selectLoad(const Instruction *I) { // could select it. Emit a copy to subreg if necessary. FastISel will remove // it when it selects the integer extend. unsigned Reg = lookUpRegForValue(IntExtVal); - if (!Reg) { + auto *MI = MRI.getUniqueVRegDef(Reg); + if (!MI) { if (RetVT == MVT::i64 && VT <= MVT::i32) { if (WantZExt) { // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). @@ -1935,10 +1936,7 @@ bool AArch64FastISel::selectLoad(const Instruction *I) { // The integer extend has already been emitted - delete all the instructions // that have been emitted by the integer extend lowering code and use the // result from the load instruction directly. - while (Reg) { - auto *MI = MRI.getUniqueVRegDef(Reg); - if (!MI) - break; + while (MI) { Reg = 0; for (auto &Opnd : MI->uses()) { if (Opnd.isReg()) { @@ -1947,6 +1945,9 @@ bool AArch64FastISel::selectLoad(const Instruction *I) { } } MI->eraseFromParent(); + MI = nullptr; + if (Reg) + MI = MRI.getUniqueVRegDef(Reg); } updateValueMap(IntExtVal, ResultReg); return true; @@ -3034,6 +3035,11 @@ bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, // Copy all of the result registers out of their specified physreg. MVT CopyVT = RVLocs[0].getValVT(); + + // TODO: Handle big-endian results + if (CopyVT.isVector() && !Subtarget->isLittleEndian()) + return false; + unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 84bf317..bd2af16 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -9,6 +9,82 @@ // // This file contains the AArch64 implementation of TargetFrameLowering class. // +// On AArch64, stack frames are structured as follows: +// +// The stack grows downward. +// +// All of the individual frame areas on the frame below are optional, i.e. it's +// possible to create a function so that the particular area isn't present +// in the frame. +// +// At function entry, the "frame" looks as follows: +// +// | | Higher address +// |-----------------------------------| +// | | +// | arguments passed on the stack | +// | | +// |-----------------------------------| <- sp +// | | Lower address +// +// +// After the prologue has run, the frame has the following general structure. +// Note that this doesn't depict the case where a red-zone is used. Also, +// technically the last frame area (VLAs) doesn't get created until in the +// main function body, after the prologue is run. However, it's depicted here +// for completeness. +// +// | | Higher address +// |-----------------------------------| +// | | +// | arguments passed on the stack | +// | | +// |-----------------------------------| +// | | +// | prev_fp, prev_lr | +// | (a.k.a. "frame record") | +// |-----------------------------------| <- fp(=x29) +// | | +// | other callee-saved registers | +// | | +// |-----------------------------------| +// |.empty.space.to.make.part.below....| +// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at +// |.the.standard.16-byte.alignment....| compile time; if present) +// |-----------------------------------| +// | | +// | local variables of fixed size | +// | including spill slots | +// |-----------------------------------| <- bp(not defined by ABI, +// |.variable-sized.local.variables....| LLVM chooses X19) +// |.(VLAs)............................| (size of this area is unknown at +// |...................................| compile time) +// |-----------------------------------| <- sp +// | | Lower address +// +// +// To access the data in a frame, at-compile time, a constant offset must be +// computable from one of the pointers (fp, bp, sp) to access it. The size +// of the areas with a dotted background cannot be computed at compile-time +// if they are present, making it required to have all three of fp, bp and +// sp to be set up to be able to access all contents in the frame areas, +// assuming all of the frame areas are non-empty. +// +// For most functions, some of the frame areas are empty. For those functions, +// it may not be necessary to set up fp or bp: +// * A base pointer is definitly needed when there are both VLAs and local +// variables with more-than-default alignment requirements. +// * A frame pointer is definitly needed when there are local variables with +// more-than-default alignment requirements. +// +// In some cases when a base pointer is not strictly needed, it is generated +// anyway when offsets from the frame pointer to access local variables become +// so large that the offset can't be encoded in the immediate fields of loads +// or stores. +// +// FIXME: also explain the redzone concept. +// FIXME: also explain the concept of reserved call frames. +// //===----------------------------------------------------------------------===// #include "AArch64FrameLowering.h" @@ -39,26 +115,6 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone", STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); -static unsigned estimateStackSize(MachineFunction &MF) { - const MachineFrameInfo *FFI = MF.getFrameInfo(); - int Offset = 0; - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -FFI->getObjectOffset(i); - if (FixedOff > Offset) - Offset = FixedOff; - } - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { - if (FFI->isDeadObjectIndex(i)) - continue; - Offset += FFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset + Align - 1) / Align * Align; - } - // This does not include the 16 bytes used for fp and lr. - return (unsigned)Offset; -} - bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { if (!EnableRedZone) return false; @@ -83,16 +139,10 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { /// pointer register. bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - -#ifndef NDEBUG const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - assert(!RegInfo->needsStackRealignment(MF) && - "No stack realignment on AArch64!"); -#endif - return (MFI->hasCalls() || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || MFI->hasStackMap() || - MFI->hasPatchPoint()); + MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF)); } /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is @@ -288,11 +338,48 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setLocalStackSize(NumBytes); // Allocate space for the rest of the frame. - if (NumBytes) { - // If we're a leaf function, try using the red zone. - if (!canUseRedZone(MF)) - emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); + + const unsigned Alignment = MFI->getMaxAlignment(); + const bool NeedsRealignment = (Alignment > 16); + unsigned scratchSPReg = AArch64::SP; + if (NeedsRealignment) { + // Use the first callee-saved register as a scratch register + assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) && + "No scratch register to align SP!"); + scratchSPReg = AArch64::X9; + } + + // If we're a leaf function, try using the red zone. + if (NumBytes && !canUseRedZone(MF)) + // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have + // the correct value here, as NumBytes also includes padding bytes, + // which shouldn't be counted here. + emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, + MachineInstr::FrameSetup); + + assert(!(NeedsRealignment && NumBytes==0) && + "NumBytes should never be 0 when realignment is needed"); + + if (NumBytes && NeedsRealignment) { + const unsigned NrBitsToZero = countTrailingZeros(Alignment); + assert(NrBitsToZero > 1); + assert(scratchSPReg != AArch64::SP); + + // SUB X9, SP, NumBytes + // -- X9 is temporary register, so shouldn't contain any live data here, + // -- free to use. This is already produced by emitFrameOffset above. + // AND SP, X9, 0b11111...0000 + // The logical immediates have a non-trivial encoding. The following + // formula computes the encoded immediate with all ones but + // NrBitsToZero zero bits as least significant bits. + uint32_t andMaskEncoded = + (1 <<12) // = N + | ((64-NrBitsToZero) << 6) // immr + | ((64-NrBitsToZero-1) << 0) // imms + ; + BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP) + .addReg(scratchSPReg, RegState::Kill) + .addImm(andMaskEncoded); } // If we need a base pointer, set it up here. It's whatever the value of the @@ -302,15 +389,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { // FIXME: Clarify FrameSetup flags here. // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is // needed. - // - if (RegInfo->hasBasePointer(MF)) - TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false); + if (RegInfo->hasBasePointer(MF)) { + TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, + false); + } if (needsFrameMoves) { const DataLayout *TD = MF.getTarget().getDataLayout(); const int StackGrowth = -TD->getPointerSize(0); unsigned FramePtr = RegInfo->getFrameRegister(MF); - // An example of the prologue: // // .globl __foo @@ -460,7 +547,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; - // Initial and residual are named for consitency with the prologue. Note that + // Initial and residual are named for consistency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. uint64_t ArgumentPopSize = 0; if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) { @@ -571,9 +658,9 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, bool isFixed = MFI->isFixedObjectIndex(FI); // Use frame pointer to reference fixed objects. Use it for locals if - // there are VLAs (and thus the SP isn't reliable as a base). - // Make sure useFPForScavengingIndex() does the right thing for the emergency - // spill slot. + // there are VLAs or a dynamically realigned SP (and thus the SP isn't + // reliable as a base). Make sure useFPForScavengingIndex() does the + // right thing for the emergency spill slot. bool UseFP = false; if (AFI->hasStackFrame()) { // Note: Keeping the following as multiple 'if' statements rather than @@ -582,7 +669,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, // Argument access should always use the FP. if (isFixed) { UseFP = hasFP(MF); - } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) { + } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) && + !RegInfo->needsStackRealignment(MF)) { // Use SP or FP, whichever gives us the best chance of the offset // being in range for direct access. If the FPOffset is positive, // that'll always be best, as the SP will be even further away. @@ -598,6 +686,10 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, } } + assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) && + "In the presence of dynamic stack pointer realignment, " + "non-argument objects cannot be accessed through the frame pointer"); + if (UseFP) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; @@ -695,6 +787,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre) MIB.addReg(AArch64::SP, RegState::Define); + MBB.addLiveIn(Reg1); + MBB.addLiveIn(Reg2); MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)) .addReg(Reg1, getPrologueDeath(MF, Reg1)) .addReg(AArch64::SP) @@ -794,6 +888,9 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( if (RegInfo->hasBasePointer(MF)) MRI->setPhysRegUsed(RegInfo->getBaseRegister()); + if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF)) + MRI->setPhysRegUsed(AArch64::X9); + // If any callee-saved registers are used, the frame cannot be eliminated. unsigned NumGPRSpilled = 0; unsigned NumFPRSpilled = 0; @@ -867,7 +964,8 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. MachineFrameInfo *MFI = MF.getFrameInfo(); - unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); + unsigned CFSize = + MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); bool BigStack = (CFSize >= 256); if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index df3875f..1439bf3 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -22,7 +22,7 @@ class AArch64FrameLowering : public TargetFrameLowering { public: explicit AArch64FrameLowering() : TargetFrameLowering(StackGrowsDown, 16, 0, 16, - false /*StackRealignable*/) {} + true /*StackRealignable*/) {} void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 0a47dcb..f75700d 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -848,7 +848,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, // MOV X0, WideImmediate // LDR X2, [BaseReg, X0] if (isa<ConstantSDNode>(RHS)) { - int64_t ImmOff = (int64_t)dyn_cast<ConstantSDNode>(RHS)->getZExtValue(); + int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue(); unsigned Scale = Log2_32(Size); // Skip the immediate can be seleced by load/store addressing mode. // Also skip the immediate can be encoded by a single ADD (SUB is also diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 0c0e856..90a5e5e 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -281,14 +281,39 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); - // f16 is storage-only, so we promote operations to f32 if we know this is - // valid, and ignore them otherwise. The operations not mentioned here will - // fail to select, but this is not a major problem as no source language - // should be emitting native f16 operations yet. - setOperationAction(ISD::FADD, MVT::f16, Promote); - setOperationAction(ISD::FDIV, MVT::f16, Promote); - setOperationAction(ISD::FMUL, MVT::f16, Promote); - setOperationAction(ISD::FSUB, MVT::f16, Promote); + // f16 is a storage-only type, always promote it to f32. + setOperationAction(ISD::SETCC, MVT::f16, Promote); + setOperationAction(ISD::BR_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT, MVT::f16, Promote); + setOperationAction(ISD::FADD, MVT::f16, Promote); + setOperationAction(ISD::FSUB, MVT::f16, Promote); + setOperationAction(ISD::FMUL, MVT::f16, Promote); + setOperationAction(ISD::FDIV, MVT::f16, Promote); + setOperationAction(ISD::FREM, MVT::f16, Promote); + setOperationAction(ISD::FMA, MVT::f16, Promote); + setOperationAction(ISD::FNEG, MVT::f16, Promote); + setOperationAction(ISD::FABS, MVT::f16, Promote); + setOperationAction(ISD::FCEIL, MVT::f16, Promote); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); + setOperationAction(ISD::FCOS, MVT::f16, Promote); + setOperationAction(ISD::FFLOOR, MVT::f16, Promote); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); + setOperationAction(ISD::FPOW, MVT::f16, Promote); + setOperationAction(ISD::FPOWI, MVT::f16, Promote); + setOperationAction(ISD::FRINT, MVT::f16, Promote); + setOperationAction(ISD::FSIN, MVT::f16, Promote); + setOperationAction(ISD::FSINCOS, MVT::f16, Promote); + setOperationAction(ISD::FSQRT, MVT::f16, Promote); + setOperationAction(ISD::FEXP, MVT::f16, Promote); + setOperationAction(ISD::FEXP2, MVT::f16, Promote); + setOperationAction(ISD::FLOG, MVT::f16, Promote); + setOperationAction(ISD::FLOG2, MVT::f16, Promote); + setOperationAction(ISD::FLOG10, MVT::f16, Promote); + setOperationAction(ISD::FROUND, MVT::f16, Promote); + setOperationAction(ISD::FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::FMINNUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); // v4f16 is also a storage-only type, so promote it to v4f32 when that is // known to be safe. @@ -481,6 +506,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // Enable TBZ/TBNZ MaskAndBranchFoldingIsLegal = true; + EnableExtLdPromotion = true; setMinFunctionAlignment(2); @@ -1557,6 +1583,14 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, if (Op.getOperand(0).getValueType().isVector()) return LowerVectorFP_TO_INT(Op, DAG); + // f16 conversions are promoted to f32. + if (Op.getOperand(0).getValueType() == MVT::f16) { + SDLoc dl(Op); + return DAG.getNode( + Op.getOpcode(), dl, Op.getValueType(), + DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0))); + } + if (Op.getOperand(0).getValueType() != MVT::f128) { // It's legal except when f128 is involved return Op; @@ -1606,6 +1640,15 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, if (Op.getValueType().isVector()) return LowerVectorINT_TO_FP(Op, DAG); + // f16 conversions are promoted to f32. + if (Op.getValueType() == MVT::f16) { + SDLoc dl(Op); + return DAG.getNode( + ISD::FP_ROUND, dl, MVT::f16, + DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)), + DAG.getIntPtrConstant(0)); + } + // i128 conversions are libcalls. if (Op.getOperand(0).getValueType() == MVT::i128) return SDValue(); @@ -2701,8 +2744,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, DAG.getConstant(Outs[i].Flags.getByValSize(), MVT::i64); SDValue Cpy = DAG.getMemcpy( Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(), - /*isVol = */ false, - /*AlwaysInline = */ false, DstInfo, MachinePointerInfo()); + /*isVol = */ false, /*AlwaysInline = */ false, + /*isTailCall = */ false, + DstInfo, MachinePointerInfo()); MemOpChains.push_back(Cpy); } else { @@ -3514,49 +3558,10 @@ static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) { return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp; } -SDValue AArch64TargetLowering::LowerSELECT(SDValue Op, - SelectionDAG &DAG) const { - SDValue CC = Op->getOperand(0); - SDValue TVal = Op->getOperand(1); - SDValue FVal = Op->getOperand(2); - SDLoc DL(Op); - - unsigned Opc = CC.getOpcode(); - // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select - // instruction. - if (CC.getResNo() == 1 && - (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || - Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { - // Only lower legal XALUO ops. - if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0))) - return SDValue(); - - AArch64CC::CondCode OFCC; - SDValue Value, Overflow; - std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CC.getValue(0), DAG); - SDValue CCVal = DAG.getConstant(OFCC, MVT::i32); - - return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, - CCVal, Overflow); - } - - if (CC.getOpcode() == ISD::SETCC) - return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal, - cast<CondCodeSDNode>(CC.getOperand(2))->get()); - else - return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal, - FVal, ISD::SETNE); -} - -SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, +SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, + SDValue RHS, SDValue TVal, + SDValue FVal, SDLoc dl, SelectionDAG &DAG) const { - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue TVal = Op.getOperand(2); - SDValue FVal = Op.getOperand(3); - SDLoc dl(Op); - // Handle f128 first, because it will result in a comparison of some RTLIB // call result against zero. if (LHS.getValueType() == MVT::f128) { @@ -3664,14 +3669,14 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SDValue CCVal; SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); - EVT VT = Op.getValueType(); + EVT VT = TVal.getValueType(); return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp); } // Now we know we're dealing with FP values. assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); assert(LHS.getValueType() == RHS.getValueType()); - EVT VT = Op.getValueType(); + EVT VT = TVal.getValueType(); // Try to match this select into a max/min operation, which have dedicated // opcode in the instruction set. @@ -3732,6 +3737,58 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, return CS1; } +SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue TVal = Op.getOperand(2); + SDValue FVal = Op.getOperand(3); + SDLoc DL(Op); + return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG); +} + +SDValue AArch64TargetLowering::LowerSELECT(SDValue Op, + SelectionDAG &DAG) const { + SDValue CCVal = Op->getOperand(0); + SDValue TVal = Op->getOperand(1); + SDValue FVal = Op->getOperand(2); + SDLoc DL(Op); + + unsigned Opc = CCVal.getOpcode(); + // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select + // instruction. + if (CCVal.getResNo() == 1 && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { + // Only lower legal XALUO ops. + if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0))) + return SDValue(); + + AArch64CC::CondCode OFCC; + SDValue Value, Overflow; + std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG); + SDValue CCVal = DAG.getConstant(OFCC, MVT::i32); + + return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, + CCVal, Overflow); + } + + // Lower it the same way as we would lower a SELECT_CC node. + ISD::CondCode CC; + SDValue LHS, RHS; + if (CCVal.getOpcode() == ISD::SETCC) { + LHS = CCVal.getOperand(0); + RHS = CCVal.getOperand(1); + CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get(); + } else { + LHS = CCVal; + RHS = DAG.getConstant(0, CCVal.getValueType()); + CC = ISD::SETNE; + } + return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG); +} + SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // Jump table entries as PC relative offsets. No additional tweaking @@ -3920,7 +3977,7 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1), Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32), - 8, false, false, MachinePointerInfo(DestSV), + 8, false, false, false, MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); } @@ -4989,7 +5046,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, unsigned Opcode; if (EltTy == MVT::i8) Opcode = AArch64ISD::DUPLANE8; - else if (EltTy == MVT::i16) + else if (EltTy == MVT::i16 || EltTy == MVT::f16) Opcode = AArch64ISD::DUPLANE16; else if (EltTy == MVT::i32 || EltTy == MVT::f32) Opcode = AArch64ISD::DUPLANE32; @@ -6554,6 +6611,59 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { VT1.getSizeInBits() <= 32); } +bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const { + if (isa<FPExtInst>(Ext)) + return false; + + // Vector types are next free. + if (Ext->getType()->isVectorTy()) + return false; + + for (const Use &U : Ext->uses()) { + // The extension is free if we can fold it with a left shift in an + // addressing mode or an arithmetic operation: add, sub, and cmp. + + // Is there a shift? + const Instruction *Instr = cast<Instruction>(U.getUser()); + + // Is this a constant shift? + switch (Instr->getOpcode()) { + case Instruction::Shl: + if (!isa<ConstantInt>(Instr->getOperand(1))) + return false; + break; + case Instruction::GetElementPtr: { + gep_type_iterator GTI = gep_type_begin(Instr); + std::advance(GTI, U.getOperandNo()); + Type *IdxTy = *GTI; + // This extension will end up with a shift because of the scaling factor. + // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0. + // Get the shift amount based on the scaling factor: + // log2(sizeof(IdxTy)) - log2(8). + uint64_t ShiftAmt = + countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3; + // Is the constant foldable in the shift of the addressing mode? + // I.e., shift amount is between 1 and 4 inclusive. + if (ShiftAmt == 0 || ShiftAmt > 4) + return false; + break; + } + case Instruction::Trunc: + // Check if this is a noop. + // trunc(sext ty1 to ty2) to ty1. + if (Instr->getType() == Ext->getOperand(0)->getType()) + continue; + // FALL THROUGH. + default: + return false; + } + + // At this point we can use the bfm family, so this extension is free + // for that use. + } + return true; +} + bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType, unsigned &RequiredAligment) const { if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy()) @@ -6597,7 +6707,17 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast))) return MVT::f128; - return Size >= 8 ? MVT::i64 : MVT::i32; + if (Size >= 8 && + (memOpAlign(SrcAlign, DstAlign, 8) || + (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast))) + return MVT::i64; + + if (Size >= 4 && + (memOpAlign(SrcAlign, DstAlign, 4) || + (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast))) + return MVT::i32; + + return MVT::Other; } // 12-bit optionally shifted immediates are legal for adds. diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 5ff11e8..820613b 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -355,6 +355,8 @@ public: getPreferredVectorAction(EVT VT) const override; private: + bool isExtFreeImpl(const Instruction *Ext) const override; + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. const AArch64Subtarget *Subtarget; @@ -418,6 +420,9 @@ private: SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS, + SDValue TVal, SDValue FVal, SDLoc dl, + SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index d295c02..0c0efaf 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -1637,10 +1637,16 @@ multiclass AddSub<bit isSub, string mnemonic, SDPatternOperator OpNode = null_frag> { let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in { // Add/Subtract immediate + // Increase the weight of the immediate variant to try to match it before + // the extended register variant. + // We used to match the register variant before the immediate when the + // register argument could be implicitly zero-extended. + let AddedComplexity = 6 in def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32, mnemonic, OpNode> { let Inst{31} = 0; } + let AddedComplexity = 6 in def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64, mnemonic, OpNode> { let Inst{31} = 1; @@ -3282,6 +3288,10 @@ class LoadStoreExclusiveSimple<bits<2> sz, bit o2, bit L, bit o1, bit o0, : BaseLoadStoreExclusive<sz, o2, L, o1, o0, oops, iops, asm, operands> { bits<5> Rt; bits<5> Rn; + let Inst{20-16} = 0b11111; + let Unpredictable{20-16} = 0b11111; + let Inst{14-10} = 0b11111; + let Unpredictable{14-10} = 0b11111; let Inst{9-5} = Rn; let Inst{4-0} = Rt; @@ -5298,6 +5308,27 @@ class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode, let Inst{4-0} = Rd; } +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode, + dag oops, dag iops, string asm, + list<dag> pattern> + : I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21} = R; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm, SDPatternOperator OpNode> { def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm, @@ -5325,6 +5356,16 @@ multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm, def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>; } +multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v1i32: BaseSIMDThreeScalarTied<U, 0b10, R, opc, (outs FPR32:$dst), + (ins FPR32:$Rd, FPR32:$Rn, FPR32:$Rm), + asm, []>; + def v1i16: BaseSIMDThreeScalarTied<U, 0b01, R, opc, (outs FPR16:$dst), + (ins FPR16:$Rd, FPR16:$Rn, FPR16:$Rm), + asm, []>; +} + multiclass SIMDThreeScalarSD<bit U, bit S, bits<5> opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { @@ -5885,7 +5926,7 @@ multiclass SIMDIns { let Inst{20-18} = idx; let Inst{17-16} = 0b10; let Inst{14-12} = idx2; - let Inst{11} = 0; + let Inst{11} = {?}; } def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> { bits<2> idx; @@ -5893,7 +5934,7 @@ multiclass SIMDIns { let Inst{20-19} = idx; let Inst{18-16} = 0b100; let Inst{14-13} = idx2; - let Inst{12-11} = 0; + let Inst{12-11} = {?,?}; } def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> { bits<1> idx; @@ -5901,7 +5942,7 @@ multiclass SIMDIns { let Inst{20} = idx; let Inst{19-16} = 0b1000; let Inst{14} = idx2; - let Inst{13-11} = 0; + let Inst{13-11} = {?,?,?}; } // For all forms of the INS instruction, the "mov" mnemonic is the @@ -8517,6 +8558,174 @@ multiclass SIMDLdSt4SingleAliases<string asm> { } // end of 'let Predicates = [HasNEON]' //---------------------------------------------------------------------------- +// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract +//---------------------------------------------------------------------------- + +let Predicates = [HasNEON, HasV8_1a] in { + +class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode, + RegisterOperand regtype, string asm, + string kind, list<dag> pattern> + : BaseSIMDThreeSameVectorTied<Q, U, size, opcode, regtype, asm, kind, + pattern> { + let Inst{21}=0; +} +multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm, + SDPatternOperator Accum> { + def v4i16 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b01, opc, V64, asm, ".4h", + [(set (v4i16 V64:$dst), + (Accum (v4i16 V64:$Rd), + (v4i16 (int_aarch64_neon_sqrdmulh (v4i16 V64:$Rn), + (v4i16 V64:$Rm)))))]>; + def v8i16 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b01, opc, V128, asm, ".8h", + [(set (v8i16 V128:$dst), + (Accum (v8i16 V128:$Rd), + (v8i16 (int_aarch64_neon_sqrdmulh (v8i16 V128:$Rn), + (v8i16 V128:$Rm)))))]>; + def v2i32 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b10, opc, V64, asm, ".2s", + [(set (v2i32 V64:$dst), + (Accum (v2i32 V64:$Rd), + (v2i32 (int_aarch64_neon_sqrdmulh (v2i32 V64:$Rn), + (v2i32 V64:$Rm)))))]>; + def v4i32 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b10, opc, V128, asm, ".4s", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqrdmulh (v4i32 V128:$Rn), + (v4i32 V128:$Rm)))))]>; +} + +multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm, + SDPatternOperator Accum> { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, + V64, V64, V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$dst), + (Accum (v4i16 V64:$Rd), + (v4i16 (int_aarch64_neon_sqrdmulh + (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$dst), + (Accum (v8i16 V128:$Rd), + (v8i16 (int_aarch64_neon_sqrdmulh + (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V64, V64, V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$dst), + (Accum (v2i32 V64:$Rd), + (v2i32 (int_aarch64_neon_sqrdmulh + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but + // an intermediate EXTRACT_SUBREG would be untyped. + // FIXME: direct EXTRACT_SUBREG from v2i32 to i32 is illegal, that's why we + // got it lowered here as (i32 vector_extract (v4i32 insert_subvector(..))) + def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract + (v4i32 (insert_subvector + (undef), + (v2i32 (int_aarch64_neon_sqrdmulh + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 + (v4i32 V128:$Rm), + VectorIndexS:$idx)))), + (i32 0))), + (i64 0))))), + (EXTRACT_SUBREG + (v2i32 (!cast<Instruction>(NAME # v2i32_indexed) + (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), + FPR32Op:$Rd, + ssub)), + V64:$Rn, + V128:$Rm, + VectorIndexS:$idx)), + ssub)>; + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqrdmulh + (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but + // an intermediate EXTRACT_SUBREG would be untyped. + def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract + (v4i32 (int_aarch64_neon_sqrdmulh + (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 + (v4i32 V128:$Rm), + VectorIndexS:$idx)))), + (i64 0))))), + (EXTRACT_SUBREG + (v4i32 (!cast<Instruction>(NAME # v4i32_indexed) + (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + FPR32Op:$Rd, + ssub)), + V128:$Rn, + V128:$Rm, + VectorIndexS:$idx)), + ssub)>; + + def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc, + FPR16Op, FPR16Op, V128_lo, + VectorIndexH, asm, ".h", "", "", ".h", + []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (i32 FPR32Op:$dst), + (Accum (i32 FPR32Op:$Rd), + (i32 (int_aarch64_neon_sqrdmulh + (i32 FPR32Op:$Rn), + (i32 (vector_extract (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} +} // let Predicates = [HasNeon, HasV8_1a] + +//---------------------------------------------------------------------------- // Crypto extensions //---------------------------------------------------------------------------- diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 8e0af2d..db231c4 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1526,7 +1526,7 @@ void AArch64InstrInfo::copyPhysRegTuple( } for (; SubReg != End; SubReg += Incr) { - const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode)); + const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); @@ -1904,7 +1904,7 @@ void AArch64InstrInfo::storeRegToStackSlot( } assert(Opc && "Unknown register class"); - const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) + const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI); @@ -2002,7 +2002,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( } assert(Opc && "Unknown register class"); - const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) + const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) .addReg(DestReg, getDefRegState(true)) .addFrameIndex(FI); if (Offset) diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index ec6fa5c..f7db50a 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -14,6 +14,8 @@ //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. // +def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, + AssemblerPredicate<"HasV8_1aOps", "armv8.1a">; def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">; def HasNEON : Predicate<"Subtarget->hasNEON()">, @@ -22,8 +24,6 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">, AssemblerPredicate<"FeatureCrypto", "crypto">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; -def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">, - AssemblerPredicate<"FeatureV8_1a", "v8.1a">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsCyclone : Predicate<"Subtarget->isCyclone()">; @@ -2314,6 +2314,20 @@ def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">; def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; +let Predicates = [HasV8_1a] in { + // v8.1a "Limited Order Region" extension load-acquire instructions + def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">; + def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">; + def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">; + def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">; + + // v8.1a "Limited Order Region" extension store-release instructions + def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">; + def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">; + def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">; + def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">; +} + //===----------------------------------------------------------------------===// // Scaled floating point to integer conversion instructions. //===----------------------------------------------------------------------===// @@ -2778,6 +2792,10 @@ defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>; defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>; defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>; +defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah", + int_aarch64_neon_sqadd>; +defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh", + int_aarch64_neon_sqsub>; defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", @@ -2994,6 +3012,20 @@ defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl> defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; +let Predicates = [HasV8_1a] in { + defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">; + defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">; + def : Pat<(i32 (int_aarch64_neon_sqadd + (i32 FPR32:$Rd), + (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; + def : Pat<(i32 (int_aarch64_neon_sqsub + (i32 FPR32:$Rd), + (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; +} def : InstAlias<"cmls $dst, $src1, $src2", (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; @@ -3478,13 +3510,13 @@ def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))), // AdvSIMD INS/DUP instructions //---------------------------------------------------------------------------- -def DUPv8i8gpr : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>; -def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>; -def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>; -def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>; -def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>; -def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>; -def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>; +def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>; +def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>; +def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>; +def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>; +def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>; +def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>; +def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>; def DUPv2i64lane : SIMDDup64FromElement; def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>; @@ -4324,6 +4356,10 @@ defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", int_aarch64_neon_sqadd>; defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", int_aarch64_neon_sqsub>; +defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah", + int_aarch64_neon_sqadd>; +defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh", + int_aarch64_neon_sqsub>; defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 33c11fe..1836682 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -165,7 +165,12 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { // large enough that referencing from the FP won't result in things being // in range relatively often, we can use a base pointer to allow access // from the other direction like the SP normally works. + // Furthermore, if both variable sized objects are present, and the + // stack needs to be dynamically re-aligned, the base pointer is the only + // reliable way to reference the locals. if (MFI->hasVarSizedObjects()) { + if (needsStackRealignment(MF)) + return true; // Conservatively estimate whether the negative offset from the frame // pointer will be sufficient to reach. If a function has a smallish // frame, it's less likely to have lots of spills and callee saved @@ -181,6 +186,31 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return false; } +bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const { + + if (MF.getFunction()->hasFnAttribute("no-realign-stack")) + return false; + + return true; +} + +// FIXME: share this with other backends with identical implementation? +bool +AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const Function *F = MF.getFunction(); + unsigned StackAlign = MF.getTarget() + .getSubtargetImpl(*MF.getFunction()) + ->getFrameLowering() + ->getStackAlignment(); + bool requiresRealignment = + ((MFI->getMaxAlignment() > StackAlign) || + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackAlignment)); + + return requiresRealignment && canRealignStack(MF); +} + unsigned AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h index c01bfa5..8c379d9 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/lib/Target/AArch64/AArch64RegisterInfo.h @@ -93,6 +93,9 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; + // Base pointer (stack realignment) support. + bool canRealignStack(const MachineFunction &MF) const; + bool needsStackRealignment(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td index 3ec4157..ca4457a 100644 --- a/lib/Target/AArch64/AArch64SchedA57.td +++ b/lib/Target/AArch64/AArch64SchedA57.td @@ -60,7 +60,12 @@ include "AArch64SchedA57WriteRes.td" // Cortex-A57. The Cortex-A57 types are directly associated with resources, so // defining the aliases precludes the need for mapping them using WriteRes. The // aliases are sufficient for creating a coarse, working model. As the model -// evolves, InstRWs will be used to override these SchedAliases. +// evolves, InstRWs will be used to override some of these SchedAliases. +// +// WARNING: Using SchedAliases is convenient and works well for latency and +// resource lookup for instructions. However, this creates an entry in +// AArch64WriteLatencyTable with a WriteResourceID of 0, breaking +// any SchedReadAdvance since the lookup will fail. def : SchedAlias<WriteImm, A57Write_1cyc_1I>; def : SchedAlias<WriteI, A57Write_1cyc_1I>; @@ -70,8 +75,8 @@ def : SchedAlias<WriteExtr, A57Write_1cyc_1I>; def : SchedAlias<WriteIS, A57Write_1cyc_1I>; def : SchedAlias<WriteID32, A57Write_19cyc_1M>; def : SchedAlias<WriteID64, A57Write_35cyc_1M>; -def : SchedAlias<WriteIM32, A57Write_3cyc_1M>; -def : SchedAlias<WriteIM64, A57Write_5cyc_1M>; +def : WriteRes<WriteIM32, [A57UnitM]> { let Latency = 3; } +def : WriteRes<WriteIM64, [A57UnitM]> { let Latency = 5; } def : SchedAlias<WriteBr, A57Write_1cyc_1B>; def : SchedAlias<WriteBrReg, A57Write_1cyc_1B>; def : SchedAlias<WriteLD, A57Write_4cyc_1L>; @@ -127,6 +132,15 @@ def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>; def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>; +// Shifted Register with Shift == 0 +// ---------------------------------------------------------------------------- + +def A57WriteISReg : SchedWriteVariant<[ + SchedVar<RegShiftedPred, [WriteISReg]>, + SchedVar<NoSchedPred, [WriteI]>]>; +def : InstRW<[A57WriteISReg], (instregex ".*rs$")>; + + // Divide and Multiply Instructions // ----------------------------------------------------------------------------- diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index 221d70d..0b97af8 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -47,8 +47,9 @@ AArch64Subtarget::AArch64Subtarget(const std::string &TT, const std::string &FS, const TargetMachine &TM, bool LittleEndian) : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), + HasV8_1aOps(false), HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), - HasV8_1a(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), + HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {} diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index bcab97d..5454b20 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -37,11 +37,12 @@ protected: /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. ARMProcFamilyEnum ARMProcFamily; + bool HasV8_1aOps; + bool HasFPARMv8; bool HasNEON; bool HasCrypto; bool HasCRC; - bool HasV8_1a; // HasZeroCycleRegMove - Has zero-cycle register mov instructions. bool HasZeroCycleRegMove; @@ -93,6 +94,8 @@ public: return isCortexA53() || isCortexA57(); } + bool hasV8_1aOps() const { return HasV8_1aOps; } + bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } @@ -101,7 +104,6 @@ public: bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } - bool hasV8_1a() const { return HasV8_1a; } bool isLittleEndian() const { return IsLittle; } diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index f902f64..ab28a16 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -87,6 +87,11 @@ EnableGEPOpt("aarch64-gep-opt", cl::Hidden, cl::desc("Enable optimizations on complex GEPs"), cl::init(true)); +// FIXME: Unify control over GlobalMerge. +static cl::opt<cl::boolOrDefault> +EnableGlobalMerge("aarch64-global-merge", cl::Hidden, + cl::desc("Enable the global merge pass")); + extern "C" void LLVMInitializeAArch64Target() { // Register the target. RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget); @@ -245,7 +250,9 @@ bool AArch64PassConfig::addPreISel() { // FIXME: On AArch64, this depends on the type. // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(). // and the offset has to be a multiple of the related size in bytes. - if (TM->getOptLevel() == CodeGenOpt::Aggressive) + if ((TM->getOptLevel() == CodeGenOpt::Aggressive && + EnableGlobalMerge == cl::BOU_UNSET) || + EnableGlobalMerge == cl::BOU_TRUE) addPass(createGlobalMergePass(TM, 4095)); if (TM->getOptLevel() != CodeGenOpt::None) addPass(createAArch64AddressTypePromotionPass()); diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 1219ffc..063c714 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -1972,7 +1972,8 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { bool Valid; auto Mapper = AArch64PRFM::PRFMMapper(); - StringRef Name = Mapper.toString(MCE->getValue(), Valid); + StringRef Name = + Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid); Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name, S, getContext())); return MatchOperand_Success; @@ -1985,7 +1986,8 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { bool Valid; auto Mapper = AArch64PRFM::PRFMMapper(); - unsigned prfop = Mapper.fromString(Tok.getString(), Valid); + unsigned prfop = + Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid); if (!Valid) { TokError("pre-fetch hint expected"); return MatchOperand_ParseFail; @@ -2090,15 +2092,16 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); if (Tok.is(AsmToken::Real)) { APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); + if (isNegative) + RealVal.changeSign(); + uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); - // If we had a '-' in front, toggle the sign bit. - IntVal ^= (uint64_t)isNegative << 63; int Val = AArch64_AM::getFP64Imm(APInt(64, IntVal)); Parser.Lex(); // Eat the token. // Check for out of range values. As an exception, we let Zero through, // as we handle that special case in post-processing before matching in // order to use the zero register for it. - if (Val == -1 && !RealVal.isZero()) { + if (Val == -1 && !RealVal.isPosZero()) { TokError("expected compatible register or floating-point constant"); return MatchOperand_ParseFail; } @@ -2597,7 +2600,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { } bool Valid; auto Mapper = AArch64DB::DBarrierMapper(); - StringRef Name = Mapper.toString(MCE->getValue(), Valid); + StringRef Name = + Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid); Operands.push_back( AArch64Operand::CreateBarrier(MCE->getValue(), Name, ExprLoc, getContext())); return MatchOperand_Success; @@ -2610,7 +2614,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { bool Valid; auto Mapper = AArch64DB::DBarrierMapper(); - unsigned Opt = Mapper.fromString(Tok.getString(), Valid); + unsigned Opt = + Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid); if (!Valid) { TokError("invalid barrier option name"); return MatchOperand_ParseFail; @@ -2638,18 +2643,21 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) { return MatchOperand_NoMatch; bool IsKnown; - auto MRSMapper = AArch64SysReg::MRSMapper(STI.getFeatureBits()); - uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), IsKnown); + auto MRSMapper = AArch64SysReg::MRSMapper(); + uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), STI.getFeatureBits(), + IsKnown); assert(IsKnown == (MRSReg != -1U) && "register should be -1 if and only if it's unknown"); - auto MSRMapper = AArch64SysReg::MSRMapper(STI.getFeatureBits()); - uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), IsKnown); + auto MSRMapper = AArch64SysReg::MSRMapper(); + uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), STI.getFeatureBits(), + IsKnown); assert(IsKnown == (MSRReg != -1U) && "register should be -1 if and only if it's unknown"); auto PStateMapper = AArch64PState::PStateMapper(); - uint32_t PStateField = PStateMapper.fromString(Tok.getString(), IsKnown); + uint32_t PStateField = + PStateMapper.fromString(Tok.getString(), STI.getFeatureBits(), IsKnown); assert(IsKnown == (PStateField != -1U) && "register should be -1 if and only if it's unknown"); diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index fb25089..1c8c0a66 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -1102,6 +1102,12 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, case AArch64::STLRW: case AArch64::STLRB: case AArch64::STLRH: + case AArch64::STLLRW: + case AArch64::STLLRB: + case AArch64::STLLRH: + case AArch64::LDLARW: + case AArch64::LDLARB: + case AArch64::LDLARH: DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); break; case AArch64::STLXRX: @@ -1112,6 +1118,8 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, case AArch64::LDAXRX: case AArch64::LDXRX: case AArch64::STLRX: + case AArch64::LDLARX: + case AArch64::STLLRX: DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); break; case AArch64::STLXPW: @@ -1504,7 +1512,10 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst, Inst.addOperand(MCOperand::CreateImm(crm)); bool ValidNamed; - (void)AArch64PState::PStateMapper().toString(pstate_field, ValidNamed); + const AArch64Disassembler *Dis = + static_cast<const AArch64Disassembler *>(Decoder); + (void)AArch64PState::PStateMapper().toString(pstate_field, + Dis->getSubtargetInfo().getFeatureBits(), ValidNamed); return ValidNamed ? Success : Fail; } diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 46a1d79..febd332 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -34,18 +34,13 @@ using namespace llvm; AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); -} + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} AArch64AppleInstPrinter::AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : AArch64InstPrinter(MAI, MII, MRI, STI) {} + const MCRegisterInfo &MRI) + : AArch64InstPrinter(MAI, MII, MRI) {} void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { // This is for .cfi directives. @@ -53,7 +48,8 @@ void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, + const MCSubtargetInfo &STI) { // Check for special encodings and print the canonical alias instead. unsigned Opcode = MI->getOpcode(); @@ -210,8 +206,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - if (!printAliasInstr(MI, O)) - printInstruction(MI, O); + if (!printAliasInstr(MI, STI, O)) + printInstruction(MI, STI, O); printAnnotation(O, Annot); } @@ -614,7 +610,8 @@ static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) { } void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, + const MCSubtargetInfo &STI) { unsigned Opcode = MI->getOpcode(); StringRef Layout, Mnemonic; @@ -624,7 +621,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, << getRegisterName(MI->getOperand(0).getReg(), AArch64::vreg) << ", "; unsigned ListOpNum = IsTbx ? 2 : 1; - printVectorList(MI, ListOpNum, O, ""); + printVectorList(MI, ListOpNum, STI, O, ""); O << ", " << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), AArch64::vreg); @@ -638,7 +635,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, // Now onto the operands: first a vector list with possible lane // specifier. E.g. { v0 }[2] int OpNum = LdStDesc->ListOperand; - printVectorList(MI, OpNum++, O, ""); + printVectorList(MI, OpNum++, STI, O, ""); if (LdStDesc->HasLane) O << '[' << MI->getOperand(OpNum++).getImm() << ']'; @@ -662,7 +659,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - AArch64InstPrinter::printInst(MI, O, Annot); + AArch64InstPrinter::printInst(MI, O, Annot, STI); } bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { @@ -889,6 +886,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { } void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { @@ -903,6 +901,7 @@ void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printHexImm(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); O << format("#%#llx", Op.getImm()); @@ -922,6 +921,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); assert(Op.isReg() && "Non-register vreg operand!"); @@ -930,6 +930,7 @@ void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); assert(Op.isImm() && "System instruction C[nm] operands must be immediates!"); @@ -937,6 +938,7 @@ void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); if (MO.isImm()) { @@ -946,18 +948,19 @@ void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm()); O << '#' << Val; if (Shift != 0) - printShifter(MI, OpNum + 1, O); + printShifter(MI, OpNum + 1, STI, O); if (CommentStream) *CommentStream << '=' << (Val << Shift) << '\n'; } else { assert(MO.isExpr() && "Unexpected operand type!"); O << *MO.getExpr(); - printShifter(MI, OpNum + 1, O); + printShifter(MI, OpNum + 1, STI, O); } } void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { uint64_t Val = MI->getOperand(OpNum).getImm(); O << "#0x"; @@ -965,6 +968,7 @@ void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { uint64_t Val = MI->getOperand(OpNum).getImm(); O << "#0x"; @@ -972,6 +976,7 @@ void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNum).getImm(); // LSL #0 should not be printed. @@ -983,18 +988,21 @@ void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << getRegisterName(MI->getOperand(OpNum).getReg()); - printShifter(MI, OpNum + 1, O); + printShifter(MI, OpNum + 1, STI, O); } void AArch64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << getRegisterName(MI->getOperand(OpNum).getReg()); - printArithExtend(MI, OpNum + 1, O); + printArithExtend(MI, OpNum + 1, STI, O); } void AArch64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNum).getImm(); AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getArithExtendType(Val); @@ -1038,24 +1046,28 @@ void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm(); O << AArch64CC::getCondCodeName(CC); } void AArch64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm(); O << AArch64CC::getCondCodeName(AArch64CC::getInvertedCondCode(CC)); } void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']'; } template<int Scale> void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << '#' << Scale * MI->getOperand(OpNum).getImm(); } @@ -1085,10 +1097,12 @@ void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned prfop = MI->getOperand(OpNum).getImm(); bool Valid; - StringRef Name = AArch64PRFM::PRFMMapper().toString(prfop, Valid); + StringRef Name = + AArch64PRFM::PRFMMapper().toString(prfop, STI.getFeatureBits(), Valid); if (Valid) O << Name; else @@ -1096,6 +1110,7 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); float FPImm = @@ -1151,6 +1166,7 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { } void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O, StringRef LayoutSuffix) { unsigned Reg = MI->getOperand(OpNum).getReg(); @@ -1193,14 +1209,17 @@ void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, O << " }"; } -void AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - printVectorList(MI, OpNum, O, ""); +void +AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI, + unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + printVectorList(MI, OpNum, STI, O, ""); } template <unsigned NumLanes, char LaneKind> void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { std::string Suffix("."); if (NumLanes) @@ -1208,15 +1227,17 @@ void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum, else Suffix += LaneKind; - printVectorList(MI, OpNum, O, Suffix); + printVectorList(MI, OpNum, STI, O, Suffix); } void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "[" << MI->getOperand(OpNum).getImm() << "]"; } void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); @@ -1241,6 +1262,7 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); @@ -1256,6 +1278,7 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); unsigned Opcode = MI->getOpcode(); @@ -1263,9 +1286,11 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, bool Valid; StringRef Name; if (Opcode == AArch64::ISB) - Name = AArch64ISB::ISBMapper().toString(Val, Valid); + Name = AArch64ISB::ISBMapper().toString(Val, STI.getFeatureBits(), + Valid); else - Name = AArch64DB::DBarrierMapper().toString(Val, Valid); + Name = AArch64DB::DBarrierMapper().toString(Val, STI.getFeatureBits(), + Valid); if (Valid) O << Name; else @@ -1273,31 +1298,35 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); - auto Mapper = AArch64SysReg::MRSMapper(getAvailableFeatures()); - std::string Name = Mapper.toString(Val); + auto Mapper = AArch64SysReg::MRSMapper(); + std::string Name = Mapper.toString(Val, STI.getFeatureBits()); O << StringRef(Name).upper(); } void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); - auto Mapper = AArch64SysReg::MSRMapper(getAvailableFeatures()); - std::string Name = Mapper.toString(Val); + auto Mapper = AArch64SysReg::MSRMapper(); + std::string Name = Mapper.toString(Val, STI.getFeatureBits()); O << StringRef(Name).upper(); } void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); bool Valid; - StringRef Name = AArch64PState::PStateMapper().toString(Val, Valid); + StringRef Name = + AArch64PState::PStateMapper().toString(Val, STI.getFeatureBits(), Valid); if (Valid) O << StringRef(Name.str()).upper(); else @@ -1305,6 +1334,7 @@ void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned RawVal = MI->getOperand(OpNo).getImm(); uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal); diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 5f51621..c2077a0 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -26,16 +26,21 @@ class MCOperand; class AArch64InstPrinter : public MCInstPrinter { public: AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + const MCRegisterInfo &MRI); - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; void printRegName(raw_ostream &OS, unsigned RegNo) const override; // Autogenerated by tblgen. - virtual void printInstruction(const MCInst *MI, raw_ostream &O); - virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O); + virtual void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); + virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, - unsigned PrintMethodIdx, raw_ostream &O); + unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, + raw_ostream &O); virtual StringRef getRegName(unsigned RegNo) const { return getRegisterName(RegNo); } @@ -45,90 +50,126 @@ public: protected: bool printSysAlias(const MCInst *MI, raw_ostream &O); // Operand printers - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printHexImm(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); + void printHexImm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm, raw_ostream &O); - template<int Amount> - void printPostIncOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + template <int Amount> + void printPostIncOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { printPostIncOperand(MI, OpNo, Amount, O); } - void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printArithExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVRegOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printSysCROperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAddSubImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printLogicalImm32(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printLogicalImm64(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printShifter(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printShiftedRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printExtendedRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printArithExtend(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O, char SrcRegKind, unsigned Width); template <char SrcRegKind, unsigned Width> - void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + void printMemExtend(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { printMemExtend(MI, OpNum, O, SrcRegKind, Width); } - void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printInverseCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAlignedLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printCondCode(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printInverseCondCode(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAlignedLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printUImm12Offset(const MCInst *MI, unsigned OpNum, unsigned Scale, raw_ostream &O); void printAMIndexedWB(const MCInst *MI, unsigned OpNum, unsigned Scale, raw_ostream &O); - template<int Scale> - void printUImm12Offset(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + template <int Scale> + void printUImm12Offset(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { printUImm12Offset(MI, OpNum, Scale, O); } - template<int BitWidth> - void printAMIndexedWB(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + template <int BitWidth> + void printAMIndexedWB(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { printAMIndexedWB(MI, OpNum, BitWidth / 8, O); } - void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAMNoIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - template<int Scale> - void printImmScale(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template <int Scale> + void printImmScale(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - void printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printPrefetchOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printFPImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O, + void printVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O, StringRef LayoutSuffix); /// Print a list of vector registers where the type suffix is implicit /// (i.e. attached to the instruction rather than the registers). void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); template <unsigned NumLanes, char LaneKind> - void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMSRSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMRSSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSystemPStateField(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printTypedVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + + void printVectorIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAdrpLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printBarrierOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMSRSystemRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMRSSystemRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printSystemPStateField(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); }; class AArch64AppleInstPrinter : public AArch64InstPrinter { public: AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + const MCRegisterInfo &MRI); - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; - void printInstruction(const MCInst *MI, raw_ostream &O) override; - bool printAliasInstr(const MCInst *MI, raw_ostream &O) override; + void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O) override; + bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O) override; void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, raw_ostream &O) override; StringRef getRegName(unsigned RegNo) const override { return getRegisterName(RegNo); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 84b63a0..e5eb90c 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -313,7 +313,7 @@ public: DarwinAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI) : AArch64AsmBackend(T), MRI(MRI) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createAArch64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64, MachO::CPU_SUBTYPE_ARM64_ALL); } @@ -461,7 +461,7 @@ public: ELFAArch64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian) : AArch64AsmBackend(T), OSABI(OSABI), IsLittleEndian(IsLittleEndian) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index 5ea49c3..1f516d1 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -26,7 +26,7 @@ class AArch64ELFObjectWriter : public MCELFObjectTargetWriter { public: AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian); - virtual ~AArch64ELFObjectWriter(); + ~AArch64ELFObjectWriter() override; protected: unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, @@ -248,9 +248,9 @@ unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target, llvm_unreachable("Unimplemented fixup -> relocation"); } -MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS, - uint8_t OSABI, - bool IsLittleEndian) { +MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, + bool IsLittleEndian) { MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI, IsLittleEndian); return createELFObjectWriter(MOTW, OS, IsLittleEndian); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 8f780d2..540d1fc 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -89,12 +89,12 @@ class AArch64ELFStreamer : public MCELFStreamer { public: friend class AArch64TargetELFStreamer; - AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, - MCCodeEmitter *Emitter) + AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter) : MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0), LastEMS(EMS_None) {} - ~AArch64ELFStreamer() {} + ~AArch64ELFStreamer() override {} void ChangeSection(const MCSection *Section, const MCExpr *Subsection) override { @@ -211,8 +211,8 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, } MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll) { + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll) { AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter); if (RelaxAll) S->getAssembler().setRelaxAll(true); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h index 71b05cc..ef48203 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h @@ -19,8 +19,8 @@ namespace llvm { MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll); + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll); } #endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 9ea49f0..fd4dc47 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -40,7 +40,7 @@ class AArch64MCCodeEmitter : public MCCodeEmitter { public: AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : Ctx(ctx) {} - ~AArch64MCCodeEmitter() {} + ~AArch64MCCodeEmitter() override {} // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 38b399d..afad674 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -109,29 +109,28 @@ static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createAArch64MCInstPrinter(const Target &T, +static MCInstPrinter *createAArch64MCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) - return new AArch64InstPrinter(MAI, MII, MRI, STI); + return new AArch64InstPrinter(MAI, MII, MRI); if (SyntaxVariant == 1) - return new AArch64AppleInstPrinter(MAI, MII, MRI, STI); + return new AArch64AppleInstPrinter(MAI, MII, MRI); return nullptr; } static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, - MCAsmBackend &TAB, raw_ostream &OS, + MCAsmBackend &TAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll); } static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, bool DWARFMustBeAtTheEnd) { return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, DWARFMustBeAtTheEnd, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index 7ce303b..4705bdf 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -33,6 +33,7 @@ class StringRef; class Target; class Triple; class raw_ostream; +class raw_pwrite_stream; extern Target TheAArch64leTarget; extern Target TheAArch64beTarget; @@ -48,11 +49,13 @@ MCAsmBackend *createAArch64beAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI, +MCObjectWriter *createAArch64ELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, bool IsLittleEndian); -MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType, - uint32_t CPUSubtype); +MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS, + uint32_t CPUType, + uint32_t CPUSubtype); MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 0d9385d..61649c4 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -413,9 +413,9 @@ void AArch64MachObjectWriter::RecordRelocation( Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS, - uint32_t CPUType, - uint32_t CPUSubtype) { +MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_pwrite_stream &OS, + uint32_t CPUType, + uint32_t CPUSubtype) { return createMachObjectWriter( new AArch64MachObjectWriter(CPUType, CPUSubtype), OS, /*IsLittleEndian=*/true); diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 160c1c5..8696163 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -18,9 +18,10 @@ using namespace llvm; -StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { +StringRef AArch64NamedImmMapper::toString(uint32_t Value, uint64_t FeatureBits, + bool &Valid) const { for (unsigned i = 0; i < NumMappings; ++i) { - if (Mappings[i].Value == Value) { + if (Mappings[i].isValueEqual(Value, FeatureBits)) { Valid = true; return Mappings[i].Name; } @@ -30,10 +31,11 @@ StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { return StringRef(); } -uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const { +uint32_t AArch64NamedImmMapper::fromString(StringRef Name, uint64_t FeatureBits, + bool &Valid) const { std::string LowerCaseName = Name.lower(); for (unsigned i = 0; i < NumMappings; ++i) { - if (Mappings[i].Name == LowerCaseName) { + if (Mappings[i].isNameEqual(LowerCaseName, FeatureBits)) { Valid = true; return Mappings[i].Value; } @@ -48,744 +50,776 @@ bool AArch64NamedImmMapper::validImm(uint32_t Value) const { } const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATMappings[] = { - {"s1e1r", S1E1R}, - {"s1e2r", S1E2R}, - {"s1e3r", S1E3R}, - {"s1e1w", S1E1W}, - {"s1e2w", S1E2W}, - {"s1e3w", S1E3W}, - {"s1e0r", S1E0R}, - {"s1e0w", S1E0W}, - {"s12e1r", S12E1R}, - {"s12e1w", S12E1W}, - {"s12e0r", S12E0R}, - {"s12e0w", S12E0W}, + {"s1e1r", S1E1R, 0}, + {"s1e2r", S1E2R, 0}, + {"s1e3r", S1E3R, 0}, + {"s1e1w", S1E1W, 0}, + {"s1e2w", S1E2W, 0}, + {"s1e3w", S1E3W, 0}, + {"s1e0r", S1E0R, 0}, + {"s1e0w", S1E0W, 0}, + {"s12e1r", S12E1R, 0}, + {"s12e1w", S12E1W, 0}, + {"s12e0r", S12E0R, 0}, + {"s12e0w", S12E0W, 0}, }; AArch64AT::ATMapper::ATMapper() : AArch64NamedImmMapper(ATMappings, 0) {} const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierMappings[] = { - {"oshld", OSHLD}, - {"oshst", OSHST}, - {"osh", OSH}, - {"nshld", NSHLD}, - {"nshst", NSHST}, - {"nsh", NSH}, - {"ishld", ISHLD}, - {"ishst", ISHST}, - {"ish", ISH}, - {"ld", LD}, - {"st", ST}, - {"sy", SY} + {"oshld", OSHLD, 0}, + {"oshst", OSHST, 0}, + {"osh", OSH, 0}, + {"nshld", NSHLD, 0}, + {"nshst", NSHST, 0}, + {"nsh", NSH, 0}, + {"ishld", ISHLD, 0}, + {"ishst", ISHST, 0}, + {"ish", ISH, 0}, + {"ld", LD, 0}, + {"st", ST, 0}, + {"sy", SY, 0} }; AArch64DB::DBarrierMapper::DBarrierMapper() : AArch64NamedImmMapper(DBarrierMappings, 16u) {} const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCMappings[] = { - {"zva", ZVA}, - {"ivac", IVAC}, - {"isw", ISW}, - {"cvac", CVAC}, - {"csw", CSW}, - {"cvau", CVAU}, - {"civac", CIVAC}, - {"cisw", CISW} + {"zva", ZVA, 0}, + {"ivac", IVAC, 0}, + {"isw", ISW, 0}, + {"cvac", CVAC, 0}, + {"csw", CSW, 0}, + {"cvau", CVAU, 0}, + {"civac", CIVAC, 0}, + {"cisw", CISW, 0} }; AArch64DC::DCMapper::DCMapper() : AArch64NamedImmMapper(DCMappings, 0) {} const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICMappings[] = { - {"ialluis", IALLUIS}, - {"iallu", IALLU}, - {"ivau", IVAU} + {"ialluis", IALLUIS, 0}, + {"iallu", IALLU, 0}, + {"ivau", IVAU, 0} }; AArch64IC::ICMapper::ICMapper() : AArch64NamedImmMapper(ICMappings, 0) {} const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBMappings[] = { - {"sy", SY}, + {"sy", SY, 0}, }; AArch64ISB::ISBMapper::ISBMapper() : AArch64NamedImmMapper(ISBMappings, 16) {} const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMMappings[] = { - {"pldl1keep", PLDL1KEEP}, - {"pldl1strm", PLDL1STRM}, - {"pldl2keep", PLDL2KEEP}, - {"pldl2strm", PLDL2STRM}, - {"pldl3keep", PLDL3KEEP}, - {"pldl3strm", PLDL3STRM}, - {"plil1keep", PLIL1KEEP}, - {"plil1strm", PLIL1STRM}, - {"plil2keep", PLIL2KEEP}, - {"plil2strm", PLIL2STRM}, - {"plil3keep", PLIL3KEEP}, - {"plil3strm", PLIL3STRM}, - {"pstl1keep", PSTL1KEEP}, - {"pstl1strm", PSTL1STRM}, - {"pstl2keep", PSTL2KEEP}, - {"pstl2strm", PSTL2STRM}, - {"pstl3keep", PSTL3KEEP}, - {"pstl3strm", PSTL3STRM} + {"pldl1keep", PLDL1KEEP, 0}, + {"pldl1strm", PLDL1STRM, 0}, + {"pldl2keep", PLDL2KEEP, 0}, + {"pldl2strm", PLDL2STRM, 0}, + {"pldl3keep", PLDL3KEEP, 0}, + {"pldl3strm", PLDL3STRM, 0}, + {"plil1keep", PLIL1KEEP, 0}, + {"plil1strm", PLIL1STRM, 0}, + {"plil2keep", PLIL2KEEP, 0}, + {"plil2strm", PLIL2STRM, 0}, + {"plil3keep", PLIL3KEEP, 0}, + {"plil3strm", PLIL3STRM, 0}, + {"pstl1keep", PSTL1KEEP, 0}, + {"pstl1strm", PSTL1STRM, 0}, + {"pstl2keep", PSTL2KEEP, 0}, + {"pstl2strm", PSTL2STRM, 0}, + {"pstl3keep", PSTL3KEEP, 0}, + {"pstl3strm", PSTL3STRM, 0} }; AArch64PRFM::PRFMMapper::PRFMMapper() : AArch64NamedImmMapper(PRFMMappings, 32) {} const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings[] = { - {"spsel", SPSel}, - {"daifset", DAIFSet}, - {"daifclr", DAIFClr} + {"spsel", SPSel, 0}, + {"daifset", DAIFSet, 0}, + {"daifclr", DAIFClr, 0}, + + // v8.1a "Privileged Access Never" extension-specific PStates + {"pan", PAN, AArch64::HasV8_1aOps}, }; AArch64PState::PStateMapper::PStateMapper() : AArch64NamedImmMapper(PStateMappings, 0) {} const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = { - {"mdccsr_el0", MDCCSR_EL0}, - {"dbgdtrrx_el0", DBGDTRRX_EL0}, - {"mdrar_el1", MDRAR_EL1}, - {"oslsr_el1", OSLSR_EL1}, - {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1}, - {"pmceid0_el0", PMCEID0_EL0}, - {"pmceid1_el0", PMCEID1_EL0}, - {"midr_el1", MIDR_EL1}, - {"ccsidr_el1", CCSIDR_EL1}, - {"clidr_el1", CLIDR_EL1}, - {"ctr_el0", CTR_EL0}, - {"mpidr_el1", MPIDR_EL1}, - {"revidr_el1", REVIDR_EL1}, - {"aidr_el1", AIDR_EL1}, - {"dczid_el0", DCZID_EL0}, - {"id_pfr0_el1", ID_PFR0_EL1}, - {"id_pfr1_el1", ID_PFR1_EL1}, - {"id_dfr0_el1", ID_DFR0_EL1}, - {"id_afr0_el1", ID_AFR0_EL1}, - {"id_mmfr0_el1", ID_MMFR0_EL1}, - {"id_mmfr1_el1", ID_MMFR1_EL1}, - {"id_mmfr2_el1", ID_MMFR2_EL1}, - {"id_mmfr3_el1", ID_MMFR3_EL1}, - {"id_isar0_el1", ID_ISAR0_EL1}, - {"id_isar1_el1", ID_ISAR1_EL1}, - {"id_isar2_el1", ID_ISAR2_EL1}, - {"id_isar3_el1", ID_ISAR3_EL1}, - {"id_isar4_el1", ID_ISAR4_EL1}, - {"id_isar5_el1", ID_ISAR5_EL1}, - {"id_aa64pfr0_el1", ID_A64PFR0_EL1}, - {"id_aa64pfr1_el1", ID_A64PFR1_EL1}, - {"id_aa64dfr0_el1", ID_A64DFR0_EL1}, - {"id_aa64dfr1_el1", ID_A64DFR1_EL1}, - {"id_aa64afr0_el1", ID_A64AFR0_EL1}, - {"id_aa64afr1_el1", ID_A64AFR1_EL1}, - {"id_aa64isar0_el1", ID_A64ISAR0_EL1}, - {"id_aa64isar1_el1", ID_A64ISAR1_EL1}, - {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1}, - {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1}, - {"mvfr0_el1", MVFR0_EL1}, - {"mvfr1_el1", MVFR1_EL1}, - {"mvfr2_el1", MVFR2_EL1}, - {"rvbar_el1", RVBAR_EL1}, - {"rvbar_el2", RVBAR_EL2}, - {"rvbar_el3", RVBAR_EL3}, - {"isr_el1", ISR_EL1}, - {"cntpct_el0", CNTPCT_EL0}, - {"cntvct_el0", CNTVCT_EL0}, + {"mdccsr_el0", MDCCSR_EL0, 0}, + {"dbgdtrrx_el0", DBGDTRRX_EL0, 0}, + {"mdrar_el1", MDRAR_EL1, 0}, + {"oslsr_el1", OSLSR_EL1, 0}, + {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1, 0}, + {"pmceid0_el0", PMCEID0_EL0, 0}, + {"pmceid1_el0", PMCEID1_EL0, 0}, + {"midr_el1", MIDR_EL1, 0}, + {"ccsidr_el1", CCSIDR_EL1, 0}, + {"clidr_el1", CLIDR_EL1, 0}, + {"ctr_el0", CTR_EL0, 0}, + {"mpidr_el1", MPIDR_EL1, 0}, + {"revidr_el1", REVIDR_EL1, 0}, + {"aidr_el1", AIDR_EL1, 0}, + {"dczid_el0", DCZID_EL0, 0}, + {"id_pfr0_el1", ID_PFR0_EL1, 0}, + {"id_pfr1_el1", ID_PFR1_EL1, 0}, + {"id_dfr0_el1", ID_DFR0_EL1, 0}, + {"id_afr0_el1", ID_AFR0_EL1, 0}, + {"id_mmfr0_el1", ID_MMFR0_EL1, 0}, + {"id_mmfr1_el1", ID_MMFR1_EL1, 0}, + {"id_mmfr2_el1", ID_MMFR2_EL1, 0}, + {"id_mmfr3_el1", ID_MMFR3_EL1, 0}, + {"id_isar0_el1", ID_ISAR0_EL1, 0}, + {"id_isar1_el1", ID_ISAR1_EL1, 0}, + {"id_isar2_el1", ID_ISAR2_EL1, 0}, + {"id_isar3_el1", ID_ISAR3_EL1, 0}, + {"id_isar4_el1", ID_ISAR4_EL1, 0}, + {"id_isar5_el1", ID_ISAR5_EL1, 0}, + {"id_aa64pfr0_el1", ID_A64PFR0_EL1, 0}, + {"id_aa64pfr1_el1", ID_A64PFR1_EL1, 0}, + {"id_aa64dfr0_el1", ID_A64DFR0_EL1, 0}, + {"id_aa64dfr1_el1", ID_A64DFR1_EL1, 0}, + {"id_aa64afr0_el1", ID_A64AFR0_EL1, 0}, + {"id_aa64afr1_el1", ID_A64AFR1_EL1, 0}, + {"id_aa64isar0_el1", ID_A64ISAR0_EL1, 0}, + {"id_aa64isar1_el1", ID_A64ISAR1_EL1, 0}, + {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1, 0}, + {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1, 0}, + {"mvfr0_el1", MVFR0_EL1, 0}, + {"mvfr1_el1", MVFR1_EL1, 0}, + {"mvfr2_el1", MVFR2_EL1, 0}, + {"rvbar_el1", RVBAR_EL1, 0}, + {"rvbar_el2", RVBAR_EL2, 0}, + {"rvbar_el3", RVBAR_EL3, 0}, + {"isr_el1", ISR_EL1, 0}, + {"cntpct_el0", CNTPCT_EL0, 0}, + {"cntvct_el0", CNTVCT_EL0, 0}, // Trace registers - {"trcstatr", TRCSTATR}, - {"trcidr8", TRCIDR8}, - {"trcidr9", TRCIDR9}, - {"trcidr10", TRCIDR10}, - {"trcidr11", TRCIDR11}, - {"trcidr12", TRCIDR12}, - {"trcidr13", TRCIDR13}, - {"trcidr0", TRCIDR0}, - {"trcidr1", TRCIDR1}, - {"trcidr2", TRCIDR2}, - {"trcidr3", TRCIDR3}, - {"trcidr4", TRCIDR4}, - {"trcidr5", TRCIDR5}, - {"trcidr6", TRCIDR6}, - {"trcidr7", TRCIDR7}, - {"trcoslsr", TRCOSLSR}, - {"trcpdsr", TRCPDSR}, - {"trcdevaff0", TRCDEVAFF0}, - {"trcdevaff1", TRCDEVAFF1}, - {"trclsr", TRCLSR}, - {"trcauthstatus", TRCAUTHSTATUS}, - {"trcdevarch", TRCDEVARCH}, - {"trcdevid", TRCDEVID}, - {"trcdevtype", TRCDEVTYPE}, - {"trcpidr4", TRCPIDR4}, - {"trcpidr5", TRCPIDR5}, - {"trcpidr6", TRCPIDR6}, - {"trcpidr7", TRCPIDR7}, - {"trcpidr0", TRCPIDR0}, - {"trcpidr1", TRCPIDR1}, - {"trcpidr2", TRCPIDR2}, - {"trcpidr3", TRCPIDR3}, - {"trccidr0", TRCCIDR0}, - {"trccidr1", TRCCIDR1}, - {"trccidr2", TRCCIDR2}, - {"trccidr3", TRCCIDR3}, + {"trcstatr", TRCSTATR, 0}, + {"trcidr8", TRCIDR8, 0}, + {"trcidr9", TRCIDR9, 0}, + {"trcidr10", TRCIDR10, 0}, + {"trcidr11", TRCIDR11, 0}, + {"trcidr12", TRCIDR12, 0}, + {"trcidr13", TRCIDR13, 0}, + {"trcidr0", TRCIDR0, 0}, + {"trcidr1", TRCIDR1, 0}, + {"trcidr2", TRCIDR2, 0}, + {"trcidr3", TRCIDR3, 0}, + {"trcidr4", TRCIDR4, 0}, + {"trcidr5", TRCIDR5, 0}, + {"trcidr6", TRCIDR6, 0}, + {"trcidr7", TRCIDR7, 0}, + {"trcoslsr", TRCOSLSR, 0}, + {"trcpdsr", TRCPDSR, 0}, + {"trcdevaff0", TRCDEVAFF0, 0}, + {"trcdevaff1", TRCDEVAFF1, 0}, + {"trclsr", TRCLSR, 0}, + {"trcauthstatus", TRCAUTHSTATUS, 0}, + {"trcdevarch", TRCDEVARCH, 0}, + {"trcdevid", TRCDEVID, 0}, + {"trcdevtype", TRCDEVTYPE, 0}, + {"trcpidr4", TRCPIDR4, 0}, + {"trcpidr5", TRCPIDR5, 0}, + {"trcpidr6", TRCPIDR6, 0}, + {"trcpidr7", TRCPIDR7, 0}, + {"trcpidr0", TRCPIDR0, 0}, + {"trcpidr1", TRCPIDR1, 0}, + {"trcpidr2", TRCPIDR2, 0}, + {"trcpidr3", TRCPIDR3, 0}, + {"trccidr0", TRCCIDR0, 0}, + {"trccidr1", TRCCIDR1, 0}, + {"trccidr2", TRCCIDR2, 0}, + {"trccidr3", TRCCIDR3, 0}, // GICv3 registers - {"icc_iar1_el1", ICC_IAR1_EL1}, - {"icc_iar0_el1", ICC_IAR0_EL1}, - {"icc_hppir1_el1", ICC_HPPIR1_EL1}, - {"icc_hppir0_el1", ICC_HPPIR0_EL1}, - {"icc_rpr_el1", ICC_RPR_EL1}, - {"ich_vtr_el2", ICH_VTR_EL2}, - {"ich_eisr_el2", ICH_EISR_EL2}, - {"ich_elsr_el2", ICH_ELSR_EL2} + {"icc_iar1_el1", ICC_IAR1_EL1, 0}, + {"icc_iar0_el1", ICC_IAR0_EL1, 0}, + {"icc_hppir1_el1", ICC_HPPIR1_EL1, 0}, + {"icc_hppir0_el1", ICC_HPPIR0_EL1, 0}, + {"icc_rpr_el1", ICC_RPR_EL1, 0}, + {"ich_vtr_el2", ICH_VTR_EL2, 0}, + {"ich_eisr_el2", ICH_EISR_EL2, 0}, + {"ich_elsr_el2", ICH_ELSR_EL2, 0} }; -AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits) - : SysRegMapper(FeatureBits) { +AArch64SysReg::MRSMapper::MRSMapper() { InstMappings = &MRSMappings[0]; NumInstMappings = llvm::array_lengthof(MRSMappings); } const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = { - {"dbgdtrtx_el0", DBGDTRTX_EL0}, - {"oslar_el1", OSLAR_EL1}, - {"pmswinc_el0", PMSWINC_EL0}, + {"dbgdtrtx_el0", DBGDTRTX_EL0, 0}, + {"oslar_el1", OSLAR_EL1, 0}, + {"pmswinc_el0", PMSWINC_EL0, 0}, // Trace registers - {"trcoslar", TRCOSLAR}, - {"trclar", TRCLAR}, + {"trcoslar", TRCOSLAR, 0}, + {"trclar", TRCLAR, 0}, // GICv3 registers - {"icc_eoir1_el1", ICC_EOIR1_EL1}, - {"icc_eoir0_el1", ICC_EOIR0_EL1}, - {"icc_dir_el1", ICC_DIR_EL1}, - {"icc_sgi1r_el1", ICC_SGI1R_EL1}, - {"icc_asgi1r_el1", ICC_ASGI1R_EL1}, - {"icc_sgi0r_el1", ICC_SGI0R_EL1} + {"icc_eoir1_el1", ICC_EOIR1_EL1, 0}, + {"icc_eoir0_el1", ICC_EOIR0_EL1, 0}, + {"icc_dir_el1", ICC_DIR_EL1, 0}, + {"icc_sgi1r_el1", ICC_SGI1R_EL1, 0}, + {"icc_asgi1r_el1", ICC_ASGI1R_EL1, 0}, + {"icc_sgi0r_el1", ICC_SGI0R_EL1, 0}, + + // v8.1a "Privileged Access Never" extension-specific system registers + {"pan", PAN, AArch64::HasV8_1aOps}, }; -AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits) - : SysRegMapper(FeatureBits) { +AArch64SysReg::MSRMapper::MSRMapper() { InstMappings = &MSRMappings[0]; NumInstMappings = llvm::array_lengthof(MSRMappings); } const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings[] = { - {"osdtrrx_el1", OSDTRRX_EL1}, - {"osdtrtx_el1", OSDTRTX_EL1}, - {"teecr32_el1", TEECR32_EL1}, - {"mdccint_el1", MDCCINT_EL1}, - {"mdscr_el1", MDSCR_EL1}, - {"dbgdtr_el0", DBGDTR_EL0}, - {"oseccr_el1", OSECCR_EL1}, - {"dbgvcr32_el2", DBGVCR32_EL2}, - {"dbgbvr0_el1", DBGBVR0_EL1}, - {"dbgbvr1_el1", DBGBVR1_EL1}, - {"dbgbvr2_el1", DBGBVR2_EL1}, - {"dbgbvr3_el1", DBGBVR3_EL1}, - {"dbgbvr4_el1", DBGBVR4_EL1}, - {"dbgbvr5_el1", DBGBVR5_EL1}, - {"dbgbvr6_el1", DBGBVR6_EL1}, - {"dbgbvr7_el1", DBGBVR7_EL1}, - {"dbgbvr8_el1", DBGBVR8_EL1}, - {"dbgbvr9_el1", DBGBVR9_EL1}, - {"dbgbvr10_el1", DBGBVR10_EL1}, - {"dbgbvr11_el1", DBGBVR11_EL1}, - {"dbgbvr12_el1", DBGBVR12_EL1}, - {"dbgbvr13_el1", DBGBVR13_EL1}, - {"dbgbvr14_el1", DBGBVR14_EL1}, - {"dbgbvr15_el1", DBGBVR15_EL1}, - {"dbgbcr0_el1", DBGBCR0_EL1}, - {"dbgbcr1_el1", DBGBCR1_EL1}, - {"dbgbcr2_el1", DBGBCR2_EL1}, - {"dbgbcr3_el1", DBGBCR3_EL1}, - {"dbgbcr4_el1", DBGBCR4_EL1}, - {"dbgbcr5_el1", DBGBCR5_EL1}, - {"dbgbcr6_el1", DBGBCR6_EL1}, - {"dbgbcr7_el1", DBGBCR7_EL1}, - {"dbgbcr8_el1", DBGBCR8_EL1}, - {"dbgbcr9_el1", DBGBCR9_EL1}, - {"dbgbcr10_el1", DBGBCR10_EL1}, - {"dbgbcr11_el1", DBGBCR11_EL1}, - {"dbgbcr12_el1", DBGBCR12_EL1}, - {"dbgbcr13_el1", DBGBCR13_EL1}, - {"dbgbcr14_el1", DBGBCR14_EL1}, - {"dbgbcr15_el1", DBGBCR15_EL1}, - {"dbgwvr0_el1", DBGWVR0_EL1}, - {"dbgwvr1_el1", DBGWVR1_EL1}, - {"dbgwvr2_el1", DBGWVR2_EL1}, - {"dbgwvr3_el1", DBGWVR3_EL1}, - {"dbgwvr4_el1", DBGWVR4_EL1}, - {"dbgwvr5_el1", DBGWVR5_EL1}, - {"dbgwvr6_el1", DBGWVR6_EL1}, - {"dbgwvr7_el1", DBGWVR7_EL1}, - {"dbgwvr8_el1", DBGWVR8_EL1}, - {"dbgwvr9_el1", DBGWVR9_EL1}, - {"dbgwvr10_el1", DBGWVR10_EL1}, - {"dbgwvr11_el1", DBGWVR11_EL1}, - {"dbgwvr12_el1", DBGWVR12_EL1}, - {"dbgwvr13_el1", DBGWVR13_EL1}, - {"dbgwvr14_el1", DBGWVR14_EL1}, - {"dbgwvr15_el1", DBGWVR15_EL1}, - {"dbgwcr0_el1", DBGWCR0_EL1}, - {"dbgwcr1_el1", DBGWCR1_EL1}, - {"dbgwcr2_el1", DBGWCR2_EL1}, - {"dbgwcr3_el1", DBGWCR3_EL1}, - {"dbgwcr4_el1", DBGWCR4_EL1}, - {"dbgwcr5_el1", DBGWCR5_EL1}, - {"dbgwcr6_el1", DBGWCR6_EL1}, - {"dbgwcr7_el1", DBGWCR7_EL1}, - {"dbgwcr8_el1", DBGWCR8_EL1}, - {"dbgwcr9_el1", DBGWCR9_EL1}, - {"dbgwcr10_el1", DBGWCR10_EL1}, - {"dbgwcr11_el1", DBGWCR11_EL1}, - {"dbgwcr12_el1", DBGWCR12_EL1}, - {"dbgwcr13_el1", DBGWCR13_EL1}, - {"dbgwcr14_el1", DBGWCR14_EL1}, - {"dbgwcr15_el1", DBGWCR15_EL1}, - {"teehbr32_el1", TEEHBR32_EL1}, - {"osdlr_el1", OSDLR_EL1}, - {"dbgprcr_el1", DBGPRCR_EL1}, - {"dbgclaimset_el1", DBGCLAIMSET_EL1}, - {"dbgclaimclr_el1", DBGCLAIMCLR_EL1}, - {"csselr_el1", CSSELR_EL1}, - {"vpidr_el2", VPIDR_EL2}, - {"vmpidr_el2", VMPIDR_EL2}, - {"sctlr_el1", SCTLR_EL1}, - {"sctlr_el2", SCTLR_EL2}, - {"sctlr_el3", SCTLR_EL3}, - {"actlr_el1", ACTLR_EL1}, - {"actlr_el2", ACTLR_EL2}, - {"actlr_el3", ACTLR_EL3}, - {"cpacr_el1", CPACR_EL1}, - {"hcr_el2", HCR_EL2}, - {"scr_el3", SCR_EL3}, - {"mdcr_el2", MDCR_EL2}, - {"sder32_el3", SDER32_EL3}, - {"cptr_el2", CPTR_EL2}, - {"cptr_el3", CPTR_EL3}, - {"hstr_el2", HSTR_EL2}, - {"hacr_el2", HACR_EL2}, - {"mdcr_el3", MDCR_EL3}, - {"ttbr0_el1", TTBR0_EL1}, - {"ttbr0_el2", TTBR0_EL2}, - {"ttbr0_el3", TTBR0_EL3}, - {"ttbr1_el1", TTBR1_EL1}, - {"tcr_el1", TCR_EL1}, - {"tcr_el2", TCR_EL2}, - {"tcr_el3", TCR_EL3}, - {"vttbr_el2", VTTBR_EL2}, - {"vtcr_el2", VTCR_EL2}, - {"dacr32_el2", DACR32_EL2}, - {"spsr_el1", SPSR_EL1}, - {"spsr_el2", SPSR_EL2}, - {"spsr_el3", SPSR_EL3}, - {"elr_el1", ELR_EL1}, - {"elr_el2", ELR_EL2}, - {"elr_el3", ELR_EL3}, - {"sp_el0", SP_EL0}, - {"sp_el1", SP_EL1}, - {"sp_el2", SP_EL2}, - {"spsel", SPSel}, - {"nzcv", NZCV}, - {"daif", DAIF}, - {"currentel", CurrentEL}, - {"spsr_irq", SPSR_irq}, - {"spsr_abt", SPSR_abt}, - {"spsr_und", SPSR_und}, - {"spsr_fiq", SPSR_fiq}, - {"fpcr", FPCR}, - {"fpsr", FPSR}, - {"dspsr_el0", DSPSR_EL0}, - {"dlr_el0", DLR_EL0}, - {"ifsr32_el2", IFSR32_EL2}, - {"afsr0_el1", AFSR0_EL1}, - {"afsr0_el2", AFSR0_EL2}, - {"afsr0_el3", AFSR0_EL3}, - {"afsr1_el1", AFSR1_EL1}, - {"afsr1_el2", AFSR1_EL2}, - {"afsr1_el3", AFSR1_EL3}, - {"esr_el1", ESR_EL1}, - {"esr_el2", ESR_EL2}, - {"esr_el3", ESR_EL3}, - {"fpexc32_el2", FPEXC32_EL2}, - {"far_el1", FAR_EL1}, - {"far_el2", FAR_EL2}, - {"far_el3", FAR_EL3}, - {"hpfar_el2", HPFAR_EL2}, - {"par_el1", PAR_EL1}, - {"pmcr_el0", PMCR_EL0}, - {"pmcntenset_el0", PMCNTENSET_EL0}, - {"pmcntenclr_el0", PMCNTENCLR_EL0}, - {"pmovsclr_el0", PMOVSCLR_EL0}, - {"pmselr_el0", PMSELR_EL0}, - {"pmccntr_el0", PMCCNTR_EL0}, - {"pmxevtyper_el0", PMXEVTYPER_EL0}, - {"pmxevcntr_el0", PMXEVCNTR_EL0}, - {"pmuserenr_el0", PMUSERENR_EL0}, - {"pmintenset_el1", PMINTENSET_EL1}, - {"pmintenclr_el1", PMINTENCLR_EL1}, - {"pmovsset_el0", PMOVSSET_EL0}, - {"mair_el1", MAIR_EL1}, - {"mair_el2", MAIR_EL2}, - {"mair_el3", MAIR_EL3}, - {"amair_el1", AMAIR_EL1}, - {"amair_el2", AMAIR_EL2}, - {"amair_el3", AMAIR_EL3}, - {"vbar_el1", VBAR_EL1}, - {"vbar_el2", VBAR_EL2}, - {"vbar_el3", VBAR_EL3}, - {"rmr_el1", RMR_EL1}, - {"rmr_el2", RMR_EL2}, - {"rmr_el3", RMR_EL3}, - {"contextidr_el1", CONTEXTIDR_EL1}, - {"tpidr_el0", TPIDR_EL0}, - {"tpidr_el2", TPIDR_EL2}, - {"tpidr_el3", TPIDR_EL3}, - {"tpidrro_el0", TPIDRRO_EL0}, - {"tpidr_el1", TPIDR_EL1}, - {"cntfrq_el0", CNTFRQ_EL0}, - {"cntvoff_el2", CNTVOFF_EL2}, - {"cntkctl_el1", CNTKCTL_EL1}, - {"cnthctl_el2", CNTHCTL_EL2}, - {"cntp_tval_el0", CNTP_TVAL_EL0}, - {"cnthp_tval_el2", CNTHP_TVAL_EL2}, - {"cntps_tval_el1", CNTPS_TVAL_EL1}, - {"cntp_ctl_el0", CNTP_CTL_EL0}, - {"cnthp_ctl_el2", CNTHP_CTL_EL2}, - {"cntps_ctl_el1", CNTPS_CTL_EL1}, - {"cntp_cval_el0", CNTP_CVAL_EL0}, - {"cnthp_cval_el2", CNTHP_CVAL_EL2}, - {"cntps_cval_el1", CNTPS_CVAL_EL1}, - {"cntv_tval_el0", CNTV_TVAL_EL0}, - {"cntv_ctl_el0", CNTV_CTL_EL0}, - {"cntv_cval_el0", CNTV_CVAL_EL0}, - {"pmevcntr0_el0", PMEVCNTR0_EL0}, - {"pmevcntr1_el0", PMEVCNTR1_EL0}, - {"pmevcntr2_el0", PMEVCNTR2_EL0}, - {"pmevcntr3_el0", PMEVCNTR3_EL0}, - {"pmevcntr4_el0", PMEVCNTR4_EL0}, - {"pmevcntr5_el0", PMEVCNTR5_EL0}, - {"pmevcntr6_el0", PMEVCNTR6_EL0}, - {"pmevcntr7_el0", PMEVCNTR7_EL0}, - {"pmevcntr8_el0", PMEVCNTR8_EL0}, - {"pmevcntr9_el0", PMEVCNTR9_EL0}, - {"pmevcntr10_el0", PMEVCNTR10_EL0}, - {"pmevcntr11_el0", PMEVCNTR11_EL0}, - {"pmevcntr12_el0", PMEVCNTR12_EL0}, - {"pmevcntr13_el0", PMEVCNTR13_EL0}, - {"pmevcntr14_el0", PMEVCNTR14_EL0}, - {"pmevcntr15_el0", PMEVCNTR15_EL0}, - {"pmevcntr16_el0", PMEVCNTR16_EL0}, - {"pmevcntr17_el0", PMEVCNTR17_EL0}, - {"pmevcntr18_el0", PMEVCNTR18_EL0}, - {"pmevcntr19_el0", PMEVCNTR19_EL0}, - {"pmevcntr20_el0", PMEVCNTR20_EL0}, - {"pmevcntr21_el0", PMEVCNTR21_EL0}, - {"pmevcntr22_el0", PMEVCNTR22_EL0}, - {"pmevcntr23_el0", PMEVCNTR23_EL0}, - {"pmevcntr24_el0", PMEVCNTR24_EL0}, - {"pmevcntr25_el0", PMEVCNTR25_EL0}, - {"pmevcntr26_el0", PMEVCNTR26_EL0}, - {"pmevcntr27_el0", PMEVCNTR27_EL0}, - {"pmevcntr28_el0", PMEVCNTR28_EL0}, - {"pmevcntr29_el0", PMEVCNTR29_EL0}, - {"pmevcntr30_el0", PMEVCNTR30_EL0}, - {"pmccfiltr_el0", PMCCFILTR_EL0}, - {"pmevtyper0_el0", PMEVTYPER0_EL0}, - {"pmevtyper1_el0", PMEVTYPER1_EL0}, - {"pmevtyper2_el0", PMEVTYPER2_EL0}, - {"pmevtyper3_el0", PMEVTYPER3_EL0}, - {"pmevtyper4_el0", PMEVTYPER4_EL0}, - {"pmevtyper5_el0", PMEVTYPER5_EL0}, - {"pmevtyper6_el0", PMEVTYPER6_EL0}, - {"pmevtyper7_el0", PMEVTYPER7_EL0}, - {"pmevtyper8_el0", PMEVTYPER8_EL0}, - {"pmevtyper9_el0", PMEVTYPER9_EL0}, - {"pmevtyper10_el0", PMEVTYPER10_EL0}, - {"pmevtyper11_el0", PMEVTYPER11_EL0}, - {"pmevtyper12_el0", PMEVTYPER12_EL0}, - {"pmevtyper13_el0", PMEVTYPER13_EL0}, - {"pmevtyper14_el0", PMEVTYPER14_EL0}, - {"pmevtyper15_el0", PMEVTYPER15_EL0}, - {"pmevtyper16_el0", PMEVTYPER16_EL0}, - {"pmevtyper17_el0", PMEVTYPER17_EL0}, - {"pmevtyper18_el0", PMEVTYPER18_EL0}, - {"pmevtyper19_el0", PMEVTYPER19_EL0}, - {"pmevtyper20_el0", PMEVTYPER20_EL0}, - {"pmevtyper21_el0", PMEVTYPER21_EL0}, - {"pmevtyper22_el0", PMEVTYPER22_EL0}, - {"pmevtyper23_el0", PMEVTYPER23_EL0}, - {"pmevtyper24_el0", PMEVTYPER24_EL0}, - {"pmevtyper25_el0", PMEVTYPER25_EL0}, - {"pmevtyper26_el0", PMEVTYPER26_EL0}, - {"pmevtyper27_el0", PMEVTYPER27_EL0}, - {"pmevtyper28_el0", PMEVTYPER28_EL0}, - {"pmevtyper29_el0", PMEVTYPER29_EL0}, - {"pmevtyper30_el0", PMEVTYPER30_EL0}, + {"osdtrrx_el1", OSDTRRX_EL1, 0}, + {"osdtrtx_el1", OSDTRTX_EL1, 0}, + {"teecr32_el1", TEECR32_EL1, 0}, + {"mdccint_el1", MDCCINT_EL1, 0}, + {"mdscr_el1", MDSCR_EL1, 0}, + {"dbgdtr_el0", DBGDTR_EL0, 0}, + {"oseccr_el1", OSECCR_EL1, 0}, + {"dbgvcr32_el2", DBGVCR32_EL2, 0}, + {"dbgbvr0_el1", DBGBVR0_EL1, 0}, + {"dbgbvr1_el1", DBGBVR1_EL1, 0}, + {"dbgbvr2_el1", DBGBVR2_EL1, 0}, + {"dbgbvr3_el1", DBGBVR3_EL1, 0}, + {"dbgbvr4_el1", DBGBVR4_EL1, 0}, + {"dbgbvr5_el1", DBGBVR5_EL1, 0}, + {"dbgbvr6_el1", DBGBVR6_EL1, 0}, + {"dbgbvr7_el1", DBGBVR7_EL1, 0}, + {"dbgbvr8_el1", DBGBVR8_EL1, 0}, + {"dbgbvr9_el1", DBGBVR9_EL1, 0}, + {"dbgbvr10_el1", DBGBVR10_EL1, 0}, + {"dbgbvr11_el1", DBGBVR11_EL1, 0}, + {"dbgbvr12_el1", DBGBVR12_EL1, 0}, + {"dbgbvr13_el1", DBGBVR13_EL1, 0}, + {"dbgbvr14_el1", DBGBVR14_EL1, 0}, + {"dbgbvr15_el1", DBGBVR15_EL1, 0}, + {"dbgbcr0_el1", DBGBCR0_EL1, 0}, + {"dbgbcr1_el1", DBGBCR1_EL1, 0}, + {"dbgbcr2_el1", DBGBCR2_EL1, 0}, + {"dbgbcr3_el1", DBGBCR3_EL1, 0}, + {"dbgbcr4_el1", DBGBCR4_EL1, 0}, + {"dbgbcr5_el1", DBGBCR5_EL1, 0}, + {"dbgbcr6_el1", DBGBCR6_EL1, 0}, + {"dbgbcr7_el1", DBGBCR7_EL1, 0}, + {"dbgbcr8_el1", DBGBCR8_EL1, 0}, + {"dbgbcr9_el1", DBGBCR9_EL1, 0}, + {"dbgbcr10_el1", DBGBCR10_EL1, 0}, + {"dbgbcr11_el1", DBGBCR11_EL1, 0}, + {"dbgbcr12_el1", DBGBCR12_EL1, 0}, + {"dbgbcr13_el1", DBGBCR13_EL1, 0}, + {"dbgbcr14_el1", DBGBCR14_EL1, 0}, + {"dbgbcr15_el1", DBGBCR15_EL1, 0}, + {"dbgwvr0_el1", DBGWVR0_EL1, 0}, + {"dbgwvr1_el1", DBGWVR1_EL1, 0}, + {"dbgwvr2_el1", DBGWVR2_EL1, 0}, + {"dbgwvr3_el1", DBGWVR3_EL1, 0}, + {"dbgwvr4_el1", DBGWVR4_EL1, 0}, + {"dbgwvr5_el1", DBGWVR5_EL1, 0}, + {"dbgwvr6_el1", DBGWVR6_EL1, 0}, + {"dbgwvr7_el1", DBGWVR7_EL1, 0}, + {"dbgwvr8_el1", DBGWVR8_EL1, 0}, + {"dbgwvr9_el1", DBGWVR9_EL1, 0}, + {"dbgwvr10_el1", DBGWVR10_EL1, 0}, + {"dbgwvr11_el1", DBGWVR11_EL1, 0}, + {"dbgwvr12_el1", DBGWVR12_EL1, 0}, + {"dbgwvr13_el1", DBGWVR13_EL1, 0}, + {"dbgwvr14_el1", DBGWVR14_EL1, 0}, + {"dbgwvr15_el1", DBGWVR15_EL1, 0}, + {"dbgwcr0_el1", DBGWCR0_EL1, 0}, + {"dbgwcr1_el1", DBGWCR1_EL1, 0}, + {"dbgwcr2_el1", DBGWCR2_EL1, 0}, + {"dbgwcr3_el1", DBGWCR3_EL1, 0}, + {"dbgwcr4_el1", DBGWCR4_EL1, 0}, + {"dbgwcr5_el1", DBGWCR5_EL1, 0}, + {"dbgwcr6_el1", DBGWCR6_EL1, 0}, + {"dbgwcr7_el1", DBGWCR7_EL1, 0}, + {"dbgwcr8_el1", DBGWCR8_EL1, 0}, + {"dbgwcr9_el1", DBGWCR9_EL1, 0}, + {"dbgwcr10_el1", DBGWCR10_EL1, 0}, + {"dbgwcr11_el1", DBGWCR11_EL1, 0}, + {"dbgwcr12_el1", DBGWCR12_EL1, 0}, + {"dbgwcr13_el1", DBGWCR13_EL1, 0}, + {"dbgwcr14_el1", DBGWCR14_EL1, 0}, + {"dbgwcr15_el1", DBGWCR15_EL1, 0}, + {"teehbr32_el1", TEEHBR32_EL1, 0}, + {"osdlr_el1", OSDLR_EL1, 0}, + {"dbgprcr_el1", DBGPRCR_EL1, 0}, + {"dbgclaimset_el1", DBGCLAIMSET_EL1, 0}, + {"dbgclaimclr_el1", DBGCLAIMCLR_EL1, 0}, + {"csselr_el1", CSSELR_EL1, 0}, + {"vpidr_el2", VPIDR_EL2, 0}, + {"vmpidr_el2", VMPIDR_EL2, 0}, + {"sctlr_el1", SCTLR_EL1, 0}, + {"sctlr_el2", SCTLR_EL2, 0}, + {"sctlr_el3", SCTLR_EL3, 0}, + {"actlr_el1", ACTLR_EL1, 0}, + {"actlr_el2", ACTLR_EL2, 0}, + {"actlr_el3", ACTLR_EL3, 0}, + {"cpacr_el1", CPACR_EL1, 0}, + {"hcr_el2", HCR_EL2, 0}, + {"scr_el3", SCR_EL3, 0}, + {"mdcr_el2", MDCR_EL2, 0}, + {"sder32_el3", SDER32_EL3, 0}, + {"cptr_el2", CPTR_EL2, 0}, + {"cptr_el3", CPTR_EL3, 0}, + {"hstr_el2", HSTR_EL2, 0}, + {"hacr_el2", HACR_EL2, 0}, + {"mdcr_el3", MDCR_EL3, 0}, + {"ttbr0_el1", TTBR0_EL1, 0}, + {"ttbr0_el2", TTBR0_EL2, 0}, + {"ttbr0_el3", TTBR0_EL3, 0}, + {"ttbr1_el1", TTBR1_EL1, 0}, + {"tcr_el1", TCR_EL1, 0}, + {"tcr_el2", TCR_EL2, 0}, + {"tcr_el3", TCR_EL3, 0}, + {"vttbr_el2", VTTBR_EL2, 0}, + {"vtcr_el2", VTCR_EL2, 0}, + {"dacr32_el2", DACR32_EL2, 0}, + {"spsr_el1", SPSR_EL1, 0}, + {"spsr_el2", SPSR_EL2, 0}, + {"spsr_el3", SPSR_EL3, 0}, + {"elr_el1", ELR_EL1, 0}, + {"elr_el2", ELR_EL2, 0}, + {"elr_el3", ELR_EL3, 0}, + {"sp_el0", SP_EL0, 0}, + {"sp_el1", SP_EL1, 0}, + {"sp_el2", SP_EL2, 0}, + {"spsel", SPSel, 0}, + {"nzcv", NZCV, 0}, + {"daif", DAIF, 0}, + {"currentel", CurrentEL, 0}, + {"spsr_irq", SPSR_irq, 0}, + {"spsr_abt", SPSR_abt, 0}, + {"spsr_und", SPSR_und, 0}, + {"spsr_fiq", SPSR_fiq, 0}, + {"fpcr", FPCR, 0}, + {"fpsr", FPSR, 0}, + {"dspsr_el0", DSPSR_EL0, 0}, + {"dlr_el0", DLR_EL0, 0}, + {"ifsr32_el2", IFSR32_EL2, 0}, + {"afsr0_el1", AFSR0_EL1, 0}, + {"afsr0_el2", AFSR0_EL2, 0}, + {"afsr0_el3", AFSR0_EL3, 0}, + {"afsr1_el1", AFSR1_EL1, 0}, + {"afsr1_el2", AFSR1_EL2, 0}, + {"afsr1_el3", AFSR1_EL3, 0}, + {"esr_el1", ESR_EL1, 0}, + {"esr_el2", ESR_EL2, 0}, + {"esr_el3", ESR_EL3, 0}, + {"fpexc32_el2", FPEXC32_EL2, 0}, + {"far_el1", FAR_EL1, 0}, + {"far_el2", FAR_EL2, 0}, + {"far_el3", FAR_EL3, 0}, + {"hpfar_el2", HPFAR_EL2, 0}, + {"par_el1", PAR_EL1, 0}, + {"pmcr_el0", PMCR_EL0, 0}, + {"pmcntenset_el0", PMCNTENSET_EL0, 0}, + {"pmcntenclr_el0", PMCNTENCLR_EL0, 0}, + {"pmovsclr_el0", PMOVSCLR_EL0, 0}, + {"pmselr_el0", PMSELR_EL0, 0}, + {"pmccntr_el0", PMCCNTR_EL0, 0}, + {"pmxevtyper_el0", PMXEVTYPER_EL0, 0}, + {"pmxevcntr_el0", PMXEVCNTR_EL0, 0}, + {"pmuserenr_el0", PMUSERENR_EL0, 0}, + {"pmintenset_el1", PMINTENSET_EL1, 0}, + {"pmintenclr_el1", PMINTENCLR_EL1, 0}, + {"pmovsset_el0", PMOVSSET_EL0, 0}, + {"mair_el1", MAIR_EL1, 0}, + {"mair_el2", MAIR_EL2, 0}, + {"mair_el3", MAIR_EL3, 0}, + {"amair_el1", AMAIR_EL1, 0}, + {"amair_el2", AMAIR_EL2, 0}, + {"amair_el3", AMAIR_EL3, 0}, + {"vbar_el1", VBAR_EL1, 0}, + {"vbar_el2", VBAR_EL2, 0}, + {"vbar_el3", VBAR_EL3, 0}, + {"rmr_el1", RMR_EL1, 0}, + {"rmr_el2", RMR_EL2, 0}, + {"rmr_el3", RMR_EL3, 0}, + {"contextidr_el1", CONTEXTIDR_EL1, 0}, + {"tpidr_el0", TPIDR_EL0, 0}, + {"tpidr_el2", TPIDR_EL2, 0}, + {"tpidr_el3", TPIDR_EL3, 0}, + {"tpidrro_el0", TPIDRRO_EL0, 0}, + {"tpidr_el1", TPIDR_EL1, 0}, + {"cntfrq_el0", CNTFRQ_EL0, 0}, + {"cntvoff_el2", CNTVOFF_EL2, 0}, + {"cntkctl_el1", CNTKCTL_EL1, 0}, + {"cnthctl_el2", CNTHCTL_EL2, 0}, + {"cntp_tval_el0", CNTP_TVAL_EL0, 0}, + {"cnthp_tval_el2", CNTHP_TVAL_EL2, 0}, + {"cntps_tval_el1", CNTPS_TVAL_EL1, 0}, + {"cntp_ctl_el0", CNTP_CTL_EL0, 0}, + {"cnthp_ctl_el2", CNTHP_CTL_EL2, 0}, + {"cntps_ctl_el1", CNTPS_CTL_EL1, 0}, + {"cntp_cval_el0", CNTP_CVAL_EL0, 0}, + {"cnthp_cval_el2", CNTHP_CVAL_EL2, 0}, + {"cntps_cval_el1", CNTPS_CVAL_EL1, 0}, + {"cntv_tval_el0", CNTV_TVAL_EL0, 0}, + {"cntv_ctl_el0", CNTV_CTL_EL0, 0}, + {"cntv_cval_el0", CNTV_CVAL_EL0, 0}, + {"pmevcntr0_el0", PMEVCNTR0_EL0, 0}, + {"pmevcntr1_el0", PMEVCNTR1_EL0, 0}, + {"pmevcntr2_el0", PMEVCNTR2_EL0, 0}, + {"pmevcntr3_el0", PMEVCNTR3_EL0, 0}, + {"pmevcntr4_el0", PMEVCNTR4_EL0, 0}, + {"pmevcntr5_el0", PMEVCNTR5_EL0, 0}, + {"pmevcntr6_el0", PMEVCNTR6_EL0, 0}, + {"pmevcntr7_el0", PMEVCNTR7_EL0, 0}, + {"pmevcntr8_el0", PMEVCNTR8_EL0, 0}, + {"pmevcntr9_el0", PMEVCNTR9_EL0, 0}, + {"pmevcntr10_el0", PMEVCNTR10_EL0, 0}, + {"pmevcntr11_el0", PMEVCNTR11_EL0, 0}, + {"pmevcntr12_el0", PMEVCNTR12_EL0, 0}, + {"pmevcntr13_el0", PMEVCNTR13_EL0, 0}, + {"pmevcntr14_el0", PMEVCNTR14_EL0, 0}, + {"pmevcntr15_el0", PMEVCNTR15_EL0, 0}, + {"pmevcntr16_el0", PMEVCNTR16_EL0, 0}, + {"pmevcntr17_el0", PMEVCNTR17_EL0, 0}, + {"pmevcntr18_el0", PMEVCNTR18_EL0, 0}, + {"pmevcntr19_el0", PMEVCNTR19_EL0, 0}, + {"pmevcntr20_el0", PMEVCNTR20_EL0, 0}, + {"pmevcntr21_el0", PMEVCNTR21_EL0, 0}, + {"pmevcntr22_el0", PMEVCNTR22_EL0, 0}, + {"pmevcntr23_el0", PMEVCNTR23_EL0, 0}, + {"pmevcntr24_el0", PMEVCNTR24_EL0, 0}, + {"pmevcntr25_el0", PMEVCNTR25_EL0, 0}, + {"pmevcntr26_el0", PMEVCNTR26_EL0, 0}, + {"pmevcntr27_el0", PMEVCNTR27_EL0, 0}, + {"pmevcntr28_el0", PMEVCNTR28_EL0, 0}, + {"pmevcntr29_el0", PMEVCNTR29_EL0, 0}, + {"pmevcntr30_el0", PMEVCNTR30_EL0, 0}, + {"pmccfiltr_el0", PMCCFILTR_EL0, 0}, + {"pmevtyper0_el0", PMEVTYPER0_EL0, 0}, + {"pmevtyper1_el0", PMEVTYPER1_EL0, 0}, + {"pmevtyper2_el0", PMEVTYPER2_EL0, 0}, + {"pmevtyper3_el0", PMEVTYPER3_EL0, 0}, + {"pmevtyper4_el0", PMEVTYPER4_EL0, 0}, + {"pmevtyper5_el0", PMEVTYPER5_EL0, 0}, + {"pmevtyper6_el0", PMEVTYPER6_EL0, 0}, + {"pmevtyper7_el0", PMEVTYPER7_EL0, 0}, + {"pmevtyper8_el0", PMEVTYPER8_EL0, 0}, + {"pmevtyper9_el0", PMEVTYPER9_EL0, 0}, + {"pmevtyper10_el0", PMEVTYPER10_EL0, 0}, + {"pmevtyper11_el0", PMEVTYPER11_EL0, 0}, + {"pmevtyper12_el0", PMEVTYPER12_EL0, 0}, + {"pmevtyper13_el0", PMEVTYPER13_EL0, 0}, + {"pmevtyper14_el0", PMEVTYPER14_EL0, 0}, + {"pmevtyper15_el0", PMEVTYPER15_EL0, 0}, + {"pmevtyper16_el0", PMEVTYPER16_EL0, 0}, + {"pmevtyper17_el0", PMEVTYPER17_EL0, 0}, + {"pmevtyper18_el0", PMEVTYPER18_EL0, 0}, + {"pmevtyper19_el0", PMEVTYPER19_EL0, 0}, + {"pmevtyper20_el0", PMEVTYPER20_EL0, 0}, + {"pmevtyper21_el0", PMEVTYPER21_EL0, 0}, + {"pmevtyper22_el0", PMEVTYPER22_EL0, 0}, + {"pmevtyper23_el0", PMEVTYPER23_EL0, 0}, + {"pmevtyper24_el0", PMEVTYPER24_EL0, 0}, + {"pmevtyper25_el0", PMEVTYPER25_EL0, 0}, + {"pmevtyper26_el0", PMEVTYPER26_EL0, 0}, + {"pmevtyper27_el0", PMEVTYPER27_EL0, 0}, + {"pmevtyper28_el0", PMEVTYPER28_EL0, 0}, + {"pmevtyper29_el0", PMEVTYPER29_EL0, 0}, + {"pmevtyper30_el0", PMEVTYPER30_EL0, 0}, // Trace registers - {"trcprgctlr", TRCPRGCTLR}, - {"trcprocselr", TRCPROCSELR}, - {"trcconfigr", TRCCONFIGR}, - {"trcauxctlr", TRCAUXCTLR}, - {"trceventctl0r", TRCEVENTCTL0R}, - {"trceventctl1r", TRCEVENTCTL1R}, - {"trcstallctlr", TRCSTALLCTLR}, - {"trctsctlr", TRCTSCTLR}, - {"trcsyncpr", TRCSYNCPR}, - {"trcccctlr", TRCCCCTLR}, - {"trcbbctlr", TRCBBCTLR}, - {"trctraceidr", TRCTRACEIDR}, - {"trcqctlr", TRCQCTLR}, - {"trcvictlr", TRCVICTLR}, - {"trcviiectlr", TRCVIIECTLR}, - {"trcvissctlr", TRCVISSCTLR}, - {"trcvipcssctlr", TRCVIPCSSCTLR}, - {"trcvdctlr", TRCVDCTLR}, - {"trcvdsacctlr", TRCVDSACCTLR}, - {"trcvdarcctlr", TRCVDARCCTLR}, - {"trcseqevr0", TRCSEQEVR0}, - {"trcseqevr1", TRCSEQEVR1}, - {"trcseqevr2", TRCSEQEVR2}, - {"trcseqrstevr", TRCSEQRSTEVR}, - {"trcseqstr", TRCSEQSTR}, - {"trcextinselr", TRCEXTINSELR}, - {"trccntrldvr0", TRCCNTRLDVR0}, - {"trccntrldvr1", TRCCNTRLDVR1}, - {"trccntrldvr2", TRCCNTRLDVR2}, - {"trccntrldvr3", TRCCNTRLDVR3}, - {"trccntctlr0", TRCCNTCTLR0}, - {"trccntctlr1", TRCCNTCTLR1}, - {"trccntctlr2", TRCCNTCTLR2}, - {"trccntctlr3", TRCCNTCTLR3}, - {"trccntvr0", TRCCNTVR0}, - {"trccntvr1", TRCCNTVR1}, - {"trccntvr2", TRCCNTVR2}, - {"trccntvr3", TRCCNTVR3}, - {"trcimspec0", TRCIMSPEC0}, - {"trcimspec1", TRCIMSPEC1}, - {"trcimspec2", TRCIMSPEC2}, - {"trcimspec3", TRCIMSPEC3}, - {"trcimspec4", TRCIMSPEC4}, - {"trcimspec5", TRCIMSPEC5}, - {"trcimspec6", TRCIMSPEC6}, - {"trcimspec7", TRCIMSPEC7}, - {"trcrsctlr2", TRCRSCTLR2}, - {"trcrsctlr3", TRCRSCTLR3}, - {"trcrsctlr4", TRCRSCTLR4}, - {"trcrsctlr5", TRCRSCTLR5}, - {"trcrsctlr6", TRCRSCTLR6}, - {"trcrsctlr7", TRCRSCTLR7}, - {"trcrsctlr8", TRCRSCTLR8}, - {"trcrsctlr9", TRCRSCTLR9}, - {"trcrsctlr10", TRCRSCTLR10}, - {"trcrsctlr11", TRCRSCTLR11}, - {"trcrsctlr12", TRCRSCTLR12}, - {"trcrsctlr13", TRCRSCTLR13}, - {"trcrsctlr14", TRCRSCTLR14}, - {"trcrsctlr15", TRCRSCTLR15}, - {"trcrsctlr16", TRCRSCTLR16}, - {"trcrsctlr17", TRCRSCTLR17}, - {"trcrsctlr18", TRCRSCTLR18}, - {"trcrsctlr19", TRCRSCTLR19}, - {"trcrsctlr20", TRCRSCTLR20}, - {"trcrsctlr21", TRCRSCTLR21}, - {"trcrsctlr22", TRCRSCTLR22}, - {"trcrsctlr23", TRCRSCTLR23}, - {"trcrsctlr24", TRCRSCTLR24}, - {"trcrsctlr25", TRCRSCTLR25}, - {"trcrsctlr26", TRCRSCTLR26}, - {"trcrsctlr27", TRCRSCTLR27}, - {"trcrsctlr28", TRCRSCTLR28}, - {"trcrsctlr29", TRCRSCTLR29}, - {"trcrsctlr30", TRCRSCTLR30}, - {"trcrsctlr31", TRCRSCTLR31}, - {"trcssccr0", TRCSSCCR0}, - {"trcssccr1", TRCSSCCR1}, - {"trcssccr2", TRCSSCCR2}, - {"trcssccr3", TRCSSCCR3}, - {"trcssccr4", TRCSSCCR4}, - {"trcssccr5", TRCSSCCR5}, - {"trcssccr6", TRCSSCCR6}, - {"trcssccr7", TRCSSCCR7}, - {"trcsscsr0", TRCSSCSR0}, - {"trcsscsr1", TRCSSCSR1}, - {"trcsscsr2", TRCSSCSR2}, - {"trcsscsr3", TRCSSCSR3}, - {"trcsscsr4", TRCSSCSR4}, - {"trcsscsr5", TRCSSCSR5}, - {"trcsscsr6", TRCSSCSR6}, - {"trcsscsr7", TRCSSCSR7}, - {"trcsspcicr0", TRCSSPCICR0}, - {"trcsspcicr1", TRCSSPCICR1}, - {"trcsspcicr2", TRCSSPCICR2}, - {"trcsspcicr3", TRCSSPCICR3}, - {"trcsspcicr4", TRCSSPCICR4}, - {"trcsspcicr5", TRCSSPCICR5}, - {"trcsspcicr6", TRCSSPCICR6}, - {"trcsspcicr7", TRCSSPCICR7}, - {"trcpdcr", TRCPDCR}, - {"trcacvr0", TRCACVR0}, - {"trcacvr1", TRCACVR1}, - {"trcacvr2", TRCACVR2}, - {"trcacvr3", TRCACVR3}, - {"trcacvr4", TRCACVR4}, - {"trcacvr5", TRCACVR5}, - {"trcacvr6", TRCACVR6}, - {"trcacvr7", TRCACVR7}, - {"trcacvr8", TRCACVR8}, - {"trcacvr9", TRCACVR9}, - {"trcacvr10", TRCACVR10}, - {"trcacvr11", TRCACVR11}, - {"trcacvr12", TRCACVR12}, - {"trcacvr13", TRCACVR13}, - {"trcacvr14", TRCACVR14}, - {"trcacvr15", TRCACVR15}, - {"trcacatr0", TRCACATR0}, - {"trcacatr1", TRCACATR1}, - {"trcacatr2", TRCACATR2}, - {"trcacatr3", TRCACATR3}, - {"trcacatr4", TRCACATR4}, - {"trcacatr5", TRCACATR5}, - {"trcacatr6", TRCACATR6}, - {"trcacatr7", TRCACATR7}, - {"trcacatr8", TRCACATR8}, - {"trcacatr9", TRCACATR9}, - {"trcacatr10", TRCACATR10}, - {"trcacatr11", TRCACATR11}, - {"trcacatr12", TRCACATR12}, - {"trcacatr13", TRCACATR13}, - {"trcacatr14", TRCACATR14}, - {"trcacatr15", TRCACATR15}, - {"trcdvcvr0", TRCDVCVR0}, - {"trcdvcvr1", TRCDVCVR1}, - {"trcdvcvr2", TRCDVCVR2}, - {"trcdvcvr3", TRCDVCVR3}, - {"trcdvcvr4", TRCDVCVR4}, - {"trcdvcvr5", TRCDVCVR5}, - {"trcdvcvr6", TRCDVCVR6}, - {"trcdvcvr7", TRCDVCVR7}, - {"trcdvcmr0", TRCDVCMR0}, - {"trcdvcmr1", TRCDVCMR1}, - {"trcdvcmr2", TRCDVCMR2}, - {"trcdvcmr3", TRCDVCMR3}, - {"trcdvcmr4", TRCDVCMR4}, - {"trcdvcmr5", TRCDVCMR5}, - {"trcdvcmr6", TRCDVCMR6}, - {"trcdvcmr7", TRCDVCMR7}, - {"trccidcvr0", TRCCIDCVR0}, - {"trccidcvr1", TRCCIDCVR1}, - {"trccidcvr2", TRCCIDCVR2}, - {"trccidcvr3", TRCCIDCVR3}, - {"trccidcvr4", TRCCIDCVR4}, - {"trccidcvr5", TRCCIDCVR5}, - {"trccidcvr6", TRCCIDCVR6}, - {"trccidcvr7", TRCCIDCVR7}, - {"trcvmidcvr0", TRCVMIDCVR0}, - {"trcvmidcvr1", TRCVMIDCVR1}, - {"trcvmidcvr2", TRCVMIDCVR2}, - {"trcvmidcvr3", TRCVMIDCVR3}, - {"trcvmidcvr4", TRCVMIDCVR4}, - {"trcvmidcvr5", TRCVMIDCVR5}, - {"trcvmidcvr6", TRCVMIDCVR6}, - {"trcvmidcvr7", TRCVMIDCVR7}, - {"trccidcctlr0", TRCCIDCCTLR0}, - {"trccidcctlr1", TRCCIDCCTLR1}, - {"trcvmidcctlr0", TRCVMIDCCTLR0}, - {"trcvmidcctlr1", TRCVMIDCCTLR1}, - {"trcitctrl", TRCITCTRL}, - {"trcclaimset", TRCCLAIMSET}, - {"trcclaimclr", TRCCLAIMCLR}, + {"trcprgctlr", TRCPRGCTLR, 0}, + {"trcprocselr", TRCPROCSELR, 0}, + {"trcconfigr", TRCCONFIGR, 0}, + {"trcauxctlr", TRCAUXCTLR, 0}, + {"trceventctl0r", TRCEVENTCTL0R, 0}, + {"trceventctl1r", TRCEVENTCTL1R, 0}, + {"trcstallctlr", TRCSTALLCTLR, 0}, + {"trctsctlr", TRCTSCTLR, 0}, + {"trcsyncpr", TRCSYNCPR, 0}, + {"trcccctlr", TRCCCCTLR, 0}, + {"trcbbctlr", TRCBBCTLR, 0}, + {"trctraceidr", TRCTRACEIDR, 0}, + {"trcqctlr", TRCQCTLR, 0}, + {"trcvictlr", TRCVICTLR, 0}, + {"trcviiectlr", TRCVIIECTLR, 0}, + {"trcvissctlr", TRCVISSCTLR, 0}, + {"trcvipcssctlr", TRCVIPCSSCTLR, 0}, + {"trcvdctlr", TRCVDCTLR, 0}, + {"trcvdsacctlr", TRCVDSACCTLR, 0}, + {"trcvdarcctlr", TRCVDARCCTLR, 0}, + {"trcseqevr0", TRCSEQEVR0, 0}, + {"trcseqevr1", TRCSEQEVR1, 0}, + {"trcseqevr2", TRCSEQEVR2, 0}, + {"trcseqrstevr", TRCSEQRSTEVR, 0}, + {"trcseqstr", TRCSEQSTR, 0}, + {"trcextinselr", TRCEXTINSELR, 0}, + {"trccntrldvr0", TRCCNTRLDVR0, 0}, + {"trccntrldvr1", TRCCNTRLDVR1, 0}, + {"trccntrldvr2", TRCCNTRLDVR2, 0}, + {"trccntrldvr3", TRCCNTRLDVR3, 0}, + {"trccntctlr0", TRCCNTCTLR0, 0}, + {"trccntctlr1", TRCCNTCTLR1, 0}, + {"trccntctlr2", TRCCNTCTLR2, 0}, + {"trccntctlr3", TRCCNTCTLR3, 0}, + {"trccntvr0", TRCCNTVR0, 0}, + {"trccntvr1", TRCCNTVR1, 0}, + {"trccntvr2", TRCCNTVR2, 0}, + {"trccntvr3", TRCCNTVR3, 0}, + {"trcimspec0", TRCIMSPEC0, 0}, + {"trcimspec1", TRCIMSPEC1, 0}, + {"trcimspec2", TRCIMSPEC2, 0}, + {"trcimspec3", TRCIMSPEC3, 0}, + {"trcimspec4", TRCIMSPEC4, 0}, + {"trcimspec5", TRCIMSPEC5, 0}, + {"trcimspec6", TRCIMSPEC6, 0}, + {"trcimspec7", TRCIMSPEC7, 0}, + {"trcrsctlr2", TRCRSCTLR2, 0}, + {"trcrsctlr3", TRCRSCTLR3, 0}, + {"trcrsctlr4", TRCRSCTLR4, 0}, + {"trcrsctlr5", TRCRSCTLR5, 0}, + {"trcrsctlr6", TRCRSCTLR6, 0}, + {"trcrsctlr7", TRCRSCTLR7, 0}, + {"trcrsctlr8", TRCRSCTLR8, 0}, + {"trcrsctlr9", TRCRSCTLR9, 0}, + {"trcrsctlr10", TRCRSCTLR10, 0}, + {"trcrsctlr11", TRCRSCTLR11, 0}, + {"trcrsctlr12", TRCRSCTLR12, 0}, + {"trcrsctlr13", TRCRSCTLR13, 0}, + {"trcrsctlr14", TRCRSCTLR14, 0}, + {"trcrsctlr15", TRCRSCTLR15, 0}, + {"trcrsctlr16", TRCRSCTLR16, 0}, + {"trcrsctlr17", TRCRSCTLR17, 0}, + {"trcrsctlr18", TRCRSCTLR18, 0}, + {"trcrsctlr19", TRCRSCTLR19, 0}, + {"trcrsctlr20", TRCRSCTLR20, 0}, + {"trcrsctlr21", TRCRSCTLR21, 0}, + {"trcrsctlr22", TRCRSCTLR22, 0}, + {"trcrsctlr23", TRCRSCTLR23, 0}, + {"trcrsctlr24", TRCRSCTLR24, 0}, + {"trcrsctlr25", TRCRSCTLR25, 0}, + {"trcrsctlr26", TRCRSCTLR26, 0}, + {"trcrsctlr27", TRCRSCTLR27, 0}, + {"trcrsctlr28", TRCRSCTLR28, 0}, + {"trcrsctlr29", TRCRSCTLR29, 0}, + {"trcrsctlr30", TRCRSCTLR30, 0}, + {"trcrsctlr31", TRCRSCTLR31, 0}, + {"trcssccr0", TRCSSCCR0, 0}, + {"trcssccr1", TRCSSCCR1, 0}, + {"trcssccr2", TRCSSCCR2, 0}, + {"trcssccr3", TRCSSCCR3, 0}, + {"trcssccr4", TRCSSCCR4, 0}, + {"trcssccr5", TRCSSCCR5, 0}, + {"trcssccr6", TRCSSCCR6, 0}, + {"trcssccr7", TRCSSCCR7, 0}, + {"trcsscsr0", TRCSSCSR0, 0}, + {"trcsscsr1", TRCSSCSR1, 0}, + {"trcsscsr2", TRCSSCSR2, 0}, + {"trcsscsr3", TRCSSCSR3, 0}, + {"trcsscsr4", TRCSSCSR4, 0}, + {"trcsscsr5", TRCSSCSR5, 0}, + {"trcsscsr6", TRCSSCSR6, 0}, + {"trcsscsr7", TRCSSCSR7, 0}, + {"trcsspcicr0", TRCSSPCICR0, 0}, + {"trcsspcicr1", TRCSSPCICR1, 0}, + {"trcsspcicr2", TRCSSPCICR2, 0}, + {"trcsspcicr3", TRCSSPCICR3, 0}, + {"trcsspcicr4", TRCSSPCICR4, 0}, + {"trcsspcicr5", TRCSSPCICR5, 0}, + {"trcsspcicr6", TRCSSPCICR6, 0}, + {"trcsspcicr7", TRCSSPCICR7, 0}, + {"trcpdcr", TRCPDCR, 0}, + {"trcacvr0", TRCACVR0, 0}, + {"trcacvr1", TRCACVR1, 0}, + {"trcacvr2", TRCACVR2, 0}, + {"trcacvr3", TRCACVR3, 0}, + {"trcacvr4", TRCACVR4, 0}, + {"trcacvr5", TRCACVR5, 0}, + {"trcacvr6", TRCACVR6, 0}, + {"trcacvr7", TRCACVR7, 0}, + {"trcacvr8", TRCACVR8, 0}, + {"trcacvr9", TRCACVR9, 0}, + {"trcacvr10", TRCACVR10, 0}, + {"trcacvr11", TRCACVR11, 0}, + {"trcacvr12", TRCACVR12, 0}, + {"trcacvr13", TRCACVR13, 0}, + {"trcacvr14", TRCACVR14, 0}, + {"trcacvr15", TRCACVR15, 0}, + {"trcacatr0", TRCACATR0, 0}, + {"trcacatr1", TRCACATR1, 0}, + {"trcacatr2", TRCACATR2, 0}, + {"trcacatr3", TRCACATR3, 0}, + {"trcacatr4", TRCACATR4, 0}, + {"trcacatr5", TRCACATR5, 0}, + {"trcacatr6", TRCACATR6, 0}, + {"trcacatr7", TRCACATR7, 0}, + {"trcacatr8", TRCACATR8, 0}, + {"trcacatr9", TRCACATR9, 0}, + {"trcacatr10", TRCACATR10, 0}, + {"trcacatr11", TRCACATR11, 0}, + {"trcacatr12", TRCACATR12, 0}, + {"trcacatr13", TRCACATR13, 0}, + {"trcacatr14", TRCACATR14, 0}, + {"trcacatr15", TRCACATR15, 0}, + {"trcdvcvr0", TRCDVCVR0, 0}, + {"trcdvcvr1", TRCDVCVR1, 0}, + {"trcdvcvr2", TRCDVCVR2, 0}, + {"trcdvcvr3", TRCDVCVR3, 0}, + {"trcdvcvr4", TRCDVCVR4, 0}, + {"trcdvcvr5", TRCDVCVR5, 0}, + {"trcdvcvr6", TRCDVCVR6, 0}, + {"trcdvcvr7", TRCDVCVR7, 0}, + {"trcdvcmr0", TRCDVCMR0, 0}, + {"trcdvcmr1", TRCDVCMR1, 0}, + {"trcdvcmr2", TRCDVCMR2, 0}, + {"trcdvcmr3", TRCDVCMR3, 0}, + {"trcdvcmr4", TRCDVCMR4, 0}, + {"trcdvcmr5", TRCDVCMR5, 0}, + {"trcdvcmr6", TRCDVCMR6, 0}, + {"trcdvcmr7", TRCDVCMR7, 0}, + {"trccidcvr0", TRCCIDCVR0, 0}, + {"trccidcvr1", TRCCIDCVR1, 0}, + {"trccidcvr2", TRCCIDCVR2, 0}, + {"trccidcvr3", TRCCIDCVR3, 0}, + {"trccidcvr4", TRCCIDCVR4, 0}, + {"trccidcvr5", TRCCIDCVR5, 0}, + {"trccidcvr6", TRCCIDCVR6, 0}, + {"trccidcvr7", TRCCIDCVR7, 0}, + {"trcvmidcvr0", TRCVMIDCVR0, 0}, + {"trcvmidcvr1", TRCVMIDCVR1, 0}, + {"trcvmidcvr2", TRCVMIDCVR2, 0}, + {"trcvmidcvr3", TRCVMIDCVR3, 0}, + {"trcvmidcvr4", TRCVMIDCVR4, 0}, + {"trcvmidcvr5", TRCVMIDCVR5, 0}, + {"trcvmidcvr6", TRCVMIDCVR6, 0}, + {"trcvmidcvr7", TRCVMIDCVR7, 0}, + {"trccidcctlr0", TRCCIDCCTLR0, 0}, + {"trccidcctlr1", TRCCIDCCTLR1, 0}, + {"trcvmidcctlr0", TRCVMIDCCTLR0, 0}, + {"trcvmidcctlr1", TRCVMIDCCTLR1, 0}, + {"trcitctrl", TRCITCTRL, 0}, + {"trcclaimset", TRCCLAIMSET, 0}, + {"trcclaimclr", TRCCLAIMCLR, 0}, // GICv3 registers - {"icc_bpr1_el1", ICC_BPR1_EL1}, - {"icc_bpr0_el1", ICC_BPR0_EL1}, - {"icc_pmr_el1", ICC_PMR_EL1}, - {"icc_ctlr_el1", ICC_CTLR_EL1}, - {"icc_ctlr_el3", ICC_CTLR_EL3}, - {"icc_sre_el1", ICC_SRE_EL1}, - {"icc_sre_el2", ICC_SRE_EL2}, - {"icc_sre_el3", ICC_SRE_EL3}, - {"icc_igrpen0_el1", ICC_IGRPEN0_EL1}, - {"icc_igrpen1_el1", ICC_IGRPEN1_EL1}, - {"icc_igrpen1_el3", ICC_IGRPEN1_EL3}, - {"icc_seien_el1", ICC_SEIEN_EL1}, - {"icc_ap0r0_el1", ICC_AP0R0_EL1}, - {"icc_ap0r1_el1", ICC_AP0R1_EL1}, - {"icc_ap0r2_el1", ICC_AP0R2_EL1}, - {"icc_ap0r3_el1", ICC_AP0R3_EL1}, - {"icc_ap1r0_el1", ICC_AP1R0_EL1}, - {"icc_ap1r1_el1", ICC_AP1R1_EL1}, - {"icc_ap1r2_el1", ICC_AP1R2_EL1}, - {"icc_ap1r3_el1", ICC_AP1R3_EL1}, - {"ich_ap0r0_el2", ICH_AP0R0_EL2}, - {"ich_ap0r1_el2", ICH_AP0R1_EL2}, - {"ich_ap0r2_el2", ICH_AP0R2_EL2}, - {"ich_ap0r3_el2", ICH_AP0R3_EL2}, - {"ich_ap1r0_el2", ICH_AP1R0_EL2}, - {"ich_ap1r1_el2", ICH_AP1R1_EL2}, - {"ich_ap1r2_el2", ICH_AP1R2_EL2}, - {"ich_ap1r3_el2", ICH_AP1R3_EL2}, - {"ich_hcr_el2", ICH_HCR_EL2}, - {"ich_misr_el2", ICH_MISR_EL2}, - {"ich_vmcr_el2", ICH_VMCR_EL2}, - {"ich_vseir_el2", ICH_VSEIR_EL2}, - {"ich_lr0_el2", ICH_LR0_EL2}, - {"ich_lr1_el2", ICH_LR1_EL2}, - {"ich_lr2_el2", ICH_LR2_EL2}, - {"ich_lr3_el2", ICH_LR3_EL2}, - {"ich_lr4_el2", ICH_LR4_EL2}, - {"ich_lr5_el2", ICH_LR5_EL2}, - {"ich_lr6_el2", ICH_LR6_EL2}, - {"ich_lr7_el2", ICH_LR7_EL2}, - {"ich_lr8_el2", ICH_LR8_EL2}, - {"ich_lr9_el2", ICH_LR9_EL2}, - {"ich_lr10_el2", ICH_LR10_EL2}, - {"ich_lr11_el2", ICH_LR11_EL2}, - {"ich_lr12_el2", ICH_LR12_EL2}, - {"ich_lr13_el2", ICH_LR13_EL2}, - {"ich_lr14_el2", ICH_LR14_EL2}, - {"ich_lr15_el2", ICH_LR15_EL2} -}; - -const AArch64NamedImmMapper::Mapping -AArch64SysReg::SysRegMapper::CycloneSysRegMappings[] = { - {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3} + {"icc_bpr1_el1", ICC_BPR1_EL1, 0}, + {"icc_bpr0_el1", ICC_BPR0_EL1, 0}, + {"icc_pmr_el1", ICC_PMR_EL1, 0}, + {"icc_ctlr_el1", ICC_CTLR_EL1, 0}, + {"icc_ctlr_el3", ICC_CTLR_EL3, 0}, + {"icc_sre_el1", ICC_SRE_EL1, 0}, + {"icc_sre_el2", ICC_SRE_EL2, 0}, + {"icc_sre_el3", ICC_SRE_EL3, 0}, + {"icc_igrpen0_el1", ICC_IGRPEN0_EL1, 0}, + {"icc_igrpen1_el1", ICC_IGRPEN1_EL1, 0}, + {"icc_igrpen1_el3", ICC_IGRPEN1_EL3, 0}, + {"icc_seien_el1", ICC_SEIEN_EL1, 0}, + {"icc_ap0r0_el1", ICC_AP0R0_EL1, 0}, + {"icc_ap0r1_el1", ICC_AP0R1_EL1, 0}, + {"icc_ap0r2_el1", ICC_AP0R2_EL1, 0}, + {"icc_ap0r3_el1", ICC_AP0R3_EL1, 0}, + {"icc_ap1r0_el1", ICC_AP1R0_EL1, 0}, + {"icc_ap1r1_el1", ICC_AP1R1_EL1, 0}, + {"icc_ap1r2_el1", ICC_AP1R2_EL1, 0}, + {"icc_ap1r3_el1", ICC_AP1R3_EL1, 0}, + {"ich_ap0r0_el2", ICH_AP0R0_EL2, 0}, + {"ich_ap0r1_el2", ICH_AP0R1_EL2, 0}, + {"ich_ap0r2_el2", ICH_AP0R2_EL2, 0}, + {"ich_ap0r3_el2", ICH_AP0R3_EL2, 0}, + {"ich_ap1r0_el2", ICH_AP1R0_EL2, 0}, + {"ich_ap1r1_el2", ICH_AP1R1_EL2, 0}, + {"ich_ap1r2_el2", ICH_AP1R2_EL2, 0}, + {"ich_ap1r3_el2", ICH_AP1R3_EL2, 0}, + {"ich_hcr_el2", ICH_HCR_EL2, 0}, + {"ich_misr_el2", ICH_MISR_EL2, 0}, + {"ich_vmcr_el2", ICH_VMCR_EL2, 0}, + {"ich_vseir_el2", ICH_VSEIR_EL2, 0}, + {"ich_lr0_el2", ICH_LR0_EL2, 0}, + {"ich_lr1_el2", ICH_LR1_EL2, 0}, + {"ich_lr2_el2", ICH_LR2_EL2, 0}, + {"ich_lr3_el2", ICH_LR3_EL2, 0}, + {"ich_lr4_el2", ICH_LR4_EL2, 0}, + {"ich_lr5_el2", ICH_LR5_EL2, 0}, + {"ich_lr6_el2", ICH_LR6_EL2, 0}, + {"ich_lr7_el2", ICH_LR7_EL2, 0}, + {"ich_lr8_el2", ICH_LR8_EL2, 0}, + {"ich_lr9_el2", ICH_LR9_EL2, 0}, + {"ich_lr10_el2", ICH_LR10_EL2, 0}, + {"ich_lr11_el2", ICH_LR11_EL2, 0}, + {"ich_lr12_el2", ICH_LR12_EL2, 0}, + {"ich_lr13_el2", ICH_LR13_EL2, 0}, + {"ich_lr14_el2", ICH_LR14_EL2, 0}, + {"ich_lr15_el2", ICH_LR15_EL2, 0}, + + // Cyclone registers + {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3, AArch64::ProcCyclone}, + + // v8.1a "Privileged Access Never" extension-specific system registers + {"pan", PAN, AArch64::HasV8_1aOps}, + + // v8.1a "Limited Ordering Regions" extension-specific system registers + {"lorsa_el1", LORSA_EL1, AArch64::HasV8_1aOps}, + {"lorea_el1", LOREA_EL1, AArch64::HasV8_1aOps}, + {"lorn_el1", LORN_EL1, AArch64::HasV8_1aOps}, + {"lorc_el1", LORC_EL1, AArch64::HasV8_1aOps}, + {"lorid_el1", LORID_EL1, AArch64::HasV8_1aOps}, + + // v8.1a "Virtualization host extensions" system registers + {"ttbr1_el2", TTBR1_EL2, AArch64::HasV8_1aOps}, + {"contextidr_el2", CONTEXTIDR_EL2, AArch64::HasV8_1aOps}, + {"cnthv_tval_el2", CNTHV_TVAL_EL2, AArch64::HasV8_1aOps}, + {"cnthv_cval_el2", CNTHV_CVAL_EL2, AArch64::HasV8_1aOps}, + {"cnthv_ctl_el2", CNTHV_CTL_EL2, AArch64::HasV8_1aOps}, + {"sctlr_el12", SCTLR_EL12, AArch64::HasV8_1aOps}, + {"cpacr_el12", CPACR_EL12, AArch64::HasV8_1aOps}, + {"ttbr0_el12", TTBR0_EL12, AArch64::HasV8_1aOps}, + {"ttbr1_el12", TTBR1_EL12, AArch64::HasV8_1aOps}, + {"tcr_el12", TCR_EL12, AArch64::HasV8_1aOps}, + {"afsr0_el12", AFSR0_EL12, AArch64::HasV8_1aOps}, + {"afsr1_el12", AFSR1_EL12, AArch64::HasV8_1aOps}, + {"esr_el12", ESR_EL12, AArch64::HasV8_1aOps}, + {"far_el12", FAR_EL12, AArch64::HasV8_1aOps}, + {"mair_el12", MAIR_EL12, AArch64::HasV8_1aOps}, + {"amair_el12", AMAIR_EL12, AArch64::HasV8_1aOps}, + {"vbar_el12", VBAR_EL12, AArch64::HasV8_1aOps}, + {"contextidr_el12", CONTEXTIDR_EL12, AArch64::HasV8_1aOps}, + {"cntkctl_el12", CNTKCTL_EL12, AArch64::HasV8_1aOps}, + {"cntp_tval_el02", CNTP_TVAL_EL02, AArch64::HasV8_1aOps}, + {"cntp_ctl_el02", CNTP_CTL_EL02, AArch64::HasV8_1aOps}, + {"cntp_cval_el02", CNTP_CVAL_EL02, AArch64::HasV8_1aOps}, + {"cntv_tval_el02", CNTV_TVAL_EL02, AArch64::HasV8_1aOps}, + {"cntv_ctl_el02", CNTV_CTL_EL02, AArch64::HasV8_1aOps}, + {"cntv_cval_el02", CNTV_CVAL_EL02, AArch64::HasV8_1aOps}, + {"spsr_el12", SPSR_EL12, AArch64::HasV8_1aOps}, + {"elr_el12", ELR_EL12, AArch64::HasV8_1aOps}, }; uint32_t -AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { +AArch64SysReg::SysRegMapper::fromString(StringRef Name, uint64_t FeatureBits, + bool &Valid) const { std::string NameLower = Name.lower(); // First search the registers shared by all for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) { - if (SysRegMappings[i].Name == NameLower) { + if (SysRegMappings[i].isNameEqual(NameLower, FeatureBits)) { Valid = true; return SysRegMappings[i].Value; } } - // Next search for target specific registers - if (FeatureBits & AArch64::ProcCyclone) { - for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) { - if (CycloneSysRegMappings[i].Name == NameLower) { - Valid = true; - return CycloneSysRegMappings[i].Value; - } - } - } - // Now try the instruction-specific registers (either read-only or // write-only). for (unsigned i = 0; i < NumInstMappings; ++i) { - if (InstMappings[i].Name == NameLower) { + if (InstMappings[i].isNameEqual(NameLower, FeatureBits)) { Valid = true; return InstMappings[i].Value; } @@ -814,27 +848,18 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { } std::string -AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const { +AArch64SysReg::SysRegMapper::toString(uint32_t Bits, uint64_t FeatureBits) const { // First search the registers shared by all for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) { - if (SysRegMappings[i].Value == Bits) { + if (SysRegMappings[i].isValueEqual(Bits, FeatureBits)) { return SysRegMappings[i].Name; } } - // Next search for target specific registers - if (FeatureBits & AArch64::ProcCyclone) { - for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) { - if (CycloneSysRegMappings[i].Value == Bits) { - return CycloneSysRegMappings[i].Name; - } - } - } - // Now try the instruction-specific registers (either read-only or // write-only). for (unsigned i = 0; i < NumInstMappings; ++i) { - if (InstMappings[i].Value == Bits) { + if (InstMappings[i].isValueEqual(Bits, FeatureBits)) { return InstMappings[i].Name; } } @@ -851,38 +876,38 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const { } const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIMappings[] = { - {"ipas2e1is", IPAS2E1IS}, - {"ipas2le1is", IPAS2LE1IS}, - {"vmalle1is", VMALLE1IS}, - {"alle2is", ALLE2IS}, - {"alle3is", ALLE3IS}, - {"vae1is", VAE1IS}, - {"vae2is", VAE2IS}, - {"vae3is", VAE3IS}, - {"aside1is", ASIDE1IS}, - {"vaae1is", VAAE1IS}, - {"alle1is", ALLE1IS}, - {"vale1is", VALE1IS}, - {"vale2is", VALE2IS}, - {"vale3is", VALE3IS}, - {"vmalls12e1is", VMALLS12E1IS}, - {"vaale1is", VAALE1IS}, - {"ipas2e1", IPAS2E1}, - {"ipas2le1", IPAS2LE1}, - {"vmalle1", VMALLE1}, - {"alle2", ALLE2}, - {"alle3", ALLE3}, - {"vae1", VAE1}, - {"vae2", VAE2}, - {"vae3", VAE3}, - {"aside1", ASIDE1}, - {"vaae1", VAAE1}, - {"alle1", ALLE1}, - {"vale1", VALE1}, - {"vale2", VALE2}, - {"vale3", VALE3}, - {"vmalls12e1", VMALLS12E1}, - {"vaale1", VAALE1} + {"ipas2e1is", IPAS2E1IS, 0}, + {"ipas2le1is", IPAS2LE1IS, 0}, + {"vmalle1is", VMALLE1IS, 0}, + {"alle2is", ALLE2IS, 0}, + {"alle3is", ALLE3IS, 0}, + {"vae1is", VAE1IS, 0}, + {"vae2is", VAE2IS, 0}, + {"vae3is", VAE3IS, 0}, + {"aside1is", ASIDE1IS, 0}, + {"vaae1is", VAAE1IS, 0}, + {"alle1is", ALLE1IS, 0}, + {"vale1is", VALE1IS, 0}, + {"vale2is", VALE2IS, 0}, + {"vale3is", VALE3IS, 0}, + {"vmalls12e1is", VMALLS12E1IS, 0}, + {"vaale1is", VAALE1IS, 0}, + {"ipas2e1", IPAS2E1, 0}, + {"ipas2le1", IPAS2LE1, 0}, + {"vmalle1", VMALLE1, 0}, + {"alle2", ALLE2, 0}, + {"alle3", ALLE3, 0}, + {"vae1", VAE1, 0}, + {"vae2", VAE2, 0}, + {"vae3", VAE3, 0}, + {"aside1", ASIDE1, 0}, + {"vaae1", VAAE1, 0}, + {"alle1", ALLE1, 0}, + {"vale1", VALE1, 0}, + {"vale2", VALE2, 0}, + {"vale3", VALE3, 0}, + {"vmalls12e1", VMALLS12E1, 0}, + {"vaale1", VAALE1, 0} }; AArch64TLBI::TLBIMapper::TLBIMapper() diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 2ae6f52..659ea90 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -280,14 +280,26 @@ struct AArch64NamedImmMapper { struct Mapping { const char *Name; uint32_t Value; + uint64_t AvailableForFeatures; + // empty AvailableForFeatures means "always-on" + bool isNameEqual(std::string Other, uint64_t FeatureBits=~0ULL) const { + if (AvailableForFeatures && !(AvailableForFeatures & FeatureBits)) + return false; + return Name == Other; + } + bool isValueEqual(uint32_t Other, uint64_t FeatureBits=~0ULL) const { + if (AvailableForFeatures && !(AvailableForFeatures & FeatureBits)) + return false; + return Value == Other; + } }; template<int N> AArch64NamedImmMapper(const Mapping (&Mappings)[N], uint32_t TooBigImm) : Mappings(&Mappings[0]), NumMappings(N), TooBigImm(TooBigImm) {} - StringRef toString(uint32_t Value, bool &Valid) const; - uint32_t fromString(StringRef Name, bool &Valid) const; + StringRef toString(uint32_t Value, uint64_t FeatureBits, bool &Valid) const; + uint32_t fromString(StringRef Name, uint64_t FeatureBits, bool &Valid) const; /// Many of the instructions allow an alternative assembly form consisting of /// a simple immediate. Currently the only valid forms are ranges [0, N) where @@ -435,7 +447,10 @@ namespace AArch64PState { Invalid = -1, SPSel = 0x05, DAIFSet = 0x1e, - DAIFClr = 0x1f + DAIFClr = 0x1f, + + // v8.1a "Privileged Access Never" extension-specific PStates + PAN = 0x04, }; struct PStateMapper : AArch64NamedImmMapper { @@ -1122,11 +1137,48 @@ namespace AArch64SysReg { ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101 ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110 ICH_LR15_EL2 = 0xe66f, // 11 100 1100 1101 111 - }; - // Cyclone specific system registers - enum CycloneSysRegValues { - CPM_IOACC_CTL_EL3 = 0xff90 + // v8.1a "Privileged Access Never" extension-specific system registers + PAN = 0xc213, // 11 000 0100 0010 011 + + // v8.1a "Limited Ordering Regions" extension-specific system registers + LORSA_EL1 = 0xc520, // 11 000 1010 0100 000 + LOREA_EL1 = 0xc521, // 11 000 1010 0100 001 + LORN_EL1 = 0xc522, // 11 000 1010 0100 010 + LORC_EL1 = 0xc523, // 11 000 1010 0100 011 + LORID_EL1 = 0xc527, // 11 000 1010 0100 111 + + // v8.1a "Virtualization host extensions" system registers + TTBR1_EL2 = 0xe101, // 11 100 0010 0000 001 + CONTEXTIDR_EL2 = 0xe681, // 11 100 1101 0000 001 + CNTHV_TVAL_EL2 = 0xe718, // 11 100 1110 0011 000 + CNTHV_CVAL_EL2 = 0xe71a, // 11 100 1110 0011 010 + CNTHV_CTL_EL2 = 0xe719, // 11 100 1110 0011 001 + SCTLR_EL12 = 0xe880, // 11 101 0001 0000 000 + CPACR_EL12 = 0xe882, // 11 101 0001 0000 010 + TTBR0_EL12 = 0xe900, // 11 101 0010 0000 000 + TTBR1_EL12 = 0xe901, // 11 101 0010 0000 001 + TCR_EL12 = 0xe902, // 11 101 0010 0000 010 + AFSR0_EL12 = 0xea88, // 11 101 0101 0001 000 + AFSR1_EL12 = 0xea89, // 11 101 0101 0001 001 + ESR_EL12 = 0xea90, // 11 101 0101 0010 000 + FAR_EL12 = 0xeb00, // 11 101 0110 0000 000 + MAIR_EL12 = 0xed10, // 11 101 1010 0010 000 + AMAIR_EL12 = 0xed18, // 11 101 1010 0011 000 + VBAR_EL12 = 0xee00, // 11 101 1100 0000 000 + CONTEXTIDR_EL12 = 0xee81, // 11 101 1101 0000 001 + CNTKCTL_EL12 = 0xef08, // 11 101 1110 0001 000 + CNTP_TVAL_EL02 = 0xef10, // 11 101 1110 0010 000 + CNTP_CTL_EL02 = 0xef11, // 11 101 1110 0010 001 + CNTP_CVAL_EL02 = 0xef12, // 11 101 1110 0010 010 + CNTV_TVAL_EL02 = 0xef18, // 11 101 1110 0011 000 + CNTV_CTL_EL02 = 0xef19, // 11 101 1110 0011 001 + CNTV_CVAL_EL02 = 0xef1a, // 11 101 1110 0011 010 + SPSR_EL12 = 0xea00, // 11 101 0100 0000 000 + ELR_EL12 = 0xea01, // 11 101 0100 0000 001 + + // Cyclone specific system registers + CPM_IOACC_CTL_EL3 = 0xff90, }; // Note that these do not inherit from AArch64NamedImmMapper. This class is @@ -1135,25 +1187,23 @@ namespace AArch64SysReg { // this one case. struct SysRegMapper { static const AArch64NamedImmMapper::Mapping SysRegMappings[]; - static const AArch64NamedImmMapper::Mapping CycloneSysRegMappings[]; const AArch64NamedImmMapper::Mapping *InstMappings; size_t NumInstMappings; - uint64_t FeatureBits; - SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { } - uint32_t fromString(StringRef Name, bool &Valid) const; - std::string toString(uint32_t Bits) const; + SysRegMapper() { } + uint32_t fromString(StringRef Name, uint64_t FeatureBits, bool &Valid) const; + std::string toString(uint32_t Bits, uint64_t FeatureBits) const; }; struct MSRMapper : SysRegMapper { static const AArch64NamedImmMapper::Mapping MSRMappings[]; - MSRMapper(uint64_t FeatureBits); + MSRMapper(); }; struct MRSMapper : SysRegMapper { static const AArch64NamedImmMapper::Mapping MRSMappings[]; - MRSMapper(uint64_t FeatureBits); + MRSMapper(); }; uint32_t ParseGenericRegister(StringRef Name, bool &Valid); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index ce0aed9..bd1c7af 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -180,7 +180,7 @@ def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", "Support ARM v8 instructions", [HasV7Ops, FeatureVirtualization, FeatureMP]>; -def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true", +def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", "Support ARM v8.1a instructions", [HasV8Ops, FeatureAClass, FeatureCRC]>; @@ -260,6 +260,14 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", FeatureTrustZone, FeatureT2XtPk, FeatureCrypto, FeatureCRC]>; +def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4", + "Cortex-R4 ARM processors", + [FeatureHWDiv, + FeatureAvoidPartialCPSR, + FeatureDSPThumb2, FeatureT2XtPk, + HasV7Ops, FeatureDB, FeatureHasRAS, + FeatureRClass]>; + def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", "Cortex-R5 ARM processors", [FeatureSlowFPBrcc, @@ -396,6 +404,16 @@ def : ProcessorModel<"krait", CortexA9Model, FeatureDSPThumb2, FeatureHasRAS, FeatureAClass]>; +// FIXME: R4 has currently the same ProcessorModel as A8. +def : ProcessorModel<"cortex-r4", CortexA8Model, + [ProcR4]>; + +// FIXME: R4F has currently the same ProcessorModel as A8. +def : ProcessorModel<"cortex-r4f", CortexA8Model, + [ProcR4, + FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, + FeatureVFP3, FeatureVFPOnlySP, FeatureD16]>; + // FIXME: R5 has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r5", CortexA8Model, [ProcR5, HasV7Ops, FeatureDB, @@ -457,14 +475,6 @@ def : ProcessorModel<"cyclone", SwiftModel, FeatureDB,FeatureDSPThumb2, FeatureHasRAS, FeatureZCZeroing]>; -// V8.1 Processors -def : ProcNoItin<"generic-armv8.1-a", [HasV8Ops, FeatureV8_1a, - FeatureDB, FeatureFPARMv8, - FeatureNEON, FeatureDSPThumb2, - FeatureHWDiv, FeatureHWDivARM, - FeatureTrustZone, FeatureT2XtPk, - FeatureCrypto]>; - //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -485,7 +495,15 @@ def ARMInstrInfo : InstrInfo; // Declare the target which we are implementing //===----------------------------------------------------------------------===// +def ARMAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; + bit isMCAsmWriter = 1; +} + def ARM : Target { // Pull in Instruction Info: let InstructionSet = ARMInstrInfo; + let AssemblyWriters = [ARMAsmWriter]; } diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 102def1..1a2acf5 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -207,7 +207,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { SmallString<60> Name; raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '_' << uid2; - return OutContext.GetOrCreateSymbol(Name.str()); + return OutContext.GetOrCreateSymbol(Name); } @@ -216,7 +216,7 @@ MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const { SmallString<60> Name; raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH" << getFunctionNumber(); - return OutContext.GetOrCreateSymbol(Name.str()); + return OutContext.GetOrCreateSymbol(Name); } bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, @@ -520,28 +520,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // generates code that does this, it is always safe to set. OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } - - // Emit a .data.rel section containing any stubs that were created. - if (TT.isOSBinFormatELF()) { - const TargetLoweringObjectFileELF &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Output stubs for external and common global variables. - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); - - for (auto &stub: Stubs) { - OutStreamer.EmitLabel(stub.first); - OutStreamer.EmitSymbolValue(stub.second.getPointer(), - TD->getPointerSize(0)); - } - Stubs.clear(); - } - } } //===----------------------------------------------------------------------===// @@ -597,7 +575,7 @@ void ARMAsmPrinter::emitAttributes() { std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU); if (!FS.empty()) { if (!ArchFS.empty()) - ArchFS = ArchFS + "," + FS.str(); + ArchFS = (Twine(ArchFS) + "," + FS).str(); else ArchFS = FS; } @@ -661,8 +639,8 @@ void ARMAsmPrinter::emitAttributes() { // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture if (STI.hasV8Ops()) ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, - STI.hasV8_1a() ? ARMBuildAttrs::AllowNeonARMv8_1a: - ARMBuildAttrs::AllowNeonARMv8); + STI.hasV8_1aOps() ? ARMBuildAttrs::AllowNeonARMv8_1a: + ARMBuildAttrs::AllowNeonARMv8); } else { if (STI.hasFPARMv8()) // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index a8c7657..3f79a9b 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -245,11 +245,15 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, // This register should preferably be even (Odd == 0) or odd (Odd == 1). // Check if the other part of the pair has already been assigned, and provide // the paired register as the first hint. + unsigned Paired = Hint.second; + if (Paired == 0) + return; + unsigned PairedPhys = 0; - if (VRM && VRM->hasPhys(Hint.second)) { - PairedPhys = getPairedGPR(VRM->getPhys(Hint.second), Odd, this); - if (PairedPhys && MRI.isReserved(PairedPhys)) - PairedPhys = 0; + if (TargetRegisterInfo::isPhysicalRegister(Paired)) { + PairedPhys = Paired; + } else if (VRM && VRM->hasPhys(Paired)) { + PairedPhys = getPairedGPR(VRM->getPhys(Paired), Odd, this); } // First prefer the paired physreg. @@ -284,9 +288,14 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg, // change. unsigned OtherReg = Hint.second; Hint = MRI->getRegAllocationHint(OtherReg); - if (Hint.second == Reg) - // Make sure the pair has not already divorced. + // Make sure the pair has not already divorced. + if (Hint.second == Reg) { MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg); + if (TargetRegisterInfo::isVirtualRegister(NewReg)) + MRI->setRegAllocationHint(NewReg, + Hint.first == (unsigned)ARMRI::RegPairOdd ? ARMRI::RegPairEven + : ARMRI::RegPairOdd, OtherReg); + } } } diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 13bef54..36f63e2 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -86,7 +86,7 @@ protected: } public: - virtual ~ARMConstantPoolValue(); + ~ARMConstantPoolValue() override; ARMCP::ARMCPModifier getModifier() const { return Modifier; } const char *getModifierText() const; diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 830953b..9d2b09b 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -311,6 +311,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { return; StackAdjustingInsts DefCFAOffsetCandidates; + bool HasFP = hasFP(MF); // Allocate the vararg register save area. if (ArgRegsSaveSize) { @@ -327,6 +328,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes - ArgRegsSaveSize, true); } + DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); return; } @@ -375,7 +377,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } // Determine starting offsets of spill areas. - bool HasFP = hasFP(MF); unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size; unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 3b1b8dd..72afd2c 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -3504,25 +3504,34 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); - // Try to generate VSEL on ARMv8. + // Try to generate VMAXNM/VMINNM on ARMv8. if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { - // We can select VMAXNM/VMINNM from a compare followed by a select with the + // We can use VMAXNM/VMINNM for a compare followed by a select with the // same operands, as follows: - // c = fcmp [ogt, olt, ugt, ult] a, b + // c = fcmp [?gt, ?ge, ?lt, ?le] a, b // select c, a, b - // We only do this in unsafe-fp-math, because signed zeros and NaNs are - // handled differently than the original code sequence. + // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'. + // We only do this transformation in UnsafeFPMath and for no-NaNs + // comparisons, because signed zeros and NaNs are handled differently than + // the original code sequence. + // FIXME: There are more cases that can be transformed even with NaNs, + // signed zeroes and safe math. E.g. in the following, the result will be + // FalseVal if a is a NaN or -0./0. and that's what vmaxnm will give, too. + // c = fcmp ogt, a, 0. ; select c, a, 0. => vmaxnm a, 0. + // FIXME: There is similar code that allows some extensions in + // AArch64TargetLowering::LowerSELECT_CC that should be shared with this + // code. if (getTargetMachine().Options.UnsafeFPMath) { if (LHS == TrueVal && RHS == FalseVal) { - if (CC == ISD::SETOGT || CC == ISD::SETUGT) + if (CC == ISD::SETGT || CC == ISD::SETGE) return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); - if (CC == ISD::SETOLT || CC == ISD::SETULT) + if (CC == ISD::SETLT || CC == ISD::SETLE) return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); } else if (LHS == FalseVal && RHS == TrueVal) { - if (CC == ISD::SETOLT || CC == ISD::SETULT) + if (CC == ISD::SETLT || CC == ISD::SETLE) return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); - if (CC == ISD::SETOGT || CC == ISD::SETUGT) + if (CC == ISD::SETGT || CC == ISD::SETGE) return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); } } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index c3984ca..52f3555 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -208,6 +208,8 @@ def HasV8 : Predicate<"Subtarget->hasV8Ops()">, AssemblerPredicate<"HasV8Ops", "armv8">; def PreV8 : Predicate<"!Subtarget->hasV8Ops()">, AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">; +def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, + AssemblerPredicate<"HasV8_1aOps", "armv8.1a">; def NoVFP : Predicate<"!Subtarget->hasVFP2()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate<"FeatureVFP2", "VFP2">; @@ -226,8 +228,6 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">, AssemblerPredicate<"FeatureCrypto", "crypto">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; -def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">, - AssemblerPredicate<"FeatureV8_1a", "v8.1a">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float">; def HasDivide : Predicate<"Subtarget->hasDivide()">, @@ -388,6 +388,9 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ // Immediate operands with a shared generic asm render method. class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; } +// Operands that are part of a memory addressing mode. +class MemOperand : Operand<i32> { let OperandType = "OPERAND_MEMORY"; } + // Branch target. // FIXME: rename brtarget to t2_brtarget def brtarget : Operand<OtherVT> { @@ -790,7 +793,7 @@ def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }], // addrmode_imm12 := reg +/- imm12 // def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; } -class AddrMode_Imm12 : Operand<i32>, +class AddrMode_Imm12 : MemOperand, ComplexPattern<i32, 2, "SelectAddrModeImm12", []> { // 12-bit immediate operand. Note that instructions using this encode // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other @@ -813,7 +816,7 @@ def addrmode_imm12_pre : AddrMode_Imm12 { // ldst_so_reg := reg +/- reg shop imm // def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; } -def ldst_so_reg : Operand<i32>, +def ldst_so_reg : MemOperand, ComplexPattern<i32, 3, "SelectLdStSOReg", []> { let EncoderMethod = "getLdStSORegOpValue"; // FIXME: Simplify the printer @@ -829,7 +832,7 @@ def ldst_so_reg : Operand<i32>, // {8} 1 is imm8 is non-negative. 0 otherwise. // {7-0} [0,255] imm8 value. def PostIdxImm8AsmOperand : AsmOperandClass { let Name = "PostIdxImm8"; } -def postidx_imm8 : Operand<i32> { +def postidx_imm8 : MemOperand { let PrintMethod = "printPostIdxImm8Operand"; let ParserMatchClass = PostIdxImm8AsmOperand; let MIOperandInfo = (ops i32imm); @@ -841,7 +844,7 @@ def postidx_imm8 : Operand<i32> { // {8} 1 is imm8 is non-negative. 0 otherwise. // {7-0} [0,255] imm8 value, scaled by 4. def PostIdxImm8s4AsmOperand : AsmOperandClass { let Name = "PostIdxImm8s4"; } -def postidx_imm8s4 : Operand<i32> { +def postidx_imm8s4 : MemOperand { let PrintMethod = "printPostIdxImm8s4Operand"; let ParserMatchClass = PostIdxImm8s4AsmOperand; let MIOperandInfo = (ops i32imm); @@ -854,7 +857,7 @@ def PostIdxRegAsmOperand : AsmOperandClass { let Name = "PostIdxReg"; let ParserMethod = "parsePostIdxReg"; } -def postidx_reg : Operand<i32> { +def postidx_reg : MemOperand { let EncoderMethod = "getPostIdxRegOpValue"; let DecoderMethod = "DecodePostIdxReg"; let PrintMethod = "printPostIdxRegOperand"; @@ -869,7 +872,7 @@ def postidx_reg : Operand<i32> { // FIXME: addrmode2 should be refactored the rest of the way to always // use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg). def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; } -def addrmode2 : Operand<i32>, +def addrmode2 : MemOperand, ComplexPattern<i32, 3, "SelectAddrMode2", []> { let EncoderMethod = "getAddrMode2OpValue"; let PrintMethod = "printAddrMode2Operand"; @@ -881,7 +884,7 @@ def PostIdxRegShiftedAsmOperand : AsmOperandClass { let Name = "PostIdxRegShifted"; let ParserMethod = "parsePostIdxReg"; } -def am2offset_reg : Operand<i32>, +def am2offset_reg : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode2OffsetReg", [], [SDNPWantRoot]> { let EncoderMethod = "getAddrMode2OffsetOpValue"; @@ -894,7 +897,7 @@ def am2offset_reg : Operand<i32>, // FIXME: am2offset_imm should only need the immediate, not the GPR. Having // the GPR is purely vestigal at this point. def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; } -def am2offset_imm : Operand<i32>, +def am2offset_imm : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode2OffsetImm", [], [SDNPWantRoot]> { let EncoderMethod = "getAddrMode2OffsetOpValue"; @@ -909,7 +912,7 @@ def am2offset_imm : Operand<i32>, // // FIXME: split into imm vs. reg versions. def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; } -class AddrMode3 : Operand<i32>, +class AddrMode3 : MemOperand, ComplexPattern<i32, 3, "SelectAddrMode3", []> { let EncoderMethod = "getAddrMode3OpValue"; let ParserMatchClass = AddrMode3AsmOperand; @@ -932,7 +935,7 @@ def AM3OffsetAsmOperand : AsmOperandClass { let Name = "AM3Offset"; let ParserMethod = "parseAM3Offset"; } -def am3offset : Operand<i32>, +def am3offset : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode3Offset", [], [SDNPWantRoot]> { let EncoderMethod = "getAddrMode3OffsetOpValue"; @@ -951,7 +954,7 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> { // addrmode5 := reg +/- imm8*4 // def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; } -class AddrMode5 : Operand<i32>, +class AddrMode5 : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode5", []> { let EncoderMethod = "getAddrMode5OpValue"; let DecoderMethod = "DecodeAddrMode5Operand"; @@ -970,7 +973,7 @@ def addrmode5_pre : AddrMode5 { // addrmode6 := reg with optional alignment // def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; } -def addrmode6 : Operand<i32>, +def addrmode6 : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm:$align); @@ -979,7 +982,7 @@ def addrmode6 : Operand<i32>, let ParserMatchClass = AddrMode6AsmOperand; } -def am6offset : Operand<i32>, +def am6offset : MemOperand, ComplexPattern<i32, 1, "SelectAddrMode6Offset", [], [SDNPWantRoot]> { let PrintMethod = "printAddrMode6OffsetOperand"; @@ -990,7 +993,7 @@ def am6offset : Operand<i32>, // Special version of addrmode6 to handle alignment encoding for VST1/VLD1 // (single element from one lane) for size 32. -def addrmode6oneL32 : Operand<i32>, +def addrmode6oneL32 : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); @@ -998,7 +1001,7 @@ def addrmode6oneL32 : Operand<i32>, } // Base class for addrmode6 with specific alignment restrictions. -class AddrMode6Align : Operand<i32>, +class AddrMode6Align : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm:$align); @@ -1074,7 +1077,7 @@ def addrmode6align64or128or256 : AddrMode6Align { // Special version of addrmode6 to handle alignment encoding for VLD-dup // instructions, specifically VLD4-dup. -def addrmode6dup : Operand<i32>, +def addrmode6dup : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); @@ -1085,7 +1088,7 @@ def addrmode6dup : Operand<i32>, } // Base class for addrmode6dup with specific alignment restrictions. -class AddrMode6DupAlign : Operand<i32>, +class AddrMode6DupAlign : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); @@ -1149,7 +1152,7 @@ def addrmode6dupalign64or128 : AddrMode6DupAlign { // addrmodepc := pc + reg // -def addrmodepc : Operand<i32>, +def addrmodepc : MemOperand, ComplexPattern<i32, 2, "SelectAddrModePC", []> { let PrintMethod = "printAddrModePCOperand"; let MIOperandInfo = (ops GPR, i32imm); @@ -1158,7 +1161,7 @@ def addrmodepc : Operand<i32>, // addr_offset_none := reg // def MemNoOffsetAsmOperand : AsmOperandClass { let Name = "MemNoOffset"; } -def addr_offset_none : Operand<i32>, +def addr_offset_none : MemOperand, ComplexPattern<i32, 1, "SelectAddrOffsetNone", []> { let PrintMethod = "printAddrMode7Operand"; let DecoderMethod = "DecodeAddrMode7Operand"; @@ -1417,7 +1420,8 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir, let isCompare = 1, Defs = [CPSR] in { multiclass AI1_cmp_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0> { + PatFrag opnode, bit Commutable = 0, + string rrDecoderMethod = ""> { def ri : AI1<opcod, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii, opc, "\t$Rn, $imm", [(opnode GPR:$Rn, mod_imm:$imm)]>, @@ -1445,6 +1449,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{15-12} = 0b0000; let Inst{11-4} = 0b00000000; let Inst{3-0} = Rm; + let DecoderMethod = rrDecoderMethod; let Unpredictable{15-12} = 0b1111; } @@ -4263,6 +4268,30 @@ def CRC32W : AI_crc32<0, 0b10, "w", int_arm_crc32w>; def CRC32CW : AI_crc32<1, 0b10, "cw", int_arm_crc32cw>; //===----------------------------------------------------------------------===// +// ARMv8.1a Privilege Access Never extension +// +// SETPAN #imm1 + +def SETPAN : AInoP<(outs), (ins imm0_1:$imm), MiscFrm, NoItinerary, "setpan", + "\t$imm", []>, Requires<[IsARM, HasV8, HasV8_1a]> { + bits<1> imm; + + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010001; + let Inst{19-16} = 0b0000; + let Inst{15-10} = 0b000000; + let Inst{9} = imm; + let Inst{8} = 0b0; + let Inst{7-4} = 0b0000; + let Inst{3-0} = 0b0000; + + let Unpredictable{19-16} = 0b1111; + let Unpredictable{15-10} = 0b111111; + let Unpredictable{8} = 0b1; + let Unpredictable{3-0} = 0b1111; +} + +//===----------------------------------------------------------------------===// // Comparison Instructions... // @@ -4366,7 +4395,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, mod_imm_neg:$imm), // Note that TST/TEQ don't set all the same flags that CMP does! defm TST : AI1_cmp_irs<0b1000, "tst", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, - BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>; + BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1, + "DecodeTSTInstruction">; defm TEQ : AI1_cmp_irs<0b1001, "teq", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 3c62e0e..d0ade77 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -142,7 +142,7 @@ def t_blxtarget : Operand<i32> { // t_addrmode_pc := <label> => pc + imm8 * 4 // -def t_addrmode_pc : Operand<i32> { +def t_addrmode_pc : MemOperand { let EncoderMethod = "getAddrModePCOpValue"; let DecoderMethod = "DecodeThumbAddrModePC"; let PrintMethod = "printThumbLdrLabelOperand"; @@ -153,7 +153,7 @@ def t_addrmode_pc : Operand<i32> { // t_addrmode_rr := reg + reg // def t_addrmode_rr_asm_operand : AsmOperandClass { let Name = "MemThumbRR"; } -def t_addrmode_rr : Operand<i32>, +def t_addrmode_rr : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let PrintMethod = "printThumbAddrModeRROperand"; @@ -169,7 +169,7 @@ def t_addrmode_rr : Operand<i32>, // the reg+imm forms will match instead. This is a horrible way to do that, // as it forces tight coupling between the methods, but it's how selectiondag // currently works. -def t_addrmode_rrs1 : Operand<i32>, +def t_addrmode_rrs1 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let PrintMethod = "printThumbAddrModeRROperand"; @@ -177,7 +177,7 @@ def t_addrmode_rrs1 : Operand<i32>, let ParserMatchClass = t_addrmode_rr_asm_operand; let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); } -def t_addrmode_rrs2 : Operand<i32>, +def t_addrmode_rrs2 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let DecoderMethod = "DecodeThumbAddrModeRR"; @@ -185,7 +185,7 @@ def t_addrmode_rrs2 : Operand<i32>, let ParserMatchClass = t_addrmode_rr_asm_operand; let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); } -def t_addrmode_rrs4 : Operand<i32>, +def t_addrmode_rrs4 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let DecoderMethod = "DecodeThumbAddrModeRR"; @@ -197,7 +197,7 @@ def t_addrmode_rrs4 : Operand<i32>, // t_addrmode_is4 := reg + imm5 * 4 // def t_addrmode_is4_asm_operand : AsmOperandClass { let Name = "MemThumbRIs4"; } -def t_addrmode_is4 : Operand<i32>, +def t_addrmode_is4 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> { let EncoderMethod = "getAddrModeISOpValue"; let DecoderMethod = "DecodeThumbAddrModeIS"; @@ -209,7 +209,7 @@ def t_addrmode_is4 : Operand<i32>, // t_addrmode_is2 := reg + imm5 * 2 // def t_addrmode_is2_asm_operand : AsmOperandClass { let Name = "MemThumbRIs2"; } -def t_addrmode_is2 : Operand<i32>, +def t_addrmode_is2 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> { let EncoderMethod = "getAddrModeISOpValue"; let DecoderMethod = "DecodeThumbAddrModeIS"; @@ -221,7 +221,7 @@ def t_addrmode_is2 : Operand<i32>, // t_addrmode_is1 := reg + imm5 // def t_addrmode_is1_asm_operand : AsmOperandClass { let Name = "MemThumbRIs1"; } -def t_addrmode_is1 : Operand<i32>, +def t_addrmode_is1 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> { let EncoderMethod = "getAddrModeISOpValue"; let DecoderMethod = "DecodeThumbAddrModeIS"; @@ -235,7 +235,7 @@ def t_addrmode_is1 : Operand<i32>, // FIXME: This really shouldn't have an explicit SP operand at all. It should // be implicit, just like in the instruction encoding itself. def t_addrmode_sp_asm_operand : AsmOperandClass { let Name = "MemThumbSPI"; } -def t_addrmode_sp : Operand<i32>, +def t_addrmode_sp : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> { let EncoderMethod = "getAddrModeThumbSPOpValue"; let DecoderMethod = "DecodeThumbAddrModeSP"; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 10b0a0e..103ee00 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -148,7 +148,7 @@ def lo5AllOne : PatLeaf<(i32 imm), [{ // t2addrmode_imm12 := reg + imm12 def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";} -def t2addrmode_imm12 : Operand<i32>, +def t2addrmode_imm12 : MemOperand, ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> { let PrintMethod = "printAddrModeImm12Operand<false>"; let EncoderMethod = "getAddrModeImm12OpValue"; @@ -178,7 +178,7 @@ def t2adrlabel : Operand<i32> { // t2addrmode_posimm8 := reg + imm8 def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";} -def t2addrmode_posimm8 : Operand<i32> { +def t2addrmode_posimm8 : MemOperand { let PrintMethod = "printT2AddrModeImm8Operand<false>"; let EncoderMethod = "getT2AddrModeImm8OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8"; @@ -188,7 +188,7 @@ def t2addrmode_posimm8 : Operand<i32> { // t2addrmode_negimm8 := reg - imm8 def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";} -def t2addrmode_negimm8 : Operand<i32>, +def t2addrmode_negimm8 : MemOperand, ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { let PrintMethod = "printT2AddrModeImm8Operand<false>"; let EncoderMethod = "getT2AddrModeImm8OpValue"; @@ -199,7 +199,7 @@ def t2addrmode_negimm8 : Operand<i32>, // t2addrmode_imm8 := reg +/- imm8 def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; } -class T2AddrMode_Imm8 : Operand<i32>, +class T2AddrMode_Imm8 : MemOperand, ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { let EncoderMethod = "getT2AddrModeImm8OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8"; @@ -215,7 +215,7 @@ def t2addrmode_imm8_pre : T2AddrMode_Imm8 { let PrintMethod = "printT2AddrModeImm8Operand<true>"; } -def t2am_imm8_offset : Operand<i32>, +def t2am_imm8_offset : MemOperand, ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset", [], [SDNPWantRoot]> { let PrintMethod = "printT2AddrModeImm8OffsetOperand"; @@ -225,7 +225,7 @@ def t2am_imm8_offset : Operand<i32>, // t2addrmode_imm8s4 := reg +/- (imm8 << 2) def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";} -class T2AddrMode_Imm8s4 : Operand<i32> { +class T2AddrMode_Imm8s4 : MemOperand { let EncoderMethod = "getT2AddrModeImm8s4OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8s4"; let ParserMatchClass = MemImm8s4OffsetAsmOperand; @@ -241,7 +241,7 @@ def t2addrmode_imm8s4_pre : T2AddrMode_Imm8s4 { } def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; } -def t2am_imm8s4_offset : Operand<i32> { +def t2am_imm8s4_offset : MemOperand { let PrintMethod = "printT2AddrModeImm8s4OffsetOperand"; let EncoderMethod = "getT2Imm8s4OpValue"; let DecoderMethod = "DecodeT2Imm8S4"; @@ -251,7 +251,7 @@ def t2am_imm8s4_offset : Operand<i32> { def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass { let Name = "MemImm0_1020s4Offset"; } -def t2addrmode_imm0_1020s4 : Operand<i32>, +def t2addrmode_imm0_1020s4 : MemOperand, ComplexPattern<i32, 2, "SelectT2AddrModeExclusive"> { let PrintMethod = "printT2AddrModeImm0_1020s4Operand"; let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue"; @@ -262,7 +262,7 @@ def t2addrmode_imm0_1020s4 : Operand<i32>, // t2addrmode_so_reg := reg + (reg << imm2) def t2addrmode_so_reg_asmoperand : AsmOperandClass {let Name="T2MemRegOffset";} -def t2addrmode_so_reg : Operand<i32>, +def t2addrmode_so_reg : MemOperand, ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> { let PrintMethod = "printT2AddrModeSoRegOperand"; let EncoderMethod = "getT2AddrModeSORegOpValue"; @@ -273,13 +273,13 @@ def t2addrmode_so_reg : Operand<i32>, // Addresses for the TBB/TBH instructions. def addrmode_tbb_asmoperand : AsmOperandClass { let Name = "MemTBB"; } -def addrmode_tbb : Operand<i32> { +def addrmode_tbb : MemOperand { let PrintMethod = "printAddrModeTBB"; let ParserMatchClass = addrmode_tbb_asmoperand; let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm); } def addrmode_tbh_asmoperand : AsmOperandClass { let Name = "MemTBH"; } -def addrmode_tbh : Operand<i32> { +def addrmode_tbh : MemOperand { let PrintMethod = "printAddrModeTBH"; let ParserMatchClass = addrmode_tbh_asmoperand; let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm); @@ -3630,8 +3630,8 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), // Branch and Exchange Jazelle -- for disassembly only // Rm = Inst{19-16} -def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []>, - Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass, PreV8]> { +def t2BXJ : T2I<(outs), (ins GPRnopc:$func), NoItinerary, "bxj", "\t$func", []>, + Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass]> { bits<4> func; let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -4281,6 +4281,23 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, //===----------------------------------------------------------------------===// +// ARMv8.1 Privilege Access Never extension +// +// SETPAN #imm1 + +def t2SETPAN : T1I<(outs), (ins imm0_1:$imm), NoItinerary, "setpan\t$imm", []>, + T1Misc<0b0110000>, Requires<[IsThumb2, HasV8, HasV8_1a]> { + bits<1> imm; + + let Inst{4} = 0b1; + let Inst{3} = imm; + let Inst{2-0} = 0b000; + + let Unpredictable{4} = 0b1; + let Unpredictable{2-0} = 0b111; +} + +//===----------------------------------------------------------------------===// // Non-Instruction Patterns // diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index fbec9e6..2a3e1da 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -133,6 +133,7 @@ void ARMSubtarget::initializeEnvironment() { HasV6T2Ops = false; HasV7Ops = false; HasV8Ops = false; + HasV8_1aOps = false; HasVFPv2 = false; HasVFPv3 = false; HasVFPv4 = false; @@ -166,7 +167,6 @@ void ARMSubtarget::initializeEnvironment() { HasTrustZone = false; HasCrypto = false; HasCRC = false; - HasV8_1a = false; HasZeroCycleZeroing = false; AllowsUnalignedMem = false; Thumb2DSP = false; @@ -191,7 +191,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { ARM_MC::ParseARMTriple(TargetTriple.getTriple(), CPUString); if (!FS.empty()) { if (!ArchFS.empty()) - ArchFS = ArchFS + "," + FS.str(); + ArchFS = (Twine(ArchFS) + "," + FS).str(); else ArchFS = FS; } @@ -254,7 +254,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { switch (IT) { case DefaultIT: - RestrictIT = hasV8Ops() ? true : false; + RestrictIT = hasV8Ops(); break; case RestrictedIT: RestrictIT = true; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index f36cd5c..d82314d 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -43,7 +43,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, - CortexA17, CortexR5, Swift, CortexA53, CortexA57, Krait, + CortexA17, CortexR4, CortexR4F, CortexR5, Swift, CortexA53, CortexA57, Krait, }; enum ARMProcClassEnum { None, AClass, RClass, MClass @@ -67,6 +67,7 @@ protected: bool HasV6T2Ops; bool HasV7Ops; bool HasV8Ops; + bool HasV8_1aOps; /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what /// floating point ISAs are supported. @@ -182,9 +183,6 @@ protected: /// HasCRC - if true, processor supports CRC instructions bool HasCRC; - /// HasV8_1a - if true, the processor has V8.1a: PAN and RDMA extensions - bool HasV8_1a; - /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are /// particularly effective at zeroing a VFP register. bool HasZeroCycleZeroing; @@ -295,6 +293,7 @@ public: bool hasV6T2Ops() const { return HasV6T2Ops; } bool hasV7Ops() const { return HasV7Ops; } bool hasV8Ops() const { return HasV8Ops; } + bool hasV8_1aOps() const { return HasV8_1aOps; } bool isCortexA5() const { return ARMProcFamily == CortexA5; } bool isCortexA7() const { return ARMProcFamily == CortexA7; } @@ -316,7 +315,6 @@ public: bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } - bool hasV8_1a() const { return HasV8_1a; } bool hasVirtualization() const { return HasVirtualization; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 1bee1b0..ae33340 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -42,6 +42,11 @@ EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden, cl::desc("Enable ARM load/store optimization pass"), cl::init(true)); +// FIXME: Unify control over GlobalMerge. +static cl::opt<cl::boolOrDefault> +EnableGlobalMerge("arm-global-merge", cl::Hidden, + cl::desc("Enable the global merge pass")); + extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget); @@ -332,7 +337,9 @@ void ARMPassConfig::addIRPasses() { } bool ARMPassConfig::addPreISel() { - if (TM->getOptLevel() == CodeGenOpt::Aggressive) + if ((TM->getOptLevel() == CodeGenOpt::Aggressive && + EnableGlobalMerge == cl::BOU_UNSET) || + EnableGlobalMerge == cl::BOU_TRUE) // FIXME: This is using the thumb1 only constant value for // maximal global offset for merging globals. We may want // to look into using the old value for non-thumb1 code of diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 2215efb..b9ad2c8 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -276,8 +276,8 @@ class ARMAsmParser : public MCTargetAsmParser { bool hasD16() const { return STI.getFeatureBits() & ARM::FeatureD16; } - bool hasV8_1a() const { - return STI.getFeatureBits() & ARM::FeatureV8_1a; + bool hasV8_1aOps() const { + return STI.getFeatureBits() & ARM::HasV8_1aOps; } void SwitchMode() { @@ -5418,47 +5418,44 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, /// inclusion of carry set or predication code operands. // // FIXME: It would be nice to autogen this. -void ARMAsmParser:: -getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, - bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) { - if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" || +void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, + bool &CanAcceptCarrySet, + bool &CanAcceptPredicationCode) { + CanAcceptCarrySet = + Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" || Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" || - Mnemonic == "add" || Mnemonic == "adc" || - Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" || - Mnemonic == "orr" || Mnemonic == "mvn" || - Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" || - Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" || - Mnemonic == "vfm" || Mnemonic == "vfnm" || - (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" || - Mnemonic == "mla" || Mnemonic == "smlal" || - Mnemonic == "umlal" || Mnemonic == "umull"))) { - CanAcceptCarrySet = true; - } else - CanAcceptCarrySet = false; + Mnemonic == "add" || Mnemonic == "adc" || Mnemonic == "mul" || + Mnemonic == "bic" || Mnemonic == "asr" || Mnemonic == "orr" || + Mnemonic == "mvn" || Mnemonic == "rsb" || Mnemonic == "rsc" || + Mnemonic == "orn" || Mnemonic == "sbc" || Mnemonic == "eor" || + Mnemonic == "neg" || Mnemonic == "vfm" || Mnemonic == "vfnm" || + (!isThumb() && + (Mnemonic == "smull" || Mnemonic == "mov" || Mnemonic == "mla" || + Mnemonic == "smlal" || Mnemonic == "umlal" || Mnemonic == "umull")); if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" || - Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" || + Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" || Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic == "udf" || Mnemonic.startswith("crc32") || Mnemonic.startswith("cps") || - Mnemonic.startswith("vsel") || - Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || - Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || - Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || - Mnemonic == "vrintm" || Mnemonic.startswith("aes") || Mnemonic == "hvc" || + Mnemonic.startswith("vsel") || Mnemonic == "vmaxnm" || + Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" || + Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" || + Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" || + Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" || Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { // Some instructions are only predicable in Thumb mode - CanAcceptPredicationCode - = Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" && + CanAcceptPredicationCode = + Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" && Mnemonic != "mcrr2" && Mnemonic != "mrc2" && Mnemonic != "mrrc2" && Mnemonic != "dmb" && Mnemonic != "dsb" && Mnemonic != "isb" && Mnemonic != "pld" && Mnemonic != "pli" && Mnemonic != "pldw" && - Mnemonic != "ldc2" && Mnemonic != "ldc2l" && - Mnemonic != "stc2" && Mnemonic != "stc2l" && - !Mnemonic.startswith("rfe") && !Mnemonic.startswith("srs"); + Mnemonic != "ldc2" && Mnemonic != "ldc2l" && Mnemonic != "stc2" && + Mnemonic != "stc2l" && !Mnemonic.startswith("rfe") && + !Mnemonic.startswith("srs"); } else if (isThumbOne()) { if (hasV6MOps()) CanAcceptPredicationCode = Mnemonic != "movs"; @@ -6153,6 +6150,14 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, "destination operands can't be identical"); return false; } + case ARM::t2BXJ: { + const unsigned RmReg = Inst.getOperand(0).getReg(); + // Rm = SP is no longer unpredictable in v8-A + if (RmReg == ARM::SP && !hasV8Ops()) + return Error(Operands[2]->getStartLoc(), + "r13 (SP) is an unpredictable operand to BXJ"); + return false; + } case ARM::STRD: { // Rt2 must be Rt + 1. unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg()); diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 4d5122a..4c169a8 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -91,7 +91,7 @@ public: MCDisassembler(STI, Ctx) { } - ~ARMDisassembler() {} + ~ARMDisassembler() override {} DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, @@ -106,7 +106,7 @@ public: MCDisassembler(STI, Ctx) { } - ~ThumbDisassembler() {} + ~ThumbDisassembler() override {} DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, @@ -212,6 +212,10 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, @@ -2119,6 +2123,54 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, return S; } +static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Pred = fieldFromInstruction(Insn, 28, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + + if (Pred == 0xF) + return DecodeSETPANInstruction(Inst, Insn, Address, Decoder); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, Pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Imm = fieldFromInstruction(Insn, 9, 1); + + const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); + uint64_t FeatureBits = Dis->getSubtargetInfo().getFeatureBits(); + if ((FeatureBits & ARM::HasV8_1aOps) == 0 || + (FeatureBits & ARM::HasV8Ops) == 0 ) + return MCDisassembler::Fail; + + // Decoder can be called from DecodeTST, which does not check the full + // encoding is valid. + if (fieldFromInstruction(Insn, 20,12) != 0xf11 || + fieldFromInstruction(Insn, 4,4) != 0) + return MCDisassembler::Fail; + if (fieldFromInstruction(Insn, 10,10) != 0 || + fieldFromInstruction(Insn, 0,4) != 0) + S = MCDisassembler::SoftFail; + + Inst.setOpcode(ARM::SETPAN); + Inst.addOperand(MCOperand::CreateImm(Imm)); + + return S; +} + static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index e15323d..c2e1b2a 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -40,12 +40,12 @@ static unsigned translateShiftImm(unsigned imm) { /// Prints the shift value with an immediate value. static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc, - unsigned ShImm, bool UseMarkup) { + unsigned ShImm, bool UseMarkup) { if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm)) return; O << ", "; - assert (!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0"); + assert(!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0"); O << getShiftOpcStr(ShOpc); if (ShOpc != ARM_AM::rrx) { @@ -58,49 +58,52 @@ static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc, } } -ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) : - MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); -} +ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - OS << markup("<reg:") - << getRegisterName(RegNo) - << markup(">"); + OS << markup("<reg:") << getRegisterName(RegNo) << markup(">"); } void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { unsigned Opcode = MI->getOpcode(); - switch(Opcode) { + switch (Opcode) { // Check for HINT instructions w/ canonical names. case ARM::HINT: case ARM::tHINT: case ARM::t2HINT: switch (MI->getOperand(0).getImm()) { - case 0: O << "\tnop"; break; - case 1: O << "\tyield"; break; - case 2: O << "\twfe"; break; - case 3: O << "\twfi"; break; - case 4: O << "\tsev"; break; + case 0: + O << "\tnop"; + break; + case 1: + O << "\tyield"; + break; + case 2: + O << "\twfe"; + break; + case 3: + O << "\twfi"; + break; + case 4: + O << "\tsev"; + break; case 5: - if ((getAvailableFeatures() & ARM::HasV8Ops)) { + if ((STI.getFeatureBits() & ARM::HasV8Ops)) { O << "\tsevl"; break; } // Fallthrough for non-v8 default: // Anything else should just print normally. - printInstruction(MI, O); + printInstruction(MI, STI, O); printAnnotation(O, Annot); return; } - printPredicateOperand(MI, 1, O); + printPredicateOperand(MI, 1, STI, O); if (Opcode == ARM::t2HINT) O << ".w"; printAnnotation(O, Annot); @@ -115,8 +118,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, const MCOperand &MO3 = MI->getOperand(3); O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())); - printSBitModifierOperand(MI, 6, O); - printPredicateOperand(MI, 4, O); + printSBitModifierOperand(MI, 6, STI, O); + printPredicateOperand(MI, 4, STI, O); O << '\t'; printRegName(O, Dst.getReg()); @@ -137,8 +140,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, const MCOperand &MO2 = MI->getOperand(2); O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())); - printSBitModifierOperand(MI, 5, O); - printPredicateOperand(MI, 3, O); + printSBitModifierOperand(MI, 5, STI, O); + printPredicateOperand(MI, 3, STI, O); O << '\t'; printRegName(O, Dst.getReg()); @@ -150,10 +153,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - O << ", " - << markup("<imm:") - << "#" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())) - << markup(">"); + O << ", " << markup("<imm:") << "#" + << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())) << markup(">"); printAnnotation(O, Annot); return; } @@ -164,11 +165,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) { // Should only print PUSH if there are at least two registers in the list. O << '\t' << "push"; - printPredicateOperand(MI, 2, O); + printPredicateOperand(MI, 2, STI, O); if (Opcode == ARM::t2STMDB_UPD) O << ".w"; O << '\t'; - printRegisterList(MI, 4, O); + printRegisterList(MI, 4, STI, O); printAnnotation(O, Annot); return; } else @@ -178,7 +179,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, if (MI->getOperand(2).getReg() == ARM::SP && MI->getOperand(3).getImm() == -4) { O << '\t' << "push"; - printPredicateOperand(MI, 4, O); + printPredicateOperand(MI, 4, STI, O); O << "\t{"; printRegName(O, MI->getOperand(1).getReg()); O << "}"; @@ -193,11 +194,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) { // Should only print POP if there are at least two registers in the list. O << '\t' << "pop"; - printPredicateOperand(MI, 2, O); + printPredicateOperand(MI, 2, STI, O); if (Opcode == ARM::t2LDMIA_UPD) O << ".w"; O << '\t'; - printRegisterList(MI, 4, O); + printRegisterList(MI, 4, STI, O); printAnnotation(O, Annot); return; } else @@ -207,7 +208,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, if (MI->getOperand(2).getReg() == ARM::SP && MI->getOperand(4).getImm() == 4) { O << '\t' << "pop"; - printPredicateOperand(MI, 5, O); + printPredicateOperand(MI, 5, STI, O); O << "\t{"; printRegName(O, MI->getOperand(0).getReg()); O << "}"; @@ -221,9 +222,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, case ARM::VSTMDDB_UPD: if (MI->getOperand(0).getReg() == ARM::SP) { O << '\t' << "vpush"; - printPredicateOperand(MI, 2, O); + printPredicateOperand(MI, 2, STI, O); O << '\t'; - printRegisterList(MI, 4, O); + printRegisterList(MI, 4, STI, O); printAnnotation(O, Annot); return; } else @@ -234,9 +235,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, case ARM::VLDMDIA_UPD: if (MI->getOperand(0).getReg() == ARM::SP) { O << '\t' << "vpop"; - printPredicateOperand(MI, 2, O); + printPredicateOperand(MI, 2, STI, O); O << '\t'; - printRegisterList(MI, 4, O); + printRegisterList(MI, 4, STI, O); printAnnotation(O, Annot); return; } else @@ -252,12 +253,13 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, O << "\tldm"; - printPredicateOperand(MI, 1, O); + printPredicateOperand(MI, 1, STI, O); O << '\t'; printRegName(O, BaseReg); - if (Writeback) O << "!"; + if (Writeback) + O << "!"; O << ", "; - printRegisterList(MI, 3, O); + printRegisterList(MI, 3, STI, O); printAnnotation(O, Annot); return; } @@ -268,9 +270,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, // GPRs. However, when decoding them, the two GRPs cannot be automatically // expressed as a GPRPair, so we have to manually merge them. // FIXME: We would really like to be able to tablegen'erate this. - case ARM::LDREXD: case ARM::STREXD: - case ARM::LDAEXD: case ARM::STLEXD: { - const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID); + case ARM::LDREXD: + case ARM::STREXD: + case ARM::LDAEXD: + case ARM::STLEXD: { + const MCRegisterClass &MRC = MRI.getRegClass(ARM::GPRRegClassID); bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD; unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg(); if (MRC.contains(Reg)) { @@ -280,28 +284,27 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, if (isStore) NewMI.addOperand(MI->getOperand(0)); - NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg(Reg, ARM::gsub_0, - &MRI.getRegClass(ARM::GPRPairRegClassID))); + NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg( + Reg, ARM::gsub_0, &MRI.getRegClass(ARM::GPRPairRegClassID))); NewMI.addOperand(NewReg); // Copy the rest operands into NewMI. - for(unsigned i= isStore ? 3 : 2; i < MI->getNumOperands(); ++i) + for (unsigned i = isStore ? 3 : 2; i < MI->getNumOperands(); ++i) NewMI.addOperand(MI->getOperand(i)); - printInstruction(&NewMI, O); + printInstruction(&NewMI, STI, O); return; } break; } - // B9.3.3 ERET (Thumb) - // For a target that has Virtualization Extensions, ERET is the preferred - // disassembly of SUBS PC, LR, #0 + // B9.3.3 ERET (Thumb) + // For a target that has Virtualization Extensions, ERET is the preferred + // disassembly of SUBS PC, LR, #0 case ARM::t2SUBS_PC_LR: { - if (MI->getNumOperands() == 3 && - MI->getOperand(0).isImm() && + if (MI->getNumOperands() == 3 && MI->getOperand(0).isImm() && MI->getOperand(0).getImm() == 0 && - (getAvailableFeatures() & ARM::FeatureVirtualization)) { + (STI.getFeatureBits() & ARM::FeatureVirtualization)) { O << "\teret"; - printPredicateOperand(MI, 1, O); + printPredicateOperand(MI, 1, STI, O); printAnnotation(O, Annot); return; } @@ -309,20 +312,18 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, } } - printInstruction(MI, O); + printInstruction(MI, STI, O); printAnnotation(O, Annot); } void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { unsigned Reg = Op.getReg(); printRegName(O, Reg); } else if (Op.isImm()) { - O << markup("<imm:") - << '#' << formatImm(Op.getImm()) - << markup(">"); + O << markup("<imm:") << '#' << formatImm(Op.getImm()) << markup(">"); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); const MCExpr *Expr = Op.getExpr(); @@ -354,6 +355,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); if (MO1.isExpr()) { @@ -370,13 +372,9 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, if (OffImm == INT32_MIN) OffImm = 0; if (isSub) { - O << markup("<imm:") - << "#-" << formatImm(-OffImm) - << markup(">"); + O << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">"); } else { - O << markup("<imm:") - << "#" << formatImm(OffImm) - << markup(">"); + O << markup("<imm:") << "#" << formatImm(OffImm) << markup(">"); } O << "]" << markup(">"); } @@ -387,10 +385,11 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, // REG REG 0,SH_OPC - e.g. R5, ROR R3 // REG 0 IMM,SH_OPC - e.g. R5, LSL #3 void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); + const MCOperand &MO3 = MI->getOperand(OpNum + 2); printRegName(O, MO1.getReg()); @@ -406,9 +405,10 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); printRegName(O, MO1.getReg()); @@ -417,28 +417,25 @@ void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum, ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup); } - //===--------------------------------------------------------------------===// // Addressing Mode #2 //===--------------------------------------------------------------------===// void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); - const MCOperand &MO3 = MI->getOperand(Op+2); + const MCOperand &MO2 = MI->getOperand(Op + 1); + const MCOperand &MO3 = MI->getOperand(Op + 2); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); if (!MO2.getReg()) { if (ARM_AM::getAM2Offset(MO3.getImm())) { // Don't print +0. - O << ", " - << markup("<imm:") - << "#" + O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) - << ARM_AM::getAM2Offset(MO3.getImm()) - << markup(">"); + << ARM_AM::getAM2Offset(MO3.getImm()) << markup(">"); } O << "]" << markup(">"); return; @@ -454,9 +451,10 @@ void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, } void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO2 = MI->getOperand(Op + 1); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); O << ", "; @@ -465,9 +463,10 @@ void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op, } void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO2 = MI->getOperand(Op + 1); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); O << ", "; @@ -476,35 +475,35 @@ void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op, } void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, Op, O); + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op, STI, O); return; } #ifndef NDEBUG - const MCOperand &MO3 = MI->getOperand(Op+2); + const MCOperand &MO3 = MI->getOperand(Op + 2); unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm()); - assert(IdxMode != ARMII::IndexModePost && - "Should be pre or offset index op"); + assert(IdxMode != ARMII::IndexModePost && "Should be pre or offset index op"); #endif - printAM2PreOrOffsetIndexOp(MI, Op, O); + printAM2PreOrOffsetIndexOp(MI, Op, STI, O); } void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); - O << markup("<imm:") - << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) - << ImmOffs + O << markup("<imm:") << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs << markup(">"); return; } @@ -524,8 +523,8 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O, bool AlwaysPrintImm0) { const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); - const MCOperand &MO3 = MI->getOperand(Op+2); + const MCOperand &MO2 = MI->getOperand(Op + 1); + const MCOperand &MO3 = MI->getOperand(Op + 2); O << markup("<mem:") << '['; printRegName(O, MO1.getReg()); @@ -537,16 +536,12 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, return; } - //If the op is sub we have to print the immediate even if it is 0 + // If the op is sub we have to print the immediate even if it is 0 unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm()); if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) { - O << ", " - << markup("<imm:") - << "#" - << ARM_AM::getAddrOpcStr(op) - << ImmOffs + O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(op) << ImmOffs << markup(">"); } O << ']' << markup(">"); @@ -554,10 +549,11 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); - if (!MO1.isReg()) { // For label symbolic references. - printOperand(MI, Op, O); + if (!MO1.isReg()) { // For label symbolic references. + printOperand(MI, Op, STI, O); return; } @@ -569,9 +565,10 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); if (MO1.getReg()) { O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())); @@ -580,56 +577,56 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, } unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); - O << markup("<imm:") - << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs + O << markup("<imm:") << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs << markup(">"); } -void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, - unsigned OpNum, +void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); unsigned Imm = MO.getImm(); - O << markup("<imm:") - << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff) + O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff) << markup(">"); } void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); O << (MO2.getImm() ? "" : "-"); printRegName(O, MO1.getReg()); } -void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); unsigned Imm = MO.getImm(); - O << markup("<imm:") - << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2) + O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2) << markup(">"); } - void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { - ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum) - .getImm()); + ARM_AM::AMSubMode Mode = + ARM_AM::getAM4SubMode(MI->getOperand(OpNum).getImm()); O << ARM_AM::getAMSubModeStr(Mode); } template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, OpNum, O); + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, OpNum, STI, O); return; } @@ -639,20 +636,17 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm()); ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm()); if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) { - O << ", " - << markup("<imm:") - << "#" - << ARM_AM::getAddrOpcStr(Op) - << ImmOffs * 4 - << markup(">"); + O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(Op) + << ImmOffs * 4 << markup(">"); } O << "]" << markup(">"); } void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); @@ -663,6 +657,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); O << markup("<mem:") << "["; @@ -672,6 +667,7 @@ void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); if (MO.getReg() == 0) @@ -684,49 +680,47 @@ void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); uint32_t v = ~MO.getImm(); int32_t lsb = countTrailingZeros(v); - int32_t width = (32 - countLeadingZeros (v)) - lsb; + int32_t width = (32 - countLeadingZeros(v)) - lsb; assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!"); - O << markup("<imm:") << '#' << lsb << markup(">") - << ", " - << markup("<imm:") << '#' << width << markup(">"); + O << markup("<imm:") << '#' << lsb << markup(">") << ", " << markup("<imm:") + << '#' << width << markup(">"); } void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned val = MI->getOperand(OpNum).getImm(); - O << ARM_MB::MemBOptToString(val, (getAvailableFeatures() & ARM::HasV8Ops)); + O << ARM_MB::MemBOptToString(val, (STI.getFeatureBits() & ARM::HasV8Ops)); } void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned val = MI->getOperand(OpNum).getImm(); O << ARM_ISB::InstSyncBOptToString(val); } void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned ShiftOp = MI->getOperand(OpNum).getImm(); bool isASR = (ShiftOp & (1 << 5)) != 0; unsigned Amt = ShiftOp & 0x1f; if (isASR) { - O << ", asr " - << markup("<imm:") - << "#" << (Amt == 0 ? 32 : Amt) - << markup(">"); - } - else if (Amt) { - O << ", lsl " - << markup("<imm:") - << "#" << Amt + O << ", asr " << markup("<imm:") << "#" << (Amt == 0 ? 32 : Amt) << markup(">"); + } else if (Amt) { + O << ", lsl " << markup("<imm:") << "#" << Amt << markup(">"); } } void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); if (Imm == 0) @@ -736,6 +730,7 @@ void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); // A shift amount of 32 is encoded as 0. @@ -746,16 +741,19 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "{"; for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { - if (i != OpNum) O << ", "; + if (i != OpNum) + O << ", "; printRegName(O, MI->getOperand(i).getReg()); } O << "}"; } void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); printRegName(O, MRI.getSubReg(Reg, ARM::gsub_0)); @@ -763,8 +761,8 @@ void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum, printRegName(O, MRI.getSubReg(Reg, ARM::gsub_1)); } - void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); if (Op.getImm()) @@ -774,16 +772,16 @@ void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); O << ARM_PROC::IModToString(Op.getImm()); } void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); unsigned IFlags = Op.getImm(); - for (int i=2; i >= 0; --i) + for (int i = 2; i >= 0; --i) if (IFlags & (1 << i)) O << ARM_PROC::IFlagsToString(1 << i); @@ -792,11 +790,12 @@ void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); unsigned SpecRegRBit = Op.getImm() >> 4; unsigned Mask = Op.getImm() & 0xf; - uint64_t FeatureBits = getAvailableFeatures(); + uint64_t FeatureBits = STI.getFeatureBits(); if (FeatureBits & ARM::FeatureMClass) { unsigned SYSm = Op.getImm(); @@ -805,14 +804,30 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, // For writes, handle extended mask bits if the DSP extension is present. if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::FeatureDSPThumb2)) { switch (SYSm) { - case 0x400: O << "apsr_g"; return; - case 0xc00: O << "apsr_nzcvqg"; return; - case 0x401: O << "iapsr_g"; return; - case 0xc01: O << "iapsr_nzcvqg"; return; - case 0x402: O << "eapsr_g"; return; - case 0xc02: O << "eapsr_nzcvqg"; return; - case 0x403: O << "xpsr_g"; return; - case 0xc03: O << "xpsr_nzcvqg"; return; + case 0x400: + O << "apsr_g"; + return; + case 0xc00: + O << "apsr_nzcvqg"; + return; + case 0x401: + O << "iapsr_g"; + return; + case 0xc01: + O << "iapsr_nzcvqg"; + return; + case 0x402: + O << "eapsr_g"; + return; + case 0xc02: + O << "eapsr_nzcvqg"; + return; + case 0x403: + O << "xpsr_g"; + return; + case 0xc03: + O << "xpsr_nzcvqg"; + return; } } @@ -823,29 +838,66 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, // ARMv7-M deprecates using MSR APSR without a _<bits> qualifier as an // alias for MSR APSR_nzcvq. switch (SYSm) { - case 0: O << "apsr_nzcvq"; return; - case 1: O << "iapsr_nzcvq"; return; - case 2: O << "eapsr_nzcvq"; return; - case 3: O << "xpsr_nzcvq"; return; + case 0: + O << "apsr_nzcvq"; + return; + case 1: + O << "iapsr_nzcvq"; + return; + case 2: + O << "eapsr_nzcvq"; + return; + case 3: + O << "xpsr_nzcvq"; + return; } } switch (SYSm) { - default: llvm_unreachable("Unexpected mask value!"); - case 0: O << "apsr"; return; - case 1: O << "iapsr"; return; - case 2: O << "eapsr"; return; - case 3: O << "xpsr"; return; - case 5: O << "ipsr"; return; - case 6: O << "epsr"; return; - case 7: O << "iepsr"; return; - case 8: O << "msp"; return; - case 9: O << "psp"; return; - case 16: O << "primask"; return; - case 17: O << "basepri"; return; - case 18: O << "basepri_max"; return; - case 19: O << "faultmask"; return; - case 20: O << "control"; return; + default: + llvm_unreachable("Unexpected mask value!"); + case 0: + O << "apsr"; + return; + case 1: + O << "iapsr"; + return; + case 2: + O << "eapsr"; + return; + case 3: + O << "xpsr"; + return; + case 5: + O << "ipsr"; + return; + case 6: + O << "epsr"; + return; + case 7: + O << "iepsr"; + return; + case 8: + O << "msp"; + return; + case 9: + O << "psp"; + return; + case 16: + O << "primask"; + return; + case 17: + O << "basepri"; + return; + case 18: + O << "basepri_max"; + return; + case 19: + O << "faultmask"; + return; + case 20: + O << "control"; + return; } } @@ -854,10 +906,17 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) { O << "APSR_"; switch (Mask) { - default: llvm_unreachable("Unexpected mask value!"); - case 4: O << "g"; return; - case 8: O << "nzcvq"; return; - case 12: O << "nzcvqg"; return; + default: + llvm_unreachable("Unexpected mask value!"); + case 4: + O << "g"; + return; + case 8: + O << "nzcvq"; + return; + case 12: + O << "nzcvqg"; + return; } } @@ -868,14 +927,19 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, if (Mask) { O << '_'; - if (Mask & 8) O << 'f'; - if (Mask & 4) O << 's'; - if (Mask & 2) O << 'x'; - if (Mask & 1) O << 'c'; + if (Mask & 8) + O << 'f'; + if (Mask & 4) + O << 's'; + if (Mask & 2) + O << 'x'; + if (Mask & 1) + O << 'c'; } } void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { uint32_t Banked = MI->getOperand(OpNum).getImm(); uint32_t R = (Banked & 0x20) >> 5; @@ -886,25 +950,40 @@ void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum, if (R) { O << "SPSR_"; - switch(SysM) { - case 0x0e: O << "fiq"; return; - case 0x10: O << "irq"; return; - case 0x12: O << "svc"; return; - case 0x14: O << "abt"; return; - case 0x16: O << "und"; return; - case 0x1c: O << "mon"; return; - case 0x1e: O << "hyp"; return; - default: llvm_unreachable("Invalid banked SPSR register"); + switch (SysM) { + case 0x0e: + O << "fiq"; + return; + case 0x10: + O << "irq"; + return; + case 0x12: + O << "svc"; + return; + case 0x14: + O << "abt"; + return; + case 0x16: + O << "und"; + return; + case 0x1c: + O << "mon"; + return; + case 0x1e: + O << "hyp"; + return; + default: + llvm_unreachable("Invalid banked SPSR register"); } } assert(!R && "should have dealt with SPSR regs"); const char *RegNames[] = { - "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr", "", - "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq", "lr_fiq", "", - "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt", "sp_abt", "lr_und", "sp_und", - "", "", "", "", "lr_mon", "sp_mon", "elr_hyp", "sp_hyp" - }; + "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr", + "", "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq", + "lr_fiq", "", "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt", + "sp_abt", "lr_und", "sp_und", "", "", "", "", + "lr_mon", "sp_mon", "elr_hyp", "sp_hyp"}; const char *Name = RegNames[SysM]; assert(Name[0] && "invalid banked register operand"); @@ -912,6 +991,7 @@ void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); // Handle the undefined 15 CC value here for printing so we don't abort(). @@ -923,12 +1003,14 @@ void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); O << ARMCondCodeToString(CC); } void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { if (MI->getOperand(OpNum).getReg()) { assert(MI->getOperand(OpNum).getReg() == ARM::CPSR && @@ -938,33 +1020,38 @@ void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << MI->getOperand(OpNum).getImm(); } void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "p" << MI->getOperand(OpNum).getImm(); } void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "c" << MI->getOperand(OpNum).getImm(); } void ARMInstPrinter::printCoprocOptionImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "{" << MI->getOperand(OpNum).getImm() << "}"; } void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { llvm_unreachable("Unhandled PC-relative pseudo-instruction!"); } -template<unsigned scale> +template <unsigned scale> void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); if (MO.isExpr()) { @@ -985,25 +1072,26 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { - O << markup("<imm:") - << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4) + O << markup("<imm:") << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4) << markup(">"); } void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); - O << markup("<imm:") - << "#" << formatImm((Imm == 0 ? 32 : Imm)) + O << markup("<imm:") << "#" << formatImm((Imm == 0 ? 32 : Imm)) << markup(">"); } void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { // (3 - the number of trailing zeros) is the number of then / else. unsigned Mask = MI->getOperand(OpNum).getImm(); - unsigned Firstcond = MI->getOperand(OpNum-1).getImm(); + unsigned Firstcond = MI->getOperand(OpNum - 1).getImm(); unsigned CondBit0 = Firstcond & 1; unsigned NumTZ = countTrailingZeros(Mask); assert(NumTZ <= 3 && "Invalid IT mask!"); @@ -1017,12 +1105,13 @@ void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op + 1); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, Op, O); + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op, STI, O); return; } @@ -1037,22 +1126,21 @@ void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op, void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O, unsigned Scale) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op + 1); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, Op, O); + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op, STI, O); return; } O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); if (unsigned ImmOffs = MO2.getImm()) { - O << ", " - << markup("<imm:") - << "#" << formatImm(ImmOffs * Scale) + O << ", " << markup("<imm:") << "#" << formatImm(ImmOffs * Scale) << markup(">"); } O << "]" << markup(">"); @@ -1060,25 +1148,29 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI, void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { - printThumbAddrModeImm5SOperand(MI, Op, O, 1); + printThumbAddrModeImm5SOperand(MI, Op, STI, O, 1); } void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { - printThumbAddrModeImm5SOperand(MI, Op, O, 2); + printThumbAddrModeImm5SOperand(MI, Op, STI, O, 2); } void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { - printThumbAddrModeImm5SOperand(MI, Op, O, 4); + printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4); } void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { - printThumbAddrModeImm5SOperand(MI, Op, O, 4); + printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4); } // Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2 @@ -1086,9 +1178,10 @@ void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op, // REG 0 0 - e.g. R5 // REG IMM, SH_OPC - e.g. R5, LSL #3 void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); unsigned Reg = MO1.getReg(); printRegName(O, Reg); @@ -1101,12 +1194,13 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, OpNum, O); + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, OpNum, STI, O); return; } @@ -1119,26 +1213,20 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, if (OffImm == INT32_MIN) OffImm = 0; if (isSub) { - O << ", " - << markup("<imm:") - << "#-" << formatImm(-OffImm) - << markup(">"); - } - else if (AlwaysPrintImm0 || OffImm > 0) { - O << ", " - << markup("<imm:") - << "#" << formatImm(OffImm) - << markup(">"); + O << ", " << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">"); + } else if (AlwaysPrintImm0 || OffImm > 0) { + O << ", " << markup("<imm:") << "#" << formatImm(OffImm) << markup(">"); } O << "]" << markup(">"); } -template<bool AlwaysPrintImm0> +template <bool AlwaysPrintImm0> void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); @@ -1149,28 +1237,23 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, if (OffImm == INT32_MIN) OffImm = 0; if (isSub) { - O << ", " - << markup("<imm:") - << "#-" << -OffImm - << markup(">"); + O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">"); } else if (AlwaysPrintImm0 || OffImm > 0) { - O << ", " - << markup("<imm:") - << "#" << OffImm - << markup(">"); + O << ", " << markup("<imm:") << "#" << OffImm << markup(">"); } O << "]" << markup(">"); } -template<bool AlwaysPrintImm0> +template <bool AlwaysPrintImm0> void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); - if (!MO1.isReg()) { // For label symbolic references. - printOperand(MI, OpNum, O); + if (!MO1.isReg()) { // For label symbolic references. + printOperand(MI, OpNum, STI, O); return; } @@ -1186,39 +1269,31 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, if (OffImm == INT32_MIN) OffImm = 0; if (isSub) { - O << ", " - << markup("<imm:") - << "#-" << -OffImm - << markup(">"); + O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">"); } else if (AlwaysPrintImm0 || OffImm > 0) { - O << ", " - << markup("<imm:") - << "#" << OffImm - << markup(">"); + O << ", " << markup("<imm:") << "#" << OffImm << markup(">"); } O << "]" << markup(">"); } -void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); if (MO2.getImm()) { - O << ", " - << markup("<imm:") - << "#" << formatImm(MO2.getImm() * 4) + O << ", " << markup("<imm:") << "#" << formatImm(MO2.getImm() * 4) << markup(">"); } O << "]" << markup(">"); } -void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printT2AddrModeImm8OffsetOperand( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); int32_t OffImm = (int32_t)MO1.getImm(); O << ", " << markup("<imm:"); @@ -1231,9 +1306,9 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, O << markup(">"); } -void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); int32_t OffImm = (int32_t)MO1.getImm(); @@ -1251,10 +1326,11 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); + const MCOperand &MO3 = MI->getOperand(OpNum + 2); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); @@ -1266,71 +1342,74 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, unsigned ShAmt = MO3.getImm(); if (ShAmt) { assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); - O << ", lsl " - << markup("<imm:") - << "#" << ShAmt - << markup(">"); + O << ", lsl " << markup("<imm:") << "#" << ShAmt << markup(">"); } O << "]" << markup(">"); } void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); - O << markup("<imm:") - << '#' << ARM_AM::getFPImmFloat(MO.getImm()) + O << markup("<imm:") << '#' << ARM_AM::getFPImmFloat(MO.getImm()) << markup(">"); } void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned EncodedImm = MI->getOperand(OpNum).getImm(); unsigned EltBits; uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); - O << markup("<imm:") - << "#0x"; + O << markup("<imm:") << "#0x"; O.write_hex(Val); O << markup(">"); } void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); - O << markup("<imm:") - << "#" << formatImm(Imm + 1) - << markup(">"); + O << markup("<imm:") << "#" << formatImm(Imm + 1) << markup(">"); } void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); if (Imm == 0) return; - O << ", ror " - << markup("<imm:") - << "#"; + O << ", ror " << markup("<imm:") << "#"; switch (Imm) { - default: assert (0 && "illegal ror immediate!"); - case 1: O << "8"; break; - case 2: O << "16"; break; - case 3: O << "24"; break; + default: + assert(0 && "illegal ror immediate!"); + case 1: + O << "8"; + break; + case 2: + O << "16"; + break; + case 3: + O << "24"; + break; } O << markup(">"); } void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { MCOperand Op = MI->getOperand(OpNum); // Support for fixups (MCFixup) if (Op.isExpr()) - return printOperand(MI, OpNum, O); + return printOperand(MI, OpNum, STI, O); unsigned Bits = Op.getImm() & 0xFF; unsigned Rot = (Op.getImm() & 0xF00) >> 7; - bool PrintUnsigned = false; - switch (MI->getOpcode()){ + bool PrintUnsigned = false; + switch (MI->getOpcode()) { case ARM::MOVi: // Movs to PC should be treated unsigned PrintUnsigned = (MI->getOperand(OpNum - 1).getReg() == ARM::PC); @@ -1354,36 +1433,30 @@ void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum, } // Explicit #bits, #rot implied - O << "#" - << markup("<imm:") - << Bits - << markup(">") - << ", #" - << markup("<imm:") - << Rot - << markup(">"); + O << "#" << markup("<imm:") << Bits << markup(">") << ", #" << markup("<imm:") + << Rot << markup(">"); } void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << markup("<imm:") - << "#" << 16 - MI->getOperand(OpNum).getImm() + const MCSubtargetInfo &STI, raw_ostream &O) { + O << markup("<imm:") << "#" << 16 - MI->getOperand(OpNum).getImm() << markup(">"); } void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << markup("<imm:") - << "#" << 32 - MI->getOperand(OpNum).getImm() + const MCSubtargetInfo &STI, raw_ostream &O) { + O << markup("<imm:") << "#" << 32 - MI->getOperand(OpNum).getImm() << markup(">"); } void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "[" << MI->getOperand(OpNum).getImm() << "]"; } void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "{"; printRegName(O, MI->getOperand(OpNum).getReg()); @@ -1391,7 +1464,8 @@ void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1); @@ -1402,8 +1476,8 @@ void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum, O << "}"; } -void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, - unsigned OpNum, +void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); @@ -1416,6 +1490,7 @@ void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, } void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the @@ -1430,6 +1505,7 @@ void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the @@ -1447,6 +1523,7 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "{"; printRegName(O, MI->getOperand(OpNum).getReg()); @@ -1455,6 +1532,7 @@ void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI, void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); @@ -1468,6 +1546,7 @@ void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the @@ -1482,8 +1561,9 @@ void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, } void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { + unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. @@ -1498,9 +1578,9 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, O << "[]}"; } -void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printVectorListTwoSpacedAllLanes( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2); @@ -1511,24 +1591,24 @@ void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, O << "[]}"; } -void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printVectorListThreeSpacedAllLanes( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. O << "{"; printRegName(O, MI->getOperand(OpNum).getReg()); - O << "[], "; + O << "[], "; printRegName(O, MI->getOperand(OpNum).getReg() + 2); O << "[], "; printRegName(O, MI->getOperand(OpNum).getReg() + 4); O << "[]}"; } -void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printVectorListFourSpacedAllLanes( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. @@ -1545,6 +1625,7 @@ void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI, void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the @@ -1558,9 +1639,9 @@ void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, O << "}"; } -void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index f179e01..3927c9f 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -24,146 +24,207 @@ class MCOperand; class ARMInstPrinter : public MCInstPrinter { public: ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + const MCRegisterInfo &MRI); - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; void printRegName(raw_ostream &OS, unsigned RegNo) const override; // Autogenerated by tblgen. - void printInstruction(const MCInst *MI, raw_ostream &O); + void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - - void printSORegRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSORegImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printAddrModeTBB(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAddrModeTBH(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); + + void printSORegRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printSORegImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + + void printAddrModeTBB(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAddrModeTBH(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); template <bool AlwaysPrintImm0> - void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O, bool AlwaysPrintImm0); void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); - void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); template <bool AlwaysPrintImm0> - void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printInstSyncBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printMemBOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printInstSyncBOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printShiftImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); template <unsigned scale> - void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printThumbSRImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printThumbITMask(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O, unsigned Scale); void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); - void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - template<bool AlwaysPrintImm0> + void printT2SOOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + template <bool AlwaysPrintImm0> void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - template<bool AlwaysPrintImm0> + const MCSubtargetInfo &STI, raw_ostream &O); + template <bool AlwaysPrintImm0> void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - template<bool AlwaysPrintImm0> + const MCSubtargetInfo &STI, raw_ostream &O); + template <bool AlwaysPrintImm0> void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, + raw_ostream &O); void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printBankedRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + + void printSetendOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printCPSIMod(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printCPSIFlag(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printBankedRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printPredicateOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printSBitModifierOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCoprocOptionImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + void printRegisterList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printNoHashImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printPImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printCImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printCoprocOptionImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printFPImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printRotImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printModImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printGPRPairOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + + void printPCLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + void printFBits16(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printFBits32(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printVectorIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printVectorListOne(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printVectorListTwo(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + void printVectorListThree(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printVectorListFour(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); }; } // end namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 0b2e3b0..590d72f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -153,18 +153,20 @@ void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) { } } // end anonymous namespace -static unsigned getRelaxedOpcode(unsigned Op) { +unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op) const { + bool HasThumb2 = STI->getFeatureBits() & ARM::FeatureThumb2; + switch (Op) { default: return Op; case ARM::tBcc: - return ARM::t2Bcc; + return HasThumb2 ? (unsigned)ARM::t2Bcc : Op; case ARM::tLDRpci: - return ARM::t2LDRpci; + return HasThumb2 ? (unsigned)ARM::t2LDRpci : Op; case ARM::tADR: - return ARM::t2ADR; + return HasThumb2 ? (unsigned)ARM::t2ADR : Op; case ARM::tB: - return ARM::t2B; + return HasThumb2 ? (unsigned)ARM::t2B : Op; case ARM::tCBZ: return ARM::tHINT; case ARM::tCBNZ: @@ -589,7 +591,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 && (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) { if (A) { - const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); + const MCSymbol &Sym = A->getSymbol(); if (Asm.isThumbFunc(&Sym)) Value |= 1; } @@ -598,7 +600,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // the basic blocks of the same function. Thus, we would like to resolve // the offset when the destination has the same MCFragment. if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { - const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); + const MCSymbol &Sym = A->getSymbol(); const MCSymbolData &SymData = Asm.getSymbolData(Sym); IsResolved = (SymData.getFragment() == DF); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index f4f1082..4fa8c79 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -47,6 +47,8 @@ public: void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const override; + unsigned getRelaxedOpcode(unsigned Op) const; + bool mayNeedRelaxation(const MCInst &Inst) const override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h index 3bd7ab7..ebef789 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -23,7 +23,7 @@ public: HasDataInCodeSupport = true; } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM, Subtype); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h index 4efd325..263c4c4 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h @@ -18,7 +18,7 @@ public: ARMAsmBackendELF(const Target &T, StringRef TT, uint8_t OSABI, bool IsLittle) : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createARMELFObjectWriter(OS, OSABI, isLittle()); } }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h index 33be347..f2c4358 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h @@ -17,7 +17,7 @@ class ARMAsmBackendWinCOFF : public ARMAsmBackend { public: ARMAsmBackendWinCOFF(const Target &T, StringRef Triple) : ARMAsmBackend(T, Triple, true) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false); } }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index a821a6b..f4fedee 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -32,7 +32,7 @@ namespace { public: ARMELFObjectWriter(uint8_t OSABI); - virtual ~ARMELFObjectWriter(); + ~ARMELFObjectWriter() override; unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; @@ -81,7 +81,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, unsigned Type = 0; if (IsPCRel) { switch ((unsigned)Fixup.getKind()) { - default: llvm_unreachable("Unimplemented"); + default: + report_fatal_error("unsupported relocation on symbol"); + return ELF::R_ARM_NONE; case FK_Data_4: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); @@ -147,7 +149,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, } } else { switch ((unsigned)Fixup.getKind()) { - default: llvm_unreachable("invalid fixup kind!"); + default: + report_fatal_error("unsupported relocation on symbol"); + return ELF::R_ARM_NONE; case FK_Data_1: switch (Modifier) { default: llvm_unreachable("unsupported Modifier"); @@ -247,7 +251,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, return Type; } -MCObjectWriter *llvm::createARMELFObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, bool IsLittleEndian) { MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI); diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 9648ffa..e7c777e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -472,7 +472,7 @@ class ARMELFStreamer : public MCELFStreamer { public: friend class ARMTargetELFStreamer; - ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, + ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool IsThumb) : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) { @@ -1083,14 +1083,13 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, } // Get .ARM.extab or .ARM.exidx section - const MCSectionELF *EHSection = nullptr; - if (const MCSymbol *Group = FnSection.getGroup()) { - EHSection = - getContext().getELFSection(EHSecName, Type, Flags | ELF::SHF_GROUP, - FnSection.getEntrySize(), Group->getName()); - } else { - EHSection = getContext().getELFSection(EHSecName, Type, Flags); - } + const MCSymbol *Group = FnSection.getGroup(); + if (Group) + Flags |= ELF::SHF_GROUP; + const MCSectionELF *EHSection = + getContext().getELFSection(EHSecName, Type, Flags, 0, Group, + FnSection.getUniqueID(), nullptr, &FnSection); + assert(EHSection && "Failed to get the required EH section"); // Switch to .ARM.extab or .ARM.exidx section @@ -1383,8 +1382,9 @@ MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S, } MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool IsThumb) { + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, + bool IsThumb) { ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb); // FIXME: This should eventually end up somewhere else where more // intelligent flag decisions can be made. For now we are just maintaining diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index e48cabb..6b650f0 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -48,7 +48,7 @@ public: : MCII(mcii), CTX(ctx), IsLittleEndian(IsLittle) { } - ~ARMMCCodeEmitter() {} + ~ARMMCCodeEmitter() override {} bool isThumb(const MCSubtargetInfo &STI) const { return (STI.getFeatureBits() & ARM::ModeThumb) != 0; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 7ff7f9a..daa8af2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -255,7 +255,7 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU, std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU); if (!FS.empty()) { if (!ArchFS.empty()) - ArchFS = ArchFS + "," + FS.str(); + ArchFS = (Twine(ArchFS) + "," + FS).str(); else ArchFS = FS; } @@ -310,27 +310,26 @@ static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, } static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, - MCAsmBackend &MAB, raw_ostream &OS, + MCAsmBackend &MAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, T.getArch() == Triple::thumb); } static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll, bool DWARFMustBeAtTheEnd) { return createMachOStreamer(Ctx, MAB, OS, Emitter, false, DWARFMustBeAtTheEnd); } -static MCInstPrinter *createARMMCInstPrinter(const Target &T, +static MCInstPrinter *createARMMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) - return new ARMInstPrinter(MAI, MII, MRI, STI); + return new ARMInstPrinter(MAI, MII, MRI); return nullptr; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 7e9ba66..24ca567 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -34,6 +34,7 @@ class StringRef; class Target; class Triple; class raw_ostream; +class raw_pwrite_stream; extern Target TheARMLETarget, TheThumbLETarget; extern Target TheARMBETarget, TheThumbBETarget; @@ -41,9 +42,8 @@ extern Target TheARMBETarget, TheThumbBETarget; namespace ARM_MC { std::string ParseARMTriple(StringRef TT, StringRef CPU); - /// createARMMCSubtargetInfo - Create a ARM MCSubtargetInfo instance. - /// This is exposed so Asm parser, etc. do not need to go through - /// TargetRegistry. + /// Create a ARM MCSubtargetInfo instance. This is exposed so Asm parser, etc. + /// do not need to go through TargetRegistry. MCSubtargetInfo *createARMMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS); } @@ -83,24 +83,23 @@ MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI // Construct a PE/COFF machine code streamer which will generate a PE/COFF // object file. MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll); + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll); -/// createARMELFObjectWriter - Construct an ELF Mach-O object writer. -MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS, - uint8_t OSABI, +/// Construct an ELF Mach-O object writer. +MCObjectWriter *createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, bool IsLittleEndian); -/// createARMMachObjectWriter - Construct an ARM Mach-O object writer. -MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, - bool Is64Bit, +/// Construct an ARM Mach-O object writer. +MCObjectWriter *createARMMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype); -/// createARMWinCOFFObjectWriter - Construct an ARM PE/COFF object writer. -MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit); +/// Construct an ARM PE/COFF object writer. +MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit); -/// createARMMachORelocationInfo - Construct ARM Mach-O relocation info. +/// Construct ARM Mach-O relocation info. MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx); } // End llvm namespace diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 3187d36..b1f9b58 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -88,6 +88,7 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: case ARM::fixup_arm_adr_pcrel_12: + case ARM::fixup_arm_thumb_br: return false; // Handle 24-bit branch kinds. @@ -101,12 +102,6 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, Log2Size = llvm::Log2_32(4); return true; - // Handle Thumb branches. - case ARM::fixup_arm_thumb_br: - RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22); - Log2Size = llvm::Log2_32(2); - return true; - case ARM::fixup_t2_uncondbranch: case ARM::fixup_arm_thumb_bl: case ARM::fixup_arm_thumb_blx: @@ -477,9 +472,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS, - bool Is64Bit, - uint32_t CPUType, +MCObjectWriter *llvm::createARMMachObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) { return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit, CPUType, diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index 2fd6445..166c04b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -26,7 +26,7 @@ public: : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) { assert(!Is64Bit && "AArch64 support not yet implemented"); } - virtual ~ARMWinCOFFObjectWriter() { } + ~ARMWinCOFFObjectWriter() override {} unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection, @@ -82,7 +82,8 @@ bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { } namespace llvm { -MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit) { +MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit) { MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit); return createWinCOFFObjectWriter(MOTW, OS); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp index dc707dc..b993b1b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -16,8 +16,8 @@ namespace { class ARMWinCOFFStreamer : public MCWinCOFFStreamer { public: ARMWinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE, - raw_ostream &OS) - : MCWinCOFFStreamer(C, AB, CE, OS) { } + raw_pwrite_stream &OS) + : MCWinCOFFStreamer(C, AB, CE, OS) {} void EmitAssemblerFlag(MCAssemblerFlag Flag) override; void EmitThumbFunc(MCSymbol *Symbol) override; @@ -38,7 +38,8 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) { } MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context, - MCAsmBackend &MAB, raw_ostream &OS, + MCAsmBackend &MAB, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { return new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS); diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp index b91b0e1..b2599fe 100644 --- a/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -132,7 +132,7 @@ SDNode *BPFDAGToDAGISel::Select(SDNode *Node) { } case ISD::FrameIndex: { - int FI = dyn_cast<FrameIndexSDNode>(Node)->getIndex(); + int FI = cast<FrameIndexSDNode>(Node)->getIndex(); EVT VT = Node->getValueType(0); SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); unsigned Opc = BPF::MOV_rr; diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp index d94416b..37f9164 100644 --- a/lib/Target/BPF/BPFISelLowering.cpp +++ b/lib/Target/BPF/BPFISelLowering.cpp @@ -63,11 +63,11 @@ public: std::string Str; raw_string_ostream OS(Str); - if (DLoc.isUnknown() == false) { - DILocation DIL(DLoc.getAsMDNode(Fn.getContext())); - StringRef Filename = DIL.getFilename(); - unsigned Line = DIL.getLineNumber(); - unsigned Column = DIL.getColumnNumber(); + if (DLoc) { + auto DIL = DLoc.get(); + StringRef Filename = DIL->getFilename(); + unsigned Line = DIL->getLine(); + unsigned Column = DIL->getColumn(); OS << Filename << ':' << Line << ':' << Column << ' '; } @@ -137,7 +137,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); - setOperationAction(ISD::BSWAP, MVT::i64, Expand); setOperationAction(ISD::CTTZ, MVT::i64, Custom); setOperationAction(ISD::CTLZ, MVT::i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom); @@ -538,12 +537,10 @@ SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op, MachineBasicBlock * BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { - unsigned Opc = MI->getOpcode(); - const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); - assert(Opc == BPF::Select && "Unexpected instr type to insert"); + assert(MI->getOpcode() == BPF::Select && "Unexpected instr type to insert"); // To "insert" a SELECT instruction, we actually have to insert the diamond // control-flow pattern. The incoming instruction knows the destination vreg diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td index 47001f0..26b2cfe 100644 --- a/lib/Target/BPF/BPFInstrInfo.td +++ b/lib/Target/BPF/BPFInstrInfo.td @@ -231,8 +231,6 @@ class MOV_RI<string OpcodeStr> let BPFSrc = 0; // BPF_K let BPFClass = 7; // BPF_ALU64 } -def MOV_rr : MOV_RR<"mov">; -def MOV_ri : MOV_RI<"mov">; class LD_IMM64<bits<4> Pseudo, string OpcodeStr> : InstBPF<(outs GPR:$dst), (ins u64imm:$imm), @@ -255,7 +253,35 @@ class LD_IMM64<bits<4> Pseudo, string OpcodeStr> let size = 3; // BPF_DW let BPFClass = 0; // BPF_LD } + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def LD_imm64 : LD_IMM64<0, "ld_64">; +def MOV_rr : MOV_RR<"mov">; +def MOV_ri : MOV_RI<"mov">; +} + +def LD_pseudo + : InstBPF<(outs GPR:$dst), (ins i64imm:$pseudo, u64imm:$imm), + "ld_pseudo\t$dst, $pseudo, $imm", + [(set GPR:$dst, (int_bpf_pseudo imm:$pseudo, imm:$imm))]> { + + bits<3> mode; + bits<2> size; + bits<4> dst; + bits<64> imm; + bits<4> pseudo; + + let Inst{63-61} = mode; + let Inst{60-59} = size; + let Inst{51-48} = dst; + let Inst{55-52} = pseudo; + let Inst{47-32} = 0; + let Inst{31-0} = imm{31-0}; + + let mode = 0; // BPF_IMM + let size = 3; // BPF_DW + let BPFClass = 0; // BPF_LD +} // STORE instructions class STORE<bits<2> SizeOp, string OpcodeStr, list<dag> Pattern> @@ -461,6 +487,33 @@ def XADD64 : XADD<3, "xadd64", atomic_load_add_64>; // undefined def XADD8 : XADD<2, "xadd8", atomic_load_add_8>; } +// bswap16, bswap32, bswap64 +class BSWAP<bits<32> SizeOp, string OpcodeStr, list<dag> Pattern> + : InstBPF<(outs GPR:$dst), (ins GPR:$src), + !strconcat(OpcodeStr, "\t$dst"), + Pattern> { + bits<4> op; + bits<1> BPFSrc; + bits<4> dst; + bits<32> imm; + + let Inst{63-60} = op; + let Inst{59} = BPFSrc; + let Inst{51-48} = dst; + let Inst{31-0} = imm; + + let op = 0xd; // BPF_END + let BPFSrc = 1; // BPF_TO_BE (TODO: use BPF_TO_LE for big-endian target) + let BPFClass = 4; // BPF_ALU + let imm = SizeOp; +} + +let Constraints = "$dst = $src" in { +def BSWAP16 : BSWAP<16, "bswap16", [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>; +def BSWAP32 : BSWAP<32, "bswap32", [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>; +def BSWAP64 : BSWAP<64, "bswap64", [(set GPR:$dst, (bswap GPR:$src))]>; +} + let Defs = [R0, R1, R2, R3, R4, R5], Uses = [R6], hasSideEffects = 1, hasExtraDefRegAllocReq = 1, hasExtraSrcRegAllocReq = 1, mayLoad = 1 in { class LOAD_ABS<bits<2> SizeOp, string OpcodeStr, Intrinsic OpNode> diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp index 3f09379..05f6d82 100644 --- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp +++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp @@ -27,7 +27,7 @@ using namespace llvm; #include "BPFGenAsmWriter.inc" void BPFInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { printInstruction(MI, O); printAnnotation(O, Annot); } diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h index d7c2899..adcaff6 100644 --- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h +++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h @@ -25,7 +25,8 @@ public: const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier = nullptr); void printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O, diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 87c8077..8393135 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -31,7 +31,7 @@ public: void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const override; - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override; + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; // No instruction requires relaxation bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, @@ -71,7 +71,7 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, *(uint16_t *)&Data[Fixup.getOffset() + 2] = (uint16_t)((Value - 8) / 8); } -MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_ostream &OS) const { +MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { return createBPFELFObjectWriter(OS, 0); } } diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index 169a8a7..a5562c1 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -47,7 +47,7 @@ unsigned BPFELFObjectWriter::GetRelocType(const MCValue &Target, } } -MCObjectWriter *llvm::createBPFELFObjectWriter(raw_ostream &OS, uint8_t OSABI) { +MCObjectWriter *llvm::createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) { MCELFObjectTargetWriter *MOTW = new BPFELFObjectWriter(OSABI); return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); } diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp index 9c51d66..32d2ef5 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp @@ -76,9 +76,8 @@ unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI, assert(MO.isExpr()); const MCExpr *Expr = MO.getExpr(); - MCExpr::ExprKind Kind = Expr->getKind(); - assert(Kind == MCExpr::SymbolRef); + assert(Expr->getKind() == MCExpr::SymbolRef); if (MI.getOpcode() == BPF::JAL) // func call name @@ -125,7 +124,7 @@ void BPFMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, // Keep track of the current byte being emitted unsigned CurByte = 0; - if (Opcode == BPF::LD_imm64) { + if (Opcode == BPF::LD_imm64 || Opcode == BPF::LD_pseudo) { uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI); EmitByte(Value >> 56, CurByte, OS); EmitByte(((Value >> 48) & 0xff), CurByte, OS); diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index fd04001..95f0b02 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -63,15 +63,16 @@ static MCCodeGenInfo *createBPFMCCodeGenInfo(StringRef TT, Reloc::Model RM, static MCStreamer *createBPFMCStreamer(const Triple &T, MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll); } -static MCInstPrinter * -createBPFMCInstPrinter(const Target &T, unsigned SyntaxVariant, - const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { +static MCInstPrinter *createBPFMCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) return new BPFInstPrinter(MAI, MII, MRI); return 0; diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h index 1fd2bec..ce08b7c 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h @@ -28,6 +28,7 @@ class MCSubtargetInfo; class Target; class StringRef; class raw_ostream; +class raw_pwrite_stream; extern Target TheBPFTarget; @@ -38,7 +39,7 @@ MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createBPFELFObjectWriter(raw_ostream &OS, uint8_t OSABI); +MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI); } // Defines symbolic names for BPF registers. This defines a mapping from diff --git a/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt b/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt index 209d17c..8bca2e3 100644 --- a/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = BPFDesc parent = BPF -required_libraries = MC BPFAsmPrinter BPFInfo +required_libraries = MC BPFAsmPrinter BPFInfo Support add_to_library_groups = BPF diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index d0e2010..f1a7127 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -15,6 +15,7 @@ #include "CPPTargetMachine.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Config/config.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -91,6 +92,7 @@ namespace { /// CppWriter - This class is the main chunk of code that converts an LLVM /// module to a C++ translation unit. class CppWriter : public ModulePass { + std::unique_ptr<formatted_raw_ostream> OutOwner; formatted_raw_ostream &Out; const Module *TheModule; uint64_t uniqueNum; @@ -105,8 +107,9 @@ namespace { public: static char ID; - explicit CppWriter(formatted_raw_ostream &o) : - ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){} + explicit CppWriter(std::unique_ptr<formatted_raw_ostream> o) + : ModulePass(ID), OutOwner(std::move(o)), Out(*OutOwner), uniqueNum(0), + is_inline(false), indent_level(0) {} const char *getPassName() const override { return "C++ backend"; } @@ -1721,7 +1724,7 @@ void CppWriter::printFunctionUses(const Function* F) { // initializers. if (GenerationType != GenFunction) { nl(Out) << "// Global Variable Definitions"; nl(Out); - for (const auto &GV : gvs) { + for (auto *GV : gvs) { if (GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) printVariableBody(Var); } @@ -2146,13 +2149,12 @@ char CppWriter::ID = 0; // External Interface declaration //===----------------------------------------------------------------------===// -bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &o, - CodeGenFileType FileType, - bool DisableVerify, - AnalysisID StartAfter, - AnalysisID StopAfter) { - if (FileType != TargetMachine::CGFT_AssemblyFile) return true; - PM.add(new CppWriter(o)); +bool CPPTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &o, CodeGenFileType FileType, + bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { + if (FileType != TargetMachine::CGFT_AssemblyFile) + return true; + auto FOut = llvm::make_unique<formatted_raw_ostream>(o); + PM.add(new CppWriter(std::move(FOut))); return false; } diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index 678a932..02d705e 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -29,7 +29,7 @@ struct CPPTargetMachine : public TargetMachine { : TargetMachine(T, "", TT, CPU, FS, Options) {} public: - bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, + bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) override; diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index c6ffb96..758ccc7 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -15,6 +15,7 @@ add_llvm_target(HexagonCodeGen HexagonAsmPrinter.cpp HexagonCFGOptimizer.cpp HexagonCopyToCombine.cpp + HexagonExpandCondsets.cpp HexagonExpandPredSpillCode.cpp HexagonFixupHwLoops.cpp HexagonFrameLowering.cpp diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 180762f..f0c81e0 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -222,21 +222,6 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } -static MCInstPrinter *createHexagonMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - if (SyntaxVariant == 0) - return(new HexagonInstPrinter(MAI, MII, MRI)); - else - return nullptr; -} - extern "C" void LLVMInitializeHexagonAsmPrinter() { RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget); - - TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget, - createHexagonMCInstPrinter); } diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp new file mode 100644 index 0000000..37ed173 --- /dev/null +++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -0,0 +1,1348 @@ +// Replace mux instructions with the corresponding legal instructions. +// It is meant to work post-SSA, but still on virtual registers. It was +// originally placed between register coalescing and machine instruction +// scheduler. +// In this place in the optimization sequence, live interval analysis had +// been performed, and the live intervals should be preserved. A large part +// of the code deals with preserving the liveness information. +// +// Liveness tracking aside, the main functionality of this pass is divided +// into two steps. The first step is to replace an instruction +// vreg0 = C2_mux vreg0, vreg1, vreg2 +// with a pair of conditional transfers +// vreg0 = A2_tfrt vreg0, vreg1 +// vreg0 = A2_tfrf vreg0, vreg2 +// It is the intention that the execution of this pass could be terminated +// after this step, and the code generated would be functionally correct. +// +// If the uses of the source values vreg1 and vreg2 are kills, and their +// definitions are predicable, then in the second step, the conditional +// transfers will then be rewritten as predicated instructions. E.g. +// vreg0 = A2_or vreg1, vreg2 +// vreg3 = A2_tfrt vreg99, vreg0<kill> +// will be rewritten as +// vreg3 = A2_port vreg99, vreg1, vreg2 +// +// This replacement has two variants: "up" and "down". Consider this case: +// vreg0 = A2_or vreg1, vreg2 +// ... [intervening instructions] ... +// vreg3 = A2_tfrt vreg99, vreg0<kill> +// variant "up": +// vreg3 = A2_port vreg99, vreg1, vreg2 +// ... [intervening instructions, vreg0->vreg3] ... +// [deleted] +// variant "down": +// [deleted] +// ... [intervening instructions] ... +// vreg3 = A2_port vreg99, vreg1, vreg2 +// +// Both, one or none of these variants may be valid, and checks are made +// to rule out inapplicable variants. +// +// As an additional optimization, before either of the two steps above is +// executed, the pass attempts to coalesce the target register with one of +// the source registers, e.g. given an instruction +// vreg3 = C2_mux vreg0, vreg1, vreg2 +// vreg3 will be coalesced with either vreg1 or vreg2. If this succeeds, +// the instruction would then be (for example) +// vreg3 = C2_mux vreg0, vreg3, vreg2 +// and, under certain circumstances, this could result in only one predicated +// instruction: +// vreg3 = A2_tfrf vreg0, vreg2 +// + +#define DEBUG_TYPE "expand-condsets" +#include "HexagonTargetMachine.h" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit", + cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions")); +static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit", + cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings")); + +namespace llvm { + void initializeHexagonExpandCondsetsPass(PassRegistry&); + FunctionPass *createHexagonExpandCondsets(); +} + +namespace { + class HexagonExpandCondsets : public MachineFunctionPass { + public: + static char ID; + HexagonExpandCondsets() : + MachineFunctionPass(ID), HII(0), TRI(0), MRI(0), + LIS(0), CoaLimitActive(false), + TfrLimitActive(false), CoaCounter(0), TfrCounter(0) { + if (OptCoaLimit.getPosition()) + CoaLimitActive = true, CoaLimit = OptCoaLimit; + if (OptTfrLimit.getPosition()) + TfrLimitActive = true, TfrLimit = OptTfrLimit; + initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); + } + + virtual const char *getPassName() const { + return "Hexagon Expand Condsets"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreserved<SlotIndexes>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + const HexagonInstrInfo *HII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + LiveIntervals *LIS; + + bool CoaLimitActive, TfrLimitActive; + unsigned CoaLimit, TfrLimit, CoaCounter, TfrCounter; + + struct RegisterRef { + RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()), + Sub(Op.getSubReg()) {} + RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {} + bool operator== (RegisterRef RR) const { + return Reg == RR.Reg && Sub == RR.Sub; + } + bool operator!= (RegisterRef RR) const { return !operator==(RR); } + unsigned Reg, Sub; + }; + + typedef DenseMap<unsigned,unsigned> ReferenceMap; + enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) }; + enum { Exec_Then = 0x10, Exec_Else = 0x20 }; + unsigned getMaskForSub(unsigned Sub); + bool isCondset(const MachineInstr *MI); + + void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec); + bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec); + + LiveInterval::iterator nextSegment(LiveInterval &LI, SlotIndex S); + LiveInterval::iterator prevSegment(LiveInterval &LI, SlotIndex S); + void makeDefined(unsigned Reg, SlotIndex S, bool SetDef); + void makeUndead(unsigned Reg, SlotIndex S); + void shrinkToUses(unsigned Reg, LiveInterval &LI); + void updateKillFlags(unsigned Reg, LiveInterval &LI); + void terminateSegment(LiveInterval::iterator LT, SlotIndex S, + LiveInterval &LI); + void addInstrToLiveness(MachineInstr *MI); + void removeInstrFromLiveness(MachineInstr *MI); + + unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond); + MachineInstr *genTfrFor(MachineOperand &SrcOp, unsigned DstR, + unsigned DstSR, const MachineOperand &PredOp, bool Cond); + bool split(MachineInstr *MI); + bool splitInBlock(MachineBasicBlock &B); + + bool isPredicable(MachineInstr *MI); + MachineInstr *getReachingDefForPred(RegisterRef RD, + MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond); + bool canMoveOver(MachineInstr *MI, ReferenceMap &Defs, ReferenceMap &Uses); + bool canMoveMemTo(MachineInstr *MI, MachineInstr *ToI, bool IsDown); + void predicateAt(RegisterRef RD, MachineInstr *MI, + MachineBasicBlock::iterator Where, unsigned PredR, bool Cond); + void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR, + bool Cond, MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Last); + bool predicate(MachineInstr *TfrI, bool Cond); + bool predicateInBlock(MachineBasicBlock &B); + + void postprocessUndefImplicitUses(MachineBasicBlock &B); + void removeImplicitUses(MachineInstr *MI); + void removeImplicitUses(MachineBasicBlock &B); + + bool isIntReg(RegisterRef RR, unsigned &BW); + bool isIntraBlocks(LiveInterval &LI); + bool coalesceRegisters(RegisterRef R1, RegisterRef R2); + bool coalesceSegments(MachineFunction &MF); + }; +} + +char HexagonExpandCondsets::ID = 0; + + +unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) { + switch (Sub) { + case Hexagon::subreg_loreg: + return Sub_Low; + case Hexagon::subreg_hireg: + return Sub_High; + case Hexagon::NoSubRegister: + return Sub_None; + } + llvm_unreachable("Invalid subregister"); +} + + +bool HexagonExpandCondsets::isCondset(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::C2_mux: + case Hexagon::C2_muxii: + case Hexagon::C2_muxir: + case Hexagon::C2_muxri: + case Hexagon::MUX64_rr: + return true; + break; + } + return false; +} + + +void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map, + unsigned Exec) { + unsigned Mask = getMaskForSub(RR.Sub) | Exec; + ReferenceMap::iterator F = Map.find(RR.Reg); + if (F == Map.end()) + Map.insert(std::make_pair(RR.Reg, Mask)); + else + F->second |= Mask; +} + + +bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map, + unsigned Exec) { + ReferenceMap::iterator F = Map.find(RR.Reg); + if (F == Map.end()) + return false; + unsigned Mask = getMaskForSub(RR.Sub) | Exec; + if (Mask & F->second) + return true; + return false; +} + + +LiveInterval::iterator HexagonExpandCondsets::nextSegment(LiveInterval &LI, + SlotIndex S) { + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (I->start >= S) + return I; + } + return LI.end(); +} + + +LiveInterval::iterator HexagonExpandCondsets::prevSegment(LiveInterval &LI, + SlotIndex S) { + LiveInterval::iterator P = LI.end(); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (I->end > S) + return P; + P = I; + } + return P; +} + + +/// Find the implicit use of register Reg in slot index S, and make sure +/// that the "defined" flag is set to SetDef. While the mux expansion is +/// going on, predicated instructions will have implicit uses of the +/// registers that are being defined. This is to keep any preceding +/// definitions live. If there is no preceding definition, the implicit +/// use will be marked as "undef", otherwise it will be "defined". This +/// function is used to update the flag. +void HexagonExpandCondsets::makeDefined(unsigned Reg, SlotIndex S, + bool SetDef) { + if (!S.isRegister()) + return; + MachineInstr *MI = LIS->getInstructionFromIndex(S); + assert(MI && "Expecting instruction"); + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) + continue; + bool IsDef = !Op.isUndef(); + if (Op.isImplicit() && IsDef != SetDef) + Op.setIsUndef(!SetDef); + } +} + + +void HexagonExpandCondsets::makeUndead(unsigned Reg, SlotIndex S) { + // If S is a block boundary, then there can still be a dead def reaching + // this point. Instead of traversing the CFG, queue start points of all + // live segments that begin with a register, and end at a block boundary. + // This may "resurrect" some truly dead definitions, but doing so is + // harmless. + SmallVector<MachineInstr*,8> Defs; + if (S.isBlock()) { + LiveInterval &LI = LIS->getInterval(Reg); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (!I->start.isRegister() || !I->end.isBlock()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(I->start); + Defs.push_back(MI); + } + } else if (S.isRegister()) { + MachineInstr *MI = LIS->getInstructionFromIndex(S); + Defs.push_back(MI); + } + + for (unsigned i = 0, n = Defs.size(); i < n; ++i) { + MachineInstr *MI = Defs[i]; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + Op.setIsDead(false); + } + } +} + + +/// Shrink the segments in the live interval for a given register to the last +/// use before each subsequent def. Unlike LiveIntervals::shrinkToUses, this +/// function will not mark any definitions of Reg as dead. The reason for this +/// is that this function is used while a MUX instruction is being expanded, +/// or while a conditional copy is undergoing predication. During these +/// processes, there may be defs present in the instruction sequence that have +/// not yet been removed, or there may be missing uses that have not yet been +/// added. We want to utilize LiveIntervals::shrinkToUses as much as possible, +/// but since it does not extend any intervals that are too short, we need to +/// pre-emptively extend them here in anticipation of further changes. +void HexagonExpandCondsets::shrinkToUses(unsigned Reg, LiveInterval &LI) { + SmallVector<MachineInstr*,4> Deads; + LIS->shrinkToUses(&LI, &Deads); + // Need to undo the deadification made by "shrinkToUses". It's easier to + // do it here, since we have a list of all instructions that were just + // marked as dead. + for (unsigned i = 0, n = Deads.size(); i < n; ++i) { + MachineInstr *MI = Deads[i]; + // Clear the "dead" flag. + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + Op.setIsDead(false); + } + // Extend the live segment to the beginning of the next one. + LiveInterval::iterator End = LI.end(); + SlotIndex S = LIS->getInstructionIndex(MI).getRegSlot(); + LiveInterval::iterator T = LI.FindSegmentContaining(S); + assert(T != End); + LiveInterval::iterator N = std::next(T); + if (N != End) + T->end = N->start; + else + T->end = LIS->getMBBEndIdx(MI->getParent()); + } + updateKillFlags(Reg, LI); +} + + +/// Given an updated live interval LI for register Reg, update the kill flags +/// in instructions using Reg to reflect the liveness changes. +void HexagonExpandCondsets::updateKillFlags(unsigned Reg, LiveInterval &LI) { + MRI->clearKillFlags(Reg); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + SlotIndex EX = I->end; + if (!EX.isRegister()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(EX); + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) + continue; + // Only set the kill flag on the first encountered use of Reg in this + // instruction. + Op.setIsKill(true); + break; + } + } +} + + +/// When adding a new instruction to liveness, the newly added definition +/// will start a new live segment. This may happen at a position that falls +/// within an existing live segment. In such case that live segment needs to +/// be truncated to make room for the new segment. Ultimately, the truncation +/// will occur at the last use, but for now the segment can be terminated +/// right at the place where the new segment will start. The segments will be +/// shrunk-to-uses later. +void HexagonExpandCondsets::terminateSegment(LiveInterval::iterator LT, + SlotIndex S, LiveInterval &LI) { + // Terminate the live segment pointed to by LT within a live interval LI. + if (LT == LI.end()) + return; + + VNInfo *OldVN = LT->valno; + SlotIndex EX = LT->end; + LT->end = S; + // If LT does not end at a block boundary, the termination is done. + if (!EX.isBlock()) + return; + + // If LT ended at a block boundary, it's possible that its value number + // is picked up at the beginning other blocks. Create a new value number + // and change such blocks to use it instead. + VNInfo *NewVN = 0; + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (!I->start.isBlock() || I->valno != OldVN) + continue; + // Generate on-demand a new value number that is defined by the + // block beginning (i.e. -phi). + if (!NewVN) + NewVN = LI.getNextValue(I->start, LIS->getVNInfoAllocator()); + I->valno = NewVN; + } +} + + +/// Add the specified instruction to live intervals. This function is used +/// to update the live intervals while the program code is being changed. +/// Neither the expansion of a MUX, nor the predication are atomic, and this +/// function is used to update the live intervals while these transformations +/// are being done. +void HexagonExpandCondsets::addInstrToLiveness(MachineInstr *MI) { + SlotIndex MX = LIS->isNotInMIMap(MI) ? LIS->InsertMachineInstrInMaps(MI) + : LIS->getInstructionIndex(MI); + DEBUG(dbgs() << "adding liveness info for instr\n " << MX << " " << *MI); + + MX = MX.getRegSlot(); + bool Predicated = HII->isPredicated(MI); + MachineBasicBlock *MB = MI->getParent(); + + // Strip all implicit uses from predicated instructions. They will be + // added again, according to the updated information. + if (Predicated) + removeImplicitUses(MI); + + // For each def in MI we need to insert a new live segment starting at MX + // into the interval. If there already exists a live segment in the interval + // that contains MX, we need to terminate it at MX. + SmallVector<RegisterRef,2> Defs; + for (auto &Op : MI->operands()) + if (Op.isReg() && Op.isDef()) + Defs.push_back(RegisterRef(Op)); + + for (unsigned i = 0, n = Defs.size(); i < n; ++i) { + unsigned DefR = Defs[i].Reg; + LiveInterval &LID = LIS->getInterval(DefR); + DEBUG(dbgs() << "adding def " << PrintReg(DefR, TRI) + << " with interval\n " << LID << "\n"); + // If MX falls inside of an existing live segment, terminate it. + LiveInterval::iterator LT = LID.FindSegmentContaining(MX); + if (LT != LID.end()) + terminateSegment(LT, MX, LID); + DEBUG(dbgs() << "after terminating segment\n " << LID << "\n"); + + // Create a new segment starting from MX. + LiveInterval::iterator P = prevSegment(LID, MX), N = nextSegment(LID, MX); + SlotIndex EX; + VNInfo *VN = LID.getNextValue(MX, LIS->getVNInfoAllocator()); + if (N == LID.end()) { + // There is no live segment after MX. End this segment at the end of + // the block. + EX = LIS->getMBBEndIdx(MB); + } else { + // If the next segment starts at the block boundary, end the new segment + // at the boundary of the preceding block (i.e. the previous index). + // Otherwise, end the segment at the beginning of the next segment. In + // either case it will be "shrunk-to-uses" later. + EX = N->start.isBlock() ? N->start.getPrevIndex() : N->start; + } + if (Predicated) { + // Predicated instruction will have an implicit use of the defined + // register. This is necessary so that this definition will not make + // any previous definitions dead. If there are no previous live + // segments, still add the implicit use, but make it "undef". + // Because of the implicit use, the preceding definition is not + // dead. Mark is as such (if necessary). + MachineOperand ImpUse = MachineOperand::CreateReg(DefR, false, true); + ImpUse.setSubReg(Defs[i].Sub); + bool Undef = false; + if (P == LID.end()) + Undef = true; + else { + // If the previous segment extends to the end of the previous block, + // the end index may actually be the beginning of this block. If + // the previous segment ends at a block boundary, move it back by one, + // to get the proper block for it. + SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end; + MachineBasicBlock *PB = LIS->getMBBFromIndex(PE); + if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) + Undef = true; + } + if (!Undef) { + makeUndead(DefR, P->valno->def); + // We are adding a live use, so extend the previous segment to + // include it. + P->end = MX; + } else { + ImpUse.setIsUndef(true); + } + + if (!MI->readsRegister(DefR)) + MI->addOperand(ImpUse); + if (N != LID.end()) + makeDefined(DefR, N->start, true); + } + LiveRange::Segment NR = LiveRange::Segment(MX, EX, VN); + LID.addSegment(NR); + DEBUG(dbgs() << "added a new segment " << NR << "\n " << LID << "\n"); + shrinkToUses(DefR, LID); + DEBUG(dbgs() << "updated imp-uses: " << *MI); + LID.verify(); + } + + // For each use in MI: + // - If there is no live segment that contains MX for the used register, + // extend the previous one. Ignore implicit uses. + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.isImplicit() || Op.isUndef()) + continue; + unsigned UseR = Op.getReg(); + LiveInterval &LIU = LIS->getInterval(UseR); + // Find the last segment P that starts before MX. + LiveInterval::iterator P = LIU.FindSegmentContaining(MX); + if (P == LIU.end()) + P = prevSegment(LIU, MX); + + assert(P != LIU.end() && "MI uses undefined register?"); + SlotIndex EX = P->end; + // If P contains MX, there is not much to do. + if (EX > MX) { + Op.setIsKill(false); + continue; + } + // Otherwise, extend P to "next(MX)". + P->end = MX.getNextIndex(); + Op.setIsKill(true); + // Get the old "kill" instruction, and remove the kill flag. + if (MachineInstr *KI = LIS->getInstructionFromIndex(MX)) + KI->clearRegisterKills(UseR, nullptr); + shrinkToUses(UseR, LIU); + LIU.verify(); + } +} + + +/// Update the live interval information to reflect the removal of the given +/// instruction from the program. As with "addInstrToLiveness", this function +/// is called while the program code is being changed. +void HexagonExpandCondsets::removeInstrFromLiveness(MachineInstr *MI) { + SlotIndex MX = LIS->getInstructionIndex(MI).getRegSlot(); + DEBUG(dbgs() << "removing instr\n " << MX << " " << *MI); + + // For each def in MI: + // If MI starts a live segment, merge this segment with the previous segment. + // + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + unsigned DefR = Op.getReg(); + LiveInterval &LID = LIS->getInterval(DefR); + LiveInterval::iterator LT = LID.FindSegmentContaining(MX); + assert(LT != LID.end() && "Expecting live segments"); + DEBUG(dbgs() << "removing def at " << MX << " of " << PrintReg(DefR, TRI) + << " with interval\n " << LID << "\n"); + if (LT->start != MX) + continue; + + VNInfo *MVN = LT->valno; + if (LT != LID.begin()) { + // If the current live segment is not the first, the task is easy. If + // the previous segment continues into the current block, extend it to + // the end of the current one, and merge the value numbers. + // Otherwise, remove the current segment, and make the end of it "undef". + LiveInterval::iterator P = std::prev(LT); + SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end; + MachineBasicBlock *MB = MI->getParent(); + MachineBasicBlock *PB = LIS->getMBBFromIndex(PE); + if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) { + makeDefined(DefR, LT->end, false); + LID.removeSegment(*LT); + } else { + // Make the segments adjacent, so that merge-vn can also merge the + // segments. + P->end = LT->start; + makeUndead(DefR, P->valno->def); + LID.MergeValueNumberInto(MVN, P->valno); + } + } else { + LiveInterval::iterator N = std::next(LT); + LiveInterval::iterator RmB = LT, RmE = N; + while (N != LID.end()) { + // Iterate until the first register-based definition is found + // (i.e. skip all block-boundary entries). + LiveInterval::iterator Next = std::next(N); + if (N->start.isRegister()) { + makeDefined(DefR, N->start, false); + break; + } + if (N->end.isRegister()) { + makeDefined(DefR, N->end, false); + RmE = Next; + break; + } + RmE = Next; + N = Next; + } + // Erase the segments in one shot to avoid invalidating iterators. + LID.segments.erase(RmB, RmE); + } + + bool VNUsed = false; + for (LiveInterval::iterator I = LID.begin(), E = LID.end(); I != E; ++I) { + if (I->valno != MVN) + continue; + VNUsed = true; + break; + } + if (!VNUsed) + MVN->markUnused(); + + DEBUG(dbgs() << "new interval: "); + if (!LID.empty()) { + DEBUG(dbgs() << LID << "\n"); + LID.verify(); + } else { + DEBUG(dbgs() << "<empty>\n"); + LIS->removeInterval(DefR); + } + } + + // For uses there is nothing to do. The intervals will be updated via + // shrinkToUses. + SmallVector<unsigned,4> Uses; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Uses.push_back(R); + } + LIS->RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + for (unsigned i = 0, n = Uses.size(); i < n; ++i) { + LiveInterval &LI = LIS->getInterval(Uses[i]); + shrinkToUses(Uses[i], LI); + } +} + + +/// Get the opcode for a conditional transfer of the value in SO (source +/// operand). The condition (true/false) is given in Cond. +unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, + bool Cond) { + using namespace Hexagon; + if (SO.isReg()) { + unsigned PhysR; + RegisterRef RS = SO; + if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) { + const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg); + assert(VC->begin() != VC->end() && "Empty register class"); + PhysR = *VC->begin(); + } else { + assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg)); + PhysR = RS.Reg; + } + unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS); + switch (RC->getSize()) { + case 4: + return Cond ? A2_tfrt : A2_tfrf; + case 8: + return Cond ? A2_tfrpt : A2_tfrpf; + } + llvm_unreachable("Invalid register operand"); + } + if (SO.isImm() || SO.isFPImm()) + return Cond ? C2_cmoveit : C2_cmoveif; + llvm_unreachable("Unexpected source operand"); +} + + +/// Generate a conditional transfer, copying the value SrcOp to the +/// destination register DstR:DstSR, and using the predicate register from +/// PredOp. The Cond argument specifies whether the predicate is to be +/// if(PredOp), or if(!PredOp). +MachineInstr *HexagonExpandCondsets::genTfrFor(MachineOperand &SrcOp, + unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, bool Cond) { + MachineInstr *MI = SrcOp.getParent(); + MachineBasicBlock &B = *MI->getParent(); + MachineBasicBlock::iterator At = MI; + DebugLoc DL = MI->getDebugLoc(); + + // Don't avoid identity copies here (i.e. if the source and the destination + // are the same registers). It is actually better to generate them here, + // since this would cause the copy to potentially be predicated in the next + // step. The predication will remove such a copy if it is unable to + /// predicate. + + unsigned Opc = getCondTfrOpcode(SrcOp, Cond); + MachineInstr *TfrI = BuildMI(B, At, DL, HII->get(Opc)) + .addReg(DstR, RegState::Define, DstSR) + .addOperand(PredOp) + .addOperand(SrcOp); + // We don't want any kills yet. + TfrI->clearKillInfo(); + DEBUG(dbgs() << "created an initial copy: " << *TfrI); + return TfrI; +} + + +/// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function +/// performs all necessary changes to complete the replacement. +bool HexagonExpandCondsets::split(MachineInstr *MI) { + if (TfrLimitActive) { + if (TfrCounter >= TfrLimit) + return false; + TfrCounter++; + } + DEBUG(dbgs() << "\nsplitting BB#" << MI->getParent()->getNumber() + << ": " << *MI); + MachineOperand &MD = MI->getOperand(0); // Definition + MachineOperand &MP = MI->getOperand(1); // Predicate register + assert(MD.isDef()); + unsigned DR = MD.getReg(), DSR = MD.getSubReg(); + + // First, create the two invididual conditional transfers, and add each + // of them to the live intervals information. Do that first and then remove + // the old instruction from live intervals. + if (MachineInstr *TfrT = genTfrFor(MI->getOperand(2), DR, DSR, MP, true)) + addInstrToLiveness(TfrT); + if (MachineInstr *TfrF = genTfrFor(MI->getOperand(3), DR, DSR, MP, false)) + addInstrToLiveness(TfrF); + removeInstrFromLiveness(MI); + + return true; +} + + +/// Split all MUX instructions in the given block into pairs of contitional +/// transfers. +bool HexagonExpandCondsets::splitInBlock(MachineBasicBlock &B) { + bool Changed = false; + MachineBasicBlock::iterator I, E, NextI; + for (I = B.begin(), E = B.end(); I != E; I = NextI) { + NextI = std::next(I); + if (isCondset(I)) + Changed |= split(I); + } + return Changed; +} + + +bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) { + if (HII->isPredicated(MI) || !HII->isPredicable(MI)) + return false; + if (MI->hasUnmodeledSideEffects() || MI->mayStore()) + return false; + // Reject instructions with multiple defs (e.g. post-increment loads). + bool HasDef = false; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + if (HasDef) + return false; + HasDef = true; + } + for (auto &Mo : MI->memoperands()) + if (Mo->isVolatile()) + return false; + return true; +} + + +/// Find the reaching definition for a predicated use of RD. The RD is used +/// under the conditions given by PredR and Cond, and this function will ignore +/// definitions that set RD under the opposite conditions. +MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD, + MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) { + MachineBasicBlock &B = *UseIt->getParent(); + MachineBasicBlock::iterator I = UseIt, S = B.begin(); + if (I == S) + return 0; + + bool PredValid = true; + do { + --I; + MachineInstr *MI = &*I; + // Check if this instruction can be ignored, i.e. if it is predicated + // on the complementary condition. + if (PredValid && HII->isPredicated(MI)) { + if (MI->readsRegister(PredR) && (Cond != HII->isPredicatedTrue(MI))) + continue; + } + + // Check the defs. If the PredR is defined, invalidate it. If RD is + // defined, return the instruction or 0, depending on the circumstances. + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + RegisterRef RR = Op; + if (RR.Reg == PredR) { + PredValid = false; + continue; + } + if (RR.Reg != RD.Reg) + continue; + // If the "Reg" part agrees, there is still the subregister to check. + // If we are looking for vreg1:loreg, we can skip vreg1:hireg, but + // not vreg1 (w/o subregisters). + if (RR.Sub == RD.Sub) + return MI; + if (RR.Sub == 0 || RD.Sub == 0) + return 0; + // We have different subregisters, so we can continue looking. + } + } while (I != S); + + return 0; +} + + +/// Check if the instruction MI can be safely moved over a set of instructions +/// whose side-effects (in terms of register defs and uses) are expressed in +/// the maps Defs and Uses. These maps reflect the conditional defs and uses +/// that depend on the same predicate register to allow moving instructions +/// over instructions predicated on the opposite condition. +bool HexagonExpandCondsets::canMoveOver(MachineInstr *MI, ReferenceMap &Defs, + ReferenceMap &Uses) { + // In order to be able to safely move MI over instructions that define + // "Defs" and use "Uses", no def operand from MI can be defined or used + // and no use operand can be defined. + for (auto &Op : MI->operands()) { + if (!Op.isReg()) + continue; + RegisterRef RR = Op; + // For physical register we would need to check register aliases, etc. + // and we don't want to bother with that. It would be of little value + // before the actual register rewriting (from virtual to physical). + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return false; + // No redefs for any operand. + if (isRefInMap(RR, Defs, Exec_Then)) + return false; + // For defs, there cannot be uses. + if (Op.isDef() && isRefInMap(RR, Uses, Exec_Then)) + return false; + } + return true; +} + + +/// Check if the instruction accessing memory (TheI) can be moved to the +/// location ToI. +bool HexagonExpandCondsets::canMoveMemTo(MachineInstr *TheI, MachineInstr *ToI, + bool IsDown) { + bool IsLoad = TheI->mayLoad(), IsStore = TheI->mayStore(); + if (!IsLoad && !IsStore) + return true; + if (HII->areMemAccessesTriviallyDisjoint(TheI, ToI)) + return true; + if (TheI->hasUnmodeledSideEffects()) + return false; + + MachineBasicBlock::iterator StartI = IsDown ? TheI : ToI; + MachineBasicBlock::iterator EndI = IsDown ? ToI : TheI; + bool Ordered = TheI->hasOrderedMemoryRef(); + + // Search for aliased memory reference in (StartI, EndI). + for (MachineBasicBlock::iterator I = std::next(StartI); I != EndI; ++I) { + MachineInstr *MI = &*I; + if (MI->hasUnmodeledSideEffects()) + return false; + bool L = MI->mayLoad(), S = MI->mayStore(); + if (!L && !S) + continue; + if (Ordered && MI->hasOrderedMemoryRef()) + return false; + + bool Conflict = (L && IsStore) || S; + if (Conflict) + return false; + } + return true; +} + + +/// Generate a predicated version of MI (where the condition is given via +/// PredR and Cond) at the point indicated by Where. +void HexagonExpandCondsets::predicateAt(RegisterRef RD, MachineInstr *MI, + MachineBasicBlock::iterator Where, unsigned PredR, bool Cond) { + // The problem with updating live intervals is that we can move one def + // past another def. In particular, this can happen when moving an A2_tfrt + // over an A2_tfrf defining the same register. From the point of view of + // live intervals, these two instructions are two separate definitions, + // and each one starts another live segment. LiveIntervals's "handleMove" + // does not allow such moves, so we need to handle it ourselves. To avoid + // invalidating liveness data while we are using it, the move will be + // implemented in 4 steps: (1) add a clone of the instruction MI at the + // target location, (2) update liveness, (3) delete the old instruction, + // and (4) update liveness again. + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction. + unsigned Opc = MI->getOpcode(); + unsigned PredOpc = HII->getCondOpcode(Opc, !Cond); + MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc)); + unsigned Ox = 0, NP = MI->getNumOperands(); + // Skip all defs from MI first. + while (Ox < NP) { + MachineOperand &MO = MI->getOperand(Ox); + if (!MO.isReg() || !MO.isDef()) + break; + Ox++; + } + // Add the new def, then the predicate register, then the rest of the + // operands. + MB.addReg(RD.Reg, RegState::Define, RD.Sub); + MB.addReg(PredR); + while (Ox < NP) { + MachineOperand &MO = MI->getOperand(Ox); + if (!MO.isReg() || !MO.isImplicit()) + MB.addOperand(MO); + Ox++; + } + + MachineFunction &MF = *B.getParent(); + MachineInstr::mmo_iterator I = MI->memoperands_begin(); + unsigned NR = std::distance(I, MI->memoperands_end()); + MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR); + for (unsigned i = 0; i < NR; ++i) + MemRefs[i] = *I++; + MB.setMemRefs(MemRefs, MemRefs+NR); + + MachineInstr *NewI = MB; + NewI->clearKillInfo(); + addInstrToLiveness(NewI); +} + + +/// In the range [First, Last], rename all references to the "old" register RO +/// to the "new" register RN, but only in instructions predicated on the given +/// condition. +void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN, + unsigned PredR, bool Cond, MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Last) { + MachineBasicBlock::iterator End = std::next(Last); + for (MachineBasicBlock::iterator I = First; I != End; ++I) { + MachineInstr *MI = &*I; + // Do not touch instructions that are not predicated, or are predicated + // on the opposite condition. + if (!HII->isPredicated(MI)) + continue; + if (!MI->readsRegister(PredR) || (Cond != HII->isPredicatedTrue(MI))) + continue; + + for (auto &Op : MI->operands()) { + if (!Op.isReg() || RO != RegisterRef(Op)) + continue; + Op.setReg(RN.Reg); + Op.setSubReg(RN.Sub); + // In practice, this isn't supposed to see any defs. + assert(!Op.isDef() && "Not expecting a def"); + } + } +} + + +/// For a given conditional copy, predicate the definition of the source of +/// the copy under the given condition (using the same predicate register as +/// the copy). +bool HexagonExpandCondsets::predicate(MachineInstr *TfrI, bool Cond) { + // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi). + unsigned Opc = TfrI->getOpcode(); + (void)Opc; + assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf); + DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false") + << ": " << *TfrI); + + MachineOperand &MD = TfrI->getOperand(0); + MachineOperand &MP = TfrI->getOperand(1); + MachineOperand &MS = TfrI->getOperand(2); + // The source operand should be a <kill>. This is not strictly necessary, + // but it makes things a lot simpler. Otherwise, we would need to rename + // some registers, which would complicate the transformation considerably. + if (!MS.isKill()) + return false; + + RegisterRef RT(MS); + unsigned PredR = MP.getReg(); + MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond); + if (!DefI || !isPredicable(DefI)) + return false; + + DEBUG(dbgs() << "Source def: " << *DefI); + + // Collect the information about registers defined and used between the + // DefI and the TfrI. + // Map: reg -> bitmask of subregs + ReferenceMap Uses, Defs; + MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI; + + // Check if the predicate register is valid between DefI and TfrI. + // If it is, we can then ignore instructions predicated on the negated + // conditions when collecting def and use information. + bool PredValid = true; + for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { + if (!I->modifiesRegister(PredR, 0)) + continue; + PredValid = false; + break; + } + + for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { + MachineInstr *MI = &*I; + // If this instruction is predicated on the same register, it could + // potentially be ignored. + // By default assume that the instruction executes on the same condition + // as TfrI (Exec_Then), and also on the opposite one (Exec_Else). + unsigned Exec = Exec_Then | Exec_Else; + if (PredValid && HII->isPredicated(MI) && MI->readsRegister(PredR)) + Exec = (Cond == HII->isPredicatedTrue(MI)) ? Exec_Then : Exec_Else; + + for (auto &Op : MI->operands()) { + if (!Op.isReg()) + continue; + // We don't want to deal with physical registers. The reason is that + // they can be aliased with other physical registers. Aliased virtual + // registers must share the same register number, and can only differ + // in the subregisters, which we are keeping track of. Physical + // registers ters no longer have subregisters---their super- and + // subregisters are other physical registers, and we are not checking + // that. + RegisterRef RR = Op; + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return false; + + ReferenceMap &Map = Op.isDef() ? Defs : Uses; + addRefToMap(RR, Map, Exec); + } + } + + // The situation: + // RT = DefI + // ... + // RD = TfrI ..., RT + + // If the register-in-the-middle (RT) is used or redefined between + // DefI and TfrI, we may not be able proceed with this transformation. + // We can ignore a def that will not execute together with TfrI, and a + // use that will. If there is such a use (that does execute together with + // TfrI), we will not be able to move DefI down. If there is a use that + // executed if TfrI's condition is false, then RT must be available + // unconditionally (cannot be predicated). + // Essentially, we need to be able to rename RT to RD in this segment. + if (isRefInMap(RT, Defs, Exec_Then) || isRefInMap(RT, Uses, Exec_Else)) + return false; + RegisterRef RD = MD; + // If the predicate register is defined between DefI and TfrI, the only + // potential thing to do would be to move the DefI down to TfrI, and then + // predicate. The reaching def (DefI) must be movable down to the location + // of the TfrI. + // If the target register of the TfrI (RD) is not used or defined between + // DefI and TfrI, consider moving TfrI up to DefI. + bool CanUp = canMoveOver(TfrI, Defs, Uses); + bool CanDown = canMoveOver(DefI, Defs, Uses); + // The TfrI does not access memory, but DefI could. Check if it's safe + // to move DefI down to TfrI. + if (DefI->mayLoad() || DefI->mayStore()) + if (!canMoveMemTo(DefI, TfrI, true)) + CanDown = false; + + DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no") + << ", can move down: " << (CanDown ? "yes\n" : "no\n")); + MachineBasicBlock::iterator PastDefIt = std::next(DefIt); + if (CanUp) + predicateAt(RD, DefI, PastDefIt, PredR, Cond); + else if (CanDown) + predicateAt(RD, DefI, TfrIt, PredR, Cond); + else + return false; + + if (RT != RD) + renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt); + + // Delete the user of RT first (it should work either way, but this order + // of deleting is more natural). + removeInstrFromLiveness(TfrI); + removeInstrFromLiveness(DefI); + return true; +} + + +/// Predicate all cases of conditional copies in the specified block. +bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B) { + bool Changed = false; + MachineBasicBlock::iterator I, E, NextI; + for (I = B.begin(), E = B.end(); I != E; I = NextI) { + NextI = std::next(I); + unsigned Opc = I->getOpcode(); + if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) { + bool Done = predicate(I, (Opc == Hexagon::A2_tfrt)); + if (!Done) { + // If we didn't predicate I, we may need to remove it in case it is + // an "identity" copy, e.g. vreg1 = A2_tfrt vreg2, vreg1. + if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))) + removeInstrFromLiveness(I); + } + Changed |= Done; + } + } + return Changed; +} + + +void HexagonExpandCondsets::removeImplicitUses(MachineInstr *MI) { + for (unsigned i = MI->getNumOperands(); i > 0; --i) { + MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && MO.isUse() && MO.isImplicit()) + MI->RemoveOperand(i-1); + } +} + + +void HexagonExpandCondsets::removeImplicitUses(MachineBasicBlock &B) { + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + if (HII->isPredicated(MI)) + removeImplicitUses(MI); + } +} + + +void HexagonExpandCondsets::postprocessUndefImplicitUses(MachineBasicBlock &B) { + // Implicit uses that are "undef" are only meaningful (outside of the + // internals of this pass) when the instruction defines a subregister, + // and the implicit-undef use applies to the defined register. In such + // cases, the proper way to record the information in the IR is to mark + // the definition as "undef", which will be interpreted as "read-undef". + typedef SmallSet<unsigned,2> RegisterSet; + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + RegisterSet Undefs; + for (unsigned i = MI->getNumOperands(); i > 0; --i) { + MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.isUndef()) { + MI->RemoveOperand(i-1); + Undefs.insert(MO.getReg()); + } + } + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || !Op.getSubReg()) + continue; + if (Undefs.count(Op.getReg())) + Op.setIsUndef(true); + } + } +} + + +bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) { + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return false; + const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg); + if (RC == &Hexagon::IntRegsRegClass) { + BW = 32; + return true; + } + if (RC == &Hexagon::DoubleRegsRegClass) { + BW = (RR.Sub != 0) ? 32 : 64; + return true; + } + return false; +} + + +bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) { + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + LiveRange::Segment &LR = *I; + // Range must start at a register... + if (!LR.start.isRegister()) + return false; + // ...and end in a register or in a dead slot. + if (!LR.end.isRegister() && !LR.end.isDead()) + return false; + } + return true; +} + + +bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { + if (CoaLimitActive) { + if (CoaCounter >= CoaLimit) + return false; + CoaCounter++; + } + unsigned BW1, BW2; + if (!isIntReg(R1, BW1) || !isIntReg(R2, BW2) || BW1 != BW2) + return false; + if (MRI->isLiveIn(R1.Reg)) + return false; + if (MRI->isLiveIn(R2.Reg)) + return false; + + LiveInterval &L1 = LIS->getInterval(R1.Reg); + LiveInterval &L2 = LIS->getInterval(R2.Reg); + bool Overlap = L1.overlaps(L2); + + DEBUG(dbgs() << "compatible registers: (" + << (Overlap ? "overlap" : "disjoint") << ")\n " + << PrintReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n " + << PrintReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n"); + if (R1.Sub || R2.Sub) + return false; + if (Overlap) + return false; + + // Coalescing could have a negative impact on scheduling, so try to limit + // to some reasonable extent. Only consider coalescing segments, when one + // of them does not cross basic block boundaries. + if (!isIntraBlocks(L1) && !isIntraBlocks(L2)) + return false; + + MRI->replaceRegWith(R2.Reg, R1.Reg); + + // Move all live segments from L2 to L1. + typedef DenseMap<VNInfo*,VNInfo*> ValueInfoMap; + ValueInfoMap VM; + for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E; ++I) { + VNInfo *NewVN, *OldVN = I->valno; + ValueInfoMap::iterator F = VM.find(OldVN); + if (F == VM.end()) { + NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator()); + VM.insert(std::make_pair(OldVN, NewVN)); + } else { + NewVN = F->second; + } + L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN)); + } + while (L2.begin() != L2.end()) + L2.removeSegment(*L2.begin()); + + updateKillFlags(R1.Reg, L1); + DEBUG(dbgs() << "coalesced: " << L1 << "\n"); + L1.verify(); + + return true; +} + + +/// Attempt to coalesce one of the source registers to a MUX intruction with +/// the destination register. This could lead to having only one predicated +/// instruction in the end instead of two. +bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) { + SmallVector<MachineInstr*,16> Condsets; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock &B = *I; + for (MachineBasicBlock::iterator J = B.begin(), F = B.end(); J != F; ++J) { + MachineInstr *MI = &*J; + if (!isCondset(MI)) + continue; + MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3); + if (!S1.isReg() && !S2.isReg()) + continue; + Condsets.push_back(MI); + } + } + + bool Changed = false; + for (unsigned i = 0, n = Condsets.size(); i < n; ++i) { + MachineInstr *CI = Condsets[i]; + RegisterRef RD = CI->getOperand(0); + RegisterRef RP = CI->getOperand(1); + MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3); + bool Done = false; + // Consider this case: + // vreg1 = instr1 ... + // vreg2 = instr2 ... + // vreg0 = C2_mux ..., vreg1, vreg2 + // If vreg0 was coalesced with vreg1, we could end up with the following + // code: + // vreg0 = instr1 ... + // vreg2 = instr2 ... + // vreg0 = A2_tfrf ..., vreg2 + // which will later become: + // vreg0 = instr1 ... + // vreg0 = instr2_cNotPt ... + // i.e. there will be an unconditional definition (instr1) of vreg0 + // followed by a conditional one. The output dependency was there before + // and it unavoidable, but if instr1 is predicable, we will no longer be + // able to predicate it here. + // To avoid this scenario, don't coalesce the destination register with + // a source register that is defined by a predicable instruction. + if (S1.isReg()) { + RegisterRef RS = S1; + MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true); + if (!RDef || !HII->isPredicable(RDef)) + Done = coalesceRegisters(RD, RegisterRef(S1)); + } + if (!Done && S2.isReg()) { + RegisterRef RS = S2; + MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false); + if (!RDef || !HII->isPredicable(RDef)) + Done = coalesceRegisters(RD, RegisterRef(S2)); + } + Changed |= Done; + } + return Changed; +} + + +bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { + HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo()); + TRI = MF.getSubtarget().getRegisterInfo(); + LIS = &getAnalysis<LiveIntervals>(); + MRI = &MF.getRegInfo(); + + bool Changed = false; + + // Try to coalesce the target of a mux with one of its sources. + // This could eliminate a register copy in some circumstances. + Changed |= coalesceSegments(MF); + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + // First, simply split all muxes into a pair of conditional transfers + // and update the live intervals to reflect the new arrangement. + // This is done mainly to make the live interval update simpler, than it + // would be while trying to predicate instructions at the same time. + Changed |= splitInBlock(*I); + // Traverse all blocks and collapse predicable instructions feeding + // conditional transfers into predicated instructions. + // Walk over all the instructions again, so we may catch pre-existing + // cases that were not created in the previous step. + Changed |= predicateInBlock(*I); + } + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + postprocessUndefImplicitUses(*I); + return Changed; +} + + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "Hexagon Expand Condsets"; + PassInfo *PI = new PassInfo(Name, "expand-condsets", + &HexagonExpandCondsets::ID, 0, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonExpandCondsetsPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + + +FunctionPass *llvm::createHexagonExpandCondsets() { + return new HexagonExpandCondsets(); +} diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index a2209ab..63900e0 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -316,6 +316,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile=*/false, /*AlwaysInline=*/false, + /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); } @@ -1716,6 +1717,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SUBC, MVT::i32, Expand); setOperationAction(ISD::SUBC, MVT::i64, Expand); + // Only add and sub that detect overflow are the saturating ones. + for (MVT VT : MVT::integer_valuetypes()) { + setOperationAction(ISD::UADDO, VT, Expand); + setOperationAction(ISD::SADDO, VT, Expand); + setOperationAction(ISD::USUBO, VT, Expand); + setOperationAction(ISD::SSUBO, VT, Expand); + } + setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTPOP, MVT::i64, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Expand); @@ -2106,7 +2115,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // is Big Endian. unsigned OpIdx = NElts - i - 1; SDValue Operand = BVN->getOperand(OpIdx); - if (dyn_cast<ConstantSDNode>(Operand)) + if (isa<ConstantSDNode>(Operand)) // This operand is already in ConstVal. continue; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index fbf1ca9..ff4bcad 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -845,8 +845,7 @@ bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const { return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask); } -int HexagonInstrInfo:: -getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { +int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { enum Hexagon::PredSense inPredSense; inPredSense = invertPredicate ? Hexagon::PredSense_false : Hexagon::PredSense_true; @@ -884,7 +883,7 @@ PredicateInstruction(MachineInstr *MI, // This will change MI's opcode to its predicate version. // However, its operand list is still the old one, i.e. the // non-predicate one. - MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump))); + MI->setDesc(get(getCondOpcode(Opc, invertJump))); int oper = -1; unsigned int GAIdx = 0; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 2644248..284dde1 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -216,9 +216,7 @@ public: short getNonExtOpcode(const MachineInstr *MI) const; bool PredOpcodeHasJMP_c(Opcode_t Opcode) const; bool PredOpcodeHasNot(Opcode_t Opcode) const; - -private: - int getMatchingCondBranchOpcode(int Opc, bool sense) const; + int getCondOpcode(int Opc, bool sense) const; }; diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 1717ae3..d61cc54 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -72,7 +72,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS, const TargetMachine &TM) - : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU.str()), + : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() { diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 48b0bc8..0679866 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -27,11 +27,15 @@ using namespace llvm; static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops", - cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); + cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Hexagon CFG Optimization")); + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon CFG Optimization")); + +static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets", + cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Early expansion of MUX")); /// HexagonTargetMachineModule - Note that this is used on hosts that @@ -55,6 +59,10 @@ static MachineSchedRegistry SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", createVLIWMachineSched); +namespace llvm { + FunctionPass *createHexagonExpandCondsets(); +} + /// HexagonTargetMachine ctor - Create an ILP32 architecture model. /// @@ -79,7 +87,15 @@ namespace { class HexagonPassConfig : public TargetPassConfig { public: HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) { + bool NoOpt = (TM->getOptLevel() == CodeGenOpt::None); + if (!NoOpt) { + if (EnableExpandCondsets) { + Pass *Exp = createHexagonExpandCondsets(); + insertPass(&RegisterCoalescerID, IdentifyingPassPtr(Exp)); + } + } + } HexagonTargetMachine &getHexagonTargetMachine() const { return getTM<HexagonTargetMachine>(); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index bdccf88..155aa9e 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -57,7 +57,7 @@ public: ELFHexagonAsmBackend(Target const &T, uint8_t OSABI) : HexagonAsmBackend(T), OSABI(OSABI) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { StringRef CPU("HexagonV4"); return createHexagonELFObjectWriter(OS, OSABI, CPU); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index 4a3ac8c..fde935b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -27,8 +27,8 @@ private: public: HexagonELFObjectWriter(uint8_t OSABI, StringRef C); - virtual unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup, - bool IsPCRel) const override; + unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup, + bool IsPCRel) const override; }; } @@ -55,9 +55,9 @@ unsigned HexagonELFObjectWriter::GetRelocType(MCValue const &/*Target*/, return Type; } -MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, StringRef CPU) { MCELFObjectTargetWriter *MOTW = new HexagonELFObjectWriter(OSABI, CPU); return createELFObjectWriter(MOTW, OS, /*IsLittleEndian*/ true); -}
\ No newline at end of file +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index 6c87c9f..ec55234 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -78,7 +78,8 @@ StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const { } void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, + const MCSubtargetInfo &STI) { const char startPacket = '{', endPacket = '}'; // TODO: add outer HW loop when it's supported too. @@ -94,7 +95,7 @@ void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O, Nop.setOpcode (Hexagon::A2_nop); HexagonMCInstrInfo::setPacketBegin (Nop, HexagonMCInstrInfo::isPacketBegin(*MI)); - printInst (&Nop, O, NoAnnot); + printInst (&Nop, O, NoAnnot, STI); } // Close the packet. diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h index d02243b..98fb99b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h @@ -25,7 +25,8 @@ namespace llvm { MCRegisterInfo const &MRI) : MCInstPrinter(MAI, MII, MRI), MII(MII) {} - void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot) override; + void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; virtual StringRef getOpcodeName(unsigned Opcode) const; void printInstruction(const MCInst *MI, raw_ostream &O); StringRef getRegName(unsigned RegNo) const; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index c63bf32..2e10d81 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -75,13 +75,16 @@ static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM, X->InitMCCodeGenInfo(Reloc::Static, CM, OL); return X; } -static MCInstPrinter *createHexagonMCInstPrinter(const Target &T, + +static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - return new HexagonInstPrinter(MAI, MII, MRI); + const MCRegisterInfo &MRI) { + if (SyntaxVariant == 0) + return(new HexagonInstPrinter(MAI, MII, MRI)); + else + return nullptr; } // Force static initialization. diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index 17072d9..de63fd2 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -27,6 +27,7 @@ class MCSubtargetInfo; class Target; class StringRef; class raw_ostream; +class raw_pwrite_stream; extern Target TheHexagonTarget; @@ -40,8 +41,8 @@ MCAsmBackend *createHexagonAsmBackend(Target const &T, MCRegisterInfo const &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createHexagonELFObjectWriter(raw_ostream &OS, uint8_t OSABI, - StringRef CPU); +MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, StringRef CPU); } // End llvm namespace diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp index acf1214..6c43d97 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp @@ -27,7 +27,7 @@ using namespace llvm; #include "MSP430GenAsmWriter.inc" void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { printInstruction(MI, O); printAnnotation(O, Annot); } diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h index 7fae505..70141a9 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h @@ -25,7 +25,8 @@ namespace llvm { const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp index 4c70803..775c0b2 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp @@ -58,12 +58,11 @@ static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, +static MCInstPrinter *createMSP430MCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) return new MSP430InstPrinter(MAI, MII, MRI); return nullptr; diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 18141a6..08f41a8 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -645,6 +645,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, Flags.getByValAlign(), /*isVolatile*/false, /*AlwaysInline=*/true, + /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); } else { diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 68868b6..9266c3b 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -102,12 +102,6 @@ namespace llvm { const std::string &Constraint, MVT VT) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { - // FIXME: Map different constraints differently. - return InlineAsm::Constraint_m; - } - /// isTruncateFree - Return true if it's free to truncate a value of type /// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in /// register R15W to i8 by referencing its sub-register R15B. diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp index 05352a2..c63a57c 100644 --- a/lib/Target/MSP430/MSP430MCInstLower.cpp +++ b/lib/Target/MSP430/MSP430MCInstLower.cpp @@ -62,7 +62,7 @@ GetJumpTableSymbol(const MachineOperand &MO) const { } // Create a symbol for the name. - return Ctx.GetOrCreateSymbol(Name.str()); + return Ctx.GetOrCreateSymbol(Name); } MCSymbol *MSP430MCInstLower:: @@ -79,7 +79,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const { } // Create a symbol for the name. - return Ctx.GetOrCreateSymbol(Name.str()); + return Ctx.GetOrCreateSymbol(Name); } MCSymbol *MSP430MCInstLower:: diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 6401bc1..6f7e3c1 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -220,6 +220,7 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseDirectiveNaN(); bool parseDirectiveSet(); bool parseDirectiveOption(); + bool parseInsnDirective(); bool parseSetAtDirective(); bool parseSetNoAtDirective(); @@ -272,7 +273,10 @@ class MipsAsmParser : public MCTargetAsmParser { unsigned getGPR(int RegNo); - int getATReg(SMLoc Loc); + /// Returns the internal register number for the current AT. Also checks if + /// the current AT is unavailable (set to $0) and gives an error if it is. + /// This should be used in pseudo-instruction expansions which need AT. + unsigned getATReg(SMLoc Loc); bool processInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions); @@ -1713,7 +1717,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, // FIXME: gas has a special case for values that are 000...1111, which // becomes a li -1 and then a dsrl if (0 <= ImmValue && ImmValue <= 65535) { - // For 0 <= j <= 65535. + // For unsigned and positive signed 16-bit values (0 <= j <= 65535): // li d,j => ori d,$zero,j tmpInst.setOpcode(Mips::ORi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); @@ -1721,7 +1725,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else if (ImmValue < 0 && ImmValue >= -32768) { - // For -32768 <= j < 0. + // For negative signed 16-bit values (-32768 <= j < 0): // li d,j => addiu d,$zero,j tmpInst.setOpcode(Mips::ADDiu); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); @@ -1729,8 +1733,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else if ((ImmValue & 0xffffffff) == ImmValue) { - // For any value of j that is representable as a 32-bit integer, create - // a sequence of: + // For all other values which are representable as a 32-bit integer: // li d,j => lui d,hi16(j) // ori d,d,lo16(j) tmpInst.setOpcode(Mips::LUi); @@ -1752,8 +1755,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, // | 16-bytes | 16-bytes | 16-bytes | // |__________|__________|__________| // - // For any value of j that is representable as a 48-bit integer, create - // a sequence of: + // For any 64-bit value that is representable as a 48-bit integer: // li d,j => lui d,hi16(j) // ori d,d,hi16(lo32(j)) // dsll d,d,16 @@ -1778,7 +1780,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, // | 16-bytes | 16-bytes | 16-bytes | 16-bytes | // |__________|__________|__________|__________| // - // For any value of j that isn't representable as a 48-bit integer. + // For all other values which are representable as a 64-bit integer: // li d,j => lui d,hi16(j) // ori d,d,lo16(hi32(j)) // dsll d,d,16 @@ -2048,13 +2050,11 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, if (isLoad && IsGPR && (BaseRegNum != RegOpNum)) TmpRegNum = RegOpNum; else { - int AT = getATReg(IDLoc); // At this point we need AT to perform the expansions and we exit if it is // not available. - if (!AT) + TmpRegNum = getATReg(IDLoc); + if (!TmpRegNum) return; - TmpRegNum = getReg( - (isGP64bit()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, AT); } TempInst.setOpcode(Mips::LUi); @@ -2078,12 +2078,14 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, // Prepare TempInst for next instruction. TempInst.clear(); // Add temp register to base. - TempInst.setOpcode(Mips::ADDu); - TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); - TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); - TempInst.addOperand(MCOperand::CreateReg(BaseRegNum)); - Instructions.push_back(TempInst); - TempInst.clear(); + if (BaseRegNum != Mips::ZERO) { + TempInst.setOpcode(Mips::ADDu); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + TempInst.addOperand(MCOperand::CreateReg(BaseRegNum)); + Instructions.push_back(TempInst); + TempInst.clear(); + } // And finally, create original instruction with low part // of offset and new base. TempInst.setOpcode(Inst.getOpcode()); @@ -2383,11 +2385,15 @@ int MipsAsmParser::matchMSA128CtrlRegisterName(StringRef Name) { return CC; } -int MipsAsmParser::getATReg(SMLoc Loc) { - int AT = AssemblerOptions.back()->getATRegNum(); - if (AT == 0) +unsigned MipsAsmParser::getATReg(SMLoc Loc) { + unsigned ATIndex = AssemblerOptions.back()->getATRegNum(); + if (ATIndex == 0) { reportParseError(Loc, "pseudo-instruction requires $at, which is not available"); + return 0; + } + unsigned AT = getReg( + (isGP64bit()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, ATIndex); return AT; } @@ -2571,7 +2577,7 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { if (Tok.isNot(AsmToken::Identifier)) return true; - std::string Str = Tok.getIdentifier().str(); + std::string Str = Tok.getIdentifier(); Parser.Lex(); // Eat the identifier. // Now make an expression from the rest of the operand. @@ -3579,11 +3585,7 @@ bool MipsAsmParser::parseSetAssignment() { if (Parser.parseExpression(Value)) return reportParseError("expected valid expression after comma"); - // Check if the Name already exists as a symbol. - MCSymbol *Sym = getContext().LookupSymbol(Name); - if (Sym) - return reportParseError("symbol already defined"); - Sym = getContext().GetOrCreateSymbol(Name); + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); Sym->setVariableValue(Value); return false; @@ -4044,6 +4046,23 @@ bool MipsAsmParser::parseDirectiveOption() { return false; } +/// parseInsnDirective +/// ::= .insn +bool MipsAsmParser::parseInsnDirective() { + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + // The actual label marking happens in + // MipsELFStreamer::createPendingLabelRelocs(). + getTargetStreamer().emitDirectiveInsn(); + + getParser().Lex(); // Eat EndOfStatement token. + return false; +} + /// parseDirectiveModule /// ::= .module oddspreg /// ::= .module nooddspreg @@ -4437,6 +4456,9 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".llvm_internal_mips_reallow_module_directive") return parseInternalDirectiveReallowModule(); + if (IDVal == ".insn") + return parseInsnDirective(); + return true; } diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index aad549d..e80a47b 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -77,7 +77,7 @@ void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { switch (MI->getOpcode()) { default: break; diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index 468dc07..713f35c 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -86,7 +86,8 @@ public: static const char *getRegisterName(unsigned RegNo); void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; bool printAliasInstr(const MCInst *MI, raw_ostream &OS); void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index acf6f21..dbcd867 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -155,7 +155,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return Value; } -MCObjectWriter *MipsAsmBackend::createObjectWriter(raw_ostream &OS) const { +MCObjectWriter * +MipsAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { return createMipsELFObjectWriter(OS, MCELFObjectTargetWriter::getOSABI(OSType), IsLittle, Is64Bit); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index 243b73d..b3d5a49 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -36,7 +36,7 @@ public: bool Is64Bit) : MCAsmBackend(), OSType(OSType), IsLittle(IsLittle), Is64Bit(Is64Bit) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override; + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const override; diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index a68bf16..8d9e3e3 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -10,6 +10,7 @@ #include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -22,17 +23,33 @@ using namespace llvm; namespace { +// A helper structure based on ELFRelocationEntry, used for sorting entries in +// the relocation table. +struct MipsRelocationEntry { + MipsRelocationEntry(const ELFRelocationEntry &R) + : R(R), SortOffset(R.Offset), HasMatchingHi(false) {} + const ELFRelocationEntry R; + // SortOffset equals R.Offset except for the *HI16 relocations, for which it + // will be set based on the R.Offset of the matching *LO16 relocation. + int64_t SortOffset; + // True when this is a *LO16 relocation chosen as a match for a *HI16 + // relocation. + bool HasMatchingHi; +}; + class MipsELFObjectWriter : public MCELFObjectTargetWriter { public: MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64, bool IsLittleEndian); - virtual ~MipsELFObjectWriter(); + ~MipsELFObjectWriter() override; unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; bool needsRelocateWithSymbol(const MCSymbolData &SD, unsigned Type) const override; + virtual void sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs) override; }; } @@ -225,6 +242,169 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, return Type; } +// Sort entries by SortOffset in descending order. +// When there are more *HI16 relocs paired with one *LO16 reloc, the 2nd rule +// sorts them in ascending order of R.Offset. +static int cmpRelMips(const MipsRelocationEntry *AP, + const MipsRelocationEntry *BP) { + const MipsRelocationEntry &A = *AP; + const MipsRelocationEntry &B = *BP; + if (A.SortOffset != B.SortOffset) + return B.SortOffset - A.SortOffset; + if (A.R.Offset != B.R.Offset) + return A.R.Offset - B.R.Offset; + if (B.R.Type != A.R.Type) + return B.R.Type - A.R.Type; + //llvm_unreachable("ELFRelocs might be unstable!"); + return 0; +} + +// For the given Reloc.Type, return the matching relocation type, as in the +// table below. +static unsigned getMatchingLoType(const MCAssembler &Asm, + const ELFRelocationEntry &Reloc) { + unsigned Type = Reloc.Type; + if (Type == ELF::R_MIPS_HI16) + return ELF::R_MIPS_LO16; + if (Type == ELF::R_MICROMIPS_HI16) + return ELF::R_MICROMIPS_LO16; + if (Type == ELF::R_MIPS16_HI16) + return ELF::R_MIPS16_LO16; + + const MCSymbolData &SD = Asm.getSymbolData(*Reloc.Symbol); + + if (MCELF::GetBinding(SD) != ELF::STB_LOCAL) + return ELF::R_MIPS_NONE; + + if (Type == ELF::R_MIPS_GOT16) + return ELF::R_MIPS_LO16; + if (Type == ELF::R_MICROMIPS_GOT16) + return ELF::R_MICROMIPS_LO16; + if (Type == ELF::R_MIPS16_GOT16) + return ELF::R_MIPS16_LO16; + + return ELF::R_MIPS_NONE; +} + +// Return true if First needs a matching *LO16, its matching *LO16 type equals +// Second's type and both relocations are against the same symbol. +static bool areMatchingHiAndLo(const MCAssembler &Asm, + const ELFRelocationEntry &First, + const ELFRelocationEntry &Second) { + return getMatchingLoType(Asm, First) != ELF::R_MIPS_NONE && + getMatchingLoType(Asm, First) == Second.Type && + First.Symbol && First.Symbol == Second.Symbol; +} + +// Return true if MipsRelocs[Index] is a *LO16 preceded by a matching *HI16. +static bool +isPrecededByMatchingHi(const MCAssembler &Asm, uint32_t Index, + std::vector<MipsRelocationEntry> &MipsRelocs) { + return Index < MipsRelocs.size() - 1 && + areMatchingHiAndLo(Asm, MipsRelocs[Index + 1].R, MipsRelocs[Index].R); +} + +// Return true if MipsRelocs[Index] is a *LO16 not preceded by a matching *HI16 +// and not chosen by a *HI16 as a match. +static bool isFreeLo(const MCAssembler &Asm, uint32_t Index, + std::vector<MipsRelocationEntry> &MipsRelocs) { + return Index < MipsRelocs.size() && !MipsRelocs[Index].HasMatchingHi && + !isPrecededByMatchingHi(Asm, Index, MipsRelocs); +} + +// Lo is chosen as a match for Hi, set their fields accordingly. +// Mips instructions have fixed length of at least two bytes (two for +// micromips/mips16, four for mips32/64), so we can set HI's SortOffset to +// matching LO's Offset minus one to simplify the sorting function. +static void setMatch(MipsRelocationEntry &Hi, MipsRelocationEntry &Lo) { + Lo.HasMatchingHi = true; + Hi.SortOffset = Lo.R.Offset - 1; +} + +// We sort relocation table entries by offset, except for one additional rule +// required by MIPS ABI: every *HI16 relocation must be immediately followed by +// the corresponding *LO16 relocation. We also support a GNU extension that +// allows more *HI16s paired with one *LO16. +// +// *HI16 relocations and their matching *LO16 are: +// +// +---------------------------------------------+-------------------+ +// | *HI16 | matching *LO16 | +// |---------------------------------------------+-------------------| +// | R_MIPS_HI16, local R_MIPS_GOT16 | R_MIPS_LO16 | +// | R_MICROMIPS_HI16, local R_MICROMIPS_GOT16 | R_MICROMIPS_LO16 | +// | R_MIPS16_HI16, local R_MIPS16_GOT16 | R_MIPS16_LO16 | +// +---------------------------------------------+-------------------+ +// +// (local R_*_GOT16 meaning R_*_GOT16 against the local symbol.) +// +// To handle *HI16 and *LO16 relocations, the linker needs a combined addend +// ("AHL") calculated from both *HI16 ("AHI") and *LO16 ("ALO") relocations: +// AHL = (AHI << 16) + (short)ALO; +// +// We are reusing gnu as sorting algorithm so we are emitting the relocation +// table sorted the same way as gnu as would sort it, for easier comparison of +// the generated .o files. +// +// The logic is: +// search the table (starting from the highest offset and going back to zero) +// for all *HI16 relocations that don't have a matching *LO16. +// For every such HI, find a matching LO with highest offset that isn't already +// matched with another HI. If there are no free LOs, match it with the first +// found (starting from lowest offset). +// When there are more HIs matched with one LO, sort them in descending order by +// offset. +// +// In other words, when searching for a matching LO: +// - don't look for a 'better' match for the HIs that are already followed by a +// matching LO; +// - prefer LOs without a pair; +// - prefer LOs with higher offset; +void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs) { + if (Relocs.size() < 2) + return; + + // The default function sorts entries by Offset in descending order. + MCELFObjectTargetWriter::sortRelocs(Asm, Relocs); + + // Init MipsRelocs from Relocs. + std::vector<MipsRelocationEntry> MipsRelocs; + for (unsigned I = 0, E = Relocs.size(); I != E; ++I) + MipsRelocs.push_back(MipsRelocationEntry(Relocs[I])); + + // Find a matching LO for all HIs that need it. + for (int32_t I = 0, E = MipsRelocs.size(); I != E; ++I) { + if (getMatchingLoType(Asm, MipsRelocs[I].R) == ELF::R_MIPS_NONE || + (I > 0 && isPrecededByMatchingHi(Asm, I - 1, MipsRelocs))) + continue; + + int32_t MatchedLoIndex = -1; + + // Search the list in the ascending order of Offset. + for (int32_t J = MipsRelocs.size() - 1, N = -1; J != N; --J) { + // check for a match + if (areMatchingHiAndLo(Asm, MipsRelocs[I].R, MipsRelocs[J].R) && + (MatchedLoIndex == -1 || // first match + // or we already have a match, + // but this one is with higher offset and it's free + (MatchedLoIndex > J && isFreeLo(Asm, J, MipsRelocs)))) + MatchedLoIndex = J; + } + + if (MatchedLoIndex != -1) + // We have a match. + setMatch(MipsRelocs[I], MipsRelocs[MatchedLoIndex]); + } + + // SortOffsets are calculated, call the sorting function. + array_pod_sort(MipsRelocs.begin(), MipsRelocs.end(), cmpRelMips); + + // Copy sorted MipsRelocs back to Relocs. + for (unsigned I = 0, E = MipsRelocs.size(); I != E; ++I) + Relocs[I] = MipsRelocs[I].R; +} + bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, unsigned Type) const { @@ -264,7 +444,8 @@ MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, } } -MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI, +MCObjectWriter *llvm::createMipsELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, bool IsLittleEndian, bool Is64Bit) { MCELFObjectTargetWriter *MOTW = diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index 93f60df..6d1d9f4 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -21,8 +21,6 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst, MCContext &Context = getContext(); const MCRegisterInfo *MCRegInfo = Context.getRegisterInfo(); - MipsTargetELFStreamer *ELFTargetStreamer = - static_cast<MipsTargetELFStreamer *>(getTargetStreamer()); for (unsigned OpIndex = 0; OpIndex < Inst.getNumOperands(); ++OpIndex) { const MCOperand &Op = Inst.getOperand(OpIndex); @@ -34,6 +32,14 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst, RegInfoRecord->SetPhysRegUsed(Reg, MCRegInfo); } + createPendingLabelRelocs(); +} + +void MipsELFStreamer::createPendingLabelRelocs() { + MipsTargetELFStreamer *ELFTargetStreamer = + static_cast<MipsTargetELFStreamer *>(getTargetStreamer()); + + // FIXME: Also mark labels when in MIPS16 mode. if (ELFTargetStreamer->isMicroMipsEnabled()) { for (auto Label : Labels) { MCSymbolData &Data = getOrCreateSymbolData(Label); @@ -70,7 +76,8 @@ void MipsELFStreamer::EmitMipsOptionRecords() { } MCELFStreamer *llvm::createMipsELFStreamer(MCContext &Context, - MCAsmBackend &MAB, raw_ostream &OS, + MCAsmBackend &MAB, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { return new MipsELFStreamer(Context, MAB, OS, Emitter); diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h index 6b834c6..4e30901 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -33,7 +33,7 @@ class MipsELFStreamer : public MCELFStreamer { public: - MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, + MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter) : MCELFStreamer(Context, MAB, OS, Emitter) { @@ -65,10 +65,13 @@ public: /// Emits all the option records stored up until the point it's called. void EmitMipsOptionRecords(); + + /// Mark labels as microMIPS, if necessary for the subtarget. + void createPendingLabelRelocs(); }; MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll); + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll); } // namespace llvm. #endif diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h index b01726d..cc40e2e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h @@ -43,7 +43,7 @@ public: MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, bool IsLittle) : MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {} - ~MipsMCCodeEmitter() {} + ~MipsMCCodeEmitter() override {} void EmitByte(unsigned char C, raw_ostream &OS) const; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h index e6b5be7..687b800 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h @@ -23,9 +23,8 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg); // This function creates an MCELFStreamer for Mips NaCl. MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll); + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll); } #endif diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 6f3f37b..a75d27d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -97,17 +97,16 @@ static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createMipsMCInstPrinter(const Target &T, +static MCInstPrinter *createMipsMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { return new MipsInstPrinter(MAI, MII, MRI); } static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, - MCAsmBackend &MAB, raw_ostream &OS, + MCAsmBackend &MAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { MCStreamer *S; if (!T.isOSNaCl()) diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index 92f394a..577a8b3 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -27,6 +27,7 @@ class MCSubtargetInfo; class StringRef; class Target; class raw_ostream; +class raw_pwrite_stream; extern Target TheMipsTarget; extern Target TheMipselTarget; @@ -53,7 +54,7 @@ MCAsmBackend *createMipsAsmBackendEL64(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI, +MCObjectWriter *createMipsELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, bool IsLittleEndian, bool Is64Bit); namespace MIPS_MC { diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp index 1adfdf9..35348aa 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp @@ -36,11 +36,11 @@ const unsigned LoadStoreStackMaskReg = Mips::T7; class MipsNaClELFStreamer : public MipsELFStreamer { public: - MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, - MCCodeEmitter *Emitter) + MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter) : MipsELFStreamer(Context, TAB, OS, Emitter), PendingCall(false) {} - ~MipsNaClELFStreamer() {} + ~MipsNaClELFStreamer() override {} private: // Whether we started the sandboxing sequence for calls. Calls are bundled @@ -252,7 +252,7 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg) { } MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { MipsNaClELFStreamer *S = new MipsNaClELFStreamer(Context, TAB, OS, Emitter); diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 5790a5c..cfd56c6 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -54,6 +54,7 @@ void MipsTargetStreamer::emitDirectiveNaN2008() {} void MipsTargetStreamer::emitDirectiveNaNLegacy() {} void MipsTargetStreamer::emitDirectiveOptionPic0() {} void MipsTargetStreamer::emitDirectiveOptionPic2() {} +void MipsTargetStreamer::emitDirectiveInsn() { forbidModuleDirective(); } void MipsTargetStreamer::emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg) {} void MipsTargetStreamer::emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) {} @@ -189,6 +190,11 @@ void MipsTargetAsmStreamer::emitDirectiveOptionPic2() { OS << "\t.option\tpic2\n"; } +void MipsTargetAsmStreamer::emitDirectiveInsn() { + MipsTargetStreamer::emitDirectiveInsn(); + OS << "\t.insn\n"; +} + void MipsTargetAsmStreamer::emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg) { OS << "\t.frame\t$" @@ -507,9 +513,8 @@ void MipsTargetELFStreamer::emitAssignment(MCSymbol *Symbol, const MCSymbol &RhsSym = static_cast<const MCSymbolRefExpr *>(Value)->getSymbol(); MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym); - uint8_t Type = MCELF::GetType(Data); - if ((Type != ELF::STT_FUNC) || - !(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2))) + + if (!(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2))) return; MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol); @@ -637,6 +642,12 @@ void MipsTargetELFStreamer::emitDirectiveOptionPic2() { MCA.setELFHeaderEFlags(Flags); } +void MipsTargetELFStreamer::emitDirectiveInsn() { + MipsTargetStreamer::emitDirectiveInsn(); + MipsELFStreamer &MEF = static_cast<MipsELFStreamer &>(Streamer); + MEF.createPendingLabelRelocs(); +} + void MipsTargetELFStreamer::emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg_) { MCContext &Context = getStreamer().getAssembler().getContext(); diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index abecfa0..5828fbd 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -143,25 +143,6 @@ bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } -// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions -void Mips16FrameLowering:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - if (!hasReservedCallFrame(MF)) { - int64_t Amount = I->getOperand(0).getImm(); - - if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) - Amount = -Amount; - - const Mips16InstrInfo &TII = - *static_cast<const Mips16InstrInfo *>(STI.getInstrInfo()); - - TII.adjustStackPtr(Mips::SP, Amount, MBB, I); - } - - MBB.erase(I); -} - bool Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h index 012d558..0287e59 100644 --- a/lib/Target/Mips/Mips16FrameLowering.h +++ b/lib/Target/Mips/Mips16FrameLowering.h @@ -26,10 +26,6 @@ public: void emitPrologue(MachineFunction &MF) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const override; - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 00d4495..a49572e 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -293,6 +293,9 @@ void Mips16InstrInfo::adjustStackPtrBigUnrestricted( void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { + if (Amount == 0) + return; + if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16> BuildAddiuSpImm(MBB, I, Amount); else diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index f9b7387..6540b40 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -77,7 +77,7 @@ public: /// Adjust SP by Amount bytes. void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock::iterator I) const override; /// Emit a series of instructions to load an immediate. // This is to adjust some FrameReg. We return the new register to be used diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 1eb3b2c..9024f21 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -260,31 +260,22 @@ void MipsAsmPrinter::printSavedRegsBitmask() { unsigned AFGR64RegSize = Mips::AFGR64RegClass.getSize(); bool HasAFGR64Reg = false; unsigned CSFPRegsSize = 0; - unsigned i, e = CSI.size(); - - // Set FPU Bitmask. - for (i = 0; i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - if (Mips::GPR32RegClass.contains(Reg)) - break; + for (const auto &I : CSI) { + unsigned Reg = I.getReg(); unsigned RegNum = TRI->getEncodingValue(Reg); - if (Mips::AFGR64RegClass.contains(Reg)) { + + // If it's a floating point register, set the FPU Bitmask. + // If it's a general purpose register, set the CPU Bitmask. + if (Mips::FGR32RegClass.contains(Reg)) { + FPUBitmask |= (1 << RegNum); + CSFPRegsSize += FGR32RegSize; + } else if (Mips::AFGR64RegClass.contains(Reg)) { FPUBitmask |= (3 << RegNum); CSFPRegsSize += AFGR64RegSize; HasAFGR64Reg = true; - continue; - } - - FPUBitmask |= (1 << RegNum); - CSFPRegsSize += FGR32RegSize; - } - - // Set CPU Bitmask. - for (; i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - unsigned RegNum = TRI->getEncodingValue(Reg); - CPUBitmask |= (1 << RegNum); + } else if (Mips::GPR32RegClass.contains(Reg)) + CPUBitmask |= (1 << RegNum); } // FP Regs are saved right below where the virtual frame pointer points to. diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 7de0081..e8e3d3d 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -440,7 +440,7 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) { bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) { const GlobalValue *GV = dyn_cast<GlobalValue>(V); - if (GV && isa<Function>(GV) && dyn_cast<Function>(GV)->isIntrinsic()) + if (GV && isa<Function>(GV) && cast<Function>(GV)->isIntrinsic()) return false; if (!GV) return false; diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 8b8b019..826fbaf 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -131,3 +131,20 @@ uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const { return RoundUpToAlignment(Offset, getStackAlignment()); } + +// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions +void MipsFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + unsigned SP = STI.getABI().IsN64() ? Mips::SP_64 : Mips::SP; + + if (!hasReservedCallFrame(MF)) { + int64_t Amount = I->getOperand(0).getImm(); + if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) + Amount = -Amount; + + STI.getInstrInfo()->adjustStackPtr(SP, Amount, MBB, I); + } + + MBB.erase(I); +} diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index 90a8d2a..96d1e29 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -32,6 +32,11 @@ public: bool hasFP(const MachineFunction &MF) const override; + void + eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; + protected: uint64_t estimateStackSize(const MachineFunction &MF) const; }; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index e4bae03..f37737d 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -3682,6 +3682,7 @@ void MipsTargetLowering::passByValArg( DAG.getIntPtrConstant(VA.getLocMemOffset())); Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, PtrTy), Alignment, /*isVolatile=*/false, /*AlwaysInline=*/false, + /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); MemOpChains.push_back(Chain); } diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 7b2b289..4589535 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -117,6 +117,10 @@ public: const TargetRegisterInfo *TRI, int64_t Offset) const = 0; + virtual void adjustStackPtr(unsigned SP, int64_t Amount, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const = 0; + /// Create an instruction which has the same operands and memory operands /// as MI but has a new opcode. MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc, diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index c937d2b..a1fad66 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -186,10 +186,8 @@ def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">, AssemblerPredicate<"FeatureMips16">; def HasCnMips : Predicate<"Subtarget->hasCnMips()">, AssemblerPredicate<"FeatureCnMips">; -def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">, - AssemblerPredicate<"FeatureMips32">; -def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">, - AssemblerPredicate<"FeatureMips32">; +def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">; +def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">; def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; def HasStdEnc : Predicate<"Subtarget->hasStandardEncoding()">, AssemblerPredicate<"!FeatureMips16">; @@ -1596,8 +1594,12 @@ def : MipsInstAlias<"b $offset", (BEQ ZERO, ZERO, brtarget:$offset), 0>; } def : MipsInstAlias<"bnez $rs,$offset", (BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; +def : MipsInstAlias<"bnezl $rs,$offset", + (BNEL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; def : MipsInstAlias<"beqz $rs,$offset", (BEQ GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; +def : MipsInstAlias<"beqzl $rs,$offset", + (BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; def : MipsInstAlias<"syscall", (SYSCALL 0), 1>; def : MipsInstAlias<"break", (BREAK 0, 0), 1>; diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index 09e722d..0d1ee04 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -60,15 +60,7 @@ void MipsCallEntry::printCustom(raw_ostream &O) const { #endif } -MipsFunctionInfo::~MipsFunctionInfo() { - for (StringMap<const MipsCallEntry *>::iterator - I = ExternalCallEntries.begin(), E = ExternalCallEntries.end(); I != E; - ++I) - delete I->getValue(); - - for (const auto &Entry : GlobalCallEntries) - delete Entry.second; -} +MipsFunctionInfo::~MipsFunctionInfo() {} bool MipsFunctionInfo::globalBaseRegSet() const { return GlobalBaseReg; @@ -125,21 +117,21 @@ bool MipsFunctionInfo::isEhDataRegFI(int FI) const { } MachinePointerInfo MipsFunctionInfo::callPtrInfo(StringRef Name) { - const MipsCallEntry *&E = ExternalCallEntries[Name]; + std::unique_ptr<const MipsCallEntry> &E = ExternalCallEntries[Name]; if (!E) - E = new MipsCallEntry(Name); + E = llvm::make_unique<MipsCallEntry>(Name); - return MachinePointerInfo(E); + return MachinePointerInfo(E.get()); } MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *Val) { - const MipsCallEntry *&E = GlobalCallEntries[Val]; + std::unique_ptr<const MipsCallEntry> &E = GlobalCallEntries[Val]; if (!E) - E = new MipsCallEntry(Val); + E = llvm::make_unique<MipsCallEntry>(Val); - return MachinePointerInfo(E); + return MachinePointerInfo(E.get()); } int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) { diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index 217f307..32436ef 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -144,8 +144,9 @@ private: int MoveF64ViaSpillFI; /// MipsCallEntry maps. - StringMap<const MipsCallEntry *> ExternalCallEntries; - ValueMap<const GlobalValue *, const MipsCallEntry *> GlobalCallEntries; + StringMap<std::unique_ptr<const MipsCallEntry>> ExternalCallEntries; + ValueMap<const GlobalValue *, std::unique_ptr<const MipsCallEntry>> + GlobalCallEntries; }; } // end of namespace llvm diff --git a/lib/Target/Mips/MipsOptionRecord.h b/lib/Target/Mips/MipsOptionRecord.h index dc29cbd..746feab 100644 --- a/lib/Target/Mips/MipsOptionRecord.h +++ b/lib/Target/Mips/MipsOptionRecord.h @@ -52,7 +52,7 @@ public: COP2RegClass = &(TRI->getRegClass(Mips::COP2RegClassID)); COP3RegClass = &(TRI->getRegClass(Mips::COP3RegClassID)); } - ~MipsRegInfoRecord() {} + ~MipsRegInfoRecord() override {} void EmitMipsOptionRecord() override; void SetPhysRegUsed(unsigned Reg, const MCRegisterInfo *MCRegInfo); diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 7c79c4c..23feb5c 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -607,26 +607,6 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { !MFI->hasVarSizedObjects(); } -// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions -void MipsSEFrameLowering:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const MipsSEInstrInfo &TII = - *static_cast<const MipsSEInstrInfo *>(STI.getInstrInfo()); - - if (!hasReservedCallFrame(MF)) { - int64_t Amount = I->getOperand(0).getImm(); - - if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) - Amount = -Amount; - - unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; - TII.adjustStackPtr(SP, Amount, MBB, I); - } - - MBB.erase(I); -} - void MipsSEFrameLowering:: processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h index 0eca1df..22448a4 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.h +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -27,10 +27,6 @@ public: void emitPrologue(MachineFunction &MF) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const override; - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index a598c3f..6daa632 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -948,7 +948,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, llvm_unreachable("Unexpected asm memory constraint"); // All memory constraints can at least accept raw pointers. case InlineAsm::Constraint_i: - case InlineAsm::Constraint_R: OutOps.push_back(Op); OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); return false; @@ -961,6 +960,20 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, OutOps.push_back(Op); OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); return false; + case InlineAsm::Constraint_R: + // The 'R' constraint is supposed to be much more complicated than this. + // However, it's becoming less useful due to architectural changes and + // ought to be replaced by other constraints such as 'ZC'. + // For now, support 9-bit signed offsets which is supportable by all + // subtargets for all instructions. + if (selectAddrRegImm9(Op, Base, Offset)) { + OutOps.push_back(Base); + OutOps.push_back(Offset); + return false; + } + OutOps.push_back(Op); + OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); + return false; case InlineAsm::Constraint_ZC: // ZC matches whatever the pref, ll, and sc instructions can handle for the // given subtarget. diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index b992579..cb38393 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -364,6 +364,9 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; + if (Amount == 0) + return; + if (isInt<16>(Amount))// addi sp, sp, amount BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount); else { // Expand immediate that doesn't fit in 16-bit. diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index d16fab2..bebbabf 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -68,7 +68,7 @@ public: /// Adjust SP by Amount bytes. void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock::iterator I) const override; /// Emit a series of instructions to load an immediate. If NewImm is a /// non-NULL parameter, the last instruction is not emitted, but instead diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index 1ff041d..22b0c6c 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -45,6 +45,7 @@ public: virtual void emitDirectiveNaNLegacy(); virtual void emitDirectiveOptionPic0(); virtual void emitDirectiveOptionPic2(); + virtual void emitDirectiveInsn(); virtual void emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg); virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff); @@ -160,6 +161,7 @@ public: void emitDirectiveNaNLegacy() override; void emitDirectiveOptionPic0() override; void emitDirectiveOptionPic2() override; + void emitDirectiveInsn() override; void emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg) override; void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override; @@ -227,6 +229,7 @@ public: void emitDirectiveNaNLegacy() override; void emitDirectiveOptionPic0() override; void emitDirectiveOptionPic2() override; + void emitDirectiveInsn() override; void emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg) override; void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override; diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp index 80b2f62..ac92df9 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp @@ -28,13 +28,9 @@ using namespace llvm; #include "NVPTXGenAsmWriter.inc" - NVPTXInstPrinter::NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : MCInstPrinter(MAI, MII, MRI) { - setAvailableFeatures(STI.getFeatureBits()); -} + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { // Decode the virtual register @@ -72,7 +68,7 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void NVPTXInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { printInstruction(MI, OS); // Next always print the annotation. diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h index 0496964..02c5a21 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h @@ -25,10 +25,11 @@ class MCSubtargetInfo; class NVPTXInstPrinter : public MCInstPrinter { public: NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + const MCRegisterInfo &MRI); void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, + const MCSubtargetInfo &STI) override; // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp index 2b4d864..f9e4324 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -58,14 +58,13 @@ static MCCodeGenInfo *createNVPTXMCCodeGenInfo( return X; } -static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T, +static MCInstPrinter *createNVPTXMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) - return new NVPTXInstPrinter(MAI, MII, MRI, STI); + return new NVPTXInstPrinter(MAI, MII, MRI); return nullptr; } diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td index 93fabf6..96abfa8 100644 --- a/lib/Target/NVPTX/NVPTX.td +++ b/lib/Target/NVPTX/NVPTX.td @@ -32,20 +32,28 @@ def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21", "Target SM 2.1">; def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30", "Target SM 3.0">; +def SM32 : SubtargetFeature<"sm_32", "SmVersion", "32", + "Target SM 3.2">; def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35", "Target SM 3.5">; +def SM37 : SubtargetFeature<"sm_37", "SmVersion", "37", + "Target SM 3.7">; def SM50 : SubtargetFeature<"sm_50", "SmVersion", "50", "Target SM 5.0">; +def SM52 : SubtargetFeature<"sm_52", "SmVersion", "52", + "Target SM 5.2">; +def SM53 : SubtargetFeature<"sm_53", "SmVersion", "53", + "Target SM 5.3">; // PTX Versions -def PTX30 : SubtargetFeature<"ptx30", "PTXVersion", "30", - "Use PTX version 3.0">; -def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31", - "Use PTX version 3.1">; def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32", "Use PTX version 3.2">; def PTX40 : SubtargetFeature<"ptx40", "PTXVersion", "40", "Use PTX version 4.0">; +def PTX41 : SubtargetFeature<"ptx41", "PTXVersion", "41", + "Use PTX version 4.1">; +def PTX42 : SubtargetFeature<"ptx42", "PTXVersion", "42", + "Use PTX version 4.2">; //===----------------------------------------------------------------------===// // NVPTX supported processors. @@ -57,8 +65,12 @@ class Proc<string Name, list<SubtargetFeature> Features> def : Proc<"sm_20", [SM20]>; def : Proc<"sm_21", [SM21]>; def : Proc<"sm_30", [SM30]>; +def : Proc<"sm_32", [SM32, PTX40]>; def : Proc<"sm_35", [SM35]>; -def : Proc<"sm_50", [SM50]>; +def : Proc<"sm_37", [SM37, PTX41]>; +def : Proc<"sm_50", [SM50, PTX40]>; +def : Proc<"sm_52", [SM52, PTX41]>; +def : Proc<"sm_53", [SM53, PTX42]>; def NVPTXInstrInfo : InstrInfo { diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index cc58b07..9a71964 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -118,7 +118,7 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { DebugLoc curLoc = MI.getDebugLoc(); - if (prevDebugLoc.isUnknown() && curLoc.isUnknown()) + if (!prevDebugLoc && !curLoc) return; if (prevDebugLoc == curLoc) @@ -126,39 +126,32 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { prevDebugLoc = curLoc; - if (curLoc.isUnknown()) + if (!curLoc) return; - const MachineFunction *MF = MI.getParent()->getParent(); - //const TargetMachine &TM = MF->getTarget(); - - const LLVMContext &ctx = MF->getFunction()->getContext(); - DIScope Scope(curLoc.getScope(ctx)); - - assert((!Scope || Scope.isScope()) && - "Scope of a DebugLoc should be null or a DIScope."); + auto *Scope = cast_or_null<MDScope>(curLoc.getScope()); if (!Scope) return; - StringRef fileName(Scope.getFilename()); - StringRef dirName(Scope.getDirectory()); + StringRef fileName(Scope->getFilename()); + StringRef dirName(Scope->getDirectory()); SmallString<128> FullPathName = dirName; if (!dirName.empty() && !sys::path::is_absolute(fileName)) { sys::path::append(FullPathName, fileName); - fileName = FullPathName.str(); + fileName = FullPathName; } - if (filenameMap.find(fileName.str()) == filenameMap.end()) + if (filenameMap.find(fileName) == filenameMap.end()) return; // Emit the line from the source file. if (InterleaveSrc) - this->emitSrcInText(fileName.str(), curLoc.getLine()); + this->emitSrcInText(fileName, curLoc.getLine()); std::stringstream temp; - temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine() + temp << "\t.loc " << filenameMap[fileName] << " " << curLoc.getLine() << " " << curLoc.getCol(); - OutStreamer.EmitRawText(Twine(temp.str().c_str())); + OutStreamer.EmitRawText(temp.str()); } void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { @@ -641,7 +634,7 @@ static bool usedInGlobalVarDef(const Constant *C) { return false; if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { - if (GV->getName().str() == "llvm.used") + if (GV->getName() == "llvm.used") return false; return true; } @@ -656,7 +649,7 @@ static bool usedInGlobalVarDef(const Constant *C) { static bool usedInOneFunc(const User *U, Function const *&oneFunc) { if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { - if (othergv->getName().str() == "llvm.used") + if (othergv->getName() == "llvm.used") return true; } @@ -780,32 +773,32 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { DbgFinder.processModule(M); unsigned i = 1; - for (DICompileUnit DIUnit : DbgFinder.compile_units()) { - StringRef Filename(DIUnit.getFilename()); - StringRef Dirname(DIUnit.getDirectory()); + for (const MDCompileUnit *DIUnit : DbgFinder.compile_units()) { + StringRef Filename = DIUnit->getFilename(); + StringRef Dirname = DIUnit->getDirectory(); SmallString<128> FullPathName = Dirname; if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { sys::path::append(FullPathName, Filename); - Filename = FullPathName.str(); + Filename = FullPathName; } - if (filenameMap.find(Filename.str()) != filenameMap.end()) + if (filenameMap.find(Filename) != filenameMap.end()) continue; - filenameMap[Filename.str()] = i; - OutStreamer.EmitDwarfFileDirective(i, "", Filename.str()); + filenameMap[Filename] = i; + OutStreamer.EmitDwarfFileDirective(i, "", Filename); ++i; } - for (DISubprogram SP : DbgFinder.subprograms()) { - StringRef Filename(SP.getFilename()); - StringRef Dirname(SP.getDirectory()); + for (MDSubprogram *SP : DbgFinder.subprograms()) { + StringRef Filename = SP->getFilename(); + StringRef Dirname = SP->getDirectory(); SmallString<128> FullPathName = Dirname; if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { sys::path::append(FullPathName, Filename); - Filename = FullPathName.str(); + Filename = FullPathName; } - if (filenameMap.find(Filename.str()) != filenameMap.end()) + if (filenameMap.find(Filename) != filenameMap.end()) continue; - filenameMap[Filename.str()] = i; + filenameMap[Filename] = i; ++i; } } @@ -1011,7 +1004,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, msg.append("Error: "); msg.append("Symbol "); if (V->hasName()) - msg.append(V->getName().str()); + msg.append(V->getName()); msg.append("has unsupported appending linkage type"); llvm_unreachable(msg.c_str()); } else if (!V->hasInternalLinkage() && @@ -1147,7 +1140,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, const Function *demotedFunc = nullptr; if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { - O << "// " << GVar->getName().str() << " has been demoted\n"; + O << "// " << GVar->getName() << " has been demoted\n"; if (localDecls.find(demotedFunc) != localDecls.end()) localDecls[demotedFunc].push_back(GVar); else { @@ -1195,9 +1188,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, // The frontend adds zero-initializer to variables that don't have an // initial value, so skip warning for this case. if (!GVar->getInitializer()->isNullValue()) { - std::string warnMsg = "initial value of '" + GVar->getName().str() + - "' is not allowed in addrspace(" + - llvm::utostr_32(PTy->getAddressSpace()) + ")"; + std::string warnMsg = + ("initial value of '" + GVar->getName() + + "' is not allowed in addrspace(" + + Twine(llvm::utostr_32(PTy->getAddressSpace())) + ")").str(); report_fatal_error(warnMsg.c_str()); } } @@ -1771,12 +1765,11 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, case Type::IntegerTyID: { const Type *ETy = CPV->getType(); if (ETy == Type::getInt8Ty(CPV->getContext())) { - unsigned char c = - (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); + unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue(); ptr = &c; aggBuffer->addBytes(ptr, 1, Bytes); } else if (ETy == Type::getInt16Ty(CPV->getContext())) { - short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); + short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue(); ptr = (unsigned char *)&int16; aggBuffer->addBytes(ptr, 2, Bytes); } else if (ETy == Type::getInt32Ty(CPV->getContext())) { @@ -2086,7 +2079,7 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { std::stringstream temp; - LineReader *reader = this->getReader(filename.str()); + LineReader *reader = this->getReader(filename); temp << "\n//"; temp << filename.str(); temp << ":"; @@ -2094,7 +2087,7 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { temp << " "; temp << reader->readLine(line); temp << "\n"; - this->OutStreamer.EmitRawText(Twine(temp.str())); + this->OutStreamer.EmitRawText(temp.str()); } LineReader *NVPTXAsmPrinter::getReader(std::string filename) { diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp index 6d7c99c..ae63cae 100644 --- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp +++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @@ -132,9 +132,8 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( } else { // GEP is a constant expression. Constant *NewGEPCE = ConstantExpr::getGetElementPtr( - cast<Constant>(Cast->getOperand(0)), - Indices, - GEP->isInBounds()); + GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)), + Indices, GEP->isInBounds()); GEP->replaceAllUsesWith( ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType())); } diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 850c020..6fd09c4 100644 --- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -347,6 +347,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C, NewOperands[0], makeArrayRef(&NewOperands[1], NumOperands - 1)) : Builder.CreateInBoundsGEP( + cast<GEPOperator>(C)->getSourceElementType(), NewOperands[0], makeArrayRef(&NewOperands[1], NumOperands - 1)); case Instruction::Select: diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index ff74e6e..8b06657 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -3893,7 +3893,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, const SDNode *left = N0.getOperand(0).getNode(); const SDNode *right = N0.getOperand(1).getNode(); - if (dyn_cast<ConstantSDNode>(left) || dyn_cast<ConstantSDNode>(right)) + if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right)) opIsLive = true; if (!opIsLive) diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 578401a..6ab0fad 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -70,8 +70,8 @@ static void convertTransferToLoop( // srcAddr and dstAddr are expected to be pointer types, // so no check is made here. - unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace(); - unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned srcAS = cast<PointerType>(srcAddr->getType())->getAddressSpace(); + unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace(); // Cast pointers to (char *) srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS)); @@ -84,9 +84,11 @@ static void convertTransferToLoop( ind->addIncoming(ConstantInt::get(indType, 0), origBB); // load from srcAddr+ind - Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile); + Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind), + srcVolatile); // store at dstAddr+ind - loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile); + loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind), + dstVolatile); // The value for ind coming from backedge is (ind + 1) Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1)); @@ -106,7 +108,7 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, origBB->getTerminator()->setSuccessor(0, loopBB); IRBuilder<> builder(origBB, origBB->getTerminator()); - unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace(); // Cast pointer to the type of value getting stored dstAddr = @@ -116,7 +118,7 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, PHINode *ind = loop.CreatePHI(len->getType(), 0); ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB); - loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), false); + loop.CreateStore(val, loop.CreateGEP(val->getType(), dstAddr, ind), false); Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1)); ind->addIncoming(newind, loopBB); diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index b8df5af..2cd10e8 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -52,7 +52,7 @@ public: TargetPassConfig *createPassConfig(PassManagerBase &PM) override; // Emission of machine code through MCJIT is not supported. - bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, + bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_pwrite_stream &, bool = true) override { return true; } diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index b8af04d..dc81802 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "NVPTXTargetTransformInfo.h" +#include "NVPTXUtilities.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -19,6 +20,75 @@ using namespace llvm; #define DEBUG_TYPE "NVPTXtti" +// Whether the given intrinsic reads threadIdx.x/y/z. +static bool readsThreadIndex(const IntrinsicInst *II) { + switch (II->getIntrinsicID()) { + default: return false; + case Intrinsic::nvvm_read_ptx_sreg_tid_x: + case Intrinsic::nvvm_read_ptx_sreg_tid_y: + case Intrinsic::nvvm_read_ptx_sreg_tid_z: + return true; + } +} + +static bool readsLaneId(const IntrinsicInst *II) { + return II->getIntrinsicID() == Intrinsic::ptx_read_laneid; +} + +// Whether the given intrinsic is an atomic instruction in PTX. +static bool isNVVMAtomic(const IntrinsicInst *II) { + switch (II->getIntrinsicID()) { + default: return false; + case Intrinsic::nvvm_atomic_load_add_f32: + case Intrinsic::nvvm_atomic_load_inc_32: + case Intrinsic::nvvm_atomic_load_dec_32: + return true; + } +} + +bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) { + // Without inter-procedural analysis, we conservatively assume that arguments + // to __device__ functions are divergent. + if (const Argument *Arg = dyn_cast<Argument>(V)) + return !isKernelFunction(*Arg->getParent()); + + if (const Instruction *I = dyn_cast<Instruction>(V)) { + // Without pointer analysis, we conservatively assume values loaded from + // generic or local address space are divergent. + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { + unsigned AS = LI->getPointerAddressSpace(); + return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL; + } + // Atomic instructions may cause divergence. Atomic instructions are + // executed sequentially across all threads in a warp. Therefore, an earlier + // executed thread may see different memory inputs than a later executed + // thread. For example, suppose *a = 0 initially. + // + // atom.global.add.s32 d, [a], 1 + // + // returns 0 for the first thread that enters the critical region, and 1 for + // the second thread. + if (I->isAtomic()) + return true; + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + // Instructions that read threadIdx are obviously divergent. + if (readsThreadIndex(II) || readsLaneId(II)) + return true; + // Handle the NVPTX atomic instrinsics that cannot be represented as an + // atomic IR instruction. + if (isNVVMAtomic(II)) + return true; + } + // Conservatively consider the return value of function calls as divergent. + // We could analyze callees with bodies more precisely using + // inter-procedural analysis. + if (isa<CallInst>(I)) + return true; + } + + return false; +} + unsigned NVPTXTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index bf21e88..4280888 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -61,6 +61,8 @@ public: bool hasBranchDivergence() { return true; } + bool isSourceOfDivergence(const Value *V); + unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 99a1633..90ab7a5 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -1071,6 +1071,58 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, Inst = TmpInst; break; } + case PPC::RLWINMbm: + case PPC::RLWINMobm: { + unsigned MB, ME; + int64_t BM = Inst.getOperand(3).getImm(); + if (!isRunOfOnes(BM, MB, ME)) + break; + + MCInst TmpInst; + TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINMo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(MCOperand::CreateImm(MB)); + TmpInst.addOperand(MCOperand::CreateImm(ME)); + Inst = TmpInst; + break; + } + case PPC::RLWIMIbm: + case PPC::RLWIMIobm: { + unsigned MB, ME; + int64_t BM = Inst.getOperand(3).getImm(); + if (!isRunOfOnes(BM, MB, ME)) + break; + + MCInst TmpInst; + TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMIo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); // The tied operand. + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(MCOperand::CreateImm(MB)); + TmpInst.addOperand(MCOperand::CreateImm(ME)); + Inst = TmpInst; + break; + } + case PPC::RLWNMbm: + case PPC::RLWNMobm: { + unsigned MB, ME; + int64_t BM = Inst.getOperand(3).getImm(); + if (!isRunOfOnes(BM, MB, ME)) + break; + + MCInst TmpInst; + TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? PPC::RLWNM : PPC::RLWNMo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(MCOperand::CreateImm(MB)); + TmpInst.addOperand(MCOperand::CreateImm(ME)); + Inst = TmpInst; + break; + } } } diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index a9f5fc7..5cbf3d9 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -25,7 +25,7 @@ class PPCDisassembler : public MCDisassembler { public: PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} - virtual ~PPCDisassembler() {} + ~PPCDisassembler() override {} DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 311a4f2..1576544 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -51,7 +51,7 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { // Check for slwi/srwi mnemonics. if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 8718743..eca37eb 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -32,7 +32,8 @@ public: } void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index bea88a2..420c5c8 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -208,7 +208,7 @@ namespace { public: DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { bool is64 = getPointerSize() == 8; return createPPCMachObjectWriter( OS, @@ -224,8 +224,7 @@ namespace { ELFPPCAsmBackend(const Target &T, bool IsLittleEndian, uint8_t OSABI) : PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { } - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { bool is64 = getPointerSize() == 8; return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index b817394..3e3489f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -412,7 +412,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, } } -MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, bool IsLittleEndian, uint8_t OSABI) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index b9f0afb..725b47b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -44,7 +44,7 @@ public: : MCII(mcii), CTX(ctx), IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {} - ~PPCMCCodeEmitter() {} + ~PPCMCCodeEmitter() override {} unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 2f7a768..423e427 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -238,14 +238,12 @@ createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { return new PPCTargetMachOStreamer(S); } -static MCInstPrinter *createPPCMCInstPrinter(const Target &T, +static MCInstPrinter *createPPCMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - bool isDarwin = Triple(STI.getTargetTriple()).isOSDarwin(); - return new PPCInstPrinter(MAI, MII, MRI, isDarwin); + const MCRegisterInfo &MRI) { + return new PPCInstPrinter(MAI, MII, MRI, T.isOSDarwin()); } extern "C" void LLVMInitializePowerPCTargetMC() { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 8b1e3b4..5f2117c 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -18,6 +18,7 @@ #undef PPC #include "llvm/Support/DataTypes.h" +#include "llvm/Support/MathExtras.h" namespace llvm { class MCAsmBackend; @@ -29,6 +30,7 @@ class MCRegisterInfo; class MCSubtargetInfo; class Target; class StringRef; +class raw_pwrite_stream; class raw_ostream; extern Target ThePPC32Target; @@ -42,15 +44,42 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -/// createPPCELFObjectWriter - Construct an PPC ELF object writer. -MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, - bool Is64Bit, - bool IsLittleEndian, - uint8_t OSABI); -/// createPPCELFObjectWriter - Construct a PPC Mach-O object writer. -MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit, +/// Construct an PPC ELF object writer. +MCObjectWriter *createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, + bool IsLittleEndian, uint8_t OSABI); +/// Construct a PPC Mach-O object writer. +MCObjectWriter *createPPCMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype); + +/// Returns true iff Val consists of one contiguous run of 1s with any number of +/// 0s on either side. The 1s are allowed to wrap from LSB to MSB, so +/// 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not, +/// since all 1s are not contiguous. +static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { + if (!Val) + return false; + + if (isShiftedMask_32(Val)) { + // look for the first non-zero bit + MB = countLeadingZeros(Val); + // look for the first zero bit after the run of ones + ME = countLeadingZeros((Val - 1) ^ Val); + return true; + } else { + Val = ~Val; // invert mask + if (isShiftedMask_32(Val)) { + // effectively look for the first zero bit + ME = countLeadingZeros(Val) - 1; + // effectively look for the first one bit after the run of zeros + MB = countLeadingZeros((Val - 1) ^ Val) + 1; + return true; + } + } + // no run present + return false; +} + } // End llvm namespace // Generated files will use "namespace PPC". To avoid symbol clash, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index f7259b9..44e69b7 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -378,8 +378,8 @@ void PPCMachObjectWriter::RecordPPCRelocation( Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit, - uint32_t CPUType, +MCObjectWriter *llvm::createPPCMachObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) { return createMachObjectWriter( new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS, diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index f175f6d..1a02bcc 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -86,6 +86,10 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", "Enable the isel instruction">; def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true", "Enable the popcnt[dw] instructions">; +def FeatureBPERMD : SubtargetFeature<"bpermd", "HasBPERMD", "true", + "Enable the bpermd instruction">; +def FeatureExtDiv : SubtargetFeature<"extdiv", "HasExtDiv", "true", + "Enable extended divide instructions">; def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true", "Enable the ldbrx instruction">; def FeatureCMPB : SubtargetFeature<"cmpb", "HasCMPB", "true", @@ -118,6 +122,10 @@ def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true", def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true", "Enable POWER8 vector instructions", [FeatureVSX, FeatureP8Altivec]>; +def FeatureDirectMove : + SubtargetFeature<"direct-move", "HasDirectMove", "true", + "Enable Power8 direct move instructions", + [FeatureVSX]>; def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics", "HasPartwordAtomics", "true", "Enable l[bh]arx and st[bh]cx.">; @@ -133,6 +141,38 @@ def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true", def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", "Treat vector data stream cache control instructions as deprecated">; +/* Since new processors generally contain a superset of features of those that + came before them, the idea is to make implementations of new processors + less error prone and easier to read. + Namely: + list<SubtargetFeature> Power8FeatureList = ... + list<SubtargetFeature> FutureProcessorSpecificFeatureList = + [ features that Power8 does not support ] + list<SubtargetFeature> FutureProcessorFeatureList = + !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList) + + Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as + well as providing a single point of definition if the feature set will be + used elsewhere. +*/ +def ProcessorFeatures { + list<SubtargetFeature> Power7FeatureList = + [DirectivePwr7, FeatureAltivec, FeatureVSX, + FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, + FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */, + FeatureBPERMD, FeatureExtDiv, + DeprecatedMFTB, DeprecatedDST]; + list<SubtargetFeature> Power8SpecificFeatures = + [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto, + FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic]; + list<SubtargetFeature> Power8FeatureList = + !listconcat(Power7FeatureList, Power8SpecificFeatures); +} + // Note: Future features to add when support is extended to more // recent ISA levels: // @@ -243,33 +283,6 @@ def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec, def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec, FeatureFRES, FeatureFRSQRTE]>; -/* Since new processors generally contain a superset of features of those that - came before them, the idea is to make implementations of new processors - less error prone and easier to read. - Namely: - list<SubtargetFeature> Power8FeatureList = ... - list<SubtargetFeature> FutureProcessorSpecificFeatureList = - [ features that Power8 does not support ] - list<SubtargetFeature> FutureProcessorFeatureList = - !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList) - - Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as - well as providing a single point of definition if the feature set will be - used elsewhere. - -*/ -def ProcessorFeatures { - list<SubtargetFeature> Power8FeatureList = - [DirectivePwr8, FeatureAltivec, FeatureP8Altivec, FeatureVSX, - FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, - FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureHTM, - FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto, - Feature64Bit /*, Feature64BitRegs */, FeatureICBT, - FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST]; -} - def : ProcessorModel<"970", G5Model, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, @@ -339,15 +352,7 @@ def : ProcessorModel<"pwr6x", G5Model, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB, FeatureFPRND, Feature64Bit, DeprecatedMFTB, DeprecatedDST]>; -def : ProcessorModel<"pwr7", P7Model, - [DirectivePwr7, FeatureAltivec, FeatureVSX, - FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, - FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, - FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, - Feature64Bit /*, Feature64BitRegs */, FeaturePartwordAtomic, - DeprecatedMFTB, DeprecatedDST]>; +def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : ProcessorModel<"ppc64", G5Model, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index cd60906..383a1e2 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1105,25 +1105,6 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { } } - MachineModuleInfoELF &MMIELF = - MMI->getObjFileInfo<MachineModuleInfoELF>(); - - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$stub: - OutStreamer.EmitLabel(Stubs[i].first); - // .long _foo - OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(), - OutContext), - isPPC64 ? 8 : 4/*size*/); - } - - Stubs.clear(); - OutStreamer.AddBlankLine(); - } - return AsmPrinter::doFinalization(M); } diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index fbd7b6d..002616b 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -958,6 +958,8 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, } // Attempt to fast-select an integer-to-floating-point conversion. +// FIXME: Once fast-isel has better support for VSX, conversions using +// direct moves should be implemented. bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { MVT DstVT; Type *DstTy = I->getType(); @@ -1065,6 +1067,8 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, } // Attempt to fast-select a floating-point-to-integer conversion. +// FIXME: Once fast-isel has better support for VSX, conversions using +// direct moves should be implemented. bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { MVT DstVT, SrcVT; Type *DstTy = I->getType(); @@ -1444,6 +1448,9 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) { else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 && RetVT != MVT::i8) return false; + else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits()) + // We can't handle boolean returns when CR bits are in use. + return false; // FIXME: No multi-register return values yet. if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 && diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 3ac8e94..4f8d01b 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -105,13 +105,6 @@ namespace { return CurDAG->getTargetConstant(Imm, PPCLowering->getPointerTy()); } - /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s - /// with any number of 0s on either side. The 1s are allowed to wrap from - /// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. - /// 0x0F0F0000 is not, since all 1s are not contiguous. - static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME); - - /// isRotateAndMask - Returns true if Mask and Shift can be folded into a /// rotate and mask opcode and mask operation. static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, @@ -418,30 +411,6 @@ SDNode *PPCDAGToDAGISel::getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { getSmallIPtrImm(Offset)); } -bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { - if (!Val) - return false; - - if (isShiftedMask_32(Val)) { - // look for the first non-zero bit - MB = countLeadingZeros(Val); - // look for the first zero bit after the run of ones - ME = countLeadingZeros((Val - 1) ^ Val); - return true; - } else { - Val = ~Val; // invert mask - if (isShiftedMask_32(Val)) { - // effectively look for the first zero bit - ME = countLeadingZeros(Val) - 1; - // effectively look for the first one bit after the run of zeros - MB = countLeadingZeros((Val - 1) ^ Val) + 1; - return true; - } - } - // no run present - return false; -} - bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, unsigned &SH, unsigned &MB, unsigned &ME) { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 871531e..4c0b6a6 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -996,6 +996,9 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; + case PPCISD::MFVSR: return "PPCISD::MFVSR"; + case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; + case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; case PPCISD::LBRX: return "PPCISD::LBRX"; @@ -1287,22 +1290,6 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { return true; } -/// isAllNegativeZeroVector - Returns true if all elements of build_vector -/// are -0.0. -bool PPC::isAllNegativeZeroVector(SDNode *N) { - BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N); - - APInt APVal, APUndef; - unsigned BitSize; - bool HasAnyUndefs; - - if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true)) - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) - return CFP->getValueAPF().isNegZero(); - - return false; -} - /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, @@ -2234,7 +2221,7 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG, // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2), - DAG.getConstant(12, MVT::i32), 8, false, true, + DAG.getConstant(12, MVT::i32), 8, false, true, false, MachinePointerInfo(), MachinePointerInfo()); } @@ -3821,7 +3808,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, SDLoc dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - false, false, MachinePointerInfo(), + false, false, false, MachinePointerInfo(), MachinePointerInfo()); } @@ -5927,8 +5914,46 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, RLI.MPI = MPI; } +/// \brief Custom lowers floating point to integer conversions to use +/// the direct move instructions available in ISA 2.07 to avoid the +/// need for load/store combinations. +SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, + SelectionDAG &DAG, + SDLoc dl) const { + assert(Op.getOperand(0).getValueType().isFloatingPoint()); + SDValue Src = Op.getOperand(0); + + if (Src.getValueType() == MVT::f32) + Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); + + SDValue Tmp; + switch (Op.getSimpleValueType().SimpleTy) { + default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); + case MVT::i32: + Tmp = DAG.getNode( + Op.getOpcode() == ISD::FP_TO_SINT + ? PPCISD::FCTIWZ + : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), + dl, MVT::f64, Src); + Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp); + break; + case MVT::i64: + assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && + "i64 FP_TO_UINT is supported only with FPCVT"); + Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : + PPCISD::FCTIDUZ, + dl, MVT::f64, Src); + Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp); + break; + } + return Tmp; +} + SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const { + if (Subtarget.hasDirectMove() && Subtarget.isPPC64()) + return LowerFP_TO_INTDirectMove(Op, DAG, dl); + ReuseLoadInfo RLI; LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); @@ -6006,6 +6031,38 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain, DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); } +/// \brief Custom lowers integer to floating point conversions to use +/// the direct move instructions available in ISA 2.07 to avoid the +/// need for load/store combinations. +SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, + SelectionDAG &DAG, + SDLoc dl) const { + assert((Op.getValueType() == MVT::f32 || + Op.getValueType() == MVT::f64) && + "Invalid floating point type as target of conversion"); + assert(Subtarget.hasFPCVT() && + "Int to FP conversions with direct moves require FPCVT"); + SDValue FP; + SDValue Src = Op.getOperand(0); + bool SinglePrec = Op.getValueType() == MVT::f32; + bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; + bool Signed = Op.getOpcode() == ISD::SINT_TO_FP; + unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) : + (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU); + + if (WordInt) { + FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ, + dl, MVT::f64, Src); + FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); + } + else { + FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src); + FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); + } + + return FP; +} + SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -6041,6 +6098,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, DAG.getConstantFP(1.0, Op.getValueType()), DAG.getConstantFP(0.0, Op.getValueType())); + // If we have direct moves, we can do all the conversion, skip the store/load + // however, without FPCVT we can't do most conversions. + if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT()) + return LowerINT_TO_FPDirectMove(Op, DAG, dl); + assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); @@ -6609,7 +6671,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, unsigned SplatBitSize; bool HasAnyUndefs; if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, - HasAnyUndefs, 0, true) || SplatBitSize > 32) + HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || + SplatBitSize > 32) return SDValue(); unsigned SplatBits = APSplatBits.getZExtValue(); @@ -6676,22 +6739,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } - // The remaining cases assume either big endian element order or - // a splat-size that equates to the element size of the vector - // to be built. An example that doesn't work for little endian is - // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits - // and a vector element size of 16 bits. The code below will - // produce the vector in big endian element order, which for little - // endian is {-1, 0, -1, 0, -1, 0, -1, 0}. - - // For now, just avoid these optimizations in that case. - // FIXME: Develop correct optimizations for LE with mismatched - // splat and element sizes. - - if (Subtarget.isLittleEndian() && - SplatSize != Op.getValueType().getVectorElementType().getSizeInBits()) - return SDValue(); - // Check to see if this is a wide variety of vsplti*, binop self cases. static const signed char SplatCsts[] = { -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, @@ -7733,6 +7780,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: // LowerFP_TO_INT() can only handle f32 and f64. if (N->getOperand(0).getValueType() == MVT::ppcf128) return; @@ -11023,21 +11071,23 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { - const Function *F = MF.getFunction(); - // When expanding a memset, require at least two QPX instructions to cover - // the cost of loading the value to be stored from the constant pool. - if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && - (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && - !F->hasFnAttribute(Attribute::NoImplicitFloat)) { - return MVT::v4f64; - } - - // We should use Altivec/VSX loads and stores when available. For unaligned - // addresses, unaligned VSX loads are only fast starting with the P8. - if (Subtarget.hasAltivec() && Size >= 16 && - (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || - ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) - return MVT::v4i32; + if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { + const Function *F = MF.getFunction(); + // When expanding a memset, require at least two QPX instructions to cover + // the cost of loading the value to be stored from the constant pool. + if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && + (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && + !F->hasFnAttribute(Attribute::NoImplicitFloat)) { + return MVT::v4f64; + } + + // We should use Altivec/VSX loads and stores when available. For unaligned + // addresses, unaligned VSX loads are only fast starting with the P8. + if (Subtarget.hasAltivec() && Size >= 16 && + (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || + ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) + return MVT::v4i32; + } if (Subtarget.isPPC64()) { return MVT::i64; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 8afd7ef..7e2ebd4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -119,6 +119,15 @@ namespace llvm { /// resultant GPR. Bits corresponding to other CR regs are undefined. MFOCRF, + /// Direct move from a VSX register to a GPR + MFVSR, + + /// Direct move from a GPR to a VSX register (algebraic) + MTVSRA, + + /// Direct move from a GPR to a VSX register (zero) + MTVSRZ, + // FIXME: Remove these once the ANDI glue bug is fixed: /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the /// eq or gt bit of CR0 after executing andi. x, 1. This is used to @@ -368,10 +377,6 @@ namespace llvm { /// VSPLTB/VSPLTH/VSPLTW. bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); - /// isAllNegativeZeroVector - Returns true if all elements of build_vector - /// are -0.0. - bool isAllNegativeZeroVector(SDNode *N); - /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG); @@ -649,6 +654,10 @@ namespace llvm { void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, SelectionDAG &DAG, SDLoc dl) const; + SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, + SDLoc dl) const; + SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG, + SDLoc dl) const; SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 183d088..d1d67cb 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -603,6 +603,10 @@ defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS), def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS), "popcntd $rA, $rS", IIC_IntGeneral, [(set i64:$rA, (ctpop i64:$rS))]>; +def BPERMD : XForm_6<31, 252, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "bpermd $rA, $rS, $rB", IIC_IntGeneral, + [(set i64:$rA, (int_ppc_bpermd g8rc:$rS, g8rc:$rB))]>, + isPPC64, Requires<[HasBPERMD]>; let isCodeGenOnly = 1, isCommutable = 1 in def CMPB8 : XForm_6<31, 508, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), @@ -616,14 +620,30 @@ def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS), "popcntw $rA, $rS", IIC_IntGeneral, [(set i32:$rA, (ctpop i32:$rS))]>; -defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divd", "$rT, $rA, $rB", IIC_IntDivD, - [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64, - PPC970_DGroup_First, PPC970_DGroup_Cracked; -defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divdu", "$rT, $rA, $rB", IIC_IntDivD, - [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64, - PPC970_DGroup_First, PPC970_DGroup_Cracked; +defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divd", "$rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64; +defm DIVDU : XOForm_1rcr<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdu", "$rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64; +def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divde $rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>, + isPPC64, Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divde. $rT, $rA, $rB", IIC_IntDivD, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + isPPC64, Requires<[HasExtDiv]>; +def DIVDEU : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdeu $rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>, + isPPC64, Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVDEUo : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdeu. $rT, $rA, $rB", IIC_IntDivD, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + isPPC64, Requires<[HasExtDiv]>; let isCommutable = 1 in defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "mulld", "$rT, $rA, $rB", IIC_IntMulHD, diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index b7a7a1f..43c2158 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -764,6 +764,12 @@ class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = XT{5}; } +class XX1_RS6_RD5_XO<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : XX1Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let B = 0; +} + class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : I<opcode, OOL, IOL, asmstr, itin> { diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 5eff156..8aecb65 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -726,6 +726,8 @@ def HasICBT : Predicate<"PPCSubTarget->hasICBT()">; def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">; def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">; +def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">; +def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. @@ -802,6 +804,23 @@ multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, } } +// Multiclass for instructions for which the non record form is not cracked +// and the record form is cracked (i.e. divw, mullw, etc.) +multiclass XOForm_1rcr<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel, PPC970_DGroup_First, + PPC970_DGroup_Cracked; + } +} + multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list<dag> pattern> { @@ -2300,14 +2319,30 @@ defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), [(set i32:$rT, (addc i32:$rA, i32:$rB))]>, PPC970_DGroup_Cracked; -defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divw", "$rT, $rA, $rB", IIC_IntDivW, - [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>, - PPC970_DGroup_First, PPC970_DGroup_Cracked; -defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divwu", "$rT, $rA, $rB", IIC_IntDivW, - [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>, - PPC970_DGroup_First, PPC970_DGroup_Cracked; +defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divw", "$rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>; +defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwu", "$rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>; +def DIVWE : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwe $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>, + Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVWEo : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwe. $rT, $rA, $rB", IIC_IntDivW, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + Requires<[HasExtDiv]>; +def DIVWEU : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divweu $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>, + Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVWEUo : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divweu. $rT, $rA, $rB", IIC_IntDivW, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + Requires<[HasExtDiv]>; let isCommutable = 1 in { defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "mulhw", "$rT, $rA, $rB", IIC_IntMulHW, @@ -3726,6 +3761,19 @@ def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0) def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; +def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWINMobm : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWIMIbm : PPCAsmPseudo<"rlwimi $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWIMIobm : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWNMbm : PPCAsmPseudo<"rlwnm $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWNMobm : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; + // These generic branch instruction forms are used for the assembler parser only. // Defs and Uses are conservative, since we don't know the BO value. let PPC970_Unit = 7 in { diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index ec04da4..a98e58f 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -41,6 +41,9 @@ def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x, [SDNPHasChain, SDNPMayStore]>; def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>; +def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; +def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; +def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>; multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, @@ -946,6 +949,7 @@ def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B), when the elements are larger than i32. */ def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">; +def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">; let Predicates = [HasP8Vector] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. let isCommutable = 1 in { @@ -965,3 +969,24 @@ def XXLORC : XX3Form<60, 170, [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>; } // AddedComplexity = 500 } // HasP8Vector + +let Predicates = [HasDirectMove, HasVSX] in { +// VSX direct move instructions +def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT), + "mfvsrd $rA, $XT", IIC_VecGeneral, + [(set i64:$rA, (PPCmfvsr f64:$XT))]>, + Requires<[In64BitMode]>; +def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT), + "mfvsrwz $rA, $XT", IIC_VecGeneral, + [(set i32:$rA, (PPCmfvsr f64:$XT))]>; +def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA), + "mtvsrd $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsra i64:$rA))]>, + Requires<[In64BitMode]>; +def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA), + "mtvsrwa $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsra i32:$rA))]>; +def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA), + "mtvsrwz $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; +} // HasDirectMove, HasVSX diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp index 005bcaf..2947c66 100644 --- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp +++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "ppc-loop-data-prefetch" #include "PPC.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" @@ -110,11 +111,9 @@ bool PPCLoopDataPrefetch::runOnFunction(Function &F) { bool MadeChange = false; - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); - I != E; ++I) { - Loop *L = *I; - MadeChange |= runOnLoop(L); - } + for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) + for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) + MadeChange |= runOnLoop(*L); return MadeChange; } diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index 092a4ef..b6e7799 100644 --- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -22,6 +22,7 @@ #define DEBUG_TYPE "ppc-loop-preinc-prep" #include "PPC.h" #include "PPCTargetMachine.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -143,11 +144,9 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { bool MadeChange = false; - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); - I != E; ++I) { - Loop *L = *I; - MadeChange |= runOnLoop(L); - } + for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) + for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) + MadeChange |= runOnLoop(*L); return MadeChange; } @@ -159,16 +158,15 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!L->empty()) return MadeChange; + DEBUG(dbgs() << "PIP: Examining: " << *L << "\n"); + BasicBlock *Header = L->getHeader(); const PPCSubtarget *ST = TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr; - unsigned HeaderLoopPredCount = 0; - for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) { - ++HeaderLoopPredCount; - } + unsigned HeaderLoopPredCount = + std::distance(pred_begin(Header), pred_end(Header)); // Collect buckets of comparable addresses used by loads and stores. typedef std::multimap<const SCEV *, Instruction *, SCEVLess> Bucket; @@ -205,9 +203,13 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (L->isLoopInvariant(PtrValue)) continue; - const SCEV *LSCEV = SE->getSCEV(PtrValue); - if (!isa<SCEVAddRecExpr>(LSCEV)) + const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); + if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) { + if (LARSCEV->getLoop() != L) + continue; + } else { continue; + } bool FoundBucket = false; for (unsigned i = 0, e = Buckets.size(); i != e; ++i) @@ -236,11 +238,16 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { // returns a value (which might contribute to determining the loop's // iteration space), insert a new preheader for the loop. if (!LoopPredecessor || - !LoopPredecessor->getTerminator()->getType()->isVoidTy()) + !LoopPredecessor->getTerminator()->getType()->isVoidTy()) { LoopPredecessor = InsertPreheaderForLoop(L, this); + if (LoopPredecessor) + MadeChange = true; + } if (!LoopPredecessor) return MadeChange; + DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n"); + SmallSet<BasicBlock *, 16> BBChanged; for (unsigned i = 0, e = Buckets.size(); i != e; ++i) { // The base address of each bucket is transformed into a phi and the others @@ -251,6 +258,10 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!BasePtrSCEV->isAffine()) continue; + DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); + assert(BasePtrSCEV->getLoop() == L && + "AddRec for the wrong loop?"); + Instruction *MemI = Buckets[i].begin()->second; Value *BasePtr = GetPointerOperand(MemI); assert(BasePtr && "No pointer operand"); @@ -271,6 +282,8 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!isSafeToExpand(BasePtrStartSCEV, *SE)) continue; + DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); + PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount, MemI->hasName() ? MemI->getName() + ".phi" : "", Header->getFirstNonPHI()); diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 0965cb3..6df89fe 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -66,7 +66,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ unsigned OrigLen = Name.size() - PrefixLen; Name += Suffix; - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen); // If the target flags on the operand changes the name of the symbol, do that diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index ed88803..f313b0a 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -21,7 +21,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetMachine.h" #include <cstdlib> @@ -83,6 +82,8 @@ void PPCSubtarget::initializeEnvironment() { HasFPCVT = false; HasISEL = false; HasPOPCNTD = false; + HasBPERMD = false; + HasExtDiv = false; HasCMPB = false; HasLDBRX = false; IsBookE = false; @@ -96,6 +97,7 @@ void PPCSubtarget::initializeEnvironment() { HasICBT = false; HasInvariantFunctionDescriptors = false; HasPartwordAtomics = false; + HasDirectMove = false; IsQPXStackUnaligned = false; HasHTM = false; } @@ -110,11 +112,6 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { else CPUName = "generic"; } -#if (defined(__APPLE__) || defined(__linux__)) && \ - (defined(__ppc__) || defined(__powerpc__)) - if (CPUName == "generic") - CPUName = sys::getHostCPUName(); -#endif // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUName); diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index b4c1bb1..8d95508 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -101,6 +101,8 @@ protected: bool HasFPCVT; bool HasISEL; bool HasPOPCNTD; + bool HasBPERMD; + bool HasExtDiv; bool HasCMPB; bool HasLDBRX; bool IsBookE; @@ -115,6 +117,7 @@ protected: bool HasICBT; bool HasInvariantFunctionDescriptors; bool HasPartwordAtomics; + bool HasDirectMove; bool HasHTM; /// When targeting QPX running a stock PPC64 Linux kernel where the stack @@ -225,6 +228,8 @@ public: bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasPOPCNTD() const { return HasPOPCNTD; } + bool hasBPERMD() const { return HasBPERMD; } + bool hasExtDiv() const { return HasExtDiv; } bool hasCMPB() const { return HasCMPB; } bool hasLDBRX() const { return HasLDBRX; } bool isBookE() const { return IsBookE; } @@ -239,6 +244,7 @@ public: return HasInvariantFunctionDescriptors; } bool hasPartwordAtomics() const { return HasPartwordAtomics; } + bool hasDirectMove() const { return HasDirectMove; } bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; } unsigned getPlatformStackAlignment() const { diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h index 6493713..8aaf5e1 100644 --- a/lib/Target/PowerPC/PPCTargetStreamer.h +++ b/lib/Target/PowerPC/PPCTargetStreamer.h @@ -16,7 +16,7 @@ namespace llvm { class PPCTargetStreamer : public MCTargetStreamer { public: PPCTargetStreamer(MCStreamer &S); - virtual ~PPCTargetStreamer(); + ~PPCTargetStreamer() override; virtual void emitTCEntry(const MCSymbol &S) = 0; virtual void emitMachine(StringRef CPU) = 0; virtual void emitAbiVersion(int AbiVersion) = 0; diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index dfe988f..01233ae 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -622,6 +622,25 @@ void foo() { __asm__("" ::: "cr2"); } +//===-------------------------------------------------------------------------=== +Naming convention for instruction formats is very haphazard. +We have agreed on a naming scheme as follows: + +<INST_form>{_<OP_type><OP_len>}+ + +Where: +INST_form is the instruction format (X-form, etc.) +OP_type is the operand type - one of OPC (opcode), RD (register destination), + RS (register source), + RDp (destination register pair), + RSp (source register pair), IM (immediate), + XO (extended opcode) +OP_len is the length of the operand in bits + +VSX register operands would be of length 6 (split across two fields), +condition register fields of length 3. +We would not need denote reserved fields in names of instruction formats. + //===----------------------------------------------------------------------===// Instruction fusion was introduced in ISA 2.06 and more opportunities added in diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt index 43d87d3..1d5b092 100644 --- a/lib/Target/PowerPC/README_ALTIVEC.txt +++ b/lib/Target/PowerPC/README_ALTIVEC.txt @@ -277,7 +277,7 @@ This will generate the following instruction sequence: This will almost certainly cause a load-hit-store hazard. Since val is a value parameter, it should not need to be saved onto the stack, unless it's being done set up the vector register. Instead, -it would be better to splat teh value into a vector register, and then +it would be better to splat the value into a vector register, and then remove the (dead) stores to the stack. //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index e5d5ce2..2eb805e 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -133,6 +133,20 @@ class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature< !cast<string>(Value), "The size of local memory in bytes">; +def FeatureGCN : SubtargetFeature<"gcn", + "IsGCN", + "true", + "GCN or newer GPU">; + +def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding", + "GCN1Encoding", + "true", + "Encoding format for SI and CI">; + +def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding", + "GCN3Encoding", + "true", + "Encoding format for VI">; class SubtargetFeatureGeneration <string Value, list<SubtargetFeature> Implies> : SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value, @@ -158,15 +172,17 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS", def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS", [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768, - FeatureWavefrontSize64]>; + FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding]>; def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS", [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536, - FeatureWavefrontSize64, FeatureFlatAddressSpace]>; + FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace, + FeatureGCN1Encoding]>; def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536, - FeatureWavefrontSize64, FeatureFlatAddressSpace]>; + FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, + FeatureGCN3Encoding]>; //===----------------------------------------------------------------------===// @@ -197,8 +213,10 @@ def NullALU : InstrItinClass; class PredicateControl { Predicate SubtargetPredicate; + list<Predicate> AssemblerPredicates = []; list<Predicate> OtherPredicates = []; list<Predicate> Predicates = !listconcat([SubtargetPredicate], + AssemblerPredicates, OtherPredicates); } diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index d911014..b3480b4 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -17,6 +17,7 @@ // #include "AMDGPUAsmPrinter.h" +#include "InstPrinter/AMDGPUInstPrinter.h" #include "AMDGPU.h" #include "AMDKernelCodeT.h" #include "AMDGPUSubtarget.h" @@ -574,3 +575,24 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, OutStreamer.EmitBytes(StringRef((char*)&header, sizeof(header))); } + +bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) + return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); + case 'r': + break; + } + } + + AMDGPUInstPrinter::printRegOperand(MI->getOperand(OpNo).getReg(), O, + *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo()); + return false; +} diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h index 58ffb1e..1acff3a 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.h +++ b/lib/Target/R600/AMDGPUAsmPrinter.h @@ -99,6 +99,10 @@ public: void EmitEndOfAsmFile(Module &M) override; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) override; + protected: std::vector<std::string> DisasmLines, HexLines; size_t DisasmLineMaxLen; diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 7341cd9..def252a 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -345,7 +345,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { unsigned NOps = N->getNumOperands(); for (unsigned i = 0; i < NOps; i++) { // XXX: Why is this here? - if (dyn_cast<RegisterSDNode>(N->getOperand(i))) { + if (isa<RegisterSDNode>(N->getOperand(i))) { IsRegSeq = false; break; } diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 62a33fa..7c5235d 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -126,6 +126,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FRINT, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); setOperationAction(ISD::FROUND, MVT::f32, Custom); setOperationAction(ISD::FROUND, MVT::f64, Custom); @@ -1685,14 +1687,8 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op, const unsigned bitPos = halfBitWidth - i - 1; SDValue POS = DAG.getConstant(bitPos, HalfVT); // Get value of high bit - // TODO: Remove the BFE part when the optimization is fixed - SDValue HBit; - if (halfBitWidth == 32 && Subtarget->hasBFE()) { - HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one); - } else { - HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS); - HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one); - } + SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS); + HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one); HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit); // Shift diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 4d08201..eeb7f3f 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -358,7 +358,7 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>; def mskor_global : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ - return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; + return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; }]>; @@ -389,7 +389,7 @@ def flat_store : PatFrag<(ops node:$val, node:$ptr), def mskor_flat : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ - return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS; + return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS; }]>; class global_binary_atomic_op<SDNode atomic_op> : PatFrag< diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp index f047ed0..7e274a9 100644 --- a/lib/Target/R600/AMDGPUMCInstLower.cpp +++ b/lib/Target/R600/AMDGPUMCInstLower.cpp @@ -124,7 +124,8 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *MF->getSubtarget().getInstrInfo(), *MF->getSubtarget().getRegisterInfo()); - InstPrinter.printInst(&TmpInst, DisasmStream, StringRef()); + InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(), + MF->getSubtarget()); // Disassemble instruction/operands to hex representation. SmallVector<MCFixup, 4> Fixups; diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp index 175dcd8..6d5f94e 100644 --- a/lib/Target/R600/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp @@ -366,8 +366,8 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) { Function *F = Call->getCalledFunction(); FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes, F->isVarArg()); - Constant *C = Mod->getOrInsertFunction(StringRef(F->getName().str() + ".local"), NewType, - F->getAttributes()); + Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(), + NewType, F->getAttributes()); Function *NewF = cast<Function>(C); Call->setCalledFunction(NewF); continue; diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index 0ead652..259224a 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -71,6 +71,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS, EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), EnableVGPRSpilling(false), SGPRInitBug(false), + IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), FrameLowering(TargetFrameLowering::StackGrowsUp, 64 * 16, // Maximum stack alignment (long16) 0), diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 403a3e4..aeb0817 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -71,6 +71,9 @@ private: int LocalMemorySize; bool EnableVGPRSpilling; bool SGPRInitBug; + bool IsGCN; + bool GCN1Encoding; + bool GCN3Encoding; AMDGPUFrameLowering FrameLowering; std::unique_ptr<AMDGPUTargetLowering> TLInfo; diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp index ee6551b..c9b25a1 100644 --- a/lib/Target/R600/AMDILCFGStructurizer.cpp +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp @@ -623,7 +623,7 @@ DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end(); ++It) { MachineInstr *instr = &(*It); - if (!instr->getDebugLoc().isUnknown()) + if (instr->getDebugLoc()) DL = instr->getDebugLoc(); } return DL; diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp index 49f0f23..aaf9b32 100644 --- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp @@ -8,6 +8,8 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIDefines.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" @@ -27,76 +29,105 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" using namespace llvm; namespace { -class AMDGPUAsmParser : public MCTargetAsmParser { - MCSubtargetInfo &STI; - MCAsmParser &Parser; - - - /// @name Auto-generated Match Functions - /// { - -#define GET_ASSEMBLER_HEADER -#include "AMDGPUGenAsmMatcher.inc" - - /// } - -public: - AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser, - const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(STI), Parser(Parser) { - setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); - } - bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - OperandVector &Operands, MCStreamer &Out, - uint64_t &ErrorInfo, - bool MatchingInlineAsm) override; - bool ParseDirective(AsmToken DirectiveID) override; - OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic); - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, OperandVector &Operands) override; - - bool parseCnt(int64_t &IntVal); - OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); -}; +struct OptionalOperand; class AMDGPUOperand : public MCParsedAsmOperand { enum KindTy { Token, - Immediate + Immediate, + Register, + Expression } Kind; + SMLoc StartLoc, EndLoc; + public: AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {} + MCContext *Ctx; + + enum ImmTy { + ImmTyNone, + ImmTyDSOffset0, + ImmTyDSOffset1, + ImmTyGDS, + ImmTyOffset, + ImmTyGLC, + ImmTySLC, + ImmTyTFE, + ImmTyClamp, + ImmTyOMod + }; + struct TokOp { const char *Data; unsigned Length; }; struct ImmOp { + bool IsFPImm; + ImmTy Type; int64_t Val; }; + struct RegOp { + unsigned RegNo; + int Modifiers; + const MCRegisterInfo *TRI; + }; + union { TokOp Tok; ImmOp Imm; + RegOp Reg; + const MCExpr *Expr; }; void addImmOperands(MCInst &Inst, unsigned N) const { Inst.addOperand(MCOperand::CreateImm(getImm())); } - void addRegOperands(MCInst &Inst, unsigned N) const { - llvm_unreachable("addRegOperands"); - } + StringRef getToken() const { return StringRef(Tok.Data, Tok.Length); } + + void addRegOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(getReg())); + } + + void addRegOrImmOperands(MCInst &Inst, unsigned N) const { + if (isReg()) + addRegOperands(Inst, N); + else + addImmOperands(Inst, N); + } + + void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateImm(Reg.Modifiers)); + addRegOperands(Inst, N); + } + + void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { + if (isImm()) + addImmOperands(Inst, N); + else { + assert(isExpr()); + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + } + + bool defaultTokenHasSuffix() const { + StringRef Token(Tok.Data, Tok.Length); + + return Token.endswith("_e32") || Token.endswith("_e64"); + } + bool isToken() const override { return Kind == Token; } @@ -105,52 +136,369 @@ public: return Kind == Immediate; } + bool isInlineImm() const { + float F = BitsToFloat(Imm.Val); + // TODO: Add 0.5pi for VI + return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) || + (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 || + F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0)); + } + + bool isDSOffset0() const { + assert(isImm()); + return Imm.Type == ImmTyDSOffset0; + } + + bool isDSOffset1() const { + assert(isImm()); + return Imm.Type == ImmTyDSOffset1; + } + int64_t getImm() const { return Imm.Val; } + enum ImmTy getImmTy() const { + assert(isImm()); + return Imm.Type; + } + bool isReg() const override { - return false; + return Kind == Register && Reg.Modifiers == -1; + } + + bool isRegWithInputMods() const { + return Kind == Register && Reg.Modifiers != -1; + } + + void setModifiers(unsigned Mods) { + assert(isReg()); + Reg.Modifiers = Mods; } unsigned getReg() const override { - return 0; + return Reg.RegNo; + } + + bool isRegOrImm() const { + return isReg() || isImm(); + } + + bool isRegClass(unsigned RCID) const { + return Reg.TRI->getRegClass(RCID).contains(getReg()); + } + + bool isSCSrc32() const { + return isInlineImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID)); + } + + bool isSSrc32() const { + return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID)); + } + + bool isSSrc64() const { + return isImm() || isInlineImm() || + (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)); + } + + bool isVCSrc32() const { + return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID)); + } + + bool isVCSrc64() const { + return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID)); + } + + bool isVSrc32() const { + return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID)); + } + + bool isVSrc64() const { + return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID)); } bool isMem() const override { return false; } + bool isExpr() const { + return Kind == Expression; + } + + bool isSoppBrTarget() const { + return isExpr() || isImm(); + } + SMLoc getStartLoc() const override { - return SMLoc(); + return StartLoc; } SMLoc getEndLoc() const override { - return SMLoc(); + return EndLoc; } void print(raw_ostream &OS) const override { } - static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val) { + static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc, + enum ImmTy Type = ImmTyNone, + bool IsFPImm = false) { auto Op = llvm::make_unique<AMDGPUOperand>(Immediate); Op->Imm.Val = Val; + Op->Imm.IsFPImm = IsFPImm; + Op->Imm.Type = Type; + Op->StartLoc = Loc; + Op->EndLoc = Loc; return Op; } - static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc) { + static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc, + bool HasExplicitEncodingSize = true) { auto Res = llvm::make_unique<AMDGPUOperand>(Token); Res->Tok.Data = Str.data(); Res->Tok.Length = Str.size(); + Res->StartLoc = Loc; + Res->EndLoc = Loc; return Res; } + static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S, + SMLoc E, + const MCRegisterInfo *TRI) { + auto Op = llvm::make_unique<AMDGPUOperand>(Register); + Op->Reg.RegNo = RegNo; + Op->Reg.TRI = TRI; + Op->Reg.Modifiers = -1; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr<AMDGPUOperand> CreateExpr(const class MCExpr *Expr, SMLoc S) { + auto Op = llvm::make_unique<AMDGPUOperand>(Expression); + Op->Expr = Expr; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + bool isDSOffset() const; + bool isDSOffset01() const; bool isSWaitCnt() const; + bool isMubufOffset() const; }; +class AMDGPUAsmParser : public MCTargetAsmParser { + MCSubtargetInfo &STI; + const MCInstrInfo &MII; + MCAsmParser &Parser; + + unsigned ForcedEncodingSize; + /// @name Auto-generated Match Functions + /// { + +#define GET_ASSEMBLER_HEADER +#include "AMDGPUGenAsmMatcher.inc" + + /// } + +public: + AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser, + const MCInstrInfo &MII, + const MCTargetOptions &Options) + : MCTargetAsmParser(), STI(STI), MII(MII), Parser(_Parser), + ForcedEncodingSize(0){ + + if (!STI.getFeatureBits()) { + // Set default features. + STI.ToggleFeature("SOUTHERN_ISLANDS"); + } + + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + } + + unsigned getForcedEncodingSize() const { + return ForcedEncodingSize; + } + + void setForcedEncodingSize(unsigned Size) { + ForcedEncodingSize = Size; + } + + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; + unsigned checkTargetMatchPredicate(MCInst &Inst) override; + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) override; + bool ParseDirective(AsmToken DirectiveID) override; + OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic); + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; + + OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int, + int64_t Default = 0); + OperandMatchResultTy parseIntWithPrefix(const char *Prefix, + OperandVector &Operands, + enum AMDGPUOperand::ImmTy ImmTy = + AMDGPUOperand::ImmTyNone); + OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands, + enum AMDGPUOperand::ImmTy ImmTy = + AMDGPUOperand::ImmTyNone); + OperandMatchResultTy parseOptionalOps( + const ArrayRef<OptionalOperand> &OptionalOps, + OperandVector &Operands); + + + void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); + void cvtDS(MCInst &Inst, const OperandVector &Operands); + OperandMatchResultTy parseDSOptionalOps(OperandVector &Operands); + OperandMatchResultTy parseDSOff01OptionalOps(OperandVector &Operands); + OperandMatchResultTy parseDSOffsetOptional(OperandVector &Operands); + + bool parseCnt(int64_t &IntVal); + OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); + OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); + + void cvtMubuf(MCInst &Inst, const OperandVector &Operands); + OperandMatchResultTy parseOffset(OperandVector &Operands); + OperandMatchResultTy parseMubufOptionalOps(OperandVector &Operands); + OperandMatchResultTy parseGLC(OperandVector &Operands); + OperandMatchResultTy parseSLC(OperandVector &Operands); + OperandMatchResultTy parseTFE(OperandVector &Operands); + + OperandMatchResultTy parseDMask(OperandVector &Operands); + OperandMatchResultTy parseUNorm(OperandVector &Operands); + OperandMatchResultTy parseR128(OperandVector &Operands); + + void cvtVOP3(MCInst &Inst, const OperandVector &Operands); + OperandMatchResultTy parseVOP3OptionalOps(OperandVector &Operands); +}; + +struct OptionalOperand { + const char *Name; + AMDGPUOperand::ImmTy Type; + bool IsBit; + int64_t Default; + bool (*ConvertResult)(int64_t&); +}; + +} + +static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) { + if (IsVgpr) { + switch (RegWidth) { + default: llvm_unreachable("Unknown register width"); + case 1: return AMDGPU::VGPR_32RegClassID; + case 2: return AMDGPU::VReg_64RegClassID; + case 3: return AMDGPU::VReg_96RegClassID; + case 4: return AMDGPU::VReg_128RegClassID; + case 8: return AMDGPU::VReg_256RegClassID; + case 16: return AMDGPU::VReg_512RegClassID; + } + } + + switch (RegWidth) { + default: llvm_unreachable("Unknown register width"); + case 1: return AMDGPU::SGPR_32RegClassID; + case 2: return AMDGPU::SGPR_64RegClassID; + case 4: return AMDGPU::SReg_128RegClassID; + case 8: return AMDGPU::SReg_256RegClassID; + case 16: return AMDGPU::SReg_512RegClassID; + } +} + +static unsigned getRegForName(const StringRef &RegName) { + + return StringSwitch<unsigned>(RegName) + .Case("exec", AMDGPU::EXEC) + .Case("vcc", AMDGPU::VCC) + .Case("flat_scr", AMDGPU::FLAT_SCR) + .Case("m0", AMDGPU::M0) + .Case("scc", AMDGPU::SCC) + .Case("flat_scr_lo", AMDGPU::FLAT_SCR_LO) + .Case("flat_scr_hi", AMDGPU::FLAT_SCR_HI) + .Case("vcc_lo", AMDGPU::VCC_LO) + .Case("vcc_hi", AMDGPU::VCC_HI) + .Case("exec_lo", AMDGPU::EXEC_LO) + .Case("exec_hi", AMDGPU::EXEC_HI) + .Default(0); } bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { - return true; + const AsmToken Tok = Parser.getTok(); + StartLoc = Tok.getLoc(); + EndLoc = Tok.getEndLoc(); + const StringRef &RegName = Tok.getString(); + RegNo = getRegForName(RegName); + + if (RegNo) { + Parser.Lex(); + return false; + } + + // Match vgprs and sgprs + if (RegName[0] != 's' && RegName[0] != 'v') + return true; + + bool IsVgpr = RegName[0] == 'v'; + unsigned RegWidth; + unsigned RegIndexInClass; + if (RegName.size() > 1) { + // We have a 32-bit register + RegWidth = 1; + if (RegName.substr(1).getAsInteger(10, RegIndexInClass)) + return true; + Parser.Lex(); + } else { + // We have a register greater than 32-bits. + + int64_t RegLo, RegHi; + Parser.Lex(); + if (getLexer().isNot(AsmToken::LBrac)) + return true; + + Parser.Lex(); + if (getParser().parseAbsoluteExpression(RegLo)) + return true; + + if (getLexer().isNot(AsmToken::Colon)) + return true; + + Parser.Lex(); + if (getParser().parseAbsoluteExpression(RegHi)) + return true; + + if (getLexer().isNot(AsmToken::RBrac)) + return true; + + Parser.Lex(); + RegWidth = (RegHi - RegLo) + 1; + if (IsVgpr) { + // VGPR registers aren't aligned. + RegIndexInClass = RegLo; + } else { + // SGPR registers are aligned. Max alignment is 4 dwords. + RegIndexInClass = RegLo / std::min(RegWidth, 4u); + } + } + + const MCRegisterInfo *TRC = getContext().getRegisterInfo(); + unsigned RC = getRegClass(IsVgpr, RegWidth); + if (RegIndexInClass > TRC->getRegClass(RC).getNumRegs()) + return true; + RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass); + return false; +} + +unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { + + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + + if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || + (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3))) + return Match_InvalidOperand; + + return Match_Success; } @@ -162,22 +510,30 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst Inst; switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) { - case Match_Success: - Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, STI); - return false; - case Match_MissingFeature: - return Error(IDLoc, "instruction use requires an option to be enabled"); - case Match_MnemonicFail: - return Error(IDLoc, "unrecognized instruction mnemonic"); - case Match_InvalidOperand: { - if (ErrorInfo != ~0ULL) { - if (ErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); + default: break; + case Match_Success: + Inst.setLoc(IDLoc); + Out.EmitInstruction(Inst, STI); + return false; + case Match_MissingFeature: + return Error(IDLoc, "missing feature"); + + case Match_MnemonicFail: + return Error(IDLoc, "unrecognized instruction mnemonic"); + + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL) { + if (ErrorInfo >= Operands.size()) { + return Error(IDLoc, "too few operands for instruction"); + } + ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + return Error(ErrorLoc, "invalid operand for instruction"); } - return Error(IDLoc, "invalid operand for instruction"); - } } llvm_unreachable("Implement any new match types added!"); } @@ -186,6 +542,19 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { return true; } +static bool operandsHaveModifiers(const OperandVector &Operands) { + + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + const AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]); + if (Op.isRegWithInputMods()) + return true; + if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOMod || + Op.getImmTy() == AMDGPUOperand::ImmTyClamp)) + return true; + } + return false; +} + AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { @@ -194,17 +563,104 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { // If we successfully parsed the operand or if there as an error parsing, // we are done. - if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail) + // + // If we are parsing after we reach EndOfStatement then this means we + // are appending default values to the Operands list. This is only done + // by custom parser, so we shouldn't continue on to the generic parsing. + if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || + getLexer().is(AsmToken::EndOfStatement)) return ResTy; + bool Negate = false, Abs = false; + if (getLexer().getKind()== AsmToken::Minus) { + Parser.Lex(); + Negate = true; + } + + if (getLexer().getKind() == AsmToken::Pipe) { + Parser.Lex(); + Abs = true; + } + switch(getLexer().getKind()) { case AsmToken::Integer: { + SMLoc S = Parser.getTok().getLoc(); int64_t IntVal; if (getParser().parseAbsoluteExpression(IntVal)) return MatchOperand_ParseFail; - Operands.push_back(AMDGPUOperand::CreateImm(IntVal)); + APInt IntVal32(32, IntVal); + if (IntVal32.getSExtValue() != IntVal) { + Error(S, "invalid immediate: only 32-bit values are legal"); + return MatchOperand_ParseFail; + } + + IntVal = IntVal32.getSExtValue(); + if (Negate) + IntVal *= -1; + Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S)); return MatchOperand_Success; } + case AsmToken::Real: { + // FIXME: We should emit an error if a double precisions floating-point + // value is used. I'm not sure the best way to detect this. + SMLoc S = Parser.getTok().getLoc(); + int64_t IntVal; + if (getParser().parseAbsoluteExpression(IntVal)) + return MatchOperand_ParseFail; + + APFloat F((float)BitsToDouble(IntVal)); + if (Negate) + F.changeSign(); + Operands.push_back( + AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S)); + return MatchOperand_Success; + } + case AsmToken::Identifier: { + SMLoc S, E; + unsigned RegNo; + if (!ParseRegister(RegNo, S, E)) { + + bool HasModifiers = operandsHaveModifiers(Operands); + unsigned Modifiers = 0; + + if (Negate) + Modifiers |= 0x1; + + if (Abs) { + if (getLexer().getKind() != AsmToken::Pipe) + return MatchOperand_ParseFail; + Parser.Lex(); + Modifiers |= 0x2; + } + + if (Modifiers && !HasModifiers) { + // We are adding a modifier to src1 or src2 and previous sources + // don't have modifiers, so we need to go back and empty modifers + // for each previous source. + for (unsigned PrevRegIdx = Operands.size() - 1; PrevRegIdx > 1; + --PrevRegIdx) { + + AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[PrevRegIdx]); + RegOp.setModifiers(0); + } + } + + + Operands.push_back(AMDGPUOperand::CreateReg( + RegNo, S, E, getContext().getRegisterInfo())); + + if (HasModifiers || Modifiers) { + AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[Operands.size() - 1]); + RegOp.setModifiers(Modifiers); + + } + } else { + Operands.push_back(AMDGPUOperand::CreateToken(Parser.getTok().getString(), + S)); + Parser.Lex(); + } + return MatchOperand_Success; + } default: return MatchOperand_NoMatch; } @@ -213,22 +669,282 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { + + // Clear any forced encodings from the previous instruction. + setForcedEncodingSize(0); + + if (Name.endswith("_e64")) + setForcedEncodingSize(64); + else if (Name.endswith("_e32")) + setForcedEncodingSize(32); + // Add the instruction mnemonic Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc)); - if (getLexer().is(AsmToken::EndOfStatement)) - return false; + while (!getLexer().is(AsmToken::EndOfStatement)) { + AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name); + + // Eat the comma or space if there is one. + if (getLexer().is(AsmToken::Comma)) + Parser.Lex(); - AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name); - switch (Res) { - case MatchOperand_Success: return false; - case MatchOperand_ParseFail: return Error(NameLoc, - "Failed parsing operand"); - case MatchOperand_NoMatch: return Error(NameLoc, "Not a valid operand"); + switch (Res) { + case MatchOperand_Success: break; + case MatchOperand_ParseFail: return Error(getLexer().getLoc(), + "failed parsing operand."); + case MatchOperand_NoMatch: return Error(getLexer().getLoc(), + "not a valid operand."); + } } - return true; + + // Once we reach end of statement, continue parsing so we can add default + // values for optional arguments. + AMDGPUAsmParser::OperandMatchResultTy Res; + while ((Res = parseOperand(Operands, Name)) != MatchOperand_NoMatch) { + if (Res != MatchOperand_Success) + return Error(getLexer().getLoc(), "failed parsing operand."); + } + return false; +} + +//===----------------------------------------------------------------------===// +// Utility functions +//===----------------------------------------------------------------------===// + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int, + int64_t Default) { + + // We are at the end of the statement, and this is a default argument, so + // use a default value. + if (getLexer().is(AsmToken::EndOfStatement)) { + Int = Default; + return MatchOperand_Success; + } + + switch(getLexer().getKind()) { + default: return MatchOperand_NoMatch; + case AsmToken::Identifier: { + StringRef OffsetName = Parser.getTok().getString(); + if (!OffsetName.equals(Prefix)) + return MatchOperand_NoMatch; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Colon)) + return MatchOperand_ParseFail; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Integer)) + return MatchOperand_ParseFail; + + if (getParser().parseAbsoluteExpression(Int)) + return MatchOperand_ParseFail; + break; + } + } + return MatchOperand_Success; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, + enum AMDGPUOperand::ImmTy ImmTy) { + + SMLoc S = Parser.getTok().getLoc(); + int64_t Offset = 0; + + AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Offset); + if (Res != MatchOperand_Success) + return Res; + + Operands.push_back(AMDGPUOperand::CreateImm(Offset, S, ImmTy)); + return MatchOperand_Success; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, + enum AMDGPUOperand::ImmTy ImmTy) { + int64_t Bit = 0; + SMLoc S = Parser.getTok().getLoc(); + + // We are at the end of the statement, and this is a default argument, so + // use a default value. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + switch(getLexer().getKind()) { + case AsmToken::Identifier: { + StringRef Tok = Parser.getTok().getString(); + if (Tok == Name) { + Bit = 1; + Parser.Lex(); + } else if (Tok.startswith("no") && Tok.endswith(Name)) { + Bit = 0; + Parser.Lex(); + } else { + return MatchOperand_NoMatch; + } + break; + } + default: + return MatchOperand_NoMatch; + } + } + + Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy)); + return MatchOperand_Success; +} + +static bool operandsHasOptionalOp(const OperandVector &Operands, + const OptionalOperand &OOp) { + for (unsigned i = 0; i < Operands.size(); i++) { + const AMDGPUOperand &ParsedOp = ((const AMDGPUOperand &)*Operands[i]); + if ((ParsedOp.isImm() && ParsedOp.getImmTy() == OOp.Type) || + (ParsedOp.isToken() && ParsedOp.getToken() == OOp.Name)) + return true; + + } + return false; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseOptionalOps(const ArrayRef<OptionalOperand> &OptionalOps, + OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + for (const OptionalOperand &Op : OptionalOps) { + if (operandsHasOptionalOp(Operands, Op)) + continue; + AMDGPUAsmParser::OperandMatchResultTy Res; + int64_t Value; + if (Op.IsBit) { + Res = parseNamedBit(Op.Name, Operands, Op.Type); + if (Res == MatchOperand_NoMatch) + continue; + return Res; + } + + Res = parseIntWithPrefix(Op.Name, Value, Op.Default); + + if (Res == MatchOperand_NoMatch) + continue; + + if (Res != MatchOperand_Success) + return Res; + + if (Op.ConvertResult && !Op.ConvertResult(Value)) { + return MatchOperand_ParseFail; + } + + Operands.push_back(AMDGPUOperand::CreateImm(Value, S, Op.Type)); + return MatchOperand_Success; + } + return MatchOperand_NoMatch; +} + +//===----------------------------------------------------------------------===// +// ds +//===----------------------------------------------------------------------===// + +static const OptionalOperand DSOptionalOps [] = { + {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr}, + {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr} +}; + +static const OptionalOperand DSOptionalOpsOff01 [] = { + {"offset0", AMDGPUOperand::ImmTyDSOffset0, false, 0, nullptr}, + {"offset1", AMDGPUOperand::ImmTyDSOffset1, false, 0, nullptr}, + {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr} +}; + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseDSOptionalOps(OperandVector &Operands) { + return parseOptionalOps(DSOptionalOps, Operands); +} +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseDSOff01OptionalOps(OperandVector &Operands) { + return parseOptionalOps(DSOptionalOpsOff01, Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseDSOffsetOptional(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + AMDGPUAsmParser::OperandMatchResultTy Res = + parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset); + if (Res == MatchOperand_NoMatch) { + Operands.push_back(AMDGPUOperand::CreateImm(0, S, + AMDGPUOperand::ImmTyOffset)); + Res = MatchOperand_Success; + } + return Res; +} + +bool AMDGPUOperand::isDSOffset() const { + return isImm() && isUInt<16>(getImm()); +} + +bool AMDGPUOperand::isDSOffset01() const { + return isImm() && isUInt<8>(getImm()); +} + +void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, + const OperandVector &Operands) { + + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; + + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + + // Add the register arguments + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + continue; + } + + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = i; + } + + unsigned Offset0Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset0]; + unsigned Offset1Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset1]; + unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS]; + + ((AMDGPUOperand &)*Operands[Offset0Idx]).addImmOperands(Inst, 1); // offset0 + ((AMDGPUOperand &)*Operands[Offset1Idx]).addImmOperands(Inst, 1); // offset1 + ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds + Inst.addOperand(MCOperand::CreateReg(AMDGPU::M0)); // m0 } +void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) { + + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; + bool GDSOnly = false; + + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + + // Add the register arguments + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + continue; + } + + if (Op.isToken() && Op.getToken() == "gds") { + GDSOnly = true; + continue; + } + + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = i; + } + + unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset]; + ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); // offset + + if (!GDSOnly) { + unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS]; + ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds + } + Inst.addOperand(MCOperand::CreateReg(AMDGPU::M0)); // m0 +} + + //===----------------------------------------------------------------------===// // s_waitcnt //===----------------------------------------------------------------------===// @@ -283,6 +999,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { // expcnt [6:4] // lgkmcnt [10:8] int64_t CntVal = 0x77f; + SMLoc S = Parser.getTok().getLoc(); switch(getLexer().getKind()) { default: return MatchOperand_ParseFail; @@ -299,7 +1016,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { } while(getLexer().isNot(AsmToken::EndOfStatement)); break; } - Operands.push_back(AMDGPUOperand::CreateImm(CntVal)); + Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S)); return MatchOperand_Success; } @@ -307,6 +1024,245 @@ bool AMDGPUOperand::isSWaitCnt() const { return isImm(); } +//===----------------------------------------------------------------------===// +// sopp branch targets +//===----------------------------------------------------------------------===// + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + + switch (getLexer().getKind()) { + default: return MatchOperand_ParseFail; + case AsmToken::Integer: { + int64_t Imm; + if (getParser().parseAbsoluteExpression(Imm)) + return MatchOperand_ParseFail; + Operands.push_back(AMDGPUOperand::CreateImm(Imm, S)); + return MatchOperand_Success; + } + + case AsmToken::Identifier: + Operands.push_back(AMDGPUOperand::CreateExpr( + MCSymbolRefExpr::Create(getContext().GetOrCreateSymbol( + Parser.getTok().getString()), getContext()), S)); + Parser.Lex(); + return MatchOperand_Success; + } +} + +//===----------------------------------------------------------------------===// +// mubuf +//===----------------------------------------------------------------------===// + +static const OptionalOperand MubufOptionalOps [] = { + {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr}, + {"glc", AMDGPUOperand::ImmTyGLC, true, 0, nullptr}, + {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr}, + {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr} +}; + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseMubufOptionalOps(OperandVector &Operands) { + return parseOptionalOps(MubufOptionalOps, Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseOffset(OperandVector &Operands) { + return parseIntWithPrefix("offset", Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseGLC(OperandVector &Operands) { + return parseNamedBit("glc", Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseSLC(OperandVector &Operands) { + return parseNamedBit("slc", Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseTFE(OperandVector &Operands) { + return parseNamedBit("tfe", Operands); +} + +bool AMDGPUOperand::isMubufOffset() const { + return isImm() && isUInt<12>(getImm()); +} + +void AMDGPUAsmParser::cvtMubuf(MCInst &Inst, + const OperandVector &Operands) { + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; + + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + + // Add the register arguments + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + continue; + } + + // Handle the case where soffset is an immediate + if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { + Op.addImmOperands(Inst, 1); + continue; + } + + // Handle tokens like 'offen' which are sometimes hard-coded into the + // asm string. There are no MCInst operands for these. + if (Op.isToken()) { + continue; + } + assert(Op.isImm()); + + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = i; + } + + assert(OptionalIdx.size() == 4); + + unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset]; + unsigned GLCIdx = OptionalIdx[AMDGPUOperand::ImmTyGLC]; + unsigned SLCIdx = OptionalIdx[AMDGPUOperand::ImmTySLC]; + unsigned TFEIdx = OptionalIdx[AMDGPUOperand::ImmTyTFE]; + + ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); + ((AMDGPUOperand &)*Operands[GLCIdx]).addImmOperands(Inst, 1); + ((AMDGPUOperand &)*Operands[SLCIdx]).addImmOperands(Inst, 1); + ((AMDGPUOperand &)*Operands[TFEIdx]).addImmOperands(Inst, 1); +} + +//===----------------------------------------------------------------------===// +// mimg +//===----------------------------------------------------------------------===// + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseDMask(OperandVector &Operands) { + return parseIntWithPrefix("dmask", Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseUNorm(OperandVector &Operands) { + return parseNamedBit("unorm", Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseR128(OperandVector &Operands) { + return parseNamedBit("r128", Operands); +} + +//===----------------------------------------------------------------------===// +// vop3 +//===----------------------------------------------------------------------===// + +static bool ConvertOmodMul(int64_t &Mul) { + if (Mul != 1 && Mul != 2 && Mul != 4) + return false; + + Mul >>= 1; + return true; +} + +static bool ConvertOmodDiv(int64_t &Div) { + if (Div == 1) { + Div = 0; + return true; + } + + if (Div == 2) { + Div = 3; + return true; + } + + return false; +} + +static const OptionalOperand VOP3OptionalOps [] = { + {"clamp", AMDGPUOperand::ImmTyClamp, true, 0, nullptr}, + {"mul", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodMul}, + {"div", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodDiv}, +}; + +static bool isVOP3(OperandVector &Operands) { + if (operandsHaveModifiers(Operands)) + return true; + + AMDGPUOperand &DstOp = ((AMDGPUOperand&)*Operands[1]); + + if (DstOp.isReg() && DstOp.isRegClass(AMDGPU::SGPR_64RegClassID)) + return true; + + if (Operands.size() >= 5) + return true; + + if (Operands.size() > 3) { + AMDGPUOperand &Src1Op = ((AMDGPUOperand&)*Operands[3]); + if (Src1Op.getReg() && (Src1Op.isRegClass(AMDGPU::SReg_32RegClassID) || + Src1Op.isRegClass(AMDGPU::SReg_64RegClassID))) + return true; + } + return false; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) { + + // The value returned by this function may change after parsing + // an operand so store the original value here. + bool HasModifiers = operandsHaveModifiers(Operands); + + bool IsVOP3 = isVOP3(Operands); + if (HasModifiers || IsVOP3 || + getLexer().isNot(AsmToken::EndOfStatement) || + getForcedEncodingSize() == 64) { + + AMDGPUAsmParser::OperandMatchResultTy Res = + parseOptionalOps(VOP3OptionalOps, Operands); + + if (!HasModifiers && Res == MatchOperand_Success) { + // We have added a modifier operation, so we need to make sure all + // previous register operands have modifiers + for (unsigned i = 2, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]); + if (Op.isReg()) + Op.setModifiers(0); + } + } + return Res; + } + return MatchOperand_NoMatch; +} + +void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { + ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); + unsigned i = 2; + + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; + + if (operandsHaveModifiers(Operands)) { + for (unsigned e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + + if (Op.isRegWithInputMods()) { + ((AMDGPUOperand &)*Operands[i]).addRegWithInputModsOperands(Inst, 2); + continue; + } + OptionalIdx[Op.getImmTy()] = i; + } + + unsigned ClampIdx = OptionalIdx[AMDGPUOperand::ImmTyClamp]; + unsigned OModIdx = OptionalIdx[AMDGPUOperand::ImmTyOMod]; + + ((AMDGPUOperand &)*Operands[ClampIdx]).addImmOperands(Inst, 1); + ((AMDGPUOperand &)*Operands[OModIdx]).addImmOperands(Inst, 1); + } else { + for (unsigned e = Operands.size(); i != e; ++i) + ((AMDGPUOperand &)*Operands[i]).addRegOrImmOperands(Inst, 1); + } +} + /// Force static initialization. extern "C" void LLVMInitializeR600AsmParser() { RegisterMCAsmParser<AMDGPUAsmParser> A(TheAMDGPUTarget); diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index d62fd3f..279c3eb 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -20,7 +20,7 @@ using namespace llvm; void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { OS.flush(); printInstruction(MI, OS); @@ -89,14 +89,18 @@ void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << " offset0:"; - printU8ImmDecOperand(MI, OpNo, O); + if (MI->getOperand(OpNo).getImm()) { + O << " offset0:"; + printU8ImmDecOperand(MI, OpNo, O); + } } void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << " offset1:"; - printU8ImmDecOperand(MI, OpNo, O); + if (MI->getOperand(OpNo).getImm()) { + O << " offset1:"; + printU8ImmDecOperand(MI, OpNo, O); + } } void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo, @@ -123,7 +127,8 @@ void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo, O << " tfe"; } -void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) { +void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O, + const MCRegisterInfo &MRI) { switch (reg) { case AMDGPU::VCC: O << "vcc"; @@ -293,7 +298,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, break; default: - printRegOperand(Op.getReg(), O); + printRegOperand(Op.getReg(), O, MRI); break; } } else if (Op.isImm()) { diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h index 5289718..14fb511 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h @@ -29,7 +29,10 @@ public: void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + static void printRegOperand(unsigned RegNo, raw_ostream &O, + const MCRegisterInfo &MRI); private: void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp index d0c634f..f33e692 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -24,7 +24,7 @@ namespace { class AMDGPUMCObjectWriter : public MCObjectWriter { public: - AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { } + AMDGPUMCObjectWriter(raw_pwrite_stream &OS) : MCObjectWriter(OS, true) {} void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override { //XXX: Implement if necessary. @@ -131,7 +131,7 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend { public: ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createAMDGPUELFObjectWriter(OS); } }; diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 5fb94d5..59f45ff 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -33,7 +33,7 @@ protected: AMDGPUELFObjectWriter::AMDGPUELFObjectWriter() : MCELFObjectTargetWriter(false, 0, 0, false) { } -MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_ostream &OS) { +MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_pwrite_stream &OS) { MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter(); return createELFObjectWriter(MOTW, OS, true); } diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp index fb2deef..7b280a4 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -64,12 +64,11 @@ static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T, +static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { return new AMDGPUInstPrinter(MAI, MII, MRI); } diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h index 23f0196..9a7548e 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -28,6 +28,7 @@ class MCObjectWriter; class MCRegisterInfo; class MCSubtargetInfo; class Target; +class raw_pwrite_stream; class raw_ostream; extern Target TheAMDGPUTarget; @@ -44,7 +45,7 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS); +MCObjectWriter *createAMDGPUELFObjectWriter(raw_pwrite_stream &OS); } // End llvm namespace #define GET_REGINFO_ENUM diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp index 760aa37..24f2b6d 100644 --- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -49,7 +49,7 @@ public: MCContext &ctx) : MCII(mcii), MRI(mri), Ctx(ctx) { } - ~SIMCCodeEmitter() { } + ~SIMCCodeEmitter() override {} /// \brief Encode the instruction and write it to the OS. void EncodeInstruction(const MCInst &MI, raw_ostream &OS, diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index a34e2dc..b6b7067 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1811,7 +1811,7 @@ SDValue Swz[4], SelectionDAG &DAG) const { BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap); for (unsigned i = 0; i < 4; i++) { - unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue(); + unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue(); if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32); } @@ -1819,7 +1819,7 @@ SDValue Swz[4], SelectionDAG &DAG) const { SwizzleRemap.clear(); BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap); for (unsigned i = 0; i < 4; i++) { - unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue(); + unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue(); if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32); } diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 291fb04..7126c82 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -683,6 +683,11 @@ def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>; // TODO: Do these actually match the regular fmin/fmax behavior? def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>; def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin_legacy>; +// According to https://msdn.microsoft.com/en-us/library/windows/desktop/cc308050%28v=vs.85%29.aspx +// DX10 min/max returns the other operand if one is NaN, +// this matches http://llvm.org/docs/LangRef.html#llvm-minnum-intrinsic +def MAX_DX10 : R600_2OP_Helper <0x5, "MAX_DX10", fmaxnum>; +def MIN_DX10 : R600_2OP_Helper <0x6, "MIN_DX10", fminnum>; // For the SET* instructions there is a naming conflict in TargetSelectionDAG.td, // so some of the instruction names don't match the asm string. diff --git a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp index 419ec8b..2fc7b02 100644 --- a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp +++ b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp @@ -162,7 +162,7 @@ class R600TextureIntrinsicsReplacer : Value *SamplerId = I.getArgOperand(2); unsigned TextureType = - dyn_cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); unsigned SrcSelect[4] = { 0, 1, 2, 3 }; unsigned CT[4] = {1, 1, 1, 1}; @@ -186,7 +186,7 @@ class R600TextureIntrinsicsReplacer : Value *SamplerId = I.getArgOperand(5); unsigned TextureType = - dyn_cast<ConstantInt>(I.getArgOperand(6))->getZExtValue(); + cast<ConstantInt>(I.getArgOperand(6))->getZExtValue(); unsigned SrcSelect[4] = { 0, 1, 2, 3 }; unsigned CT[4] = {1, 1, 1, 1}; diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp index 79f6532..d39ab3f 100644 --- a/lib/Target/R600/SIAnnotateControlFlow.cpp +++ b/lib/Target/R600/SIAnnotateControlFlow.cpp @@ -83,7 +83,7 @@ class SIAnnotateControlFlow : public FunctionPass { void insertElse(BranchInst *Term); - Value *handleLoopCondition(Value *Cond, PHINode *Broken); + Value *handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L); void handleLoop(BranchInst *Term); @@ -207,7 +207,8 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) { } /// \brief Recursively handle the condition leading to a loop -Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) { +Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken, + llvm::Loop *L) { if (PHINode *Phi = dyn_cast<PHINode>(Cond)) { BasicBlock *Parent = Phi->getParent(); PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front()); @@ -223,7 +224,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) } Phi->setIncomingValue(i, BoolFalse); - Value *PhiArg = handleLoopCondition(Incoming, Broken); + Value *PhiArg = handleLoopCondition(Incoming, Broken, L); NewPhi->addIncoming(PhiArg, From); } @@ -253,7 +254,12 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) { BasicBlock *Parent = Inst->getParent(); - TerminatorInst *Insert = Parent->getTerminator(); + Instruction *Insert; + if (L->contains(Inst)) { + Insert = Parent->getTerminator(); + } else { + Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime(); + } Value *Args[] = { Cond, Broken }; return CallInst::Create(IfBreak, Args, "", Insert); @@ -265,14 +271,15 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) /// \brief Handle a back edge (loop) void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { + BasicBlock *BB = Term->getParent(); + llvm::Loop *L = LI->getLoopFor(BB); BasicBlock *Target = Term->getSuccessor(1); PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front()); Value *Cond = Term->getCondition(); Term->setCondition(BoolTrue); - Value *Arg = handleLoopCondition(Cond, Broken); + Value *Arg = handleLoopCondition(Cond, Broken, L); - BasicBlock *BB = Term->getParent(); for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target); PI != PE; ++PI) { diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index bd0c3c2..43507d8 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -76,8 +76,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setOperationAction(ISD::FSIN, MVT::f32, Custom); setOperationAction(ISD::FCOS, MVT::f32, Custom); - setOperationAction(ISD::FMINNUM, MVT::f32, Legal); - setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); @@ -2089,3 +2087,38 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG, return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()), cast<RegisterSDNode>(VReg)->getReg(), VT); } + +//===----------------------------------------------------------------------===// +// SI Inline Assembly Support +//===----------------------------------------------------------------------===// + +std::pair<unsigned, const TargetRegisterClass *> +SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + const std::string &Constraint, + MVT VT) const { + if (Constraint == "r") { + switch(VT.SimpleTy) { + default: llvm_unreachable("Unhandled type for 'r' inline asm constraint"); + case MVT::i64: + return std::make_pair(0U, &AMDGPU::SGPR_64RegClass); + case MVT::i32: + return std::make_pair(0U, &AMDGPU::SGPR_32RegClass); + } + } + + if (Constraint.size() > 1) { + const TargetRegisterClass *RC = nullptr; + if (Constraint[1] == 'v') { + RC = &AMDGPU::VGPR_32RegClass; + } else if (Constraint[1] == 's') { + RC = &AMDGPU::SGPR_32RegClass; + } + + if (RC) { + unsigned Idx = std::atoi(Constraint.substr(2).c_str()); + if (Idx < RC->getNumRegs()) + return std::make_pair(RC->getRegister(Idx), RC); + } + } + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 92f5847..a6bc7c6 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -113,6 +113,10 @@ public: MachineSDNode *buildScratchRSRC(SelectionDAG &DAG, SDLoc DL, SDValue Ptr) const; + + std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, + const std::string &Constraint, MVT VT) const override; }; } // End namespace llvm diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 4167590..bc693c3 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -130,6 +130,11 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> : let AddedComplexity = -1000; let VOP3 = 1; + let VALU = 1; + + let AsmMatchConverter = "cvtVOP3"; + let isCodeGenOnly = 0; + int Size = 8; } @@ -181,6 +186,19 @@ class SOPKe <bits<5> op> : Enc32 { let Inst{31-28} = 0xb; //encoding } +class SOPK64e <bits<5> op> : Enc64 { + bits <7> sdst = 0; + bits <16> simm16; + bits <32> imm; + + let Inst{15-0} = simm16; + let Inst{22-16} = sdst; + let Inst{27-23} = op; + let Inst{31-28} = 0xb; + + let Inst{63-32} = imm; +} + class SOPPe <bits<7> op> : Enc32 { bits <16> simm16; @@ -208,6 +226,7 @@ class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> : let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; + let isCodeGenOnly = 0; let SALU = 1; let SOP1 = 1; } @@ -218,6 +237,7 @@ class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> : let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; + let isCodeGenOnly = 0; let SALU = 1; let SOP2 = 1; @@ -233,6 +253,7 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : let hasSideEffects = 0; let SALU = 1; let SOPC = 1; + let isCodeGenOnly = 0; let UseNamedOperandTable = 1; } @@ -550,10 +571,14 @@ let Uses = [EXEC] in { class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : VOP1Common <outs, ins, asm, pattern>, - VOP1e<op>; + VOP1e<op> { + let isCodeGenOnly = 0; +} class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> : - VOP2Common <outs, ins, asm, pattern>, VOP2e<op>; + VOP2Common <outs, ins, asm, pattern>, VOP2e<op> { + let isCodeGenOnly = 0; +} class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> : VOPCCommon <ins, asm, pattern>, VOPCe <op>; @@ -586,6 +611,7 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> : let mayStore = 1; let hasSideEffects = 0; + let AsmMatchConverter = "cvtDS"; let SchedRW = [WriteLDS]; } @@ -598,6 +624,7 @@ class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> : let hasSideEffects = 0; let UseNamedOperandTable = 1; + let AsmMatchConverter = "cvtMubuf"; let SchedRW = [WriteVMEM]; } diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index d603ecb..076a0ce 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -6,6 +6,15 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +def isSICI : Predicate< + "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS" +>, AssemblerPredicate<"FeatureGCN1Encoding">; +def isCI : Predicate<"Subtarget->getGeneration() " + ">= AMDGPUSubtarget::SEA_ISLANDS">; +def isVI : Predicate < + "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, + AssemblerPredicate<"FeatureGCN3Encoding">; class vop { field bits<9> SI3; @@ -233,14 +242,88 @@ def FRAMEri32 : Operand<iPTR> { let MIOperandInfo = (ops i32:$ptr, i32imm:$index); } +def SoppBrTarget : AsmOperandClass { + let Name = "SoppBrTarget"; + let ParserMethod = "parseSOppBrTarget"; +} + def sopp_brtarget : Operand<OtherVT> { let EncoderMethod = "getSOPPBrEncoding"; let OperandType = "OPERAND_PCREL"; + let ParserMatchClass = SoppBrTarget; } include "SIInstrFormats.td" include "VIInstrFormats.td" +def MubufOffsetMatchClass : AsmOperandClass { + let Name = "MubufOffset"; + let ParserMethod = "parseMubufOptionalOps"; + let RenderMethod = "addImmOperands"; +} + +class DSOffsetBaseMatchClass <string parser> : AsmOperandClass { + let Name = "DSOffset"#parser; + let ParserMethod = parser; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isDSOffset"; +} + +def DSOffsetMatchClass : DSOffsetBaseMatchClass <"parseDSOptionalOps">; +def DSOffsetGDSMatchClass : DSOffsetBaseMatchClass <"parseDSOffsetOptional">; + +def DSOffset01MatchClass : AsmOperandClass { + let Name = "DSOffset1"; + let ParserMethod = "parseDSOff01OptionalOps"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isDSOffset01"; +} + +class GDSBaseMatchClass <string parser> : AsmOperandClass { + let Name = "GDS"#parser; + let PredicateMethod = "isImm"; + let ParserMethod = parser; + let RenderMethod = "addImmOperands"; +} + +def GDSMatchClass : GDSBaseMatchClass <"parseDSOptionalOps">; +def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">; + +def GLCMatchClass : AsmOperandClass { + let Name = "GLC"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseMubufOptionalOps"; + let RenderMethod = "addImmOperands"; +} + +def SLCMatchClass : AsmOperandClass { + let Name = "SLC"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseMubufOptionalOps"; + let RenderMethod = "addImmOperands"; +} + +def TFEMatchClass : AsmOperandClass { + let Name = "TFE"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseMubufOptionalOps"; + let RenderMethod = "addImmOperands"; +} + +def OModMatchClass : AsmOperandClass { + let Name = "OMod"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseVOP3OptionalOps"; + let RenderMethod = "addImmOperands"; +} + +def ClampMatchClass : AsmOperandClass { + let Name = "Clamp"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseVOP3OptionalOps"; + let RenderMethod = "addImmOperands"; +} + let OperandType = "OPERAND_IMMEDIATE" in { def offen : Operand<i1> { @@ -254,35 +337,52 @@ def addr64 : Operand<i1> { } def mbuf_offset : Operand<i16> { let PrintMethod = "printMBUFOffset"; + let ParserMatchClass = MubufOffsetMatchClass; } -def ds_offset : Operand<i16> { +class ds_offset_base <AsmOperandClass mc> : Operand<i16> { let PrintMethod = "printDSOffset"; + let ParserMatchClass = mc; } +def ds_offset : ds_offset_base <DSOffsetMatchClass>; +def ds_offset_gds : ds_offset_base <DSOffsetGDSMatchClass>; + def ds_offset0 : Operand<i8> { let PrintMethod = "printDSOffset0"; + let ParserMatchClass = DSOffset01MatchClass; } def ds_offset1 : Operand<i8> { let PrintMethod = "printDSOffset1"; + let ParserMatchClass = DSOffset01MatchClass; } -def gds : Operand <i1> { +class gds_base <AsmOperandClass mc> : Operand <i1> { let PrintMethod = "printGDS"; + let ParserMatchClass = mc; } +def gds : gds_base <GDSMatchClass>; + +def gds01 : gds_base <GDS01MatchClass>; + def glc : Operand <i1> { let PrintMethod = "printGLC"; + let ParserMatchClass = GLCMatchClass; } def slc : Operand <i1> { let PrintMethod = "printSLC"; + let ParserMatchClass = SLCMatchClass; } def tfe : Operand <i1> { let PrintMethod = "printTFE"; + let ParserMatchClass = TFEMatchClass; } def omod : Operand <i32> { let PrintMethod = "printOModSI"; + let ParserMatchClass = OModMatchClass; } def ClampMod : Operand <i1> { let PrintMethod = "printClampSI"; + let ParserMatchClass = ClampMatchClass; } } // End OperandType = "OPERAND_IMMEDIATE" @@ -392,12 +492,18 @@ class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> : SOP1 <outs, ins, asm, []>, SOP1e <op.SI>, - SIMCInstr<opName, SISubtarget.SI>; + SIMCInstr<opName, SISubtarget.SI> { + let isCodeGenOnly = 0; + let AssemblerPredicates = [isSICI]; +} class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> : SOP1 <outs, ins, asm, []>, SOP1e <op.VI>, - SIMCInstr<opName, SISubtarget.VI>; + SIMCInstr<opName, SISubtarget.VI> { + let isCodeGenOnly = 0; + let AssemblerPredicates = [isVI]; +} multiclass SOP1_m <sop1 op, string opName, dag outs, dag ins, string asm, list<dag> pattern> { @@ -473,12 +579,16 @@ class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> : class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> : SOP2<outs, ins, asm, []>, SOP2e<op.SI>, - SIMCInstr<opName, SISubtarget.SI>; + SIMCInstr<opName, SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; +} class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> : SOP2<outs, ins, asm, []>, SOP2e<op.VI>, - SIMCInstr<opName, SISubtarget.VI>; + SIMCInstr<opName, SISubtarget.VI> { + let AssemblerPredicates = [isVI]; +} multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> { def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst), @@ -540,12 +650,28 @@ class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> : SOPK <outs, ins, asm, []>, SOPKe <op.SI>, - SIMCInstr<opName, SISubtarget.SI>; + SIMCInstr<opName, SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; + let isCodeGenOnly = 0; +} class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> : SOPK <outs, ins, asm, []>, SOPKe <op.VI>, - SIMCInstr<opName, SISubtarget.VI>; + SIMCInstr<opName, SISubtarget.VI> { + let AssemblerPredicates = [isVI]; + let isCodeGenOnly = 0; +} + +multiclass SOPK_m <sopk op, string opName, dag outs, dag ins, string opAsm, + string asm = opName#opAsm> { + def "" : SOPK_Pseudo <opName, outs, ins, []>; + + def _si : SOPK_Real_si <op, opName, outs, ins, asm>; + + def _vi : SOPK_Real_vi <op, opName, outs, ins, asm>; + +} multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> { def "" : SOPK_Pseudo <opName, (outs SReg_32:$dst), (ins u16imm:$src0), @@ -562,13 +688,39 @@ multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> { def "" : SOPK_Pseudo <opName, (outs SCCReg:$dst), (ins SReg_32:$src0, u16imm:$src1), pattern>; - def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst), - (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">; + let DisableEncoding = "$dst" in { + def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst), + (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">; - def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst), - (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">; + def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst), + (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">; + } } +multiclass SOPK_32TIE <sopk op, string opName, list<dag> pattern> : SOPK_m < + op, opName, (outs SReg_32:$sdst), (ins SReg_32:$src0, u16imm:$simm16), + " $sdst, $simm16" +>; + +multiclass SOPK_IMM32 <sopk op, string opName, dag outs, dag ins, + string argAsm, string asm = opName#argAsm> { + + def "" : SOPK_Pseudo <opName, outs, ins, []>; + + def _si : SOPK <outs, ins, asm, []>, + SOPK64e <op.SI>, + SIMCInstr<opName, SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; + let isCodeGenOnly = 0; + } + + def _vi : SOPK <outs, ins, asm, []>, + SOPK64e <op.VI>, + SIMCInstr<opName, SISubtarget.VI> { + let AssemblerPredicates = [isVI]; + let isCodeGenOnly = 0; + } +} //===----------------------------------------------------------------------===// // SMRD classes //===----------------------------------------------------------------------===// @@ -584,13 +736,17 @@ class SMRD_Real_si <bits<5> op, string opName, bit imm, dag outs, dag ins, string asm> : SMRD <outs, ins, asm, []>, SMRDe <op, imm>, - SIMCInstr<opName, SISubtarget.SI>; + SIMCInstr<opName, SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; +} class SMRD_Real_vi <bits<8> op, string opName, bit imm, dag outs, dag ins, string asm> : SMRD <outs, ins, asm, []>, SMEMe_vi <op, imm>, - SIMCInstr<opName, SISubtarget.VI>; + SIMCInstr<opName, SISubtarget.VI> { + let AssemblerPredicates = [isVI]; +} multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins, string asm, list<dag> pattern> { @@ -629,8 +785,14 @@ multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass, def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> { let PrintMethod = "printOperandAndMods"; } + +def InputModsMatchClass : AsmOperandClass { + let Name = "RegWithInputMods"; +} + def InputModsNoDefault : Operand <i32> { let PrintMethod = "printOperandAndMods"; + let ParserMatchClass = InputModsMatchClass; } class getNumSrcArgs<ValueType Src1, ValueType Src2> { @@ -838,7 +1000,8 @@ class AtomicNoRet <string noRetOp, bit isRet> { class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : VOP1Common <outs, ins, "", pattern>, VOP <opName>, - SIMCInstr <opName#"_e32", SISubtarget.NONE> { + SIMCInstr <opName#"_e32", SISubtarget.NONE>, + MnemonicAlias<opName#"_e32", opName> { let isPseudo = 1; let isCodeGenOnly = 1; @@ -873,18 +1036,23 @@ multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern, class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : VOP2Common <outs, ins, "", pattern>, VOP <opName>, - SIMCInstr<opName#"_e32", SISubtarget.NONE> { + SIMCInstr<opName#"_e32", SISubtarget.NONE>, + MnemonicAlias<opName#"_e32", opName> { let isPseudo = 1; let isCodeGenOnly = 1; } class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> : VOP2 <op.SI, outs, ins, opName#asm, []>, - SIMCInstr <opName#"_e32", SISubtarget.SI>; + SIMCInstr <opName#"_e32", SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; +} class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> : - VOP2 <op.SI, outs, ins, opName#asm, []>, - SIMCInstr <opName#"_e32", SISubtarget.VI>; + VOP2 <op.VI, outs, ins, opName#asm, []>, + SIMCInstr <opName#"_e32", SISubtarget.VI> { + let AssemblerPredicates = [isVI]; +} multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern, string opName, string revOp> { @@ -930,7 +1098,8 @@ class VOP3DisableModFields <bit HasSrc0Mods, class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : VOP3Common <outs, ins, "", pattern>, VOP <opName>, - SIMCInstr<opName#"_e64", SISubtarget.NONE> { + SIMCInstr<opName#"_e64", SISubtarget.NONE>, + MnemonicAlias<opName#"_e64", opName> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -938,22 +1107,30 @@ class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> : VOP3Common <outs, ins, asm, []>, VOP3e <op>, - SIMCInstr<opName#"_e64", SISubtarget.SI>; + SIMCInstr<opName#"_e64", SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; +} class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> : VOP3Common <outs, ins, asm, []>, VOP3e_vi <op>, - SIMCInstr <opName#"_e64", SISubtarget.VI>; + SIMCInstr <opName#"_e64", SISubtarget.VI> { + let AssemblerPredicates = [isVI]; +} class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> : VOP3Common <outs, ins, asm, []>, VOP3be <op>, - SIMCInstr<opName#"_e64", SISubtarget.SI>; + SIMCInstr<opName#"_e64", SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; +} class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> : VOP3Common <outs, ins, asm, []>, VOP3be_vi <op>, - SIMCInstr <opName#"_e64", SISubtarget.VI>; + SIMCInstr <opName#"_e64", SISubtarget.VI> { + let AssemblerPredicates = [isVI]; +} multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern, string opName, int NumSrcArgs, bit HasMods = 1> { @@ -1095,12 +1272,16 @@ multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins, } def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>, - SIMCInstr <opName, SISubtarget.SI>; + SIMCInstr <opName, SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; + } def _vi : VOP3Common <outs, ins, asm, []>, VOP3e_vi <op.VI3>, VOP3DisableFields <1, 0, 0>, - SIMCInstr <opName, SISubtarget.VI>; + SIMCInstr <opName, SISubtarget.VI> { + let AssemblerPredicates = [isVI]; + } } multiclass VOP1_Helper <vop1 op, string opName, dag outs, @@ -1253,7 +1434,8 @@ let isCodeGenOnly = 0 in { class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : VOPCCommon <ins, "", pattern>, VOP <opName>, - SIMCInstr<opName#"_e32", SISubtarget.NONE> { + SIMCInstr<opName#"_e32", SISubtarget.NONE>, + MnemonicAlias<opName#"_e32", opName> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -1504,7 +1686,9 @@ class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> : DS <outs, ins, asm, []>, DSe <op>, - SIMCInstr <opName, SISubtarget.SI>; + SIMCInstr <opName, SISubtarget.SI> { + let isCodeGenOnly = 0; +} class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> : DS <outs, ins, asm, []>, @@ -1518,6 +1702,7 @@ class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm bits<16> offset; let offset0 = offset{7-0}; let offset1 = offset{15-8}; + let isCodeGenOnly = 0; } class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> : @@ -1545,12 +1730,12 @@ multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc, multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc, dag outs = (outs rc:$vdst), dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1, - gds:$gds, M0Reg:$m0), + gds01:$gds, M0Reg:$m0), string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> { def "" : DS_Pseudo <opName, outs, ins, []>; - let data0 = 0, data1 = 0 in { + let data0 = 0, data1 = 0, AsmMatchConverter = "cvtDSOffset01" in { def _si : DS_Real_si <op, opName, outs, ins, asm>; def _vi : DS_Real_vi <op, opName, outs, ins, asm>; } @@ -1574,12 +1759,12 @@ multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc, multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc, dag outs = (outs), dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1, - ds_offset0:$offset0, ds_offset1:$offset1, gds:$gds, M0Reg:$m0), + ds_offset0:$offset0, ds_offset1:$offset1, gds01:$gds, M0Reg:$m0), string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> { def "" : DS_Pseudo <opName, outs, ins, []>; - let vdst = 0 in { + let vdst = 0, AsmMatchConverter = "cvtDSOffset01" in { def _si : DS_Real_si <op, opName, outs, ins, asm>; def _vi : DS_Real_vi <op, opName, outs, ins, asm>; } @@ -1653,7 +1838,7 @@ multiclass DS_0A_RET <bits<8> op, string opName, multiclass DS_1A_RET_GDS <bits<8> op, string opName, dag outs = (outs VGPR_32:$vdst), - dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0), + dag ins = (ins VGPR_32:$addr, ds_offset_gds:$offset, M0Reg:$m0), string asm = opName#" $vdst, $addr"#"$offset gds"> { def "" : DS_Pseudo <opName, outs, ins, []>; @@ -1762,6 +1947,20 @@ class mubuf <bits<7> si, bits<7> vi = si> { field bits<7> VI = vi; } +let isCodeGenOnly = 0 in { + +class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : + MUBUF <outs, ins, asm, pattern>, MUBUFe <op> { + let lds = 0; +} + +} // End let isCodeGenOnly = 0 + +class MUBUF_vi <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : + MUBUF <outs, ins, asm, pattern>, MUBUFe_vi <op> { + let lds = 0; +} + class MUBUFAddr64Table <bit is_addr64, string suffix = ""> { bit IsAddr64 = is_addr64; string OpName = NAME # suffix; @@ -1805,7 +2004,7 @@ multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm, def "" : MUBUF_Pseudo <opName, outs, ins, pattern>, MUBUFAddr64Table <0>; - let addr64 = 0 in { + let addr64 = 0, isCodeGenOnly = 0 in { def _si : MUBUF_Real_si <op, opName, outs, ins, asm>; } @@ -1818,7 +2017,7 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs, def "" : MUBUF_Pseudo <opName, outs, ins, pattern>, MUBUFAddr64Table <1>; - let addr64 = 1 in { + let addr64 = 1, isCodeGenOnly = 0 in { def _si : MUBUF_Real_si <op, opName, outs, ins, asm>; } @@ -1826,11 +2025,6 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs, // for VI appropriately. } -class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : - MUBUF <outs, ins, asm, pattern>, MUBUFe <op> { - let lds = 0; -} - multiclass MUBUFAtomicOffset_m <mubuf op, string opName, dag outs, dag ins, string asm, list<dag> pattern, bit is_return> { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 95b2470..91e8c8c 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -27,18 +27,10 @@ def SendMsgImm : Operand<i32> { } def isGCN : Predicate<"Subtarget->getGeneration() " - ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">; + ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">, + AssemblerPredicate<"FeatureGCN">; def isSI : Predicate<"Subtarget->getGeneration() " "== AMDGPUSubtarget::SOUTHERN_ISLANDS">; -def isSICI : Predicate< - "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" - "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS" ->; -def isCI : Predicate<"Subtarget->getGeneration() " - ">= AMDGPUSubtarget::SEA_ISLANDS">; -def isVI : Predicate < - "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS" ->; def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">; @@ -242,9 +234,9 @@ defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32", >; } // End Defs = [SCC] -defm S_CSELECT_B32 : SOP2_SELECT_32 <sop2<0x0a>, "s_cselect_b32", []>; let Uses = [SCC] in { + defm S_CSELECT_B32 : SOP2_32 <sop2<0x0a>, "s_cselect_b32", []>; defm S_CSELECT_B64 : SOP2_64 <sop2<0x0b>, "s_cselect_b64", []>; } // End Uses = [SCC] @@ -387,6 +379,7 @@ defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", >; */ +defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", []>; defm S_CMPK_LG_I32 : SOPK_SCC <sopk<0x04, 0x03>, "s_cmpk_lg_i32", []>; defm S_CMPK_GT_I32 : SOPK_SCC <sopk<0x05, 0x04>, "s_cmpk_gt_i32", []>; defm S_CMPK_GE_I32 : SOPK_SCC <sopk<0x06, 0x05>, "s_cmpk_ge_i32", []>; @@ -400,18 +393,27 @@ defm S_CMPK_LT_U32 : SOPK_SCC <sopk<0x0d, 0x0c>, "s_cmpk_lt_u32", []>; defm S_CMPK_LE_U32 : SOPK_SCC <sopk<0x0e, 0x0d>, "s_cmpk_le_u32", []>; } // End isCompare = 1 -let isCommutable = 1 in { - let Defs = [SCC], isCommutable = 1 in { - defm S_ADDK_I32 : SOPK_32 <sopk<0x0f, 0x0e>, "s_addk_i32", []>; - } - defm S_MULK_I32 : SOPK_32 <sopk<0x10, 0x0f>, "s_mulk_i32", []>; +let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0", + Constraints = "$sdst = $src0" in { + defm S_ADDK_I32 : SOPK_32TIE <sopk<0x0f, 0x0e>, "s_addk_i32", []>; + defm S_MULK_I32 : SOPK_32TIE <sopk<0x10, 0x0f>, "s_mulk_i32", []>; } -//defm S_CBRANCH_I_FORK : SOPK_ <sopk<0x11, 0x10>, "s_cbranch_i_fork", []>; +defm S_CBRANCH_I_FORK : SOPK_m < + sopk<0x11, 0x10>, "s_cbranch_i_fork", (outs), + (ins SReg_64:$sdst, u16imm:$simm16), " $sdst, $simm16" +>; defm S_GETREG_B32 : SOPK_32 <sopk<0x12, 0x11>, "s_getreg_b32", []>; -defm S_SETREG_B32 : SOPK_32 <sopk<0x13, 0x12>, "s_setreg_b32", []>; -defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>; -//defm S_SETREG_IMM32_B32 : SOPK_32 <sopk<0x15, 0x14>, "s_setreg_imm32_b32", []>; +defm S_SETREG_B32 : SOPK_m < + sopk<0x13, 0x12>, "s_setreg_b32", (outs), + (ins SReg_32:$sdst, u16imm:$simm16), " $sdst, $simm16" +>; +// FIXME: Not on SI? +//defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>; +defm S_SETREG_IMM32_B32 : SOPK_IMM32 < + sopk<0x15, 0x14>, "s_setreg_imm32_b32", (outs), + (ins i32imm:$imm, u16imm:$simm16), " $imm, $simm16" +>; //===----------------------------------------------------------------------===// // SOPP Instructions @@ -1630,7 +1632,6 @@ defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp >; - defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32", VOP_I32_F32_I32>; // TODO: set "Uses = dst" diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 7bb5dc2..f289014 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -66,7 +66,7 @@ foreach Index = 0-255 in { //===----------------------------------------------------------------------===// // SGPR 32-bit registers -def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, +def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add (sequence "SGPR%u", 0, 101))>; // SGPR 64-bit registers @@ -113,7 +113,7 @@ def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, (add (decimate (shl SGPR_32, 15), 4))]>; // VGPR 32-bit registers -def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, +def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add (sequence "VGPR%u", 0, 255))>; // VGPR 64-bit registers @@ -169,6 +169,11 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, // Register classes used as source and destination //===----------------------------------------------------------------------===// +class RegImmMatcher<string name> : AsmOperandClass { + let Name = name; + let RenderMethod = "addRegOrImmOperands"; +} + // Special register classes for predicates and the M0 register def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> { let CopyCost = -1; // Theoretically it is possible to read from SCC, @@ -180,7 +185,7 @@ def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>; def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>; // Register class for all scalar registers (SGPRs + Special Registers) -def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, +def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SGPR_32, M0Reg, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI) >; @@ -227,15 +232,21 @@ class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> { // SSrc_* Operands with an SGPR or a 32-bit immediate //===----------------------------------------------------------------------===// -def SSrc_32 : RegImmOperand<SReg_32>; +def SSrc_32 : RegImmOperand<SReg_32> { + let ParserMatchClass = RegImmMatcher<"SSrc32">; +} -def SSrc_64 : RegImmOperand<SReg_64>; +def SSrc_64 : RegImmOperand<SReg_64> { + let ParserMatchClass = RegImmMatcher<"SSrc64">; +} //===----------------------------------------------------------------------===// // SCSrc_* Operands with an SGPR or a inline constant //===----------------------------------------------------------------------===// -def SCSrc_32 : RegInlineOperand<SReg_32>; +def SCSrc_32 : RegInlineOperand<SReg_32> { + let ParserMatchClass = RegImmMatcher<"SCSrc32">; +} //===----------------------------------------------------------------------===// // VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate @@ -245,14 +256,30 @@ def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>; def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>; -def VSrc_32 : RegImmOperand<VS_32>; +def VSrc_32 : RegisterOperand<VS_32> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_IMM32"; + let ParserMatchClass = RegImmMatcher<"VSrc32">; +} -def VSrc_64 : RegImmOperand<VS_64>; +def VSrc_64 : RegisterOperand<VS_64> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_IMM32"; + let ParserMatchClass = RegImmMatcher<"VSrc64">; +} //===----------------------------------------------------------------------===// // VCSrc_* Operands with an SGPR, VGPR or an inline constant //===----------------------------------------------------------------------===// -def VCSrc_32 : RegInlineOperand<VS_32>; +def VCSrc_32 : RegisterOperand<VS_32> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_INLINE_C"; + let ParserMatchClass = RegImmMatcher<"VCSrc32">; +} -def VCSrc_64 : RegInlineOperand<VS_64>; +def VCSrc_64 : RegisterOperand<VS_64> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_INLINE_C"; + let ParserMatchClass = RegImmMatcher<"VCSrc64">; +} diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp index 27bbf4f..591ce85 100644 --- a/lib/Target/R600/SITypeRewriter.cpp +++ b/lib/Target/R600/SITypeRewriter.cpp @@ -104,7 +104,7 @@ void SITypeRewriter::visitCallInst(CallInst &I) { SmallVector <Type*, 8> Types; bool NeedToReplace = false; Function *F = I.getCalledFunction(); - std::string Name = F->getName().str(); + std::string Name = F->getName(); for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { Value *Arg = I.getArgOperand(i); if (Arg->getType() == v16i8) { diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp index 5975a51..b6eebb0 100644 --- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp +++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp @@ -34,7 +34,7 @@ namespace Sparc { #define PRINT_ALIAS_INSTR #include "SparcGenAsmWriter.inc" -bool SparcInstPrinter::isV9() const { +bool SparcInstPrinter::isV9(const MCSubtargetInfo &STI) const { return (STI.getFeatureBits() & Sparc::FeatureV9) != 0; } @@ -44,15 +44,15 @@ void SparcInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const } void SparcInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) -{ - if (!printAliasInstr(MI, O) && !printSparcAliasInstr(MI, O)) - printInstruction(MI, O); + StringRef Annot, const MCSubtargetInfo &STI) { + if (!printAliasInstr(MI, STI, O) && !printSparcAliasInstr(MI, STI, O)) + printInstruction(MI, STI, O); printAnnotation(O, Annot); } -bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O) -{ +bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, + const MCSubtargetInfo &STI, + raw_ostream &O) { switch (MI->getOpcode()) { default: return false; case SP::JMPLrr: @@ -72,16 +72,16 @@ bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O) case SP::O7: O << "\tretl"; return true; } } - O << "\tjmp "; printMemOperand(MI, 1, O); + O << "\tjmp "; printMemOperand(MI, 1, STI, O); return true; case SP::O7: // call $addr - O << "\tcall "; printMemOperand(MI, 1, O); + O << "\tcall "; printMemOperand(MI, 1, STI, O); return true; } } case SP::V9FCMPS: case SP::V9FCMPD: case SP::V9FCMPQ: case SP::V9FCMPES: case SP::V9FCMPED: case SP::V9FCMPEQ: { - if (isV9() + if (isV9(STI) || (MI->getNumOperands() != 3) || (!MI->getOperand(0).isReg()) || (MI->getOperand(0).getReg() != SP::FCC0)) @@ -96,17 +96,17 @@ bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O) case SP::V9FCMPED: O << "\tfcmped "; break; case SP::V9FCMPEQ: O << "\tfcmpeq "; break; } - printOperand(MI, 1, O); + printOperand(MI, 1, STI, O); O << ", "; - printOperand(MI, 2, O); + printOperand(MI, 2, STI, O); return true; } } } void SparcInstPrinter::printOperand(const MCInst *MI, int opNum, - raw_ostream &O) -{ + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO = MI->getOperand (opNum); if (MO.isReg()) { @@ -124,14 +124,14 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum, } void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum, - raw_ostream &O, const char *Modifier) -{ - printOperand(MI, opNum, O); + const MCSubtargetInfo &STI, + raw_ostream &O, const char *Modifier) { + printOperand(MI, opNum, STI, O); // If this is an ADD operand, emit it like normal operands. if (Modifier && !strcmp(Modifier, "arith")) { O << ", "; - printOperand(MI, opNum+1, O); + printOperand(MI, opNum+1, STI, O); return; } const MCOperand &MO = MI->getOperand(opNum+1); @@ -143,12 +143,12 @@ void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum, O << "+"; - printOperand(MI, opNum+1, O); + printOperand(MI, opNum+1, STI, O); } void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum, - raw_ostream &O) -{ + const MCSubtargetInfo &STI, + raw_ostream &O) { int CC = (int)MI->getOperand(opNum).getImm(); switch (MI->getOpcode()) { default: break; @@ -171,8 +171,8 @@ void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum, } bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum, - raw_ostream &O) -{ + const MCSubtargetInfo &STI, + raw_ostream &O) { llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX."); return true; } diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h index c96d5ad..0b01b88 100644 --- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h +++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h @@ -22,32 +22,36 @@ namespace llvm { class MCOperand; class SparcInstPrinter : public MCInstPrinter { - const MCSubtargetInfo &STI; public: - SparcInstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &sti) - : MCInstPrinter(MAI, MII, MRI), STI(sti) {} + SparcInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; - bool printSparcAliasInstr(const MCInst *MI, raw_ostream &OS); - bool isV9() const; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + bool printSparcAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &OS); + bool isV9(const MCSubtargetInfo &STI) const; // Autogenerated by tblgen. - void printInstruction(const MCInst *MI, raw_ostream &O); - bool printAliasInstr(const MCInst *MI, raw_ostream &O); + void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); + bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, - unsigned PrintMethodIdx, raw_ostream &O); + unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - void printOperand(const MCInst *MI, int opNum, raw_ostream &OS); - void printMemOperand(const MCInst *MI, int opNum, raw_ostream &OS, - const char *Modifier = nullptr); - void printCCOperand(const MCInst *MI, int opNum, raw_ostream &OS); - bool printGetPCX(const MCInst *MI, unsigned OpNo, raw_ostream &OS); - + void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &OS); + void printMemOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &OS, const char *Modifier = nullptr); + void printCCOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &OS); + bool printGetPCX(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &OS); }; } // end namespace llvm diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index dcd81e3..4abb6b8 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -244,7 +244,7 @@ namespace { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType); return createSparcELFObjectWriter(OS, is64Bit(), OSABI); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp index 5ba82f1..98ba7e6 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp @@ -26,7 +26,8 @@ namespace { Is64Bit ? ELF::EM_SPARCV9 : ELF::EM_SPARC, /*HasRelocationAddend*/ true) {} - virtual ~SparcELFObjectWriter() {} + ~SparcELFObjectWriter() override {} + protected: unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; @@ -104,9 +105,8 @@ unsigned SparcELFObjectWriter::GetRelocType(const MCValue &Target, return ELF::R_SPARC_NONE; } -MCObjectWriter *llvm::createSparcELFObjectWriter(raw_ostream &OS, - bool Is64Bit, - uint8_t OSABI) { +MCObjectWriter *llvm::createSparcELFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint8_t OSABI) { MCELFObjectTargetWriter *MOTW = new SparcELFObjectWriter(Is64Bit, OSABI); return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp index 598856f..b447ab3 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp @@ -38,7 +38,7 @@ class SparcMCCodeEmitter : public MCCodeEmitter { public: SparcMCCodeEmitter(MCContext &ctx): Ctx(ctx) {} - ~SparcMCCodeEmitter() {} + ~SparcMCCodeEmitter() override {} void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index 630ed1b..7895404 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -134,13 +134,12 @@ static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S, return new SparcTargetAsmStreamer(S, OS); } -static MCInstPrinter *createSparcMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - return new SparcInstPrinter(MAI, MII, MRI, STI); +static MCInstPrinter *createSparcMCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + return new SparcInstPrinter(MAI, MII, MRI); } extern "C" void LLVMInitializeSparcTargetMC() { diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h index d2ec991..5f38b12 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h @@ -26,6 +26,7 @@ class MCRegisterInfo; class MCSubtargetInfo; class Target; class StringRef; +class raw_pwrite_stream; class raw_ostream; extern Target TheSparcTarget; @@ -38,8 +39,7 @@ MCAsmBackend *createSparcAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createSparcELFObjectWriter(raw_ostream &OS, - bool Is64Bit, +MCObjectWriter *createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, uint8_t OSABI); } // End llvm namespace diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td index 3159a46..c34122e 100644 --- a/lib/Target/Sparc/Sparc.td +++ b/lib/Target/Sparc/Sparc.td @@ -92,8 +92,15 @@ def : Proc<"niagara4", [FeatureV9, FeatureV8Deprecated, UsePopc, // Declare the target which we are implementing //===----------------------------------------------------------------------===// +def SparcAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; +} + def Sparc : Target { // Pull in Instruction Info: let InstructionSet = SparcInstrInfo; let AssemblyParsers = [SparcAsmParser]; + let AssemblyWriters = [SparcAsmWriter]; } diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index 0439f9d..56290e2 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -58,7 +58,6 @@ namespace { void EmitFunctionBodyStart() override; void EmitInstruction(const MachineInstr *MI) override; - void EmitEndOfAsmFile(Module &M) override; static const char *getRegisterName(unsigned RegNo) { return SparcInstPrinter::getRegisterName(RegNo); @@ -442,23 +441,6 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -void SparcAsmPrinter::EmitEndOfAsmFile(Module &M) { - const TargetLoweringObjectFileELF &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Generate stubs for global variables. - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataSection()); - unsigned PtrSize = TM.getDataLayout()->getPointerSize(0); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - OutStreamer.EmitLabel(Stubs[i].first); - OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), PtrSize); - } - } -} - // Force static initialization. extern "C" void LLVMInitializeSparcAsmPrinter() { RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget); diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index c8b0570..5b964af 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -727,7 +727,8 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align, false, // isVolatile, - (Size <= 32), // AlwaysInline if size <= 32 + (Size <= 32), // AlwaysInline if size <= 32, + false, // isTailCall MachinePointerInfo(), MachinePointerInfo()); ByValArgs.push_back(FIPtr); } diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h index a3a21d6..6818291 100644 --- a/lib/Target/Sparc/SparcSelectionDAGInfo.h +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h @@ -23,7 +23,7 @@ class SparcTargetMachine; class SparcSelectionDAGInfo : public TargetSelectionDAGInfo { public: explicit SparcSelectionDAGInfo(const DataLayout &DL); - ~SparcSelectionDAGInfo(); + ~SparcSelectionDAGInfo() override; }; } diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 60a3912..336f037 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -29,6 +29,7 @@ add_llvm_target(SystemZCodeGen SystemZShortenInst.cpp SystemZSubtarget.cpp SystemZTargetMachine.cpp + SystemZTargetTransformInfo.cpp ) add_subdirectory(AsmParser) diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index 23173bf..84400f8 100644 --- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -25,7 +25,7 @@ class SystemZDisassembler : public MCDisassembler { public: SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} - virtual ~SystemZDisassembler() {} + ~SystemZDisassembler() override {} DecodeStatus getInstruction(MCInst &instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp index 996a492..cf1ee54 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -43,7 +43,8 @@ void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) { } void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, + const MCSubtargetInfo &STI) { printInstruction(MI, O); printAnnotation(O, Annot); } diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h index 732e5fa..6f56c7b 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h @@ -39,7 +39,8 @@ public: // Override MCInstPrinter. void printRegName(raw_ostream &O, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; private: // Print various types of operand. diff --git a/lib/Target/SystemZ/LLVMBuild.txt b/lib/Target/SystemZ/LLVMBuild.txt index 542aaee..6f8431d 100644 --- a/lib/Target/SystemZ/LLVMBuild.txt +++ b/lib/Target/SystemZ/LLVMBuild.txt @@ -31,5 +31,5 @@ has_jit = 1 type = Library name = SystemZCodeGen parent = SystemZ -required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target +required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target add_to_library_groups = SystemZ diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index b79b1d8..1c3887a 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -62,7 +62,7 @@ public: llvm_unreachable("SystemZ does do not have assembler relaxation"); } bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createSystemZObjectWriter(OS, OSABI); } }; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index 40dc48e..8dd70b9 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -32,7 +32,7 @@ public: : MCII(mcii), Ctx(ctx) { } - ~SystemZMCCodeEmitter() {} + ~SystemZMCCodeEmitter() override {} // OVerride MCCodeEmitter. void EncodeInstruction(const MCInst &MI, raw_ostream &OS, diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp index 2632518..ee1af02 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -20,7 +20,7 @@ class SystemZObjectWriter : public MCELFObjectTargetWriter { public: SystemZObjectWriter(uint8_t OSABI); - virtual ~SystemZObjectWriter(); + ~SystemZObjectWriter() override; protected: // Override MCELFObjectTargetWriter. @@ -152,7 +152,7 @@ unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target, } } -MCObjectWriter *llvm::createSystemZObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) { MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI); return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false); diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index ffd05a9..ea56fb1 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -172,12 +172,11 @@ static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createSystemZMCInstPrinter(const Target &T, +static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { return new SystemZInstPrinter(MAI, MII, MRI); } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index 962c950..2b2647b 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -23,6 +23,7 @@ class MCRegisterInfo; class MCSubtargetInfo; class StringRef; class Target; +class raw_pwrite_stream; class raw_ostream; extern Target TheSystemZTarget; @@ -77,7 +78,7 @@ MCAsmBackend *createSystemZMCAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI); +MCObjectWriter *createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI); } // end namespace llvm // Defines symbolic names for SystemZ registers. diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index 5f17edb..b3a7310 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -68,6 +68,18 @@ const unsigned CCMASK_TM_MSB_0 = CCMASK_0 | CCMASK_1; const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3; const unsigned CCMASK_TM = CCMASK_ANY; +// Condition-code mask assignments for TRANSACTION_BEGIN. +const unsigned CCMASK_TBEGIN_STARTED = CCMASK_0; +const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1; +const unsigned CCMASK_TBEGIN_TRANSIENT = CCMASK_2; +const unsigned CCMASK_TBEGIN_PERSISTENT = CCMASK_3; +const unsigned CCMASK_TBEGIN = CCMASK_ANY; + +// Condition-code mask assignments for TRANSACTION_END. +const unsigned CCMASK_TEND_TX = CCMASK_0; +const unsigned CCMASK_TEND_NOTX = CCMASK_2; +const unsigned CCMASK_TEND = CCMASK_TEND_TX | CCMASK_TEND_NOTX; + // The position of the low CC bit in an IPM result. const unsigned IPM_CC = 28; diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 18e37e3..2524733 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -255,29 +255,6 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) { - if (Triple(TM.getTargetTriple()).isOSBinFormatELF()) { - auto &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Output stubs for external and common global variables. - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); - - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - OutStreamer.EmitLabel(Stubs[i].first); - OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(0)); - } - Stubs.clear(); - } - } -} - // Force static initialization. extern "C" void LLVMInitializeSystemZAsmPrinter() { RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget); diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.h b/lib/Target/SystemZ/SystemZAsmPrinter.h index a4d5b78..7f6e823 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.h +++ b/lib/Target/SystemZ/SystemZAsmPrinter.h @@ -38,7 +38,6 @@ public: bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS) override; - void EmitEndOfAsmFile(Module &M) override; }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index a52aa25..1a58b53 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -898,6 +898,9 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { } unsigned Opcode = SystemZ::RISBG; + // Prefer RISBGN if available, since it does not clobber CC. + if (Subtarget->hasMiscellaneousExtensions()) + Opcode = SystemZ::RISBGN; EVT OpcodeVT = MVT::i64; if (VT == MVT::i32 && Subtarget->hasHighWord()) { Opcode = SystemZ::RISBMux; @@ -945,9 +948,13 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { // See whether we can avoid an AND in the first operand by converting // ROSBG to RISBG. - if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) + if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) { Opcode = SystemZ::RISBG; - + // Prefer RISBGN if available, since it does not clobber CC. + if (Subtarget->hasMiscellaneousExtensions()) + Opcode = SystemZ::RISBGN; + } + EVT VT = N->getValueType(0); SDValue Ops[5] = { convertTo(SDLoc(N), MVT::i64, Op0), diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 0ca8bcd..21882cb 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/Intrinsics.h" #include <cctype> using namespace llvm; @@ -163,8 +164,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, // available, or if the operand is constant. setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); + // Use POPCNT on z196 and above. + if (Subtarget.hasPopulationCount()) + setOperationAction(ISD::CTPOP, VT, Custom); + else + setOperationAction(ISD::CTPOP, VT, Expand); + // No special instructions for these. - setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); @@ -299,6 +305,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, // Codes for which we want to perform some z-specific combinations. setTargetDAGCombine(ISD::SIGN_EXTEND); + // Handle intrinsics. + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + // We want to use MVC in preference to even a single load/store pair. MaxStoresPerMemcpy = 0; MaxStoresPerMemcpyOptSize = 0; @@ -342,6 +351,16 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return Imm.isZero() || Imm.isNegZero(); } +bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + // We can use CGFI or CLGFI. + return isInt<32>(Imm) || isUInt<32>(Imm); +} + +bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { + // We can use ALGFI or SLGFI. + return isUInt<32>(Imm) || isUInt<32>(-Imm); +} + bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, @@ -1016,6 +1035,53 @@ prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const { return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain); } +// Return true if Op is an intrinsic node with chain that returns the CC value +// as its only (other) argument. Provide the associated SystemZISD opcode and +// the mask of valid CC values if so. +static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, + unsigned &CCValid) { + unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_tbegin: + Opcode = SystemZISD::TBEGIN; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tbegin_nofloat: + Opcode = SystemZISD::TBEGIN_NOFLOAT; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tend: + Opcode = SystemZISD::TEND; + CCValid = SystemZ::CCMASK_TEND; + return true; + + default: + return false; + } +} + +// Emit an intrinsic with chain with a glued value instead of its CC result. +static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { + // Copy all operands except the intrinsic ID. + unsigned NumOps = Op.getNumOperands(); + SmallVector<SDValue, 6> Ops; + Ops.reserve(NumOps - 1); + Ops.push_back(Op.getOperand(0)); + for (unsigned I = 2; I < NumOps; ++I) + Ops.push_back(Op.getOperand(I)); + + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); + SDValue OldChain = SDValue(Op.getNode(), 1); + SDValue NewChain = SDValue(Intr.getNode(), 0); + DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); + return Intr; +} + // CC is a comparison that will be implemented using an integer or // floating-point comparison. Return the condition code mask for // a branch on true. In the integer case, CCMASK_CMP_UO is set for @@ -1530,6 +1596,8 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) { MaskVal = -(CmpVal & -CmpVal); NewC.ICmpType = SystemZICMP::UnsignedOnly; } + if (!MaskVal) + return; // Check whether the combination of mask, comparison value and comparison // type are suitable. @@ -1571,9 +1639,53 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) { C.CCMask = NewCCMask; } +// Return a Comparison that tests the condition-code result of intrinsic +// node Call against constant integer CC using comparison code Cond. +// Opcode is the opcode of the SystemZISD operation for the intrinsic +// and CCValid is the set of possible condition-code results. +static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, + SDValue Call, unsigned CCValid, uint64_t CC, + ISD::CondCode Cond) { + Comparison C(Call, SDValue()); + C.Opcode = Opcode; + C.CCValid = CCValid; + if (Cond == ISD::SETEQ) + // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3. + C.CCMask = CC < 4 ? 1 << (3 - CC) : 0; + else if (Cond == ISD::SETNE) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1; + else if (Cond == ISD::SETLT || Cond == ISD::SETULT) + // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, + // always true for CC>3. + C.CCMask = CC < 4 ? -1 << (4 - CC) : -1; + else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0; + else if (Cond == ISD::SETLE || Cond == ISD::SETULE) + // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), + // always true for CC>3. + C.CCMask = CC < 4 ? -1 << (3 - CC) : -1; + else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0; + else + llvm_unreachable("Unexpected integer comparison type"); + C.CCMask &= CCValid; + return C; +} + // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond) { + if (CmpOp1.getOpcode() == ISD::Constant) { + uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue(); + unsigned Opcode, CCValid; + if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && + CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && + isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) + return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); + } Comparison C(CmpOp0, CmpOp1); C.CCMask = CCMaskForCondCode(Cond); if (C.Op0.getValueType().isFloatingPoint()) { @@ -1615,6 +1727,17 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, // Emit the comparison instruction described by C. static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { + if (!C.Op1.getNode()) { + SDValue Op; + switch (C.Op0.getOpcode()) { + case ISD::INTRINSIC_W_CHAIN: + Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode); + break; + default: + llvm_unreachable("Invalid comparison operands"); + } + return SDValue(Op.getNode(), Op->getNumValues() - 1); + } if (C.Opcode == SystemZISD::ICMP) return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1, DAG.getConstant(C.ICmpType, MVT::i32)); @@ -1696,7 +1819,6 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, } SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue CmpOp0 = Op.getOperand(2); SDValue CmpOp1 = Op.getOperand(3); @@ -1706,7 +1828,7 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); SDValue Glue = emitCmp(DAG, DL, C); return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), - Chain, DAG.getConstant(C.CCValid, MVT::i32), + Op.getOperand(0), DAG.getConstant(C.CCValid, MVT::i32), DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue); } @@ -2100,6 +2222,7 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32), /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, + /*isTailCall*/false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } @@ -2292,6 +2415,46 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { MVT::i64, HighOp, Low32); } +SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + int64_t OrigBitSize = VT.getSizeInBits(); + SDLoc DL(Op); + + // Get the known-zero mask for the operand. + Op = Op.getOperand(0); + APInt KnownZero, KnownOne; + DAG.computeKnownBits(Op, KnownZero, KnownOne); + unsigned NumSignificantBits = (~KnownZero).getActiveBits(); + if (NumSignificantBits == 0) + return DAG.getConstant(0, VT); + + // Skip known-zero high parts of the operand. + int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits); + BitSize = std::min(BitSize, OrigBitSize); + + // The POPCNT instruction counts the number of bits in each byte. + Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); + Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); + Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); + + // Add up per-byte counts in a binary tree. All bits of Op at + // position larger than BitSize remain zero throughout. + for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { + SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, VT)); + if (BitSize != OrigBitSize) + Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, + DAG.getConstant(((uint64_t)1 << BitSize) - 1, VT)); + Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); + } + + // Extract overall result from high byte. + if (BitSize > 8) + Op = DAG.getNode(ISD::SRL, DL, VT, Op, DAG.getConstant(BitSize - 8, VT)); + + return Op; +} + // Op is an atomic load. Lower it into a normal volatile load. SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { @@ -2505,6 +2668,30 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, Node->getMemoryVT(), Node->getMemOperand()); } +// Return an i32 that contains the value of CC immediately after After, +// whose final operand must be MVT::Glue. +static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) { + SDValue Glue = SDValue(After, After->getNumValues() - 1); + SDValue IPM = DAG.getNode(SystemZISD::IPM, SDLoc(After), MVT::i32, Glue); + return DAG.getNode(ISD::SRL, SDLoc(After), MVT::i32, IPM, + DAG.getConstant(SystemZ::IPM_CC, MVT::i32)); +} + +SDValue +SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opcode, CCValid; + if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode); + SDValue CC = getCCResult(DAG, Glued.getNode()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); + return SDValue(); + } + + return SDValue(); +} + SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -2542,6 +2729,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerUDIVREM(Op, DAG); case ISD::OR: return lowerOR(Op, DAG); + case ISD::CTPOP: + return lowerCTPOP(Op, DAG); case ISD::ATOMIC_SWAP: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); case ISD::ATOMIC_STORE: @@ -2576,6 +2765,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerSTACKRESTORE(Op, DAG); case ISD::PREFETCH: return lowerPREFETCH(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return lowerINTRINSIC_W_CHAIN(Op, DAG); default: llvm_unreachable("Unexpected node to lower"); } @@ -2616,6 +2807,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SEARCH_STRING); OPCODE(IPM); OPCODE(SERIALIZE); + OPCODE(TBEGIN); + OPCODE(TBEGIN_NOFLOAT); + OPCODE(TEND); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); @@ -3443,6 +3637,50 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, return DoneMBB; } +// Update TBEGIN instruction with final opcode and register clobbers. +MachineBasicBlock * +SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode, + bool NoFloat) const { + MachineFunction &MF = *MBB->getParent(); + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + + // Update opcode. + MI->setDesc(TII->get(Opcode)); + + // We cannot handle a TBEGIN that clobbers the stack or frame pointer. + // Make sure to add the corresponding GRSM bits if they are missing. + uint64_t Control = MI->getOperand(2).getImm(); + static const unsigned GPRControlBit[16] = { + 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, + 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 + }; + Control |= GPRControlBit[15]; + if (TFI->hasFP(MF)) + Control |= GPRControlBit[11]; + MI->getOperand(2).setImm(Control); + + // Add GPR clobbers. + for (int I = 0; I < 16; I++) { + if ((Control & GPRControlBit[I]) == 0) { + unsigned Reg = SystemZMC::GR64Regs[I]; + MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } + + // Add FPR clobbers. + if (!NoFloat && (Control & 4) != 0) { + for (int I = 0; I < 16; I++) { + unsigned Reg = SystemZMC::FP64Regs[I]; + MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } + + return MBB; +} + MachineBasicBlock *SystemZTargetLowering:: EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { switch (MI->getOpcode()) { @@ -3684,6 +3922,12 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { return emitStringWrapper(MI, MBB, SystemZ::MVST); case SystemZ::SRSTLoop: return emitStringWrapper(MI, MBB, SystemZ::SRST); + case SystemZ::TBEGIN: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false); + case SystemZ::TBEGIN_nofloat: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); + case SystemZ::TBEGINC: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 23c62c9..56d7ef4 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -87,6 +87,9 @@ enum { // the number of the register. EXTRACT_ACCESS, + // Count number of bits set in operand 0 per byte. + POPCNT, + // Wrappers around the ISD opcodes of the same name. The output and // first input operands are GR128s. The trailing numbers are the // widths of the second operand in bits. @@ -143,6 +146,15 @@ enum { // Perform a serialization operation. (BCR 15,0 or BCR 14,0.) SERIALIZE, + // Transaction begin. The first operand is the chain, the second + // the TDB pointer, and the third the immediate control field. + // Returns chain and glue. + TBEGIN, + TBEGIN_NOFLOAT, + + // Transaction end. Just the chain operand. Returns chain and glue. + TEND, + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or // ATOMIC_LOAD_<op>. // @@ -213,6 +225,8 @@ public: EVT getSetCCResultType(LLVMContext &, EVT) const override; bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; + bool isLegalICmpImmediate(int64_t Imm) const override; + bool isLegalAddImmediate(int64_t Imm) const override; bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, @@ -302,6 +316,7 @@ private: SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, @@ -312,6 +327,7 @@ private: SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; // If the last instruction before MBBI in MBB was some form of COMPARE, // try to replace it with a COMPARE AND BRANCH just before MBBI. @@ -349,6 +365,10 @@ private: MachineBasicBlock *emitStringWrapper(MachineInstr *MI, MachineBasicBlock *BB, unsigned Opcode) const; + MachineBasicBlock *emitTransactionBegin(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode, + bool NoFloat) const; }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 9f59a1c..2d3c9e2 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -473,6 +473,17 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> let Inst{15-0} = BD2; } +class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<16> BD2; + + let Inst{31-16} = op; + let Inst{15-0} = BD2; +} + //===----------------------------------------------------------------------===// // Instruction definitions with semantics //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 5128993..3a02859 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -723,9 +723,12 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Start, End; if (isRxSBGMask(Imm, And.RegSize, Start, End)) { unsigned NewOpcode; - if (And.RegSize == 64) + if (And.RegSize == 64) { NewOpcode = SystemZ::RISBG; - else { + // Prefer RISBGN if available, since it does not clobber CC. + if (STI.hasMiscellaneousExtensions()) + NewOpcode = SystemZ::RISBGN; + } else { NewOpcode = SystemZ::RISBMux; Start &= 31; End &= 31; @@ -1146,17 +1149,22 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode, unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const { switch (Opcode) { - case SystemZ::L: return SystemZ::LT; - case SystemZ::LY: return SystemZ::LT; - case SystemZ::LG: return SystemZ::LTG; - case SystemZ::LGF: return SystemZ::LTGF; - case SystemZ::LR: return SystemZ::LTR; - case SystemZ::LGFR: return SystemZ::LTGFR; - case SystemZ::LGR: return SystemZ::LTGR; - case SystemZ::LER: return SystemZ::LTEBR; - case SystemZ::LDR: return SystemZ::LTDBR; - case SystemZ::LXR: return SystemZ::LTXBR; - default: return 0; + case SystemZ::L: return SystemZ::LT; + case SystemZ::LY: return SystemZ::LT; + case SystemZ::LG: return SystemZ::LTG; + case SystemZ::LGF: return SystemZ::LTGF; + case SystemZ::LR: return SystemZ::LTR; + case SystemZ::LGFR: return SystemZ::LTGFR; + case SystemZ::LGR: return SystemZ::LTGR; + case SystemZ::LER: return SystemZ::LTEBR; + case SystemZ::LDR: return SystemZ::LTDBR; + case SystemZ::LXR: return SystemZ::LTXBR; + // On zEC12 we prefer to use RISBGN. But if there is a chance to + // actually use the condition code, we may turn it back into RISGB. + // Note that RISBG is not really a "load-and-test" instruction, + // but sets the same condition code values, so is OK to use here. + case SystemZ::RISBGN: return SystemZ::RISBG; + default: return 0; } } diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index a7f7747..820f30b 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1061,6 +1061,10 @@ let Defs = [CC] in { def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>; } +// On zEC12 we have a variant of RISBG that does not set CC. +let Predicates = [FeatureMiscellaneousExtensions] in + def RISBGN : RotateSelectRIEf<"risbgn", 0xEC59, GR64, GR64>; + // Forms of RISBG that only affect one word of the destination register. // They do not set CC. let Predicates = [FeatureHighWord] in { @@ -1358,6 +1362,60 @@ let Defs = [CC] in { } //===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureTransactionalExecution] in { + // Transaction Begin + let hasSideEffects = 1, mayStore = 1, + usesCustomInserter = 1, Defs = [CC] in { + def TBEGIN : InstSIL<0xE560, + (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), + "tbegin\t$BD1, $I2", + [(z_tbegin bdaddr12only:$BD1, imm32zx16:$I2)]>; + def TBEGIN_nofloat : Pseudo<(outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), + [(z_tbegin_nofloat bdaddr12only:$BD1, + imm32zx16:$I2)]>; + def TBEGINC : InstSIL<0xE561, + (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), + "tbeginc\t$BD1, $I2", + [(int_s390_tbeginc bdaddr12only:$BD1, + imm32zx16:$I2)]>; + } + + // Transaction End + let hasSideEffects = 1, Defs = [CC], BD2 = 0 in + def TEND : InstS<0xB2F8, (outs), (ins), "tend", [(z_tend)]>; + + // Transaction Abort + let hasSideEffects = 1, isTerminator = 1, isBarrier = 1 in + def TABORT : InstS<0xB2FC, (outs), (ins bdaddr12only:$BD2), + "tabort\t$BD2", + [(int_s390_tabort bdaddr12only:$BD2)]>; + + // Nontransactional Store + let hasSideEffects = 1 in + def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>; + + // Extract Transaction Nesting Depth + let hasSideEffects = 1 in + def ETND : InherentRRE<"etnd", 0xB2EC, GR32, (int_s390_etnd)>; +} + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureProcessorAssist] in { + let hasSideEffects = 1, R4 = 0 in + def PPA : InstRRF<0xB2E8, (outs), (ins GR64:$R1, GR64:$R2, imm32zx4:$R3), + "ppa\t$R1, $R2, $R3", []>; + def : Pat<(int_s390_ppa_txassist GR32:$src), + (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32), + 0, 1)>; +} + +//===----------------------------------------------------------------------===// // Miscellaneous Instructions. //===----------------------------------------------------------------------===// @@ -1382,6 +1440,13 @@ let Defs = [CC] in { def : Pat<(ctlz GR64:$src), (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>; +// Population count. Counts bits set per byte. +let Predicates = [FeaturePopulationCount], Defs = [CC] in { + def POPCNT : InstRRE<0xB9E1, (outs GR64:$R1), (ins GR64:$R2), + "popcnt\t$R1, $R2", + [(set GR64:$R1, (z_popcnt GR64:$R2))]>; +} + // Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext. def : Pat<(i64 (anyext GR32:$src)), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>; diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 51ac5da..3151052 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -79,6 +79,9 @@ def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; def SDT_ZPrefetch : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; +def SDT_ZTBegin : SDTypeProfile<0, 2, + [SDTCisPtrTy<0>, + SDTCisVT<1, i32>]>; //===----------------------------------------------------------------------===// // Node definitions @@ -121,6 +124,7 @@ def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask, def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS", SDT_ZExtractAccess>; +def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>; def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>; def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; @@ -179,6 +183,15 @@ def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; +def z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin, + [SDNPHasChain, SDNPOutGlue, SDNPMayStore, + SDNPSideEffect]>; +def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin, + [SDNPHasChain, SDNPOutGlue, SDNPMayStore, + SDNPSideEffect]>; +def z_tend : SDNode<"SystemZISD::TEND", SDTNone, + [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; + //===----------------------------------------------------------------------===// // Pattern fragments //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td index 1594854..15614c9 100644 --- a/lib/Target/SystemZ/SystemZProcessors.td +++ b/lib/Target/SystemZ/SystemZProcessors.td @@ -39,6 +39,11 @@ def FeatureFPExtension : SystemZFeature< "Assume that the floating-point extension facility is installed" >; +def FeaturePopulationCount : SystemZFeature< + "population-count", "PopulationCount", + "Assume that the population-count facility is installed" +>; + def FeatureFastSerialization : SystemZFeature< "fast-serialization", "FastSerialization", "Assume that the fast-serialization facility is installed" @@ -50,13 +55,30 @@ def FeatureInterlockedAccess1 : SystemZFeature< >; def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">; +def FeatureMiscellaneousExtensions : SystemZFeature< + "miscellaneous-extensions", "MiscellaneousExtensions", + "Assume that the miscellaneous-extensions facility is installed" +>; + +def FeatureTransactionalExecution : SystemZFeature< + "transactional-execution", "TransactionalExecution", + "Assume that the transactional-execution facility is installed" +>; + +def FeatureProcessorAssist : SystemZFeature< + "processor-assist", "ProcessorAssist", + "Assume that the processor-assist facility is installed" +>; + def : Processor<"generic", NoItineraries, []>; def : Processor<"z10", NoItineraries, []>; def : Processor<"z196", NoItineraries, [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, - FeatureFPExtension, FeatureFastSerialization, - FeatureInterlockedAccess1]>; + FeatureFPExtension, FeaturePopulationCount, + FeatureFastSerialization, FeatureInterlockedAccess1]>; def : Processor<"zEC12", NoItineraries, [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, - FeatureFPExtension, FeatureFastSerialization, - FeatureInterlockedAccess1]>; + FeatureFPExtension, FeaturePopulationCount, + FeatureFastSerialization, FeatureInterlockedAccess1, + FeatureMiscellaneousExtensions, + FeatureTransactionalExecution, FeatureProcessorAssist]>; diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp index 31a2bff..de725ae 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -10,7 +10,6 @@ #include "SystemZSubtarget.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "llvm/IR/GlobalValue.h" -#include "llvm/Support/Host.h" using namespace llvm; @@ -28,10 +27,6 @@ SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { std::string CPUName = CPU; if (CPUName.empty()) CPUName = "generic"; -#if defined(__linux__) && defined(__s390x__) - if (CPUName == "generic") - CPUName = sys::getHostCPUName(); -#endif // Parse features string. ParseSubtargetFeatures(CPUName, FS); return *this; @@ -43,7 +38,9 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT, const TargetMachine &TM) : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false), - HasFastSerialization(false), HasInterlockedAccess1(false), + HasPopulationCount(false), HasFastSerialization(false), + HasInterlockedAccess1(false), HasMiscellaneousExtensions(false), + HasTransactionalExecution(false), HasProcessorAssist(false), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() {} diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h index 99cb1ad..c99e552 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.h +++ b/lib/Target/SystemZ/SystemZSubtarget.h @@ -38,8 +38,12 @@ protected: bool HasLoadStoreOnCond; bool HasHighWord; bool HasFPExtension; + bool HasPopulationCount; bool HasFastSerialization; bool HasInterlockedAccess1; + bool HasMiscellaneousExtensions; + bool HasTransactionalExecution; + bool HasProcessorAssist; private: Triple TargetTriple; @@ -86,12 +90,26 @@ public: // Return true if the target has the floating-point extension facility. bool hasFPExtension() const { return HasFPExtension; } + // Return true if the target has the population-count facility. + bool hasPopulationCount() const { return HasPopulationCount; } + // Return true if the target has the fast-serialization facility. bool hasFastSerialization() const { return HasFastSerialization; } // Return true if the target has interlocked-access facility 1. bool hasInterlockedAccess1() const { return HasInterlockedAccess1; } + // Return true if the target has the miscellaneous-extensions facility. + bool hasMiscellaneousExtensions() const { + return HasMiscellaneousExtensions; + } + + // Return true if the target has the transactional-execution facility. + bool hasTransactionalExecution() const { return HasTransactionalExecution; } + + // Return true if the target has the processor-assist facility. + bool hasProcessorAssist() const { return HasProcessorAssist; } + // Return true if GV can be accessed using LARL for reloc model RM // and code model CM. bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM, diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 86baccb..b2f8175 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" +#include "SystemZTargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Transforms/Scalar.h" @@ -108,3 +109,9 @@ void SystemZPassConfig::addPreEmitPass() { TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { return new SystemZPassConfig(this, PM); } + +TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo(SystemZTTIImpl(this, F)); + }); +} diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 181b926..5ded07c 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -39,6 +39,7 @@ public: } // Override LLVMTargetMachine TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + TargetIRAnalysis getTargetIRAnalysis() override; TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp new file mode 100644 index 0000000..3337f63 --- /dev/null +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -0,0 +1,240 @@ +//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a TargetTransformInfo analysis pass specific to the +// SystemZ target machine. It uses the target's detailed information to provide +// more precise answers to certain TTI queries, while letting the target +// independent and default TTI implementations handle the rest. +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/CostTable.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +#define DEBUG_TYPE "systemztti" + +//===----------------------------------------------------------------------===// +// +// SystemZ cost model. +// +//===----------------------------------------------------------------------===// + +unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented yet. + if (BitSize > 64) + return TTI::TCC_Free; + + if (Imm == 0) + return TTI::TCC_Free; + + if (Imm.getBitWidth() <= 64) { + // Constants loaded via lgfi. + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Basic; + // Constants loaded via llilf. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Basic; + // Constants loaded via llihf: + if ((Imm.getZExtValue() & 0xffffffff) == 0) + return TTI::TCC_Basic; + + return 2 * TTI::TCC_Basic; + } + + return 4 * TTI::TCC_Basic; +} + +unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented yet. + if (BitSize > 64) + return TTI::TCC_Free; + + switch (Opcode) { + default: + return TTI::TCC_Free; + case Instruction::GetElementPtr: + // Always hoist the base address of a GetElementPtr. This prevents the + // creation of new constants for every base constant that gets constant + // folded with the offset. + if (Idx == 0) + return 2 * TTI::TCC_Basic; + return TTI::TCC_Free; + case Instruction::Store: + if (Idx == 0 && Imm.getBitWidth() <= 64) { + // Any 8-bit immediate store can by implemented via mvi. + if (BitSize == 8) + return TTI::TCC_Free; + // 16-bit immediate values can be stored via mvhhi/mvhi/mvghi. + if (isInt<16>(Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Instruction::ICmp: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // Comparisons against signed 32-bit immediates implemented via cgfi. + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + // Comparisons against unsigned 32-bit immediates implemented via clgfi. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + } + break; + case Instruction::Add: + case Instruction::Sub: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // We use algfi/slgfi to add/subtract 32-bit unsigned immediates. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + // Or their negation, by swapping addition vs. subtraction. + if (isUInt<32>(-Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Instruction::Mul: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // We use msgfi to multiply by 32-bit signed immediates. + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Instruction::Or: + case Instruction::Xor: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // Masks supported by oilf/xilf. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + // Masks supported by oihf/xihf. + if ((Imm.getZExtValue() & 0xffffffff) == 0) + return TTI::TCC_Free; + } + break; + case Instruction::And: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // Any 32-bit AND operation can by implemented via nilf. + if (BitSize <= 32) + return TTI::TCC_Free; + // 64-bit masks supported by nilf. + if (isUInt<32>(~Imm.getZExtValue())) + return TTI::TCC_Free; + // 64-bit masks supported by nilh. + if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff) + return TTI::TCC_Free; + // Some 64-bit AND operations can be implemented via risbg. + const SystemZInstrInfo *TII = ST->getInstrInfo(); + unsigned Start, End; + if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End)) + return TTI::TCC_Free; + } + break; + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + // Always return TCC_Free for the shift value of a shift instruction. + if (Idx == 1) + return TTI::TCC_Free; + break; + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::IntToPtr: + case Instruction::PtrToInt: + case Instruction::BitCast: + case Instruction::PHI: + case Instruction::Call: + case Instruction::Select: + case Instruction::Ret: + case Instruction::Load: + break; + } + + return SystemZTTIImpl::getIntImmCost(Imm, Ty); +} + +unsigned SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented yet. + if (BitSize > 64) + return TTI::TCC_Free; + + switch (IID) { + default: + return TTI::TCC_Free; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + // These get expanded to include a normal addition/subtraction. + if (Idx == 1 && Imm.getBitWidth() <= 64) { + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + if (isUInt<32>(-Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + // These get expanded to include a normal multiplication. + if (Idx == 1 && Imm.getBitWidth() <= 64) { + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Intrinsic::experimental_stackmap: + if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; + } + return SystemZTTIImpl::getIntImmCost(Imm, Ty); +} + +TargetTransformInfo::PopcntSupportKind +SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) { + assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2"); + if (ST->hasPopulationCount() && TyWidth <= 64) + return TTI::PSK_FastHardware; + return TTI::PSK_Software; +} + diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h new file mode 100644 index 0000000..d498913 --- /dev/null +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -0,0 +1,70 @@ +//===-- SystemZTargetTransformInfo.h - SystemZ-specific TTI ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H + +#include "SystemZTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" + +namespace llvm { + +class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> { + typedef BasicTTIImplBase<SystemZTTIImpl> BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const SystemZSubtarget *ST; + const SystemZTargetLowering *TLI; + + const SystemZSubtarget *getST() const { return ST; } + const SystemZTargetLowering *getTLI() const { return TLI; } + +public: + explicit SystemZTTIImpl(const SystemZTargetMachine *TM, Function &F) + : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + + // Provide value semantics. MSVC requires that we spell all of these out. + SystemZTTIImpl(const SystemZTTIImpl &Arg) + : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} + SystemZTTIImpl(SystemZTTIImpl &&Arg) + : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} + SystemZTTIImpl &operator=(const SystemZTTIImpl &RHS) { + BaseT::operator=(static_cast<const BaseT &>(RHS)); + ST = RHS.ST; + TLI = RHS.TLI; + return *this; + } + SystemZTTIImpl &operator=(SystemZTTIImpl &&RHS) { + BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); + ST = std::move(RHS.ST); + TLI = std::move(RHS.TLI); + return *this; + } + + /// \name Scalar TTI Implementations + /// @{ + + unsigned getIntImmCost(const APInt &Imm, Type *Ty); + + unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty); + unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty); + + TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); + + /// @} +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 75100fb..db543f3 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -110,7 +110,7 @@ MCSymbol *TargetLoweringObjectFile::getSymbolWithGlobalValueBase( NameStr += DL->getPrivateGlobalPrefix(); TM.getNameWithPrefix(NameStr, GV, Mang); NameStr.append(Suffix.begin(), Suffix.end()); - return Ctx->GetOrCreateSymbol(NameStr.str()); + return Ctx->GetOrCreateSymbol(NameStr); } MCSymbol *TargetLoweringObjectFile::getCFIPersonalitySymbol( diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index dd07f81..5807cf7 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -187,5 +187,5 @@ MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV, Mangler &Mang) const { SmallString<60> NameStr; getNameWithPrefix(NameStr, GV, Mang); const TargetLoweringObjectFile *TLOF = getObjFileLowering(); - return TLOF->getContext().GetOrCreateSymbol(NameStr.str()); + return TLOF->getContext().GetOrCreateSymbol(NameStr); } diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index 236cb1b..1a5bf16 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -183,7 +183,9 @@ void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T, } static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M, - formatted_raw_ostream &OS, LLVMCodeGenFileType codegen, char **ErrorMessage) { + raw_pwrite_stream &OS, + LLVMCodeGenFileType codegen, + char **ErrorMessage) { TargetMachine* TM = unwrap(T); Module* Mod = unwrap(M); @@ -229,8 +231,7 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, *ErrorMessage = strdup(EC.message().c_str()); return true; } - formatted_raw_ostream destf(dest); - bool Result = LLVMTargetMachineEmit(T, M, destf, codegen, ErrorMessage); + bool Result = LLVMTargetMachineEmit(T, M, dest, codegen, ErrorMessage); dest.flush(); return Result; } @@ -238,15 +239,14 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T, LLVMModuleRef M, LLVMCodeGenFileType codegen, char** ErrorMessage, LLVMMemoryBufferRef *OutMemBuf) { - std::string CodeString; - raw_string_ostream OStream(CodeString); - formatted_raw_ostream Out(OStream); - bool Result = LLVMTargetMachineEmit(T, M, Out, codegen, ErrorMessage); + SmallString<0> CodeString; + raw_svector_ostream OStream(CodeString); + bool Result = LLVMTargetMachineEmit(T, M, OStream, codegen, ErrorMessage); OStream.flush(); - std::string &Data = OStream.str(); - *OutMemBuf = LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.c_str(), - Data.length(), ""); + StringRef Data = OStream.str(); + *OutMemBuf = + LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.data(), Data.size(), ""); return Result; } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index c24805a..93c6ea0 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2571,7 +2571,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, SmallString<16> Tmp; Tmp += Base; Tmp += ' '; - Op.setTokenValue(Tmp.str()); + Op.setTokenValue(Tmp); // If this instruction starts with an 'f', then it is a floating point stack // instruction. These come in up to three forms for 32-bit, 64-bit, and diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 65461af..f265f1d 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -33,15 +33,12 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "X86GenAsmWriter.inc" -void X86ATTInstPrinter::printRegName(raw_ostream &OS, - unsigned RegNo) const { - OS << markup("<reg:") - << '%' << getRegisterName(RegNo) - << markup(">"); +void X86ATTInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << markup("<reg:") << '%' << getRegisterName(RegNo) << markup(">"); } void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { const MCInstrDesc &Desc = MII.get(MI->getOpcode()); uint64_t TSFlags = Desc.TSFlags; @@ -60,7 +57,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, // InstrInfo.td as soon as Requires clause is supported properly // for InstAlias. if (MI->getOpcode() == X86::CALLpcrel32 && - (getAvailableFeatures() & X86::Mode64Bit) != 0) { + (STI.getFeatureBits() & X86::Mode64Bit) != 0) { OS << "\tcallq\t"; printPCRelImm(MI, 0, OS); } @@ -169,8 +166,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, printRegName(O, Op.getReg()); } else if (Op.isImm()) { // Print X86 immediates as signed values. - O << markup("<imm:") - << '$' << formatImm((int64_t)Op.getImm()) + O << markup("<imm:") << '$' << formatImm((int64_t)Op.getImm()) << markup(">"); // If there are no instruction-specific comments, add a comment clarifying @@ -182,24 +178,22 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << markup("<imm:") - << '$' << *Op.getExpr() - << markup(">"); + O << markup("<imm:") << '$' << *Op.getExpr() << markup(">"); } } void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O) { - const MCOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg); - const MCOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg); - const MCOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp); - const MCOperand &SegReg = MI->getOperand(Op+X86::AddrSegmentReg); + const MCOperand &BaseReg = MI->getOperand(Op + X86::AddrBaseReg); + const MCOperand &IndexReg = MI->getOperand(Op + X86::AddrIndexReg); + const MCOperand &DispSpec = MI->getOperand(Op + X86::AddrDisp); + const MCOperand &SegReg = MI->getOperand(Op + X86::AddrSegmentReg); O << markup("<mem:"); // If this has a segment register, print it. if (SegReg.getReg()) { - printOperand(MI, Op+X86::AddrSegmentReg, O); + printOperand(MI, Op + X86::AddrSegmentReg, O); O << ':'; } @@ -215,16 +209,14 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, if (IndexReg.getReg() || BaseReg.getReg()) { O << '('; if (BaseReg.getReg()) - printOperand(MI, Op+X86::AddrBaseReg, O); + printOperand(MI, Op + X86::AddrBaseReg, O); if (IndexReg.getReg()) { O << ','; - printOperand(MI, Op+X86::AddrIndexReg, O); - unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm(); + printOperand(MI, Op + X86::AddrIndexReg, O); + unsigned ScaleVal = MI->getOperand(Op + X86::AddrScaleAmt).getImm(); if (ScaleVal != 1) { - O << ',' - << markup("<imm:") - << ScaleVal // never printed in hex. + O << ',' << markup("<imm:") << ScaleVal // never printed in hex. << markup(">"); } } @@ -236,13 +228,13 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, void X86ATTInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op, raw_ostream &O) { - const MCOperand &SegReg = MI->getOperand(Op+1); + const MCOperand &SegReg = MI->getOperand(Op + 1); O << markup("<mem:"); // If this has a segment register, print it. if (SegReg.getReg()) { - printOperand(MI, Op+1, O); + printOperand(MI, Op + 1, O); O << ':'; } @@ -267,13 +259,13 @@ void X86ATTInstPrinter::printDstIdx(const MCInst *MI, unsigned Op, void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op, raw_ostream &O) { const MCOperand &DispSpec = MI->getOperand(Op); - const MCOperand &SegReg = MI->getOperand(Op+1); + const MCOperand &SegReg = MI->getOperand(Op + 1); O << markup("<mem:"); // If this has a segment register, print it. if (SegReg.getReg()) { - printOperand(MI, Op+1, O); + printOperand(MI, Op + 1, O); O << ':'; } @@ -289,7 +281,6 @@ void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op, void X86ATTInstPrinter::printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &O) { - O << markup("<imm:") - << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff) + O << markup("<imm:") << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff) << markup(">"); } diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index f71cb81..62b6b73 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -24,14 +24,12 @@ class MCOperand; class X86ATTInstPrinter final : public MCInstPrinter { public: X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) - : MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); - } + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, + const MCSubtargetInfo &STI) override; // Autogenerated by tblgen, returns true if we successfully printed an // alias. @@ -142,7 +140,6 @@ public: private: bool HasCustomInstComment; }; - } #endif diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 91d1828..4d92daf 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -33,7 +33,8 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, - StringRef Annot) { + StringRef Annot, + const MCSubtargetInfo &STI) { const MCInstrDesc &Desc = MII.get(MI->getOpcode()); uint64_t TSFlags = Desc.TSFlags; diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index 2150144..6e371da 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -28,7 +28,8 @@ public: : MCInstPrinter(MAI, MII, MRI) {} void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, + const MCSubtargetInfo &STI) override; // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index a400d46..b84c983 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -360,7 +360,7 @@ public: ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) : ELFX86AsmBackend(T, OSABI, CPU) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, ELF::EM_386); } }; @@ -370,7 +370,7 @@ public: ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) : ELFX86AsmBackend(T, OSABI, CPU) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, ELF::EM_X86_64); } @@ -381,7 +381,7 @@ public: ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) : ELFX86AsmBackend(T, OSABI, CPU) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86ELFObjectWriter(OS, /*IsELF64*/ true, OSABI, ELF::EM_X86_64); } }; @@ -395,7 +395,7 @@ public: , Is64Bit(is64Bit) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86WinCOFFObjectWriter(OS, Is64Bit); } }; @@ -752,7 +752,7 @@ public: StringRef CPU) : DarwinX86AsmBackend(T, MRI, CPU, false) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86MachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_I386, MachO::CPU_SUBTYPE_I386_ALL); @@ -772,7 +772,7 @@ public: StringRef CPU, MachO::CPUSubTypeX86 st) : DarwinX86AsmBackend(T, MRI, CPU, true), Subtype(st) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86MachObjectWriter(OS, /*Is64Bit=*/true, MachO::CPU_TYPE_X86_64, Subtype); } diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index 76a9d2b..4508883 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -22,7 +22,8 @@ namespace { public: X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine); - virtual ~X86ELFObjectWriter(); + ~X86ELFObjectWriter() override; + protected: unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; @@ -248,9 +249,8 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, return getRelocType32(Modifier, getType32(Type), IsPCRel); } -MCObjectWriter *llvm::createX86ELFObjectWriter(raw_ostream &OS, - bool IsELF64, - uint8_t OSABI, +MCObjectWriter *llvm::createX86ELFObjectWriter(raw_pwrite_stream &OS, + bool IsELF64, uint8_t OSABI, uint16_t EMachine) { MCELFObjectTargetWriter *MOTW = new X86ELFObjectWriter(IsELF64, OSABI, EMachine); diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 9b98a3e..e27b7cb 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -39,7 +39,7 @@ public: : MCII(mcii), Ctx(ctx) { } - ~X86MCCodeEmitter() {} + ~X86MCCodeEmitter() override {} bool is64BitMode(const MCSubtargetInfo &STI) const { return (STI.getFeatureBits() & X86::Mode64Bit) != 0; diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 0946326..5bdd844 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -80,7 +80,7 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU, std::string ArchFS = X86_MC::ParseX86Triple(TT); if (!FS.empty()) { if (!ArchFS.empty()) - ArchFS = ArchFS + "," + FS.str(); + ArchFS = (Twine(ArchFS) + "," + FS).str(); else ArchFS = FS; } @@ -207,14 +207,13 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createX86MCInstPrinter(const Target &T, +static MCInstPrinter *createX86MCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) - return new X86ATTInstPrinter(MAI, MII, MRI, STI); + return new X86ATTInstPrinter(MAI, MII, MRI); if (SyntaxVariant == 1) return new X86IntelInstPrinter(MAI, MII, MRI); return nullptr; diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 6f50f11..dcdae1d 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -31,10 +31,11 @@ class Target; class Triple; class StringRef; class raw_ostream; +class raw_pwrite_stream; extern Target TheX86_32Target, TheX86_64Target; -/// DWARFFlavour - Flavour of dwarf regnumbers +/// Flavour of dwarf regnumbers /// namespace DWARFFlavour { enum { @@ -42,7 +43,7 @@ namespace DWARFFlavour { }; } -/// N86 namespace - Native X86 register numbers +/// Native X86 register numbers /// namespace N86 { enum { @@ -57,9 +58,8 @@ namespace X86_MC { void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI); - /// createX86MCSubtargetInfo - Create a X86 MCSubtargetInfo instance. - /// This is exposed so Asm parser, etc. do not need to go through - /// TargetRegistry. + /// Create a X86 MCSubtargetInfo instance. This is exposed so Asm parser, etc. + /// do not need to go through TargetRegistry. MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS); } @@ -78,27 +78,25 @@ MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, /// /// Takes ownership of \p AB and \p CE. MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, - raw_ostream &OS, MCCodeEmitter *CE, + raw_pwrite_stream &OS, MCCodeEmitter *CE, bool RelaxAll); -/// createX86MachObjectWriter - Construct an X86 Mach-O object writer. -MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS, - bool Is64Bit, +/// Construct an X86 Mach-O object writer. +MCObjectWriter *createX86MachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype); -/// createX86ELFObjectWriter - Construct an X86 ELF object writer. -MCObjectWriter *createX86ELFObjectWriter(raw_ostream &OS, - bool IsELF64, - uint8_t OSABI, - uint16_t EMachine); -/// createX86WinCOFFObjectWriter - Construct an X86 Win COFF object writer. -MCObjectWriter *createX86WinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit); +/// Construct an X86 ELF object writer. +MCObjectWriter *createX86ELFObjectWriter(raw_pwrite_stream &OS, bool IsELF64, + uint8_t OSABI, uint16_t EMachine); +/// Construct an X86 Win COFF object writer. +MCObjectWriter *createX86WinCOFFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit); -/// createX86_64MachORelocationInfo - Construct X86-64 Mach-O relocation info. +/// Construct X86-64 Mach-O relocation info. MCRelocationInfo *createX86_64MachORelocationInfo(MCContext &Ctx); -/// createX86_64ELFORelocationInfo - Construct X86-64 ELF relocation info. +/// Construct X86-64 ELF relocation info. MCRelocationInfo *createX86_64ELFRelocationInfo(MCContext &Ctx); } // End llvm namespace diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 7a83f4c..38539cd 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -575,9 +575,8 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS, - bool Is64Bit, - uint32_t CPUType, +MCObjectWriter *llvm::createX86MachObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) { return createMachObjectWriter(new X86MachObjectWriter(Is64Bit, CPUType, diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index e1df5c2..bd1bc99 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -25,7 +25,7 @@ namespace { class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { public: X86WinCOFFObjectWriter(bool Is64Bit); - virtual ~X86WinCOFFObjectWriter(); + ~X86WinCOFFObjectWriter() override; unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection, @@ -90,7 +90,7 @@ unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target, llvm_unreachable("Unsupported COFF machine type."); } -MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit) { MCWinCOFFObjectTargetWriter *MOTW = new X86WinCOFFObjectWriter(Is64Bit); return createWinCOFFObjectWriter(MOTW, OS); diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index 5690efe..92f42b6 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -18,8 +18,8 @@ class X86WinCOFFStreamer : public MCWinCOFFStreamer { Win64EH::UnwindEmitter EHStreamer; public: X86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter *CE, - raw_ostream &OS) - : MCWinCOFFStreamer(C, AB, *CE, OS) { } + raw_pwrite_stream &OS) + : MCWinCOFFStreamer(C, AB, *CE, OS) {} void EmitWinEHHandlerData() override; void EmitWindowsUnwindTables() override; @@ -49,8 +49,8 @@ void X86WinCOFFStreamer::FinishImpl() { } MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, - raw_ostream &OS, MCCodeEmitter *CE, - bool RelaxAll) { + raw_pwrite_stream &OS, + MCCodeEmitter *CE, bool RelaxAll) { X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, AB, CE, OS); S->getAssembler().setRelaxAll(RelaxAll); return S; diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 4f9836d..d13f155 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -273,17 +273,15 @@ def : SilvermontProc<"silvermont">; def : SilvermontProc<"slm">; // Legacy alias. // "Arrandale" along with corei3 and corei5 -class NehalemProc<string Name, list<SubtargetFeature> AdditionalFeatures> - : ProcessorModel<Name, SandyBridgeModel, !listconcat([ - FeatureSSE42, - FeatureCMPXCHG16B, - FeatureSlowBTMem, - FeatureFastUAMem, - FeaturePOPCNT - ], - AdditionalFeatures)>; -def : NehalemProc<"nehalem", []>; -def : NehalemProc<"corei7", [FeatureAES]>; +class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [ + FeatureSSE42, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeatureFastUAMem, + FeaturePOPCNT + ]>; +def : NehalemProc<"nehalem">; +def : NehalemProc<"corei7">; // Westmere is a similar machine to nehalem with some additional features. // Westmere is the corei3/i5/i7 path from nehalem to sandybridge diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index f6033a7..2ed4975 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -523,7 +523,6 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { // must be registered in .sxdata. Use of any unregistered handlers will // cause the process to terminate immediately. LLVM does not know how to // register any SEH handlers, so its object files should be safe. - S->setAbsolute(); OutStreamer.EmitSymbolAttribute(S, MCSA_Global); OutStreamer.EmitAssignment( S, MCConstantExpr::Create(int64_t(1), MMI->getContext())); @@ -723,28 +722,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } } - if (TT.isOSBinFormatELF()) { - const TargetLoweringObjectFileELF &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Output stubs for external and common global variables. - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); - - for (const auto &Stub : Stubs) { - OutStreamer.EmitLabel(Stub.first); - OutStreamer.EmitSymbolValue(Stub.second.getPointer(), - TD->getPointerSize()); - } - Stubs.clear(); - } - + if (TT.isOSBinFormatELF()) SM.serializeToStackMapSection(); - } } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index cba140f..cdf10a7 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -2417,6 +2417,8 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); // FIXME may need to add RegState::Debug to any registers produced, // although ESP/EBP should be the only ones at the moment. + assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM) .addImm(0) .addMetadata(DI->getVariable()) diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index fb12ce5..5da7acf 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2187,7 +2187,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) break; - unsigned ShlOp, Op; + unsigned ShlOp, AddOp, Op; MVT CstVT = NVT; // Check the minimum bitwidth for the new constant. @@ -2208,6 +2208,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case MVT::i32: assert(CstVT == MVT::i8); ShlOp = X86::SHL32ri; + AddOp = X86::ADD32rr; switch (Opcode) { default: llvm_unreachable("Impossible opcode"); @@ -2219,6 +2220,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case MVT::i64: assert(CstVT == MVT::i8 || CstVT == MVT::i32); ShlOp = X86::SHL64ri; + AddOp = X86::ADD64rr; switch (Opcode) { default: llvm_unreachable("Impossible opcode"); @@ -2232,6 +2234,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Emit the smaller op and the shift. SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT); SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst); + if (ShlVal == 1) + return CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0), + SDValue(New, 0)); return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0), getI8Imm(ShlVal)); } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8b92e70..c32412a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -2142,6 +2143,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile*/false, /*AlwaysInline=*/true, + /*isTailCall*/false, MachinePointerInfo(), MachinePointerInfo()); } @@ -2277,6 +2279,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); + const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); const Function* Fn = MF.getFunction(); if (Fn->hasExternalLinkage() && @@ -2416,6 +2419,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, MFI->CreateFixedObject(1, StackSize, true)); } + MachineModuleInfo &MMI = MF.getMMI(); + const Function *WinEHParent = nullptr; + if (IsWin64 && MMI.hasWinEHFuncInfo(Fn)) + WinEHParent = MMI.getWinEHParent(Fn); + bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn; + bool IsWinEHParent = WinEHParent && WinEHParent == Fn; + // Figure out if XMM registers are in use. assert(!(MF.getTarget().Options.UseSoftFloat && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && @@ -2452,7 +2462,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, } if (IsWin64) { - const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); // Get to the caller-allocated home save location. Add 8 to account // for the return address. int HomeOffset = TFI.getOffsetOfLocalArea() + 8; @@ -2505,6 +2514,27 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); + } else if (IsWinEHOutlined) { + // Get to the caller-allocated home save location. Add 8 to account + // for the return address. + int HomeOffset = TFI.getOffsetOfLocalArea() + 8; + FuncInfo->setRegSaveFrameIndex(MFI->CreateFixedObject( + /*Size=*/1, /*SPOffset=*/HomeOffset + 8, /*Immutable=*/false)); + + MMI.getWinEHFuncInfo(Fn) + .CatchHandlerParentFrameObjIdx[const_cast<Function *>(Fn)] = + FuncInfo->getRegSaveFrameIndex(); + + // Store the second integer parameter (rdx) into rsp+16 relative to the + // stack pointer at the entry of the function. + SDValue RSFIN = + DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy()); + unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass); + SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64); + Chain = DAG.getStore( + Val.getValue(1), dl, Val, RSFIN, + MachinePointerInfo::getFixedStack(FuncInfo->getRegSaveFrameIndex()), + /*isVolatile=*/true, /*isNonTemporal=*/false, /*Alignment=*/0); } if (isVarArg && MFI->hasMustTailInVarArgFunc()) { @@ -2571,6 +2601,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setArgumentStackSize(StackSize); + if (IsWinEHParent) { + int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); + SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64); + MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI; + SDValue Neg2 = DAG.getConstant(-2, MVT::i64); + Chain = DAG.getStore(Chain, dl, Neg2, StackSlot, + MachinePointerInfo::getFixedStack(UnwindHelpFI), + /*isVolatile=*/true, + /*isNonTemporal=*/false, /*Alignment=*/0); + } + return Chain; } @@ -4420,6 +4461,29 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, SDLoc dl(Op); SDValue V; bool First = true; + + // SSE4.1 - use PINSRB to insert each byte directly. + if (Subtarget->hasSSE41()) { + for (unsigned i = 0; i < 16; ++i) { + bool isNonZero = (NonZeros & (1 << i)) != 0; + if (isNonZero) { + if (First) { + if (NumZero) + V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl); + else + V = DAG.getUNDEF(MVT::v16i8); + First = false; + } + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, + MVT::v16i8, V, Op.getOperand(i), + DAG.getIntPtrConstant(i)); + } + } + + return V; + } + + // Pre-SSE4.1 - merge byte pairs and insert with PINSRW. for (unsigned i = 0; i < 16; ++i) { bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; if (ThisIsNonZero && First) { @@ -5650,14 +5714,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } + // We can't directly insert an i8 or i16 into a vector, so zero extend + // it to i32 first. if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); - Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); if (VT.is256BitVector()) { - SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); - Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl); + if (Subtarget->hasAVX()) { + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v8i32, Item); + Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); + } else { + // Without AVX, we need to extend to a 128-bit vector and then + // insert into the 256-bit vector. + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); + SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); + Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl); + } } else { assert(VT.is128BitVector() && "Expected an SSE value type!"); + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } return DAG.getNode(ISD::BITCAST, dl, VT, Item); @@ -5877,7 +5951,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); unsigned NumElems = ResVT.getVectorNumElements(); - if(ResVT.is256BitVector()) + if (ResVT.is256BitVector()) return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl); if (Op.getNumOperands() == 4) { @@ -9281,15 +9355,6 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, if (isShuffleEquivalent(V1, V2, Mask, {5, 1, 7, 3})) return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1); - // If we have a single input to the zero element, insert that into V1 if we - // can do so cheaply. - int NumV2Elements = - std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }); - if (NumV2Elements == 1 && Mask[0] >= 4) - if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( - DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) - return Insertion; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) return Blend; @@ -9432,15 +9497,6 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef<int> Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - // If we have a single input to the zero element, insert that into V1 if we - // can do so cheaply. - int NumV2Elements = - std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 8; }); - if (NumV2Elements == 1 && Mask[0] >= 8) - if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( - DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) - return Insertion; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) return Blend; @@ -9811,6 +9867,18 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); ArrayRef<int> Mask = SVOp->getMask(); + // If we have a single input to the zero element, insert that into V1 if we + // can do so cheaply. + int NumElts = VT.getVectorNumElements(); + int NumV2Elements = std::count_if(Mask.begin(), Mask.end(), [NumElts](int M) { + return M >= NumElts; + }); + + if (NumV2Elements == 1 && Mask[0] >= NumElts) + if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( + DL, VT, V1, V2, Mask, Subtarget, DAG)) + return Insertion; + // There is a really nice hard cut-over between AVX1 and AVX2 that means we can // check for those subtargets here and avoid much of the subtarget querying in // the per-vector-type lowering routines. With AVX1 we have essentially *zero* @@ -11903,7 +11971,7 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, // Now we have only mask extension assert(InVT.getVectorElementType() == MVT::i1); SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType()); - const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue(); + const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue(); SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment(); SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP, @@ -11979,7 +12047,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { } SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType()); - const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue(); + const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue(); SDValue CP = DAG.getConstantPool(C, getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment(); SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP, @@ -12750,6 +12818,16 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, return SDValue(); } +/// If we have at least two divisions that use the same divisor, convert to +/// multplication by a reciprocal. This may need to be adjusted for a given +/// CPU if a division's cost is not at least twice the cost of a multiplication. +/// This is because we still need one division to calculate the reciprocal and +/// then we need two multiplies by that reciprocal as replacements for the +/// original divisions. +bool X86TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { + return NumUsers > 1; +} + static bool isAllOnes(SDValue V) { ConstantSDNode *C = dyn_cast<ConstantSDNode>(V); return C && C->isAllOnesValue(); @@ -14427,7 +14505,7 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget, return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(24), 8, /*isVolatile*/false, - false, + false, false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } @@ -15220,10 +15298,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, } case PREFETCH: { SDValue Hint = Op.getOperand(6); - unsigned HintVal; - if (dyn_cast<ConstantSDNode> (Hint) == nullptr || - (HintVal = dyn_cast<ConstantSDNode> (Hint)->getZExtValue()) > 1) - llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1"); + unsigned HintVal = cast<ConstantSDNode>(Hint)->getZExtValue(); + assert(HintVal < 2 && "Wrong prefetch hint in intrinsic: should be 0 or 1"); unsigned Opcode = (HintVal ? IntrData->Opc1 : IntrData->Opc0); SDValue Chain = Op.getOperand(0); SDValue Mask = Op.getOperand(2); @@ -24175,7 +24251,7 @@ TargetLowering::ConstraintWeight break; case 'G': case 'C': - if (dyn_cast<ConstantFP>(CallOperandVal)) { + if (isa<ConstantFP>(CallOperandVal)) { weight = CW_Constant; } break; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index dd20ec2..5130c37 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -1072,6 +1072,9 @@ namespace llvm { /// Use rcp* to speed up fdiv calculations. SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const override; + + /// Reassociate floating point divisions into multiply by reciprocal. + bool combineRepeatedFPDivisors(unsigned NumUsers) const override; }; namespace X86 { diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 509602f..0959162 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2971,60 +2971,36 @@ multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w, itins, HasBWI, IsCommutable>; } -multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT, - ValueType SrcVT, RegisterClass KRC, RegisterClass RC, - PatFrag memop_frag, X86MemOperand x86memop, - PatFrag scalar_mfrag, X86MemOperand x86scalar_mop, - string BrdcstStr, OpndItins itins, bit IsCommutable = 0> { - let isCommutable = IsCommutable in - { - def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, EVEX_4V; - def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, RC:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"), - [], itins.rr>, EVEX_4V, EVEX_K; - def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, RC:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}} {z}" , - "|$dst {${mask}} {z}, $src1, $src2}"), - [], itins.rr>, EVEX_4V, EVEX_KZ; - } +multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins, + SDNode OpNode,X86VectorVTInfo _Src, + X86VectorVTInfo _Dst, bit IsCommutable = 0> { + defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst), + (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr, + "$src2, $src1","$src1, $src2", + (_Dst.VT (OpNode + (_Src.VT _Src.RC:$src1), + (_Src.VT _Src.RC:$src2))), + "",itins.rr, IsCommutable>, + AVX512BIBase, EVEX_4V; let mayLoad = 1 in { - def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86memop:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, EVEX_4V; - def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, x86memop:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"), - [], itins.rm>, EVEX_4V, EVEX_K; - def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, x86memop:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"), - [], itins.rm>, EVEX_4V, EVEX_KZ; - def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86scalar_mop:$src2), - !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr, - ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"), - [], itins.rm>, EVEX_4V, EVEX_B; - def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), - !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr, - ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", - BrdcstStr, "}"), - [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K; - def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), - !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr, - ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}", - BrdcstStr, "}"), - [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ; + defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), + (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr, + "$src2, $src1", "$src1, $src2", + (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), + (bitconvert (_Src.LdFrag addr:$src2)))), + "", itins.rm>, + AVX512BIBase, EVEX_4V; + + defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), + (ins _Src.RC:$src1, _Dst.ScalarMemOp:$src2), + OpcodeStr, + "${src2}"##_Dst.BroadcastStr##", $src1", + "$src1, ${src2}"##_Dst.BroadcastStr, + (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bc_v16i32 + (_Dst.VT (X86VBroadcast + (_Dst.ScalarLdFrag addr:$src2)))))), + "", itins.rm>, + AVX512BIBase, EVEX_4V, EVEX_B; } } @@ -3039,24 +3015,13 @@ defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul, defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul, SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; -defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512, - loadv8i64, i512mem, loadi64, i64mem, "{1to8}", - SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, - EVEX_CD8<64, CD8VF>, VEX_W; - -defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512, - loadv8i64, i512mem, loadi64, i64mem, "{1to8}", - SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; +defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", SSE_INTALU_ITINS_P, + X86pmuldq, v16i32_info, v8i64_info, 1>, + T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; -def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))), - (VPMULUDQZrr VR512:$src1, VR512:$src2)>; - -def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMULUDQZrr VR512:$src1, VR512:$src2)>; -def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMULDQZrr VR512:$src1, VR512:$src2)>; +defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P, + X86pmuludq, v16i32_info, v8i64_info, 1>, + EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; @@ -3208,7 +3173,7 @@ defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or, defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, SSE_INTALU_ITINS_P, HasAVX512, 1>; defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, - SSE_INTALU_ITINS_P, HasAVX512, 1>; + SSE_INTALU_ITINS_P, HasAVX512, 0>; //===----------------------------------------------------------------------===// // AVX-512 FP arithmetic @@ -3743,16 +3708,19 @@ multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3), - OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), - (OpNode _.RC:$src1, _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>, + OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), + !strconcat("$src2, ${src3}", _.BroadcastStr ), + (OpNode _.RC:$src1, + _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>, AVX512FMA3Base, EVEX_B; } } // Constraints = "$src1 = $dst" let Constraints = "$src1 = $dst" in { // Omitting the parameter OpNode (= null_frag) disables ISel pattern matching. -multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, - SDPatternOperator OpNode> { +multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr, + X86VectorVTInfo _, + SDPatternOperator OpNode> { defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", @@ -3772,7 +3740,6 @@ multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231, SDPatternOperator OpNode> { defm v213r : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix), VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>; - defm v231r : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix), VTI>, EVEX_CD8<VTI.EltSize, CD8VF>; } @@ -3794,12 +3761,14 @@ let ExeDomain = SSEPackedSingle in { let ExeDomain = SSEPackedDouble in { defm NAME##PDZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr, v8f64_info, OpNode>, - avx512_fma3_round_forms<opc213, OpcodeStr, - v8f64_info, OpNodeRnd>, EVEX_V512, VEX_W; + avx512_fma3_round_forms<opc213, OpcodeStr, v8f64_info, + OpNodeRnd>, EVEX_V512, VEX_W; defm NAME##PDZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr, - v4f64x_info, OpNode>, EVEX_V256, VEX_W; + v4f64x_info, OpNode>, + EVEX_V256, VEX_W; defm NAME##PDZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr, - v2f64x_info, OpNode>, EVEX_V128, VEX_W; + v2f64x_info, OpNode>, + EVEX_V128, VEX_W; } } @@ -3830,26 +3799,29 @@ multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode, } } // Constraints = "$src1 = $dst" - -multiclass avx512_fma3p_m132_f<bits<8> opc, - string OpcodeStr, - SDNode OpNode> { +multiclass avx512_fma3p_m132_f<bits<8> opc, string OpcodeStr, SDNode OpNode> { let ExeDomain = SSEPackedSingle in { defm NAME##PSZ : avx512_fma3p_m132<opc, OpcodeStr##ps, - OpNode,v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; + OpNode,v16f32_info>, EVEX_V512, + EVEX_CD8<32, CD8VF>; defm NAME##PSZ256 : avx512_fma3p_m132<opc, OpcodeStr##ps, - OpNode, v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>; + OpNode, v8f32x_info>, EVEX_V256, + EVEX_CD8<32, CD8VF>; defm NAME##PSZ128 : avx512_fma3p_m132<opc, OpcodeStr##ps, - OpNode, v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>; + OpNode, v4f32x_info>, EVEX_V128, + EVEX_CD8<32, CD8VF>; } let ExeDomain = SSEPackedDouble in { defm NAME##PDZ : avx512_fma3p_m132<opc, OpcodeStr##pd, - OpNode, v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VF>; + OpNode, v8f64_info>, EVEX_V512, + VEX_W, EVEX_CD8<32, CD8VF>; defm NAME##PDZ256 : avx512_fma3p_m132<opc, OpcodeStr##pd, - OpNode, v4f64x_info>, EVEX_V256, VEX_W, EVEX_CD8<32, CD8VF>; + OpNode, v4f64x_info>, EVEX_V256, + VEX_W, EVEX_CD8<32, CD8VF>; defm NAME##PDZ128 : avx512_fma3p_m132<opc, OpcodeStr##pd, - OpNode, v2f64x_info>, EVEX_V128, VEX_W, EVEX_CD8<32, CD8VF>; + OpNode, v2f64x_info>, EVEX_V128, + VEX_W, EVEX_CD8<32, CD8VF>; } } @@ -3860,7 +3832,6 @@ defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>; defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>; defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>; - // Scalar FMA let Constraints = "$src1 = $dst" in { multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, @@ -3883,7 +3854,6 @@ multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, (OpVT (OpNode RC:$src2, RC:$src1, (mem_frag addr:$src3))))]>; } - } // Constraints = "$src1 = $dst" defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X, @@ -3920,6 +3890,7 @@ let hasSideEffects = 0 in { EVEX_4V; } // hasSideEffects = 0 } + let Predicates = [HasAVX512] in { defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 78efc4d..5e19ad4 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1216,10 +1216,10 @@ def X86testpat : PatFrag<(ops node:$lhs, node:$rhs), let isCompare = 1 in { let Defs = [EFLAGS] in { let isCommutable = 1 in { - def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>; - def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>; - def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat, MRMSrcReg>; - def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat, MRMSrcReg>; + def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat>; + def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat>; + def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat>; + def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat>; } // isCommutable def TEST8rm : BinOpRM_F<0x84, "test", Xi8 , X86testpat>; diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 18bbe5d..45e6d0a 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -1232,7 +1232,11 @@ def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst), // with implicit zero-extension instead of a 64-bit and if the immediate has at // least 32 bits of leading zeros. If in addition the last 32 bits can be // represented with a sign extension of a 8 bit constant, use that. +// This can also reduce instruction size by eliminating the need for the REX +// prefix. +// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32. +let AddedComplexity = 1 in { def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm), (SUBREG_TO_REG (i64 0), @@ -1248,8 +1252,13 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm), (EXTRACT_SUBREG GR64:$src, sub_32bit), (i32 (GetLo32XForm imm:$imm))), sub_32bit)>; +} // AddedComplexity = 1 +// AddedComplexity is needed due to the increased complexity on the +// i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all +// the MOVZX patterns keeps thems together in DAGIsel tables. +let AddedComplexity = 1 in { // r & (2^16-1) ==> movz def : Pat<(and GR32:$src1, 0xffff), (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>; @@ -1272,6 +1281,7 @@ def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), (MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)), sub_32bit)>; // r & (2^16-1) ==> movz +let AddedComplexity = 1 in // Give priority over i64immZExt32. def : Pat<(and GR64:$src, 0xffff), (SUBREG_TO_REG (i64 0), (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))), @@ -1290,6 +1300,7 @@ def : Pat<(and GR16:$src1, 0xff), (EXTRACT_SUBREG (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>, Requires<[In64BitMode]>; +} // AddedComplexity = 1 // sext_inreg patterns diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 0bdabdf..b75a9f4 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -631,53 +631,53 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec, def masked_load_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_load node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedLoadSDNode>(N)) - return cast<MaskedLoadSDNode>(N)->getAlignment() >= 16; + if (auto *Load = dyn_cast<MaskedLoadSDNode>(N)) + return Load->getAlignment() >= 16; return false; }]>; def masked_load_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_load node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedLoadSDNode>(N)) - return cast<MaskedLoadSDNode>(N)->getAlignment() >= 32; + if (auto *Load = dyn_cast<MaskedLoadSDNode>(N)) + return Load->getAlignment() >= 32; return false; }]>; def masked_load_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_load node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedLoadSDNode>(N)) - return cast<MaskedLoadSDNode>(N)->getAlignment() >= 64; + if (auto *Load = dyn_cast<MaskedLoadSDNode>(N)) + return Load->getAlignment() >= 64; return false; }]>; def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_load node:$src1, node:$src2, node:$src3), [{ - return (dyn_cast<MaskedLoadSDNode>(N) != 0); + return isa<MaskedLoadSDNode>(N); }]>; def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_store node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedStoreSDNode>(N)) - return cast<MaskedStoreSDNode>(N)->getAlignment() >= 16; + if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) + return Store->getAlignment() >= 16; return false; }]>; def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_store node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedStoreSDNode>(N)) - return cast<MaskedStoreSDNode>(N)->getAlignment() >= 32; + if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) + return Store->getAlignment() >= 32; return false; }]>; def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_store node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedStoreSDNode>(N)) - return cast<MaskedStoreSDNode>(N)->getAlignment() >= 64; + if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) + return Store->getAlignment() >= 64; return false; }]>; def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_store node:$src1, node:$src2, node:$src3), [{ - return (dyn_cast<MaskedStoreSDNode>(N) != 0); + return isa<MaskedStoreSDNode>(N); }]>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 538ec1c..fbfd868 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -559,6 +559,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::MMX_PABSWrr64, X86::MMX_PABSWrm64, 0 }, { X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 }, + // 3DNow! version of foldable instructions + { X86::PF2IDrr, X86::PF2IDrm, 0 }, + { X86::PF2IWrr, X86::PF2IWrm, 0 }, + { X86::PFRCPrr, X86::PFRCPrm, 0 }, + { X86::PFRSQRTrr, X86::PFRSQRTrm, 0 }, + { X86::PI2FDrr, X86::PI2FDrm, 0 }, + { X86::PI2FWrr, X86::PI2FWrm, 0 }, + { X86::PSWAPDrr, X86::PSWAPDrm, 0 }, + // AVX 128-bit versions of foldable instructions { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 }, { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 }, @@ -943,6 +952,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 }, { X86::CMPSDrr, X86::CMPSDrm, 0 }, { X86::CMPSSrr, X86::CMPSSrm, 0 }, + { X86::CRC32r32r32, X86::CRC32r32m32, 0 }, + { X86::CRC32r64r64, X86::CRC32r64m64, 0 }, { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 }, { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 }, { X86::DIVSDrr, X86::DIVSDrm, 0 }, @@ -1201,6 +1212,25 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, 0 }, { X86::MMX_PXORirr, X86::MMX_PXORirm, 0 }, + // 3DNow! version of foldable instructions + { X86::PAVGUSBrr, X86::PAVGUSBrm, 0 }, + { X86::PFACCrr, X86::PFACCrm, 0 }, + { X86::PFADDrr, X86::PFADDrm, 0 }, + { X86::PFCMPEQrr, X86::PFCMPEQrm, 0 }, + { X86::PFCMPGErr, X86::PFCMPGErm, 0 }, + { X86::PFCMPGTrr, X86::PFCMPGTrm, 0 }, + { X86::PFMAXrr, X86::PFMAXrm, 0 }, + { X86::PFMINrr, X86::PFMINrm, 0 }, + { X86::PFMULrr, X86::PFMULrm, 0 }, + { X86::PFNACCrr, X86::PFNACCrm, 0 }, + { X86::PFPNACCrr, X86::PFPNACCrm, 0 }, + { X86::PFRCPIT1rr, X86::PFRCPIT1rm, 0 }, + { X86::PFRCPIT2rr, X86::PFRCPIT2rm, 0 }, + { X86::PFRSQIT1rr, X86::PFRSQIT1rm, 0 }, + { X86::PFSUBrr, X86::PFSUBrm, 0 }, + { X86::PFSUBRrr, X86::PFSUBRrm, 0 }, + { X86::PMULHRWrr, X86::PMULHRWrm, 0 }, + // AVX 128-bit versions of foldable instructions { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 }, { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 }, @@ -5969,6 +5999,7 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr }, { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr }, { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm }, + { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr }, { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr }, { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm }, { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr }, @@ -5984,6 +6015,7 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr }, { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr }, { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm }, + // TODO: Add the AVX versions of MOVLPSmr { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr }, { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm }, { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr }, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ccdbf0e..65b155c 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -643,9 +643,6 @@ let Predicates = [UseAVX] in { // Represent the same patterns above but in the form they appear for // 256-bit types - def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, - (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; @@ -653,9 +650,6 @@ let Predicates = [UseAVX] in { (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>; } - def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, - (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>; // Extract and store. def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), @@ -793,7 +787,7 @@ let Predicates = [UseSSE2] in { (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem - // is during lowering, where it's not possible to recognize the fold cause + // is during lowering, where it's not possible to recognize the fold because // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), @@ -3678,13 +3672,30 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), PS, Requires<[HasSSE2]>; } // SchedRW = [WriteStore] +let Predicates = [HasAVX2, NoVLX] in { + def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst), + (VMOVNTDQYmr addr:$dst, VR256:$src)>; + def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst), + (VMOVNTDQYmr addr:$dst, VR256:$src)>; + def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst), + (VMOVNTDQYmr addr:$dst, VR256:$src)>; +} + let Predicates = [HasAVX, NoVLX] in { def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst), - (VMOVNTPSmr addr:$dst, VR128:$src)>; + (VMOVNTDQmr addr:$dst, VR128:$src)>; + def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst), + (VMOVNTDQmr addr:$dst, VR128:$src)>; + def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst), + (VMOVNTDQmr addr:$dst, VR128:$src)>; } def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst), - (MOVNTPSmr addr:$dst, VR128:$src)>; + (MOVNTDQmr addr:$dst, VR128:$src)>; +def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst), + (MOVNTDQmr addr:$dst, VR128:$src)>; +def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst), + (MOVNTDQmr addr:$dst, VR128:$src)>; } // AddedComplexity @@ -4890,7 +4901,8 @@ let Predicates = [UseAVX] in { def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), (VMOVDI2PDIrr GR32:$src)>; - // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. + // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part. + // These instructions also write zeros in the high part of a 256-bit register. let AddedComplexity = 20 in { def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), (VMOVDI2PDIrm addr:$src)>; @@ -4898,6 +4910,9 @@ let Predicates = [UseAVX] in { (VMOVDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), (VMOVDI2PDIrm addr:$src)>; + def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, + (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>; } // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext. def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, @@ -5016,6 +5031,9 @@ let Predicates = [UseAVX], AddedComplexity = 20 in { (VMOVZQI2PQIrm addr:$src)>; def : Pat<(v2i64 (X86vzload addr:$src)), (VMOVZQI2PQIrm addr:$src)>; + def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, + (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrm addr:$src), sub_xmm)>; } let Predicates = [UseSSE2], AddedComplexity = 20 in { @@ -7150,6 +7168,10 @@ let Predicates = [HasAVX2] in { } // Patterns +// FIXME: Prefer a movss or movsd over a blendps when optimizing for size or +// on targets where they have equal performance. These were changed to use +// blends because blends have better throughput on SandyBridge and Haswell, but +// movs[s/d] are 1-2 byte shorter instructions. let Predicates = [UseAVX] in { let AddedComplexity = 15 in { // Move scalar to XMM zero-extended, zeroing a VR128 then do a @@ -7166,8 +7188,10 @@ let Predicates = [UseAVX] in { // Move low f32 and clear high bits. def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))), (VBLENDPSYrri (v8f32 (AVX_SET0)), VR256:$src, (i8 1))>; - def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))), - (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>; + + // Move low f64 and clear high bits. + def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))), + (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>; } def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, @@ -7181,14 +7205,19 @@ let Predicates = [UseAVX] in { (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)), sub_xmm)>; - // Move low f64 and clear high bits. - def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))), - (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>; - + // These will incur an FP/int domain crossing penalty, but it may be the only + // way without AVX2. Do not add any complexity because we may be able to match + // more optimal patterns defined earlier in this file. + def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))), + (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>; def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))), (VBLENDPDYrri (v4i64 (AVX_SET0)), VR256:$src, (i8 1))>; } +// FIXME: Prefer a movss or movsd over a blendps when optimizing for size or +// on targets where they have equal performance. These were changed to use +// blends because blends have better throughput on SandyBridge and Haswell, but +// movs[s/d] are 1-2 byte shorter instructions. let Predicates = [UseSSE41] in { // With SSE41 we can use blends for these patterns. def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), @@ -8341,7 +8370,7 @@ let Predicates = [HasAVX2] in { def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))), (VBROADCASTSDYrr VR128:$src)>; - // Provide aliases for broadcast from the same regitser class that + // Provide aliases for broadcast from the same register class that // automatically does the extract. def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))), (VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 42256b2..28a3b7b 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -334,6 +334,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::FMUL_RND), X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL, X86ISD::FMUL_RND), + X86_INTRINSIC_DATA(avx512_mask_padd_d_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + X86_INTRINSIC_DATA(avx512_mask_padd_q_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0), + X86_INTRINSIC_DATA(avx512_mask_pand_q_512, INTR_TYPE_2OP_MASK, ISD::AND, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0), @@ -358,6 +362,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0), + X86_INTRINSIC_DATA(avx512_mask_pmul_dq_512, INTR_TYPE_2OP_MASK, + X86ISD::PMULDQ, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_512, INTR_TYPE_2OP_MASK, + X86ISD::PMULUDQ, 0), + X86_INTRINSIC_DATA(avx512_mask_por_d_512, INTR_TYPE_2OP_MASK, ISD::OR, 0), + X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0), X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0), @@ -376,6 +386,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx512_mask_psub_d_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_psub_q_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0), + X86_INTRINSIC_DATA(avx512_mask_pxor_q_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0), X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RNDSCALE, 0), X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM, diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index ca8fc9c..4bfc7f9 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -193,7 +193,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, DAG.getConstant(Offset, AddrVT)), Src, DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, DstPtrInfo.getWithOffset(Offset)); + Align, isVolatile, false, + DstPtrInfo.getWithOffset(Offset)); } // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. @@ -282,7 +283,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, SrcVT)), DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, AlwaysInline, + Align, isVolatile, AlwaysInline, false, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); } diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp index 215fe89..36b3b02 100644 --- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp +++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp @@ -30,7 +30,7 @@ void XCoreInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void XCoreInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { printInstruction(MI, O); printAnnotation(O, Annot); } diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h index 78521fd..6fd2dec 100644 --- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h +++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h @@ -32,7 +32,8 @@ public: static const char *getRegisterName(unsigned RegNo); void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; private: void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O); void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O); diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index d0a09b2..4a790c8 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -81,12 +81,11 @@ static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createXCoreMCInstPrinter(const Target &T, +static MCInstPrinter *createXCoreMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { return new XCoreInstPrinter(MAI, MII, MRI); } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 6e8a95a..c4e3bb8 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -308,7 +308,8 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const Constant *GA = ConstantExpr::getBitCast(const_cast<GlobalValue*>(GV), Ty); Ty = Type::getInt32Ty(*DAG.getContext()); Constant *Idx = ConstantInt::get(Ty, Offset); - Constant *GAI = ConstantExpr::getGetElementPtr(GA, Idx); + Constant *GAI = ConstantExpr::getGetElementPtr( + Type::getInt8Ty(*DAG.getContext()), GA, Idx); SDValue CP = DAG.getConstantPool(GAI, MVT::i32); return DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), CP, MachinePointerInfo(), false, false, false, 0); @@ -1422,7 +1423,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, InVals.push_back(FIN); MemOps.push_back(DAG.getMemcpy(Chain, dl, FIN, ArgDI->SDV, DAG.getConstant(Size, MVT::i32), - Align, false, false, + Align, false, false, false, MachinePointerInfo(), MachinePointerInfo())); } else { @@ -1833,10 +1834,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, LD->getAlignment() == Alignment && !LD->isVolatile() && !LD->isIndexed() && Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) { + bool isTail = isInTailCallPosition(DAG, ST, Chain); return DAG.getMemmove(Chain, dl, ST->getBasePtr(), LD->getBasePtr(), DAG.getConstant(StoreBits/8, MVT::i32), - Alignment, false, ST->getPointerInfo(), + Alignment, false, isTail, ST->getPointerInfo(), LD->getPointerInfo()); } } diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp index b4c6a50..9fb63e9 100644 --- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp +++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp @@ -82,8 +82,9 @@ createReplacementInstr(ConstantExpr *CE, Instruction *Instr) { case Instruction::GetElementPtr: { SmallVector<Value *,4> CEOpVec(CE->op_begin(), CE->op_end()); ArrayRef<Value *> CEOps(CEOpVec); - return dyn_cast<Instruction>(Builder.CreateInBoundsGEP(CEOps[0], - CEOps.slice(1))); + return dyn_cast<Instruction>(Builder.CreateInBoundsGEP( + cast<GEPOperator>(CE)->getSourceElementType(), CEOps[0], + CEOps.slice(1))); } case Instruction::Add: case Instruction::Sub: @@ -212,7 +213,8 @@ bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) { SmallVector<Value *, 2> Indices; Indices.push_back(Constant::getNullValue(Type::getInt64Ty(Ctx))); Indices.push_back(ThreadID); - Value *Addr = Builder.CreateInBoundsGEP(NewGV, Indices); + Value *Addr = + Builder.CreateInBoundsGEP(NewGV->getValueType(), NewGV, Indices); U->replaceUsesOfWith(GV, Addr); } diff --git a/lib/Target/XCore/XCoreTargetStreamer.h b/lib/Target/XCore/XCoreTargetStreamer.h index 48bf0fa..3563dbc 100644 --- a/lib/Target/XCore/XCoreTargetStreamer.h +++ b/lib/Target/XCore/XCoreTargetStreamer.h @@ -16,7 +16,7 @@ namespace llvm { class XCoreTargetStreamer : public MCTargetStreamer { public: XCoreTargetStreamer(MCStreamer &S); - virtual ~XCoreTargetStreamer(); + ~XCoreTargetStreamer() override; virtual void emitCCTopData(StringRef Name) = 0; virtual void emitCCTopFunction(StringRef Name) = 0; virtual void emitCCBottomData(StringRef Name) = 0; |