Diffstat (limited to 'lib/Target/ARM')
53 files changed, 926 insertions, 927 deletions
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index 387f1f6..7a1865c 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -27,12 +27,15 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" +#include "ARMSubtarget.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <map> @@ -678,8 +681,13 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { } bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { - TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo()); - TRI = Fn.getSubtarget().getRegisterInfo(); + const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); + // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be + // enabled when NEON is available. + if (!(STI.isCortexA15() && STI.hasNEON())) + return false; + TII = STI.getInstrInfo(); + TRI = STI.getRegisterInfo(); MRI = &Fn.getRegInfo(); bool Modified = false; diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index f080c60..ce0aed9 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -167,9 +167,12 @@ def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true", "Support ARM v6M instructions", [HasV6Ops]>; +def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true", + "Support ARM v6k instructions", + [HasV6Ops]>; def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", "Support ARM v6t2 instructions", - [HasV6MOps, FeatureThumb2]>; + [HasV6MOps, HasV6KOps, FeatureThumb2]>; def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", "Support ARM v7 instructions", [HasV6T2Ops, FeaturePerfMon]>; @@ -177,6 +180,9 @@ def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", "Support ARM v8 instructions", [HasV7Ops, FeatureVirtualization, FeatureMP]>; +def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true", + "Support ARM v8.1a instructions", + [HasV8Ops, FeatureAClass, FeatureCRC]>; //===----------------------------------------------------------------------===// // ARM Processors supported. @@ -320,12 +326,6 @@ def : ProcNoItin<"iwmmxt", [HasV5TEOps]>; def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>; def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2, FeatureHasSlowFPVMLx]>; -def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6Ops]>; -def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2, - FeatureHasSlowFPVMLx]>; -def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6Ops]>; -def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2, - FeatureHasSlowFPVMLx]>; // V6M Processors. def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM, @@ -337,6 +337,14 @@ def : Processor<"cortex-m1", ARMV6Itineraries, [HasV6MOps, FeatureNoARM, def : Processor<"sc000", ARMV6Itineraries, [HasV6MOps, FeatureNoARM, FeatureDB, FeatureMClass]>; +// V6K Processors. 
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6KOps]>; +def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6KOps, FeatureVFP2, + FeatureHasSlowFPVMLx]>; +def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6KOps]>; +def : Processor<"mpcore", ARMV6Itineraries, [HasV6KOps, FeatureVFP2, + FeatureHasSlowFPVMLx]>; + // V6T2 Processors. def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops, FeatureDSPThumb2]>; @@ -449,6 +457,14 @@ def : ProcessorModel<"cyclone", SwiftModel, FeatureDB,FeatureDSPThumb2, FeatureHasRAS, FeatureZCZeroing]>; +// V8.1 Processors +def : ProcNoItin<"generic-armv8.1-a", [HasV8Ops, FeatureV8_1a, + FeatureDB, FeatureFPARMv8, + FeatureNEON, FeatureDSPThumb2, + FeatureHWDiv, FeatureHWDivARM, + FeatureTrustZone, FeatureT2XtPk, + FeatureCrypto]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 2544a01..102def1 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -120,9 +120,6 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer.EndCOFFSymbolDef(); } - // Have common code print out the function header with linkage info etc. - EmitFunctionHeader(); - // Emit the rest of the function body. EmitFunctionBody(); @@ -438,65 +435,6 @@ void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { Triple TT(TM.getTargetTriple()); - if (TT.isOSBinFormatMachO()) { - Reloc::Model RelocM = TM.getRelocationModel(); - if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) { - // Declare all the text sections up front (before the DWARF sections - // emitted by AsmPrinter::doInitialization) so the assembler will keep - // them together at the beginning of the object file. This helps - // avoid out-of-range branches that are due a fundamental limitation of - // the way symbol offsets are encoded with the current Darwin ARM - // relocations. - const TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<const TargetLoweringObjectFileMachO &>( - getObjFileLowering()); - - // Collect the set of sections our functions will go into. - SetVector<const MCSection *, SmallVector<const MCSection *, 8>, - SmallPtrSet<const MCSection *, 8> > TextSections; - // Default text section comes first. - TextSections.insert(TLOFMacho.getTextSection()); - // Now any user defined text sections from function attributes. - for (Module::iterator F = M.begin(), e = M.end(); F != e; ++F) - if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) - TextSections.insert(TLOFMacho.SectionForGlobal(F, *Mang, TM)); - // Now the coalescable sections. - TextSections.insert(TLOFMacho.getTextCoalSection()); - TextSections.insert(TLOFMacho.getConstTextCoalSection()); - - // Emit the sections in the .s file header to fix the order. 
- for (unsigned i = 0, e = TextSections.size(); i != e; ++i) - OutStreamer.SwitchSection(TextSections[i]); - - if (RelocM == Reloc::DynamicNoPIC) { - const MCSection *sect = - OutContext.getMachOSection("__TEXT", "__symbol_stub4", - MachO::S_SYMBOL_STUBS, - 12, SectionKind::getText()); - OutStreamer.SwitchSection(sect); - } else { - const MCSection *sect = - OutContext.getMachOSection("__TEXT", "__picsymbolstub4", - MachO::S_SYMBOL_STUBS, - 16, SectionKind::getText()); - OutStreamer.SwitchSection(sect); - } - const MCSection *StaticInitSect = - OutContext.getMachOSection("__TEXT", "__StaticInit", - MachO::S_REGULAR | - MachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); - OutStreamer.SwitchSection(StaticInitSect); - } - - // Compiling with debug info should not affect the code - // generation. Ensure the cstring section comes before the - // optional __DWARF secion. Otherwise, PC-relative loads would - // have to use different instruction sequences at "-g" in order to - // reach global data in the same object file. - OutStreamer.SwitchSection(getObjFileLowering().getCStringSection()); - } - // Use unified assembler syntax. OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified); @@ -669,7 +607,7 @@ void ARMAsmPrinter::emitAttributes() { std::string CPUString = STI.getCPUString(); - if (CPUString != "generic") { + if (CPUString.find("generic") != 0) { //CPUString doesn't start with "generic" // FIXME: remove krait check when GNU tools support krait cpu if (STI.isKrait()) { ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9"); @@ -723,7 +661,8 @@ void ARMAsmPrinter::emitAttributes() { // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture if (STI.hasV8Ops()) ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, - ARMBuildAttrs::AllowNeonARMv8); + STI.hasV8_1a() ? ARMBuildAttrs::AllowNeonARMv8_1a: + ARMBuildAttrs::AllowNeonARMv8); } else { if (STI.hasFPARMv8()) // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one @@ -960,10 +899,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { MCSymbol *MCSym; if (ACPV->isLSDA()) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - OS << DL->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber(); - MCSym = OutContext.GetOrCreateSymbol(OS.str()); + MCSym = getCurExceptionSym(); } else if (ACPV->isBlockAddress()) { const BlockAddress *BA = cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(); diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 50cb954..e475ae4 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -103,13 +103,16 @@ private: const MachineInstr *MI); public: - unsigned getISAEncoding(const Function *F) override { + unsigned getISAEncoding() override { // ARM/Darwin adds ISA to the DWARF info for each function. Triple TT(TM.getTargetTriple()); if (!TT.isOSBinFormatMachO()) return 0; - const ARMSubtarget &STI = TM.getSubtarget<ARMSubtarget>(*F); - return STI.isThumb() ? ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm; + bool isThumb = TT.getArch() == Triple::thumb || + TT.getArch() == Triple::thumbeb || + TT.getSubArch() == Triple::ARMSubArch_v7m || + TT.getSubArch() == Triple::ARMSubArch_v6m; + return isThumb ? 
ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm; } private: diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 29ee22e..7ee3cb0 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -37,6 +37,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -4115,19 +4116,21 @@ enum ARMExeDomain { // std::pair<uint16_t, uint16_t> ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { - // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON - // if they are not predicated. - if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) - return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); - - // CortexA9 is particularly picky about mixing the two and wants these - // converted. - if (Subtarget.isCortexA9() && !isPredicated(MI) && - (MI->getOpcode() == ARM::VMOVRS || - MI->getOpcode() == ARM::VMOVSR || - MI->getOpcode() == ARM::VMOVS)) - return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); - + // If we don't have access to NEON instructions then we won't be able + // to swizzle anything to the NEON domain. Check to make sure. + if (Subtarget.hasNEON()) { + // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON + // if they are not predicated. + if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) + return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); + + // CortexA9 is particularly picky about mixing the two and wants these + // converted. + if (Subtarget.isCortexA9() && !isPredicated(MI) && + (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR || + MI->getOpcode() == ARM::VMOVS)) + return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); + } // No other instructions can be swizzled, so just determine their domain. unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask; @@ -4220,6 +4223,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // Zap the predicate operands. assert(!isPredicated(MI) && "Cannot predicate a VORRd"); + // Make sure we've got NEON instructions. 
+ assert(Subtarget.hasNEON() && "VORRd requires NEON"); + // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(1).getReg(); @@ -4507,7 +4513,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, } bool ARMBaseInstrInfo::hasNOP() const { - return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0; + return (Subtarget.getFeatureBits() & ARM::HasV6KOps) != 0; } bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 7574727..a8c7657 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -45,26 +45,27 @@ using namespace llvm; -ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti) - : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) { +ARMBaseRegisterInfo::ARMBaseRegisterInfo() + : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {} + +static unsigned getFramePointerReg(const ARMSubtarget &STI) { if (STI.isTargetMachO()) { if (STI.isTargetDarwin() || STI.isThumb1Only()) - FramePtr = ARM::R7; + return ARM::R7; else - FramePtr = ARM::R11; + return ARM::R11; } else if (STI.isTargetWindows()) - FramePtr = ARM::R11; + return ARM::R11; else // ARM EABI - FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11; + return STI.isThumb() ? ARM::R7 : ARM::R11; } const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>(); const MCPhysReg *RegList = STI.isTargetDarwin() ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; - if (!MF) return RegList; - const Function *F = MF->getFunction(); if (F->getCallingConv() == CallingConv::GHC) { // GHC set of callee saved regs is empty as all those regs are @@ -89,8 +90,10 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return RegList; } -const uint32_t* -ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { +const uint32_t * +ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls return CSR_NoRegs_RegMask; @@ -102,8 +105,10 @@ ARMBaseRegisterInfo::getNoPreservedMask() const { return CSR_NoRegs_RegMask; } -const uint32_t* -ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { +const uint32_t * +ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); // This should return a register mask that is the same as that returned by // getCallPreservedMask but that additionally preserves the register used for // the first i32 argument (which must also be the register used to return a @@ -121,7 +126,8 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); + const TargetFrameLowering *TFI = STI.getFrameLowering(); // FIXME: avoid re-calculating this every time. 
BitVector Reserved(getNumRegs()); @@ -130,7 +136,7 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(ARM::FPSCR); Reserved.set(ARM::APSR_NZCV); if (TFI->hasFP(MF)) - Reserved.set(FramePtr); + Reserved.set(getFramePointerReg(STI)); if (hasBasePointer(MF)) Reserved.set(BasePtr); // Some targets reserve R9. @@ -150,9 +156,9 @@ getReservedRegs(const MachineFunction &MF) const { return Reserved; } -const TargetRegisterClass* -ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) - const { +const TargetRegisterClass * +ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, + const MachineFunction &) const { const TargetRegisterClass *Super = RC; TargetRegisterClass::sc_iterator I = RC->getSuperClasses(); do { @@ -187,7 +193,8 @@ ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { unsigned ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); + const TargetFrameLowering *TFI = STI.getFrameLowering(); switch (RC->getID()) { default: @@ -283,29 +290,6 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg, } } -bool -ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { - // CortexA9 has a Write-after-write hazard for NEON registers. - if (!STI.isLikeA9()) - return false; - - switch (RC->getID()) { - case ARM::DPRRegClassID: - case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - case ARM::QPRRegClassID: - case ARM::QPR_8RegClassID: - case ARM::QPR_VFP2RegClassID: - case ARM::SPRRegClassID: - case ARM::SPR_8RegClassID: - // Avoid reusing S, D, and Q registers. - // Don't increase register pressure for QQ and QQQQ. - return true; - default: - return false; - } -} - bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -350,7 +334,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { return false; // Stack realignment requires a frame pointer. If we already started // register allocation with frame pointer elimination, it is too late now. - if (!MRI->canReserveReg(FramePtr)) + if (!MRI->canReserveReg(getFramePointerReg(MF.getSubtarget<ARMSubtarget>()))) return false; // We may also need a base pointer if there are dynamic allocas or stack // pointer adjustments around calls. @@ -384,10 +368,11 @@ cannotEliminateFrame(const MachineFunction &MF) const { unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); + const TargetFrameLowering *TFI = STI.getFrameLowering(); if (TFI->hasFP(MF)) - return FramePtr; + return getFramePointerReg(STI); return ARM::SP; } @@ -539,7 +524,6 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // The incoming offset is relating to the SP at the start of the function, // but when we access the local it'll be relative to the SP after local // allocation, so adjust our SP-relative offset by that allocation size. - Offset = -Offset; Offset += MFI->getLocalFrameSize(); // Assume that we'll have at least some spill slots allocated. // FIXME: This is a total SWAG number. 
We should run some statistics @@ -552,9 +536,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // on whether there are any local variables that would trigger it. unsigned StackAlign = TFI->getStackAlignment(); if (TFI->hasFP(MF) && - (MI->getDesc().TSFlags & ARMII::AddrModeMask) != ARMII::AddrModeT1_s && !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) { - if (isFrameOffsetLegal(MI, FPOffset)) + if (isFrameOffsetLegal(MI, getFrameRegister(MF), FPOffset)) return false; } // If we can reference via the stack pointer, try that. @@ -562,7 +545,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // to only disallow SP relative references in the live range of // the VLA(s). In practice, it's unclear how much difference that // would make, but it may be worth doing. - if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset)) + if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, ARM::SP, Offset)) return false; // The offset likely isn't legal, we want to allocate a virtual base register. @@ -625,7 +608,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, (void)Done; } -bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, +bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, int64_t Offset) const { const MCInstrDesc &Desc = MI->getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); @@ -669,7 +652,7 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, NumBits = 8; break; case ARMII::AddrModeT1_s: - NumBits = 8; + NumBits = (BaseReg == ARM::SP ? 8 : 5); Scale = 4; isSigned = false; break; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 17027c2..fdc1ef9 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -21,10 +21,6 @@ #include "ARMGenRegisterInfo.inc" namespace llvm { - class ARMSubtarget; - class ARMBaseInstrInfo; - class Type; - /// Register allocation hints. namespace ARMRI { enum { @@ -82,27 +78,22 @@ static inline bool isCalleeSavedRegister(unsigned Reg, class ARMBaseRegisterInfo : public ARMGenRegisterInfo { protected: - const ARMSubtarget &STI; - - /// FramePtr - ARM physical register used as frame ptr. - unsigned FramePtr; - /// BasePtr - ARM physical register used as a base ptr in complex stack /// frames. I.e., when we need a 3rd base, not just SP and FP, due to /// variable size stack objects. unsigned BasePtr; // Can be only subclassed. - explicit ARMBaseRegisterInfo(const ARMSubtarget &STI); + explicit ARMBaseRegisterInfo(); // Return the opcode that implements 'Op', or 0 if no opcode unsigned getOpcode(int Op) const; public: /// Code Generation virtual methods... 
- const MCPhysReg * - getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; - const uint32_t *getCallPreservedMask(CallingConv::ID) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const; /// getThisReturnPreservedMask - Returns a call preserved mask specific to the @@ -113,7 +104,8 @@ public: /// /// Should return NULL in the case that the calling convention does not have /// this property - const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; + const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF, + CallingConv::ID) const; BitVector getReservedRegs(const MachineFunction &MF) const override; @@ -124,7 +116,8 @@ public: getCrossCopyRegClass(const TargetRegisterClass *RC) const override; const TargetRegisterClass * - getLargestLegalSuperClass(const TargetRegisterClass *RC) const override; + getLargestLegalSuperClass(const TargetRegisterClass *RC, + const MachineFunction &MF) const override; unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; @@ -138,8 +131,6 @@ public: void updateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const override; - bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const override; - bool hasBasePointer(const MachineFunction &MF) const; bool canRealignStack(const MachineFunction &MF) const; @@ -152,7 +143,7 @@ public: int64_t Offset) const override; void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const override; - bool isFrameOffsetLegal(const MachineInstr *MI, + bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, int64_t Offset) const override; bool cannotEliminateFrame(const MachineFunction &MF) const; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 375d394..9c8d228 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -2265,7 +2265,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CC)); + MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); // Finish off the call including any return values. SmallVector<unsigned, 4> UsedRegs; @@ -2416,7 +2416,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CC)); + MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); // Finish off the call including any return values. 
SmallVector<unsigned, 4> UsedRegs; diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 5a5bd57..830953b 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -293,7 +293,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); unsigned Align = STI.getFrameLowering()->getStackAlignment(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -742,8 +742,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = STI.getFrameLowering()->getStackAlignment(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 6ebf640..44cd1ef 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -257,7 +257,7 @@ private: /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. - bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, + bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) override; // Form pairs of consecutive R, S, D, or Q registers. @@ -3086,7 +3086,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { // Store exclusive double return a i32 value which is the return status // of the issued store. - EVT ResTys[] = { MVT::i32, MVT::Other }; + const EVT ResTys[] = {MVT::i32, MVT::Other}; bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); // Place arguments in the right order. @@ -3472,9 +3472,10 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ bool ARMDAGToDAGISel:: -SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, +SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { - assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); + assert(ConstraintID == InlineAsm::Constraint_m && + "unexpected asm memory constraint"); // Require the address to be in a register. That is safe for all ARM // variants and it is hard to do anything much smarter without knowing // how the operand is used. 
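A note on the ARMBaseRegisterInfo.cpp hunk above that changes AddrModeT1_s from NumBits = 8 to NumBits = (BaseReg == ARM::SP ? 8 : 5): this mirrors a Thumb1 encoding limit. SP-relative loads and stores carry an 8-bit word-scaled immediate (byte offsets 0..1020), while the same operations off an ordinary low register carry only 5 bits (0..124), so a frame offset that is legal against SP can become illegal once rewritten against a base register. A standalone sketch of the resulting legality check (a model, not the LLVM function):

    #include <cstdint>
    #include <cstdio>

    // Standalone model of the Thumb1 (AddrModeT1_s) offset check from
    // isFrameOffsetLegal above: an unsigned, word-scaled immediate with
    // 8 bits available off SP but only 5 bits off a low register.
    static bool isT1StackOffsetLegal(bool BaseIsSP, int64_t Offset) {
      unsigned NumBits = BaseIsSP ? 8 : 5;
      unsigned Scale = 4;
      if (Offset < 0 || Offset % Scale != 0)
        return false; // only unsigned, word-aligned immediates encode
      return (uint64_t)(Offset / Scale) < (1ULL << NumBits);
    }

    int main() {
      std::printf("%d\n", isT1StackOffsetLegal(true, 1020));  // 1: ldr r0, [sp, #1020]
      std::printf("%d\n", isT1StackOffsetLegal(false, 1020)); // 0: only 0..124 off rN
    }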
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 56290aa..3b1b8dd 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -23,6 +23,7 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -40,6 +41,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCSectionMachO.h" @@ -47,6 +49,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include <utility> using namespace llvm; @@ -568,14 +571,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::LOAD); // It is legal to extload from v4i8 to v4i16 or v4i32. - MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8, - MVT::v4i16, MVT::v2i16, - MVT::v2i32}; - for (unsigned i = 0; i < 6; ++i) { + for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16, + MVT::v2i32}) { for (MVT VT : MVT::integer_vector_valuetypes()) { - setLoadExtAction(ISD::EXTLOAD, VT, Tys[i], Legal); - setLoadExtAction(ISD::ZEXTLOAD, VT, Tys[i], Legal); - setLoadExtAction(ISD::SEXTLOAD, VT, Tys[i], Legal); + setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal); + setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal); + setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal); } } } @@ -614,6 +615,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FRINT, MVT::f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom); setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); } @@ -869,14 +876,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // Various VFP goodness if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) { - // int <-> fp are custom expanded into bit_convert + ARMISD ops. - if (Subtarget->hasVFP2()) { - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - } - // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. 
if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) { setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); @@ -1033,11 +1032,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::RBIT: return "ARMISD::RBIT"; - case ARMISD::FTOSI: return "ARMISD::FTOSI"; - case ARMISD::FTOUI: return "ARMISD::FTOUI"; - case ARMISD::SITOF: return "ARMISD::SITOF"; - case ARMISD::UITOF: return "ARMISD::UITOF"; - case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; @@ -1164,6 +1158,20 @@ const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const { return TargetLowering::getRegClassFor(VT); } +// memcpy, and other memory intrinsics, typically tries to use LDM/STM if the +// source/dest is aligned and the copy size is large enough. We therefore want +// to align such objects passed to memory intrinsics. +bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, + unsigned &PrefAlign) const { + if (!isa<MemIntrinsic>(CI)) + return false; + MinSize = 8; + // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1 + // cycle faster than 4-byte aligned LDM. + PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4); + return true; +} + // Create a fast isel object. FastISel * ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, @@ -1815,16 +1823,16 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); if (isThisReturn) { // For 'this' returns, use the R0-preserving mask if applicable - Mask = ARI->getThisReturnPreservedMask(CallConv); + Mask = ARI->getThisReturnPreservedMask(MF, CallConv); if (!Mask) { // Set isThisReturn to false if the calling convention is not one that // allows 'returned' to be modeled in this way, so LowerCallResult does // not try to pass 'this' straight through isThisReturn = false; - Mask = ARI->getCallPreservedMask(CallConv); + Mask = ARI->getCallPreservedMask(MF, CallConv); } } else - Mask = ARI->getCallPreservedMask(CallConv); + Mask = ARI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -1857,60 +1865,61 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, /// on the stack. Remember the next parameter register to allocate, /// and then confiscate the rest of the parameter registers to insure /// this. -void -ARMTargetLowering::HandleByVal( - CCState *State, unsigned &size, unsigned Align) const { - unsigned reg = State->AllocateReg(GPRArgRegs); +void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, + unsigned Align) const { assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && "unhandled ParmContext"); - if ((ARM::R0 <= reg) && (reg <= ARM::R3)) { - if (Subtarget->isAAPCS_ABI() && Align > 4) { - unsigned AlignInRegs = Align / 4; - unsigned Waste = (ARM::R4 - reg) % AlignInRegs; - for (unsigned i = 0; i < Waste; ++i) - reg = State->AllocateReg(GPRArgRegs); - } - if (reg != 0) { - unsigned excess = 4 * (ARM::R4 - reg); - - // Special case when NSAA != SP and parameter size greater than size of - // all remained GPR regs. In that case we can't split parameter, we must - // send it to stack. We also must set NCRN to R4, so waste all - // remained registers. 
- const unsigned NSAAOffset = State->getNextStackOffset(); - if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) { - while (State->AllocateReg(GPRArgRegs)) - ; - return; - } + // Byval (as with any stack) slots are always at least 4 byte aligned. + Align = std::max(Align, 4U); - // First register for byval parameter is the first register that wasn't - // allocated before this method call, so it would be "reg". - // If parameter is small enough to be saved in range [reg, r4), then - // the end (first after last) register would be reg + param-size-in-regs, - // else parameter would be splitted between registers and stack, - // end register would be r4 in this case. - unsigned ByValRegBegin = reg; - unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4; - State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); - // Note, first register is allocated in the beginning of function already, - // allocate remained amount of registers we need. - for (unsigned i = reg+1; i != ByValRegEnd; ++i) - State->AllocateReg(GPRArgRegs); - // A byval parameter that is split between registers and memory needs its - // size truncated here. - // In the case where the entire structure fits in registers, we set the - // size in memory to zero. - if (size < excess) - size = 0; - else - size -= excess; - } + unsigned Reg = State->AllocateReg(GPRArgRegs); + if (!Reg) + return; + + unsigned AlignInRegs = Align / 4; + unsigned Waste = (ARM::R4 - Reg) % AlignInRegs; + for (unsigned i = 0; i < Waste; ++i) + Reg = State->AllocateReg(GPRArgRegs); + + if (!Reg) + return; + + unsigned Excess = 4 * (ARM::R4 - Reg); + + // Special case when NSAA != SP and parameter size greater than size of + // all remained GPR regs. In that case we can't split parameter, we must + // send it to stack. We also must set NCRN to R4, so waste all + // remained registers. + const unsigned NSAAOffset = State->getNextStackOffset(); + if (NSAAOffset != 0 && Size > Excess) { + while (State->AllocateReg(GPRArgRegs)) + ; + return; } + + // First register for byval parameter is the first register that wasn't + // allocated before this method call, so it would be "reg". + // If parameter is small enough to be saved in range [reg, r4), then + // the end (first after last) register would be reg + param-size-in-regs, + // else parameter would be splitted between registers and stack, + // end register would be r4 in this case. + unsigned ByValRegBegin = Reg; + unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4); + State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); + // Note, first register is allocated in the beginning of function already, + // allocate remained amount of registers we need. + for (unsigned i = Reg + 1; i != ByValRegEnd; ++i) + State->AllocateReg(GPRArgRegs); + // A byval parameter that is split between registers and memory needs its + // size truncated here. + // In the case where the entire structure fits in registers, we set the + // size in memory to zero. + Size = std::max<int>(Size - Excess, 0); } + /// MatchingStackOffset - Return true if the given stack call argument is /// already available in the same position (relatively) of the caller's /// incoming argument stack. 
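To make the rewritten HandleByVal above concrete, here is a standalone model (not the LLVM code) of its register-splitting math. Registers are modeled as 0..3 for R0..R3, with 4 standing for ARM::R4, meaning "no argument GPRs left"; the NSAA != SP special case from the hunk is omitted for brevity:

    #include <algorithm>
    #include <cstdio>

    // Standalone model of the byval GPR-splitting math in HandleByVal above.
    static void handleByValModel(unsigned FirstFreeReg, unsigned &Size,
                                 unsigned Align) {
      Align = std::max(Align, 4u);        // byval slots are >= 4-byte aligned
      unsigned AlignInRegs = Align / 4;
      // Skip ("waste") registers until the byval start is suitably aligned.
      unsigned Reg = FirstFreeReg + (4 - FirstFreeReg) % AlignInRegs;
      if (Reg >= 4)
        return;                           // nothing fits in registers
      unsigned Excess = 4 * (4 - Reg);    // bytes coverable by R<Reg>..R3
      // Whatever fits goes in registers; the remainder stays in memory.
      Size = Size > Excess ? Size - Excess : 0;
    }

    int main() {
      unsigned Size = 12;
      handleByValModel(/*FirstFreeReg=*/1, Size, /*Align=*/8); // R1 wasted; R2, R3 used
      std::printf("%u bytes of the byval remain on the stack\n", Size); // prints 4
    }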
@@ -1991,7 +2000,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (isCalleeStructRet || isCallerStructRet) return false; - // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: + // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo:: // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation // support in the assembler and linker to be used. This would need to be @@ -2819,50 +2828,6 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } -void -ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, - unsigned InRegsParamRecordIdx, - unsigned ArgSize, - unsigned &ArgRegsSize, - unsigned &ArgRegsSaveSize) - const { - unsigned NumGPRs; - if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { - unsigned RBegin, REnd; - CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); - NumGPRs = REnd - RBegin; - } else { - unsigned int firstUnalloced; - firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs); - NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; - } - - unsigned Align = Subtarget->getFrameLowering()->getStackAlignment(); - ArgRegsSize = NumGPRs * 4; - - // If parameter is split between stack and GPRs... - if (NumGPRs && Align > 4 && - (ArgRegsSize < ArgSize || - InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) { - // Add padding for part of param recovered from GPRs. For example, - // if Align == 8, its last byte must be at address K*8 - 1. - // We need to do it, since remained (stack) part of parameter has - // stack alignment, and we need to "attach" "GPRs head" without gaps - // to it: - // Stack: - // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes... - // [ [padding] [GPRs head] ] [ Tail passed via stack .... - // - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned Padding = - OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align); - ArgRegsSaveSize = ArgRegsSize + Padding; - } else - // We don't need to extend regs save size for byval parameters if they - // are passed via GPRs only. - ArgRegsSaveSize = ArgRegsSize; -} - // The remaining GPRs hold either the beginning of variable-argument // data, or the beginning of an aggregate passed by value (usually // byval). Either way, we allocate stack slots adjacent to the data @@ -2876,13 +2841,8 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, const Value *OrigArg, unsigned InRegsParamRecordIdx, - unsigned OffsetFromOrigArg, - unsigned ArgOffset, - unsigned ArgSize, - bool ForceMutable, - unsigned ByValStoreOffset, - unsigned TotalArgRegsSaveSize) const { - + int ArgOffset, + unsigned ArgSize) const { // Currently, two use-cases possible: // Case #1. Non-var-args function, and we meet first byval parameter. 
// Setup first unallocated register as first byval register; @@ -2897,82 +2857,39 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned firstRegToSaveIndex, lastRegToSaveIndex; unsigned RBegin, REnd; if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); - firstRegToSaveIndex = RBegin - ARM::R0; - lastRegToSaveIndex = REnd - ARM::R0; } else { - firstRegToSaveIndex = CCInfo.getFirstUnallocated(GPRArgRegs); - lastRegToSaveIndex = 4; - } - - unsigned ArgRegsSize, ArgRegsSaveSize; - computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize, - ArgRegsSize, ArgRegsSaveSize); - - // Store any by-val regs to their spots on the stack so that they may be - // loaded by deferencing the result of formal parameter pointer or va_next. - // Note: once stack area for byval/varargs registers - // was initialized, it can't be initialized again. - if (ArgRegsSaveSize) { - unsigned Padding = ArgRegsSaveSize - ArgRegsSize; - - if (Padding) { - assert(AFI->getStoredByValParamsPadding() == 0 && - "The only parameter may be padded."); - AFI->setStoredByValParamsPadding(Padding); - } - - int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize, - Padding + - ByValStoreOffset - - (int64_t)TotalArgRegsSaveSize, - false); - SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); - if (Padding) { - MFI->CreateFixedObject(Padding, - ArgOffset + ByValStoreOffset - - (int64_t)ArgRegsSaveSize, - false); - } - - SmallVector<SDValue, 4> MemOps; - for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex; - ++firstRegToSaveIndex, ++i) { - const TargetRegisterClass *RC; - if (AFI->isThumb1OnlyFunction()) - RC = &ARM::tGPRRegClass; - else - RC = &ARM::GPRRegClass; + unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs); + RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx]; + REnd = ARM::R4; + } - unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC); - SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); - SDValue Store = - DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i), - false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, - DAG.getConstant(4, getPointerTy())); - } + if (REnd != RBegin) + ArgOffset = -4 * (ARM::R4 - RBegin); - AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize()); + int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false); + SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); - if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); - return FrameIndex; - } else { - if (ArgSize == 0) { - // We cannot allocate a zero-byte object for the first variadic argument, - // so just make up a size. - ArgSize = 4; - } - // This will point to the next argument passed via stack. - return MFI->CreateFixedObject( - ArgSize, ArgOffset, !ForceMutable); + SmallVector<SDValue, 4> MemOps; + const TargetRegisterClass *RC = + AFI->isThumb1OnlyFunction() ? 
&ARM::tGPRRegClass : &ARM::GPRRegClass; + + for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) { + unsigned VReg = MF.addLiveIn(Reg, RC); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(OrigArg, 4 * i), false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, + DAG.getConstant(4, getPointerTy())); } + + if (!MemOps.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); + return FrameIndex; } // Setup stack frame, the va_list pointer will start from. @@ -2990,11 +2907,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, // the result of va_next. // If there is no regs to be stored, just point address after last // argument passed via stack. - int FrameIndex = - StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, - CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable, - 0, TotalArgRegsSaveSize); - + int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, + CCInfo.getInRegsParamsCount(), + CCInfo.getNextStackOffset(), 4); AFI->setVarArgsFrameIndex(FrameIndex); } @@ -3020,7 +2935,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, isVarArg)); SmallVector<SDValue, 16> ArgValues; - int lastInsIndex = -1; SDValue ArgValue; Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; @@ -3030,50 +2944,40 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // We also increase this value in case of varargs function. AFI->setArgRegsSaveSize(0); - unsigned ByValStoreOffset = 0; - unsigned TotalArgRegsSaveSize = 0; - unsigned ArgRegsSaveSizeMaxAlign = 4; - // Calculate the amount of stack space that we need to allocate to store // byval and variadic arguments that are passed in registers. // We need to know this before we allocate the first byval or variadic // argument, as they will be allocated a stack slot below the CFA (Canonical // Frame Address, the stack pointer at entry to the function). 
+ unsigned ArgRegBegin = ARM::R4; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount()) + break; + CCValAssign &VA = ArgLocs[i]; - if (VA.isMemLoc()) { - int index = VA.getValNo(); - if (index != lastInsIndex) { - ISD::ArgFlagsTy Flags = Ins[index].Flags; - if (Flags.isByVal()) { - unsigned ExtraArgRegsSize; - unsigned ExtraArgRegsSaveSize; - computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(), - Flags.getByValSize(), - ExtraArgRegsSize, ExtraArgRegsSaveSize); - - TotalArgRegsSaveSize += ExtraArgRegsSaveSize; - if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign) - ArgRegsSaveSizeMaxAlign = Flags.getByValAlign(); - CCInfo.nextInRegsParam(); - } - lastInsIndex = index; - } - } + unsigned Index = VA.getValNo(); + ISD::ArgFlagsTy Flags = Ins[Index].Flags; + if (!Flags.isByVal()) + continue; + + assert(VA.isMemLoc() && "unexpected byval pointer in reg"); + unsigned RBegin, REnd; + CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd); + ArgRegBegin = std::min(ArgRegBegin, RBegin); + + CCInfo.nextInRegsParam(); } CCInfo.rewindByValRegsInfo(); - lastInsIndex = -1; + + int lastInsIndex = -1; if (isVarArg && MFI->hasVAStart()) { - unsigned ExtraArgRegsSize; - unsigned ExtraArgRegsSaveSize; - computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0, - ExtraArgRegsSize, ExtraArgRegsSaveSize); - TotalArgRegsSaveSize += ExtraArgRegsSaveSize; + unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs); + if (RegIdx != array_lengthof(GPRArgRegs)) + ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]); } - // If the arg regs save area contains N-byte aligned values, the - // bottom of it must be at least N-byte aligned. - TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign); - TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U); + + unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin); + AFI->setArgRegsSaveSize(TotalArgRegsSaveSize); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -3178,18 +3082,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, "Byval arguments cannot be implicit"); unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed(); - ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign()); - int FrameIndex = StoreByValRegs( - CCInfo, DAG, dl, Chain, CurOrigArg, - CurByValIndex, - Ins[VA.getValNo()].PartOffset, - VA.getLocMemOffset(), - Flags.getByValSize(), - true /*force mutable frames*/, - ByValStoreOffset, - TotalArgRegsSaveSize); - ByValStoreOffset += Flags.getByValSize(); - ByValStoreOffset = std::min(ByValStoreOffset, 16U); + int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg, + CurByValIndex, VA.getLocMemOffset(), + Flags.getByValSize()); InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); CCInfo.nextInRegsParam(); } else { @@ -3894,7 +3789,6 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorFP_TO_INT(Op, DAG); - if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) { RTLIB::Libcall LC; if (Op.getOpcode() == ISD::FP_TO_SINT) @@ -3907,20 +3801,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { /*isSigned*/ false, SDLoc(Op)).first; } - SDLoc dl(Op); - unsigned Opc; - - switch (Op.getOpcode()) { - default: llvm_unreachable("Invalid opcode!"); - case ISD::FP_TO_SINT: - 
Opc = ARMISD::FTOSI; - break; - case ISD::FP_TO_UINT: - Opc = ARMISD::FTOUI; - break; - } - Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); - return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); + return Op; } static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { @@ -3960,7 +3841,6 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorINT_TO_FP(Op, DAG); - if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) { RTLIB::Libcall LC; if (Op.getOpcode() == ISD::SINT_TO_FP) @@ -3973,21 +3853,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { /*isSigned*/ false, SDLoc(Op)).first; } - SDLoc dl(Op); - unsigned Opc; - - switch (Op.getOpcode()) { - default: llvm_unreachable("Invalid opcode!"); - case ISD::SINT_TO_FP: - Opc = ARMISD::SITOF; - break; - case ISD::UINT_TO_FP: - Opc = ARMISD::UITOF; - break; - } - - Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); - return DAG.getNode(Opc, dl, VT, Op); + return Op; } SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { @@ -7239,16 +7105,20 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // Load an immediate to varEnd. unsigned varEnd = MRI.createVirtualRegister(TRC); - if (IsThumb2) { + if (Subtarget->useMovt(*MF)) { unsigned Vtmp = varEnd; if ((LoopSize & 0xFFFF0000) != 0) Vtmp = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp) - .addImm(LoopSize & 0xFFFF)); + AddDefaultPred(BuildMI(BB, dl, + TII->get(IsThumb2 ? ARM::t2MOVi16 : ARM::MOVi16), + Vtmp).addImm(LoopSize & 0xFFFF)); if ((LoopSize & 0xFFFF0000) != 0) - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd) - .addReg(Vtmp).addImm(LoopSize >> 16)); + AddDefaultPred(BuildMI(BB, dl, + TII->get(IsThumb2 ? ARM::t2MOVTi16 : ARM::MOVTi16), + varEnd) + .addReg(Vtmp) + .addImm(LoopSize >> 16)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); @@ -10076,6 +9946,28 @@ bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { return false; } +bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { + EVT VT = ExtVal.getValueType(); + + if (!isTypeLegal(VT)) + return false; + + // Don't create a loadext if we can fold the extension into a wide/long + // instruction. + // If there's more than one user instruction, the loadext is desirable no + // matter what. There can be two uses by the same instruction. + if (ExtVal->use_empty() || + !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode())) + return true; + + SDNode *U = *ExtVal->use_begin(); + if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB || + U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL)) + return false; + + return true; +} + bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; @@ -10289,9 +10181,9 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { // Thumb2 and ARM modes can use cmn for negative immediates. 
if (!Subtarget->isThumb()) - return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1; + return ARM_AM::getSOImmVal(std::abs(Imm)) != -1; if (Subtarget->isThumb2()) - return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1; + return ARM_AM::getT2SOImmVal(std::abs(Imm)) != -1; // Thumb1 doesn't have cmn, and only 8-bit immediates. return Imm >= 0 && Imm <= 255; } @@ -10302,7 +10194,7 @@ bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { /// immediate into a register. bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { // Same encoding for add/sub, just flip the sign. - int64_t AbsImm = llvm::abs64(Imm); + int64_t AbsImm = std::abs(Imm); if (!Subtarget->isThumb()) return ARM_AM::getSOImmVal(AbsImm) != -1; if (Subtarget->isThumb2()) @@ -11198,9 +11090,12 @@ bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { // For the real atomic operations, we have ldrex/strex up to 32 bits, // and up to 64 bits on the non-M profiles -bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { +TargetLoweringBase::AtomicRMWExpansionKind +ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - return Size <= (Subtarget->isMClass() ? 32U : 64U); + return (Size <= (Subtarget->isMClass() ? 32U : 64U)) + ? AtomicRMWExpansionKind::LLSC + : AtomicRMWExpansionKind::None; } // This has so far only been implemented for MachO. diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index ec1407d..dd4c954 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -65,11 +65,6 @@ namespace llvm { RBIT, // ARM bitreverse instruction - FTOSI, // FP to sint within a FP register. - FTOUI, // FP to uint within a FP register. - SITOF, // sint to FP within a FP register. - UITOF, // uint to FP within a FP register. - SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out. SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag. @@ -283,6 +278,8 @@ namespace llvm { using TargetLowering::isZExtFree; bool isZExtFree(SDValue Val, EVT VT2) const override; + bool isVectorLoadExtDesirable(SDValue ExtVal) const override; + bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; @@ -346,6 +343,12 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + // FIXME: Map different constraints differently. + return InlineAsm::Constraint_m; + } + const ARMSubtarget* getSubtarget() const { return Subtarget; } @@ -360,6 +363,9 @@ namespace llvm { return true; } + bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, + unsigned &PrefAlign) const override; + /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. 
FastISel *createFastISel(FunctionLoweringInfo &funcInfo, @@ -404,7 +410,8 @@ namespace llvm { bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + TargetLoweringBase::AtomicRMWExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; bool useLoadStackGuardNode() const override; @@ -525,12 +532,8 @@ namespace llvm { SDLoc dl, SDValue &Chain, const Value *OrigArg, unsigned InRegsParamRecordIdx, - unsigned OffsetFromOrigArg, - unsigned ArgOffset, - unsigned ArgSize, - bool ForceMutable, - unsigned ByValStoreOffset, - unsigned TotalArgRegsSaveSize) const; + int ArgOffset, + unsigned ArgSize) const; void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, @@ -538,12 +541,6 @@ namespace llvm { unsigned TotalArgRegsSaveSize, bool ForceMutable = false) const; - void computeRegArea(CCState &CCInfo, MachineFunction &MF, - unsigned InRegsParamRecordIdx, - unsigned ArgSize, - unsigned &ArgRegsSize, - unsigned &ArgRegsSaveSize) const; - SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const override; diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 7d27cf3..e79608d 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -983,7 +983,12 @@ class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> { class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [IsARM, HasV6]; } - +class VFPPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [HasVFP2]; +} +class VFPNoNEONPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [HasVFP2, DontUseNEONForFP]; +} //===----------------------------------------------------------------------===// // Thumb Instruction Format Definitions. // diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index bc617f0..7c004c9 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -30,8 +30,7 @@ using namespace llvm; ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI(STI) { -} + : ARMBaseInstrInfo(STI), RI() {} /// getNoopForMachoTarget - Return the noop instruction to use for a noop. void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { @@ -146,6 +145,10 @@ namespace { return false; const ARMSubtarget &STI = static_cast<const ARMSubtarget &>(MF.getSubtarget()); + // Don't do this for Thumb1. 
+ if (STI.isThumb1Only()) + return false; + const TargetMachine &TM = MF.getTarget(); if (TM.getRelocationModel() != Reloc::PIC_) return false; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 126c552..c3984ca 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -199,6 +199,9 @@ def HasV6M : Predicate<"Subtarget->hasV6MOps()">, def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, AssemblerPredicate<"HasV6T2Ops", "armv6t2">; def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; +def HasV6K : Predicate<"Subtarget->hasV6KOps()">, + AssemblerPredicate<"HasV6KOps", "armv6k">; +def NoV6K : Predicate<"!Subtarget->hasV6KOps()">; def HasV7 : Predicate<"Subtarget->hasV7Ops()">, AssemblerPredicate<"HasV7Ops", "armv7">; def HasV8 : Predicate<"Subtarget->hasV8Ops()">, @@ -223,6 +226,8 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">, AssemblerPredicate<"FeatureCrypto", "crypto">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; +def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">, + AssemblerPredicate<"FeatureV8_1a", "v8.1a">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float">; def HasDivide : Predicate<"Subtarget->hasDivide()">, @@ -1835,11 +1840,11 @@ def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary, let Inst{7-0} = imm; } -def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>; -def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>; -def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>; -def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>; -def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>; +def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6K]>; +def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6K]>; +def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6K]>; +def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6K]>; +def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6K]>; def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>; def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 2a7b4b5..a6a07a8 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -2790,7 +2790,7 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))))]>; class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode MulOp, SDNode ShOp> + ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp> : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), @@ -2826,7 +2826,7 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - SDNode MulOp, SDNode ShOp> + SDPatternOperator MulOp, SDPatternOperator ShOp> : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), @@ -3674,7 +3674,7 @@ multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, multiclass N3VMulOpSL_HS<bits<4> op11_8, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass 
itinQ32, - string OpcodeStr, string Dt, SDNode ShOp> { + string OpcodeStr, string Dt, SDPatternOperator ShOp> { def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16, OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>; def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, @@ -3711,27 +3711,38 @@ multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, } // Neon 3-argument intrinsics, -// element sizes of 8, 16 and 32 bits: -multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, - InstrItinClass itinD, InstrItinClass itinQ, +// element sizes of 16 and 32 bits: +multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, SDPatternOperator IntOp> { // 64-bit vector types. - def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD, - OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; - def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD, + def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; - def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD, + def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; // 128-bit vector types. - def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ, - OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; - def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ, + def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; - def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ, + def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; } +// element sizes of 8, 16 and 32 bits: +multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, SDPatternOperator IntOp> + :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32, + itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{ + // 64-bit vector types. + def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16, + OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; + // 128-bit vector types. 
+ def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16, + OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; +} // Neon Long Multiply-Op vector operations, // element sizes of 8, 16 and 32 bits: @@ -4305,6 +4316,147 @@ defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>; defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; +let Predicates = [HasNEON, HasV8_1a] in { + // v8.1a Neon Rounding Double Multiply-Op vector operations, + // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long + // (Q += D * D) + defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqadds + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v2i32 (int_arm_neon_vqadds + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v8i16 (int_arm_neon_vqadds + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), + (v8i16 QPR:$Vm))))), + (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + def : Pat<(v4i32 (int_arm_neon_vqadds + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), + (v4i32 QPR:$Vm))))), + (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + + defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqadds + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh + (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, + imm:$lane))>; + def : Pat<(v2i32 (int_arm_neon_vqadds + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh + (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, + imm:$lane))>; + def : Pat<(v8i16 (int_arm_neon_vqadds + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh + (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), + imm:$lane)))))), + (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1), + (v8i16 QPR:$src2), + (v4i16 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; + def : Pat<(v4i32 (int_arm_neon_vqadds + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh + (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), + imm:$lane)))))), + (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1), + (v4i32 QPR:$src2), + (v2i32 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + + // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long + // (Q -= D * D) + defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqsubs + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v2i32 (int_arm_neon_vqsubs + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v8i16 (int_arm_neon_vqsubs + (v8i16 QPR:$src1), + (v8i16 
(int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), + (v8i16 QPR:$Vm))))), + (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + def : Pat<(v4i32 (int_arm_neon_vqsubs + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), + (v4i32 QPR:$Vm))))), + (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + + defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqsubs + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh + (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; + def : Pat<(v2i32 (int_arm_neon_vqsubs + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh + (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, + imm:$lane))>; + def : Pat<(v8i16 (int_arm_neon_vqsubs + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh + (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), + imm:$lane)))))), + (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1), + (v8i16 QPR:$src2), + (v4i16 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; + def : Pat<(v4i32 (int_arm_neon_vqsubs + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh + (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), + imm:$lane)))))), + (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1), + (v4i32 QPR:$src2), + (v2i32 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; +} // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, "vqdmlal", "s", null_frag>; @@ -6158,6 +6310,21 @@ class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst> (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; +class NVCVTIFPat<SDNode OpNode, NeonI Inst> + : NEONFPPat<(f32 (OpNode GPR:$a)), + (f32 (EXTRACT_SUBREG + (v2f32 (Inst + (INSERT_SUBREG + (v2f32 (IMPLICIT_DEF)), + (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))), + ssub_0))>; +class NVCVTFIPat<SDNode OpNode, NeonI Inst> + : NEONFPPat<(i32 (OpNode SPR:$a)), + (i32 (EXTRACT_SUBREG + (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$a, ssub_0))), + ssub_0))>; + def : N3VSPat<fadd, VADDfd>; def : N3VSPat<fsub, VSUBfd>; def : N3VSPat<fmul, VMULfd>; @@ -6173,10 +6340,22 @@ def : N2VSPat<fabs, VABSfd>; def : N2VSPat<fneg, VNEGfd>; def : N3VSPat<NEONfmax, VMAXfd>; def : N3VSPat<NEONfmin, VMINfd>; -def : N2VSPat<arm_ftosi, VCVTf2sd>; -def : N2VSPat<arm_ftoui, VCVTf2ud>; -def : N2VSPat<arm_sitof, VCVTs2fd>; -def : N2VSPat<arm_uitof, VCVTu2fd>; +def : NVCVTFIPat<fp_to_sint, VCVTf2sd>; +def : NVCVTFIPat<fp_to_uint, VCVTf2ud>; +def : NVCVTIFPat<sint_to_fp, VCVTs2fd>; +def : NVCVTIFPat<uint_to_fp, VCVTu2fd>; + +// NEON doesn't have any f64 conversions, so provide patterns to make +// sure the VFP conversions match when extracting from a vector. 
+def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), + (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; +def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), + (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; +def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), + (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; +def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), + (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; + // Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. def : Pat<(f32 (bitconvert GPR:$a)), diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index e0a9314..afff016 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -11,16 +11,10 @@ // //===----------------------------------------------------------------------===// -def SDT_FTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>; -def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>; def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; -def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>; -def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>; -def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>; -def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>; def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>; @@ -633,7 +627,7 @@ multiclass vcvt_inst<string opc, bits<2> rm, def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), - [(set SPR:$Sd, (arm_ftosi (node SPR:$Sm)))]>, + []>, Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } @@ -641,7 +635,7 @@ multiclass vcvt_inst<string opc, bits<2> rm, def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"), - [(set SPR:$Sd, (arm_ftoui (node SPR:$Sm)))]>, + []>, Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } @@ -649,7 +643,7 @@ multiclass vcvt_inst<string opc, bits<2> rm, def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"), - [(set SPR:$Sd, (arm_ftosi (f64 (node (f64 DPR:$Dm)))))]>, + []>, Requires<[HasFPARMv8, HasDPVFP]> { bits<5> Dm; @@ -664,7 +658,7 @@ multiclass vcvt_inst<string opc, bits<2> rm, def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"), - [(set SPR:$Sd, (arm_ftoui (f64 (node (f64 DPR:$Dm)))))]>, + []>, Requires<[HasFPARMv8, HasDPVFP]> { bits<5> Dm; @@ -676,6 +670,27 @@ multiclass vcvt_inst<string opc, bits<2> rm, let Inst{8} = 1; } } + + let Predicates = [HasFPARMv8] in { + def : Pat<(i32 (fp_to_sint (node SPR:$a))), + (COPY_TO_REGCLASS + (!cast<Instruction>(NAME#"SS") SPR:$a), + GPR)>; + def : Pat<(i32 (fp_to_uint (node SPR:$a))), + (COPY_TO_REGCLASS + (!cast<Instruction>(NAME#"US") SPR:$a), + GPR)>; + } + let Predicates = [HasFPARMv8, HasDPVFP] in { + def : Pat<(i32 (fp_to_sint (node (f64 DPR:$a)))), + (COPY_TO_REGCLASS + (!cast<Instruction>(NAME#"SD") DPR:$a), + GPR)>; + def : Pat<(i32 (fp_to_uint (node (f64 DPR:$a)))), + (COPY_TO_REGCLASS + 
(!cast<Instruction>(NAME#"UD") DPR:$a), + GPR)>; + } } defm VCVTA : vcvt_inst<"a", 0b00, frnd>; @@ -980,14 +995,22 @@ class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm", - [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> { + []> { let Inst{7} = 1; // s32 } +let Predicates=[HasVFP2, HasDPVFP] in { + def : VFPPat<(f64 (sint_to_fp GPR:$a)), + (VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>; + + def : VFPPat<(f64 (sint_to_fp (i32 (load addrmode5:$a)))), + (VSITOD (VLDRS addrmode5:$a))>; +} + def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, (outs SPR:$Sd),(ins SPR:$Sm), IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm", - [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> { + []> { let Inst{7} = 1; // s32 // Some single precision VFP instructions may be executed on both NEON and @@ -995,17 +1018,31 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, let D = VFPNeonA8Domain; } +def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)), + (VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>; + +def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (load addrmode5:$a)))), + (VSITOS (VLDRS addrmode5:$a))>; + def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm", - [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> { + []> { let Inst{7} = 0; // u32 } +let Predicates=[HasVFP2, HasDPVFP] in { + def : VFPPat<(f64 (uint_to_fp GPR:$a)), + (VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>; + + def : VFPPat<(f64 (uint_to_fp (i32 (load addrmode5:$a)))), + (VUITOD (VLDRS addrmode5:$a))>; +} + def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm", - [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> { + []> { let Inst{7} = 0; // u32 // Some single precision VFP instructions may be executed on both NEON and @@ -1013,6 +1050,12 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, let D = VFPNeonA8Domain; } +def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)), + (VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>; + +def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (load addrmode5:$a)))), + (VUITOS (VLDRS addrmode5:$a))>; + // FP -> Int: class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, @@ -1055,14 +1098,22 @@ class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> { + []> { let Inst{7} = 1; // Z bit } +let Predicates=[HasVFP2, HasDPVFP] in { + def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))), + (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>; + + def : VFPPat<(store (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr), + (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>; +} + def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm", - [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> { + []> { let Inst{7} = 1; // Z bit // Some single precision VFP instructions may be executed on both NEON and @@ -1070,17 +1121,31 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, let D = VFPNeonA8Domain; } +def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)), + (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>; + +def : VFPNoNEONPat<(store (i32 (fp_to_sint (f32 SPR:$a))), addrmode5:$ptr), + (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>; 
+ def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> { + []> { let Inst{7} = 1; // Z bit } +let Predicates=[HasVFP2, HasDPVFP] in { + def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))), + (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>; + + def : VFPPat<(store (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr), + (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>; +} + def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm", - [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> { + []> { let Inst{7} = 1; // Z bit // Some single precision VFP instructions may be executed on both NEON and @@ -1088,6 +1153,12 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, let D = VFPNeonA8Domain; } +def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)), + (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>; + +def : VFPNoNEONPat<(store (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr), + (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>; + // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. let Uses = [FPSCR] in { // FIXME: Verify encoding after integrated assembler is working. diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index a8d0981..eca8e28 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -19,7 +19,7 @@ #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "Thumb1RegisterInfo.h" +#include "ThumbRegisterInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -38,6 +38,7 @@ #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index ddfdb52..a68ab1b 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -149,11 +149,7 @@ public: unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; } void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; } - unsigned getArgRegsSaveSize(unsigned Align = 0) const { - if (!Align) - return ArgRegsSaveSize; - return (ArgRegsSaveSize + Align - 1) & ~(Align - 1); - } + unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; } void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; } unsigned getReturnRegsCount() const { return ReturnRegsCount; } diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index 80b4b48..e6e8cdf 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -16,6 +16,4 @@ using namespace llvm; void ARMRegisterInfo::anchor() { } -ARMRegisterInfo::ARMRegisterInfo(const ARMSubtarget &sti) - : ARMBaseRegisterInfo(sti) { -} +ARMRegisterInfo::ARMRegisterInfo() : ARMBaseRegisterInfo() {} diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index b623173..e2e650e 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -23,7 +23,7 @@ class ARMSubtarget; struct ARMRegisterInfo : public ARMBaseRegisterInfo { virtual void anchor(); public: - 
ARMRegisterInfo(const ARMSubtarget &STI); + ARMRegisterInfo(); }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 89624dd..fbec9e6 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -129,6 +129,7 @@ void ARMSubtarget::initializeEnvironment() { HasV5TEOps = false; HasV6Ops = false; HasV6MOps = false; + HasV6KOps = false; HasV6T2Ops = false; HasV7Ops = false; HasV8Ops = false; @@ -165,6 +166,7 @@ void ARMSubtarget::initializeEnvironment() { HasTrustZone = false; HasCrypto = false; HasCRC = false; + HasV8_1a = false; HasZeroCycleZeroing = false; AllowsUnalignedMem = false; Thumb2DSP = false; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index f4deddf..f36cd5c 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -56,13 +56,14 @@ protected: ARMProcClassEnum ARMProcClass; /// HasV4TOps, HasV5TOps, HasV5TEOps, - /// HasV6Ops, HasV6MOps, HasV6T2Ops, HasV7Ops, HasV8Ops - + /// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops - /// Specify whether target support specific ARM ISA variants. bool HasV4TOps; bool HasV5TOps; bool HasV5TEOps; bool HasV6Ops; bool HasV6MOps; + bool HasV6KOps; bool HasV6T2Ops; bool HasV7Ops; bool HasV8Ops; @@ -181,6 +182,9 @@ protected: /// HasCRC - if true, processor supports CRC instructions bool HasCRC; + /// HasV8_1a - if true, the processor has V8.1a: PAN and RDMA extensions + bool HasV8_1a; + /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are /// particularly effective at zeroing a VFP register. bool HasZeroCycleZeroing; @@ -287,6 +291,7 @@ public: bool hasV5TEOps() const { return HasV5TEOps; } bool hasV6Ops() const { return HasV6Ops; } bool hasV6MOps() const { return HasV6MOps; } + bool hasV6KOps() const { return HasV6KOps; } bool hasV6T2Ops() const { return HasV6T2Ops; } bool hasV7Ops() const { return HasV7Ops; } bool hasV8Ops() const { return HasV8Ops; } @@ -311,6 +316,7 @@ public: bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } + bool hasV8_1a() const { return HasV8_1a; } bool hasVirtualization() const { return HasVirtualization; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index a97a058..1bee1b0 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -37,6 +37,11 @@ EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden, " to make use of cmpxchg flow-based information"), cl::init(true)); +static cl::opt<bool> +EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden, + cl::desc("Enable ARM load/store optimization pass"), + cl::init(true)); + extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget); @@ -105,9 +110,11 @@ computeTargetABI(const Triple &TT, StringRef CPU, return TargetABI; } -static std::string computeDataLayout(const Triple &TT, - ARMBaseTargetMachine::ARMABI ABI, +static std::string computeDataLayout(StringRef TT, StringRef CPU, + const TargetOptions &Options, bool isLittle) { + const Triple Triple(TT); + auto ABI = computeTargetABI(Triple, CPU, Options); std::string Ret = ""; if (isLittle) @@ -117,7 +124,7 @@ static std::string computeDataLayout(const Triple &TT, // Big endian. 
Ret += "E"; - Ret += DataLayout::getManglingComponent(TT); + Ret += DataLayout::getManglingComponent(Triple); // Pointers are 32 bits and aligned to 32 bits. Ret += "-p:32:32"; @@ -147,7 +154,7 @@ static std::string computeDataLayout(const Triple &TT, // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit // aligned everywhere else. - if (TT.isOSNaCl()) + if (Triple.isOSNaCl()) Ret += "-S128"; else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS) Ret += "-S64"; @@ -164,9 +171,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT, + CPU, FS, Options, RM, CM, OL), TargetABI(computeTargetABI(Triple(TT), CPU, Options)), - DL(computeDataLayout(Triple(TT), TargetABI, isLittle)), TLOF(createTLOF(Triple(getTargetTriple()))), Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) { @@ -325,7 +332,7 @@ void ARMPassConfig::addIRPasses() { } bool ARMPassConfig::addPreISel() { - if (TM->getOptLevel() != CodeGenOpt::None) + if (TM->getOptLevel() == CodeGenOpt::Aggressive) // FIXME: This is using the thumb1 only constant value for // maximal global offset for merging globals. We may want // to look into using the old value for non-thumb1 code of @@ -339,32 +346,30 @@ bool ARMPassConfig::addPreISel() { bool ARMPassConfig::addInstSelector() { addPass(createARMISelDag(getARMTargetMachine(), getOptLevel())); - const ARMSubtarget *Subtarget = &getARMSubtarget(); - if (Subtarget->isTargetELF() && !Subtarget->isThumb1Only() && + if (Triple(TM->getTargetTriple()).isOSBinFormatELF() && TM->Options.EnableFastISel) addPass(createARMGlobalBaseRegPass()); return false; } void ARMPassConfig::addPreRegAlloc() { - if (getOptLevel() != CodeGenOpt::None) - addPass(createARMLoadStoreOptimizationPass(true)); - if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) + if (getOptLevel() != CodeGenOpt::None) { addPass(createMLxExpansionPass()); - // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be - // enabled when NEON is available. 
- if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() && - getARMSubtarget().hasNEON() && !DisableA15SDOptimization) { - addPass(createA15SDOptimizerPass()); + + if (EnableARMLoadStoreOpt) + addPass(createARMLoadStoreOptimizationPass(/* pre-register alloc */ true)); + + if (!DisableA15SDOptimization) + addPass(createA15SDOptimizerPass()); } } void ARMPassConfig::addPreSched2() { if (getOptLevel() != CodeGenOpt::None) { - addPass(createARMLoadStoreOptimizationPass()); + if (EnableARMLoadStoreOpt) + addPass(createARMLoadStoreOptimizationPass()); - if (getARMSubtarget().hasNEON()) - addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); + addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } // Expand some pseudo instructions into multiple instructions to allow @@ -372,26 +377,21 @@ void ARMPassConfig::addPreSched2() { addPass(createARMExpandPseudoPass()); if (getOptLevel() != CodeGenOpt::None) { - if (!getARMSubtarget().isThumb1Only()) { - // in v8, IfConversion depends on Thumb instruction widths - if (getARMSubtarget().restrictIT() && - !getARMSubtarget().prefers32BitThumb()) - addPass(createThumb2SizeReductionPass()); + // in v8, IfConversion depends on Thumb instruction widths + if (getARMSubtarget().restrictIT()) + addPass(createThumb2SizeReductionPass()); + if (!getARMSubtarget().isThumb1Only()) addPass(&IfConverterID); - } } - if (getARMSubtarget().isThumb2()) - addPass(createThumb2ITBlockPass()); + addPass(createThumb2ITBlockPass()); } void ARMPassConfig::addPreEmitPass() { - if (getARMSubtarget().isThumb2()) { - if (!getARMSubtarget().prefers32BitThumb()) - addPass(createThumb2SizeReductionPass()); + addPass(createThumb2SizeReductionPass()); - // Constant island pass work on unbundled instructions. + // Constant island pass work on unbundled instructions. + if (getARMSubtarget().isThumb2()) addPass(&UnpackMachineBundlesID); - } addPass(createARMOptimizeBarriersPass()); addPass(createARMConstantIslandPass()); diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 7f6a1ee..20ca97b 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -30,7 +30,6 @@ public: } TargetABI; protected: - const DataLayout DL; std::unique_ptr<TargetLoweringObjectFile> TLOF; ARMSubtarget Subtarget; bool isLittle; @@ -45,9 +44,8 @@ public: bool isLittle); ~ARMBaseTargetMachine() override; - const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; } + const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } const ARMSubtarget *getSubtargetImpl(const Function &F) const override; - const DataLayout *getDataLayout() const override { return &DL; } bool isLittleEndian() const { return isLittle; } /// \brief Get the TargetIRAnalysis for this target. 
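A note on the ARMPassConfig changes just above: subtarget checks move out of addPreRegAlloc/addPreSched2 and into the passes themselves, which now bail out per function — see the isCortexA9 guard added to MLxExpansionPass.cpp and the isThumb2 guard added to Thumb2ITBlockPass.cpp further down. The pass pipeline is built once per TargetMachine, while the subtarget is a per-function property, so the per-function early return is the safer place for the gate. A minimal sketch of the pattern, using only headers this patch already touches; "ExamplePass" and its Cortex-A9 gate are illustrative, not part of the commit:

#include "ARMSubtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;

namespace {
struct ExamplePass : MachineFunctionPass {
  static char ID;
  ExamplePass() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &Fn) override {
    // Gate on the per-function subtarget instead of in ARMPassConfig.
    const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
    if (!STI.isCortexA9())
      return false; // No-op on other CPUs; the pass stays in the pipeline.
    bool Modified = false;
    // ... the actual transformation would run here ...
    return Modified;
  }
};
} // end anonymous namespace

char ExamplePass::ID = 0;

This is also why addPreSched2() can now call addPass(createThumb2ITBlockPass()) unconditionally: the pass itself returns early when the function is not Thumb2.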
diff --git a/lib/Target/ARM/Android.mk b/lib/Target/ARM/Android.mk index 55a5775..6694b53 100644 --- a/lib/Target/ARM/Android.mk +++ b/lib/Target/ARM/Android.mk @@ -4,6 +4,7 @@ arm_codegen_TBLGEN_TABLES := \ ARMGenRegisterInfo.inc \ ARMGenInstrInfo.inc \ ARMGenCodeEmitter.inc \ + ARMGenCodeEmitter.inc \ ARMGenMCCodeEmitter.inc \ ARMGenMCPseudoLowering.inc \ ARMGenAsmWriter.inc \ @@ -41,10 +42,9 @@ arm_codegen_SRC_FILES := \ MLxExpansionPass.cpp \ Thumb1FrameLowering.cpp \ Thumb1InstrInfo.cpp \ - Thumb1RegisterInfo.cpp \ + ThumbRegisterInfo.cpp \ Thumb2ITBlockPass.cpp \ Thumb2InstrInfo.cpp \ - Thumb2RegisterInfo.cpp \ Thumb2SizeReduction.cpp # For the host diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 59461e8..2215efb 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -276,6 +276,9 @@ class ARMAsmParser : public MCTargetAsmParser { bool hasD16() const { return STI.getFeatureBits() & ARM::FeatureD16; } + bool hasV8_1a() const { + return STI.getFeatureBits() & ARM::FeatureV8_1a; + } void SwitchMode() { uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); @@ -342,10 +345,10 @@ public: }; - ARMAsmParser(MCSubtargetInfo & _STI, MCAsmParser & _Parser, + ARMAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(_STI), MII(MII), UC(_Parser) { - MCAsmParserExtension::Initialize(_Parser); + : STI(STI), MII(MII), UC(Parser) { + MCAsmParserExtension::Initialize(Parser); // Cache the MCRegisterInfo. MRI = getContext().getRegisterInfo(); diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 2530640..0b698197 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -40,10 +40,9 @@ add_llvm_target(ARMCodeGen MLxExpansionPass.cpp Thumb1FrameLowering.cpp Thumb1InstrInfo.cpp - Thumb1RegisterInfo.cpp + ThumbRegisterInfo.cpp Thumb2ITBlockPass.cpp Thumb2InstrInfo.cpp - Thumb2RegisterInfo.cpp Thumb2SizeReduction.cpp ) diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 16eea33..e15323d 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -637,12 +637,12 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, printRegName(O, MO1.getReg()); unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm()); - unsigned Op = ARM_AM::getAM5Op(MO2.getImm()); + ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm()); if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) { O << ", " << markup("<imm:") << "#" - << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) + << ARM_AM::getAddrOpcStr(Op) << ImmOffs * 4 << markup(">"); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMArchName.def b/lib/Target/ARM/MCTargetDesc/ARMArchName.def index 9f007a0..96a0c1a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMArchName.def +++ b/lib/Target/ARM/MCTargetDesc/ARMArchName.def @@ -30,6 +30,7 @@ ARM_ARCH_NAME("armv5t", ARMV5T, "5T", v5T) ARM_ARCH_NAME("armv5te", ARMV5TE, "5TE", v5TE) ARM_ARCH_NAME("armv6", ARMV6, "6", v6) ARM_ARCH_NAME("armv6j", ARMV6J, "6J", v6) +ARM_ARCH_NAME("armv6k", ARMV6K, "6K", v6K) ARM_ARCH_NAME("armv6t2", ARMV6T2, "6T2", v6T2) ARM_ARCH_NAME("armv6z", ARMV6Z, "6Z", v6KZ) ARM_ARCH_NAME("armv6zk", ARMV6ZK, "6ZK", v6KZ) @@ -43,6 +44,8 @@ ARM_ARCH_NAME("armv7-m", ARMV7M, "7-M", v7) ARM_ARCH_ALIAS("armv7m", ARMV7M) 
ARM_ARCH_NAME("armv8-a", ARMV8A, "8-A", v8) ARM_ARCH_ALIAS("armv8a", ARMV8A) +ARM_ARCH_NAME("armv8.1-a", ARMV8_1A, "8.1-A", v8) +ARM_ARCH_ALIAS("armv8.1a", ARMV8_1A) ARM_ARCH_NAME("iwmmxt", IWMMXT, "iwmmxt", v5TE) ARM_ARCH_NAME("iwmmxt2", IWMMXT2, "iwmmxt2", v5TE) diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 2b65520..9648ffa 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -783,6 +783,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { setAttributeItem(THUMB_ISA_use, AllowThumb32, false); break; + case ARM::ARMV6K: case ARM::ARMV6Z: case ARM::ARMV6ZK: setAttributeItem(ARM_ISA_use, Allowed, false); @@ -816,6 +817,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { break; case ARM::ARMV8A: + case ARM::ARMV8_1A: setAttributeItem(CPU_arch_profile, ApplicationProfile, false); setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, AllowThumb32, false); @@ -913,9 +915,8 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPARMv8A, /* OverwriteExisting= */ false); - setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch, - ARMBuildAttrs::AllowNeonARMv8, - /* OverwriteExisting= */ false); + // 'Advanced_SIMD_arch' must be emitted not here, but within + // ARMAsmPrinter::emitAttributes(), depending on hasV8Ops() and hasV8_1a() break; case ARM::SOFTVFP: @@ -1362,25 +1363,29 @@ void ARMELFStreamer::emitUnwindRaw(int64_t Offset, namespace llvm { -MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useDwarfDirectory, - MCInstPrinter *InstPrint, MCCodeEmitter *CE, - MCAsmBackend *TAB, bool ShowInst) { - MCStreamer *S = llvm::createAsmStreamer( - Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst); - new ARMTargetAsmStreamer(*S, OS, *InstPrint, isVerboseAsm); - return S; +MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm) { + return new ARMTargetAsmStreamer(S, OS, *InstPrint, isVerboseAsm); } MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) { return new ARMTargetStreamer(S); } +MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S, + const MCSubtargetInfo &STI) { + Triple TT(STI.getTargetTriple()); + if (TT.getObjectFormat() == Triple::ELF) + return new ARMTargetELFStreamer(S); + return new ARMTargetStreamer(S); +} + MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, bool RelaxAll, bool IsThumb) { ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb); - new ARMTargetELFStreamer(*S); // FIXME: This should eventually end up somewhere else where more // intelligent flag decisions can be made. For now we are just maintaining // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default. 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 66a1618..caa8736 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -59,6 +59,7 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) { // Exceptions handling switch (TheTriple.getOS()) { + case Triple::Bitrig: case Triple::NetBSD: ExceptionsType = ExceptionHandling::DwarfCFI; break; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index efbebd3..e48cabb 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -441,14 +441,12 @@ public: MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx) { return new ARMMCCodeEmitter(MCII, Ctx, true); } MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx) { return new ARMMCCodeEmitter(MCII, Ctx, false); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index 68d32b2..5b90de3 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ -10,6 +10,7 @@ #include "ARMMCExpr.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" using namespace llvm; #define DEBUG_TYPE "armmcexpr" diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index 06bf6c9..2be98d2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -26,8 +26,8 @@ private: const VariantKind Kind; const MCExpr *Expr; - explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr) - : Kind(_Kind), Expr(_Expr) {} + explicit ARMMCExpr(VariantKind Kind, const MCExpr *Expr) + : Kind(Kind), Expr(Expr) {} public: /// @name Construction diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 8c19785..7ff7f9a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -153,6 +153,17 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { // Use CPU to figure out the exact features ARMArchFeature = "+v8"; break; + case Triple::ARMSubArch_v8_1a: + if (NoCPU) + // v8.1a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, + // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, + // FeatureT2XtPk, FeatureCrypto, FeatureCRC, FeatureV8_1a + ARMArchFeature = "+v8.1a,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm," + "+trustzone,+t2xtpk,+crypto,+crc"; + else + // Use CPU to figure out the exact features + ARMArchFeature = "+v8.1a"; + break; case Triple::ARMSubArch_v7m: isThumb = true; if (NoCPU) @@ -195,6 +206,9 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { case Triple::ARMSubArch_v6t2: ARMArchFeature = "+v6t2"; break; + case Triple::ARMSubArch_v6k: + ARMArchFeature = "+v6k"; + break; case Triple::ARMSubArch_v6m: isThumb = true; if (NoCPU) @@ -295,27 +309,18 @@ static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -// This is duplicated code. Refactor this. 
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll) { - Triple TheTriple(TT); +static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, + MCAsmBackend &MAB, raw_ostream &OS, + MCCodeEmitter *Emitter, bool RelaxAll) { + return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, + T.getArch() == Triple::thumb); +} - switch (TheTriple.getObjectFormat()) { - default: llvm_unreachable("unsupported object format"); - case Triple::MachO: { - MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, false); - new ARMTargetStreamer(*S); - return S; - } - case Triple::COFF: - assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); - return createARMWinCOFFStreamer(Ctx, MAB, *Emitter, OS); - case Triple::ELF: - return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, - TheTriple.getArch() == Triple::thumb); - } +static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, + bool DWARFMustBeAtTheEnd) { + return createMachOStreamer(Ctx, MAB, OS, Emitter, false, DWARFMustBeAtTheEnd); } static MCInstPrinter *createARMMCInstPrinter(const Target &T, @@ -379,61 +384,53 @@ static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) { // Force static initialization. extern "C" void LLVMInitializeARMTargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn X(TheARMLETarget, createARMMCAsmInfo); - RegisterMCAsmInfoFn Y(TheARMBETarget, createARMMCAsmInfo); - RegisterMCAsmInfoFn A(TheThumbLETarget, createARMMCAsmInfo); - RegisterMCAsmInfoFn B(TheThumbBETarget, createARMMCAsmInfo); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheARMLETarget, createARMMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheARMBETarget, createARMMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget, - createARMMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget, - createARMMCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheARMLETarget, createARMMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheARMBETarget, createARMMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheThumbLETarget, createARMMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheThumbBETarget, createARMMCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheARMLETarget, createARMMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheARMBETarget, createARMMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheThumbLETarget, createARMMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheThumbBETarget, createARMMCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheARMLETarget, - ARM_MC::createARMMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheARMBETarget, - ARM_MC::createARMMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheThumbLETarget, - ARM_MC::createARMMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheThumbBETarget, - ARM_MC::createARMMCSubtargetInfo); - - // Register the MC instruction analyzer. 
- TargetRegistry::RegisterMCInstrAnalysis(TheARMLETarget, - createARMMCInstrAnalysis); - TargetRegistry::RegisterMCInstrAnalysis(TheARMBETarget, - createARMMCInstrAnalysis); - TargetRegistry::RegisterMCInstrAnalysis(TheThumbLETarget, - createARMMCInstrAnalysis); - TargetRegistry::RegisterMCInstrAnalysis(TheThumbBETarget, - createARMMCInstrAnalysis); + for (Target *T : {&TheARMLETarget, &TheARMBETarget, &TheThumbLETarget, + &TheThumbBETarget}) { + // Register the MC asm info. + RegisterMCAsmInfoFn X(*T, createARMMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(*T, createARMMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createARMMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createARMMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, + ARM_MC::createARMMCSubtargetInfo); + + // Register the MC instruction analyzer. + TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis); + + TargetRegistry::RegisterELFStreamer(*T, createELFStreamer); + TargetRegistry::RegisterCOFFStreamer(*T, createARMWinCOFFStreamer); + TargetRegistry::RegisterMachOStreamer(*T, createARMMachOStreamer); + + // Register the obj target streamer. + TargetRegistry::RegisterObjectTargetStreamer(*T, + createARMObjectTargetStreamer); + + // Register the asm streamer. + TargetRegistry::RegisterAsmTargetStreamer(*T, createARMTargetAsmStreamer); + + // Register the null TargetStreamer. + TargetRegistry::RegisterNullTargetStreamer(*T, createARMNullTargetStreamer); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createARMMCInstPrinter); + + // Register the MC relocation info. + TargetRegistry::RegisterMCRelocationInfo(*T, createARMMCRelocationInfo); + } // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(TheARMLETarget, - createARMLEMCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheARMBETarget, - createARMBEMCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheThumbLETarget, - createARMLEMCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheThumbBETarget, - createARMBEMCCodeEmitter); + for (Target *T : {&TheARMLETarget, &TheThumbLETarget}) + TargetRegistry::RegisterMCCodeEmitter(*T, createARMLEMCCodeEmitter); + for (Target *T : {&TheARMBETarget, &TheThumbBETarget}) + TargetRegistry::RegisterMCCodeEmitter(*T, createARMBEMCCodeEmitter); // Register the asm backend. TargetRegistry::RegisterMCAsmBackend(TheARMLETarget, createARMLEAsmBackend); @@ -442,44 +439,4 @@ extern "C" void LLVMInitializeARMTargetMC() { createThumbLEAsmBackend); TargetRegistry::RegisterMCAsmBackend(TheThumbBETarget, createThumbBEAsmBackend); - - // Register the object streamer. - TargetRegistry::RegisterMCObjectStreamer(TheARMLETarget, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheARMBETarget, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheThumbLETarget, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheThumbBETarget, createMCStreamer); - - // Register the asm streamer. - TargetRegistry::RegisterAsmStreamer(TheARMLETarget, createMCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(TheARMBETarget, createMCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(TheThumbLETarget, createMCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(TheThumbBETarget, createMCAsmStreamer); - - // Register the null TargetStreamer. 
- TargetRegistry::RegisterNullTargetStreamer(TheARMLETarget, - createARMNullTargetStreamer); - TargetRegistry::RegisterNullTargetStreamer(TheARMBETarget, - createARMNullTargetStreamer); - TargetRegistry::RegisterNullTargetStreamer(TheThumbLETarget, - createARMNullTargetStreamer); - TargetRegistry::RegisterNullTargetStreamer(TheThumbBETarget, - createARMNullTargetStreamer); - - // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(TheARMLETarget, createARMMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheARMBETarget, createARMMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheThumbLETarget, - createARMMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheThumbBETarget, - createARMMCInstPrinter); - - // Register the MC relocation info. - TargetRegistry::RegisterMCRelocationInfo(TheARMLETarget, - createARMMCRelocationInfo); - TargetRegistry::RegisterMCRelocationInfo(TheARMBETarget, - createARMMCRelocationInfo); - TargetRegistry::RegisterMCRelocationInfo(TheThumbLETarget, - createARMMCRelocationInfo); - TargetRegistry::RegisterMCRelocationInfo(TheThumbBETarget, - createARMMCRelocationInfo); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index c17e959..7e9ba66 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -32,6 +32,7 @@ class MCRelocationInfo; class MCTargetStreamer; class StringRef; class Target; +class Triple; class raw_ostream; extern Target TheARMLETarget, TheThumbLETarget; @@ -47,21 +48,20 @@ namespace ARM_MC { StringRef FS); } -MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useDwarfDirectory, - MCInstPrinter *InstPrint, MCCodeEmitter *CE, - MCAsmBackend *TAB, bool ShowInst); - MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S); +MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm); +MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S, + const MCSubtargetInfo &STI); MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx); MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx); MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI, @@ -80,10 +80,11 @@ MCAsmBackend *createThumbLEAsmBackend(const Target &T, const MCRegisterInfo &MRI MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -/// createARMWinCOFFStreamer - Construct a PE/COFF machine code streamer which -/// will generate a PE/COFF object file. +// Construct a PE/COFF machine code streamer which will generate a PE/COFF +// object file. MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, - MCCodeEmitter &Emitter, raw_ostream &OS); + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll); /// createARMELFObjectWriter - Construct an ELF Mach-O object writer. 
MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS, diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp index 593fe34..173cc93 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp @@ -72,14 +72,10 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) { // opcode when r4 is not in .save directive. // Compute the consecutive registers from r4 to r11. - uint32_t Range = 0; - uint32_t Mask = (1u << 4); - for (uint32_t Bit = (1u << 5); Bit < (1u << 12); Bit <<= 1) { - if ((RegSave & Bit) == 0u) - break; - ++Range; - Mask |= Bit; - } + uint32_t Mask = RegSave & 0xff0u; + uint32_t Range = countTrailingOnes(Mask >> 5); // Exclude r4. + // Mask off non-consecutive registers. Keep r4. + Mask &= ~(0xffffffe0u << Range); // Emit this opcode when the mask covers every registers. uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask); @@ -105,50 +101,24 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) { /// Emit unwind opcodes for .vsave directives void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) { - size_t i = 32; - - while (i > 16) { - uint32_t Bit = 1u << (i - 1); - if ((VFPRegSave & Bit) == 0u) { - --i; - continue; - } - - uint32_t Range = 0; - - --i; - Bit >>= 1; - - while (i > 16 && (VFPRegSave & Bit)) { - --i; - ++Range; - Bit >>= 1; + // We only have 4 bits to save the offset in the opcode so look at the lower + // and upper 16 bits separately. + for (uint32_t Regs : {VFPRegSave & 0xffff0000u, VFPRegSave & 0x0000ffffu}) { + while (Regs) { + // Now look for a run of set bits. Remember the MSB and LSB of the run. + auto RangeMSB = 32 - countLeadingZeros(Regs); + auto RangeLen = countLeadingOnes(Regs << (32 - RangeMSB)); + auto RangeLSB = RangeMSB - RangeLen; + + int Opcode = RangeLSB >= 16 + ? ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 + : ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD; + + EmitInt16(Opcode | ((RangeLSB % 16) << 4) | (RangeLen - 1)); + + // Zero out bits we're done with. 
+ Regs &= ~(-1u << RangeLSB); } - - EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | - ((i - 16) << 4) | Range); - } - - while (i > 0) { - uint32_t Bit = 1u << (i - 1); - if ((VFPRegSave & Bit) == 0u) { - --i; - continue; - } - - uint32_t Range = 0; - - --i; - Bit >>= 1; - - while (i > 0 && (VFPRegSave & Bit)) { - --i; - ++Range; - Bit >>= 1; - } - - EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | - Range); } } diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp index b344ced..dc707dc 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -37,10 +37,10 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) { } } -namespace llvm { -MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, - MCCodeEmitter &Emitter, raw_ostream &OS) { - return new ARMWinCOFFStreamer(Context, MAB, Emitter, OS); -} +MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context, + MCAsmBackend &MAB, raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll) { + return new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS); } diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 51e519d..ed2deea 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -382,6 +382,9 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { TRI = Fn.getSubtarget().getRegisterInfo(); MRI = &Fn.getRegInfo(); const ARMSubtarget *STI = &Fn.getSubtarget<ARMSubtarget>(); + // Only run this for CortexA9. + if (!STI->isCortexA9()) + return false; isLikeA9 = STI->isLikeA9() || STI->isSwift(); isSwift = STI->isSwift(); diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt index f4d9be3..2d031d0 100644 --- a/lib/Target/ARM/README-Thumb.txt +++ b/lib/Target/ARM/README-Thumb.txt @@ -232,7 +232,7 @@ Make use of hi register variants of cmp: tCMPhir / tCMPZhir. //===---------------------------------------------------------------------===// Thumb1 immediate field sometimes keep pre-scaled values. See -Thumb1RegisterInfo::eliminateFrameIndex. This is inconsistent from ARM and +ThumbRegisterInfo::eliminateFrameIndex. This is inconsistent from ARM and Thumb2. 
//===---------------------------------------------------------------------===// diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 7dcc64e..c496cd7 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -41,7 +41,7 @@ static void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const TargetInstrInfo &TII, DebugLoc dl, - const Thumb1RegisterInfo &MRI, + const ThumbRegisterInfo &MRI, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) { emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, MRI, MIFlags); @@ -53,8 +53,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); - const Thumb1RegisterInfo *RegInfo = - static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo()); + const ThumbRegisterInfo *RegInfo = + static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); if (!hasReservedCallFrame(MF)) { // If we have alloca, convert as follows: // ADJCALLSTACKDOWN -> sub, sp, sp, amount @@ -89,13 +89,12 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - const Thumb1RegisterInfo *RegInfo = - static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo()); + const ThumbRegisterInfo *RegInfo = + static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); - unsigned Align = STI.getFrameLowering()->getStackAlignment(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI->getStackSize(); assert(NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); @@ -328,17 +327,16 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const Thumb1RegisterInfo *RegInfo = - static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo()); + const ThumbRegisterInfo *RegInfo = + static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); - unsigned Align = STI.getFrameLowering()->getStackAlignment(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); assert((unsigned)NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); - const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h index b785b28..cf93203 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.h +++ b/lib/Target/ARM/Thumb1FrameLowering.h @@ -16,7 +16,7 @@ #include "ARMFrameLowering.h" #include "Thumb1InstrInfo.h" -#include "Thumb1RegisterInfo.h" +#include "ThumbRegisterInfo.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp 
index c24f740..29aaa15 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -22,8 +22,7 @@
 using namespace llvm;
 
 Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
-  : ARMBaseInstrInfo(STI), RI(STI) {
-}
+    : ARMBaseInstrInfo(STI), RI() {}
 
 /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
 void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 9fba760..f3f493d 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -15,13 +15,13 @@
 #define LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H
 
 #include "ARMBaseInstrInfo.h"
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
 
 namespace llvm {
 class ARMSubtarget;
 
 class Thumb1InstrInfo : public ARMBaseInstrInfo {
-  Thumb1RegisterInfo RI;
+  ThumbRegisterInfo RI;
 public:
   explicit Thumb1InstrInfo(const ARMSubtarget &STI);
 
@@ -36,7 +36,7 @@ public:
   /// such, whenever a client has an instance of instruction info, it should
   /// always be able to get register info as well (through this method).
   ///
-  const Thumb1RegisterInfo &getRegisterInfo() const override { return RI; }
+  const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
 
   void copyPhysReg(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, DebugLoc DL,
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index b657f2d..7bb2265 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -255,6 +255,8 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
 bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
   const ARMSubtarget &STI =
       static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+  if (!STI.isThumb2())
+    return false;
   AFI = Fn.getInfo<ARMFunctionInfo>();
   TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
   TRI = STI.getRegisterInfo();
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 62c3752..26ca7e9 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -30,8 +30,7 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
            cl::init(false));
 
 Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
-  : ARMBaseInstrInfo(STI), RI(STI) {
-}
+    : ARMBaseInstrInfo(STI), RI() {}
 
 /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
 void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 46a1f6d..916ab06 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -15,14 +15,14 @@
 #define LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H
 
 #include "ARMBaseInstrInfo.h"
-#include "Thumb2RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
 
 namespace llvm {
 class ARMSubtarget;
 class ScheduleHazardRecognizer;
 
 class Thumb2InstrInfo : public ARMBaseInstrInfo {
-  Thumb2RegisterInfo RI;
+  ThumbRegisterInfo RI;
 public:
   explicit Thumb2InstrInfo(const ARMSubtarget &STI);
 
@@ -60,7 +60,7 @@ public:
   /// such, whenever a client has an instance of instruction info, it should
   /// always be able to get register info as well (through this method).
   ///
-  const Thumb2RegisterInfo &getRegisterInfo() const override { return RI; }
+  const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
 
 private:
   void expandLoadStackGuard(MachineBasicBlock::iterator MI,
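The Thumb2ITBlockPass change above is one of several guards this patch adds (MLxExpansion and Thumb2SizeReduce get the same treatment): since these passes are unconditionally in the pipeline and the subtarget is now queried per function, each pass has to decide its own applicability inside runOnMachineFunction. A minimal sketch of the pattern, with a hypothetical pass name:

// Hedged sketch of the gating pattern: query the per-function
// ARMSubtarget up front and return false (no change) when the pass
// does not apply.
#include "ARMSubtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;

namespace {
struct ExampleThumb2Pass : public MachineFunctionPass {  // hypothetical
  static char ID;
  ExampleThumb2Pass() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
    // The pass is always in the pipeline; with per-function subtargets
    // the applicability check has to happen here, not at pass creation.
    if (!STI.isThumb2())
      return false;
    // ... actual transformation would go here ...
    return true;
  }
};
char ExampleThumb2Pass::ID = 0;
} // end anonymous namespace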
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
deleted file mode 100644
index 0d5d85a..0000000
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-//===-- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Thumb-2 implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Thumb2RegisterInfo.h"
-#include "ARM.h"
-#include "ARMSubtarget.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMSubtarget &sti)
-  : ARMBaseRegisterInfo(sti) {
-}
-
-/// emitLoadConstPool - Emits a load from constpool to materialize the
-/// specified immediate.
-void
-Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
-                                      MachineBasicBlock::iterator &MBBI,
-                                      DebugLoc dl,
-                                      unsigned DestReg, unsigned SubIdx,
-                                      int Val,
-                                      ARMCC::CondCodes Pred, unsigned PredReg,
-                                      unsigned MIFlags) const {
-  MachineFunction &MF = *MBB.getParent();
-  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
-  MachineConstantPool *ConstantPool = MF.getConstantPool();
-  const Constant *C = ConstantInt::get(
-      Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
-  unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
-
-  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
-      .addReg(DestReg, getDefRegState(true), SubIdx)
-      .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
-      .setMIFlags(MIFlags);
-}
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
deleted file mode 100644
index 1dd94cc..0000000
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Thumb-2 implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H
-#define LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H
-
-#include "ARMBaseRegisterInfo.h"
-
-namespace llvm {
-
-class ARMSubtarget;
-
-struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
-public:
-  Thumb2RegisterInfo(const ARMSubtarget &STI);
-
-  /// emitLoadConstPool - Emits a load from constpool to materialize the
-  /// specified immediate.
-  void
-  emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
-                    DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val,
-                    ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0,
-                    unsigned MIFlags = MachineInstr::NoFlags) const override;
-};
-}
-
-#endif
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 2ee908b..e967e53 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -21,6 +21,7 @@
 #include "llvm/IR/Function.h" // To access Function attributes
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
@@ -1002,6 +1003,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
 
 bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
   STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
+  if (STI->isThumb1Only() || STI->prefers32BitThumb())
+    return false;
+
   TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
 
   // Optimizing / minimizing size?
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp
index 5e2cbdc..b5f9d7e 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------------===//
+//===-- ThumbRegisterInfo.cpp - Thumb Register Information ---------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,7 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMSubtarget.h"
@@ -38,39 +38,35 @@ extern cl::opt<bool> ReuseFrameIndexVals;
 using namespace llvm;
 
-Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMSubtarget &sti)
-  : ARMBaseRegisterInfo(sti) {
-}
+ThumbRegisterInfo::ThumbRegisterInfo() : ARMBaseRegisterInfo() {}
+
+const TargetRegisterClass *
+ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+                                             const MachineFunction &MF) const {
+  if (!MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+    return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC, MF);
 
-const TargetRegisterClass*
-Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
-  const {
   if (ARM::tGPRRegClass.hasSubClassEq(RC))
     return &ARM::tGPRRegClass;
-  return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC);
+  return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC, MF);
 }
 
 const TargetRegisterClass *
-Thumb1RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
-  const {
+ThumbRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+                                      unsigned Kind) const {
+  if (!MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+    return ARMBaseRegisterInfo::getPointerRegClass(MF, Kind);
   return &ARM::tGPRRegClass;
 }
 
-/// emitLoadConstPool - Emits a load from constpool to materialize the
-/// specified immediate.
-void
-Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
-                                      MachineBasicBlock::iterator &MBBI,
-                                      DebugLoc dl,
-                                      unsigned DestReg, unsigned SubIdx,
-                                      int Val,
-                                      ARMCC::CondCodes Pred, unsigned PredReg,
-                                      unsigned MIFlags) const {
-  assert((isARMLowRegister(DestReg) ||
-          isVirtualRegister(DestReg)) &&
-         "Thumb1 does not have ldr to high register");
-
+static void emitThumb1LoadConstPool(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator &MBBI,
+                                    DebugLoc dl, unsigned DestReg,
+                                    unsigned SubIdx, int Val,
+                                    ARMCC::CondCodes Pred, unsigned PredReg,
+                                    unsigned MIFlags) {
   MachineFunction &MF = *MBB.getParent();
+  const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
   const TargetInstrInfo &TII = *STI.getInstrInfo();
   MachineConstantPool *ConstantPool = MF.getConstantPool();
   const Constant *C = ConstantInt::get(
@@ -83,6 +79,42 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
       .setMIFlags(MIFlags);
 }
 
+static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator &MBBI,
+                                    DebugLoc dl, unsigned DestReg,
+                                    unsigned SubIdx, int Val,
+                                    ARMCC::CondCodes Pred, unsigned PredReg,
+                                    unsigned MIFlags) {
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  MachineConstantPool *ConstantPool = MF.getConstantPool();
+  const Constant *C = ConstantInt::get(
+      Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+  unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+  BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
+      .addReg(DestReg, getDefRegState(true), SubIdx)
+      .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
+      .setMIFlags(MIFlags);
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ThumbRegisterInfo::emitLoadConstPool(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+    unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred,
+    unsigned PredReg, unsigned MIFlags) const {
+  MachineFunction &MF = *MBB.getParent();
+  const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+  if (STI.isThumb1Only()) {
+    assert((isARMLowRegister(DestReg) || isVirtualRegister(DestReg)) &&
+           "Thumb1 does not have ldr to high register");
+    return emitThumb1LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred,
+                                   PredReg, MIFlags);
+  }
+  return emitThumb2LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred,
+                                 PredReg, MIFlags);
+}
 
 /// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
 /// a destreg = basereg + immediate in Thumb code. Materialize the immediate
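The merged emitLoadConstPool above selects tLDRpci or t2LDRpci from the per-function subtarget, so callers need only the one ThumbRegisterInfo class. A hedged usage sketch (wrapper name hypothetical; assumes the ThumbRegisterInfo declaration keeps the ARMCC::AL / PredReg / MIFlags defaults visible in the deleted Thumb2RegisterInfo.h):

// Hypothetical call site: materialize a 32-bit immediate through the
// merged helper.  On Thumb1 this expands to tLDRpci (DestReg must be a
// low or virtual register); on Thumb2 it expands to t2LDRpci.
#include "ThumbRegisterInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/DebugLoc.h"

static void materializeImm(const llvm::ThumbRegisterInfo &TRI,
                           llvm::MachineBasicBlock &MBB,
                           llvm::MachineBasicBlock::iterator &MBBI,
                           llvm::DebugLoc dl, unsigned DestReg, int Val) {
  // SubIdx = 0: define the whole register, not a sub-register; the
  // predicate operands fall back to the assumed ARMCC::AL defaults.
  TRI.emitLoadConstPool(MBB, MBBI, dl, DestReg, 0, Val);
}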
@@ -317,12 +349,14 @@ static unsigned convertToNonSPOpcode(unsigned Opcode) {
   return Opcode;
 }
 
-bool Thumb1RegisterInfo::
-rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
-                  unsigned FrameReg, int &Offset,
-                  const ARMBaseInstrInfo &TII) const {
+bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II,
+                                          unsigned FrameRegIdx,
+                                          unsigned FrameReg, int &Offset,
+                                          const ARMBaseInstrInfo &TII) const {
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
+  assert(MBB.getParent()->getSubtarget<ARMSubtarget>().isThumb1Only() &&
+         "This isn't needed for thumb2!");
   DebugLoc dl = MI.getDebugLoc();
   MachineInstrBuilder MIB(*MBB.getParent(), &MI);
   unsigned Opcode = MI.getOpcode();
@@ -386,8 +420,13 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
   return Offset == 0;
 }
 
-void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+void ThumbRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
                                            int64_t Offset) const {
+  const MachineFunction &MF = *MI.getParent()->getParent();
+  const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+  if (!STI.isThumb1Only())
+    return ARMBaseRegisterInfo::resolveFrameIndex(MI, BaseReg, Offset);
+
   const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
   int Off = Offset; // ARM doesn't need the general 64-bit offsets
   unsigned i = 0;
@@ -403,12 +442,15 @@ void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
 
 /// saveScavengerRegister - Spill the register so it can be used by the
 /// register scavenger. Return true.
-bool
-Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
-                                          MachineBasicBlock::iterator I,
-                                          MachineBasicBlock::iterator &UseMI,
-                                          const TargetRegisterClass *RC,
-                                          unsigned Reg) const {
+bool ThumbRegisterInfo::saveScavengerRegister(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+    MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC,
+    unsigned Reg) const {
+
+  const ARMSubtarget &STI = MBB.getParent()->getSubtarget<ARMSubtarget>();
+  if (!STI.isThumb1Only())
+    return ARMBaseRegisterInfo::saveScavengerRegister(MBB, I, UseMI, RC, Reg);
+
   // Thumb1 can't use the emergency spill slot on the stack because
   // ldr/str immediate offsets must be positive, and if we're referencing
   // off the frame pointer (if, for example, there are alloca() calls in
@@ -452,14 +494,18 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
   return true;
 }
 
-void
-Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                        int SPAdj, unsigned FIOperandNum,
-                                        RegScavenger *RS) const {
-  unsigned VReg = 0;
+void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                            int SPAdj, unsigned FIOperandNum,
+                                            RegScavenger *RS) const {
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+  if (!STI.isThumb1Only())
+    return ARMBaseRegisterInfo::eliminateFrameIndex(II, SPAdj, FIOperandNum,
+                                                    RS);
+
+  unsigned VReg = 0;
   const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   DebugLoc dl = MI.getDebugLoc();
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/ThumbRegisterInfo.h
index 5feaf52..23aaff3 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/ThumbRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl -*- C++ -*-===//
+//===- ThumbRegisterInfo.h - Thumb Register Information Impl -*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,8 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains the Thumb-1 implementation of the TargetRegisterInfo
-// class.
+// This file contains the Thumb implementation of the TargetRegisterInfo
+// class. With the exception of emitLoadConstPool, Thumb2 tracks
+// ARMBaseRegisterInfo; Thumb1 overrides the functions below.
 //
 //===----------------------------------------------------------------------===//
 
@@ -22,12 +23,13 @@ namespace llvm {
 class ARMSubtarget;
 class ARMBaseInstrInfo;
 
-struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
+struct ThumbRegisterInfo : public ARMBaseRegisterInfo {
 public:
-  Thumb1RegisterInfo(const ARMSubtarget &STI);
+  ThumbRegisterInfo();
 
   const TargetRegisterClass *
-  getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+  getLargestLegalSuperClass(const TargetRegisterClass *RC,
+                            const MachineFunction &MF) const override;
 
   const TargetRegisterClass *
   getPointerRegClass(const MachineFunction &MF,
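Taken together with the RI(STI) to RI() constructor changes earlier in this section, the header above (truncated here) shows the shape of the whole refactor: ThumbRegisterInfo is default-constructed and stateless, and each overridden hook receives the MachineFunction, so the Thumb1/Thumb2 decision is made per call rather than captured at construction. In plain C++ the pattern looks roughly like this (all names hypothetical):

// Plain-C++ sketch of replacing two subtarget-specific subclasses with
// one class that dispatches on a per-call context, as this patch does
// for Thumb1RegisterInfo/Thumb2RegisterInfo.
#include <iostream>

struct Context { bool isThumb1Only; };  // stands in for MachineFunction

struct BaseInfo {                       // stands in for ARMBaseRegisterInfo
  virtual ~BaseInfo() = default;
  virtual const char *pointerClass(const Context &Ctx) const {
    return "GPR";
  }
};

struct MergedThumbInfo : BaseInfo {     // stands in for ThumbRegisterInfo
  const char *pointerClass(const Context &Ctx) const override {
    if (!Ctx.isThumb1Only)
      return BaseInfo::pointerClass(Ctx);  // Thumb2 tracks the base class
    return "tGPR";                         // Thumb1-specific behaviour
  }
};

int main() {
  MergedThumbInfo RI;                   // default-constructed, stateless
  std::cout << RI.pointerClass({true}) << ' '
            << RI.pointerClass({false}) << '\n';  // prints: tGPR GPR
}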