Diffstat (limited to 'lib/Target/ARM')
62 files changed, 2557 insertions, 1460 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 02db53a..d3cc068 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -34,16 +34,12 @@ FunctionPass *createA15SDOptimizerPass();
 FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
 FunctionPass *createARMExpandPseudoPass();
 FunctionPass *createARMGlobalBaseRegPass();
-FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
 FunctionPass *createARMConstantIslandPass();
 FunctionPass *createMLxExpansionPass();
 FunctionPass *createThumb2ITBlockPass();
 FunctionPass *createARMOptimizeBarriersPass();
 FunctionPass *createThumb2SizeReductionPass();
-/// \brief Creates an ARM-specific Target Transformation Info pass.
-ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM);
-
 void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                   ARMAsmPrinter &AP);
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 80b976b..f080c60 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -147,6 +147,11 @@ def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass",
 def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
                                        "NaCl trap">;
+// RenderScript-specific support for 64-bit long types on all targets
+def FeatureLong64 : SubtargetFeature<"long64", "UseLong64",
+                                     "true",
+                                     "long type is forced to be 64-bit">;
+
 // ARM ISAs.
 def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
                                  "Support ARM v4T instructions">;
@@ -270,17 +275,6 @@ def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
                                  FeatureHWDivARM]>;
-def FeatureAPCS : SubtargetFeature<"apcs", "TargetABI", "ARM_ABI_APCS",
-                                   "Use the APCS ABI">;
-
-def FeatureAAPCS : SubtargetFeature<"aapcs", "TargetABI", "ARM_ABI_AAPCS",
-                                    "Use the AAPCS ABI">;
-
-// RenderScript-specific support for 64-bit long types on all targets
-def FeatureLong64 : SubtargetFeature<"long64", "UseLong64",
-                                     "true",
-                                     "long type is forced to be 64-bit">;
-
 class ProcNoItin<string Name, list<SubtargetFeature> Features>
  : Processor<Name, NoItineraries, Features>;
@@ -336,6 +330,12 @@ def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
 // V6M Processors.
 def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
                                                 FeatureDB, FeatureMClass]>;
+def : Processor<"cortex-m0plus", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
+                                                    FeatureDB, FeatureMClass]>;
+def : Processor<"cortex-m1", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
+                                                FeatureDB, FeatureMClass]>;
+def : Processor<"sc000", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
+                                            FeatureDB, FeatureMClass]>;
 // V6T2 Processors.
 def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops,
@@ -395,10 +395,20 @@ def : ProcessorModel<"cortex-r5", CortexA8Model,
                      FeatureHasRAS, FeatureVFPOnlySP,
                      FeatureD16, FeatureRClass]>;
+// FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5.
+def : ProcessorModel<"cortex-r7", CortexA8Model,
+                     [ProcR5, HasV7Ops, FeatureDB,
+                      FeatureVFP3, FeatureDSPThumb2,
+                      FeatureHasRAS, FeatureVFPOnlySP,
+                      FeatureD16, FeatureMP, FeatureRClass]>;
+
 // V7M Processors.
 def : ProcNoItin<"cortex-m3", [HasV7Ops,
                                FeatureThumb2, FeatureNoARM, FeatureDB,
                                FeatureHWDiv, FeatureMClass]>;
+def : ProcNoItin<"sc300", [HasV7Ops,
+                           FeatureThumb2, FeatureNoARM, FeatureDB,
+                           FeatureHWDiv, FeatureMClass]>;
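An aside for readers skimming the .td hunks above: each SubtargetFeature record ties a feature-string name (e.g. "long64") to a boolean member (UseLong64) on the ARMSubtarget that TableGen's generated feature parser sets. A minimal standalone sketch of the resulting shape (names simplified; this is not LLVM's generated code):

    // Hedged sketch of the member/accessor pattern a SubtargetFeature drives.
    struct SubtargetSketch {
      bool UseLong64 = false; // set when "+long64" appears in the feature string
      bool useLong64() const { return UseLong64; }
    };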
 // V7EM Processors.
 def : ProcNoItin<"cortex-m4", [HasV7Ops,
@@ -427,6 +437,10 @@ def : ProcNoItin<"cortex-a53", [ProcA53, HasV8Ops, FeatureAClass,
 def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass,
                                 FeatureDB, FeatureFPARMv8,
                                 FeatureNEON, FeatureDSPThumb2]>;
+// FIXME: Cortex-A72 is currently modelled as an Cortex-A57.
+def : ProcNoItin<"cortex-a72", [ProcA57, HasV8Ops, FeatureAClass,
+                                FeatureDB, FeatureFPARMv8,
+                                FeatureNEON, FeatureDSPThumb2]>;
 // Cyclone is very similar to swift
 def : ProcessorModel<"cyclone", SwiftModel,
diff --git a/lib/Target/ARM/ARMArchExtName.def b/lib/Target/ARM/ARMArchExtName.def
new file mode 100644
index 0000000..d6da50c
--- /dev/null
+++ b/lib/Target/ARM/ARMArchExtName.def
@@ -0,0 +1,30 @@
+//===-- ARMArchExtName.def - List of the ARM Extension names ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the list of the supported ARM Architecture Extension
+// names. These can be used to enable the extension through .arch_extension
+// attribute
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef ARM_ARCHEXT_NAME
+#error "You must define ARM_ARCHEXT_NAME(NAME, ID) before including ARMArchExtName.h"
+#endif
+
+ARM_ARCHEXT_NAME("crc", CRC)
+ARM_ARCHEXT_NAME("crypto", CRYPTO)
+ARM_ARCHEXT_NAME("fp", FP)
+ARM_ARCHEXT_NAME("idiv", HWDIV)
+ARM_ARCHEXT_NAME("mp", MP)
+ARM_ARCHEXT_NAME("sec", SEC)
+ARM_ARCHEXT_NAME("virt", VIRT)
+
+#undef ARM_ARCHEXT_NAME
diff --git a/lib/Target/ARM/ARMArchExtName.h b/lib/Target/ARM/ARMArchExtName.h
new file mode 100644
index 0000000..bc1157a
--- /dev/null
+++ b/lib/Target/ARM/ARMArchExtName.h
@@ -0,0 +1,26 @@
+//===-- ARMArchExtName.h - List of the ARM Extension names ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMARCHEXTNAME_H
+#define LLVM_LIB_TARGET_ARM_ARMARCHEXTNAME_H
+
+namespace llvm {
+namespace ARM {
+
+enum ArchExtKind {
+  INVALID_ARCHEXT = 0
+
+#define ARM_ARCHEXT_NAME(NAME, ID) , ID
+#include "ARMArchExtName.def"
+};
+
+} // namespace ARM
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 695fd4d..2544a01 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -16,6 +16,7 @@
 #include "ARM.h"
 #include "ARMConstantPoolValue.h"
 #include "ARMFPUName.h"
+#include "ARMArchExtName.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMTargetMachine.h"
 #include "ARMTargetObjectFile.h"
@@ -57,6 +58,11 @@ using namespace llvm;
 
 #define DEBUG_TYPE "asm-printer"
 
+ARMAsmPrinter::ARMAsmPrinter(TargetMachine &TM,
+                             std::unique_ptr<MCStreamer> Streamer)
+    : AsmPrinter(TM, std::move(Streamer)), AFI(nullptr), MCP(nullptr),
+      InConstantPool(false) {}
+
 void ARMAsmPrinter::EmitFunctionBodyEnd() {
   // Make sure to terminate any constant pools that were at the end
   // of the function.
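ARMArchExtName.def above uses the X-macro idiom: each includer defines ARM_ARCHEXT_NAME to stamp the same list into whatever construct it needs (the enum in ARMArchExtName.h, a name-to-ID lookup elsewhere). A self-contained sketch of the technique, with an inline ARCHEXT_LIST standing in for the #include of the .def file:

    #include <cstdio>
    #include <cstring>

    // Stand-in for #include "ARMArchExtName.def"; same expansion mechanics.
    #define ARCHEXT_LIST \
      ARM_ARCHEXT_NAME("crc", CRC) \
      ARM_ARCHEXT_NAME("crypto", CRYPTO) \
      ARM_ARCHEXT_NAME("idiv", HWDIV)

    enum ArchExtKind {
      INVALID_ARCHEXT = 0 // each list entry expands to ", CRC", ", CRYPTO", ...
    #define ARM_ARCHEXT_NAME(NAME, ID) , ID
      ARCHEXT_LIST
    #undef ARM_ARCHEXT_NAME
    };

    // The same list expands a second time into a string -> enum lookup.
    static ArchExtKind parseArchExt(const char *Name) {
    #define ARM_ARCHEXT_NAME(NAME, ID) \
      if (std::strcmp(Name, NAME) == 0) return ID;
      ARCHEXT_LIST
    #undef ARM_ARCHEXT_NAME
      return INVALID_ARCHEXT;
    }

    int main() { std::printf("%d\n", parseArchExt("crc")); } // prints 1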
@@ -76,8 +82,7 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() { } void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { - uint64_t Size = - TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType()); + uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType()); assert(Size && "C++ constructor pointer had zero size!"); const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts()); @@ -99,6 +104,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { AFI = MF.getInfo<ARMFunctionInfo>(); MCP = MF.getConstantPool(); + Subtarget = &MF.getSubtarget<ARMSubtarget>(); SetupMachineFunction(MF); @@ -120,6 +126,23 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit the rest of the function body. EmitFunctionBody(); + // If we need V4T thumb mode Register Indirect Jump pads, emit them. + // These are created per function, rather than per TU, since it's + // relatively easy to exceed the thumb branch range within a TU. + if (! ThumbIndirectPads.empty()) { + OutStreamer.EmitAssemblerFlag(MCAF_Code16); + EmitAlignment(1); + for (unsigned i = 0, e = ThumbIndirectPads.size(); i < e; i++) { + OutStreamer.EmitLabel(ThumbIndirectPads[i].second); + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBX) + .addReg(ThumbIndirectPads[i].first) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); + } + ThumbIndirectPads.clear(); + } + // We didn't modify anything. return false; } @@ -183,7 +206,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, MCSymbol *ARMAsmPrinter:: GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = TM.getDataLayout(); SmallString<60> Name; raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '_' << uid2; @@ -192,7 +215,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = TM.getDataLayout(); SmallString<60> Name; raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH" << getFunctionNumber(); @@ -414,7 +437,8 @@ void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, } void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { - if (Subtarget->isTargetMachO()) { + Triple TT(TM.getTargetTriple()); + if (TT.isOSBinFormatMachO()) { Reloc::Model RelocM = TM.getRelocationModel(); if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) { // Declare all the text sections up front (before the DWARF sections @@ -477,10 +501,17 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified); // Emit ARM Build Attributes - if (Subtarget->isTargetELF()) + if (TT.isOSBinFormatELF()) emitAttributes(); - if (!M.getModuleInlineAsm().empty() && Subtarget->isThumb()) + // Use the triple's architecture and subarchitecture to determine + // if we're thumb for the purposes of the top level code16 assembler + // flag. 
+ bool isThumb = TT.getArch() == Triple::thumb || + TT.getArch() == Triple::thumbeb || + TT.getSubArch() == Triple::ARMSubArch_v7m || + TT.getSubArch() == Triple::ARMSubArch_v6m; + if (!M.getModuleInlineAsm().empty() && isThumb) OutStreamer.EmitAssemblerFlag(MCAF_Code16); } @@ -509,7 +540,8 @@ emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel, void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { - if (Subtarget->isTargetMachO()) { + Triple TT(TM.getTargetTriple()); + if (TT.isOSBinFormatMachO()) { // All darwin targets use mach-o. const TargetLoweringObjectFileMachO &TLOFMacho = static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); @@ -552,7 +584,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { } // Emit a .data.rel section containing any stubs that were created. - if (Subtarget->isTargetELF()) { + if (TT.isOSBinFormatELF()) { const TargetLoweringObjectFileELF &TLOFELF = static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); @@ -562,7 +594,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); if (!Stubs.empty()) { OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *TD = TM.getDataLayout(); for (auto &stub: Stubs) { OutStreamer.EmitLabel(stub.first); @@ -612,69 +644,96 @@ void ARMAsmPrinter::emitAttributes() { MCTargetStreamer &TS = *OutStreamer.getTargetStreamer(); ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); - ATS.switchVendor("aeabi"); + ATS.emitTextAttribute(ARMBuildAttrs::conformance, "2.09"); - std::string CPUString = Subtarget->getCPUString(); + ATS.switchVendor("aeabi"); - // FIXME: remove krait check when GNU tools support krait cpu - if (CPUString != "generic" && CPUString != "krait") - ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString); + // Compute ARM ELF Attributes based on the default subtarget that + // we'd have constructed. The existing ARM behavior isn't LTO clean + // anyhow. + // FIXME: For ifunc related functions we could iterate over and look + // for a feature string that doesn't match the default one. + StringRef TT = TM.getTargetTriple(); + StringRef CPU = TM.getTargetCPU(); + StringRef FS = TM.getTargetFeatureString(); + std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU); + if (!FS.empty()) { + if (!ArchFS.empty()) + ArchFS = ArchFS + "," + FS.str(); + else + ArchFS = FS; + } + const ARMBaseTargetMachine &ATM = + static_cast<const ARMBaseTargetMachine &>(TM); + const ARMSubtarget STI(TT, CPU, ArchFS, ATM, ATM.isLittleEndian()); + + std::string CPUString = STI.getCPUString(); + + if (CPUString != "generic") { + // FIXME: remove krait check when GNU tools support krait cpu + if (STI.isKrait()) { + ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9"); + // We consider krait as a "cortex-a9" + hwdiv CPU + // Enable hwdiv through ".arch_extension idiv" + if (STI.hasDivide() || STI.hasDivideInARMMode()) + ATS.emitArchExtension(ARM::HWDIV); + } else + ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString); + } - ATS.emitAttribute(ARMBuildAttrs::CPU_arch, - getArchForCPU(CPUString, Subtarget)); + ATS.emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(CPUString, &STI)); // Tag_CPU_arch_profile must have the default value of 0 when "Architecture // profile is not applicable (e.g. pre v7, or cross-profile code)". 
- if (Subtarget->hasV7Ops()) { - if (Subtarget->isAClass()) { + if (STI.hasV7Ops()) { + if (STI.isAClass()) { ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, ARMBuildAttrs::ApplicationProfile); - } else if (Subtarget->isRClass()) { + } else if (STI.isRClass()) { ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, ARMBuildAttrs::RealTimeProfile); - } else if (Subtarget->isMClass()) { + } else if (STI.isMClass()) { ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, ARMBuildAttrs::MicroControllerProfile); } } - ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use, Subtarget->hasARMOps() ? - ARMBuildAttrs::Allowed : ARMBuildAttrs::Not_Allowed); - if (Subtarget->isThumb1Only()) { - ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::Allowed); - } else if (Subtarget->hasThumb2()) { + ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use, + STI.hasARMOps() ? ARMBuildAttrs::Allowed + : ARMBuildAttrs::Not_Allowed); + if (STI.isThumb1Only()) { + ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); + } else if (STI.hasThumb2()) { ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::AllowThumb32); } - if (Subtarget->hasNEON()) { + if (STI.hasNEON()) { /* NEON is not exactly a VFP architecture, but GAS emit one of * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ - if (Subtarget->hasFPARMv8()) { - if (Subtarget->hasCrypto()) + if (STI.hasFPARMv8()) { + if (STI.hasCrypto()) ATS.emitFPU(ARM::CRYPTO_NEON_FP_ARMV8); else ATS.emitFPU(ARM::NEON_FP_ARMV8); - } - else if (Subtarget->hasVFP4()) + } else if (STI.hasVFP4()) ATS.emitFPU(ARM::NEON_VFPV4); else ATS.emitFPU(ARM::NEON); // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture - if (Subtarget->hasV8Ops()) + if (STI.hasV8Ops()) ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, ARMBuildAttrs::AllowNeonARMv8); } else { - if (Subtarget->hasFPARMv8()) + if (STI.hasFPARMv8()) // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one // FPU, but there are two different names for it depending on the CPU. - ATS.emitFPU(Subtarget->hasD16() ? ARM::FPV5_D16 : ARM::FP_ARMV8); - else if (Subtarget->hasVFP4()) - ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV4_D16 : ARM::VFPV4); - else if (Subtarget->hasVFP3()) - ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV3_D16 : ARM::VFPV3); - else if (Subtarget->hasVFP2()) + ATS.emitFPU(STI.hasD16() ? ARM::FPV5_D16 : ARM::FP_ARMV8); + else if (STI.hasVFP4()) + ATS.emitFPU(STI.hasD16() ? ARM::VFPV4_D16 : ARM::VFPV4); + else if (STI.hasVFP3()) + ATS.emitFPU(STI.hasD16() ? ARM::VFPV3_D16 : ARM::VFPV3); + else if (STI.hasVFP2()) ATS.emitFPU(ARM::VFPV2); } @@ -694,11 +753,42 @@ void ARMAsmPrinter::emitAttributes() { // Signal various FP modes. if (!TM.Options.UnsafeFPMath) { - ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::Allowed); - ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions, - ARMBuildAttrs::Allowed); + ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, + ARMBuildAttrs::IEEEDenormals); + ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions, ARMBuildAttrs::Allowed); + + // If the user has permitted this code to choose the IEEE 754 + // rounding at run-time, emit the rounding attribute. + if (TM.Options.HonorSignDependentRoundingFPMathOption) + ATS.emitAttribute(ARMBuildAttrs::ABI_FP_rounding, ARMBuildAttrs::Allowed); + } else { + if (!STI.hasVFP2()) { + // When the target doesn't have an FPU (by design or + // intention), the assumptions made on the software support + // mirror that of the equivalent hardware support *if it + // existed*. 
For v7 and better we indicate that denormals are + // flushed preserving sign, and for V6 we indicate that + // denormals are flushed to positive zero. + if (STI.hasV7Ops()) + ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, + ARMBuildAttrs::PreserveFPSign); + } else if (STI.hasVFP3()) { + // In VFPv4, VFPv4U, VFPv3, or VFPv3U, it is preserved. That is, + // the sign bit of the zero matches the sign bit of the input or + // result that is being flushed to zero. + ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, + ARMBuildAttrs::PreserveFPSign); + } + // For VFPv2 implementations it is implementation defined as + // to whether denormals are flushed to positive zero or to + // whatever the sign of zero is (ARM v7AR ARM 2.7.5). Historically + // LLVM has chosen to flush this to positive zero (most likely for + // GCC compatibility), so that's the chosen value here (the + // absence of its emission implies zero). } + // TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath is the + // equivalent of GCC's -ffinite-math-only flag. if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath) ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model, ARMBuildAttrs::Allowed); @@ -706,7 +796,7 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model, ARMBuildAttrs::AllowIEE754); - if (Subtarget->allowsUnalignedMem()) + if (STI.allowsUnalignedMem()) ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access, ARMBuildAttrs::Allowed); else @@ -719,21 +809,28 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ABI_align_preserved, 1); // ABI_HardFP_use attribute to indicate single precision FP. - if (Subtarget->isFPOnlySP()) + if (STI.isFPOnlySP()) ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use, ARMBuildAttrs::HardFPSinglePrecision); // Hard float. Use both S and D registers and conform to AAPCS-VFP. - if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) + if (STI.isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS); // FIXME: Should we signal R9 usage? - if (Subtarget->hasFP16()) - ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP); + if (STI.hasFP16()) + ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP); + + // FIXME: To support emitting this build attribute as GCC does, the + // -mfp16-format option and associated plumbing must be + // supported. For now the __fp16 type is exposed by default, so this + // attribute should be emitted with value 1. + ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format, + ARMBuildAttrs::FP16FormatIEEE); - if (Subtarget->hasMPExtension()) - ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP); + if (STI.hasMPExtension()) + ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP); // Hardware divide in ARM mode is part of base arch, starting from ARMv8. // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M). @@ -741,14 +838,14 @@ void ARMAsmPrinter::emitAttributes() { // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits. // AllowDIVExt is only emitted if hwdiv isn't available in the base arch; // otherwise, the default value (AllowDIVIfExists) applies. 
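// (Editorial example, not part of the patch: a Cortex-A15 build, ARMv7-A
//  base architecture plus ARM-mode hwdiv, takes the AllowDIVExt branch
//  below, while on any ARMv8 core ARM-mode divide is already in the base
//  architecture, so the check fails and the default AllowDIVIfExists
//  applies with no attribute emitted.)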
- if (Subtarget->hasDivideInARMMode() && !Subtarget->hasV8Ops()) - ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt); + if (STI.hasDivideInARMMode() && !STI.hasV8Ops()) + ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt); if (MMI) { if (const Module *SourceModule = MMI->getModule()) { // ABI_PCS_wchar_t to indicate wchar_t width // FIXME: There is no way to emit value 0 (wchar_t prohibited). - if (auto WCharWidthValue = cast_or_null<ConstantInt>( + if (auto WCharWidthValue = mdconst::extract_or_null<ConstantInt>( SourceModule->getModuleFlag("wchar_size"))) { int WCharWidth = WCharWidthValue->getZExtValue(); assert((WCharWidth == 2 || WCharWidth == 4) && @@ -759,7 +856,7 @@ void ARMAsmPrinter::emitAttributes() { // ABI_enum_size to indicate enum width // FIXME: There is no way to emit value 0 (enums prohibited) or value 3 // (all enums contain a value needing 32 bits to encode). - if (auto EnumWidthValue = cast_or_null<ConstantInt>( + if (auto EnumWidthValue = mdconst::extract_or_null<ConstantInt>( SourceModule->getModuleFlag("min_enum_size"))) { int EnumWidth = EnumWidthValue->getZExtValue(); assert((EnumWidth == 1 || EnumWidth == 4) && @@ -774,22 +871,20 @@ void ARMAsmPrinter::emitAttributes() { // it as another callee-saved register, but not as SB or a TLS pointer; It // would instead be nicer to push this from the frontend as metadata, as we do // for the wchar and enum size tags - if (Subtarget->isR9Reserved()) - ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, - ARMBuildAttrs::R9Reserved); + if (STI.isR9Reserved()) + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9Reserved); else - ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, - ARMBuildAttrs::R9IsGPR); - - if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization()) - ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, - ARMBuildAttrs::AllowTZVirtualization); - else if (Subtarget->hasTrustZone()) - ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, - ARMBuildAttrs::AllowTZ); - else if (Subtarget->hasVirtualization()) - ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, - ARMBuildAttrs::AllowVirtualization); + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9IsGPR); + + if (STI.hasTrustZone() && STI.hasVirtualization()) + ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, + ARMBuildAttrs::AllowTZVirtualization); + else if (STI.hasTrustZone()) + ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, + ARMBuildAttrs::AllowTZ); + else if (STI.hasVirtualization()) + ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, + ARMBuildAttrs::AllowVirtualization); ATS.finishAttributeSection(); } @@ -858,9 +953,8 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV, void ARMAsmPrinter:: EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); - int Size = - TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(MCPV->getType()); + const DataLayout *DL = TM.getDataLayout(); + int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType()); ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV); @@ -1176,7 +1270,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { #include "ARMGenMCPseudoLowering.inc" void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = TM.getDataLayout(); // If we just ended a constant pool, mark it as such. 
if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) { @@ -1251,18 +1345,34 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case ARM::tBX_CALL: { - EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tMOVr) - .addReg(ARM::LR) - .addReg(ARM::PC) - // Add predicate operands. - .addImm(ARMCC::AL) - .addReg(0)); + if (Subtarget->hasV5TOps()) + llvm_unreachable("Expected BLX to be selected for v5t+"); + + // On ARM v4t, when doing a call from thumb mode, we need to ensure + // that the saved lr has its LSB set correctly (the arch doesn't + // have blx). + // So here we generate a bl to a small jump pad that does bx rN. + // The jump pads are emitted after the function body. + + unsigned TReg = MI->getOperand(0).getReg(); + MCSymbol *TRegSym = nullptr; + for (unsigned i = 0, e = ThumbIndirectPads.size(); i < e; i++) { + if (ThumbIndirectPads[i].first == TReg) { + TRegSym = ThumbIndirectPads[i].second; + break; + } + } - EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBX) - .addReg(MI->getOperand(0).getReg()) - // Add predicate operands. - .addImm(ARMCC::AL) - .addReg(0)); + if (!TRegSym) { + TRegSym = OutContext.CreateTempSymbol(); + ThumbIndirectPads.push_back(std::make_pair(TReg, TRegSym)); + } + + // Create a link-saving branch to the Reg Indirect Jump Pad. + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBL) + // Predicate comes first here. + .addImm(ARMCC::AL).addReg(0) + .addExpr(MCSymbolRefExpr::Create(TRegSym, OutContext))); return; } case ARM::BMOVPCRX_CALL: { diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 5ff20ce..50cb954 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -20,6 +20,7 @@ class ARMFunctionInfo; class MCOperand; class MachineConstantPool; class MachineOperand; +class MCSymbol; namespace ARM { enum DW_ISA { @@ -45,12 +46,14 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter { /// InConstantPool - Maintain state when emitting a sequence of constant /// pool entries so we can properly mark them as data regions. bool InConstantPool; + + /// ThumbIndirectPads - These maintain a per-function list of jump pad + /// labels used for ARMv4t thumb code to make register indirect calls. + SmallVector<std::pair<unsigned, MCSymbol*>, 4> ThumbIndirectPads; + public: - explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), AFI(nullptr), MCP(nullptr), - InConstantPool(false) { - Subtarget = &TM.getSubtarget<ARMSubtarget>(); - } + explicit ARMAsmPrinter(TargetMachine &TM, + std::unique_ptr<MCStreamer> Streamer); const char *getPassName() const override { return "ARM Assembly / Object Emitter"; @@ -100,12 +103,13 @@ private: const MachineInstr *MI); public: - unsigned getISAEncoding() override { + unsigned getISAEncoding(const Function *F) override { // ARM/Darwin adds ISA to the DWARF info for each function. - if (!Subtarget->isTargetMachO()) + Triple TT(TM.getTargetTriple()); + if (!TT.isOSBinFormatMachO()) return 0; - return Subtarget->isThumb() ? - ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm; + const ARMSubtarget &STI = TM.getSubtarget<ARMSubtarget>(*F); + return STI.isThumb() ? 
ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm; } private: diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 7a315c4..29ee22e 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1836,8 +1836,10 @@ bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, return false; } -MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, - bool PreferFalse) const { +MachineInstr * +ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, + SmallPtrSetImpl<MachineInstr *> &SeenMIs, + bool PreferFalse) const { assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && "Unknown select instruction"); MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); @@ -1885,6 +1887,10 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, NewMI.addOperand(FalseReg); NewMI->tieOperands(0, NewMI->getNumOperands() - 1); + // Update SeenMIs set: register newly created MI and erase removed DefMI. + SeenMIs.insert(NewMI); + SeenMIs.erase(DefMI); + // The caller will erase MI, but not DefMI. DefMI->eraseFromParent(); return NewMI; @@ -1985,8 +1991,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, unsigned NumBytes) { // This optimisation potentially adds lots of load and store // micro-operations, it's only really a great benefit to code-size. - if (!MF.getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::MinSize)) + if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize)) return false; // If only one register is pushed/popped, LLVM can use an LDR/STR @@ -2394,7 +2399,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { // Conservatively refuse to convert an instruction which isn't in the same // BB as the comparison. - // For CMPri, we need to check Sub, thus we can't return here. + // For CMPri w/ CmpValue != 0, a Sub may still be a candidate. + // Thus we cannot return here. if (CmpInstr->getOpcode() == ARM::CMPri || CmpInstr->getOpcode() == ARM::t2CMPri) MI = nullptr; @@ -2473,8 +2479,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, case ARM::t2EORrr: case ARM::t2EORri: { // Scan forward for the use of CPSR - // When checking against MI: if it's a conditional code requires - // checking of V bit, then this is not safe to do. + // When checking against MI: if it's a conditional code that requires + // checking of the V bit or C bit, then this is not safe to do. // It is safe to remove CmpInstr if CPSR is redefined or killed. // If we are done with the basic block, we need to check whether CPSR is // live-out. @@ -2541,19 +2547,30 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, OperandsToUpdate.push_back( std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); } - } else + } else { + // No Sub, so this is x = <op> y, z; cmp x, 0. switch (CC) { - default: + case ARMCC::EQ: // Z + case ARMCC::NE: // Z + case ARMCC::MI: // N + case ARMCC::PL: // N + case ARMCC::AL: // none // CPSR can be used multiple times, we should continue. 
break; - case ARMCC::VS: - case ARMCC::VC: - case ARMCC::GE: - case ARMCC::LT: - case ARMCC::GT: - case ARMCC::LE: + case ARMCC::HS: // C + case ARMCC::LO: // C + case ARMCC::VS: // V + case ARMCC::VC: // V + case ARMCC::HI: // C Z + case ARMCC::LS: // C Z + case ARMCC::GE: // N V + case ARMCC::LT: // N V + case ARMCC::GT: // Z N V + case ARMCC::LE: // Z N V + // The instruction uses the V bit or C bit which is not safe. return false; } + } } } @@ -3647,9 +3664,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // instructions). if (Latency > 0 && Subtarget.isThumb2()) { const MachineFunction *MF = DefMI->getParent()->getParent(); - if (MF->getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize)) + if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) --Latency; } return Latency; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 0ae291b..ecbcf5c 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -261,7 +261,9 @@ public: unsigned &TrueOp, unsigned &FalseOp, bool &Optimizable) const override; - MachineInstr *optimizeSelect(MachineInstr *MI, bool) const override; + MachineInstr *optimizeSelect(MachineInstr *MI, + SmallPtrSetImpl<MachineInstr *> &SeenMIs, + bool) const override; /// FoldImmediate - 'Reg' is known to be defined by a move immediate /// instruction, try to fold the immediate into the use instruction. diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 6dc0493..7574727 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -60,9 +60,8 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti) const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - const MCPhysReg *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_SaveList - : CSR_AAPCS_SaveList; + const MCPhysReg *RegList = + STI.isTargetDarwin() ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; if (!MF) return RegList; @@ -95,8 +94,7 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls return CSR_NoRegs_RegMask; - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; + return STI.isTargetDarwin() ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; } const uint32_t* @@ -117,8 +115,8 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls return nullptr; - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; + return STI.isTargetDarwin() ? 
CSR_iOS_ThisReturn_RegMask + : CSR_AAPCS_ThisReturn_RegMask; } BitVector ARMBaseRegisterInfo:: @@ -266,7 +264,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, } void -ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, +ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const { MachineRegisterInfo *MRI = &MF.getRegInfo(); std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg); @@ -356,10 +354,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { return false; // We may also need a base pointer if there are dynamic allocas or stack // pointer adjustments around calls. - if (MF.getTarget() - .getSubtargetImpl() - ->getFrameLowering() - ->hasReservedCallFrame(MF)) + if (MF.getSubtarget().getFrameLowering()->hasReservedCallFrame(MF)) return true; // A base pointer is required and allowed. Check that it isn't too late to // reserve it. @@ -370,14 +365,10 @@ bool ARMBaseRegisterInfo:: needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - unsigned StackAlign = MF.getTarget() - .getSubtargetImpl() - ->getFrameLowering() - ->getStackAlignment(); - bool requiresRealignment = - ((MFI->getMaxAlignment() > StackAlign) || - F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackAlignment)); + unsigned StackAlign = + MF.getSubtarget().getFrameLowering()->getStackAlignment(); + bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || + F->hasFnAttribute(Attribute::StackAlignment)); return requiresRealignment && canRealignStack(MF); } @@ -555,12 +546,13 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // and pick a real one. Offset += 128; // 128 bytes of spill slots - // If there is a frame pointer, try using it. + // If there's a frame pointer and the addressing mode allows it, try using it. // The FP is only available if there is no dynamic realignment. We // don't know for sure yet whether we'll need that, so we guess based // on whether there are any local variables that would trigger it. 
unsigned StackAlign = TFI->getStackAlignment(); - if (TFI->hasFP(MF) && + if (TFI->hasFP(MF) && + (MI->getDesc().TSFlags & ARMII::AddrModeMask) != ARMII::AddrModeT1_s && !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) { if (isFrameOffsetLegal(MI, FPOffset)) return false; @@ -677,7 +669,7 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, NumBits = 8; break; case ARMII::AddrModeT1_s: - NumBits = 5; + NumBits = 8; Scale = 4; isSigned = false; break; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index e9bc412..17027c2 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -135,7 +135,7 @@ public: const MachineFunction &MF, const VirtRegMap *VRM) const override; - void UpdateRegAllocHint(unsigned Reg, unsigned NewReg, + void updateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const override; bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const override; diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index bd07236..d687568 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -31,7 +31,7 @@ static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; // Try to get the first register. - if (unsigned Reg = State.AllocateReg(RegList, 4)) + if (unsigned Reg = State.AllocateReg(RegList)) State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); else { // For the 2nd half of a v2f64, do not fail. @@ -46,7 +46,7 @@ static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, } // Try to get the second register. - if (unsigned Reg = State.AllocateReg(RegList, 4)) + if (unsigned Reg = State.AllocateReg(RegList)) State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); else State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, @@ -76,11 +76,11 @@ static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 }; static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; - unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2); + unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList); if (Reg == 0) { // If we had R3 unallocated only, now we still must to waste it. - Reg = State.AllocateReg(GPRArgRegs, 4); + Reg = State.AllocateReg(GPRArgRegs); assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64"); // For the 2nd half of a v2f64, do not just fail. 
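Circling back to the isFrameOffsetLegal change a few hunks up: AddrModeT1_s (Thumb1 SP-relative loads/stores) encodes an unsigned 8-bit immediate scaled by 4, so the legal offset range is 0 to 1020 bytes, not the 0 to 124 that the old NumBits = 5 implied. A small standalone check mirroring the NumBits/Scale values above:

    #include <cassert>

    // Legality of a Thumb1 sp-relative offset per NumBits = 8, Scale = 4.
    bool isLegalT1SpOffset(int Offset) {
      const unsigned NumBits = 8, Scale = 4;
      if (Offset < 0 || Offset % Scale != 0)   // unsigned and word-aligned
        return false;
      return static_cast<unsigned>(Offset) / Scale < (1u << NumBits);
    }

    int main() {
      assert(isLegalT1SpOffset(1020));   // 255 * 4: largest encodable offset
      assert(!isLegalT1SpOffset(1024)); // out of range
      assert(!isLegalT1SpOffset(126));  // not a multiple of 4
    }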
@@ -126,7 +126,7 @@ static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 }; static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 }; - unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); + unsigned Reg = State.AllocateReg(HiRegList, LoRegList); if (Reg == 0) return false; // we didn't handle it @@ -160,6 +160,8 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, State); } +static const uint16_t RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, ARM::S4, ARM::S5, ARM::S6, ARM::S7, ARM::S8, ARM::S9, ARM::S10, ARM::S11, @@ -168,85 +170,114 @@ static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3, ARM::D4, ARM::D5, ARM::D6, ARM::D7 }; static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 }; + // Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA // has InConsecutiveRegs set, and that the last member also has // InConsecutiveRegsLast set. We must process all members of the HA before // we can allocate it, as we need to know the total number of registers that // will be needed in order to (attempt to) allocate a contiguous block. -static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, CCState &State) { - SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs(); +static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); // AAPCS HFAs must have 1-4 elements, all of the same type - assert(PendingHAMembers.size() < 4); - if (PendingHAMembers.size() > 0) - assert(PendingHAMembers[0].getLocVT() == LocVT); + if (PendingMembers.size() > 0) + assert(PendingMembers[0].getLocVT() == LocVT); // Add the argument to the list to be allocated once we know the size of the - // HA - PendingHAMembers.push_back( - CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); - - if (ArgFlags.isInConsecutiveRegsLast()) { - assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 && - "Homogeneous aggregates must have between 1 and 4 members"); - - // Try to allocate a contiguous block of registers, each of the correct - // size to hold one member. - const uint16_t *RegList; - unsigned NumRegs; - switch (LocVT.SimpleTy) { - case MVT::f32: - RegList = SRegList; - NumRegs = 16; - break; - case MVT::f64: - RegList = DRegList; - NumRegs = 8; - break; - case MVT::v2f64: - RegList = QRegList; - NumRegs = 4; - break; - default: - llvm_unreachable("Unexpected member type for HA"); - break; - } + // aggregate. Store the type's required alignmnent as extra info for later: in + // the [N x i64] case all trace has been removed by the time we actually get + // to do allocation. 
+ PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo, + ArgFlags.getOrigAlign())); - unsigned RegResult = - State.AllocateRegBlock(RegList, NumRegs, PendingHAMembers.size()); - - if (RegResult) { - for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin(); - It != PendingHAMembers.end(); ++It) { - It->convertToReg(RegResult); - State.addLoc(*It); - ++RegResult; - } - PendingHAMembers.clear(); - return true; - } + if (!ArgFlags.isInConsecutiveRegsLast()) + return true; + + // Try to allocate a contiguous block of registers, each of the correct + // size to hold one member. + unsigned Align = std::min(PendingMembers[0].getExtraInfo(), 8U); - // Register allocation failed, fall back to the stack + ArrayRef<uint16_t> RegList; + switch (LocVT.SimpleTy) { + case MVT::i32: { + RegList = RRegList; + unsigned RegIdx = State.getFirstUnallocated(RegList); - // Mark all VFP regs as unavailable (AAPCS rule C.2.vfp) - for (unsigned regNo = 0; regNo < 16; ++regNo) - State.AllocateReg(SRegList[regNo]); + // First consume all registers that would give an unaligned object. Whether + // we go on stack or in regs, no-one will be using them in future. + unsigned RegAlign = RoundUpToAlignment(Align, 4) / 4; + while (RegIdx % RegAlign != 0 && RegIdx < RegList.size()) + State.AllocateReg(RegList[RegIdx++]); - unsigned Size = LocVT.getSizeInBits() / 8; - unsigned Align = std::min(Size, 8U); + break; + } + case MVT::f32: + RegList = SRegList; + break; + case MVT::f64: + RegList = DRegList; + break; + case MVT::v2f64: + RegList = QRegList; + break; + default: + llvm_unreachable("Unexpected member type for block aggregate"); + break; + } + + unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size()); + if (RegResult) { + for (SmallVectorImpl<CCValAssign>::iterator It = PendingMembers.begin(); + It != PendingMembers.end(); ++It) { + It->convertToReg(RegResult); + State.addLoc(*It); + ++RegResult; + } + PendingMembers.clear(); + return true; + } + + // Register allocation failed, we'll be needing the stack + unsigned Size = LocVT.getSizeInBits() / 8; + if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) { + // If nothing else has used the stack until this point, a non-HFA aggregate + // can be split between regs and stack. + unsigned RegIdx = State.getFirstUnallocated(RegList); + for (auto &It : PendingMembers) { + if (RegIdx >= RegList.size()) + It.convertToMem(State.AllocateStack(Size, Size)); + else + It.convertToReg(State.AllocateReg(RegList[RegIdx++])); - for (auto It : PendingHAMembers) { - It.convertToMem(State.AllocateStack(Size, Align)); State.addLoc(It); } + PendingMembers.clear(); + return true; + } else if (LocVT != MVT::i32) + RegList = SRegList; + + // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core) + for (auto Reg : RegList) + State.AllocateReg(Reg); - // All pending members have now been allocated - PendingHAMembers.clear(); + for (auto &It : PendingMembers) { + It.convertToMem(State.AllocateStack(Size, Align)); + State.addLoc(It); + + // After the first item has been allocated, the rest are packed as tightly + // as possible. (E.g. an incoming i64 would have starting Align of 8, but + // we'll be allocating a bunch of i32 slots). 
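// (Editorial walk-through, not part of the patch: consider [2 x i64]
//  arriving as four i32 pieces with R0 already taken and an empty stack.
//  Align = 8 gives RegAlign = 2, so the while-loop above burns R1 to reach
//  an even register index; AllocateRegBlock then fails, since no block of
//  four registers remains, and the i32 split path assigns the pieces to
//  R2, R3, sp+0 and sp+4.)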
+ Align = Size; } - // This will be allocated by the last member of the HA + // All pending members have now been allocated + PendingMembers.clear(); + + // This will be allocated by the last member of the aggregate return true; } diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 526089b..7dd21ecbe 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -175,7 +175,7 @@ def CC_ARM_AAPCS_VFP : CallingConv<[ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, // HFAs are passed in a contiguous block of registers, or on the stack - CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_HA">>, + CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>, CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 29405eb..9966cd7 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -383,11 +383,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { << MCP->getConstants().size() << " CP entries, aligned to " << MCP->getConstantPoolAlignment() << " bytes *****\n"); - TII = (const ARMBaseInstrInfo *)MF->getTarget() - .getSubtargetImpl() - ->getInstrInfo(); + STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget()); + TII = STI->getInstrInfo(); AFI = MF->getInfo<ARMFunctionInfo>(); - STI = &MF->getTarget().getSubtarget<ARMSubtarget>(); isThumb = AFI->isThumbFunction(); isThumb1 = AFI->isThumb1OnlyFunction(); @@ -532,7 +530,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { // identity mapping of CPI's to CPE's. const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants(); - const DataLayout &TD = *MF->getSubtarget().getDataLayout(); + const DataLayout &TD = *MF->getTarget().getDataLayout(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); assert(Size >= 4 && "Too small constant pool entry"); @@ -1270,7 +1268,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); ImmBranches.push_back(ImmBranch(&UserMBB->back(), MaxDisp, false, UncondBr)); - BBInfo[UserMBB->getNumber()].Size += Delta; + computeBlockSize(UserMBB); adjustBBOffsetsAfter(UserMBB); return; } @@ -1952,7 +1950,9 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { DEBUG(dbgs() << "Shrink JT: " << *MI << " addr: " << *AddrMI << " lea: " << *LeaMI); unsigned Opc = ByteOk ? 
ARM::t2TBB_JT : ARM::t2TBH_JT; - MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc)) + MachineBasicBlock::iterator MI_JT = MI; + MachineInstr *NewJTMI = + BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc)) .addReg(IdxReg, getKillRegState(IdxRegKill)) .addJumpTableIndex(JTI, JTOP.getTargetFlags()) .addImm(MI->getOperand(JTOpIdx+1).getImm()); diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 2d80518..4438f50 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -22,8 +22,8 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove! @@ -887,6 +887,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned MaxAlign = MFI->getMaxAlignment(); assert (!AFI->isThumb1OnlyFunction()); // Emit bic r6, r6, MaxAlign + assert(MaxAlign <= 256 && "The BIC instruction cannot encode " + "immediates larger than 256 with all lower " + "bits set."); unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri; AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), @@ -980,7 +983,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci; unsigned PICAddOpc = IsARM - ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICADD : ARM::PICLDR) + ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD) : ARM::tPICADD; // We need a new const-pool entry to load from. @@ -1129,7 +1132,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // Add the source operands (D subregs). unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); - MIB.addReg(D0).addReg(D1); + MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0) + .addReg(D1, SrcIsKill ? RegState::Kill : 0); if (SrcIsKill) // Add an implicit kill for the Q register. 
MIB->addRegisterKilled(SrcReg, TRI, true); @@ -1342,11 +1346,9 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { } bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - const TargetMachine &TM = MF.getTarget(); - TII = static_cast<const ARMBaseInstrInfo *>( - TM.getSubtargetImpl()->getInstrInfo()); - TRI = TM.getSubtargetImpl()->getRegisterInfo(); - STI = &TM.getSubtarget<ARMSubtarget>(); + STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget()); + TII = STI->getInstrInfo(); + TRI = STI->getRegisterInfo(); AFI = MF.getInfo<ARMFunctionInfo>(); bool Modified = false; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index a5f635e..375d394 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -93,11 +93,11 @@ class ARMFastISel final : public FastISel { explicit ARMFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) : FastISel(funcInfo, libInfo), + Subtarget( + &static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget())), M(const_cast<Module &>(*funcInfo.Fn->getParent())), - TM(funcInfo.MF->getTarget()), - TII(*TM.getSubtargetImpl()->getInstrInfo()), - TLI(*TM.getSubtargetImpl()->getTargetLowering()) { - Subtarget = &TM.getSubtarget<ARMSubtarget>(); + TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()), + TLI(*Subtarget->getTargetLowering()) { AFI = funcInfo.MF->getInfo<ARMFunctionInfo>(); isThumb2 = AFI->isThumbFunction(); Context = &funcInfo.Fn->getContext(); @@ -189,9 +189,7 @@ class ARMFastISel final : public FastISel { unsigned ARMSelectCallOp(bool UseReg); unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT); - const TargetLowering *getTargetLowering() { - return TM.getSubtargetImpl()->getTargetLowering(); - } + const TargetLowering *getTargetLowering() { return &TLI; } // Call handling routines. private: @@ -586,9 +584,8 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { Reloc::Model RelocM = TM.getRelocationModel(); bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM); - const TargetRegisterClass *RC = isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; + const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass + : &ARM::GPRRegClass; unsigned DestReg = createResultReg(RC); // FastISel TLS support on non-MachO is broken, punt to SelectionDAG. @@ -893,9 +890,8 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) { // put the alloca address into a register, set the base type back to // register and continue. This should almost never happen. if (needsLowering && Addr.BaseType == Address::FrameIndexBase) { - const TargetRegisterClass *RC = isThumb2 ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; + const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass + : &ARM::GPRRegClass; unsigned ResultReg = createResultReg(RC); unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1094,9 +1090,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, // This is mostly going to be Neon/vector support. default: return false; case MVT::i1: { - unsigned Res = createResultReg(isThumb2 ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass); + unsigned Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass + : &ARM::GPRRegClass); unsigned Opc = isThumb2 ? 
ARM::t2ANDri : ARM::ANDri; SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1500,9 +1495,8 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { // Now set a register based on the comparison. Explicitly set the predicates // here. unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi; - const TargetRegisterClass *RC = isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; + const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass + : &ARM::GPRRegClass; unsigned DestReg = createResultReg(RC); Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0); unsigned ZeroReg = fastMaterializeConstant(Zero); @@ -2490,19 +2484,12 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo(); MFI->setFrameAddressIsTaken(true); - unsigned LdrOpc; - const TargetRegisterClass *RC; - if (isThumb2) { - LdrOpc = ARM::t2LDRi12; - RC = (const TargetRegisterClass*)&ARM::tGPRRegClass; - } else { - LdrOpc = ARM::LDRi12; - RC = (const TargetRegisterClass*)&ARM::GPRRegClass; - } + unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12; + const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass + : &ARM::GPRRegClass; const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo *>( - TM.getSubtargetImpl()->getRegisterInfo()); + static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo()); unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); unsigned SrcReg = FramePtr; @@ -3075,13 +3062,13 @@ namespace llvm { FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { const TargetMachine &TM = funcInfo.MF->getTarget(); - - const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>(); + const ARMSubtarget &STI = + static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget()); // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl. bool UseFastISel = false; - UseFastISel |= Subtarget->isTargetMachO() && !Subtarget->isThumb1Only(); - UseFastISel |= Subtarget->isTargetLinux() && !Subtarget->isThumb(); - UseFastISel |= Subtarget->isTargetNaCl() && !Subtarget->isThumb(); + UseFastISel |= STI.isTargetMachO() && !STI.isThumb1Only(); + UseFastISel |= STI.isTargetLinux() && !STI.isThumb(); + UseFastISel |= STI.isTargetNaCl() && !STI.isThumb(); if (UseFastISel) { // iOS always has a FP for backtracking, force other targets diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 80add7a..5a5bd57 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -164,9 +164,13 @@ static int sizeOfSPAdjustment(const MachineInstr *MI) { static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes) { const MachineFrameInfo *MFI = MF.getFrameInfo(); - if (MFI->getStackProtectorIndex() > 0) - return StackSizeInBytes >= 4080; - return StackSizeInBytes >= 4096; + const Function *F = MF.getFunction(); + unsigned StackProbeSize = (MFI->getStackProtectorIndex() > 0) ? 
4080 : 4096; + if (F->hasFnAttribute("stack-probe-size")) + F->getFnAttribute("stack-probe-size") + .getValueAsString() + .getAsInteger(0, StackProbeSize); + return StackSizeInBytes >= StackProbeSize; } namespace { @@ -203,12 +207,77 @@ struct StackAdjustingInsts { unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, std::next(Info.I), dl, - TII.get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex(CFIIndex); + TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } } }; } +/// Emit an instruction sequence that will align the address in +/// register Reg by zero-ing out the lower bits. For versions of the +/// architecture that support Neon, this must be done in a single +/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a +/// single instruction. That function only gets called when optimizing +/// spilling of D registers on a core with the Neon instruction set +/// present. +static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, + const TargetInstrInfo &TII, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, const unsigned Reg, + const unsigned Alignment, + const bool MustBeSingleInstruction) { + const ARMSubtarget &AST = + static_cast<const ARMSubtarget &>(MF.getSubtarget()); + const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops(); + const unsigned AlignMask = Alignment - 1; + const unsigned NrBitsToZero = countTrailingZeros(Alignment); + assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported"); + if (!AFI->isThumbFunction()) { + // if the BFC instruction is available, use that to zero the lower + // bits: + // bfc Reg, #0, log2(Alignment) + // otherwise use BIC, if the mask to zero the required number of bits + // can be encoded in the bic immediate field + // bic Reg, Reg, Alignment-1 + // otherwise, emit + // lsr Reg, Reg, log2(Alignment) + // lsl Reg, Reg, log2(Alignment) + if (CanUseBFC) { + AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg) + .addReg(Reg, RegState::Kill) + .addImm(~AlignMask)); + } else if (AlignMask <= 255) { + AddDefaultCC( + AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg) + .addReg(Reg, RegState::Kill) + .addImm(AlignMask))); + } else { + assert(!MustBeSingleInstruction && + "Shouldn't call emitAligningInstructions demanding a single " + "instruction to be emitted for large stack alignment for a target " + "without BFC."); + AddDefaultCC(AddDefaultPred( + BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) + .addReg(Reg, RegState::Kill) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero)))); + AddDefaultCC(AddDefaultPred( + BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) + .addReg(Reg, RegState::Kill) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero)))); + } + } else { + // Since this is only reached for Thumb-2 targets, the BFC instruction + // should always be available. 
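// (Editorial note on the three sequences emitted above: for a power-of-two
//  Alignment with k = log2(Alignment),
//      Reg & ~(Alignment - 1)  ==  (Reg >> k) << k,
//  so the BFC/BIC masking forms and the LSR/LSL fallback compute the same
//  aligned-down address, e.g. 0x1007 -> 0x1000 for Alignment = 16.)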
+ assert(CanUseBFC); + AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg) + .addReg(Reg, RegState::Kill) + .addImm(~AlignMask)); + } +} + void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -218,15 +287,12 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MCContext &Context = MMI.getContext(); const TargetMachine &TM = MF.getTarget(); const MCRegisterInfo *MRI = Context.getRegisterInfo(); - const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( - TM.getSubtargetImpl()->getRegisterInfo()); - const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>( - TM.getSubtargetImpl()->getInstrInfo()); + const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo(); + const ARMBaseInstrInfo &TII = *STI.getInstrInfo(); assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = - TM.getSubtargetImpl()->getFrameLowering()->getStackAlignment(); + unsigned Align = STI.getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); @@ -451,13 +517,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { nullptr, MRI->getDwarfRegNum(FramePtr, true), -(ArgRegsSaveSize - FramePtrOffsetInPush))); BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } else { unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FramePtr, true))); BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } } @@ -491,7 +559,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); break; } } @@ -514,7 +583,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } break; } @@ -535,7 +605,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } } } @@ -561,28 +632,24 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // realigned. 
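The emitAligningInstructions helper added above picks among three equivalent ways of zeroing the low bits of an address, and the realignment code below now calls it instead of hard-coding BIC. A rough standalone C++ sketch of the three strategies (assumed semantics; the 255 check stands in for the real rotated-immediate encodability test):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // bfc-style: clear a contiguous low bitfield; works for any power of two.
    uint32_t alignBFC(uint32_t reg, uint32_t align) { return reg & ~(align - 1); }

    // bic-style: AND with the inverted mask; only while align-1 is encodable.
    uint32_t alignBIC(uint32_t reg, uint32_t align) {
      assert(align - 1 <= 255 && "mask must fit the immediate field");
      return reg & ~(align - 1);
    }

    // lsr+lsl pair: shift the low bits out, then shift back in.
    uint32_t alignShift(uint32_t reg, unsigned log2Align) {
      return (reg >> log2Align) << log2Align;
    }

    int main() {
      uint32_t sp = 0x1003;
      std::printf("%#x %#x %#x\n", alignBFC(sp, 16), alignBIC(sp, 16),
                  alignShift(sp, 4)); // all three print 0x1000
    }

BFC is preferred because it is a single instruction for any power-of-two alignment, which is what skipAlignedDPRCS2Spills relies on.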
if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) { unsigned MaxAlign = MFI->getMaxAlignment(); - assert (!AFI->isThumb1OnlyFunction()); + assert(!AFI->isThumb1OnlyFunction()); if (!AFI->isThumbFunction()) { - // Emit bic sp, sp, MaxAlign - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, - TII.get(ARM::BICri), ARM::SP) - .addReg(ARM::SP, RegState::Kill) - .addImm(MaxAlign-1))); + emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign, + false); } else { - // We cannot use sp as source/dest register here, thus we're emitting the - // following sequence: + // We cannot use sp as source/dest register here, thus we're using r4 to + // perform the calculations. We're emitting the following sequence: // mov r4, sp - // bic r4, r4, MaxAlign + // -- use emitAligningInstructions to produce best sequence to zero + // -- out lower bits in r4 // mov sp, r4 // FIXME: It will be better just to find spare register here. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) - .addReg(ARM::SP, RegState::Kill)); - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, - TII.get(ARM::t2BICri), ARM::R4) - .addReg(ARM::R4, RegState::Kill) - .addImm(MaxAlign-1))); + .addReg(ARM::SP, RegState::Kill)); + emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign, + false); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) - .addReg(ARM::R4, RegState::Kill)); + .addReg(ARM::R4, RegState::Kill)); } AFI->setShouldRestoreSPFromFP(true); @@ -612,11 +679,59 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setShouldRestoreSPFromFP(true); } +// Resolve TCReturn pseudo-instruction +void ARMFrameLowering::fixTCReturn(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); + unsigned RetOpcode = MBBI->getOpcode(); + DebugLoc dl = MBBI->getDebugLoc(); + const ARMBaseInstrInfo &TII = + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); + + if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri)) + return; + + // Tail call return: adjust the stack pointer and jump to callee. + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &JumpTarget = MBBI->getOperand(0); + + // Jump to label or value in register. + if (RetOpcode == ARM::TCRETURNdi) { + unsigned TCOpcode = STI.isThumb() ? + (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) : + ARM::TAILJMPd; + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); + if (JumpTarget.isGlobal()) + MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + else { + assert(JumpTarget.isSymbol()); + MIB.addExternalSymbol(JumpTarget.getSymbolName(), + JumpTarget.getTargetFlags()); + } + + // Add the default predicate in Thumb mode. + if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0); + } else if (RetOpcode == ARM::TCRETURNri) { + BuildMI(MBB, MBBI, dl, + TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). + addReg(JumpTarget.getReg(), RegState::Kill); + } + + MachineInstr *NewMI = std::prev(MBBI); + for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) + NewMI->addOperand(MBBI->getOperand(i)); + + // Delete the pseudo instruction TCRETURN. 
+ MBB.erase(MBBI); + MBBI = NewMI; +} + void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); - unsigned RetOpcode = MBBI->getOpcode(); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -627,18 +742,17 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = MF.getTarget() - .getSubtargetImpl() - ->getFrameLowering() - ->getStackAlignment(); + unsigned Align = STI.getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. - if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + if (MF.getFunction()->getCallingConv() == CallingConv::GHC) { + fixTCReturn(MF, MBB); return; + } if (!AFI->hasStackFrame()) { if (NumBytes - ArgRegsSaveSize != 0) @@ -717,42 +831,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; } - if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) { - // Tail call return: adjust the stack pointer and jump to callee. - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &JumpTarget = MBBI->getOperand(0); - - // Jump to label or value in register. - if (RetOpcode == ARM::TCRETURNdi) { - unsigned TCOpcode = STI.isThumb() ? - (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) : - ARM::TAILJMPd; - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); - if (JumpTarget.isGlobal()) - MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), - JumpTarget.getTargetFlags()); - else { - assert(JumpTarget.isSymbol()); - MIB.addExternalSymbol(JumpTarget.getSymbolName(), - JumpTarget.getTargetFlags()); - } - - // Add the default predicate in Thumb mode. - if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0); - } else if (RetOpcode == ARM::TCRETURNri) { - BuildMI(MBB, MBBI, dl, - TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). - addReg(JumpTarget.getReg(), RegState::Kill); - } - - MachineInstr *NewMI = std::prev(MBBI); - for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) - NewMI->addOperand(MBBI->getOperand(i)); - - // Delete the pseudo instruction TCRETURN. - MBB.erase(MBBI); - MBBI = NewMI; - } + fixTCReturn(MF, MBB); if (ArgRegsSaveSize) emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize); @@ -1062,15 +1141,16 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, // The immediate is <= 64, so it doesn't need any special encoding. unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri; AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) - .addReg(ARM::SP) - .addImm(8 * NumAlignedDPRCS2Regs))); + .addReg(ARM::SP) + .addImm(8 * NumAlignedDPRCS2Regs))); - // bic r4, r4, #align-1 - Opc = isThumb ? 
ARM::t2BICri : ARM::BICri; unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment(); - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) - .addReg(ARM::R4, RegState::Kill) - .addImm(MaxAlign - 1))); + // We must set parameter MustBeSingleInstruction to true, since + // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform + // stack alignment. Luckily, this can always be done since all ARM + // architecture versions that support Neon also support the BFC + // instruction. + emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true); // mov sp, r4 // The stack pointer must be adjusted before spilling anything, otherwise @@ -1387,25 +1467,20 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { return; // Naked functions don't spill callee-saved registers. - if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::Naked)) + if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) return; // We are planning to use NEON instructions vst1 / vld1. - if (!MF.getTarget().getSubtarget<ARMSubtarget>().hasNEON()) + if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON()) return; // Don't bother if the default stack alignment is sufficiently high. - if (MF.getTarget() - .getSubtargetImpl() - ->getFrameLowering() - ->getStackAlignment() >= 8) + if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8) return; // Aligned spills require stack realignment. - const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( - MF.getSubtarget().getRegisterInfo()); - if (!RegInfo->canRealignStack(MF)) + if (!static_cast<const ARMBaseRegisterInfo *>( + MF.getSubtarget().getRegisterInfo())->canRealignStack(MF)) return; // We always spill contiguous d-registers starting from d8. 
Count how many @@ -1789,7 +1864,7 @@ static const uint64_t kSplitStackAvailable = 256; void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { unsigned Opcode; unsigned CFIIndex; - const ARMSubtarget *ST = &MF.getTarget().getSubtarget<ARMSubtarget>(); + const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>(); bool Thumb = ST->isThumb(); // Sadly, this currently doesn't support varargs, platforms other than diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index a83b773..b7be436 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -31,6 +31,8 @@ public: void emitPrologue(MachineFunction &MF) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index 0e4f81c..a84603b 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -44,10 +44,9 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) { MachineInstr *DefMI = LastMI; const MCInstrDesc &LastMCID = LastMI->getDesc(); - const TargetMachine &TM = - MI->getParent()->getParent()->getTarget(); + const MachineFunction *MF = MI->getParent()->getParent(); const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>( - TM.getSubtargetImpl()->getInstrInfo()); + MF->getSubtarget().getInstrInfo()); // Skip over one non-VFP / NEON instruction. if (!LastMI->isBarrier() && diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 6941579..6ebf640 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -70,7 +70,7 @@ public: bool runOnMachineFunction(MachineFunction &MF) override { // Reset the subtarget each time through. - Subtarget = &MF.getTarget().getSubtarget<ARMSubtarget>(); + Subtarget = &MF.getSubtarget<ARMSubtarget>(); SelectionDAGISel::runOnMachineFunction(MF); return true; } @@ -992,18 +992,24 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, Addr = N; unsigned Alignment = 0; - if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) { + + MemSDNode *MemN = cast<MemSDNode>(Parent); + + if (isa<LSBaseSDNode>(MemN) || + ((MemN->getOpcode() == ARMISD::VST1_UPD || + MemN->getOpcode() == ARMISD::VLD1_UPD) && + MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) { // This case occurs only for VLD1-lane/dup and VST1-lane instructions. // The maximum alignment is equal to the memory size being referenced. - unsigned LSNAlign = LSN->getAlignment(); - unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8; - if (LSNAlign >= MemSize && MemSize > 1) + unsigned MMOAlign = MemN->getAlignment(); + unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8; + if (MMOAlign >= MemSize && MemSize > 1) Alignment = MemSize; } else { // All other uses of addrmode6 are for intrinsics. For now just record // the raw alignment value; it will be refined later based on the legal // alignment operands for the intrinsic. 
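For the VLD1-lane/dup and VST1-lane case above, the alignment encoded in the addrmode6 operand is the access size itself whenever the MMO alignment covers it, and 0 otherwise; the intrinsic path just records the raw value for later refinement. A one-function restatement of the clamping rule (an illustrative sketch, not the selector):

    #include <cstdio>

    // Alignment value to encode in the addrmode6 operand.
    unsigned encodeAddrMode6Align(unsigned MMOAlign, unsigned MemSize) {
      return (MMOAlign >= MemSize && MemSize > 1) ? MemSize : 0;
    }

    int main() {
      std::printf("%u %u %u\n",
                  encodeAddrMode6Align(16, 8), // 8: alignment covers the access
                  encodeAddrMode6Align(4, 8),  // 0: under-aligned, natural only
                  encodeAddrMode6Align(4, 1)); // 0: byte accesses gain nothing
    }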
- Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment(); + Alignment = MemN->getAlignment(); } Align = CurDAG->getTargetConstant(Alignment, MVT::i32); @@ -1191,6 +1197,11 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm) { if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); + // Only multiples of 4 are allowed for the offset, so the frame object + // alignment must be at least 4. + MachineFrameInfo *MFI = MF->getFrameInfo(); + if (MFI->getObjectAlignment(FI) < 4) + MFI->setObjectAlignment(FI, 4); Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; @@ -1208,6 +1219,11 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + // For LHS+RHS to result in an offset that's a multiple of 4 the object + // indexed by the LHS must be 4-byte aligned. + MachineFrameInfo *MFI = MF->getFrameInfo(); + if (MFI->getObjectAlignment(FI) < 4) + MFI->setObjectAlignment(FI, 4); Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); @@ -1784,6 +1800,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, case MVT::v8i16: OpcodeIndex = 1; break; case MVT::v4f32: case MVT::v4i32: OpcodeIndex = 2; break; + case MVT::v2f64: case MVT::v2i64: OpcodeIndex = 3; assert(NumVecs == 1 && "v2i64 type only supported for VLD1"); break; @@ -1920,6 +1937,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, case MVT::v8i16: OpcodeIndex = 1; break; case MVT::v4f32: case MVT::v4i32: OpcodeIndex = 2; break; + case MVT::v2f64: case MVT::v2i64: OpcodeIndex = 3; assert(NumVecs == 1 && "v2i64 type only supported for VST1"); break; @@ -2290,7 +2308,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); // Note: The width operand is encoded as width-1. 
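As the note says, the width operand is encoded as width-1, and the lines below now derive it with the generic countTrailingOnes instead of the retired CountTrailingOnes_32. A plain C++ model of the encoding (hypothetical ubfx helper, not the ISel code):

    #include <cstdint>
    #include <cstdio>

    // Software model of UBFX: extract widthM1+1 bits starting at lsb.
    uint32_t ubfx(uint32_t x, unsigned lsb, unsigned widthM1) {
      return (x >> lsb) & ((2u << widthM1) - 1);
    }

    int main() {
      uint32_t x = 0xABCD1234;
      // (x >> 8) & 0xFF keeps 8 contiguous ones: width 8, encoded as 7.
      std::printf("%#x %#x\n", (x >> 8) & 0xFF, ubfx(x, 8, 7)); // both 0x12
    }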
- unsigned Width = CountTrailingOnes_32(And_imm) - 1; + unsigned Width = countTrailingOnes(And_imm) - 1; unsigned LSB = Srl_imm; SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); @@ -2494,6 +2512,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); if (Subtarget->isThumb1Only()) { + // Set the alignment of the frame object to 4, to avoid having to generate + // more than one ADD + MachineFrameInfo *MFI = MF->getFrameInfo(); + if (MFI->getObjectAlignment(FI) < 4) + MFI->setObjectAlignment(FI, 4); return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, CurDAG->getTargetConstant(0, MVT::i32)); } else { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 0d0d81f..56290aa 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -156,11 +156,11 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) { addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } -ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM) - : TargetLowering(TM) { - Subtarget = &TM.getSubtarget<ARMSubtarget>(); - RegInfo = TM.getSubtargetImpl()->getRegisterInfo(); - Itins = TM.getSubtargetImpl()->getInstrItineraryData(); +ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, + const ARMSubtarget &STI) + : TargetLowering(TM), Subtarget(&STI) { + RegInfo = Subtarget->getRegisterInfo(); + Itins = Subtarget->getInstrItineraryData(); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); @@ -404,22 +404,20 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM) addRegisterClass(MVT::f64, &ARM::DPRRegClass); } - for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { - for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) - setTruncStoreAction((MVT::SimpleValueType)VT, - (MVT::SimpleValueType)InnerVT, Expand); - setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); - setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); - setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); + for (MVT VT : MVT::vector_valuetypes()) { + for (MVT InnerVT : MVT::vector_valuetypes()) { + setTruncStoreAction(VT, InnerVT, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); + } - setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::MULHS, VT, Expand); + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::MULHU, VT, Expand); + setOperationAction(ISD::UMUL_LOHI, VT, Expand); - setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::BSWAP, VT, Expand); } setOperationAction(ISD::ConstantFP, MVT::f32, Custom); @@ -567,15 +565,18 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM) setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::FP_TO_UINT); setTargetDAGCombine(ISD::FDIV); + setTargetDAGCombine(ISD::LOAD); // It is legal to extload from v4i8 to v4i16 or v4i32. 
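The loop below marks those combinations Legal for every integer vector result type rather than for a single implicit one. In scalar terms, an extending vector load widens each lane as part of the load; a minimal sketch of the sign-extending case (assumed semantics):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int8_t mem[4] = {-1, 2, -3, 4}; // v4i8 in memory
      int32_t lanes[4];               // v4i32 register
      for (int i = 0; i < 4; ++i)
        lanes[i] = mem[i];            // SEXTLOAD: load and sign-extend per lane
      std::printf("%d %d %d %d\n", lanes[0], lanes[1], lanes[2], lanes[3]);
    }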
MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16, MVT::v2i32}; for (unsigned i = 0; i < 6; ++i) { - setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal); - setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal); - setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal); + for (MVT VT : MVT::integer_vector_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, Tys[i], Legal); + setLoadExtAction(ISD::ZEXTLOAD, VT, Tys[i], Legal); + setLoadExtAction(ISD::SEXTLOAD, VT, Tys[i], Legal); + } } } @@ -617,11 +618,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM) setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); } - computeRegisterProperties(); + computeRegisterProperties(Subtarget->getRegisterInfo()); // ARM does not have floating-point extending loads. - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); + for (MVT VT : MVT::fp_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); + } // ... or truncating stores setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -629,7 +632,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f16, Expand); // ARM does not have i1 sign extending load. - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + for (MVT VT : MVT::integer_valuetypes()) + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); // ARM supports all 4 flavors of integer indexed load / store. if (!Subtarget->isThumb1Only()) { @@ -963,13 +967,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM) // of the difficulty prior to coalescing of modeling operand register classes // due to the common occurrence of cross class copies and subregister insertions // and extractions. -std::pair<const TargetRegisterClass*, uint8_t> -ARMTargetLowering::findRepresentativeClass(MVT VT) const{ +std::pair<const TargetRegisterClass *, uint8_t> +ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, + MVT VT) const { const TargetRegisterClass *RRC = nullptr; uint8_t Cost = 1; switch (VT.SimpleTy) { default: - return TargetLowering::findRepresentativeClass(VT); + return TargetLowering::findRepresentativeClass(TRI, VT); // Use DPR as representative register class for all floating point // and vector types. Since there are 32 SPR registers and 32 DPR registers so // the cost is 1 for both f32 and f64. @@ -1166,12 +1171,6 @@ ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, return ARM::createFastISel(funcInfo, libInfo); } -/// getMaximalGlobalOffset - Returns the maximal possible offset which can -/// be used for loads / stores from the global. -unsigned ARMTargetLowering::getMaximalGlobalOffset() const { - return (Subtarget->isThumb1Only() ? 127 : 4095); -} - Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { unsigned NumVals = N->getNumValues(); if (!NumVals) @@ -1190,8 +1189,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { // Load are scheduled for latency even if there instruction itinerary // is not available. - const TargetInstrInfo *TII = - getTargetMachine().getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); if (MCID.getNumDefs() == 0) @@ -1783,8 +1781,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // FIXME: handle tail calls differently. 
unsigned CallOpc; - bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::MinSize); + bool HasMinSizeAttr = MF.getFunction()->hasFnAttribute(Attribute::MinSize); if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; @@ -1815,9 +1812,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Add a register mask operand representing the call-preserved registers. if (!isTailCall) { const uint32_t *Mask; - const TargetRegisterInfo *TRI = - getTargetMachine().getSubtargetImpl()->getRegisterInfo(); - const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI); + const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); if (isThisReturn) { // For 'this' returns, use the R0-preserving mask if applicable Mask = ARI->getThisReturnPreservedMask(CallConv); @@ -1865,7 +1860,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, void ARMTargetLowering::HandleByVal( CCState *State, unsigned &size, unsigned Align) const { - unsigned reg = State->AllocateReg(GPRArgRegs, 4); + unsigned reg = State->AllocateReg(GPRArgRegs); assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && "unhandled ParmContext"); @@ -1875,7 +1870,7 @@ ARMTargetLowering::HandleByVal( unsigned AlignInRegs = Align / 4; unsigned Waste = (ARM::R4 - reg) % AlignInRegs; for (unsigned i = 0; i < Waste; ++i) - reg = State->AllocateReg(GPRArgRegs, 4); + reg = State->AllocateReg(GPRArgRegs); } if (reg != 0) { unsigned excess = 4 * (ARM::R4 - reg); @@ -1886,7 +1881,7 @@ ARMTargetLowering::HandleByVal( // remained registers. const unsigned NSAAOffset = State->getNextStackOffset(); if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) { - while (State->AllocateReg(GPRArgRegs, 4)) + while (State->AllocateReg(GPRArgRegs)) ; return; } @@ -1903,7 +1898,7 @@ ARMTargetLowering::HandleByVal( // Note, first register is allocated in the beginning of function already, // allocate remained amount of registers we need. for (unsigned i = reg+1; i != ByValRegEnd; ++i) - State->AllocateReg(GPRArgRegs, 4); + State->AllocateReg(GPRArgRegs); // A byval parameter that is split between registers and memory needs its // size truncated here. // In the case where the entire structure fits in registers, we set the @@ -2025,7 +2020,9 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // cannot rely on the linker replacing the tail call with a return. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { const GlobalValue *GV = G->getGlobal(); - if (GV->hasExternalWeakLinkage()) + const Triple TT(getTargetMachine().getTargetTriple()); + if (GV->hasExternalWeakLinkage() && + (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) return false; } @@ -2084,8 +2081,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // the caller's fixed stack objects. 
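Back in HandleByVal above, the AllocateReg calls drop the now-redundant array-size argument but keep the AAPCS rule that an 8-byte-aligned byval must start in an even register, wasting a GPR when the next free one is odd. A toy model of the waste computation (simplified to r0-r3):

    #include <cstdio>

    int main() {
      unsigned NextReg = 1;     // pretend r0 is already allocated
      unsigned AlignInRegs = 2; // 8-byte alignment / 4 bytes per register
      // Mirrors (ARM::R4 - reg) % AlignInRegs in HandleByVal.
      unsigned Waste = (4 - NextReg) % AlignInRegs;
      NextReg += Waste;         // skip r1 so the byval starts at r2
      std::printf("byval starts at r%u, %u register(s) wasted\n", NextReg, Waste);
    }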
MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetInstrInfo *TII = - getTargetMachine().getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { @@ -2837,16 +2833,11 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, NumGPRs = REnd - RBegin; } else { unsigned int firstUnalloced; - firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, - sizeof(GPRArgRegs) / - sizeof(GPRArgRegs[0])); + firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs); NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; } - unsigned Align = MF.getTarget() - .getSubtargetImpl() - ->getFrameLowering() - ->getStackAlignment(); + unsigned Align = Subtarget->getFrameLowering()->getStackAlignment(); ArgRegsSize = NumGPRs * 4; // If parameter is split between stack and GPRs... @@ -2913,8 +2904,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, firstRegToSaveIndex = RBegin - ARM::R0; lastRegToSaveIndex = REnd - ARM::R0; } else { - firstRegToSaveIndex = CCInfo.getFirstUnallocated - (GPRArgRegs, array_lengthof(GPRArgRegs)); + firstRegToSaveIndex = CCInfo.getFirstUnallocated(GPRArgRegs); lastRegToSaveIndex = 4; } @@ -3087,8 +3077,11 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); - CurArgIdx = Ins[VA.getValNo()].OrigArgIndex; + if (Ins[VA.getValNo()].isOrigArg()) { + std::advance(CurOrigArg, + Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx); + CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex(); + } // Arguments stored in registers. if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); @@ -3129,9 +3122,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, else if (RegVT == MVT::v2f64) RC = &ARM::QPRRegClass; else if (RegVT == MVT::i32) - RC = AFI->isThumb1OnlyFunction() ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; + RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass + : &ARM::GPRRegClass; else llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); @@ -3169,7 +3161,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, assert(VA.isMemLoc()); assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); - int index = ArgLocs[i].getValNo(); + int index = VA.getValNo(); // Some Ins[] entries become multiple ArgLoc[] entries. // Process them only once. @@ -3182,6 +3174,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Since they could be overwritten by lowering of arguments in case of // a tail call. 
if (Flags.isByVal()) { + assert(Ins[index].isOrigArg() && + "Byval arguments cannot be implicit"); unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed(); ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign()); @@ -3596,8 +3590,8 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // inverting the compare condition, swapping 'less' and 'greater') and // sometimes need to swap the operands to the VSEL (which inverts the // condition in the sense of firing whenever the previous condition didn't) - if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || - TrueVal.getValueType() == MVT::f64)) { + if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || + TrueVal.getValueType() == MVT::f64)) { ARMCC::CondCodes CondCode = IntCCToARMCC(CC); if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || CondCode == ARMCC::VC || CondCode == ARMCC::NE) { @@ -3616,8 +3610,8 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { FPCCToARMCC(CC, CondCode, CondCode2); // Try to generate VSEL on ARMv8. - if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || - TrueVal.getValueType() == MVT::f64)) { + if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || + TrueVal.getValueType() == MVT::f64)) { // We can select VMAXNM/VMINNM from a compare followed by a select with the // same operands, as follows: // c = fcmp [ogt, olt, ugt, ult] a, b @@ -4483,6 +4477,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); + EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger(); EVT VT = Op.getValueType(); ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); SDLoc dl(Op); @@ -4512,8 +4507,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { TmpOp0 = Op0; TmpOp1 = Op1; Opc = ISD::OR; - Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); - Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); + Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); + Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1); break; case ISD::SETUO: Invert = true; // Fallthrough case ISD::SETO: @@ -4521,8 +4516,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { TmpOp0 = Op0; TmpOp1 = Op1; Opc = ISD::OR; - Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); - Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); + Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); + Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1); break; } } else { @@ -4556,8 +4551,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { Opc = ARMISD::VTST; - Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0)); - Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1)); + Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0)); + Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1)); Invert = !Invert; } } @@ -4583,22 +4578,24 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { if (SingleOp.getNode()) { switch (Opc) { case ARMISD::VCEQ: - Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break; + Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break; case ARMISD::VCGE: - Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break; + Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break; case ARMISD::VCLEZ: - 
Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break; + Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break; case ARMISD::VCGT: - Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break; + Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break; case ARMISD::VCLTZ: - Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break; + Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break; default: - Result = DAG.getNode(Opc, dl, VT, Op0, Op1); + Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1); } } else { - Result = DAG.getNode(Opc, dl, VT, Op0, Op1); + Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1); } + Result = DAG.getSExtOrTrunc(Result, dl, VT); + if (Invert) Result = DAG.getNOT(dl, Result, VT); @@ -6497,8 +6494,7 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, void ARMTargetLowering:: SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const { - const TargetInstrInfo *TII = - getTargetMachine().getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); @@ -6515,9 +6511,8 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); - const TargetRegisterClass *TRC = isThumb ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; + const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass + : &ARM::GPRRegClass; // Grab constant pool and fixed stack memory operands. MachineMemOperand *CPMMO = @@ -6613,8 +6608,7 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *ARMTargetLowering:: EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { - const TargetInstrInfo *TII = - getTargetMachine().getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); @@ -6622,9 +6616,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineFrameInfo *MFI = MF->getFrameInfo(); int FI = MFI->getFunctionContextIndex(); - const TargetRegisterClass *TRC = Subtarget->isThumb() ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRnopcRegClass; + const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass + : &ARM::GPRnopcRegClass; // Get a mapping of the call site numbers to all of the landing pads they're // associated with. @@ -7129,8 +7122,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // This pseudo instruction has 3 operands: dst, src, size // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). // Otherwise, we will generate unrolled scalar copies. - const TargetInstrInfo *TII = - getTargetMachine().getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = BB; ++It; @@ -7156,9 +7148,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, UnitSize = 2; } else { // Check whether we can use NEON instructions. - if (!MF->getFunction()->getAttributes(). 
- hasAttribute(AttributeSet::FunctionIndex, - Attribute::NoImplicitFloat) && + if (!MF->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) && Subtarget->hasNEON()) { if ((Align % 16 == 0) && SizeVal >= 16) UnitSize = 16; @@ -7172,14 +7162,11 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // Select the correct opcode and register class for unit size load/store bool IsNeon = UnitSize >= 8; - TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass - : (const TargetRegisterClass *)&ARM::GPRRegClass; + TRC = (IsThumb1 || IsThumb2) ? &ARM::tGPRRegClass : &ARM::GPRRegClass; if (IsNeon) - VecTRC = UnitSize == 16 - ? (const TargetRegisterClass *)&ARM::DPairRegClass - : UnitSize == 8 - ? (const TargetRegisterClass *)&ARM::DPRRegClass - : nullptr; + VecTRC = UnitSize == 16 ? &ARM::DPairRegClass + : UnitSize == 8 ? &ARM::DPRRegClass + : nullptr; unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; @@ -7364,7 +7351,7 @@ MachineBasicBlock * ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, MachineBasicBlock *MBB) const { const TargetMachine &TM = getTargetMachine(); - const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); assert(Subtarget->isTargetWindows() && @@ -7429,8 +7416,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = - getTargetMachine().getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); switch (MI->getOpcode()) { @@ -7627,9 +7613,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineRegisterInfo &MRI = Fn->getRegInfo(); // In Thumb mode S must not be specified if source register is the SP or // PC and if destination register is the SP, so restrict register class - unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass); + unsigned NewRsbDstReg = + MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass); // Transfer the remainder of BB and its successor edges to sinkMBB. SinkBB->splice(SinkBB->begin(), BB, @@ -7694,8 +7679,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // Rename pseudo opcodes. unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); if (NewOpc) { - const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( - getTargetMachine().getSubtargetImpl()->getInstrInfo()); + const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo(); MCID = &TII->get(NewOpc); assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && @@ -8059,29 +8043,35 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, else IsLeftOperandMUL = true; if (MULOp == SDValue()) - return SDValue(); + return SDValue(); // Figure out the right opcode. unsigned Opc = MULOp->getOpcode(); unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL; // Figure out the high and low input values to the MLAL node. - SDValue* HiMul = &MULOp; SDValue* HiAdd = nullptr; SDValue* LoMul = nullptr; SDValue* LowAdd = nullptr; + // Ensure that ADDE is from high result of ISD::SMUL_LOHI. 
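AddCombineTo64bitMLAL now verifies that the ADDE consumes the high half of the same SMUL_LOHI whose low half feeds the ADDC, instead of merely comparing opcodes. The arithmetic identity being matched, as standalone C++ (a sketch of the math, not the DAG transformation):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t a = -12345, b = 6789;
      uint64_t acc = 0x0000000700000009ULL;
      uint64_t mul = (uint64_t)((int64_t)a * (int64_t)b); // SMUL_LOHI
      uint32_t lo = (uint32_t)mul, hi = (uint32_t)(mul >> 32);
      uint32_t rlo = lo + (uint32_t)acc;                  // ADDC
      uint32_t carry = rlo < lo;                          // its carry-out
      uint32_t rhi = hi + (uint32_t)(acc >> 32) + carry;  // ADDE
      uint64_t smlal = ((uint64_t)rhi << 32) | rlo;       // one SMLAL result
      std::printf("%d\n", smlal == (uint64_t)((int64_t)a * b + (int64_t)acc));
    }

Pairing the halves of two different multiplies would break this identity, which is exactly what the new operand checks rule out.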
+ if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1))) + return SDValue(); + if (IsLeftOperandMUL) HiAdd = &AddeOp1; else HiAdd = &AddeOp0; - if (AddcOp0->getOpcode() == Opc) { + // Ensure that LoMul and LowAdd are taken from correct ISD::SMUL_LOHI node + // whose low result is fed to the ADDC we are checking. + + if (AddcOp0 == MULOp.getValue(0)) { LoMul = &AddcOp0; LowAdd = &AddcOp1; } - if (AddcOp1->getOpcode() == Opc) { + if (AddcOp1 == MULOp.getValue(0)) { LoMul = &AddcOp1; LowAdd = &AddcOp0; } @@ -8089,9 +8079,6 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, if (!LoMul) return SDValue(); - if (LoMul->getNode() != HiMul->getNode()) - return SDValue(); - // Create the merged node. SelectionDAG &DAG = DCI.DAG; @@ -8583,7 +8570,10 @@ static SDValue PerformBFICombine(SDNode *N, unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); unsigned LSB = countTrailingZeros(~InvMask); unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB; - unsigned Mask = (1 << Width)-1; + assert(Width < + static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && + "undefined behavior"); + unsigned Mask = (1u << Width) - 1; unsigned Mask2 = N11C->getZExtValue(); if ((Mask & (~Mask2)) == 0) return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0), @@ -8655,147 +8645,6 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -/// PerformSTORECombine - Target-specific dag combine xforms for -/// ISD::STORE. -static SDValue PerformSTORECombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - StoreSDNode *St = cast<StoreSDNode>(N); - if (St->isVolatile()) - return SDValue(); - - // Optimize trunc store (of multiple scalars) to shuffle and store. First, - // pack all of the elements in one place. Next, store to memory in fewer - // chunks. - SDValue StVal = St->getValue(); - EVT VT = StVal.getValueType(); - if (St->isTruncatingStore() && VT.isVector()) { - SelectionDAG &DAG = DCI.DAG; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT StVT = St->getMemoryVT(); - unsigned NumElems = VT.getVectorNumElements(); - assert(StVT != VT && "Cannot truncate to the same type"); - unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); - unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); - - // From, To sizes and ElemCount must be pow of two - if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); - - // We are going to use the original vector elt for storing. - // Accumulated smaller vector elements must be a multiple of the store size. - if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); - - unsigned SizeRatio = FromEltSz / ToEltSz; - assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); - - // Create a type on which we perform the shuffle. - EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), - NumElems*SizeRatio); - assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - - SDLoc DL(St); - SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); - SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i < NumElems; ++i) - ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio; - - // Can't shuffle using an illegal type. - if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); - - SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, - DAG.getUNDEF(WideVec.getValueType()), - ShuffleVec.data()); - // At this point all of the data is stored at the bottom of the - // register. 
We now need to save it to mem. - - // Find the largest store unit - MVT StoreType = MVT::i8; - for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; - tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { - MVT Tp = (MVT::SimpleValueType)tp; - if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) - StoreType = Tp; - } - // Didn't find a legal store type. - if (!TLI.isTypeLegal(StoreType)) - return SDValue(); - - // Bitcast the original vector into a vector of store-size units - EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), - StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); - assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); - SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); - SmallVector<SDValue, 8> Chains; - SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, - TLI.getPointerTy()); - SDValue BasePtr = St->getBasePtr(); - - // Perform one or more big stores into memory. - unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); - for (unsigned I = 0; I < E; I++) { - SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - StoreType, ShuffWide, - DAG.getIntPtrConstant(I)); - SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, - St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment()); - BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, - Increment); - Chains.push_back(Ch); - } - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); - } - - if (!ISD::isNormalStore(St)) - return SDValue(); - - // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and - // ARM stores of arguments in the same cache line. - if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && - StVal.getNode()->hasOneUse()) { - SelectionDAG &DAG = DCI.DAG; - bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian(); - SDLoc DL(St); - SDValue BasePtr = St->getBasePtr(); - SDValue NewST1 = DAG.getStore(St->getChain(), DL, - StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ), - BasePtr, St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment()); - - SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, - DAG.getConstant(4, MVT::i32)); - return DAG.getStore(NewST1.getValue(0), DL, - StVal.getNode()->getOperand(isBigEndian ? 0 : 1), - OffsetPtr, St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), - std::min(4U, St->getAlignment() / 2)); - } - - if (StVal.getValueType() != MVT::i64 || - StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) - return SDValue(); - - // Bitcast an i64 store extracted from a vector to f64. - // Otherwise, the i64 value will be legalized to a pair of i32 values. - SelectionDAG &DAG = DCI.DAG; - SDLoc dl(StVal); - SDValue IntVec = StVal.getOperand(0); - EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, - IntVec.getValueType().getVectorNumElements()); - SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); - SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, - Vec, StVal.getOperand(1)); - dl = SDLoc(N); - SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); - // Make the DAGCombiner fold the bitcasts. 
- DCI.AddToWorklist(Vec.getNode()); - DCI.AddToWorklist(ExtElt.getNode()); - DCI.AddToWorklist(V.getNode()); - return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), - St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment(), - St->getAAInfo()); -} - /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node /// are normal, non-volatile loads. If so, it is profitable to bitcast an /// i64 vector to have f64 elements, since the value can then be loaded @@ -9016,18 +8865,20 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { DAG.getUNDEF(VT), NewMask.data()); } -/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and -/// NEON load/store intrinsics to merge base address updates. +/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, +/// NEON load/store intrinsics, and generic vector load/stores, to merge +/// base address updates. +/// For generic load/stores, the memory type is assumed to be a vector. +/// The caller is assumed to have checked legality. static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { - if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) - return SDValue(); - SelectionDAG &DAG = DCI.DAG; - bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || - N->getOpcode() == ISD::INTRINSIC_W_CHAIN); - unsigned AddrOpIdx = (isIntrinsic ? 2 : 1); + const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || + N->getOpcode() == ISD::INTRINSIC_W_CHAIN); + const bool isStore = N->getOpcode() == ISD::STORE; + const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1); SDValue Addr = N->getOperand(AddrOpIdx); + MemSDNode *MemN = cast<MemSDNode>(N); // Search for a use of the address operand that is an increment. for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), @@ -9043,7 +8894,7 @@ static SDValue CombineBaseUpdate(SDNode *N, continue; // Find the new opcode for the updating load/store. 
- bool isLoad = true; + bool isLoadOp = true; bool isLaneOp = false; unsigned NewOpc = 0; unsigned NumVecs = 0; @@ -9066,19 +8917,19 @@ static SDValue CombineBaseUpdate(SDNode *N, case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD; NumVecs = 4; isLaneOp = true; break; case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD; - NumVecs = 1; isLoad = false; break; + NumVecs = 1; isLoadOp = false; break; case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD; - NumVecs = 2; isLoad = false; break; + NumVecs = 2; isLoadOp = false; break; case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD; - NumVecs = 3; isLoad = false; break; + NumVecs = 3; isLoadOp = false; break; case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD; - NumVecs = 4; isLoad = false; break; + NumVecs = 4; isLoadOp = false; break; case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD; - NumVecs = 2; isLoad = false; isLaneOp = true; break; + NumVecs = 2; isLoadOp = false; isLaneOp = true; break; case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD; - NumVecs = 3; isLoad = false; isLaneOp = true; break; + NumVecs = 3; isLoadOp = false; isLaneOp = true; break; case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD; - NumVecs = 4; isLoad = false; isLaneOp = true; break; + NumVecs = 4; isLoadOp = false; isLaneOp = true; break; } } else { isLaneOp = true; @@ -9087,15 +8938,24 @@ static SDValue CombineBaseUpdate(SDNode *N, case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; + case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD; + NumVecs = 1; isLaneOp = false; break; + case ISD::STORE: NewOpc = ARMISD::VST1_UPD; + NumVecs = 1; isLaneOp = false; isLoadOp = false; break; } } // Find the size of memory referenced by the load/store. EVT VecTy; - if (isLoad) + if (isLoadOp) { VecTy = N->getValueType(0); - else + } else if (isIntrinsic) { VecTy = N->getOperand(AddrOpIdx+1).getValueType(); + } else { + assert(isStore && "Node has to be a load, a store, or an intrinsic!"); + VecTy = N->getOperand(1).getValueType(); + } + unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; if (isLaneOp) NumBytes /= VecTy.getVectorNumElements(); @@ -9112,32 +8972,99 @@ static SDValue CombineBaseUpdate(SDNode *N, continue; } + // OK, we found an ADD we can fold into the base update. + // Now, create a _UPD node, taking care of not breaking alignment. + + EVT AlignedVecTy = VecTy; + unsigned Alignment = MemN->getAlignment(); + + // If this is a less-than-standard-aligned load/store, change the type to + // match the standard alignment. + // The alignment is overlooked when selecting _UPD variants; and it's + // easier to introduce bitcasts here than fix that. + // There are 3 ways to get to this base-update combine: + // - intrinsics: they are assumed to be properly aligned (to the standard + // alignment of the memory type), so we don't need to do anything. + // - ARMISD::VLDx nodes: they are only generated from the aforementioned + // intrinsics, so, likewise, there's nothing to do. + // - generic load/store instructions: the alignment is specified as an + // explicit operand, rather than implicitly as the standard alignment + // of the memory type (like the intrinsics). We need to change the + // memory type to match the explicit alignment. That way, we don't + // generate non-standard-aligned ARMISD::VLDx nodes.
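The code that follows implements the last case by re-typing the access to match the explicit alignment; in plain arithmetic (illustrative values):

    #include <cstdio>

    int main() {
      unsigned NumBytes = 16;   // e.g. one v2i64 access
      unsigned ScalarBits = 64; // original element width
      unsigned Alignment = 4;   // bytes, from the MachineMemOperand
      if (Alignment < ScalarBits / 8) {
        unsigned EltBits = Alignment * 8;            // new element type: i32
        unsigned NumElts = NumBytes / (EltBits / 8); // 4 lanes
        std::printf("retype the access as v%ui%u\n", NumElts, EltBits);
      }
    }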
+ if (isa<LSBaseSDNode>(N)) { + if (Alignment == 0) + Alignment = 1; + if (Alignment < VecTy.getScalarSizeInBits() / 8) { + MVT EltTy = MVT::getIntegerVT(Alignment * 8); + assert(NumVecs == 1 && "Unexpected multi-element generic load/store."); + assert(!isLaneOp && "Unexpected generic load/store lane."); + unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8); + AlignedVecTy = MVT::getVectorVT(EltTy, NumElts); + } + // Don't set an explicit alignment on regular load/stores that we want + // to transform to VLD/VST 1_UPD nodes. + // This matches the behavior of regular load/stores, which only get an + // explicit alignment if the MMO alignment is larger than the standard + // alignment of the memory type. + // Intrinsics, however, always get an explicit alignment, set to the + // alignment of the MMO. + Alignment = 1; + } + // Create the new updating load/store node. + // First, create an SDVTList for the new updating node's results. EVT Tys[6]; - unsigned NumResultVecs = (isLoad ? NumVecs : 0); + unsigned NumResultVecs = (isLoadOp ? NumVecs : 0); unsigned n; for (n = 0; n < NumResultVecs; ++n) - Tys[n] = VecTy; + Tys[n] = AlignedVecTy; Tys[n++] = MVT::i32; Tys[n] = MVT::Other; SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2)); + + // Then, gather the new node's operands. SmallVector<SDValue, 8> Ops; Ops.push_back(N->getOperand(0)); // incoming chain Ops.push_back(N->getOperand(AddrOpIdx)); Ops.push_back(Inc); - for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { - Ops.push_back(N->getOperand(i)); + + if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) { + // Try to match the intrinsic's signature. + Ops.push_back(StN->getValue()); + } else { + // Loads (and of course intrinsics) match the intrinsics' signature, + // so just add all but the alignment operand. + for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i) + Ops.push_back(N->getOperand(i)); + } + + // For all node types, the alignment operand is always the last one. + Ops.push_back(DAG.getConstant(Alignment, MVT::i32)); + + // If this is a non-standard-aligned STORE, the penultimate operand is the + // stored value. Bitcast it to the aligned type. + if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) { + SDValue &StVal = Ops[Ops.size()-2]; + StVal = DAG.getNode(ISD::BITCAST, SDLoc(N), AlignedVecTy, StVal); } - MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); + SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, - Ops, MemInt->getMemoryVT(), - MemInt->getMemOperand()); + Ops, AlignedVecTy, + MemN->getMemOperand()); // Update the uses. - std::vector<SDValue> NewResults; - for (unsigned i = 0; i < NumResultVecs; ++i) { + SmallVector<SDValue, 5> NewResults; + for (unsigned i = 0; i < NumResultVecs; ++i) NewResults.push_back(SDValue(UpdN.getNode(), i)); + + // If this is a non-standard-aligned LOAD, the first result is the loaded + // value. Bitcast it to the expected result type.
+ if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) { + SDValue &LdVal = NewResults[0]; + LdVal = DAG.getNode(ISD::BITCAST, SDLoc(N), VecTy, LdVal); } + NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain DCI.CombineTo(N, NewResults); DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); @@ -9147,6 +9074,14 @@ static SDValue CombineBaseUpdate(SDNode *N, return SDValue(); } +static SDValue PerformVLDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) + return SDValue(); + + return CombineBaseUpdate(N, DCI); +} + /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and @@ -9260,6 +9195,164 @@ static SDValue PerformVDUPLANECombine(SDNode *N, return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); } +static SDValue PerformLOADCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + EVT VT = N->getValueType(0); + + // If this is a legal vector load, try to combine it into a VLD1_UPD. + if (ISD::isNormalLoad(N) && VT.isVector() && + DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return CombineBaseUpdate(N, DCI); + + return SDValue(); +} + +/// PerformSTORECombine - Target-specific dag combine xforms for +/// ISD::STORE. +static SDValue PerformSTORECombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + StoreSDNode *St = cast<StoreSDNode>(N); + if (St->isVolatile()) + return SDValue(); + + // Optimize trunc store (of multiple scalars) to shuffle and store. First, + // pack all of the elements in one place. Next, store to memory in fewer + // chunks. + SDValue StVal = St->getValue(); + EVT VT = StVal.getValueType(); + if (St->isTruncatingStore() && VT.isVector()) { + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT StVT = St->getMemoryVT(); + unsigned NumElems = VT.getVectorNumElements(); + assert(StVT != VT && "Cannot truncate to the same type"); + unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); + unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); + + // From, To sizes and ElemCount must be pow of two + if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); + + // We are going to use the original vector elt for storing. + // Accumulated smaller vector elements must be a multiple of the store size. + if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); + + unsigned SizeRatio = FromEltSz / ToEltSz; + assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); + + // Create a type on which we perform the shuffle. + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), + NumElems*SizeRatio); + assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); + + SDLoc DL(St); + SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); + SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i < NumElems; ++i) + ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio; + + // Can't shuffle using an illegal type. + if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); + + SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, + DAG.getUNDEF(WideVec.getValueType()), + ShuffleVec.data()); + // At this point all of the data is stored at the bottom of the + // register. We now need to save it to mem. 
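Concretely, for a little-endian v4i32-to-v4i8 truncating store the shuffle packs the live byte of each lane to the front so that one wide store can replace four narrow ones; in scalar C++ (an illustration, not the combine itself):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      uint32_t lanes[4] = {0x11AA, 0x22BB, 0x33CC, 0x44DD};
      uint8_t bytes[16];
      std::memcpy(bytes, lanes, 16);  // bitcast v4i32 -> v16i8
      uint8_t packed[16] = {0};
      for (int i = 0; i < 4; ++i)
        packed[i] = bytes[4 * i];     // shuffle mask {0,4,8,12,...}
      uint32_t chunk;
      std::memcpy(&chunk, packed, 4); // one wide store of the packed lanes
      std::printf("%08x\n", chunk);   // ddccbbaa on little-endian
    }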
+ + // Find the largest store unit + MVT StoreType = MVT::i8; + for (MVT Tp : MVT::integer_valuetypes()) { + if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) + StoreType = Tp; + } + // Didn't find a legal store type. + if (!TLI.isTypeLegal(StoreType)) + return SDValue(); + + // Bitcast the original vector into a vector of store-size units + EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), + StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); + assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); + SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); + SmallVector<SDValue, 8> Chains; + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, + TLI.getPointerTy()); + SDValue BasePtr = St->getBasePtr(); + + // Perform one or more big stores into memory. + unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); + for (unsigned I = 0; I < E; I++) { + SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + StoreType, ShuffWide, + DAG.getIntPtrConstant(I)); + SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, + Increment); + Chains.push_back(Ch); + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + } + + if (!ISD::isNormalStore(St)) + return SDValue(); + + // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and + // ARM stores of arguments in the same cache line. + if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && + StVal.getNode()->hasOneUse()) { + SelectionDAG &DAG = DCI.DAG; + bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian(); + SDLoc DL(St); + SDValue BasePtr = St->getBasePtr(); + SDValue NewST1 = DAG.getStore(St->getChain(), DL, + StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ), + BasePtr, St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + + SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, + DAG.getConstant(4, MVT::i32)); + return DAG.getStore(NewST1.getValue(0), DL, + StVal.getNode()->getOperand(isBigEndian ? 0 : 1), + OffsetPtr, St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), + std::min(4U, St->getAlignment() / 2)); + } + + if (StVal.getValueType() == MVT::i64 && + StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + + // Bitcast an i64 store extracted from a vector to f64. + // Otherwise, the i64 value will be legalized to a pair of i32 values. + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(StVal); + SDValue IntVec = StVal.getOperand(0); + EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, + IntVec.getValueType().getVectorNumElements()); + SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); + SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, + Vec, StVal.getOperand(1)); + dl = SDLoc(N); + SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); + // Make the DAGCombiner fold the bitcasts. + DCI.AddToWorklist(Vec.getNode()); + DCI.AddToWorklist(ExtElt.getNode()); + DCI.AddToWorklist(V.getNode()); + return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment(), + St->getAAInfo()); + } + + // If this is a legal vector store, try to combine it into a VST1_UPD. 
+ if (ISD::isNormalStore(N) && VT.isVector() &&
+ DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return CombineBaseUpdate(N, DCI);
+
+ return SDValue();
+}
+
 // isConstVecPow2 - Return true if each vector element is a power of 2, all
 // elements are the same constant, C, and Log2(C) ranges from 1 to 32.
 static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
@@ -9316,16 +9409,18 @@ static SDValue PerformVCVTCombine(SDNode *N,
 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
- if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 ||
+ NumLanes > 4) {
 // These instructions only exist converting from f32 to i32. We can handle
 // smaller integers by generating an extra truncate, but larger ones would
- // be lossy.
+ // be lossy. We also can't handle more than 4 lanes, since these instructions
+ // only support v2i32/v4i32 types.
 return SDValue();
 }
 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
 Intrinsic::arm_neon_vcvtfp2fxu;
- unsigned NumLanes = Op.getValueType().getVectorNumElements();
 SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
 NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
 DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
@@ -9848,10 +9943,11 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
 case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
+ case ISD::LOAD: return PerformLOADCombine(N, DCI);
 case ARMISD::VLD2DUP:
 case ARMISD::VLD3DUP:
 case ARMISD::VLD4DUP:
- return CombineBaseUpdate(N, DCI);
+ return PerformVLDCombine(N, DCI);
 case ARMISD::BUILD_VECTOR:
 return PerformARMBUILD_VECTORCombine(N, DCI);
 case ISD::INTRINSIC_VOID:
@@ -9871,7 +9967,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
 case Intrinsic::arm_neon_vst2lane:
 case Intrinsic::arm_neon_vst3lane:
 case Intrinsic::arm_neon_vst4lane:
- return CombineBaseUpdate(N, DCI);
+ return PerformVLDCombine(N, DCI);
 default: break;
 }
 break;
@@ -9934,10 +10030,8 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
 const Function *F = MF.getFunction();
 // See if we can use NEON instructions for this...
- if ((!IsMemset || ZeroMemset) && - Subtarget->hasNEON() && - !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::NoImplicitFloat)) { + if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() && + !F->hasFnAttribute(Attribute::NoImplicitFloat)) { bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || @@ -10535,7 +10629,8 @@ ARMTargetLowering::getSingleConstraintMatchWeight( typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; RCPair -ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, +ARMTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + const std::string &Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC ARM Constraint Letters @@ -10581,7 +10676,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, if (StringRef("{cc}").equals_lower(Constraint)) return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass); - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops @@ -10861,11 +10956,7 @@ bool ARM::isBitFieldInvertedMask(unsigned v) { // there can be 1's on either or both "outsides", all the "inside" // bits must be 0's - unsigned TO = CountTrailingOnes_32(v); - unsigned LO = CountLeadingOnes_32(v); - v = (v >> TO) << TO; - v = (v << LO) >> LO; - return v == 0; + return isShiftedMask_32(~v); } /// isFPImmLegal - Returns true if the target can instruction select the @@ -11114,7 +11205,7 @@ bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { // This has so far only been implemented for MachO. bool ARMTargetLowering::useLoadStackGuardNode() const { - return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO; + return Subtarget->isTargetMachO(); } bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, @@ -11274,7 +11365,9 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, return (Members > 0 && Members <= 4); } -/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate. +/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of +/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when +/// passing according to AAPCS rules. 
bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { if (getEffectiveCallingConv(CallConv, isVarArg) != @@ -11283,7 +11376,9 @@ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( HABaseType Base = HA_UNKNOWN; uint64_t Members = 0; - bool result = isHomogeneousAggregate(Ty, Base, Members); - DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump()); - return result; + bool IsHA = isHomogeneousAggregate(Ty, Base, Members); + DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump()); + + bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy(); + return IsHA || IsIntArray; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 89b0c31..ec1407d 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -232,7 +232,8 @@ namespace llvm { class ARMTargetLowering : public TargetLowering { public: - explicit ARMTargetLowering(const TargetMachine &TM); + explicit ARMTargetLowering(const TargetMachine &TM, + const ARMSubtarget &STI); unsigned getJumpTableEncoding() const override; @@ -332,9 +333,10 @@ namespace llvm { ConstraintWeight getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const override; - std::pair<unsigned, const TargetRegisterClass*> - getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const override; + std::pair<unsigned, const TargetRegisterClass *> + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + const std::string &Constraint, + MVT VT) const override; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is @@ -352,10 +354,6 @@ namespace llvm { /// specified value type. const TargetRegisterClass *getRegClassFor(MVT VT) const override; - /// getMaximalGlobalOffset - Returns the maximal possible offset which can - /// be used for loads / stores from the global. - unsigned getMaximalGlobalOffset() const override; - /// Returns true if a cast between SrcAS and DestAS is a noop. bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { // Addrspacecasts are always noops. 
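The isBitFieldInvertedMask rewrite above works because a value has ones on both "outsides" and a single contiguous run of zeros inside exactly when its complement is a single contiguous run of ones, i.e. a shifted mask. A minimal standalone check of that equivalence, using local stand-ins with the same semantics as the llvm::isMask_32 and llvm::isShiftedMask_32 helpers the new code relies on:

#include <cassert>
#include <cstdint>

// Stand-ins for llvm::isMask_32 / llvm::isShiftedMask_32.
static bool isMask32(uint32_t v) { return (v & (v + 1)) == 0; }
static bool isShiftedMask32(uint32_t v) { return v && isMask32(v | (v - 1)); }

int main() {
  // 0xE0000007 has ones on both outsides and one zero run inside;
  // its complement 0x1FFFFFF8 is a single contiguous run of ones.
  assert(isShiftedMask32(~0xE0000007u));
  // 0xE00F0007 has two separate zero runs inside, so it is rejected.
  assert(!isShiftedMask32(~0xE00F0007u));
}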
@@ -414,8 +412,9 @@ namespace llvm { unsigned &Cost) const override; protected: - std::pair<const TargetRegisterClass*, uint8_t> - findRepresentativeClass(MVT VT) const override; + std::pair<const TargetRegisterClass *, uint8_t> + findRepresentativeClass(const TargetRegisterInfo *TRI, + MVT VT) const override; private: /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 17d1ffa..bc617f0 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -93,7 +93,7 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI, Reloc::Model RM) const { MachineFunction &MF = *MI->getParent()->getParent(); - const ARMSubtarget &Subtarget = MF.getTarget().getSubtarget<ARMSubtarget>(); + const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>(); if (!Subtarget.useMovt(MF)) { if (RM == Reloc::PIC_) @@ -144,21 +144,20 @@ namespace { ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); if (AFI->getGlobalBaseReg() == 0) return false; - - const ARMTargetMachine *TM = - static_cast<const ARMTargetMachine *>(&MF.getTarget()); - if (TM->getRelocationModel() != Reloc::PIC_) + const ARMSubtarget &STI = + static_cast<const ARMSubtarget &>(MF.getSubtarget()); + const TargetMachine &TM = MF.getTarget(); + if (TM.getRelocationModel() != Reloc::PIC_) return false; LLVMContext *Context = &MF.getFunction()->getContext(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - unsigned PCAdj = TM->getSubtarget<ARMSubtarget>().isThumb() ? 4 : 8; + unsigned PCAdj = STI.isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create( *Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj); - unsigned Align = - TM->getSubtargetImpl()->getDataLayout()->getPrefTypeAlignment( - Type::getInt32PtrTy(*Context)); + unsigned Align = TM.getDataLayout()->getPrefTypeAlignment( + Type::getInt32PtrTy(*Context)); unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align); MachineBasicBlock &FirstMBB = MF.front(); @@ -166,9 +165,8 @@ namespace { DebugLoc DL = FirstMBB.findDebugLoc(MBBI); unsigned TempReg = MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass); - unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? - ARM::t2LDRpci : ARM::LDRcp; - const TargetInstrInfo &TII = *TM->getSubtargetImpl()->getInstrInfo(); + unsigned Opc = STI.isThumb2() ? ARM::t2LDRpci : ARM::LDRcp; + const TargetInstrInfo &TII = *STI.getInstrInfo(); MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), TempReg) .addConstantPoolIndex(Idx); @@ -178,15 +176,13 @@ namespace { // Fix the GOT address by adding pc. unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); - Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? ARM::tPICADD - : ARM::PICADD; + Opc = STI.isThumb2() ? 
ARM::tPICADD : ARM::PICADD; MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), GlobalBaseReg) .addReg(TempReg) .addImm(ARMPCLabelIndex); if (Opc == ARM::PICADD) AddDefaultPred(MIB); - return true; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 3177114..126c552 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -263,8 +263,6 @@ def IsNotMClass : Predicate<"!Subtarget->isMClass()">, "!armv*m">; def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate<"!ModeThumb", "arm-mode">; -def IsIOS : Predicate<"Subtarget->isTargetIOS()">; -def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">; def IsMachO : Predicate<"Subtarget->isTargetMachO()">; def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; @@ -333,24 +331,6 @@ def imm16_31 : ImmLeaf<i32, [{ return (int32_t)Imm >= 16 && (int32_t)Imm < 32; }]>; -def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; } -def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ - unsigned Value = -(unsigned)N->getZExtValue(); - return Value && ARM_AM::getSOImmVal(Value) != -1; - }], imm_neg_XFORM> { - let ParserMatchClass = so_imm_neg_asmoperand; -} - -// Note: this pattern doesn't require an encoder method and such, as it's -// only used on aliases (Pat<> and InstAlias<>). The actual encoding -// is handled by the destination instructions, which use so_imm. -def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; } -def so_imm_not : Operand<i32>, PatLeaf<(imm), [{ - return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1; - }], imm_not_XFORM> { - let ParserMatchClass = so_imm_not_asmoperand; -} - // sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits. def sext_16_node : PatLeaf<(i32 GPR:$a), [{ return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17; @@ -530,7 +510,7 @@ def shift_imm : Operand<i32> { let ParserMatchClass = ShifterImmAsmOperand; } -// shifter_operand operands: so_reg_reg, so_reg_imm, and so_imm. +// shifter_operand operands: so_reg_reg, so_reg_imm, and mod_imm. def ShiftedRegAsmOperand : AsmOperandClass { let Name = "RegShiftedReg"; } def so_reg_reg : Operand<i32>, // reg reg imm ComplexPattern<i32, 3, "SelectRegShifterOperand", @@ -575,27 +555,43 @@ def shift_so_reg_imm : Operand<i32>, // reg reg imm let MIOperandInfo = (ops GPR, i32imm); } - -// so_imm - Match a 32-bit shifter_operand immediate operand, which is an -// 8-bit immediate rotated by an arbitrary number of bits. -def SOImmAsmOperand: ImmAsmOperand { let Name = "ARMSOImm"; } -def so_imm : Operand<i32>, ImmLeaf<i32, [{ +// mod_imm: match a 32-bit immediate operand, which can be encoded into +// a 12-bit immediate; an 8-bit integer and a 4-bit rotator (See ARMARM +// - "Modified Immediate Constants"). Within the MC layer we keep this +// immediate in its encoded form. +def ModImmAsmOperand: AsmOperandClass { + let Name = "ModImm"; + let ParserMethod = "parseModImm"; +} +def mod_imm : Operand<i32>, ImmLeaf<i32, [{ return ARM_AM::getSOImmVal(Imm) != -1; }]> { - let EncoderMethod = "getSOImmOpValue"; - let ParserMatchClass = SOImmAsmOperand; - let DecoderMethod = "DecodeSOImmOperand"; + let EncoderMethod = "getModImmOpValue"; + let PrintMethod = "printModImmOperand"; + let ParserMatchClass = ModImmAsmOperand; } -// Break so_imm's up into two pieces. This handles immediates with up to 16 -// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to -// get the first/second pieces. 
-def so_imm2part : PatLeaf<(imm), [{
- return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
-}]>;
+// Note: the patterns mod_imm_not and mod_imm_neg do not require an encoder
+// method and such, as they are only used on aliases (Pat<> and InstAlias<>).
+// The actual parsing, encoding, decoding are handled by the destination
+// instructions, which use mod_imm.
-/// arm_i32imm - True for +V6T2, or true only if so_imm2part is true.
-///
+def ModImmNotAsmOperand : AsmOperandClass { let Name = "ModImmNot"; }
+def mod_imm_not : Operand<i32>, PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
+ }], imm_not_XFORM> {
+ let ParserMatchClass = ModImmNotAsmOperand;
+}
+
+def ModImmNegAsmOperand : AsmOperandClass { let Name = "ModImmNeg"; }
+def mod_imm_neg : Operand<i32>, PatLeaf<(imm), [{
+ unsigned Value = -(unsigned)N->getZExtValue();
+ return Value && ARM_AM::getSOImmVal(Value) != -1;
+ }], imm_neg_XFORM> {
+ let ParserMatchClass = ModImmNegAsmOperand;
+}
+
+/// arm_i32imm - True for +V6T2, or when isSOImmTwoPartVal() is true.
 def arm_i32imm : PatLeaf<(imm), [{
 if (Subtarget->useMovt(*MF))
 return true;
@@ -1204,7 +1200,7 @@ include "ARMInstrFormats.td"
 // Multiclass helpers...
 //
-/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
+/// AsI1_bin_irs - Defines a set of (op r, {mod_imm|r|so_reg}) patterns for a
 /// binop that produces a value.
 let TwoOperandAliasConstraint = "$Rn = $Rd" in
 multiclass AsI1_bin_irs<bits<4> opcod, string opc,
@@ -1213,9 +1209,9 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
 // The register-immediate version is re-materializable. This is useful
 // in particular for taking the address of a local.
 let isReMaterializable = 1 in {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm), DPFrm,
 iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
+ [(set GPR:$Rd, (opnode GPR:$Rn, mod_imm:$imm))]>,
 Sched<[WriteALU, ReadALU]> {
 bits<4> Rd;
 bits<4> Rn;
@@ -1286,9 +1282,9 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
 // The register-immediate version is re-materializable. This is useful
 // in particular for taking the address of a local.
let isReMaterializable = 1 in { - def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, + def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]>, + [(set GPR:$Rd, (opnode mod_imm:$imm, GPR:$Rn))]>, Sched<[WriteALU, ReadALU]> { bits<4> Rd; bits<4> Rn; @@ -1356,9 +1352,9 @@ let hasPostISelHook = 1, Defs = [CPSR] in { multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, PatFrag opnode, bit Commutable = 0> { - def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p), + def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p), 4, iii, - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>, + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, mod_imm:$imm))]>, Sched<[WriteALU, ReadALU]>; def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p), @@ -1389,9 +1385,9 @@ let hasPostISelHook = 1, Defs = [CPSR] in { multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, PatFrag opnode, bit Commutable = 0> { - def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p), + def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p), 4, iii, - [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>, + [(set GPR:$Rd, CPSR, (opnode mod_imm:$imm, GPR:$Rn))]>, Sched<[WriteALU, ReadALU]>; def rsi : ARMPseudoInst<(outs GPR:$Rd), @@ -1410,16 +1406,16 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir, } } -/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test +/// AI1_cmp_irs - Defines a set of (op r, {mod_imm|r|so_reg}) cmp / test /// patterns. Similar to AsI1_bin_irs except the instruction does not produce /// a explicit result, only implicitly set CPSR. 
let isCompare = 1, Defs = [CPSR] in {
multiclass AI1_cmp_irs<bits<4> opcod, string opc,
 InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
 PatFrag opnode, bit Commutable = 0> {
- def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii,
+ def ri : AI1<opcod, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii,
 opc, "\t$Rn, $imm",
- [(opnode GPR:$Rn, so_imm:$imm)]>,
+ [(opnode GPR:$Rn, mod_imm:$imm)]>,
 Sched<[WriteCMP, ReadALU]> {
 bits<4> Rn;
 bits<12> imm;
@@ -1547,9 +1543,9 @@ let TwoOperandAliasConstraint = "$Rn = $Rd" in
 multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
 bit Commutable = 0> {
 let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm),
 DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm, CPSR))]>,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, mod_imm:$imm, CPSR))]>,
 Requires<[IsARM]>,
 Sched<[WriteALU, ReadALU]> {
 bits<4> Rd;
@@ -1617,9 +1613,9 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
 let TwoOperandAliasConstraint = "$Rn = $Rd" in
 multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
 let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm),
 DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn, CPSR))]>,
+ [(set GPR:$Rd, CPSR, (opnode mod_imm:$imm, GPR:$Rn, CPSR))]>,
 Requires<[IsARM]>,
 Sched<[WriteALU, ReadALU]> {
 bits<4> Rd;
@@ -1813,7 +1809,7 @@ multiclass AI_str1nopc<bit isByte, string opc, InstrItinClass iii,
 /// the function. The first operand is the ID# for this instruction, the second
 /// is the index into the MachineConstantPool that this is, the third is the
 /// size in bytes of this constant pool entry.
-let neverHasSideEffects = 1, isNotDuplicable = 1 in
+let hasSideEffects = 0, isNotDuplicable = 1 in
 def CONSTPOOL_ENTRY :
 PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
 i32imm:$size), NoItinerary, []>;
@@ -2057,7 +2053,7 @@ def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
 // LEApcrel - Load a pc-relative address into a register without offending the
 // assembler.
-let neverHasSideEffects = 1, isReMaterializable = 1 in
+let hasSideEffects = 0, isReMaterializable = 1 in
 // The 'adr' mnemonic encodes differently if the label is before or after
 // the instruction. The {24-21} opcode bits are set by the fixup, as we don't
 // know until then which form of the instruction will be used.
@@ -2387,6 +2383,33 @@ def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> {
 let Inst{24-23} = 0b11;
 }
+// Hypervisor Call is a system instruction.
+let isCall = 1 in {
+def HVC : AInoP< (outs), (ins imm0_65535:$imm), BrFrm, NoItinerary,
+ "hvc", "\t$imm", []>,
+ Requires<[IsARM, HasVirtualization]> {
+ bits<16> imm;
+
+ // Even though HVC isn't predicable, its encoding includes a condition field.
+ // The instruction is undefined if the condition field is 0xf; otherwise it is
+ // unpredictable if the condition isn't AL (0xe).
+ let Inst{31-28} = 0b1110;
+ let Unpredictable{31-28} = 0b1111;
+ let Inst{27-24} = 0b0001;
+ let Inst{23-20} = 0b0100;
+ let Inst{19-8} = imm{15-4};
+ let Inst{7-4} = 0b0111;
+ let Inst{3-0} = imm{3-0};
+}
+}
+
+// Return from exception in Hypervisor mode.
+let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in +def ERET : ABI<0b0001, (outs), (ins), NoItinerary, "eret", "", []>, + Requires<[IsARM, HasVirtualization]> { + let Inst{23-0} = 0b011000000000000001101110; +} + //===----------------------------------------------------------------------===// // Load / Store Instructions. // @@ -2404,7 +2427,7 @@ defm STRB : AI_str1nopc<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si, BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; // Special LDR for loads from non-pc-relative constpools. -let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1, +let canFoldAsLoad = 1, mayLoad = 1, hasSideEffects = 0, isReMaterializable = 1, isCodeGenOnly = 1 in def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr), AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr", @@ -2431,7 +2454,7 @@ def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr", [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // Load doubleword def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoad_d_r, "ldrd", "\t$Rt, $Rt2, $addr", []>, @@ -2508,7 +2531,7 @@ multiclass AI2_ldridx<bit isByte, string opc, } -let mayLoad = 1, neverHasSideEffects = 1 in { +let mayLoad = 1, hasSideEffects = 0 in { // FIXME: for LDR_PRE_REG etc. the itineray should be either IIC_iLoad_ru or // IIC_iLoad_siu depending on whether it the offset register is shifted. defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_iu, IIC_iLoad_ru>; @@ -2544,7 +2567,7 @@ multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> { } } -let mayLoad = 1, neverHasSideEffects = 1 in { +let mayLoad = 1, hasSideEffects = 0 in { defm LDRH : AI3_ldridx<0b1011, "ldrh", IIC_iLoad_bh_ru>; defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>; defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>; @@ -2577,10 +2600,10 @@ def LDRD_POST: AI3ldstidx<0b1101, 0, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), let DecoderMethod = "DecodeAddrMode3Instruction"; } } // hasExtraDefRegAllocReq = 1 -} // mayLoad = 1, neverHasSideEffects = 1 +} // mayLoad = 1, hasSideEffects = 0 // LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT. -let mayLoad = 1, neverHasSideEffects = 1 in { +let mayLoad = 1, hasSideEffects = 0 in { def LDRT_POST_REG : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$addr, am2offset_reg:$offset), IndexModePost, LdFrm, IIC_iLoad_ru, @@ -2699,7 +2722,7 @@ def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm, [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>; // Store doubleword -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr), StMiscFrm, IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>, Requires<[IsARM, HasV5TE]> { @@ -2772,7 +2795,7 @@ multiclass AI2_stridx<bit isByte, string opc, } } -let mayStore = 1, neverHasSideEffects = 1 in { +let mayStore = 1, hasSideEffects = 0 in { // FIXME: for STR_PRE_REG etc. the itineray should be either IIC_iStore_ru or // IIC_iStore_siu depending on whether it the offset register is shifted. 
defm STR : AI2_stridx<0, "str", IIC_iStore_iu, IIC_iStore_ru>; @@ -2864,7 +2887,7 @@ def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb), let DecoderMethod = "DecodeAddrMode3Instruction"; } -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rt2, addrmode3_pre:$addr), IndexModePre, StMiscFrm, IIC_iStore_d_ru, @@ -2894,7 +2917,7 @@ def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb), let Inst{3-0} = offset{3-0}; // imm3_0/Rm let DecoderMethod = "DecodeAddrMode3Instruction"; } -} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 +} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 // STRT, STRBT, and STRHT @@ -2938,7 +2961,7 @@ def STRBT_POST : ARMAsmPseudo<"strbt${q} $Rt, $addr", (ins GPR:$Rt, addr_offset_none:$addr, pred:$q)>; -let mayStore = 1, neverHasSideEffects = 1 in { +let mayStore = 1, hasSideEffects = 0 in { def STRT_POST_REG : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), IndexModePost, StFrm, IIC_iStore_ru, @@ -3103,17 +3126,18 @@ multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f, } } -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m, - IIC_iLoad_mu>; + IIC_iLoad_mu>, ComplexDeprecationPredicate<"ARMLoad">; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m, - IIC_iStore_mu>; + IIC_iStore_mu>, + ComplexDeprecationPredicate<"ARMStore">; -} // neverHasSideEffects +} // hasSideEffects // FIXME: remove when we have a way to marking a MI with these properties. // FIXME: Should pc be an implicit operand like PICADD, etc? @@ -3139,7 +3163,7 @@ defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m, // Move Instructions. // -let neverHasSideEffects = 1 in +let hasSideEffects = 0 in def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> { bits<4> Rd; @@ -3153,7 +3177,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, } // A version for the smaller set of tail call registers. -let neverHasSideEffects = 1 in +let hasSideEffects = 0 in def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> { bits<4> Rd; @@ -3197,8 +3221,8 @@ def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src), } let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in -def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi, - "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP, +def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm, IIC_iMOVi, + "mov", "\t$Rd, $imm", [(set GPR:$Rd, mod_imm:$imm)]>, UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<12> imm; @@ -3408,10 +3432,10 @@ defm RSC : AI1_rsc_irs<0b0111, "rsc", // assume opposite meanings of the carry flag (i.e., carry == !borrow). // See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory // details. 
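The borrow convention described above (carry == !borrow) is also what makes the mod_imm_not pattern further down legal: ADC computes a + b + C while SBC computes a - b - (1 - C), so adding the bitwise complement of an immediate under carry equals subtracting the immediate itself under borrow. A quick standalone check of the identity in 32-bit modular arithmetic (the constants are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  // a + ~imm + C == a - imm - (1 - C) for C in {0, 1}, which is why
  // (ARMadde GPR:$src, mod_imm_not:$imm, CPSR) can select to SBCri.
  uint32_t a = 0x12345678u, imm = 0x0000FF00u;
  for (uint32_t C = 0; C <= 1; ++C)
    assert(a + ~imm + C == a - imm - (1u - C));
}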
-def : ARMPat<(add GPR:$src, so_imm_neg:$imm), - (SUBri GPR:$src, so_imm_neg:$imm)>; -def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm), - (SUBSri GPR:$src, so_imm_neg:$imm)>; +def : ARMPat<(add GPR:$src, mod_imm_neg:$imm), + (SUBri GPR:$src, mod_imm_neg:$imm)>; +def : ARMPat<(ARMaddc GPR:$src, mod_imm_neg:$imm), + (SUBSri GPR:$src, mod_imm_neg:$imm)>; def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm), (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>, @@ -3423,8 +3447,8 @@ def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm), // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. -def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR), - (SBCri GPR:$src, so_imm_not:$imm)>; +def : ARMPat<(ARMadde GPR:$src, mod_imm_not:$imm, CPSR), + (SBCri GPR:$src, mod_imm_not:$imm)>; def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR), (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>, Requires<[IsARM, HasV6T2]>; @@ -3705,9 +3729,9 @@ def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift), let Inst{3-0} = shift{3-0}; } let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in -def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, +def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm, IIC_iMVNi, "mvn", "\t$Rd, $imm", - [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP, Sched<[WriteALU]> { + [(set GPR:$Rd, mod_imm_not:$imm)]>,UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<12> imm; let Inst{25} = 1; @@ -3716,8 +3740,8 @@ def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, let Inst{11-0} = imm; } -def : ARMPat<(and GPR:$src, so_imm_not:$imm), - (BICri GPR:$src, so_imm_not:$imm)>; +def : ARMPat<(and GPR:$src, mod_imm_not:$imm), + (BICri GPR:$src, mod_imm_not:$imm)>; //===----------------------------------------------------------------------===// // Multiply Instructions. @@ -3811,7 +3835,7 @@ def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), } // Extra precision multiplies with low / high results -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { let isCommutable = 1 in { def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64, @@ -3878,7 +3902,7 @@ def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), Requires<[IsARM, NoV6]>; } -} // neverHasSideEffects +} // hasSideEffects // Most significant word multiply def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), @@ -4242,8 +4266,8 @@ defm CMP : AI1_cmp_irs<0b1010, "cmp", BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; // ARMcmpZ can re-use the above instruction definitions. 
-def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm), - (CMPri GPR:$src, so_imm:$imm)>; +def : ARMPat<(ARMcmpZ GPR:$src, mod_imm:$imm), + (CMPri GPR:$src, mod_imm:$imm)>; def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs), (CMPrr GPR:$src, GPR:$rhs)>; def : ARMPat<(ARMcmpZ GPR:$src, so_reg_imm:$rhs), @@ -4253,9 +4277,9 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs), // CMN register-integer let isCompare = 1, Defs = [CPSR] in { -def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi, +def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, IIC_iCMPi, "cmn", "\t$Rn, $imm", - [(ARMcmn GPR:$Rn, so_imm:$imm)]>, + [(ARMcmn GPR:$Rn, mod_imm:$imm)]>, Sched<[WriteCMP, ReadALU]> { bits<4> Rn; bits<12> imm; @@ -4328,11 +4352,11 @@ def CMNzrsr : AI1<0b1011, (outs), } -def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm), - (CMNri GPR:$src, so_imm_neg:$imm)>; +def : ARMPat<(ARMcmp GPR:$src, mod_imm_neg:$imm), + (CMNri GPR:$src, mod_imm_neg:$imm)>; -def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), - (CMNri GPR:$src, so_imm_neg:$imm)>; +def : ARMPat<(ARMcmpZ GPR:$src, mod_imm_neg:$imm), + (CMNri GPR:$src, mod_imm_neg:$imm)>; // Note that TST/TEQ don't set all the same flags that CMP does! defm TST : AI1_cmp_irs<0b1000, "tst", @@ -4359,7 +4383,7 @@ def BCCZi64 : PseudoInst<(outs), // Conditional moves -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { let isCommutable = 1, isSelect = 1 in def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), @@ -4396,9 +4420,9 @@ def MOVCCi16 let isMoveImm = 1 in def MOVCCi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_imm:$imm, cmovpred:$p), + (ins GPR:$false, mod_imm:$imm, cmovpred:$p), 4, IIC_iCMOVi, - [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, + [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm:$imm, cmovpred:$p))]>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; @@ -4414,13 +4438,13 @@ def MOVCCi32imm let isMoveImm = 1 in def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_imm:$imm, cmovpred:$p), + (ins GPR:$false, mod_imm:$imm, cmovpred:$p), 4, IIC_iCMOVi, - [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, + [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm_not:$imm, cmovpred:$p))]>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; -} // neverHasSideEffects +} // hasSideEffects //===----------------------------------------------------------------------===// @@ -5074,7 +5098,7 @@ def MRSbanked : ABI<0b0001, (outs GPRnopc:$Rd), (ins banked_reg:$banked), let Inst{23} = 0; let Inst{22} = banked{5}; // R bit - let Inst{21-20} = 0b10; + let Inst{21-20} = 0b00; let Inst{19-16} = banked{3-0}; let Inst{15-12} = Rd; let Inst{11-9} = 0b001; @@ -5103,17 +5127,17 @@ def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, let Inst{3-0} = Rn; } -def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, - "msr", "\t$mask, $a", []> { +def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, mod_imm:$imm), NoItinerary, + "msr", "\t$mask, $imm", []> { bits<5> mask; - bits<12> a; + bits<12> imm; let Inst{23} = 0; let Inst{22} = mask{4}; // R bit let Inst{21-20} = 0b10; let Inst{19-16} = mask{3-0}; let Inst{15-12} = 0b1111; - let Inst{11-0} = a; + let Inst{11-0} = imm; } // However, the MSR (banked register) system instruction (ARMv7VE) *does* have a @@ -5204,7 +5228,7 @@ let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch), NoItinerary, [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, - Requires<[IsARM, IsIOS]>; + Requires<[IsARM]>; } 
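For reference, the mod_imm operand that replaces so_imm throughout this file keeps the immediate in its encoded 12-bit form: bits [11:8] hold a rotation field and bits [7:0] an 8-bit value, and the immediate is that value rotated right by twice the rotation field (ARMARM "Modified Immediate Constants"). A minimal decoder sketch of that rule; decodeModImm is a hypothetical helper, not part of the patch:

#include <cassert>
#include <cstdint>

// Decode a 12-bit ARM modified immediate: value = imm8 ror (2 * rot).
static uint32_t decodeModImm(uint16_t Enc) {
  uint32_t Imm8 = Enc & 0xFFu;
  uint32_t Rot = 2u * ((Enc >> 8) & 0xFu);
  return Rot ? ((Imm8 >> Rot) | (Imm8 << (32u - Rot))) : Imm8;
}

int main() {
  assert(decodeModImm(0x0FF) == 0xFFu);        // rot = 0: plain 8-bit value
  assert(decodeModImm(0x4FF) == 0xFF000000u);  // rot = 4: 0xFF ror 8
}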
// eh.sjlj.dispatchsetup pseudo-instruction. @@ -5228,7 +5252,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in // Large immediate handling. -// 32-bit immediate using two piece so_imms or movw + movt. +// 32-bit immediate using two piece mod_imms or movw + movt. // This is a single pseudo instruction, the benefit is that it can be remat'd // as a single unit instead of having to handle reg inputs. // FIXME: Remove this when we can do generalized remat. @@ -5257,6 +5281,7 @@ def LDRLIT_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), (ARMWrapperPIC tglobaladdr:$addr))]>, Requires<[IsARM, DontUseMovt]>; +let AddedComplexity = 10 in def LDRLIT_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), NoItinerary, [(set GPR:$dst, @@ -5519,36 +5544,36 @@ def : MnemonicAlias<"uqsubaddx", "uqsax">; // USAX == USUBADDX def : MnemonicAlias<"usubaddx", "usax">; -// "mov Rd, so_imm_not" can be handled via "mvn" in assembly, just like +// "mov Rd, mod_imm_not" can be handled via "mvn" in assembly, just like // for isel. def : ARMInstAlias<"mov${s}${p} $Rd, $imm", - (MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>; + (MVNi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>; def : ARMInstAlias<"mvn${s}${p} $Rd, $imm", - (MOVi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>; + (MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>; // Same for AND <--> BIC def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm", - (ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + (ANDri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; def : ARMInstAlias<"bic${s}${p} $Rdn, $imm", - (ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + (ANDri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm", - (BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + (BICri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; def : ARMInstAlias<"and${s}${p} $Rdn, $imm", - (BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + (BICri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; -// Likewise, "add Rd, so_imm_neg" -> sub +// Likewise, "add Rd, mod_imm_neg" -> sub def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm", - (SUBri GPR:$Rd, GPR:$Rn, so_imm_neg:$imm, pred:$p, cc_out:$s)>; + (SUBri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; def : ARMInstAlias<"add${s}${p} $Rd, $imm", - (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>; -// Same for CMP <--> CMN via so_imm_neg + (SUBri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; +// Same for CMP <--> CMN via mod_imm_neg def : ARMInstAlias<"cmp${p} $Rd, $imm", - (CMNri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>; + (CMNri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>; def : ARMInstAlias<"cmn${p} $Rd, $imm", - (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>; + (CMPri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>; // The shifter forms of the MOV instruction are aliased to the ASR, LSL, // LSR, ROR, and RRX instructions. 
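The bit-field assignments in the HVC definition added earlier in this file compose into the A1 encoding as sketched below; encodeHVC is an illustrative standalone function, not code from the patch. With the fixed condition field of AL (0b1110), HVC #0 assembles to 0xE1400070.

#include <cassert>
#include <cstdint>

// Compose the 32-bit A1 encoding of HVC #imm16 from the Inst{} fields:
// cond=1110, 0001, 0100, imm16{15-4}, 0111, imm16{3-0}.
static uint32_t encodeHVC(uint16_t Imm16) {
  uint32_t Inst = 0xEu << 28;          // Inst{31-28} = 0b1110 (AL)
  Inst |= 0x1u << 24;                  // Inst{27-24} = 0b0001
  Inst |= 0x4u << 20;                  // Inst{23-20} = 0b0100
  Inst |= uint32_t(Imm16 >> 4) << 8;   // Inst{19-8}  = imm{15-4}
  Inst |= 0x7u << 4;                   // Inst{7-4}   = 0b0111
  Inst |= Imm16 & 0xFu;                // Inst{3-0}   = imm{3-0}
  return Inst;
}

int main() { assert(encodeHVC(0) == 0xE1400070u); }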
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index a0c627c..2a7b4b5 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -665,7 +665,7 @@ class VLDQQQQWBPseudo<InstrItinClass itin> (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, "$addr.addr = $wb, $src = $dst">; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // VLD1 : Vector Load (multiple single elements) class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode> @@ -1023,7 +1023,7 @@ def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; -} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 +} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 // Classes for VLD*LN pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. @@ -1106,7 +1106,7 @@ def : Pat<(vector_insert (v4f32 QPR:$src), (f32 (load addrmode6:$addr)), imm:$lane), (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // ...with address register writeback: class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -1359,7 +1359,7 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; -} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 +} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 // VLD1DUP : Vector Load (single element to all lanes) class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp, @@ -1405,7 +1405,7 @@ def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load, def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), (VLD1DUPq32 addrmode6:$addr)>; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // ...with address register writeback: multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, @@ -1609,9 +1609,9 @@ def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; -} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 +} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { // Classes for VST* pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. @@ -2025,7 +2025,7 @@ def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; -} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 +} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 // Classes for VST*LN pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. 
@@ -2129,7 +2129,7 @@ def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>; def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>; def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>; -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { // VST2LN : Vector Store (single 2-element structure from one lane) class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -2351,7 +2351,7 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>; def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>; -} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 +} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 // Use vld1/vst1 for unaligned f64 load / store def : Pat<(f64 (hword_alignedload addrmode6:$addr)), diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index a867844..3c62e0e 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -714,7 +714,7 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, // // These require base address to be written back or one of the loaded regs. -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops), @@ -754,7 +754,7 @@ def tSTMIA_UPD : Thumb1I<(outs GPR:$wb), let Inst{7-0} = regs; } -} // neverHasSideEffects +} // hasSideEffects def : InstAlias<"ldm${p} $Rn!, $regs", (tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)>, @@ -888,7 +888,7 @@ def tADDrr : // A8.6.6 T1 "add", "\t$Rd, $Rn, $Rm", [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; -let neverHasSideEffects = 1 in +let hasSideEffects = 0 in def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr, "add", "\t$Rdn, $Rm", []>, T1Special<{0,0,?,?}>, Sched<[WriteALU]> { @@ -1048,7 +1048,7 @@ def : tInstAlias <"movs $Rdn, $imm", // A7-73: MOV(2) - mov setting flag. -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone, 2, IIC_iMOVr, "mov", "\t$Rd, $Rm", "", []>, @@ -1070,7 +1070,7 @@ def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr, let Inst{5-3} = Rm; let Inst{2-0} = Rd; } -} // neverHasSideEffects +} // hasSideEffects // Multiply register let isCommutable = 1 in @@ -1248,7 +1248,7 @@ def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p), let DecoderMethod = "DecodeThumbAddSpecialReg"; } -let neverHasSideEffects = 1, isReMaterializable = 1 in +let hasSideEffects = 0, isReMaterializable = 1 in def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p), 2, IIC_iALUi, []>, Sched<[WriteALU]>; @@ -1297,7 +1297,7 @@ def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch), AddrModeNone, 0, IndexModeNone, Pseudo, NoItinerary, "", "", [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, - Requires<[IsThumb, IsIOS]>; + Requires<[IsThumb]>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -1375,6 +1375,17 @@ def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr), def : T1Pat<(zextloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; +// extload from the stack -> word load from the stack, as it avoids having to +// materialize the base in a separate register. 
This only works when a word +// load puts the byte/halfword value in the same place in the register that the +// byte/halfword load would, i.e. when little-endian. +def : T1Pat<(extloadi1 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>, + Requires<[IsThumb, IsThumb1Only, IsLE]>; +def : T1Pat<(extloadi8 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>, + Requires<[IsThumb, IsThumb1Only, IsLE]>; +def : T1Pat<(extloadi16 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>, + Requires<[IsThumb, IsThumb1Only, IsLE]>; + // extload -> zextload def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>; def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 807c252..10b0a0e 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1185,7 +1185,8 @@ class T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode> class T2I_exta_rrot_np<bits<3> opcod, string opc> : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm,rot_imm:$rot), - IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []> { + IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []>, + Requires<[HasT2ExtractPack, IsThumb2]> { bits<2> rot; let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -1241,7 +1242,7 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd), let DecoderMethod = "DecodeT2Adr"; } -let neverHasSideEffects = 1, isReMaterializable = 1 in +let hasSideEffects = 0, isReMaterializable = 1 in def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p), 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; let hasSideEffects = 1 in @@ -1272,12 +1273,12 @@ defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, GPRnopc, UnOpFrag<(sextloadi8 node:$Src)>>; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // Load doubleword def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_imm8s4:$addr), IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>; -} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 +} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 // zextload i1 -> zextload i8 def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr), @@ -1326,7 +1327,7 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), // Indexed loads -let mayLoad = 1, neverHasSideEffects = 1 in { +let mayLoad = 1, hasSideEffects = 0 in { def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_iu, @@ -1378,7 +1379,7 @@ def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; -} // mayLoad = 1, neverHasSideEffects = 1 +} // mayLoad = 1, hasSideEffects = 0 // LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110). 
// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 @@ -1443,14 +1444,14 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si, rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; // Store doubleword -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in +let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr), IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>; // Indexed stores -let mayStore = 1, neverHasSideEffects = 1 in { +let mayStore = 1, hasSideEffects = 0 in { def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb), (ins GPRnopc:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, @@ -1468,7 +1469,7 @@ def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb), AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu, "strb", "\t$Rt, $addr!", "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; -} // mayStore = 1, neverHasSideEffects = 1 +} // mayStore = 1, hasSideEffects = 0 def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), (ins GPRnopc:$Rt, addr_offset_none:$Rn, @@ -1763,7 +1764,7 @@ multiclass thumb2_ld_mult<string asm, InstrItinClass itin, } } -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in defm t2LDM : thumb2_ld_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>; @@ -1848,14 +1849,14 @@ multiclass thumb2_st_mult<string asm, InstrItinClass itin, let mayStore = 1, hasExtraSrcRegAllocReq = 1 in defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>; -} // neverHasSideEffects +} // hasSideEffects //===----------------------------------------------------------------------===// // Move Instructions. // -let neverHasSideEffects = 1 in +let hasSideEffects = 0 in def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr, "mov", ".w\t$Rd, $Rm", []>, Sched<[WriteALU]> { let Inst{31-27} = 0b11101; @@ -2572,7 +2573,7 @@ def t2MLS: T2FourReg< } // Extra precision multiplies with low / high results -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { let isCommutable = 1 in { def t2SMULL : T2MulLong<0b000, 0b0000, (outs rGPR:$RdLo, rGPR:$RdHi), @@ -2603,7 +2604,7 @@ def t2UMAAL : T2MulLong<0b110, 0b0110, (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, Requires<[IsThumb2, HasThumb2DSP]>; -} // neverHasSideEffects +} // hasSideEffects // Rounding variants of the below included for disassembly only @@ -3150,7 +3151,7 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq", BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>; // Conditional moves -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { let isCommutable = 1, isSelect = 1 in def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd), @@ -3213,7 +3214,7 @@ def t2MOVCCi32imm RegConstraint<"$false = $dst">; } // isCodeGenOnly = 1 -} // neverHasSideEffects +} // hasSideEffects //===----------------------------------------------------------------------===// // Atomic operations intrinsics @@ -3824,6 +3825,27 @@ def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary, let Inst{7-0} = imm; } +// Hypervisor Call is a system instruction. 
+let isCall = 1 in { +def t2HVC : T2XI <(outs), (ins imm0_65535:$imm16), IIC_Br, "hvc.w\t$imm16", []>, + Requires<[IsThumb2, HasVirtualization]>, Sched<[WriteBr]> { + bits<16> imm16; + let Inst{31-20} = 0b111101111110; + let Inst{19-16} = imm16{15-12}; + let Inst{15-12} = 0b1000; + let Inst{11-0} = imm16{11-0}; +} +} + +// Alias for HVC without the ".w" optional width specifier +def : t2InstAlias<"hvc\t$imm16", (t2HVC imm0_65535:$imm16)>; + +// ERET - Return from exception in Hypervisor mode. +// B9.3.3, B9.3.20: ERET is an alias for "SUBS PC, LR, #0" in an implementation that +// includes virtualization extensions. +def t2ERET : InstAlias<"eret${p}", (t2SUBS_PC_LR 0, pred:$p)>, + Requires<[IsThumb2, HasVirtualization]>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns // @@ -4564,17 +4586,21 @@ def : t2InstAlias<"strh${p} $Rt, $addr", (t2STRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; // Extend instruction optional rotate operand. -def : t2InstAlias<"sxtab${p} $Rd, $Rn, $Rm", - (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; -def : t2InstAlias<"sxtah${p} $Rd, $Rn, $Rm", - (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; -def : t2InstAlias<"sxtab16${p} $Rd, $Rn, $Rm", - (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; +def : InstAlias<"sxtab${p} $Rd, $Rn, $Rm", + (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : InstAlias<"sxtah${p} $Rd, $Rn, $Rm", + (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : InstAlias<"sxtab16${p} $Rd, $Rn, $Rm", + (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : InstAlias<"sxtb16${p} $Rd, $Rm", + (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>, + Requires<[HasT2ExtractPack, IsThumb2]>; def : t2InstAlias<"sxtb${p} $Rd, $Rm", (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; -def : t2InstAlias<"sxtb16${p} $Rd, $Rm", - (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; def : t2InstAlias<"sxth${p} $Rd, $Rm", (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; def : t2InstAlias<"sxtb${p}.w $Rd, $Rm", @@ -4582,19 +4608,23 @@ def : t2InstAlias<"sxtb${p}.w $Rd, $Rm", def : t2InstAlias<"sxth${p}.w $Rd, $Rm", (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; -def : t2InstAlias<"uxtab${p} $Rd, $Rn, $Rm", - (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; -def : t2InstAlias<"uxtah${p} $Rd, $Rn, $Rm", - (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; -def : t2InstAlias<"uxtab16${p} $Rd, $Rn, $Rm", - (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; +def : InstAlias<"uxtab${p} $Rd, $Rn, $Rm", + (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : InstAlias<"uxtah${p} $Rd, $Rn, $Rm", + (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : InstAlias<"uxtab16${p} $Rd, $Rn, $Rm", + (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : InstAlias<"uxtb16${p} $Rd, $Rm", + (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>, + Requires<[HasT2ExtractPack, IsThumb2]>; + def : t2InstAlias<"uxtb${p} $Rd, $Rm", (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; -def : t2InstAlias<"uxtb16${p} $Rd, $Rm", - (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; def : t2InstAlias<"uxth${p} $Rd, $Rm", (t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; - def : t2InstAlias<"uxtb${p}.w $Rd, $Rm", (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; def : t2InstAlias<"uxth${p}.w $Rd, $Rm", 
@@ -4603,15 +4633,17 @@ def : t2InstAlias<"uxth${p}.w $Rd, $Rm", // Extend instruction w/o the ".w" optional width specifier. def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot", (t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; -def : t2InstAlias<"uxtb16${p} $Rd, $Rm$rot", - (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; +def : InstAlias<"uxtb16${p} $Rd, $Rm$rot", + (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>, + Requires<[HasT2ExtractPack, IsThumb2]>; def : t2InstAlias<"uxth${p} $Rd, $Rm$rot", (t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot", (t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; -def : t2InstAlias<"sxtb16${p} $Rd, $Rm$rot", - (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; +def : InstAlias<"sxtb16${p} $Rd, $Rm$rot", + (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>, + Requires<[HasT2ExtractPack, IsThumb2]>; def : t2InstAlias<"sxth${p} $Rd, $Rm$rot", (t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index d78f2ac..e0a9314 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -194,7 +194,7 @@ multiclass vfp_ldst_mult<string asm, bit L_bit, } } -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>; @@ -202,7 +202,7 @@ defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpStore_m, IIC_fpStore_mu>; -} // neverHasSideEffects +} // hasSideEffects def : MnemonicAlias<"vldm", "vldmia">; def : MnemonicAlias<"vstm", "vstmia">; @@ -769,7 +769,7 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", [(set SPR:$Sd, (fsqrt SPR:$Sm))]>; -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>; @@ -777,7 +777,7 @@ def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>; -} // neverHasSideEffects +} // hasSideEffects //===----------------------------------------------------------------------===// // FP <-> GPR Copies. Int <-> FP Conversions. @@ -827,7 +827,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, let D = VFPNeonDomain; } -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm), IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm", @@ -876,7 +876,7 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, let D = VFPNeonDomain; let DecoderMethod = "DecodeVMOVRRS"; } -} // neverHasSideEffects +} // hasSideEffects // FMDHR: GPR -> SPR // FMDLR: GPR -> SPR @@ -907,7 +907,7 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, let isRegSequence = 1; } -let neverHasSideEffects = 1 in +let hasSideEffects = 0 in def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", @@ -1543,7 +1543,7 @@ def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))), // FP Conditional moves. 
// -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p), IIC_fpUNA64, [(set (f64 DPR:$Dd), @@ -1555,7 +1555,7 @@ def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p), [(set (f32 SPR:$Sd), (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>, RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>; -} // neverHasSideEffects +} // hasSideEffects //===----------------------------------------------------------------------===// // Move from VFP System Register to ARM core register. diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index c429ac1..a8d0981 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -170,7 +170,8 @@ static int getMemoryOpOffset(const MachineInstr *MI) { return OffField; // Thumb1 immediate offsets are scaled by 4 - if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi) + if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi || + Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) return OffField * 4; int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) @@ -206,6 +207,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ib: return ARM::STMIB; } case ARM::tLDRi: + case ARM::tLDRspi: // tLDMIA is writeback-only - unless the base register is in the input // reglist. ++NumLDMGened; @@ -214,6 +216,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::tLDMIA; } case ARM::tSTRi: + case ARM::tSTRspi: // There is no non-writeback tSTMIA either. ++NumSTMGened; switch (Mode) { @@ -328,7 +331,7 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { } // end namespace llvm static bool isT1i32Load(unsigned Opc) { - return Opc == ARM::tLDRi; + return Opc == ARM::tLDRi || Opc == ARM::tLDRspi; } static bool isT2i32Load(unsigned Opc) { @@ -340,7 +343,7 @@ static bool isi32Load(unsigned Opc) { } static bool isT1i32Store(unsigned Opc) { - return Opc == ARM::tSTRi; + return Opc == ARM::tSTRi || Opc == ARM::tSTRspi; } static bool isT2i32Store(unsigned Opc) { @@ -356,6 +359,8 @@ static unsigned getImmScale(unsigned Opc) { default: llvm_unreachable("Unhandled opcode!"); case ARM::tLDRi: case ARM::tSTRi: + case ARM::tLDRspi: + case ARM::tSTRspi: return 1; case ARM::tLDRHi: case ARM::tSTRHi: @@ -495,6 +500,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (isThumb1) for (unsigned I = 0; I < NumRegs; ++I) if (Base == Regs[I].first) { + assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); if (Opcode == ARM::tLDRi) { Writeback = false; break; @@ -515,7 +521,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { // VLDM/VSTM do not support DB mode without also updating the base reg. Mode = ARM_AM::db; - } else if (Offset != 0) { + } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { // Check if this is a supported opcode before inserting instructions to // calculate a new base register. if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; @@ -545,6 +551,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, int BaseOpc = isThumb2 ? ARM::t2ADDri : + (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : (isThumb1 && Offset < 8) ? ARM::tADDi3 : isThumb1 ? ARM::tADDi8 : ARM::ADDri; @@ -552,7 +559,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, Offset = - Offset; BaseOpc = isThumb2 ? 
ARM::t2SUBri : - (isThumb1 && Offset < 8) ? ARM::tSUBi3 : + (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : isThumb1 ? ARM::tSUBi8 : ARM::SUBri; } @@ -566,18 +573,34 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // or // MOV NewBase, Base // ADDS NewBase, #imm8. - if (Base != NewBase && Offset >= 8) { + if (Base != NewBase && + (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) { // Need to insert a MOV to the new base first. - BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) - .addReg(Base, getKillRegState(BaseKill)) - .addImm(Pred).addReg(PredReg); + if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && + !STI->hasV6Ops()) { + // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr + if (Pred != ARMCC::AL) + return false; + BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase) + .addReg(Base, getKillRegState(BaseKill)); + } else + BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(Pred).addReg(PredReg); + // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. Base = NewBase; BaseKill = false; } - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) - .addImm(Pred).addReg(PredReg); + if (BaseOpc == ARM::tADDrSPi) { + assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); + BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4) + .addImm(Pred).addReg(PredReg); + } else + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) + .addImm(Pred).addReg(PredReg); } else { BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) @@ -958,6 +981,8 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::STRi12: case ARM::tLDRi: case ARM::tSTRi: + case ARM::tLDRspi: + case ARM::tSTRspi: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -1393,6 +1418,8 @@ static bool isMemoryOp(const MachineInstr *MI) { case ARM::STRi12: case ARM::tLDRi: case ARM::tSTRi: + case ARM::tLDRspi: + case ARM::tSTRspi: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -1787,12 +1814,11 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { } bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - const TargetMachine &TM = Fn.getTarget(); - TL = TM.getSubtargetImpl()->getTargetLowering(); + STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); + TL = STI->getTargetLowering(); AFI = Fn.getInfo<ARMFunctionInfo>(); - TII = TM.getSubtargetImpl()->getInstrInfo(); - TRI = TM.getSubtargetImpl()->getRegisterInfo(); - STI = &TM.getSubtarget<ARMSubtarget>(); + TII = STI->getInstrInfo(); + TRI = STI->getRegisterInfo(); RS = new RegScavenger(); isThumb2 = AFI->isThumb2Function(); isThumb1 = AFI->isThumbFunction() && !isThumb2; @@ -1802,7 +1828,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { ++MFI) { MachineBasicBlock &MBB = *MFI; Modified |= LoadStoreMultipleOpti(MBB); - if (TM.getSubtarget<ARMSubtarget>().hasV5TOps()) + if (STI->hasV5TOps()) Modified |= MergeReturnIntoLDM(MBB); } @@ -1850,10 +1876,10 @@ namespace { } bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - TD = Fn.getSubtarget().getDataLayout(); - TII = Fn.getSubtarget().getInstrInfo(); - TRI = Fn.getSubtarget().getRegisterInfo(); + TD = Fn.getTarget().getDataLayout(); STI = 
&static_cast<const ARMSubtarget &>(Fn.getSubtarget()); + TII = STI->getInstrInfo(); + TRI = STI->getRegisterInfo(); MRI = &Fn.getRegInfo(); MF = &Fn; diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index 023f5f8..fd4f5ff 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -119,11 +119,45 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP) { OutMI.setOpcode(MI->getOpcode()); + // In the MC layer, we keep modified immediates in their encoded form + bool EncodeImms = false; + switch (MI->getOpcode()) { + default: break; + case ARM::MOVi: + case ARM::MVNi: + case ARM::CMPri: + case ARM::CMNri: + case ARM::TSTri: + case ARM::TEQri: + case ARM::MSRi: + case ARM::ADCri: + case ARM::ADDri: + case ARM::ADDSri: + case ARM::SBCri: + case ARM::SUBri: + case ARM::SUBSri: + case ARM::ANDri: + case ARM::ORRri: + case ARM::EORri: + case ARM::BICri: + case ARM::RSBri: + case ARM::RSBSri: + case ARM::RSCri: + EncodeImms = true; + break; + } + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); MCOperand MCOp; - if (AP.lowerOperand(MO, MCOp)) + if (AP.lowerOperand(MO, MCOp)) { + if (MCOp.isImm() && EncodeImms) { + int32_t Enc = ARM_AM::getSOImmVal(MCOp.getImm()); + if (Enc != -1) + MCOp.setImm(Enc); + } OutMI.addOperand(MCOp); + } } } diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp index 892b269..229d041 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -14,8 +14,8 @@ using namespace llvm; void ARMFunctionInfo::anchor() { } ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) - : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), - hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), + : isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()), + hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()), StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 4e67fa1..ddfdb52 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -16,10 +16,10 @@ #include "ARMSubtarget.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/DenseMap.h" namespace llvm { diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp index 2a49255..1c50f9e 100644 --- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp +++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp @@ -9,8 +9,8 @@ //===------------------------------------------------------------------------------------------===// #include "ARM.h" -#include "ARMMachineFunctionInfo.h" #include "ARMInstrInfo.h" +#include "ARMMachineFunctionInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index b290e7f..45cc9ea 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -199,7 +199,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 
0, 12), // Thumb1 instructions that know how to use hi regs. let AltOrders = [(add LR, GPR), (trunc GPR, 8)]; let AltOrderSelect = [{ - return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); + return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only(); }]; } @@ -209,7 +209,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> { let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)]; let AltOrderSelect = [{ - return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); + return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only(); }]; } @@ -219,7 +219,7 @@ def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> { def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV)> { let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)]; let AltOrderSelect = [{ - return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); + return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only(); }]; } @@ -237,7 +237,7 @@ def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)>; def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> { let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)]; let AltOrderSelect = [{ - return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); + return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only(); }]; } @@ -255,7 +255,7 @@ def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>; def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R12)> { let AltOrders = [(and tcGPR, tGPR)]; let AltOrderSelect = [{ - return MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); + return MF.getSubtarget<ARMSubtarget>().isThumb1Only(); }]; } diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index fa30ac3..636205f 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -32,7 +32,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { - const ARMSubtarget &Subtarget = DAG.getTarget().getSubtarget<ARMSubtarget>(); + const ARMSubtarget &Subtarget = + DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); // Do repeated 4-byte loads and stores. To be improved. // This requires 4-byte alignment. 
if ((Align & 3) != 0) @@ -150,14 +151,14 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { - const ARMSubtarget &Subtarget = DAG.getTarget().getSubtarget<ARMSubtarget>(); + const ARMSubtarget &Subtarget = + DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); // Use default for non-AAPCS (or MachO) subtargets if (!Subtarget.isAAPCS_ABI() || Subtarget.isTargetMachO() || Subtarget.isTargetWindows()) return SDValue(); - const ARMTargetLowering &TLI = - *DAG.getTarget().getSubtarget<ARMSubtarget>().getTargetLowering(); + const ARMTargetLowering &TLI = *Subtarget.getTargetLowering(); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 600f39d..89624dd 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -15,12 +15,14 @@ #include "ARMFrameLowering.h" #include "ARMISelLowering.h" #include "ARMInstrInfo.h" +#include "ARMMachineFunctionInfo.h" #include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" -#include "ARMMachineFunctionInfo.h" +#include "ARMTargetMachine.h" #include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -28,7 +30,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; @@ -87,56 +88,6 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), "Allow IT blocks based on ARMv7"), clEnumValEnd)); -static std::string computeDataLayout(ARMSubtarget &ST) { - std::string Ret = ""; - - if (ST.isLittle()) - // Little endian. - Ret += "e"; - else - // Big endian. - Ret += "E"; - - Ret += DataLayout::getManglingComponent(ST.getTargetTriple()); - - // Pointers are 32 bits and aligned to 32 bits. - Ret += "-p:32:32"; - - // ABIs other than APCS have 64 bit integers with natural alignment. - if (!ST.isAPCS_ABI()) - Ret += "-i64:64"; - - // We have 64 bits floats. The APCS ABI requires them to be aligned to 32 - // bits, others to 64 bits. We always try to align to 64 bits. - if (ST.isAPCS_ABI()) - Ret += "-f64:32:64"; - - // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others - // to 64. We always ty to give them natural alignment. - if (ST.isAPCS_ABI()) - Ret += "-v64:32:64-v128:32:128"; - else - Ret += "-v128:64:128"; - - // Try to align aggregates to 32 bits (the default is 64 bits, which has no - // particular hardware support on 32-bit ARM). - Ret += "-a:0:32"; - - // Integer registers are 32 bits. - Ret += "-n32"; - - // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit - // aligned everywhere else. - if (ST.isTargetNaCl()) - Ret += "-S128"; - else if (ST.isAAPCS_ABI()) - Ret += "-S64"; - else - Ret += "-S32"; - - return Ret; -} - /// initializeSubtargetDependencies - Initializes using a CPU and feature string /// so that we can use initializer lists for subtarget initialization. 
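/// For example, initializeFrameLowering() below calls this first, so that
/// isThumb1Only() already reflects the parsed feature string when the
/// constructor's member initializers choose between Thumb1FrameLowering and
/// ARMFrameLowering.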
ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU, @@ -146,23 +97,31 @@ ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU, return *this; } +ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU, + StringRef FS) { + ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS); + if (STI.isThumb1Only()) + return (ARMFrameLowering *)new Thumb1FrameLowering(STI); + + return new ARMFrameLowering(STI); +} + ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, const TargetMachine &TM, - bool IsLittle) + const std::string &FS, + const ARMBaseTargetMachine &TM, bool IsLittle) : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle), - TargetTriple(TT), Options(TM.Options), TargetABI(ARM_ABI_UNKNOWN), - DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))), - TSInfo(DL), + TargetTriple(TT), Options(TM.Options), TM(TM), + TSInfo(*TM.getDataLayout()), + FrameLowering(initializeFrameLowering(CPU, FS)), + // At this point initializeSubtargetDependencies has been called so + // we can query directly. InstrInfo(isThumb1Only() ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this) : !isThumb() ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this) : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)), - TLInfo(TM), - FrameLowering(!isThumb1Only() - ? new ARMFrameLowering(*this) - : (ARMFrameLowering *)new Thumb1FrameLowering(*this)) {} + TLInfo(TM, *this) {} void ARMSubtarget::initializeEnvironment() { HasV4TOps = false; @@ -216,7 +175,7 @@ void ARMSubtarget::initializeEnvironment() { void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (CPUString.empty()) { - if (isTargetIOS() && TargetTriple.getArchName().endswith("v7s")) + if (isTargetDarwin() && TargetTriple.getArchName().endswith("v7s")) // Default to the Swift CPU when targeting armv7s/thumbv7s. CPUString = "swift"; else @@ -226,8 +185,8 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // Insert the architecture feature derived from the target triple into the // feature string. This is important for setting features that are implied // based on the architecture version. - std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple.getTriple(), - CPUString); + std::string ArchFS = + ARM_MC::ParseARMTriple(TargetTriple.getTriple(), CPUString); if (!FS.empty()) { if (!ArchFS.empty()) ArchFS = ArchFS + "," + FS.str(); @@ -246,30 +205,9 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // Initialize scheduling itinerary for the specified CPU. 
InstrItins = getInstrItineraryForCPU(CPUString); - if (TargetABI == ARM_ABI_UNKNOWN) { - switch (TargetTriple.getEnvironment()) { - case Triple::Android: - case Triple::EABI: - case Triple::EABIHF: - case Triple::GNUEABI: - case Triple::GNUEABIHF: - TargetABI = ARM_ABI_AAPCS; - break; - default: - if (TargetTriple.isOSBinFormatMachO() && - TargetTriple.getOS() == Triple::UnknownOS) - TargetABI = ARM_ABI_AAPCS; - else - TargetABI = ARM_ABI_APCS; - break; - } - } - // FIXME: this is invalid for WindowsCE - if (isTargetWindows()) { - TargetABI = ARM_ABI_AAPCS; + if (isTargetWindows()) NoARM = true; - } if (isAAPCS_ABI()) stackAlignment = 8; @@ -331,6 +269,15 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { UseNEONForSinglePrecisionFP = true; } +bool ARMSubtarget::isAPCS_ABI() const { + assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); + return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_APCS; +} +bool ARMSubtarget::isAAPCS_ABI() const { + assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); + return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS; +} + /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. bool ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, @@ -402,6 +349,5 @@ bool ARMSubtarget::useMovt(const MachineFunction &MF) const { // immediates as it is inherently position independent, and may be out of // range otherwise. return UseMovt && (isTargetWindows() || - !MF.getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::MinSize)); + !MF.getFunction()->hasFnAttribute(Attribute::MinSize)); } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index d5ee009..f4deddf 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -20,10 +20,10 @@ #include "ARMInstrInfo.h" #include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" -#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" @@ -37,6 +37,7 @@ namespace llvm { class GlobalValue; class StringRef; class TargetOptions; +class ARMBaseTargetMachine; class ARMSubtarget : public ARMGenSubtargetInfo { protected: @@ -228,18 +229,14 @@ protected: /// Options passed via command line that could influence the target const TargetOptions &Options; - public: - enum { - ARM_ABI_UNKNOWN, - ARM_ABI_APCS, - ARM_ABI_AAPCS // ARM EABI - } TargetABI; + const ARMBaseTargetMachine &TM; +public: /// This constructor initializes the data members to match that /// of the specified triple. /// ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, const TargetMachine &TM, bool IsLittle); + const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle); /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. @@ -254,7 +251,6 @@ protected: /// so that we can use initializer lists for subtarget initialization. 
ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); - const DataLayout *getDataLayout() const override { return &DL; } const ARMSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } @@ -272,16 +268,17 @@ protected: } private: - const DataLayout DL; ARMSelectionDAGInfo TSInfo; + // Either Thumb1FrameLowering or ARMFrameLowering. + std::unique_ptr<ARMFrameLowering> FrameLowering; // Either Thumb1InstrInfo or Thumb2InstrInfo. std::unique_ptr<ARMBaseInstrInfo> InstrInfo; ARMTargetLowering TLInfo; - // Either Thumb1FrameLowering or ARMFrameLowering. - std::unique_ptr<ARMFrameLowering> FrameLowering; void initializeEnvironment(); void initSubtargetFeatures(StringRef CPU, StringRef FS); + ARMFrameLowering *initializeFrameLowering(StringRef CPU, StringRef FS); + public: void computeIssueWidth(); @@ -316,7 +313,8 @@ public: bool hasCRC() const { return HasCRC; } bool hasVirtualization() const { return HasVirtualization; } bool useNEONForSinglePrecisionFP() const { - return hasNEON() && UseNEONForSinglePrecisionFP; } + return hasNEON() && UseNEONForSinglePrecisionFP; + } bool hasDivide() const { return HasHardwareDivide; } bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } @@ -350,7 +348,7 @@ public: bool isTargetIOS() const { return TargetTriple.isiOS(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } - bool isTargetNetBSD() const { return TargetTriple.getOS() == Triple::NetBSD; } + bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); } bool isTargetWindows() const { return TargetTriple.isOSWindows(); } bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } @@ -391,14 +389,8 @@ public: return TargetTriple.getEnvironment() == Triple::Android; } - bool isAPCS_ABI() const { - assert(TargetABI != ARM_ABI_UNKNOWN); - return TargetABI == ARM_ABI_APCS; - } - bool isAAPCS_ABI() const { - assert(TargetABI != ARM_ABI_UNKNOWN); - return TargetABI == ARM_ABI_AAPCS; - } + bool isAPCS_ABI() const; + bool isAAPCS_ABI() const; bool isThumb() const { return InThumbMode; } bool isThumb1Only() const { return InThumbMode && !HasThumb2; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 88d6c5e..a97a058 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -11,13 +11,14 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "ARMTargetMachine.h" #include "ARMFrameLowering.h" +#include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" +#include "ARMTargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" @@ -52,6 +53,110 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { return make_unique<ARMElfTargetObjectFile>(); } +static ARMBaseTargetMachine::ARMABI +computeTargetABI(const Triple &TT, StringRef CPU, + const TargetOptions &Options) { + if (Options.MCOptions.getABIName().startswith("aapcs")) + return ARMBaseTargetMachine::ARM_ABI_AAPCS; + else if (Options.MCOptions.getABIName().startswith("apcs")) + return ARMBaseTargetMachine::ARM_ABI_APCS; + + assert(Options.MCOptions.getABIName().empty() && + "Unknown target-abi option!"); + + 
ARMBaseTargetMachine::ARMABI TargetABI = + ARMBaseTargetMachine::ARM_ABI_UNKNOWN; + + // FIXME: This is duplicated code from the front end and should be unified. + if (TT.isOSBinFormatMachO()) { + if (TT.getEnvironment() == llvm::Triple::EABI || + (TT.getOS() == llvm::Triple::UnknownOS && + TT.getObjectFormat() == llvm::Triple::MachO) || + CPU.startswith("cortex-m")) { + TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; + } else { + TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; + } + } else if (TT.isOSWindows()) { + // FIXME: this is invalid for WindowsCE + TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; + } else { + // Select the default based on the platform. + switch (TT.getEnvironment()) { + case llvm::Triple::Android: + case llvm::Triple::GNUEABI: + case llvm::Triple::GNUEABIHF: + case llvm::Triple::EABIHF: + case llvm::Triple::EABI: + TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; + break; + case llvm::Triple::GNU: + TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; + break; + default: + if (TT.getOS() == llvm::Triple::NetBSD) + TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; + else + TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; + break; + } + } + + return TargetABI; +} + +static std::string computeDataLayout(const Triple &TT, + ARMBaseTargetMachine::ARMABI ABI, + bool isLittle) { + std::string Ret = ""; + + if (isLittle) + // Little endian. + Ret += "e"; + else + // Big endian. + Ret += "E"; + + Ret += DataLayout::getManglingComponent(TT); + + // Pointers are 32 bits and aligned to 32 bits. + Ret += "-p:32:32"; + + // ABIs other than APCS have 64 bit integers with natural alignment. + if (ABI != ARMBaseTargetMachine::ARM_ABI_APCS) + Ret += "-i64:64"; + + // We have 64 bit floats. The APCS ABI requires them to be aligned to 32 + // bits, others to 64 bits. We always try to align to 64 bits. + if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS) + Ret += "-f64:32:64"; + + // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others + // to 64. We always try to give them natural alignment. + if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS) + Ret += "-v64:32:64-v128:32:128"; + else + Ret += "-v128:64:128"; + + // Try to align aggregates to 32 bits (the default is 64 bits, which has no + // particular hardware support on 32-bit ARM). + Ret += "-a:0:32"; + + // Integer registers are 32 bits. + Ret += "-n32"; + + // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit + // aligned everywhere else. + if (TT.isOSNaCl()) + Ret += "-S128"; + else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS) + Ret += "-S64"; + else + Ret += "-S32"; + + return Ret; +} + /// TargetMachine ctor - Create an ARM architecture model.
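/// As a worked example of computeDataLayout() above: a little-endian ELF
/// triple using the AAPCS ABI (e.g. armv7--linux-gnueabihf) yields the string
/// "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64".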
/// ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, @@ -60,6 +165,8 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + TargetABI(computeTargetABI(Triple(TT), CPU, Options)), + DL(computeDataLayout(Triple(TT), TargetABI, isLittle)), TLOF(createTLOF(Triple(getTargetTriple()))), Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) { @@ -73,11 +180,8 @@ ARMBaseTargetMachine::~ARMBaseTargetMachine() {} const ARMSubtarget * ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { - AttributeSet FnAttrs = F.getAttributes(); - Attribute CPUAttr = - FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu"); - Attribute FSAttr = - FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features"); + Attribute CPUAttr = F.getFnAttribute("target-cpu"); + Attribute FSAttr = F.getFnAttribute("target-features"); std::string CPU = !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString().str() @@ -91,8 +195,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { // function before we can generate a subtarget. We also need to use // it as a key for the subtarget since that can be the only difference // between two functions. - Attribute SFAttr = - FnAttrs.getAttribute(AttributeSet::FunctionIndex, "use-soft-float"); + Attribute SFAttr = F.getFnAttribute("use-soft-float"); bool SoftFloat = !SFAttr.hasAttribute(Attribute::None) ? SFAttr.getValueAsString() == "true" : Options.UseSoftFloat; @@ -109,12 +212,9 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { return I.get(); } -void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) { - // Add first the target-independent BasicTTI pass, then our ARM pass. This - // allows the ARM pass to delegate to the target independent layer when - // appropriate. - PM.add(createBasicTargetTransformInfoPass(this)); - PM.add(createARMTargetTransformInfoPass(this)); +TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis( + [this](Function &F) { return TargetTransformInfo(ARMTTIImpl(this, F)); }); } @@ -197,9 +297,9 @@ public: void addIRPasses() override; bool addPreISel() override; bool addInstSelector() override; - bool addPreRegAlloc() override; - bool addPreSched2() override; - bool addPreEmitPass() override; + void addPreRegAlloc() override; + void addPreSched2() override; + void addPreEmitPass() override; }; } // namespace @@ -226,7 +326,12 @@ void ARMPassConfig::addIRPasses() { bool ARMPassConfig::addPreISel() { if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createGlobalMergePass(TM)); + // FIXME: This is using the thumb1 only constant value for + // maximal global offset for merging globals. We may want + // to look into using the old value for non-thumb1 code of + // 4095 based on the TargetMachine, but this starts to become + // tricky when doing code gen per function. 
+ addPass(createGlobalMergePass(TM, 127)); return false; } @@ -241,7 +346,7 @@ bool ARMPassConfig::addInstSelector() { return false; } -bool ARMPassConfig::addPreRegAlloc() { +void ARMPassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOpt::None) addPass(createARMLoadStoreOptimizationPass(true)); if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) @@ -252,13 +357,11 @@ bool ARMPassConfig::addPreRegAlloc() { getARMSubtarget().hasNEON() && !DisableA15SDOptimization) { addPass(createA15SDOptimizerPass()); } - return true; } -bool ARMPassConfig::addPreSched2() { +void ARMPassConfig::addPreSched2() { if (getOptLevel() != CodeGenOpt::None) { addPass(createARMLoadStoreOptimizationPass()); - printAndVerify("After ARM load / store optimizer"); if (getARMSubtarget().hasNEON()) addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); @@ -279,11 +382,9 @@ bool ARMPassConfig::addPreSched2() { } if (getARMSubtarget().isThumb2()) addPass(createThumb2ITBlockPass()); - - return true; } -bool ARMPassConfig::addPreEmitPass() { +void ARMPassConfig::addPreEmitPass() { if (getARMSubtarget().isThumb2()) { if (!getARMSubtarget().prefers32BitThumb()) addPass(createThumb2SizeReductionPass()); @@ -294,6 +395,4 @@ bool ARMPassConfig::addPreEmitPass() { addPass(createARMOptimizeBarriersPass()); addPass(createARMConstantIslandPass()); - - return true; } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index fba0ec2..7f6a1ee 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -22,7 +22,15 @@ namespace llvm { class ARMBaseTargetMachine : public LLVMTargetMachine { +public: + enum ARMABI { + ARM_ABI_UNKNOWN, + ARM_ABI_APCS, + ARM_ABI_AAPCS // ARM EABI + } TargetABI; + protected: + const DataLayout DL; std::unique_ptr<TargetLoweringObjectFile> TLOF; ARMSubtarget Subtarget; bool isLittle; @@ -39,9 +47,11 @@ public: const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; } const ARMSubtarget *getSubtargetImpl(const Function &F) const override; + const DataLayout *getDataLayout() const override { return &DL; } + bool isLittleEndian() const { return isLittle; } - /// \brief Register ARM analysis passes with a pass manager. - void addAnalysisPasses(PassManagerBase &PM) override; + /// \brief Get the TargetIRAnalysis for this target. 
+ TargetIRAnalysis getTargetIRAnalysis() override; // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 48238bf..80f03c6 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "ARMTargetObjectFile.h" -#include "ARMSubtarget.h" +#include "ARMTargetMachine.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCAsmInfo.h" @@ -27,7 +27,8 @@ using namespace dwarf; void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { - bool isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI(); + bool isAAPCS_ABI = static_cast<const ARMTargetMachine &>(TM).TargetABI == + ARMTargetMachine::ARMABI::ARM_ABI_AAPCS; TargetLoweringObjectFileELF::Initialize(Ctx, TM); InitializeELF(isAAPCS_ABI); @@ -36,10 +37,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, } AttributesSection = - getContext().getELFSection(".ARM.attributes", - ELF::SHT_ARM_ATTRIBUTES, - 0, - SectionKind::getMetadata()); + getContext().getELFSection(".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0); } const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference( diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index ec834e8..4e1b371 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARMTargetTransformInfo.cpp - ARM specific TTI pass ----------------===// +//===-- ARMTargetTransformInfo.cpp - ARM specific TTI ---------------------===// // // The LLVM Compiler Infrastructure // @@ -6,17 +6,8 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -/// \file -/// This file implements a TargetTransformInfo analysis pass specific to the -/// ARM target machine. It uses the target's detailed information to provide -/// more precise answers to certain TTI queries, while letting the target -/// independent and default TTI implementations handle the rest. -/// -//===----------------------------------------------------------------------===// -#include "ARM.h" -#include "ARMTargetMachine.h" -#include "llvm/Analysis/TargetTransformInfo.h" +#include "ARMTargetTransformInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" @@ -24,132 +15,7 @@ using namespace llvm; #define DEBUG_TYPE "armtti" -// Declare the pass initialization routine locally as target-specific passes -// don't have a target-wide initialization entry point, and so we rely on the -// pass constructor initialization. -namespace llvm { -void initializeARMTTIPass(PassRegistry &); -} - -namespace { - -class ARMTTI final : public ImmutablePass, public TargetTransformInfo { - const ARMBaseTargetMachine *TM; - const ARMSubtarget *ST; - const ARMTargetLowering *TLI; - - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. 
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; - -public: - ARMTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { - llvm_unreachable("This pass cannot be directly constructed"); - } - - ARMTTI(const ARMBaseTargetMachine *TM) - : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), - TLI(TM->getSubtargetImpl()->getTargetLowering()) { - initializeARMTTIPass(*PassRegistry::getPassRegistry()); - } - - void initializePass() override { - pushTTIStack(this); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - TargetTransformInfo::getAnalysisUsage(AU); - } - - /// Pass identification. - static char ID; - - /// Provide necessary pointer adjustments for the two base classes. - void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &TargetTransformInfo::ID) - return (TargetTransformInfo*)this; - return this; - } - - /// \name Scalar TTI Implementations - /// @{ - using TargetTransformInfo::getIntImmCost; - unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override; - - /// @} - - - /// \name Vector TTI Implementations - /// @{ - - unsigned getNumberOfRegisters(bool Vector) const override { - if (Vector) { - if (ST->hasNEON()) - return 16; - return 0; - } - - if (ST->isThumb1Only()) - return 8; - return 13; - } - - unsigned getRegisterBitWidth(bool Vector) const override { - if (Vector) { - if (ST->hasNEON()) - return 128; - return 0; - } - - return 32; - } - - unsigned getMaxInterleaveFactor() const override { - // These are out of order CPUs: - if (ST->isCortexA15() || ST->isSwift()) - return 2; - return 1; - } - - unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, - int Index, Type *SubTp) const override; - - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const override; - - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const override; - - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const override; - - unsigned getAddressComputationCost(Type *Val, - bool IsComplex) const override; - - unsigned getArithmeticInstrCost( - unsigned Opcode, Type *Ty, OperandValueKind Op1Info = OK_AnyValue, - OperandValueKind Op2Info = OK_AnyValue, - OperandValueProperties Opd1PropInfo = OP_None, - OperandValueProperties Opd2PropInfo = OP_None) const override; - - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const override; - /// @} -}; - -} // end anonymous namespace - -INITIALIZE_AG_PASS(ARMTTI, TargetTransformInfo, "armtti", - "ARM Target Transform Info", true, true, false) -char ARMTTI::ID = 0; - -ImmutablePass * -llvm::createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM) { - return new ARMTTI(TM); -} - - -unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const { +unsigned ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned Bits = Ty->getPrimitiveSizeInBits(); @@ -181,8 +47,7 @@ unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const { return 3; } -unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { +unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -206,7 +71,7 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, EVT DstTy = TLI->getValueType(Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); + return 
BaseT::getCastInstrCost(Opcode, Dst, Src); // Some arithmetic, load and store operations have specific instructions // to cast up/down their types automatically at no extra cost. @@ -377,11 +242,11 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, return ARMIntegerConversionTbl[Idx].Cost; } - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); + return BaseT::getCastInstrCost(Opcode, Dst, Src); } -unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy, - unsigned Index) const { +unsigned ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, + unsigned Index) { // Penalize inserting into a D-subregister. We end up with a three times // lower estimated throughput on swift. if (ST->isSwift() && @@ -397,11 +262,11 @@ unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy, ValTy->getVectorElementType()->isIntegerTy()) return 3; - return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index); + return BaseT::getVectorInstrCost(Opcode, ValTy, Index); } -unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const { +unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a vector select gets lowered to vbsl. @@ -431,10 +296,10 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return LT.first; } - return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); } -unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { +unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { // Address computations in vectorized code with non-consecutive addresses will // likely result in more instructions compared to scalar code where the // computation can more often be merged into the index mode. The resulting @@ -449,13 +314,32 @@ return 1; } -unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp) const { +unsigned ARMTTIImpl::getFPOpCost(Type *Ty) { + // Use logic similar to that in ARMISelLowering: + // Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access + // to VFP. + + if (ST->hasVFP2() && !ST->isThumb1Only()) { + if (Ty->isFloatTy()) { + return TargetTransformInfo::TCC_Basic; + } + + if (Ty->isDoubleTy()) { + return ST->isFPOnlySP() ? TargetTransformInfo::TCC_Expensive : + TargetTransformInfo::TCC_Basic; + } + } + + return TargetTransformInfo::TCC_Expensive; +} + +unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) { // We only handle costs of reverse and alternate shuffles for now. - if (Kind != SK_Reverse && Kind != SK_Alternate) - return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate) + return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); - if (Kind == SK_Reverse) { + if (Kind == TTI::SK_Reverse) { static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = { // Reverse shuffle costs one instruction if we are shuffling within a // double word (vrev) or two if we shuffle a quad word (vrev, vext).
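// For instance, reversing a <4 x i32> quad word needs a vrev64.32 plus a vext
// to swap the two halves, hence a cost of 2 for the quad-word table entries.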
@@ -473,11 +357,11 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); if (Idx == -1) - return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); return LT.first * NEONShuffleTbl[Idx].Cost; } - if (Kind == SK_Alternate) { + if (Kind == TTI::SK_Alternate) { static const CostTblEntry<MVT::SimpleValueType> NEONAltShuffleTbl[] = { // Alt shuffle cost table for ARM. Cost is the number of instructions // required to create the shuffled vector. @@ -499,16 +383,16 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, int Idx = CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); if (Idx == -1) - return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); return LT.first * NEONAltShuffleTbl[Idx].Cost; } - return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } -unsigned ARMTTI::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, OperandValueKind Op1Info, - OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo) const { +unsigned ARMTTIImpl::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, + TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo) { int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); @@ -564,8 +448,8 @@ unsigned ARMTTI::getArithmeticInstrCost( if (Idx != -1) return LT.first * CostTbl[Idx].Cost; - unsigned Cost = TargetTransformInfo::getArithmeticInstrCost( - Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo); + unsigned Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + Opd1PropInfo, Opd2PropInfo); // This is somewhat of a hack. The problem that we are facing is that SROA // creates a sequence of shift, and, or instructions to construct values. @@ -581,8 +465,9 @@ unsigned ARMTTI::getArithmeticInstrCost( return Cost; } -unsigned ARMTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const { +unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) { std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); if (Src->isVectorTy() && Alignment != 16 && diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h new file mode 100644 index 0000000..97590f6 --- /dev/null +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -0,0 +1,134 @@ +//===-- ARMTargetTransformInfo.h - ARM specific TTI -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines a TargetTransformInfo::Concept conforming object specific +/// to the ARM target machine. It uses the target's detailed information to +/// provide more precise answers to certain TTI queries, while letting the +/// target independent and default TTI implementations handle the rest.
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H + +#include "ARM.h" +#include "ARMTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { + typedef BasicTTIImplBase<ARMTTIImpl> BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const ARMSubtarget *ST; + const ARMTargetLowering *TLI; + + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors. + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); + + const ARMSubtarget *getST() const { return ST; } + const ARMTargetLowering *getTLI() const { return TLI; } + +public: + explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, Function &F) + : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + + // Provide value semantics. MSVC requires that we spell all of these out. + ARMTTIImpl(const ARMTTIImpl &Arg) + : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} + ARMTTIImpl(ARMTTIImpl &&Arg) + : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} + ARMTTIImpl &operator=(const ARMTTIImpl &RHS) { + BaseT::operator=(static_cast<const BaseT &>(RHS)); + ST = RHS.ST; + TLI = RHS.TLI; + return *this; + } + ARMTTIImpl &operator=(ARMTTIImpl &&RHS) { + BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); + ST = std::move(RHS.ST); + TLI = std::move(RHS.TLI); + return *this; + } + + /// \name Scalar TTI Implementations + /// @{ + + using BaseT::getIntImmCost; + unsigned getIntImmCost(const APInt &Imm, Type *Ty); + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + unsigned getNumberOfRegisters(bool Vector) { + if (Vector) { + if (ST->hasNEON()) + return 16; + return 0; + } + + if (ST->isThumb1Only()) + return 8; + return 13; + } + + unsigned getRegisterBitWidth(bool Vector) { + if (Vector) { + if (ST->hasNEON()) + return 128; + return 0; + } + + return 32; + } + + unsigned getMaxInterleaveFactor() { + // These are out of order CPUs: + if (ST->isCortexA15() || ST->isSwift()) + return 2; + return 1; + } + + unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp); + + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); + + unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); + + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); + + unsigned getAddressComputationCost(Type *Val, bool IsComplex); + + unsigned getFPOpCost(Type *Ty); + + unsigned getArithmeticInstrCost( + unsigned Opcode, Type *Ty, + TTI::OperandValueKind Op1Info = TTI::OK_AnyValue, + TTI::OperandValueKind Op2Info = TTI::OK_AnyValue, + TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, + TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); + + unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace); + + /// @} +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 9cc89bd..59461e8 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -164,7 +164,10 @@ class ARMAsmParser : public 
MCTargetAsmParser { // according to count of instructions in block. // ~0U if no active IT block. } ITState; - bool inITBlock() { return ITState.CurPosition != ~0U;} + bool inITBlock() { return ITState.CurPosition != ~0U; } + bool lastInITBlock() { + return ITState.CurPosition == 4 - countTrailingZeros(ITState.Mask); + } void forwardITPosition() { if (!inITBlock()) return; // Move to the next instruction in the IT block, if there is one. If not, @@ -186,6 +189,11 @@ class ARMAsmParser : public MCTargetAsmParser { return getParser().Error(L, Msg, Ranges); } + bool validatetLDMRegList(MCInst Inst, const OperandVector &Operands, + unsigned ListNo, bool IsARPop = false); + bool validatetSTMRegList(MCInst Inst, const OperandVector &Operands, + unsigned ListNo); + int tryParseRegister(); bool tryParseRegisterWithWriteBack(OperandVector &); int tryParseShiftRegister(OperandVector &); @@ -305,6 +313,7 @@ class ARMAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseSetEndImm(OperandVector &); OperandMatchResultTy parseShifterImm(OperandVector &); OperandMatchResultTy parseRotImm(OperandVector &); + OperandMatchResultTy parseModImm(OperandVector &); OperandMatchResultTy parseBitfield(OperandVector &); OperandMatchResultTy parsePostIdxReg(OperandVector &); OperandMatchResultTy parseAM3Offset(OperandVector &); @@ -318,7 +327,7 @@ class ARMAsmParser : public MCTargetAsmParser { void cvtThumbBranches(MCInst &Inst, const OperandVector &); bool validateInstruction(MCInst &Inst, const OperandVector &Ops); - bool processInstruction(MCInst &Inst, const OperandVector &Ops); + bool processInstruction(MCInst &Inst, const OperandVector &Ops, MCStreamer &Out); bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands); bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands); @@ -400,6 +409,7 @@ class ARMOperand : public MCParsedAsmOperand { k_ShiftedImmediate, k_ShifterImmediate, k_RotateImmediate, + k_ModifiedImmediate, k_BitfieldDescriptor, k_Token } Kind; @@ -511,6 +521,11 @@ class ARMOperand : public MCParsedAsmOperand { unsigned Imm; }; + struct ModImmOp { + unsigned Bits; + unsigned Rot; + }; + struct BitfieldOp { unsigned LSB; unsigned Width; @@ -537,6 +552,7 @@ class ARMOperand : public MCParsedAsmOperand { struct RegShiftedRegOp RegShiftedReg; struct RegShiftedImmOp RegShiftedImm; struct RotImmOp RotImm; + struct ModImmOp ModImm; struct BitfieldOp Bitfield; }; @@ -612,6 +628,9 @@ public: case k_RotateImmediate: RotImm = o.RotImm; break; + case k_ModifiedImmediate: + ModImm = o.ModImm; + break; case k_BitfieldDescriptor: Bitfield = o.Bitfield; break; @@ -1020,33 +1039,17 @@ public: } bool isAdrLabel() const { // If we have an immediate that's not a constant, treat it as a label - // reference needing a fixup. If it is a constant, but it can't fit - // into shift immediate encoding, we reject it. - if (isImm() && !isa<MCConstantExpr>(getImm())) return true; - else return (isARMSOImm() || isARMSOImmNeg()); - } - bool isARMSOImm() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return ARM_AM::getSOImmVal(Value) != -1; - } - bool isARMSOImmNot() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return ARM_AM::getSOImmVal(~Value) != -1; - } - bool isARMSOImmNeg() const { + // reference needing a fixup. 
+ if (isImm() && !isa<MCConstantExpr>(getImm())) + return true; + + // If it is a constant, it must fit into a modified immediate encoding. if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - // Only use this when not representable as a plain so_imm. - return ARM_AM::getSOImmVal(Value) == -1 && - ARM_AM::getSOImmVal(-Value) != -1; + return (ARM_AM::getSOImmVal(Value) != -1 || + ARM_AM::getSOImmVal(-Value) != -1); } bool isT2SOImm() const { if (!isImm()) return false; @@ -1091,6 +1094,22 @@ public: bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; } bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; } bool isRotImm() const { return Kind == k_RotateImmediate; } + bool isModImm() const { return Kind == k_ModifiedImmediate; } + bool isModImmNot() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getSOImmVal(~Value) != -1; + } + bool isModImmNeg() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getSOImmVal(Value) == -1 && + ARM_AM::getSOImmVal(-Value) != -1; + } bool isBitfield() const { return Kind == k_BitfieldDescriptor; } bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; } bool isPostIdxReg() const { @@ -1826,6 +1845,30 @@ public: Inst.addOperand(MCOperand::CreateImm(RotImm.Imm >> 3)); } + void addModImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + // Support for fixups (MCFixup) + if (isImm()) + return addImmOperands(Inst, N); + + Inst.addOperand(MCOperand::CreateImm(ModImm.Bits | (ModImm.Rot << 7))); + } + + void addModImmNotOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + uint32_t Enc = ARM_AM::getSOImmVal(~CE->getValue()); + Inst.addOperand(MCOperand::CreateImm(Enc)); + } + + void addModImmNegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + uint32_t Enc = ARM_AM::getSOImmVal(-CE->getValue()); + Inst.addOperand(MCOperand::CreateImm(Enc)); + } + void addBitfieldOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // Munge the lsb/width into a bitfield mask. @@ -1982,22 +2025,6 @@ public: Inst.addOperand(MCOperand::CreateImm(Memory.OffsetImm->getValue())); } - void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // The operand is actually a so_imm, but we have its bitwise - // negation in the assembly source, so twiddle it here. - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(~CE->getValue())); - } - - void addARMSOImmNegOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // The operand is actually a so_imm, but we have its - // negation in the assembly source, so twiddle it here.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); - } - void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); } @@ -2630,6 +2657,16 @@ public: return Op; } + static std::unique_ptr<ARMOperand> CreateModImm(unsigned Bits, unsigned Rot, + SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_ModifiedImmediate); + Op->ModImm.Bits = Bits; + Op->ModImm.Rot = Rot; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + static std::unique_ptr<ARMOperand> CreateBitfield(unsigned LSB, unsigned Width, SMLoc S, SMLoc E) { auto Op = make_unique<ARMOperand>(k_BitfieldDescriptor); @@ -2883,6 +2920,10 @@ void ARMOperand::print(raw_ostream &OS) const { case k_RotateImmediate: OS << "<ror " << " #" << (RotImm.Imm * 8) << ">"; break; + case k_ModifiedImmediate: + OS << "<mod_imm #" << ModImm.Bits << ", #" + << ModImm.Rot << ">"; + break; case k_BitfieldDescriptor: OS << "<bitfield " << "lsb: " << Bitfield.LSB << ", width: " << Bitfield.Width << ">"; break; @@ -4339,6 +4380,123 @@ ARMAsmParser::parseRotImm(OperandVector &Operands) { } ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseModImm(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + MCAsmLexer &Lexer = getLexer(); + int64_t Imm1, Imm2; + + SMLoc S = Parser.getTok().getLoc(); + + // 1) A mod_imm operand can appear in the place of a register name: + // add r0, #mod_imm + // add r0, r0, #mod_imm + // to correctly handle the latter, we bail out as soon as we see an + // identifier. + // + // 2) Similarly, we do not want to parse into complex operands: + // mov r0, #mod_imm + // mov r0, :lower16:(_foo) + if (Parser.getTok().is(AsmToken::Identifier) || + Parser.getTok().is(AsmToken::Colon)) + return MatchOperand_NoMatch; + + // Hash (dollar) is optional as per the ARMARM + if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar)) { + // Avoid parsing into complex operands (#:) + if (Lexer.peekTok().is(AsmToken::Colon)) + return MatchOperand_NoMatch; + + // Eat the hash (dollar) + Parser.Lex(); + } + + SMLoc Sx1, Ex1; + Sx1 = Parser.getTok().getLoc(); + const MCExpr *Imm1Exp; + if (getParser().parseExpression(Imm1Exp, Ex1)) { + Error(Sx1, "malformed expression"); + return MatchOperand_ParseFail; + } + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm1Exp); + + if (CE) { + // Immediate must fit within 32-bits + Imm1 = CE->getValue(); + int Enc = ARM_AM::getSOImmVal(Imm1); + if (Enc != -1 && Parser.getTok().is(AsmToken::EndOfStatement)) { + // We have a match! + Operands.push_back(ARMOperand::CreateModImm((Enc & 0xFF), + (Enc & 0xF00) >> 7, + Sx1, Ex1)); + return MatchOperand_Success; + } + + // We have parsed an immediate which is not for us, so fall back to a + // plain immediate. This can happen for instruction aliases. For example, + // ARMInstrInfo.td defines the alias [mov <-> mvn] which can transform + // a mov (mvn) with a mod_imm_neg/mod_imm_not operand into the opposite + // instruction with a mod_imm operand. The alias is defined such that the + // parser method is shared, that's why we have to do this here. + if (Parser.getTok().is(AsmToken::EndOfStatement)) { + Operands.push_back(ARMOperand::CreateImm(Imm1Exp, Sx1, Ex1)); + return MatchOperand_Success; + } + } else { + // Operands like #(l1 - l2) can only be evaluated at a later stage (via an + // MCFixup). Fall back to a plain immediate.
+ Operands.push_back(ARMOperand::CreateImm(Imm1Exp, Sx1, Ex1)); + return MatchOperand_Success; + } + + // From this point onward, we expect the input to be a (#bits, #rot) pair + if (Parser.getTok().isNot(AsmToken::Comma)) { + Error(Sx1, "expected modified immediate operand: #[0, 255], #even[0-30]"); + return MatchOperand_ParseFail; + } + + if (Imm1 & ~0xFF) { + Error(Sx1, "immediate operand must be a number in the range [0, 255]"); + return MatchOperand_ParseFail; + } + + // Eat the comma + Parser.Lex(); + + // Repeat for #rot + SMLoc Sx2, Ex2; + Sx2 = Parser.getTok().getLoc(); + + // Eat the optional hash (dollar) + if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar)) + Parser.Lex(); + + const MCExpr *Imm2Exp; + if (getParser().parseExpression(Imm2Exp, Ex2)) { + Error(Sx2, "malformed expression"); + return MatchOperand_ParseFail; + } + + CE = dyn_cast<MCConstantExpr>(Imm2Exp); + + if (CE) { + Imm2 = CE->getValue(); + if (!(Imm2 & ~0x1E)) { + // We have a match! + Operands.push_back(ARMOperand::CreateModImm(Imm1, Imm2, S, Ex2)); + return MatchOperand_Success; + } + Error(Sx2, "immediate operand must be an even number in the range [0, 30]"); + return MatchOperand_ParseFail; + } else { + Error(Sx2, "constant expression expected"); + return MatchOperand_ParseFail; + } +} + +ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseBitfield(OperandVector &Operands) { MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); @@ -5091,15 +5249,52 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { return true; } + enum { + COFF = (1 << MCObjectFileInfo::IsCOFF), + ELF = (1 << MCObjectFileInfo::IsELF), + MACHO = (1 << MCObjectFileInfo::IsMachO) + }; + static const struct PrefixEntry { + const char *Spelling; + ARMMCExpr::VariantKind VariantKind; + uint8_t SupportedFormats; + } PrefixEntries[] = { + { "lower16", ARMMCExpr::VK_ARM_LO16, COFF | ELF | MACHO }, + { "upper16", ARMMCExpr::VK_ARM_HI16, COFF | ELF | MACHO }, + }; + StringRef IDVal = Parser.getTok().getIdentifier(); - if (IDVal == "lower16") { - RefKind = ARMMCExpr::VK_ARM_LO16; - } else if (IDVal == "upper16") { - RefKind = ARMMCExpr::VK_ARM_HI16; - } else { + + const auto &Prefix = + std::find_if(std::begin(PrefixEntries), std::end(PrefixEntries), + [&IDVal](const PrefixEntry &PE) { + return PE.Spelling == IDVal; + }); + if (Prefix == std::end(PrefixEntries)) { Error(Parser.getTok().getLoc(), "unexpected prefix in operand"); return true; } + + uint8_t CurrentFormat; + switch (getContext().getObjectFileInfo()->getObjectFileType()) { + case MCObjectFileInfo::IsMachO: + CurrentFormat = MACHO; + break; + case MCObjectFileInfo::IsELF: + CurrentFormat = ELF; + break; + case MCObjectFileInfo::IsCOFF: + CurrentFormat = COFF; + break; + } + + if (~Prefix->SupportedFormats & CurrentFormat) { + Error(Parser.getTok().getLoc(), + "cannot represent relocation in the current file format"); + return true; + } + + RefKind = Prefix->VariantKind; Parser.Lex(); if (getLexer().isNot(AsmToken::Colon)) { @@ -5107,6 +5302,7 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { return true; } Parser.Lex(); // Eat the last ':' + return false; } @@ -5139,7 +5335,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "fmuls" || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || - Mnemonic == "vrintp" || Mnemonic == "vrintm" ||
Mnemonic.startswith("vsel")) + Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" || + Mnemonic.startswith("vsel")) return Mnemonic; // First, split out any predication code. Ignore mnemonics we know aren't @@ -5244,7 +5441,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || - Mnemonic == "vrintm" || Mnemonic.startswith("aes") || + Mnemonic == "vrintm" || Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) { // These mnemonics are never predicable @@ -5282,7 +5479,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // conditionally adding the cc_out in the first place because we need // to check the type of the parsed immediate operand. if (Mnemonic == "mov" && Operands.size() > 4 && !isThumb() && - !static_cast<ARMOperand &>(*Operands[4]).isARMSOImm() && + !static_cast<ARMOperand &>(*Operands[4]).isModImm() && static_cast<ARMOperand &>(*Operands[4]).isImm0_65535Expr() && static_cast<ARMOperand &>(*Operands[1]).getReg() == 0) return true; @@ -5823,6 +6020,50 @@ static bool instIsBreakpoint(const MCInst &Inst) { } +bool ARMAsmParser::validatetLDMRegList(MCInst Inst, + const OperandVector &Operands, + unsigned ListNo, bool IsARPop) { + const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]); + bool HasWritebackToken = Op.isToken() && Op.getToken() == "!"; + + bool ListContainsSP = listContainsReg(Inst, ListNo, ARM::SP); + bool ListContainsLR = listContainsReg(Inst, ListNo, ARM::LR); + bool ListContainsPC = listContainsReg(Inst, ListNo, ARM::PC); + + if (!IsARPop && ListContainsSP) + return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(), + "SP may not be in the register list"); + else if (ListContainsPC && ListContainsLR) + return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(), + "PC and LR may not be in the register list simultaneously"); + else if (inITBlock() && !lastInITBlock() && ListContainsPC) + return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(), + "instruction must be outside of IT block or the last " + "instruction in an IT block"); + return false; +} + +bool ARMAsmParser::validatetSTMRegList(MCInst Inst, + const OperandVector &Operands, + unsigned ListNo) { + const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]); + bool HasWritebackToken = Op.isToken() && Op.getToken() == "!"; + + bool ListContainsSP = listContainsReg(Inst, ListNo, ARM::SP); + bool ListContainsPC = listContainsReg(Inst, ListNo, ARM::PC); + + if (ListContainsSP && ListContainsPC) + return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(), + "SP and PC may not be in the register list"); + else if (ListContainsSP) + return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(), + "SP may not be in the register list"); + else if (ListContainsPC) + return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(), + "PC may not be in the register list"); + return false; +} + // FIXME: We would really like to be able to tablegen'erate this. bool ARMAsmParser::validateInstruction(MCInst &Inst, const OperandVector &Operands) { @@ -6006,9 +6247,9 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, return Error(Operands[3]->getStartLoc(), "writeback operator '!' 
not allowed when base register " "in register list"); - if (listContainsReg(Inst, 3 + HasWritebackToken, ARM::SP)) - return Error(Operands[3 + HasWritebackToken]->getStartLoc(), - "SP not allowed in register list"); + + if (validatetLDMRegList(Inst, Operands, 3)) + return true; break; } case ARM::LDMIA_UPD: @@ -6025,13 +6266,14 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, break; case ARM::t2LDMIA: case ARM::t2LDMDB: + if (validatetLDMRegList(Inst, Operands, 3)) + return true; + break; case ARM::t2STMIA: - case ARM::t2STMDB: { - if (listContainsReg(Inst, 3, ARM::SP)) - return Error(Operands.back()->getStartLoc(), - "SP not allowed in register list"); + case ARM::t2STMDB: + if (validatetSTMRegList(Inst, Operands, 3)) + return true; break; - } case ARM::t2LDMIA_UPD: case ARM::t2LDMDB_UPD: case ARM::t2STMIA_UPD: @@ -6040,9 +6282,13 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, return Error(Operands.back()->getStartLoc(), "writeback register not allowed in register list"); - if (listContainsReg(Inst, 4, ARM::SP)) - return Error(Operands.back()->getStartLoc(), - "SP not allowed in register list"); + if (Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { + if (validatetLDMRegList(Inst, Operands, 3)) + return true; + } else { + if (validatetSTMRegList(Inst, Operands, 3)) + return true; + } break; } case ARM::sysLDMIA_UPD: @@ -6087,6 +6333,8 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, !isThumbTwo()) return Error(Operands[2]->getStartLoc(), "registers must be in range r0-r7 or pc"); + if (validatetLDMRegList(Inst, Operands, 2, !isMClass())) + return true; break; } case ARM::tPUSH: { @@ -6095,6 +6343,8 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, !isThumbTwo()) return Error(Operands[2]->getStartLoc(), "registers must be in range r0-r7 or lr"); + if (validatetSTMRegList(Inst, Operands, 2)) + return true; break; } case ARM::tSTMIA_UPD: { @@ -6111,9 +6361,9 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, return Error(Operands[4]->getStartLoc(), "writeback operator '!' not allowed when base register " "in register list"); - if (listContainsReg(Inst, 4, ARM::SP) && !inITBlock()) - return Error(Operands.back()->getStartLoc(), - "SP not allowed in register list"); + + if (validatetSTMRegList(Inst, Operands, 4)) + return true; break; } case ARM::tADDrSP: { @@ -6434,7 +6684,8 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { } bool ARMAsmParser::processInstruction(MCInst &Inst, - const OperandVector &Operands) { + const OperandVector &Operands, + MCStreamer &Out) { switch (Inst.getOpcode()) { // Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction. case ARM::LDRT_POST: @@ -6475,12 +6726,35 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // Alias for alternate form of 'ADR Rd, #imm' instruction. case ARM::ADDri: { if (Inst.getOperand(1).getReg() != ARM::PC || - Inst.getOperand(5).getReg() != 0) + Inst.getOperand(5).getReg() != 0 || + !(Inst.getOperand(2).isExpr() || Inst.getOperand(2).isImm())) return false; MCInst TmpInst; TmpInst.setOpcode(ARM::ADR); TmpInst.addOperand(Inst.getOperand(0)); - TmpInst.addOperand(Inst.getOperand(2)); + if (Inst.getOperand(2).isImm()) { + // Immediate (mod_imm) will be in its encoded form, we must unencode it + // before passing it to the ADR instruction. + unsigned Enc = Inst.getOperand(2).getImm(); + TmpInst.addOperand(MCOperand::CreateImm( + ARM_AM::rotr32(Enc & 0xFF, (Enc & 0xF00) >> 7))); + } else { + // Turn PC-relative expression into absolute expression. 
+ // Reading PC provides the start of the current instruction + 8 and + // the transform to adr is biased by that. + MCSymbol *Dot = getContext().CreateTempSymbol(); + Out.EmitLabel(Dot); + const MCExpr *OpExpr = Inst.getOperand(2).getExpr(); + const MCExpr *InstPC = MCSymbolRefExpr::Create(Dot, + MCSymbolRefExpr::VK_None, + getContext()); + const MCExpr *Const8 = MCConstantExpr::Create(8, getContext()); + const MCExpr *ReadPC = MCBinaryExpr::CreateAdd(InstPC, Const8, + getContext()); + const MCExpr *FixupAddr = MCBinaryExpr::CreateAdd(ReadPC, OpExpr, + getContext()); + TmpInst.addOperand(MCOperand::CreateExpr(FixupAddr)); + } TmpInst.addOperand(Inst.getOperand(3)); TmpInst.addOperand(Inst.getOperand(4)); Inst = TmpInst; @@ -8302,7 +8576,6 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); switch (MatchResult) { - default: break; case Match_Success: // Context sensitive operand constraints aren't handled by the matcher, // so check them here. @@ -8320,7 +8593,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // encoding is selected. Loop on it while changes happen so the // individual transformations can chain off each other. E.g., // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8) - while (processInstruction(Inst, Operands)) + while (processInstruction(Inst, Operands, Out)) ; // Only after the instruction is fully processed, we can validate it @@ -8732,7 +9005,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { Parser.Lex(); // Consume the EndOfStatement - if (!RegisterReqs.insert(std::make_pair(Name, Reg)).second) { + if (RegisterReqs.insert(std::make_pair(Name, Reg)).first->second != Reg) { Error(SRegLoc, "redefinition of '" + Name + "' does not match original."); return false; } @@ -8858,8 +9131,13 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { if (Tag == ARMBuildAttrs::compatibility) { if (Parser.getTok().isNot(AsmToken::Comma)) IsStringValue = false; - else - Parser.Lex(); + if (Parser.getTok().isNot(AsmToken::Comma)) { + Error(Parser.getTok().getLoc(), "comma expected"); + Parser.eatToEndOfStatement(); + return false; + } else { + Parser.Lex(); + } } if (IsStringValue) { @@ -8888,38 +9166,78 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { StringRef CPU = getParser().parseStringToEndOfStatement().trim(); getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU); + + if (!STI.isCPUStringValid(CPU)) { + Error(L, "Unknown CPU name"); + return false; + } + + // FIXME: This switches the CPU features globally, therefore it might + // happen that code you would not expect to assemble will. 
For details + // see: http://llvm.org/bugs/show_bug.cgi?id=20757 + STI.InitMCProcessorInfo(CPU, ""); + STI.InitCPUSchedModel(CPU); + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + return false; } // FIXME: This is duplicated in getARMFPUFeatures() in // tools/clang/lib/Driver/Tools.cpp static const struct { - const unsigned Fpu; + const unsigned ID; const uint64_t Enabled; const uint64_t Disabled; -} Fpus[] = { - {ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON}, - {ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON}, - {ARM::VFPV3, ARM::FeatureVFP3, ARM::FeatureNEON}, - {ARM::VFPV3_D16, ARM::FeatureVFP3 | ARM::FeatureD16, ARM::FeatureNEON}, - {ARM::VFPV4, ARM::FeatureVFP4, ARM::FeatureNEON}, - {ARM::VFPV4_D16, ARM::FeatureVFP4 | ARM::FeatureD16, ARM::FeatureNEON}, - {ARM::FPV5_D16, ARM::FeatureFPARMv8 | ARM::FeatureD16, - ARM::FeatureNEON | ARM::FeatureCrypto}, - {ARM::FP_ARMV8, ARM::FeatureFPARMv8, - ARM::FeatureNEON | ARM::FeatureCrypto}, - {ARM::NEON, ARM::FeatureNEON, 0}, - {ARM::NEON_VFPV4, ARM::FeatureVFP4 | ARM::FeatureNEON, 0}, - {ARM::NEON_FP_ARMV8, ARM::FeatureFPARMv8 | ARM::FeatureNEON, - ARM::FeatureCrypto}, - {ARM::CRYPTO_NEON_FP_ARMV8, - ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto, 0}, - {ARM::SOFTVFP, 0, 0}, +} FPUs[] = { + {/* ID */ ARM::VFP, + /* Enabled */ ARM::FeatureVFP2, + /* Disabled */ ARM::FeatureNEON}, + {/* ID */ ARM::VFPV2, + /* Enabled */ ARM::FeatureVFP2, + /* Disabled */ ARM::FeatureNEON}, + {/* ID */ ARM::VFPV3, + /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3, + /* Disabled */ ARM::FeatureNEON | ARM::FeatureD16}, + {/* ID */ ARM::VFPV3_D16, + /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureD16, + /* Disabled */ ARM::FeatureNEON}, + {/* ID */ ARM::VFPV4, + /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4, + /* Disabled */ ARM::FeatureNEON | ARM::FeatureD16}, + {/* ID */ ARM::VFPV4_D16, + /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | + ARM::FeatureD16, + /* Disabled */ ARM::FeatureNEON}, + {/* ID */ ARM::FPV5_D16, + /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | + ARM::FeatureFPARMv8 | ARM::FeatureD16, + /* Disabled */ ARM::FeatureNEON | ARM::FeatureCrypto}, + {/* ID */ ARM::FP_ARMV8, + /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | + ARM::FeatureFPARMv8, + /* Disabled */ ARM::FeatureNEON | ARM::FeatureCrypto | ARM::FeatureD16}, + {/* ID */ ARM::NEON, + /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureNEON, + /* Disabled */ ARM::FeatureD16}, + {/* ID */ ARM::NEON_VFPV4, + /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | + ARM::FeatureNEON, + /* Disabled */ ARM::FeatureD16}, + {/* ID */ ARM::NEON_FP_ARMV8, + /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | + ARM::FeatureFPARMv8 | ARM::FeatureNEON, + /* Disabled */ ARM::FeatureCrypto | ARM::FeatureD16}, + {/* ID */ ARM::CRYPTO_NEON_FP_ARMV8, + /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | + ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto, + /* Disabled */ ARM::FeatureD16}, + {ARM::SOFTVFP, 0, 0}, }; /// parseDirectiveFPU /// ::= .fpu str bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { + SMLoc FPUNameLoc = getTok().getLoc(); StringRef FPU = getParser().parseStringToEndOfStatement().trim(); unsigned ID = StringSwitch<unsigned>(FPU) @@ -8928,18 +9246,18 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { if (ID == ARM::INVALID_FPU) { - Error(L,
"Unknown FPU name"); + Error(FPUNameLoc, "Unknown FPU name"); return false; } - for (const auto &Fpu : Fpus) { - if (Fpu.Fpu != ID) + for (const auto &Entry : FPUs) { + if (Entry.ID != ID) continue; // Need to toggle features that should be on but are off and that // should off but are on. - uint64_t Toggle = (Fpu.Enabled & ~STI.getFeatureBits()) | - (Fpu.Disabled & STI.getFeatureBits()); + uint64_t Toggle = (Entry.Enabled & ~STI.getFeatureBits()) | + (Entry.Disabled & STI.getFeatureBits()); setAvailableFeatures(ComputeAvailableFeatures(STI.ToggleFeature(Toggle))); break; } @@ -9766,7 +10084,7 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, if (CE->getValue() == 0) return Match_Success; break; - case MCK_ARMSOImm: + case MCK_ModImm: if (Op.isImm()) { const MCExpr *SOExpr = Op.getImm(); int64_t Value; diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index ef65418..4d5122a 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -176,8 +176,6 @@ static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, @@ -405,6 +403,28 @@ static MCDisassembler *createThumbDisassembler(const Target &T, return new ThumbDisassembler(STI, Ctx); } +// Post-decoding checks +static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size, + uint64_t Address, raw_ostream &OS, + raw_ostream &CS, + uint32_t Insn, + DecodeStatus Result) +{ + switch (MI.getOpcode()) { + case ARM::HVC: { + // HVC is undefined if condition = 0xf otherwise upredictable + // if condition != 0xe + uint32_t Cond = (Insn >> 28) & 0xF; + if (Cond == 0xF) + return MCDisassembler::Fail; + if (Cond != 0xE) + return MCDisassembler::SoftFail; + return Result; + } + default: return Result; + } +} + DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, raw_ostream &OS, @@ -430,7 +450,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, decodeInstruction(DecoderTableARM32, MI, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { Size = 4; - return Result; + return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, Result); } // VFP and NEON instructions, similarly, are shared between ARM @@ -1113,15 +1133,6 @@ static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { - uint32_t imm = Val & 0xFF; - uint32_t rot = (Val & 0xF00) >> 7; - uint32_t rot_imm = (imm >> rot) | (imm << ((32-rot) & 0x1F)); - Inst.addOperand(MCOperand::CreateImm(rot_imm)); - return MCDisassembler::Success; -} - static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4960,7 +4971,7 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val, DecodeStatus S = MCDisassembler::Success; // Shift of "asr #32" is not 
allowed in Thumb2 mode. - if (Val == 0x20) S = MCDisassembler::SoftFail; + if (Val == 0x20) S = MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(Val)); return S; } diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 0570084..16eea33 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -269,7 +269,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, // expressed as a GPRPair, so we have to manually merge them. // FIXME: We would really like to be able to tablegen'erate this. case ARM::LDREXD: case ARM::STREXD: - case ARM::LDAEXD: case ARM::STLEXD: + case ARM::LDAEXD: case ARM::STLEXD: { const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID); bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD; unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg(); @@ -290,6 +290,23 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, printInstruction(&NewMI, O); return; } + break; + } + // B9.3.3 ERET (Thumb) + // For a target that has Virtualization Extensions, ERET is the preferred + // disassembly of SUBS PC, LR, #0 + case ARM::t2SUBS_PC_LR: { + if (MI->getNumOperands() == 3 && + MI->getOperand(0).isImm() && + MI->getOperand(0).getImm() == 0 && + (getAvailableFeatures() & ARM::FeatureVirtualization)) { + O << "\teret"; + printPredicateOperand(MI, 1, O); + printAnnotation(O, Annot); + return; + } + break; + } } printInstruction(MI, O); @@ -1301,6 +1318,52 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum, O << markup(">"); } +void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + MCOperand Op = MI->getOperand(OpNum); + + // Support for fixups (MCFixup) + if (Op.isExpr()) + return printOperand(MI, OpNum, O); + + unsigned Bits = Op.getImm() & 0xFF; + unsigned Rot = (Op.getImm() & 0xF00) >> 7; + + bool PrintUnsigned = false; + switch (MI->getOpcode()){ + case ARM::MOVi: + // Movs to PC should be treated unsigned + PrintUnsigned = (MI->getOperand(OpNum - 1).getReg() == ARM::PC); + break; + case ARM::MSRi: + // Movs to special registers should be treated unsigned + PrintUnsigned = true; + break; + } + + int32_t Rotated = ARM_AM::rotr32(Bits, Rot); + if (ARM_AM::getSOImmVal(Rotated) == Op.getImm()) { + // #rot has the least possible value + O << "#" << markup("<imm:"); + if (PrintUnsigned) + O << static_cast<uint32_t>(Rotated); + else + O << Rotated; + O << markup(">"); + return; + } + + // Explicit #bits, #rot implied + O << "#" + << markup("<imm:") + << Bits + << markup(">") + << ", #" + << markup("<imm:") + << Rot + << markup(">"); +} + void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O) { O << markup("<imm:") diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 09fd536..f179e01 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -131,6 +131,7 @@ public: void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git 
a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index f24b419..a821a6b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -51,7 +51,7 @@ ARMELFObjectWriter::~ARMELFObjectWriter() {} bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, unsigned Type) const { - // FIXME: This is extremelly conservative. This really needs to use a + // FIXME: This is extremely conservative. This really needs to use a // whitelist with a clear explanation for why each relocation needs to // point to the symbol, not to the section. switch (Type) { @@ -148,6 +148,22 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, } else { switch ((unsigned)Fixup.getKind()) { default: llvm_unreachable("invalid fixup kind!"); + case FK_Data_1: + switch (Modifier) { + default: llvm_unreachable("unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_ABS8; + break; + } + break; + case FK_Data_2: + switch (Modifier) { + default: llvm_unreachable("unsupported modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_ABS16; + break; + } + break; case FK_Data_4: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); @@ -184,6 +200,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_ARM_PREL31: Type = ELF::R_ARM_PREL31; break; + case MCSymbolRefExpr::VK_ARM_SBREL: + Type = ELF::R_ARM_SBREL32; + break; case MCSymbolRefExpr::VK_ARM_TLSLDO: Type = ELF::R_ARM_TLS_LDO32; break; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 24ee537..2b65520 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -15,6 +15,7 @@ #include "ARMArchName.h" #include "ARMFPUName.h" +#include "ARMArchExtName.h" #include "ARMRegisterInfo.h" #include "ARMUnwindOpAsm.h" #include "llvm/ADT/StringExtras.h" @@ -105,6 +106,19 @@ static unsigned GetArchDefaultCPUArch(unsigned ID) { return 0; } +static const char *GetArchExtName(unsigned ID) { + switch (ID) { + default: + llvm_unreachable("Unknown ARCH Extension kind"); + break; +#define ARM_ARCHEXT_NAME(NAME, ID) \ + case ARM::ID: \ + return NAME; +#include "ARMArchExtName.def" + } + return nullptr; +} + namespace { class ARMELFStreamer; @@ -134,6 +148,7 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer { void emitIntTextAttribute(unsigned Attribute, unsigned IntValue, StringRef StringValue) override; void emitArch(unsigned Arch) override; + void emitArchExtension(unsigned ArchExt) override; void emitObjectArch(unsigned Arch) override; void emitFPU(unsigned FPU) override; void emitInst(uint32_t Inst, char Suffix = '\0') override; @@ -249,6 +264,9 @@ void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute, void ARMTargetAsmStreamer::emitArch(unsigned Arch) { OS << "\t.arch\t" << GetArchName(Arch) << "\n"; } +void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) { + OS << "\t.arch_extension\t" << GetArchExtName(ArchExt) << "\n"; +} void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) { OS << "\t.object_arch\t" << GetArchName(Arch) << '\n'; } @@ -300,7 +318,19 @@ private: StringRef StringValue; static bool LessTag(const AttributeItem &LHS, const AttributeItem &RHS) { - return (LHS.Tag < RHS.Tag); + // The conformance tag must be emitted first when serialised + // into an object file.
Specifically, the addenda to the ARM ABI + // states that (2.3.7.4): + // + // "To simplify recognition by consumers in the common case of + // claiming conformity for the whole file, this tag should be + // emitted first in a file-scope sub-subsection of the first + // public subsection of the attributes section." + // + // So it is special-cased in this comparison predicate when the + // attributes are sorted in finishAttributeSection(). + return (RHS.Tag != ARMBuildAttrs::conformance) && + ((LHS.Tag == ARMBuildAttrs::conformance) || (LHS.Tag < RHS.Tag)); } }; @@ -541,6 +571,10 @@ public: /// necessary. void EmitValueImpl(const MCExpr *Value, unsigned Size, const SMLoc &Loc) override { + if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value)) + if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4)) + getContext().FatalError(Loc, "relocated expression must be 32-bit"); + EmitDataMappingSymbol(); MCELFStreamer::EmitValueImpl(Value, Size); } @@ -942,11 +976,8 @@ void ARMTargetELFStreamer::finishAttributeSection() { if (AttributeSection) { Streamer.SwitchSection(AttributeSection); } else { - AttributeSection = - Streamer.getContext().getELFSection(".ARM.attributes", - ELF::SHT_ARM_ATTRIBUTES, - 0, - SectionKind::getMetadata()); + AttributeSection = Streamer.getContext().getELFSection( + ".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0); Streamer.SwitchSection(AttributeSection); // Format version @@ -979,12 +1010,12 @@ void ARMTargetELFStreamer::finishAttributeSection() { Streamer.EmitULEB128IntValue(item.IntValue); break; case AttributeItem::TextAttribute: - Streamer.EmitBytes(item.StringValue.upper()); + Streamer.EmitBytes(item.StringValue); Streamer.EmitIntValue(0, 1); // '\0' break; case AttributeItem::NumericAndTextAttributes: Streamer.EmitULEB128IntValue(item.IntValue); - Streamer.EmitBytes(item.StringValue.upper()); + Streamer.EmitBytes(item.StringValue); Streamer.EmitIntValue(0, 1); // '\0' break; } @@ -1053,11 +1084,11 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, // Get .ARM.extab or .ARM.exidx section const MCSectionELF *EHSection = nullptr; if (const MCSymbol *Group = FnSection.getGroup()) { - EHSection = getContext().getELFSection( - EHSecName, Type, Flags | ELF::SHF_GROUP, Kind, - FnSection.getEntrySize(), Group->getName()); + EHSection = + getContext().getELFSection(EHSecName, Type, Flags | ELF::SHF_GROUP, + FnSection.getEntrySize(), Group->getName()); } else { - EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind); + EHSection = getContext().getELFSection(EHSecName, Type, Flags); } assert(EHSection && "Failed to get the required EH section"); @@ -1341,10 +1372,8 @@ MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, return S; } -MCStreamer *createARMNullStreamer(MCContext &Ctx) { - MCStreamer *S = llvm::createNullStreamer(Ctx); - new ARMTargetStreamer(*S); - return S; +MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) { + return new ARMTargetStreamer(S); } MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 1d82099..66a1618 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "ARMMCAsmInfo.h" -#include "llvm/Support/CommandLine.h" #include "llvm/ADT/Triple.h" +#include 
"llvm/Support/CommandLine.h" using namespace llvm; @@ -89,6 +89,7 @@ ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() { AlignmentIsInBytes = false; PrivateGlobalPrefix = "$M"; + PrivateLabelPrefix = "$M"; } void ARMCOFFMCAsmInfoGNU::anchor() { } @@ -101,6 +102,7 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() { Code16Directive = ".code\t16"; Code32Directive = ".code\t32"; PrivateGlobalPrefix = ".L"; + PrivateLabelPrefix = ".L"; SupportsDebugInformation = true; ExceptionsType = ExceptionHandling::None; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index f1fef41..6cb4715 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -21,7 +21,8 @@ namespace llvm { class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin { - void anchor() override; + virtual void anchor(); + public: explicit ARMMCAsmInfoDarwin(StringRef TT); }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index b8ee555..efbebd3 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -37,8 +37,8 @@ STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created."); namespace { class ARMMCCodeEmitter : public MCCodeEmitter { - ARMMCCodeEmitter(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION; - void operator=(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION; + ARMMCCodeEmitter(const ARMMCCodeEmitter &) = delete; + void operator=(const ARMMCCodeEmitter &) = delete; const MCInstrInfo &MCII; const MCContext &CTX; bool IsLittleEndian; @@ -304,6 +304,28 @@ public: return Binary; } + unsigned getModImmOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &ST) const { + const MCOperand &MO = MI.getOperand(Op); + + // Support for fixups (MCFixup) + if (MO.isExpr()) { + const MCExpr *Expr = MO.getExpr(); + // In instruction code this value always encoded as lowest 12 bits, + // so we don't have to perform any specific adjustments. + // Due to requirements of relocatable records we have to use FK_Data_4. + // See ARMELFObjectWriter::ExplicitRelSym and + // ARMELFObjectWriter::GetRelocTypeInner for more details. + MCFixupKind Kind = MCFixupKind(FK_Data_4); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); + return 0; + } + + // Immediate is already in its encoded format + return MO.getImm(); + } + /// getT2SOImmOpValue - Return an encoded 12-bit shifted-immediate value. 
unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 98190ba..8c19785 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -64,10 +64,60 @@ static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, } static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, - std::string &Info) { - if (STI.getFeatureBits() & llvm::ARM::HasV8Ops && - MI.getOperand(1).isImm() && MI.getOperand(1).getImm() != 8) { - Info = "applying IT instruction to more than one subsequent instruction is deprecated"; + std::string &Info) { + if (STI.getFeatureBits() & llvm::ARM::HasV8Ops && MI.getOperand(1).isImm() && + MI.getOperand(1).getImm() != 8) { + Info = "applying IT instruction to more than one subsequent instruction is " + "deprecated"; + return true; + } + + return false; +} + +static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, + std::string &Info) { + assert((~STI.getFeatureBits() & llvm::ARM::ModeThumb) && + "cannot predicate thumb instructions"); + + assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments"); + for (unsigned OI = 4, OE = MI.getNumOperands(); OI < OE; ++OI) { + assert(MI.getOperand(OI).isReg() && "expected register"); + if (MI.getOperand(OI).getReg() == ARM::SP || + MI.getOperand(OI).getReg() == ARM::PC) { + Info = "use of SP or PC in the list is deprecated"; + return true; + } + } + return false; +} + +static bool getARMLoadDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, + std::string &Info) { + assert((~STI.getFeatureBits() & llvm::ARM::ModeThumb) && + "cannot predicate thumb instructions"); + + assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments"); + bool ListContainsPC = false, ListContainsLR = false; + for (unsigned OI = 4, OE = MI.getNumOperands(); OI < OE; ++OI) { + assert(MI.getOperand(OI).isReg() && "expected register"); + switch (MI.getOperand(OI).getReg()) { + default: + break; + case ARM::LR: + ListContainsLR = true; + break; + case ARM::PC: + ListContainsPC = true; + break; + case ARM::SP: + Info = "use of SP in the list is deprecated"; + return true; + } + } + + if (ListContainsPC && ListContainsLR) { + Info = "use of LR and PC simultaneously in the list is deprecated"; return true; } @@ -405,11 +455,15 @@ extern "C" void LLVMInitializeARMTargetMC() { TargetRegistry::RegisterAsmStreamer(TheThumbLETarget, createMCAsmStreamer); TargetRegistry::RegisterAsmStreamer(TheThumbBETarget, createMCAsmStreamer); - // Register the null streamer. - TargetRegistry::RegisterNullStreamer(TheARMLETarget, createARMNullStreamer); - TargetRegistry::RegisterNullStreamer(TheARMBETarget, createARMNullStreamer); - TargetRegistry::RegisterNullStreamer(TheThumbLETarget, createARMNullStreamer); - TargetRegistry::RegisterNullStreamer(TheThumbBETarget, createARMNullStreamer); + // Register the null TargetStreamer. + TargetRegistry::RegisterNullTargetStreamer(TheARMLETarget, + createARMNullTargetStreamer); + TargetRegistry::RegisterNullTargetStreamer(TheARMBETarget, + createARMNullTargetStreamer); + TargetRegistry::RegisterNullTargetStreamer(TheThumbLETarget, + createARMNullTargetStreamer); + TargetRegistry::RegisterNullTargetStreamer(TheThumbBETarget, + createARMNullTargetStreamer); // Register the MCInstPrinter. 
TargetRegistry::RegisterMCInstPrinter(TheARMLETarget, createARMMCInstPrinter); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index a6c20d5..c17e959 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -29,6 +29,7 @@ class MCRegisterInfo; class MCSubtargetInfo; class MCStreamer; class MCRelocationInfo; +class MCTargetStreamer; class StringRef; class Target; class raw_ostream; @@ -51,7 +52,7 @@ MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst); -MCStreamer *createARMNullStreamer(MCContext &Ctx); +MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S); MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 7da5003..3187d36 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -54,10 +54,10 @@ public: : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, /*UseAggressiveSymbolFolding=*/true) {} - void RecordRelocation(MachObjectWriter *Writer, - const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue) override; + void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, + const MCAsmLayout &Layout, const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) override; }; } @@ -232,7 +232,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, (IsPCRel << 30) | MachO::R_SCATTERED); MRE.r_word1 = Value2; - Writer->addRelocation(Fragment->getParent(), MRE); + Writer->addRelocation(nullptr, Fragment->getParent(), MRE); } MachO::any_relocation_info MRE; @@ -243,7 +243,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, (IsPCRel << 30) | MachO::R_SCATTERED); MRE.r_word1 = Value; - Writer->addRelocation(Fragment->getParent(), MRE); + Writer->addRelocation(nullptr, Fragment->getParent(), MRE); } void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, @@ -297,7 +297,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, (IsPCRel << 30) | MachO::R_SCATTERED); MRE.r_word1 = Value2; - Writer->addRelocation(Fragment->getParent(), MRE); + Writer->addRelocation(nullptr, Fragment->getParent(), MRE); } MachO::any_relocation_info MRE; @@ -307,7 +307,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, (IsPCRel << 30) | MachO::R_SCATTERED); MRE.r_word1 = Value; - Writer->addRelocation(Fragment->getParent(), MRE); + Writer->addRelocation(nullptr, Fragment->getParent(), MRE); } bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, @@ -351,11 +351,10 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, } void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, - const MCAssembler &Asm, + MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCFixup &Fixup, - MCValue Target, + const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned Log2Size; @@ -401,8 +400,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // See <reloc.h>. 
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned Index = 0; - unsigned IsExtern = 0; unsigned Type = 0; + const MCSymbolData *RelSymbol = nullptr; if (Target.isAbsolute()) { // constant // FIXME! @@ -422,8 +421,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // Check whether we need an external or internal relocation. if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD, FixedValue)) { - IsExtern = 1; - Index = SD->getIndex(); + RelSymbol = SD; // For external relocations, make sure to offset the fixup value to // compensate for the addend of the symbol address, if it was @@ -447,11 +445,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // struct relocation_info (8 bytes) MachO::any_relocation_info MRE; MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); + MRE.r_word1 = + (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28); // Even when it's not a scattered relocation, movw/movt always uses // a PAIR relocation. @@ -476,10 +471,10 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, (Log2Size << 25) | (MachO::ARM_RELOC_PAIR << 28)); - Writer->addRelocation(Fragment->getParent(), MREPair); + Writer->addRelocation(nullptr, Fragment->getParent(), MREPair); } - Writer->addRelocation(Fragment->getParent(), MRE); + Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS, diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index 8acd7af..b680db5 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -63,6 +63,7 @@ void ARMTargetStreamer::emitIntTextAttribute(unsigned Attribute, unsigned IntValue, StringRef StringValue) {} void ARMTargetStreamer::emitArch(unsigned Arch) {} +void ARMTargetStreamer::emitArchExtension(unsigned ArchExt) {} void ARMTargetStreamer::emitObjectArch(unsigned Arch) {} void ARMTargetStreamer::emitFPU(unsigned FPU) {} void ARMTargetStreamer::finishAttributeSection() {} diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index d31f1f4..2fd6445 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -8,7 +8,10 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/ARMFixupKinds.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" #include "llvm/Support/COFF.h" @@ -26,14 +29,16 @@ public: virtual ~ARMWinCOFFObjectWriter() { } unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsCrossSection) const override; + bool IsCrossSection, + const MCAsmBackend &MAB) const override; bool recordRelocation(const MCFixup &) const override; }; unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsCrossSection) const { + bool IsCrossSection, + const MCAsmBackend &MAB) const { assert(getMachine() == COFF::IMAGE_FILE_MACHINE_ARMNT && "AArch64 support not yet implemented"); @@ -41,7 +46,10 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target, 
Target.isAbsolute() ? MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); switch (static_cast<unsigned>(Fixup.getKind())) { - default: llvm_unreachable("unsupported relocation type"); + default: { + const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind()); + report_fatal_error(Twine("unsupported relocation type: ") + Info.Name); + } case FK_Data_4: switch (Modifier) { case MCSymbolRefExpr::VK_COFF_IMGREL32: diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 35fe9b3..51e519d 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -381,7 +381,7 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo()); TRI = Fn.getSubtarget().getRegisterInfo(); MRI = &Fn.getRegInfo(); - const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); + const ARMSubtarget *STI = &Fn.getSubtarget<ARMSubtarget>(); isLikeA9 = STI->isLikeA9() || STI->isSwift(); isSwift = STI->isSwift(); diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 6deab4f..7dcc64e 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -52,9 +52,9 @@ void Thumb1FrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const Thumb1InstrInfo &TII = - *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo()); - const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>( - MF.getSubtarget().getRegisterInfo()); + *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); + const Thumb1RegisterInfo *RegInfo = + static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo()); if (!hasReservedCallFrame(MF)) { // If we have alloca, convert as follows: // ADJCALLSTACKDOWN -> sub, sp, sp, amount @@ -89,15 +89,12 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>( - MF.getSubtarget().getRegisterInfo()); + const Thumb1RegisterInfo *RegInfo = + static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo()); const Thumb1InstrInfo &TII = - *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo()); + *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); - unsigned Align = MF.getTarget() - .getSubtargetImpl() - ->getFrameLowering() - ->getStackAlignment(); + unsigned Align = STI.getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); assert(NumBytes >= ArgRegsSaveSize && @@ -124,7 +121,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } if (!AFI->hasStackFrame()) { @@ -135,7 +133,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + 
.setMIFlags(MachineInstr::FrameSetup); } return; } @@ -199,7 +198,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { @@ -226,7 +226,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); break; } } @@ -244,13 +245,15 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } else { unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FramePtr, true))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } if (NumBytes > 508) // If offset is > 508 then sp cannot be adjusted in a single instruction, @@ -267,7 +270,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } } @@ -324,15 +328,12 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>( - MF.getSubtarget().getRegisterInfo()); + const Thumb1RegisterInfo *RegInfo = + static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo()); const Thumb1InstrInfo &TII = - *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo()); + *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); - unsigned Align = MF.getTarget() - .getSubtargetImpl() - ->getFrameLowering() - ->getStackAlignment(); + unsigned Align = STI.getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); assert((unsigned)NumBytes >= ArgRegsSaveSize && @@ -459,8 +460,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, return false; DebugLoc DL; - MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -499,7 +499,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = 
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 8ea912e..c24f740 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -44,7 +44,7 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   bool KillSrc) const {
   // Need to check the arch.
   MachineFunction &MF = *MBB.getParent();
-  const ARMSubtarget &st = MF.getTarget().getSubtarget<ARMSubtarget>();
+  const ARMSubtarget &st = MF.getSubtarget<ARMSubtarget>();
   assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
          "Thumb1 can only copy GPR registers");
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index c10c809..5e2cbdc 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -71,7 +71,7 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
          "Thumb1 does not have ldr to high register");
   MachineFunction &MF = *MBB.getParent();
-  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  const TargetInstrInfo &TII = *STI.getInstrInfo();
   MachineConstantPool *ConstantPool = MF.getConstantPool();
   const Constant *C = ConstantInt::get(
       Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
@@ -234,7 +234,6 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
     // If we would emit the copy with an immediate of 0, just use tMOVr.
     if (CopyOpc && Bytes < CopyScale) {
       CopyOpc = ARM::tMOVr;
-      CopyBits = 0;
       CopyScale = 1;
       CopyNeedsCC = false;
       CopyRange = 0;
@@ -389,12 +388,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
 void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
                                            int64_t Offset) const {
-  const ARMBaseInstrInfo &TII =
-      *static_cast<const ARMBaseInstrInfo *>(MI.getParent()
-                                                 ->getParent()
-                                                 ->getTarget()
-                                                 .getSubtargetImpl()
-                                                 ->getInstrInfo());
+  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
   int Off = Offset; // ARM doesn't need the general 64-bit offsets
   unsigned i = 0;
@@ -420,7 +414,7 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
   // off the frame pointer (if, for example, there are alloca() calls in
   // the function, the offset will be negative. Use R12 instead since that's
   // a call clobbered register that we know won't be used in Thumb1 mode.
-  const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
+  const TargetInstrInfo &TII = *STI.getInstrInfo();
   DebugLoc DL;
   AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
     .addReg(ARM::R12, RegState::Define)
@@ -466,8 +460,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
-  const ARMBaseInstrInfo &TII =
-      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   DebugLoc dl = MI.getDebugLoc();
   MachineInstrBuilder MIB(*MBB.getParent(), &MI);
@@ -478,8 +471,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                MF.getFrameInfo()->getStackSize() + SPAdj;
   if (MF.getFrameInfo()->hasVarSizedObjects()) {
-    assert(SPAdj == 0 && MF.getSubtarget().getFrameLowering()->hasFP(MF) &&
-           "Unexpected");
+    assert(SPAdj == 0 && STI.getFrameLowering()->hasFP(MF) && "Unexpected");
     // There are alloca()'s in this function, must reference off the frame
     // pointer or base pointer instead.
     if (!hasBasePointer(MF)) {
@@ -495,10 +487,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   // when !hasReservedCallFrame().
 #ifndef NDEBUG
   if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
-    assert(MF.getTarget()
-               .getSubtargetImpl()
-               ->getFrameLowering()
-               ->hasReservedCallFrame(MF) &&
+    assert(STI.getFrameLowering()->hasReservedCallFrame(MF) &&
            "Cannot use SP to access the emergency spill slot in "
            "functions without a reserved call frame");
     assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
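The Thumb1RegisterInfo hunks above assume the class now holds an STI reference, set up elsewhere in the commit. A schematic of that ownership, with the real constructor wiring deliberately simplified (Thumb1RegisterInfoSketch is illustrative only):

// Schematic only: a register-info object keeping a bound ARMSubtarget
// reference so queries no longer detour through the TargetMachine.
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"

using namespace llvm;

class Thumb1RegisterInfoSketch {
  const ARMSubtarget &STI; // bound once, when the subtarget is constructed

public:
  explicit Thumb1RegisterInfoSketch(const ARMSubtarget &STI) : STI(STI) {}

  const ARMBaseInstrInfo *instrInfo() const {
    // Replaces MI.getParent()->getParent()->getTarget()... chains.
    return STI.getInstrInfo();
  }
};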
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index fdcb522..b657f2d 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -253,12 +253,12 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
 }
 bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
-  const TargetMachine &TM = Fn.getTarget();
+  const ARMSubtarget &STI =
+      static_cast<const ARMSubtarget &>(Fn.getSubtarget());
   AFI = Fn.getInfo<ARMFunctionInfo>();
-  TII = static_cast<const Thumb2InstrInfo *>(
-      TM.getSubtargetImpl()->getInstrInfo());
-  TRI = TM.getSubtargetImpl()->getRegisterInfo();
-  restrictIT = TM.getSubtarget<ARMSubtarget>().restrictIT();
+  TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
+  TRI = STI.getRegisterInfo();
+  restrictIT = STI.restrictIT();
   if (!AFI->isThumbFunction())
     return false;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 91973e1..62c3752 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -574,13 +574,10 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
     }
   } else if (AddrMode == ARMII::AddrModeT2_i8s4) {
     Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
-    NumBits = 8;
-    // MCInst operand has already scaled value.
+    NumBits = 10; // 8 bits scaled by 4
+    // MCInst operand expects already scaled value.
     Scale = 1;
-    if (Offset < 0) {
-      isSub = true;
-      Offset = -Offset;
-    }
+    assert((Offset & 3) == 0 && "Can't encode this offset!");
   } else {
     llvm_unreachable("Unsupported addressing mode!");
   }
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index c51eb8b..2ee908b 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -1001,17 +1001,12 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
 }
 bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
-  const TargetMachine &TM = MF.getTarget();
-  TII = static_cast<const Thumb2InstrInfo *>(
-      TM.getSubtargetImpl()->getInstrInfo());
-  STI = &TM.getSubtarget<ARMSubtarget>();
+  STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
+  TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
   // Optimizing / minimizing size?
-  AttributeSet FnAttrs = MF.getFunction()->getAttributes();
-  OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
-                                      Attribute::OptimizeForSize);
-  MinimizeSize =
-      FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+  OptimizeSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+  MinimizeSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
   BlockInfo.clear();
   BlockInfo.resize(MF.getNumBlockIDs());
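The Thumb2SizeReduction hunk ends with a drop-in use of Function::hasFnAttribute in place of explicit AttributeSet::FunctionIndex plumbing. A minimal standalone illustration, with a hypothetical helper name (classifySizeOpts is not from this commit):

// Sketch: querying size-optimization attributes directly on the Function.
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

using namespace llvm;

static void classifySizeOpts(const Function &F, bool &OptimizeSize,
                             bool &MinimizeSize) {
  // hasFnAttribute checks the function-index attribute set for us.
  OptimizeSize = F.hasFnAttribute(Attribute::OptimizeForSize);
  MinimizeSize = F.hasFnAttribute(Attribute::MinSize);
}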