diff options
Diffstat (limited to 'lib/Target/ARM')
90 files changed, 3595 insertions, 4423 deletions
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index 92eaf9e..387f1f6 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -34,6 +34,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <map> #include <set> using namespace llvm; @@ -676,8 +678,8 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { } bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { - TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); - TRI = Fn.getTarget().getRegisterInfo(); + TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo()); + TRI = Fn.getSubtarget().getRegisterInfo(); MRI = &Fn.getRegInfo(); bool Modified = false; diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 55df29c..02db53a 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef TARGET_ARM_H -#define TARGET_ARM_H +#ifndef LLVM_LIB_TARGET_ARM_ARM_H +#define LLVM_LIB_TARGET_ARM_ARM_H #include "llvm/Support/CodeGen.h" @@ -23,7 +23,6 @@ class ARMAsmPrinter; class ARMBaseTargetMachine; class FunctionPass; class ImmutablePass; -class JITCodeEmitter; class MachineInstr; class MCInst; class TargetLowering; @@ -31,10 +30,6 @@ class TargetMachine; FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); - -FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, - JITCodeEmitter &JCE); - FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 25385a6..80b976b 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -228,6 +228,15 @@ def 
ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", FeatureAvoidPartialCPSR, FeatureTrustZone, FeatureVirtualization]>; +def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17", + "Cortex-A17 ARM processors", + [FeatureVMLxForwarding, + FeatureT2XtPk, FeatureVFP4, + FeatureHWDiv, FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureVirtualization, + FeatureTrustZone]>; + def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", "Cortex-A53 ARM processors", [FeatureHWDiv, FeatureHWDivARM, @@ -351,12 +360,8 @@ def : ProcessorModel<"cortex-a8", CortexA8Model, FeatureAClass]>; def : ProcessorModel<"cortex-a9", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS, + FeatureDSPThumb2, FeatureHasRAS, FeatureMP, FeatureAClass]>; -def : ProcessorModel<"cortex-a9-mp", CortexA9Model, - [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureMP, - FeatureHasRAS, FeatureAClass]>; // FIXME: A12 has currently the same Schedule model as A9 def : ProcessorModel<"cortex-a12", CortexA9Model, @@ -370,6 +375,12 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, FeatureDSPThumb2, FeatureHasRAS, FeatureAClass]>; +// FIXME: A17 has currently the same Schedule model as A9 +def : ProcessorModel<"cortex-a17", CortexA9Model, + [ProcA17, HasV7Ops, FeatureNEON, FeatureDB, + FeatureDSPThumb2, FeatureMP, + FeatureHasRAS, FeatureAClass]>; + // FIXME: krait has currently the same Schedule model as A9 def : ProcessorModel<"krait", CortexA9Model, [ProcKrait, HasV7Ops, @@ -396,6 +407,12 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops, FeatureT2XtPk, FeatureVFP4, FeatureVFPOnlySP, FeatureD16, FeatureMClass]>; +def : ProcNoItin<"cortex-m7", [HasV7Ops, + FeatureThumb2, FeatureNoARM, FeatureDB, + FeatureHWDiv, FeatureDSPThumb2, + FeatureT2XtPk, FeatureFPARMv8, + FeatureD16, FeatureMClass]>; + // Swift uArch Processors. 
def : ProcessorModel<"swift", SwiftModel, diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 28d2610..695fd4d 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -76,7 +76,8 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() { } void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { - uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType()); + uint64_t Size = + TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType()); assert(Size && "C++ constructor pointer had zero size!"); const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts()); @@ -136,7 +137,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, assert(!MO.getSubReg() && "Subregs should be eliminated!"); if(ARM::GPRPairRegClass.contains(Reg)) { const MachineFunction &MF = *MI->getParent()->getParent(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); Reg = TRI->getSubReg(Reg, ARM::gsub_0); } O << ARMInstPrinter::getRegisterName(Reg); @@ -182,7 +183,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, MCSymbol *ARMAsmPrinter:: GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); SmallString<60> Name; raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '_' << uid2; @@ -191,7 +192,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); SmallString<60> Name; raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH" << getFunctionNumber(); @@ -229,7 +230,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, 
unsigned OpNum, case 'y': // Print a VFP single precision register as indexed double. if (MI->getOperand(OpNum).isReg()) { unsigned Reg = MI->getOperand(OpNum).getReg(); - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); // Find the 'd' register that has this 's' register as a sub-register, // and determine the lane number. for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) { @@ -261,7 +262,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // inline asm statement. O << "{"; if (ARM::GPRPairRegClass.contains(RegBegin)) { - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0); O << ARMInstPrinter::getRegisterName(Reg0) << ", "; RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1); @@ -317,7 +318,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(OpNum); if (!MO.isReg()) return true; - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); unsigned Reg = TRI->getSubReg(MO.getReg(), ExtraCode[0] == 'Q' ? ARM::gsub_0 : ARM::gsub_1); O << ARMInstPrinter::getRegisterName(Reg); @@ -343,7 +344,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned Reg = MI->getOperand(OpNum).getReg(); if (!ARM::QPRRegClass.contains(Reg)) return true; - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ? 
ARM::dsub_0 : ARM::dsub_1); O << ARMInstPrinter::getRegisterName(SubReg); @@ -358,7 +359,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (!MO.isReg()) return true; const MachineFunction &MF = *MI->getParent()->getParent(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); unsigned Reg = MO.getReg(); if(!ARM::GPRPairRegClass.contains(Reg)) return false; @@ -478,6 +479,9 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { // Emit ARM Build Attributes if (Subtarget->isTargetELF()) emitAttributes(); + + if (!M.getModuleInlineAsm().empty() && Subtarget->isThumb()) + OutStreamer.EmitAssemblerFlag(MCAF_Code16); } static void @@ -558,7 +562,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); if (!Stubs.empty()) { OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); + const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); for (auto &stub: Stubs) { OutStreamer.EmitLabel(stub.first); @@ -663,7 +667,9 @@ void ARMAsmPrinter::emitAttributes() { ARMBuildAttrs::AllowNeonARMv8); } else { if (Subtarget->hasFPARMv8()) - ATS.emitFPU(ARM::FP_ARMV8); + // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one + // FPU, but there are two different names for it depending on the CPU. + ATS.emitFPU(Subtarget->hasD16() ? ARM::FPV5_D16 : ARM::FP_ARMV8); else if (Subtarget->hasVFP4()) ATS.emitFPU(Subtarget->hasD16() ? 
ARM::VFPV4_D16 : ARM::VFPV4); else if (Subtarget->hasVFP3()) @@ -700,6 +706,13 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model, ARMBuildAttrs::AllowIEE754); + if (Subtarget->allowsUnalignedMem()) + ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access, + ARMBuildAttrs::Allowed); + else + ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access, + ARMBuildAttrs::Not_Allowed); + // FIXME: add more flags to ARMBuildAttributes.h // 8-bytes alignment stuff. ATS.emitAttribute(ARMBuildAttrs::ABI_align_needed, 1); @@ -757,6 +770,17 @@ void ARMAsmPrinter::emitAttributes() { } } + // TODO: We currently only support either reserving the register, or treating + // it as another callee-saved register, but not as SB or a TLS pointer; It + // would instead be nicer to push this from the frontend as metadata, as we do + // for the wchar and enum size tags + if (Subtarget->isR9Reserved()) + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, + ARMBuildAttrs::R9Reserved); + else + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, + ARMBuildAttrs::R9IsGPR); + if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization()) ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZVirtualization); @@ -834,8 +858,9 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV, void ARMAsmPrinter:: EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { - const DataLayout *DL = TM.getDataLayout(); - int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType()); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + int Size = + TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(MCPV->getType()); ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV); @@ -1013,7 +1038,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { MCTargetStreamer &TS = *OutStreamer.getTargetStreamer(); ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); const MachineFunction &MF 
= *MI->getParent()->getParent(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -1151,7 +1176,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { #include "ARMGenMCPseudoLowering.inc" void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); // If we just ended a constant pool, mark it as such. if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) { @@ -1567,6 +1592,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitJumpTable(MI); return; } + case ARM::SPACE: + OutStreamer.EmitZeros(MI->getOperand(1).getImm()); + return; case ARM::TRAP: { // Non-Darwin binutils don't yet support the "trap" mnemonic. // FIXME: Remove this special case when they do. 
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 7c103c6..5ff20ce 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMASMPRINTER_H -#define ARMASMPRINTER_H +#ifndef LLVM_LIB_TARGET_ARM_ARMASMPRINTER_H +#define LLVM_LIB_TARGET_ARM_ARMASMPRINTER_H #include "ARMSubtarget.h" #include "llvm/CodeGen/AsmPrinter.h" diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 0288db9..7a315c4 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -108,7 +108,7 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const { if (usePreRAHazardRecognizer()) { const InstrItineraryData *II = - &static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData(); + static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData(); return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); } return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); @@ -518,6 +518,42 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, return Found; } +static bool isCPSRDefined(const MachineInstr *MI) { + for (const auto &MO : MI->operands()) + if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef()) + return true; + return false; +} + +static bool isEligibleForITBlock(const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: return true; + case ARM::tADC: // ADC (register) T1 + case ARM::tADDi3: // ADD (immediate) T1 + case ARM::tADDi8: // ADD (immediate) T2 + case ARM::tADDrr: // ADD (register) T1 + case ARM::tAND: // AND (register) T1 + case ARM::tASRri: // ASR (immediate) T1 + case ARM::tASRrr: // ASR (register) T1 + case ARM::tBIC: // BIC (register) T1 + case ARM::tEOR: // EOR (register) T1 + case ARM::tLSLri: // LSL (immediate) T1 + case ARM::tLSLrr: // LSL (register) T1 + case 
ARM::tLSRri: // LSR (immediate) T1 + case ARM::tLSRrr: // LSR (register) T1 + case ARM::tMUL: // MUL T1 + case ARM::tMVN: // MVN (register) T1 + case ARM::tORR: // ORR (register) T1 + case ARM::tROR: // ROR (register) T1 + case ARM::tRSB: // RSB (immediate) T1 + case ARM::tSBC: // SBC (register) T1 + case ARM::tSUBi3: // SUB (immediate) T1 + case ARM::tSUBi8: // SUB (immediate) T2 + case ARM::tSUBrr: // SUB (register) T1 + return !isCPSRDefined(MI); + } +} + /// isPredicable - Return true if the specified instruction can be predicated. /// By default, this returns true for every instruction with a /// PredicateOperand. @@ -525,6 +561,9 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { if (!MI->isPredicable()) return false; + if (!isEligibleForITBlock(MI)) + return false; + ARMFunctionInfo *AFI = MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); @@ -555,16 +594,6 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) { } } -/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. -LLVM_ATTRIBUTE_NOINLINE -static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, - unsigned JTI); -static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, - unsigned JTI) { - assert(JTI < JT.size()); - return JT[JTI].MBBs.size(); -} - /// GetInstSize - Return the size of the specified MachineInstr. /// unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { @@ -637,7 +666,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // bytes, we can use 16-bit entries instead. Then there won't be an // alignment issue. unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4; - unsigned NumEntries = getNumJTEntries(JT, JTI); + unsigned NumEntries = JT[JTI].MBBs.size(); if (Opc == ARM::t2TBB_JT && (NumEntries & 1)) // Make sure the instruction that follows TBB is 2-byte aligned. 
// FIXME: Constant island pass should insert an "ALIGN" instruction @@ -645,6 +674,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { ++NumEntries; return NumEntries * EntrySize + InstSize; } + case ARM::SPACE: + return MI->getOperand(1).getImm(); } } @@ -659,6 +690,49 @@ unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const { return Size; } +void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, bool KillSrc, + const ARMSubtarget &Subtarget) const { + unsigned Opc = Subtarget.isThumb() + ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR) + : ARM::MRS; + + MachineInstrBuilder MIB = + BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg); + + // There is only 1 A/R class MRS instruction, and it always refers to + // APSR. However, there are lots of other possibilities on M-class cores. + if (Subtarget.isMClass()) + MIB.addImm(0x800); + + AddDefaultPred(MIB); + + MIB.addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc)); +} + +void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned SrcReg, bool KillSrc, + const ARMSubtarget &Subtarget) const { + unsigned Opc = Subtarget.isThumb() + ? (Subtarget.isMClass() ? 
ARM::t2MSR_M : ARM::t2MSR_AR) + : ARM::MSR; + + MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); + + if (Subtarget.isMClass()) + MIB.addImm(0x800); + else + MIB.addImm(8); + + MIB.addReg(SrcReg, getKillRegState(KillSrc)); + + AddDefaultPred(MIB); + + MIB.addReg(ARM::CPSR, RegState::Implicit | RegState::Define); +} + void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -682,7 +756,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = ARM::VMOVRS; else if (SPRDest && GPRSrc) Opc = ARM::VMOVSR; - else if (ARM::DPRRegClass.contains(DestReg, SrcReg)) + else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP()) Opc = ARM::VMOVD; else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) Opc = ARM::VORRq; @@ -742,6 +816,16 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BeginIdx = ARM::dsub_0; SubRegs = 4; Spacing = 2; + } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) { + Opc = ARM::VMOVS; + BeginIdx = ARM::ssub_0; + SubRegs = 2; + } else if (SrcReg == ARM::CPSR) { + copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget); + return; + } else if (DestReg == ARM::CPSR) { + copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget); + return; } assert(Opc && "Impossible reg-to-reg copy"); @@ -1174,12 +1258,26 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); } -bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ +bool +ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + MachineFunction &MF = *MI->getParent()->getParent(); + Reloc::Model RM = MF.getTarget().getRelocationModel(); + + if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { + assert(getSubtarget().getTargetTriple().getObjectFormat() == + Triple::MachO && + "LOAD_STACK_GUARD currently 
supported only for MachO."); + expandLoadStackGuard(MI, RM); + MI->getParent()->erase(MI); + return true; + } + // This hook gets to expand COPY instructions before they become // copyPhysReg() calls. Look for VMOVS instructions that can legally be // widened to VMOVD. We prefer the VMOVD when possible because it may be // changed into a VORR that can go down the NEON pipeline. - if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15()) + if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15() || + Subtarget.isFPOnlySP()) return false; // Look for a copy between even S-registers. That is where we keep floats @@ -2832,7 +2930,7 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, // FIXME: The current MachineInstr design does not support relying on machine // mem operands to determine the width of a memory access. Instead, we expect // the target to provide this information based on the instruction opcode and -// operands. However, using MachineMemOperand is a the best solution now for +// operands. However, using MachineMemOperand is the best solution now for // two reasons: // // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI @@ -3933,6 +4031,38 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI, return true; } +// LoadStackGuard has so far only been implemented for MachO. Different code +// sequence is needed for other targets. 
+void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, + unsigned LoadImmOpc, + unsigned LoadOpc, + Reloc::Model RM) const { + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Reg = MI->getOperand(0).getReg(); + const GlobalValue *GV = + cast<GlobalValue>((*MI->memoperands_begin())->getValue()); + MachineInstrBuilder MIB; + + BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg) + .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY); + + if (Subtarget.GVIsIndirectSymbol(GV, RM)) { + MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); + MIB.addReg(Reg, RegState::Kill).addImm(0); + unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; + MachineMemOperand *MMO = MBB.getParent()-> + getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 4, 4); + MIB.addMemOperand(MMO); + AddDefaultPred(MIB); + } + + MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); + MIB.addReg(Reg, RegState::Kill).addImm(0); + MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + AddDefaultPred(MIB); +} + bool ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, unsigned &AddSubOpc, @@ -4361,29 +4491,6 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, MI->addRegisterKilled(DReg, TRI, true); } -void ARMBaseInstrInfo::getUnconditionalBranch( - MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const { - if (Subtarget.isThumb()) - Branch.setOpcode(ARM::tB); - else if (Subtarget.isThumb2()) - Branch.setOpcode(ARM::t2B); - else - Branch.setOpcode(ARM::Bcc); - - Branch.addOperand(MCOperand::CreateExpr(BranchTarget)); - Branch.addOperand(MCOperand::CreateImm(ARMCC::AL)); - Branch.addOperand(MCOperand::CreateReg(0)); -} - -void ARMBaseInstrInfo::getTrap(MCInst &MI) const { - if (Subtarget.isThumb()) - MI.setOpcode(ARM::tTRAP); - else if (Subtarget.useNaClTrap()) - MI.setOpcode(ARM::TRAPNaCl); - else - MI.setOpcode(ARM::TRAP); -} - bool ARMBaseInstrInfo::hasNOP() const { return (Subtarget.getFeatureBits() 
& ARM::HasV6T2Ops) != 0; } @@ -4401,3 +4508,72 @@ bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { return false; } + +bool ARMBaseInstrInfo::getRegSequenceLikeInputs( + const MachineInstr &MI, unsigned DefIdx, + SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { + assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); + assert(MI.isRegSequenceLike() && "Invalid kind of instruction"); + + switch (MI.getOpcode()) { + case ARM::VMOVDRR: + // dX = VMOVDRR rY, rZ + // is the same as: + // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1 + // Populate the InputRegs accordingly. + // rY + const MachineOperand *MOReg = &MI.getOperand(1); + InputRegs.push_back( + RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0)); + // rZ + MOReg = &MI.getOperand(2); + InputRegs.push_back( + RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1)); + return true; + } + llvm_unreachable("Target dependent opcode missing"); +} + +bool ARMBaseInstrInfo::getExtractSubregLikeInputs( + const MachineInstr &MI, unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const { + assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); + assert(MI.isExtractSubregLike() && "Invalid kind of instruction"); + + switch (MI.getOpcode()) { + case ARM::VMOVRRD: + // rX, rY = VMOVRRD dZ + // is the same as: + // rX = EXTRACT_SUBREG dZ, ssub_0 + // rY = EXTRACT_SUBREG dZ, ssub_1 + const MachineOperand &MOReg = MI.getOperand(2); + InputReg.Reg = MOReg.getReg(); + InputReg.SubReg = MOReg.getSubReg(); + InputReg.SubIdx = DefIdx == 0 ? 
ARM::ssub_0 : ARM::ssub_1; + return true; + } + llvm_unreachable("Target dependent opcode missing"); +} + +bool ARMBaseInstrInfo::getInsertSubregLikeInputs( + const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, + RegSubRegPairAndIdx &InsertedReg) const { + assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); + assert(MI.isInsertSubregLike() && "Invalid kind of instruction"); + + switch (MI.getOpcode()) { + case ARM::VSETLNi32: + // dX = VSETLNi32 dY, rZ, imm + const MachineOperand &MOBaseReg = MI.getOperand(1); + const MachineOperand &MOInsertedReg = MI.getOperand(2); + const MachineOperand &MOIndex = MI.getOperand(3); + BaseReg.Reg = MOBaseReg.getReg(); + BaseReg.SubReg = MOBaseReg.getSubReg(); + + InsertedReg.Reg = MOInsertedReg.getReg(); + InsertedReg.SubReg = MOInsertedReg.getSubReg(); + InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1; + return true; + } + llvm_unreachable("Target dependent opcode missing"); +} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index b8d6758..0ae291b 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -11,13 +11,14 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMBASEINSTRUCTIONINFO_H -#define ARMBASEINSTRUCTIONINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H +#define LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER @@ -34,6 +35,57 @@ protected: // Can be only subclassed. 
explicit ARMBaseInstrInfo(const ARMSubtarget &STI); + void expandLoadStackGuardBase(MachineBasicBlock::iterator MI, + unsigned LoadImmOpc, unsigned LoadOpc, + Reloc::Model RM) const; + + /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI + /// and \p DefIdx. + /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of + /// the list is modeled as <Reg:SubReg, SubIdx>. + /// E.g., REG_SEQUENCE vreg1:sub1, sub0, vreg2, sub1 would produce + /// two elements: + /// - vreg1:sub1, sub0 + /// - vreg2<:0>, sub1 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isRegSequenceLike(). + bool getRegSequenceLikeInputs( + const MachineInstr &MI, unsigned DefIdx, + SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const override; + + /// Build the equivalent inputs of a EXTRACT_SUBREG for the given \p MI + /// and \p DefIdx. + /// \p [out] InputReg of the equivalent EXTRACT_SUBREG. + /// E.g., EXTRACT_SUBREG vreg1:sub1, sub0, sub1 would produce: + /// - vreg1:sub1, sub0 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isExtractSubregLike(). + bool getExtractSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const override; + + /// Build the equivalent inputs of a INSERT_SUBREG for the given \p MI + /// and \p DefIdx. + /// \p [out] BaseReg and \p [out] InsertedReg contain + /// the equivalent inputs of INSERT_SUBREG. + /// E.g., INSERT_SUBREG vreg0:sub0, vreg1:sub1, sub3 would produce: + /// - BaseReg: vreg0:sub0 + /// - InsertedReg: vreg1:sub1, sub3 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isInsertSubregLike(). 
+ bool + getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPair &BaseReg, + RegSubRegPairAndIdx &InsertedReg) const override; + public: // Return whether the target has an explicit NOP encoding. bool hasNOP() const; @@ -104,6 +156,13 @@ public: unsigned isStoreToStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const override; + void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool KillSrc, + const ARMSubtarget &Subtarget) const; + void copyFromCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, bool KillSrc, + const ARMSubtarget &Subtarget) const; + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; @@ -230,12 +289,6 @@ public: void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned, const TargetRegisterInfo *TRI) const override; - void - getUnconditionalBranch(MCInst &Branch, - const MCSymbolRefExpr *BranchTarget) const override; - - void getTrap(MCInst &MI) const override; - /// Get the number of addresses by LDM or VLDM or zero for unknown. unsigned getNumLDMAddresses(const MachineInstr *MI) const; @@ -286,6 +339,9 @@ private: bool verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const override; + virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const = 0; + private: /// Modeling special VFP / NEON fp MLA / MLS hazards. 
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index cdd91c7..6dc0493 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -38,6 +38,8 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#define DEBUG_TYPE "arm-register-info" + #define GET_REGINFO_TARGET_DESC #include "ARMGenRegisterInfo.inc" @@ -121,7 +123,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); // FIXME: avoid re-calculating this every time. BitVector Reserved(getNumRegs()); @@ -180,14 +182,14 @@ ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind const TargetRegisterClass * ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { if (RC == &ARM::CCRRegClass) - return nullptr; // Can't copy CCR registers. + return &ARM::rGPRRegClass; // Can't copy CCR registers. 
return RC; } unsigned ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); switch (RC->getID()) { default: @@ -309,7 +311,7 @@ ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); // When outgoing call frames are so large that we adjust the stack pointer // around the call, we can no longer use the stack pointer to reach the @@ -354,7 +356,10 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { return false; // We may also need a base pointer if there are dynamic allocas or stack // pointer adjustments around calls. - if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF)) + if (MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->hasReservedCallFrame(MF)) return true; // A base pointer is required and allowed. Check that it isn't too late to // reserve it. 
@@ -365,7 +370,10 @@ bool ARMBaseRegisterInfo:: needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned StackAlign = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, @@ -385,7 +393,7 @@ cannotEliminateFrame(const MachineFunction &MF) const { unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); if (TFI->hasFP(MF)) return FramePtr; @@ -402,7 +410,7 @@ emitLoadConstPool(MachineBasicBlock &MBB, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val); @@ -415,11 +423,6 @@ emitLoadConstPool(MachineBasicBlock &MBB, .setMIFlags(MIFlags); } -bool ARMBaseRegisterInfo::mayOverrideLocalAssignment() const { - // The native linux build hits a downstream codegen bug when this is enabled. - return STI.isTargetDarwin(); -} - bool ARMBaseRegisterInfo:: requiresRegisterScavenging(const MachineFunction &MF) const { return true; @@ -529,7 +532,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // Note that the incoming offset is based on the SP value at function entry, // so it'll be negative. 
MachineFunction &MF = *MI->getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -582,7 +585,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, int64_t Offset) const { ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>(); unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : - (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri); + (AFI->isThumb1OnlyFunction() ? ARM::tADDframe : ARM::t2ADDri); MachineBasicBlock::iterator Ins = MBB->begin(); DebugLoc DL; // Defaults to "unknown" @@ -591,15 +594,15 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, const MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const MCInstrDesc &MCID = TII.get(ADDriOpc); MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); - MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg) - .addFrameIndex(FrameIdx).addImm(Offset)); + MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg) + .addFrameIndex(FrameIdx).addImm(Offset); if (!AFI->isThumb1OnlyFunction()) - AddDefaultCC(MIB); + AddDefaultCC(AddDefaultPred(MIB)); } void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, @@ -607,7 +610,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); int Off = Offset; // 
ARM doesn't need the general 64-bit offsets unsigned i = 0; @@ -706,9 +709,9 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); - const ARMFrameLowering *TFI = - static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); + const ARMFrameLowering *TFI = static_cast<const ARMFrameLowering *>( + MF.getSubtarget().getFrameLowering()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && "This eliminateFrameIndex does not support Thumb1!"); @@ -775,3 +778,60 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false,true); } } + +bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC) const { + auto MBB = MI->getParent(); + auto MF = MBB->getParent(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + // If not copying into a sub-register this should be ok because we shouldn't + // need to split the reg. + if (!DstSubReg) + return true; + // Small registers don't frequently cause a problem, so we can coalesce them. + if (NewRC->getSize() < 32 && DstRC->getSize() < 32 && SrcRC->getSize() < 32) + return true; + + auto NewRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(NewRC); + auto SrcRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(SrcRC); + auto DstRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(DstRC); + // If the source register class is more expensive than the destination, the + // coalescing is probably profitable. 
+ if (SrcRCWeight.RegWeight > NewRCWeight.RegWeight) + return true; + if (DstRCWeight.RegWeight > NewRCWeight.RegWeight) + return true; + + // If the register allocator isn't constrained, we can always allow coalescing; + // unfortunately, we don't know yet if we will be constrained. + // The goal of this heuristic is to restrict how many expensive registers + // we allow to coalesce in a given basic block. + auto AFI = MF->getInfo<ARMFunctionInfo>(); + auto It = AFI->getCoalescedWeight(MBB); + + DEBUG(dbgs() << "\tARM::shouldCoalesce - Coalesced Weight: " + << It->second << "\n"); + DEBUG(dbgs() << "\tARM::shouldCoalesce - Reg Weight: " + << NewRCWeight.RegWeight << "\n"); + + // This number is the largest round number that meets the criteria: + // (1) addresses PR18825 + // (2) generates better code in some test cases (like vldm-shed-a9.ll) + // (3) Doesn't regress any test cases (in-tree, test-suite, and SPEC) + // In practice the SizeMultiplier will only factor in for straight line code + // that uses a lot of NEON vectors, which isn't terribly common. + unsigned SizeMultiplier = MBB->size()/100; + SizeMultiplier = SizeMultiplier ? SizeMultiplier : 1; + if (It->second < NewRCWeight.WeightLimit * SizeMultiplier) { + It->second += NewRCWeight.RegWeight; + return true; + } + return false; +} diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 91df565..e9bc412 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMBASEREGISTERINFO_H -#define ARMBASEREGISTERINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H +#define LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -174,8 +174,6 @@ public: unsigned MIFlags = MachineInstr::NoFlags)const; /// Code Generation virtual methods...
- bool mayOverrideLocalAssignment() const override; - bool requiresRegisterScavenging(const MachineFunction &MF) const override; bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; @@ -187,6 +185,14 @@ public: void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; + + /// \brief SrcRC and DstRC will be morphed into NewRC if this returns true + bool shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC) const override; }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index dc41c1c..bd07236 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMCALLINGCONV_H -#define ARMCALLINGCONV_H +#ifndef LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H +#define LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H #include "ARM.h" #include "ARMBaseInstrInfo.h" @@ -177,8 +177,9 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs(); + // AAPCS HFAs must have 1-4 elements, all of the same type - assert(PendingHAMembers.size() < 8); + assert(PendingHAMembers.size() < 4); if (PendingHAMembers.size() > 0) assert(PendingHAMembers[0].getLocVT() == LocVT); @@ -188,7 +189,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); if (ArgFlags.isInConsecutiveRegsLast()) { - assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 && + assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 && "Homogeneous aggregates must have 
between 1 and 4 members"); // Try to allocate a contiguous block of registers, each of the correct @@ -196,7 +197,6 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, const uint16_t *RegList; unsigned NumRegs; switch (LocVT.SimpleTy) { - case MVT::i32: case MVT::f32: RegList = SRegList; NumRegs = 16; @@ -235,20 +235,11 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, State.AllocateReg(SRegList[regNo]); unsigned Size = LocVT.getSizeInBits() / 8; - unsigned Align = Size; - - if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) { - // Vectors are always aligned to 8 bytes. If we've seen an i32 here - // it's because it's been split from a larger type, also with align 8. - Align = 8; - } + unsigned Align = std::min(Size, 8U); for (auto It : PendingHAMembers) { It.convertToMem(State.AllocateStack(Size, Align)); State.addLoc(It); - - // Only the first member needs to be aligned. - Align = 1; } // All pending members have now been allocated diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp deleted file mode 100644 index 5fb6ebf..0000000 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ /dev/null @@ -1,1909 +0,0 @@ -//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the pass that transforms the ARM machine instructions into -// relocatable machine code. 
-// -//===----------------------------------------------------------------------===// - -#include "ARM.h" -#include "ARMBaseInstrInfo.h" -#include "ARMConstantPoolValue.h" -#include "ARMMachineFunctionInfo.h" -#include "ARMRelocations.h" -#include "ARMSubtarget.h" -#include "ARMTargetMachine.h" -#include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/PassManager.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#ifndef NDEBUG -#include <iomanip> -#endif -using namespace llvm; - -#define DEBUG_TYPE "jit" - -STATISTIC(NumEmitted, "Number of machine instructions emitted"); - -namespace { - - class ARMCodeEmitter : public MachineFunctionPass { - ARMJITInfo *JTI; - const ARMBaseInstrInfo *II; - const DataLayout *TD; - const ARMSubtarget *Subtarget; - TargetMachine &TM; - JITCodeEmitter &MCE; - MachineModuleInfo *MMI; - const std::vector<MachineConstantPoolEntry> *MCPEs; - const std::vector<MachineJumpTableEntry> *MJTEs; - bool IsPIC; - bool IsThumb; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineModuleInfo>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - static char ID; - public: - ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(ID), JTI(nullptr), - II((const ARMBaseInstrInfo *)tm.getInstrInfo()), - TD(tm.getDataLayout()), TM(tm), - MCE(mce), MCPEs(nullptr), MJTEs(nullptr), - IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} - - /// getBinaryCodeForInstr - This function, generated 
by the - /// CodeEmitterGenerator using TableGen, produces the binary encoding for - /// machine instructions. - uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; - - bool runOnMachineFunction(MachineFunction &MF) override; - - const char *getPassName() const override { - return "ARM Machine Code Emitter"; - } - - void emitInstruction(const MachineInstr &MI); - - private: - - void emitWordLE(unsigned Binary); - void emitDWordLE(uint64_t Binary); - void emitConstPoolInstruction(const MachineInstr &MI); - void emitMOVi32immInstruction(const MachineInstr &MI); - void emitMOVi2piecesInstruction(const MachineInstr &MI); - void emitLEApcrelJTInstruction(const MachineInstr &MI); - void emitPseudoMoveInstruction(const MachineInstr &MI); - void addPCLabel(unsigned LabelID); - void emitPseudoInstruction(const MachineInstr &MI); - unsigned getMachineSoRegOpValue(const MachineInstr &MI, - const MCInstrDesc &MCID, - const MachineOperand &MO, - unsigned OpIdx); - - unsigned getMachineSoImmOpValue(unsigned SoImm); - unsigned getAddrModeSBit(const MachineInstr &MI, - const MCInstrDesc &MCID) const; - - void emitDataProcessingInstruction(const MachineInstr &MI, - unsigned ImplicitRd = 0, - unsigned ImplicitRn = 0); - - void emitLoadStoreInstruction(const MachineInstr &MI, - unsigned ImplicitRd = 0, - unsigned ImplicitRn = 0); - - void emitMiscLoadStoreInstruction(const MachineInstr &MI, - unsigned ImplicitRn = 0); - - void emitLoadStoreMultipleInstruction(const MachineInstr &MI); - - void emitMulFrmInstruction(const MachineInstr &MI); - - void emitExtendInstruction(const MachineInstr &MI); - - void emitMiscArithInstruction(const MachineInstr &MI); - - void emitSaturateInstruction(const MachineInstr &MI); - - void emitBranchInstruction(const MachineInstr &MI); - - void emitInlineJumpTable(unsigned JTIndex); - - void emitMiscBranchInstruction(const MachineInstr &MI); - - void emitVFPArithInstruction(const MachineInstr &MI); - - void emitVFPConversionInstruction(const 
MachineInstr &MI); - - void emitVFPLoadStoreInstruction(const MachineInstr &MI); - - void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI); - - void emitNEONLaneInstruction(const MachineInstr &MI); - void emitNEONDupInstruction(const MachineInstr &MI); - void emitNEON1RegModImmInstruction(const MachineInstr &MI); - void emitNEON2RegInstruction(const MachineInstr &MI); - void emitNEON3RegInstruction(const MachineInstr &MI); - - /// getMachineOpValue - Return binary encoding of operand. If the machine - /// operand requires relocation, record the relocation and return zero. - unsigned getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) const; - unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) const { - return getMachineOpValue(MI, MI.getOperand(OpIdx)); - } - - // FIXME: The legacy JIT ARMCodeEmitter doesn't rely on the the - // TableGen'erated getBinaryCodeForInstr() function to encode any - // operand values, instead querying getMachineOpValue() directly for - // each operand it needs to encode. Thus, any of the new encoder - // helper functions can simply return 0 as the values the return - // are already handled elsewhere. They are placeholders to allow this - // encoder to continue to function until the MC encoder is sufficiently - // far along that this one can be eliminated entirely. 
- unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val) - const { return 0; } - unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val) - const { return 0; } - unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val) - const { return 0; } - unsigned NEONThumb2V8PostEncoder(const MachineInstr &MI,unsigned Val) - const { return 0; } - unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val) - const { return 0; } - unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbAdrLabelOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbBLTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbBLXTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbBRTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbBCCTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbCBTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getUnconditionalBranchTargetOpValue(const MachineInstr &MI, - unsigned Op) const { return 0; } - unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getARMBLTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getSORegRegOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getSORegImmOpValue(const 
MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2Imm8s4OpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2AddrModeImm0_1020s4OpValue(const MachineInstr &MI,unsigned Op) - const { return 0; } - unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getAddrMode6OneLane32AddressOpValue(const MachineInstr &MI, - unsigned Op) - const { return 0; } - unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI, - unsigned Op) const { return 0; } - uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx) - const { return 0; } - - unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op) - const { - // {17-13} = reg - // {12} = (U)nsigned (add == '1', sub == '0') - // {11-0} = imm12 - const MachineOperand &MO = MI.getOperand(Op); - const MachineOperand &MO1 = MI.getOperand(Op + 1); - if (!MO.isReg()) { - emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry); - return 0; - } - unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg()); - int32_t Imm12 = MO1.getImm(); - uint32_t Binary; 
- Binary = Imm12 & 0xfff; - if (Imm12 >= 0) - Binary |= (1 << 12); - Binary |= (Reg << 13); - return Binary; - } - - unsigned getHiLo16ImmOpValue(const MachineInstr &MI, unsigned Op) const { - return 0; - } - - uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx) - const { return 0;} - uint32_t getPostIdxRegOpValue(const MachineInstr &MI, unsigned OpIdx) - const { return 0;} - uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx) - const { return 0;} - uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - uint32_t getAddrMode5OpValue(const MachineInstr &MI, unsigned Op) const { - // {17-13} = reg - // {12} = (U)nsigned (add == '1', sub == '0') - // {11-0} = imm12 - const MachineOperand &MO = MI.getOperand(Op); - const MachineOperand &MO1 = MI.getOperand(Op + 1); - if (!MO.isReg()) { - emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry); - return 0; - } - unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg()); - int32_t Imm12 = MO1.getImm(); - - // Special value for #-0 - if (Imm12 == INT32_MIN) - Imm12 = 0; - - // Immediate is always encoded as positive. The 'U' bit controls add vs - // sub. 
- bool isAdd = true; - if (Imm12 < 0) { - Imm12 = -Imm12; - isAdd = false; - } - - uint32_t Binary = Imm12 & 0xfff; - if (isAdd) - Binary |= (1 << 12); - Binary |= (Reg << 13); - return Binary; - } - unsigned getNEONVcvtImm32OpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - - unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - - unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op) - const { return 0; } - - /// getMovi32Value - Return binary encoding of operand for movw/movt. If the - /// machine operand requires relocation, record the relocation and return - /// zero. - unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO, - unsigned Reloc); - - /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. - /// - unsigned getShiftOp(unsigned Imm) const ; - - /// Routines that handle operands which add machine relocations which are - /// fixed up by the relocation stage. 
- void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, - bool MayNeedFarStub, bool Indirect, - intptr_t ACPV = 0) const; - void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; - void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; - void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; - void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc, - intptr_t JTBase = 0) const; - unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) const; - unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) const; - unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) const; - unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) const; - unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) const; - unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) const; - }; -} - -char ARMCodeEmitter::ID = 0; - -/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM -/// code to the specified MCE object. 
-FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, - JITCodeEmitter &JCE) { - return new ARMCodeEmitter(TM, JCE); -} - -bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { - TargetMachine &Target = const_cast<TargetMachine&>(MF.getTarget()); - - assert((Target.getRelocationModel() != Reloc::Default || - Target.getRelocationModel() != Reloc::Static) && - "JIT relocation model must be set to static or default!"); - - JTI = static_cast<ARMJITInfo*>(Target.getJITInfo()); - II = static_cast<const ARMBaseInstrInfo*>(Target.getInstrInfo()); - TD = Target.getDataLayout(); - - Subtarget = &TM.getSubtarget<ARMSubtarget>(); - MCPEs = &MF.getConstantPool()->getConstants(); - MJTEs = nullptr; - if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); - IsPIC = TM.getRelocationModel() == Reloc::PIC_; - IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction(); - JTI->Initialize(MF, IsPIC); - MMI = &getAnalysis<MachineModuleInfo>(); - MCE.setModuleInfo(MMI); - - do { - DEBUG(errs() << "JITTing function '" - << MF.getName() << "'\n"); - MCE.startFunction(MF); - for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); - MBB != E; ++MBB) { - MCE.StartMachineBasicBlock(MBB); - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) - emitInstruction(*I); - } - } while (MCE.finishFunction(MF)); - - return false; -} - -/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. -/// -unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const { - switch (ARM_AM::getAM2ShiftOpc(Imm)) { - default: llvm_unreachable("Unknown shift opc!"); - case ARM_AM::asr: return 2; - case ARM_AM::lsl: return 0; - case ARM_AM::lsr: return 1; - case ARM_AM::ror: - case ARM_AM::rrx: return 3; - } -} - -/// getMovi32Value - Return binary encoding of operand for movw/movt. If the -/// machine operand requires relocation, record the relocation and return zero. 
-unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI, - const MachineOperand &MO, - unsigned Reloc) { - assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw)) - && "Relocation to this function should be for movt or movw"); - - if (MO.isImm()) - return static_cast<unsigned>(MO.getImm()); - else if (MO.isGlobal()) - emitGlobalAddress(MO.getGlobal(), Reloc, true, false); - else if (MO.isSymbol()) - emitExternalSymbolAddress(MO.getSymbolName(), Reloc); - else if (MO.isMBB()) - emitMachineBasicBlock(MO.getMBB(), Reloc); - else { -#ifndef NDEBUG - errs() << MO; -#endif - llvm_unreachable("Unsupported operand type for movw/movt"); - } - return 0; -} - -/// getMachineOpValue - Return binary encoding of operand. If the machine -/// operand requires relocation, record the relocation and return zero. -unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) const { - if (MO.isReg()) - return II->getRegisterInfo().getEncodingValue(MO.getReg()); - else if (MO.isImm()) - return static_cast<unsigned>(MO.getImm()); - else if (MO.isGlobal()) - emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false); - else if (MO.isSymbol()) - emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch); - else if (MO.isCPI()) { - const MCInstrDesc &MCID = MI.getDesc(); - // For VFP load, the immediate offset is multiplied by 4. - unsigned Reloc = ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm) - ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry; - emitConstPoolAddress(MO.getIndex(), Reloc); - } else if (MO.isJTI()) - emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative); - else if (MO.isMBB()) - emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch); - else - llvm_unreachable("Unable to encode MachineOperand!"); - return 0; -} - -/// emitGlobalAddress - Emit the specified address to the code stream. 
-/// -void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, - bool MayNeedFarStub, bool Indirect, - intptr_t ACPV) const { - MachineRelocation MR = Indirect - ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - const_cast<GlobalValue *>(GV), - ACPV, MayNeedFarStub) - : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - const_cast<GlobalValue *>(GV), ACPV, - MayNeedFarStub); - MCE.addRelocation(MR); -} - -/// emitExternalSymbolAddress - Arrange for the address of an external symbol to -/// be emitted to the current location in the function, and allow it to be PC -/// relative. -void ARMCodeEmitter:: -emitExternalSymbolAddress(const char *ES, unsigned Reloc) const { - MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), - Reloc, ES)); -} - -/// emitConstPoolAddress - Arrange for the address of an constant pool -/// to be emitted to the current location in the function, and allow it to be PC -/// relative. -void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const { - // Tell JIT emitter we'll resolve the address. - MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), - Reloc, CPI, 0, true)); -} - -/// emitJumpTableAddress - Arrange for the address of a jump table to -/// be emitted to the current location in the function, and allow it to be PC -/// relative. -void ARMCodeEmitter:: -emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const { - MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), - Reloc, JTIndex, 0, true)); -} - -/// emitMachineBasicBlock - Emit the specified address basic block. 
-void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, - unsigned Reloc, - intptr_t JTBase) const { - MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), - Reloc, BB, JTBase)); -} - -void ARMCodeEmitter::emitWordLE(unsigned Binary) { - DEBUG(errs() << " 0x"; - errs().write_hex(Binary) << "\n"); - MCE.emitWordLE(Binary); -} - -void ARMCodeEmitter::emitDWordLE(uint64_t Binary) { - DEBUG(errs() << " 0x"; - errs().write_hex(Binary) << "\n"); - MCE.emitDWordLE(Binary); -} - -void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { - DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI); - - MCE.processDebugLoc(MI.getDebugLoc(), true); - - ++NumEmitted; // Keep track of the # of mi's emitted - switch (MI.getDesc().TSFlags & ARMII::FormMask) { - default: { - llvm_unreachable("Unhandled instruction encoding format!"); - } - case ARMII::MiscFrm: - if (MI.getOpcode() == ARM::LEApcrelJT) { - // Materialize jumptable address. - emitLEApcrelJTInstruction(MI); - break; - } - llvm_unreachable("Unhandled instruction encoding!"); - case ARMII::Pseudo: - emitPseudoInstruction(MI); - break; - case ARMII::DPFrm: - case ARMII::DPSoRegFrm: - emitDataProcessingInstruction(MI); - break; - case ARMII::LdFrm: - case ARMII::StFrm: - emitLoadStoreInstruction(MI); - break; - case ARMII::LdMiscFrm: - case ARMII::StMiscFrm: - emitMiscLoadStoreInstruction(MI); - break; - case ARMII::LdStMulFrm: - emitLoadStoreMultipleInstruction(MI); - break; - case ARMII::MulFrm: - emitMulFrmInstruction(MI); - break; - case ARMII::ExtFrm: - emitExtendInstruction(MI); - break; - case ARMII::ArithMiscFrm: - emitMiscArithInstruction(MI); - break; - case ARMII::SatFrm: - emitSaturateInstruction(MI); - break; - case ARMII::BrFrm: - emitBranchInstruction(MI); - break; - case ARMII::BrMiscFrm: - emitMiscBranchInstruction(MI); - break; - // VFP instructions. 
- case ARMII::VFPUnaryFrm: - case ARMII::VFPBinaryFrm: - emitVFPArithInstruction(MI); - break; - case ARMII::VFPConv1Frm: - case ARMII::VFPConv2Frm: - case ARMII::VFPConv3Frm: - case ARMII::VFPConv4Frm: - case ARMII::VFPConv5Frm: - emitVFPConversionInstruction(MI); - break; - case ARMII::VFPLdStFrm: - emitVFPLoadStoreInstruction(MI); - break; - case ARMII::VFPLdStMulFrm: - emitVFPLoadStoreMultipleInstruction(MI); - break; - - // NEON instructions. - case ARMII::NGetLnFrm: - case ARMII::NSetLnFrm: - emitNEONLaneInstruction(MI); - break; - case ARMII::NDupFrm: - emitNEONDupInstruction(MI); - break; - case ARMII::N1RegModImmFrm: - emitNEON1RegModImmInstruction(MI); - break; - case ARMII::N2RegFrm: - emitNEON2RegInstruction(MI); - break; - case ARMII::N3RegFrm: - emitNEON3RegInstruction(MI); - break; - } - MCE.processDebugLoc(MI.getDebugLoc(), false); -} - -void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { - unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index. - unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index. - const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex]; - - // Remember the CONSTPOOL_ENTRY address for later relocation. - JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue()); - - // Emit constpool island entry. In most cases, the actual values will be - // resolved and relocated after code emission. 
- if (MCPE.isMachineConstantPoolEntry()) { - ARMConstantPoolValue *ACPV = - static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); - - DEBUG(errs() << " ** ARM constant pool #" << CPI << " @ " - << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'); - - assert(ACPV->isGlobalValue() && "unsupported constant pool value"); - const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV(); - if (GV) { - Reloc::Model RelocM = TM.getRelocationModel(); - emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry, - isa<Function>(GV), - Subtarget->GVIsIndirectSymbol(GV, RelocM), - (intptr_t)ACPV); - } else { - const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(); - emitExternalSymbolAddress(Sym, ARM::reloc_arm_absolute); - } - emitWordLE(0); - } else { - const Constant *CV = MCPE.Val.ConstVal; - - DEBUG({ - errs() << " ** Constant pool #" << CPI << " @ " - << (void*)MCE.getCurrentPCValue() << " "; - if (const Function *F = dyn_cast<Function>(CV)) - errs() << F->getName(); - else - errs() << *CV; - errs() << '\n'; - }); - - if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { - emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false); - emitWordLE(0); - } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - uint32_t Val = uint32_t(*CI->getValue().getRawData()); - emitWordLE(Val); - } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { - if (CFP->getType()->isFloatTy()) - emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); - else if (CFP->getType()->isDoubleTy()) - emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); - else { - llvm_unreachable("Unable to handle this constantpool entry!"); - } - } else { - llvm_unreachable("Unable to handle this constantpool entry!"); - } - } -} - -void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) { - const MachineOperand &MO0 = MI.getOperand(0); - const MachineOperand &MO1 = MI.getOperand(1); - - // Emit the 'movw' instruction. 
- unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000 - - unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF; - - // Set the conditional execution predicate. - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode Rd. - Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; - - // Encode imm16 as imm4:imm12 - Binary |= Lo16 & 0xFFF; // Insts{11-0} = imm12 - Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4 - emitWordLE(Binary); - - unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16; - // Emit the 'movt' instruction. - Binary = 0x34 << 20; // movt: Insts{27-20} = 0b00110100 - - // Set the conditional execution predicate. - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode Rd. - Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; - - // Encode imm16 as imm4:imm1, same as movw above. - Binary |= Hi16 & 0xFFF; - Binary |= ((Hi16 >> 12) & 0xF) << 16; - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) { - const MachineOperand &MO0 = MI.getOperand(0); - const MachineOperand &MO1 = MI.getOperand(1); - assert(MO1.isImm() && ARM_AM::isSOImmTwoPartVal(MO1.getImm()) && - "Not a valid so_imm value!"); - unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm()); - unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm()); - - // Emit the 'mov' instruction. - unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101 - - // Set the conditional execution predicate. - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode Rd. - Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; - - // Encode so_imm. - // Set bit I(25) to identify this is the immediate form of <shifter_op> - Binary |= 1 << ARMII::I_BitShift; - Binary |= getMachineSoImmOpValue(V1); - emitWordLE(Binary); - - // Now the 'orr' instruction. - Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100 - - // Set the conditional execution predicate. 
- Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode Rd. - Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; - - // Encode Rn. - Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift; - - // Encode so_imm. - // Set bit I(25) to identify this is the immediate form of <shifter_op> - Binary |= 1 << ARMII::I_BitShift; - Binary |= getMachineSoImmOpValue(V2); - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { - // It's basically add r, pc, (LJTI - $+8) - - const MCInstrDesc &MCID = MI.getDesc(); - - // Emit the 'add' instruction. - unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100 - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode S bit if MI modifies CPSR. - Binary |= getAddrModeSBit(MI, MCID); - - // Encode Rd. - Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; - - // Encode Rn which is PC. - Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift; - - // Encode the displacement. - Binary |= 1 << ARMII::I_BitShift; - emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) { - unsigned Opcode = MI.getDesc().Opcode; - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode S bit if MI modifies CPSR. - if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag) - Binary |= 1 << ARMII::S_BitShift; - - // Encode register def if there is one. - Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; - - // Encode the shift operation. 
- switch (Opcode) { - default: break; - case ARM::RRX: - // rrx - Binary |= 0x6 << 4; - break; - case ARM::MOVsrl_flag: - // lsr #1 - Binary |= (0x2 << 4) | (1 << 7); - break; - case ARM::MOVsra_flag: - // asr #1 - Binary |= (0x4 << 4) | (1 << 7); - break; - } - - // Encode register Rm. - Binary |= getMachineOpValue(MI, 1); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::addPCLabel(unsigned LabelID) { - DEBUG(errs() << " ** LPC" << LabelID << " @ " - << (void*)MCE.getCurrentPCValue() << '\n'); - JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue()); -} - -void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { - unsigned Opcode = MI.getDesc().Opcode; - switch (Opcode) { - default: - llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); - case ARM::BX_CALL: - case ARM::BMOVPCRX_CALL: { - // First emit mov lr, pc - unsigned Binary = 0x01a0e00f; - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - emitWordLE(Binary); - - // and then emit the branch. - emitMiscBranchInstruction(MI); - break; - } - case TargetOpcode::INLINEASM: { - // We allow inline assembler nodes with empty bodies - they can - // implicitly define registers, which is ok for JIT. - if (MI.getOperand(0).getSymbolName()[0]) { - report_fatal_error("JIT does not support inline asm!"); - } - break; - } - case TargetOpcode::CFI_INSTRUCTION: - break; - case TargetOpcode::EH_LABEL: - MCE.emitLabel(MI.getOperand(0).getMCSymbol()); - break; - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - // Do nothing. - break; - case ARM::CONSTPOOL_ENTRY: - emitConstPoolInstruction(MI); - break; - case ARM::PICADD: { - // Remember of the address of the PC label for relocation later. - addPCLabel(MI.getOperand(2).getImm()); - // PICADD is just an add instruction that implicitly read pc. 
- emitDataProcessingInstruction(MI, 0, ARM::PC); - break; - } - case ARM::PICLDR: - case ARM::PICLDRB: - case ARM::PICSTR: - case ARM::PICSTRB: { - // Remember of the address of the PC label for relocation later. - addPCLabel(MI.getOperand(2).getImm()); - // These are just load / store instructions that implicitly read pc. - emitLoadStoreInstruction(MI, 0, ARM::PC); - break; - } - case ARM::PICLDRH: - case ARM::PICLDRSH: - case ARM::PICLDRSB: - case ARM::PICSTRH: { - // Remember of the address of the PC label for relocation later. - addPCLabel(MI.getOperand(2).getImm()); - // These are just load / store instructions that implicitly read pc. - emitMiscLoadStoreInstruction(MI, ARM::PC); - break; - } - - case ARM::MOVi32imm: - // Two instructions to materialize a constant. - if (Subtarget->hasV6T2Ops()) - emitMOVi32immInstruction(MI); - else - emitMOVi2piecesInstruction(MI); - break; - - case ARM::LEApcrelJT: - // Materialize jumptable address. - emitLEApcrelJTInstruction(MI); - break; - case ARM::RRX: - case ARM::MOVsrl_flag: - case ARM::MOVsra_flag: - emitPseudoMoveInstruction(MI); - break; - } -} - -unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI, - const MCInstrDesc &MCID, - const MachineOperand &MO, - unsigned OpIdx) { - unsigned Binary = getMachineOpValue(MI, MO); - - const MachineOperand &MO1 = MI.getOperand(OpIdx + 1); - const MachineOperand &MO2 = MI.getOperand(OpIdx + 2); - ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm()); - - // Encode the shift opcode. - unsigned SBits = 0; - unsigned Rs = MO1.getReg(); - if (Rs) { - // Set shift operand (bit[7:4]). - // LSL - 0001 - // LSR - 0011 - // ASR - 0101 - // ROR - 0111 - // RRX - 0110 and bit[11:8] clear. 
- switch (SOpc) { - default: llvm_unreachable("Unknown shift opc!"); - case ARM_AM::lsl: SBits = 0x1; break; - case ARM_AM::lsr: SBits = 0x3; break; - case ARM_AM::asr: SBits = 0x5; break; - case ARM_AM::ror: SBits = 0x7; break; - case ARM_AM::rrx: SBits = 0x6; break; - } - } else { - // Set shift operand (bit[6:4]). - // LSL - 000 - // LSR - 010 - // ASR - 100 - // ROR - 110 - switch (SOpc) { - default: llvm_unreachable("Unknown shift opc!"); - case ARM_AM::lsl: SBits = 0x0; break; - case ARM_AM::lsr: SBits = 0x2; break; - case ARM_AM::asr: SBits = 0x4; break; - case ARM_AM::ror: SBits = 0x6; break; - } - } - Binary |= SBits << 4; - if (SOpc == ARM_AM::rrx) - return Binary; - - // Encode the shift operation Rs or shift_imm (except rrx). - if (Rs) { - // Encode Rs bit[11:8]. - assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | (II->getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift); - } - - // Encode shift_imm bit[11:7]. - return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7; -} - -unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { - int SoImmVal = ARM_AM::getSOImmVal(SoImm); - assert(SoImmVal != -1 && "Not a valid so_imm value!"); - - // Encode rotate_imm. - unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1) - << ARMII::SoRotImmShift; - - // Encode immed_8. - Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal); - return Binary; -} - -unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, - const MCInstrDesc &MCID) const { - for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e;--i){ - const MachineOperand &MO = MI.getOperand(i-1); - if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) - return 1 << ARMII::S_BitShift; - } - return 0; -} - -void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, - unsigned ImplicitRd, - unsigned ImplicitRn) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGn. 
- unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode S bit if MI modifies CPSR. - Binary |= getAddrModeSBit(MI, MCID); - - // Encode register def if there is one. - unsigned NumDefs = MCID.getNumDefs(); - unsigned OpIdx = 0; - if (NumDefs) - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; - else if (ImplicitRd) - // Special handling for implicit use (e.g. PC). - Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); - - if (MCID.Opcode == ARM::MOVi16) { - // Get immediate from MI. - unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx), - ARM::reloc_arm_movw); - // Encode imm which is the same as in emitMOVi32immInstruction(). - Binary |= Lo16 & 0xFFF; - Binary |= ((Lo16 >> 12) & 0xF) << 16; - emitWordLE(Binary); - return; - } else if(MCID.Opcode == ARM::MOVTi16) { - unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx), - ARM::reloc_arm_movt) >> 16); - Binary |= Hi16 & 0xFFF; - Binary |= ((Hi16 >> 12) & 0xF) << 16; - emitWordLE(Binary); - return; - } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) { - uint32_t v = ~MI.getOperand(2).getImm(); - int32_t lsb = countTrailingZeros(v); - int32_t msb = (32 - countLeadingZeros(v)) - 1; - // Instr{20-16} = msb, Instr{11-7} = lsb - Binary |= (msb & 0x1F) << 16; - Binary |= (lsb & 0x1F) << 7; - emitWordLE(Binary); - return; - } else if ((MCID.Opcode == ARM::UBFX) || (MCID.Opcode == ARM::SBFX)) { - // Encode Rn in Instr{0-3} - Binary |= getMachineOpValue(MI, OpIdx++); - - uint32_t lsb = MI.getOperand(OpIdx++).getImm(); - uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1; - - // Instr{20-16} = widthm1, Instr{11-7} = lsb - Binary |= (widthm1 & 0x1F) << 16; - Binary |= (lsb & 0x1F) << 7; - emitWordLE(Binary); - return; - } - - // If this is a two-address operand, skip it. e.g. MOVCCr operand 1. 
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - - // Encode first non-shifter register operand if there is one. - bool isUnary = MCID.TSFlags & ARMII::UnaryDP; - if (!isUnary) { - if (ImplicitRn) - // Special handling for implicit use (e.g. PC). - Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); - else { - Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift; - ++OpIdx; - } - } - - // Encode shifter operand. - const MachineOperand &MO = MI.getOperand(OpIdx); - if ((MCID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) { - // Encode SoReg. - emitWordLE(Binary | getMachineSoRegOpValue(MI, MCID, MO, OpIdx)); - return; - } - - if (MO.isReg()) { - // Encode register Rm. - emitWordLE(Binary | II->getRegisterInfo().getEncodingValue(MO.getReg())); - return; - } - - // Encode so_imm. - Binary |= getMachineSoImmOpValue((unsigned)MO.getImm()); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, - unsigned ImplicitRd, - unsigned ImplicitRn) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned Form = MCID.TSFlags & ARMII::FormMask; - bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0; - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // If this is an LDRi12, STRi12 or LDRcp, nothing more needs be done. - if (MI.getOpcode() == ARM::LDRi12 || MI.getOpcode() == ARM::LDRcp || - MI.getOpcode() == ARM::STRi12) { - emitWordLE(Binary); - return; - } - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - unsigned OpIdx = 0; - - // Operand 0 of a pre- and post-indexed store is the address base - // writeback. Skip it. - bool Skipped = false; - if (IsPrePost && Form == ARMII::StFrm) { - ++OpIdx; - Skipped = true; - } - - // Set first operand - if (ImplicitRd) - // Special handling for implicit use (e.g. PC). 
- Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); - else - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; - - // Set second operand - if (ImplicitRn) - // Special handling for implicit use (e.g. PC). - Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); - else - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; - - // If this is a two-address operand, skip it. e.g. LDR_PRE. - if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - - const MachineOperand &MO2 = MI.getOperand(OpIdx); - unsigned AM2Opc = (ImplicitRn == ARM::PC) - ? 0 : MI.getOperand(OpIdx+1).getImm(); - - // Set bit U(23) according to sign of immed value (positive or negative). - Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) << - ARMII::U_BitShift); - if (!MO2.getReg()) { // is immediate - if (ARM_AM::getAM2Offset(AM2Opc)) - // Set the value of offset_12 field - Binary |= ARM_AM::getAM2Offset(AM2Opc); - emitWordLE(Binary); - return; - } - - // Set bit I(25), because this is not in immediate encoding. - Binary |= 1 << ARMII::I_BitShift; - assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); - // Set bit[3:0] to the corresponding Rm register - Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); - - // If this instr is in scaled register offset/index instruction, set - // shift_immed(bit[11:7]) and shift(bit[6:5]) fields. - if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) { - Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift - Binary |= ShImm << ARMII::ShiftShift; // shift_immed - } - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, - unsigned ImplicitRn) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned Form = MCID.TSFlags & ARMII::FormMask; - bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0; - - // Part of binary is determined by TableGn. 
- unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - unsigned OpIdx = 0; - - // Operand 0 of a pre- and post-indexed store is the address base - // writeback. Skip it. - bool Skipped = false; - if (IsPrePost && Form == ARMII::StMiscFrm) { - ++OpIdx; - Skipped = true; - } - - // Set first operand - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; - - // Skip LDRD and STRD's second operand. - if (MCID.Opcode == ARM::LDRD || MCID.Opcode == ARM::STRD) - ++OpIdx; - - // Set second operand - if (ImplicitRn) - // Special handling for implicit use (e.g. PC). - Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); - else - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; - - // If this is a two-address operand, skip it. e.g. LDRH_POST. - if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - - const MachineOperand &MO2 = MI.getOperand(OpIdx); - unsigned AM3Opc = (ImplicitRn == ARM::PC) - ? 0 : MI.getOperand(OpIdx+1).getImm(); - - // Set bit U(23) according to sign of immed value (positive or negative) - Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) << - ARMII::U_BitShift); - - // If this instr is in register offset/index encoding, set bit[3:0] - // to the corresponding Rm register. - if (MO2.getReg()) { - Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); - emitWordLE(Binary); - return; - } - - // This instr is in immediate offset/index encoding, set bit 22 to 1. 
- Binary |= 1 << ARMII::AM3_I_BitShift; - if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) { - // Set operands - Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH - Binary |= (ImmOffs & 0xF); // immedL - } - - emitWordLE(Binary); -} - -static unsigned getAddrModeUPBits(unsigned Mode) { - unsigned Binary = 0; - - // Set addressing mode by modifying bits U(23) and P(24) - // IA - Increment after - bit U = 1 and bit P = 0 - // IB - Increment before - bit U = 1 and bit P = 1 - // DA - Decrement after - bit U = 0 and bit P = 0 - // DB - Decrement before - bit U = 0 and bit P = 1 - switch (Mode) { - default: llvm_unreachable("Unknown addressing sub-mode!"); - case ARM_AM::da: break; - case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break; - case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break; - case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break; - } - - return Binary; -} - -void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0; - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Skip operand 0 of an instruction with base register update. 
- unsigned OpIdx = 0; - if (IsUpdating) - ++OpIdx; - - // Set base address operand - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; - - // Set addressing mode by modifying bits U(23) and P(24) - ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode()); - Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode)); - - // Set bit W(21) - if (IsUpdating) - Binary |= 0x1 << ARMII::W_BitShift; - - // Set registers - for (unsigned i = OpIdx+2, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - break; - unsigned RegNum = II->getRegisterInfo().getEncodingValue(MO.getReg()); - assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && - RegNum < 16); - Binary |= 0x1 << RegNum; - } - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode S bit if MI modifies CPSR. - Binary |= getAddrModeSBit(MI, MCID); - - // 32x32->64bit operations have two destination registers. The number - // of register definitions will tell us if that's what we're dealing with. - unsigned OpIdx = 0; - if (MCID.getNumDefs() == 2) - Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift; - - // Encode Rd - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift; - - // Encode Rm - Binary |= getMachineOpValue(MI, OpIdx++); - - // Encode Rs - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift; - - // Many multiple instructions (e.g. MLA) have three src operands. Encode - // it as Rn (for multiply, that's in the same offset as RdLo. 
- if (MCID.getNumOperands() > OpIdx && - !MCID.OpInfo[OpIdx].isPredicate() && - !MCID.OpInfo[OpIdx].isOptionalDef()) - Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift; - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - unsigned OpIdx = 0; - - // Encode Rd - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; - - const MachineOperand &MO1 = MI.getOperand(OpIdx++); - const MachineOperand &MO2 = MI.getOperand(OpIdx); - if (MO2.isReg()) { - // Two register operand form. - // Encode Rn. - Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift; - - // Encode Rm. - Binary |= getMachineOpValue(MI, MO2); - ++OpIdx; - } else { - Binary |= getMachineOpValue(MI, MO1); - } - - // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand. - if (MI.getOperand(OpIdx).isImm() && - !MCID.OpInfo[OpIdx].isPredicate() && - !MCID.OpInfo[OpIdx].isOptionalDef()) - Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift; - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGn. 
- unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // PKH instructions are finished at this point - if (MCID.Opcode == ARM::PKHBT || MCID.Opcode == ARM::PKHTB) { - emitWordLE(Binary); - return; - } - - unsigned OpIdx = 0; - - // Encode Rd - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; - - const MachineOperand &MO = MI.getOperand(OpIdx++); - if (OpIdx == MCID.getNumOperands() || - MCID.OpInfo[OpIdx].isPredicate() || - MCID.OpInfo[OpIdx].isOptionalDef()) { - // Encode Rm and it's done. - Binary |= getMachineOpValue(MI, MO); - emitWordLE(Binary); - return; - } - - // Encode Rn. - Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift; - - // Encode Rm. - Binary |= getMachineOpValue(MI, OpIdx++); - - // Encode shift_imm. - unsigned ShiftAmt = MI.getOperand(OpIdx).getImm(); - if (MCID.Opcode == ARM::PKHTB) { - assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!"); - if (ShiftAmt == 32) - ShiftAmt = 0; - } - assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); - Binary |= ShiftAmt << ARMII::ShiftShift; - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGen. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode Rd - Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; - - // Encode saturate bit position. - unsigned Pos = MI.getOperand(1).getImm(); - if (MCID.Opcode == ARM::SSAT || MCID.Opcode == ARM::SSAT16) - Pos -= 1; - assert((Pos < 16 || (Pos < 32 && - MCID.Opcode != ARM::SSAT16 && - MCID.Opcode != ARM::USAT16)) && - "saturate bit position out of range"); - Binary |= Pos << 16; - - // Encode Rm - Binary |= getMachineOpValue(MI, 2); - - // Encode shift_imm. 
- if (MCID.getNumOperands() == 4) { - unsigned ShiftOp = MI.getOperand(3).getImm(); - ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); - if (Opc == ARM_AM::asr) - Binary |= (1 << 6); - unsigned ShiftAmt = MI.getOperand(3).getImm(); - if (ShiftAmt == 32 && Opc == ARM_AM::asr) - ShiftAmt = 0; - assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); - Binary |= ShiftAmt << ARMII::ShiftShift; - } - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - if (MCID.Opcode == ARM::TPsoft) { - llvm_unreachable("ARM::TPsoft FIXME"); // FIXME - } - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Set signed_immed_24 field - Binary |= getMachineOpValue(MI, 0); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) { - // Remember the base address of the inline jump table. - uintptr_t JTBase = MCE.getCurrentPCValue(); - JTI->addJumpTableBaseAddr(JTIndex, JTBase); - DEBUG(errs() << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase - << '\n'); - - // Now emit the jump table entries. - const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs; - for (unsigned i = 0, e = MBBs.size(); i != e; ++i) { - if (IsPIC) - // DestBB address - JT base. - emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase); - else - // Absolute DestBB address. - emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute); - emitWordLE(0); - } -} - -void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Handle jump tables. - if (MCID.Opcode == ARM::BR_JTr || MCID.Opcode == ARM::BR_JTadd) { - // First emit a ldr pc, [] instruction. - emitDataProcessingInstruction(MI, ARM::PC); - - // Then emit the inline jump table. 
- unsigned JTIndex = - (MCID.Opcode == ARM::BR_JTr) - ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex(); - emitInlineJumpTable(JTIndex); - return; - } else if (MCID.Opcode == ARM::BR_JTm) { - // First emit a ldr pc, [] instruction. - emitLoadStoreInstruction(MI, ARM::PC); - - // Then emit the inline jump table. - emitInlineJumpTable(MI.getOperand(3).getIndex()); - return; - } - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR) - // The return register is LR. - Binary |= II->getRegisterInfo().getEncodingValue(ARM::LR); - else - // otherwise, set the return register - Binary |= getMachineOpValue(MI, 0); - - emitWordLE(Binary); -} - -unsigned ARMCodeEmitter::encodeVFPRd(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegD = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - bool isSPVFP = ARM::SPRRegClass.contains(RegD); - RegD = II->getRegisterInfo().getEncodingValue(RegD); - if (!isSPVFP) - Binary |= RegD << ARMII::RegRdShift; - else { - Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift; - Binary |= (RegD & 0x01) << ARMII::D_BitShift; - } - return Binary; -} - -unsigned ARMCodeEmitter::encodeVFPRn(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegN = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - bool isSPVFP = ARM::SPRRegClass.contains(RegN); - RegN = II->getRegisterInfo().getEncodingValue(RegN); - if (!isSPVFP) - Binary |= RegN << ARMII::RegRnShift; - else { - Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift; - Binary |= (RegN & 0x01) << ARMII::N_BitShift; - } - return Binary; -} - -unsigned ARMCodeEmitter::encodeVFPRm(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegM = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - bool isSPVFP = ARM::SPRRegClass.contains(RegM); - RegM = 
II->getRegisterInfo().getEncodingValue(RegM); - if (!isSPVFP) - Binary |= RegM; - else { - Binary |= ((RegM & 0x1E) >> 1); - Binary |= (RegM & 0x01) << ARMII::M_BitShift; - } - return Binary; -} - -void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - unsigned OpIdx = 0; - assert((Binary & ARMII::D_BitShift) == 0 && - (Binary & ARMII::N_BitShift) == 0 && - (Binary & ARMII::M_BitShift) == 0 && "VFP encoding bug!"); - - // Encode Dd / Sd. - Binary |= encodeVFPRd(MI, OpIdx++); - - // If this is a two-address operand, skip it, e.g. FMACD. - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - - // Encode Dn / Sn. - if ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm) - Binary |= encodeVFPRn(MI, OpIdx++); - - if (OpIdx == MCID.getNumOperands() || - MCID.OpInfo[OpIdx].isPredicate() || - MCID.OpInfo[OpIdx].isOptionalDef()) { - // FCMPEZD etc. has only one operand. - emitWordLE(Binary); - return; - } - - // Encode Dm / Sm. - Binary |= encodeVFPRm(MI, OpIdx); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned Form = MCID.TSFlags & ARMII::FormMask; - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - switch (Form) { - default: break; - case ARMII::VFPConv1Frm: - case ARMII::VFPConv2Frm: - case ARMII::VFPConv3Frm: - // Encode Dd / Sd. - Binary |= encodeVFPRd(MI, 0); - break; - case ARMII::VFPConv4Frm: - // Encode Dn / Sn. - Binary |= encodeVFPRn(MI, 0); - break; - case ARMII::VFPConv5Frm: - // Encode Dm / Sm. 
- Binary |= encodeVFPRm(MI, 0); - break; - } - - switch (Form) { - default: break; - case ARMII::VFPConv1Frm: - // Encode Dm / Sm. - Binary |= encodeVFPRm(MI, 1); - break; - case ARMII::VFPConv2Frm: - case ARMII::VFPConv3Frm: - // Encode Dn / Sn. - Binary |= encodeVFPRn(MI, 1); - break; - case ARMII::VFPConv4Frm: - case ARMII::VFPConv5Frm: - // Encode Dd / Sd. - Binary |= encodeVFPRd(MI, 1); - break; - } - - if (Form == ARMII::VFPConv5Frm) - // Encode Dn / Sn. - Binary |= encodeVFPRn(MI, 2); - else if (Form == ARMII::VFPConv3Frm) - // Encode Dm / Sm. - Binary |= encodeVFPRm(MI, 2); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) { - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - unsigned OpIdx = 0; - - // Encode Dd / Sd. - Binary |= encodeVFPRd(MI, OpIdx++); - - // Encode address base. - const MachineOperand &Base = MI.getOperand(OpIdx++); - Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift; - - // If there is a non-zero immediate offset, encode it. - if (Base.isReg()) { - const MachineOperand &Offset = MI.getOperand(OpIdx); - if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) { - if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add) - Binary |= 1 << ARMII::U_BitShift; - Binary |= ImmOffs; - emitWordLE(Binary); - return; - } - } - - // If immediate offset is omitted, default to +0. - Binary |= 1 << ARMII::U_BitShift; - - emitWordLE(Binary); -} - -void -ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0; - - // Part of binary is determined by TableGn. 
- unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Skip operand 0 of an instruction with base register update. - unsigned OpIdx = 0; - if (IsUpdating) - ++OpIdx; - - // Set base address operand - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; - - // Set addressing mode by modifying bits U(23) and P(24) - ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode()); - Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode)); - - // Set bit W(21) - if (IsUpdating) - Binary |= 0x1 << ARMII::W_BitShift; - - // First register is encoded in Dd. - Binary |= encodeVFPRd(MI, OpIdx+2); - - // Count the number of registers. - unsigned NumRegs = 1; - for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - break; - ++NumRegs; - } - // Bit 8 will be set if <list> is consecutive 64-bit registers (e.g., D0) - // Otherwise, it will be 0, in the case of 32-bit registers. 
- if(Binary & 0x100) - Binary |= NumRegs * 2; - else - Binary |= NumRegs; - - emitWordLE(Binary); -} - -unsigned ARMCodeEmitter::encodeNEONRd(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegD = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - RegD = II->getRegisterInfo().getEncodingValue(RegD); - Binary |= (RegD & 0xf) << ARMII::RegRdShift; - Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift; - return Binary; -} - -unsigned ARMCodeEmitter::encodeNEONRn(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegN = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - RegN = II->getRegisterInfo().getEncodingValue(RegN); - Binary |= (RegN & 0xf) << ARMII::RegRnShift; - Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift; - return Binary; -} - -unsigned ARMCodeEmitter::encodeNEONRm(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegM = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - RegM = II->getRegisterInfo().getEncodingValue(RegM); - Binary |= (RegM & 0xf); - Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift; - return Binary; -} - -/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON -/// data-processing instruction to the corresponding Thumb encoding. -static unsigned convertNEONDataProcToThumb(unsigned Binary) { - assert((Binary & 0xfe000000) == 0xf2000000 && - "not an ARM NEON data-processing instruction"); - unsigned UBit = (Binary >> 24) & 1; - return 0xef000000 | (UBit << 28) | (Binary & 0xffffff); -} - -void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) { - unsigned Binary = getBinaryCodeForInstr(MI); - - unsigned RegTOpIdx, RegNOpIdx, LnOpIdx; - const MCInstrDesc &MCID = MI.getDesc(); - if ((MCID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) { - RegTOpIdx = 0; - RegNOpIdx = 1; - LnOpIdx = 2; - } else { // ARMII::NSetLnFrm - RegTOpIdx = 2; - RegNOpIdx = 0; - LnOpIdx = 3; - } - - // Set the conditional execution predicate - Binary |= (IsThumb ? 
ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; - - unsigned RegT = MI.getOperand(RegTOpIdx).getReg(); - RegT = II->getRegisterInfo().getEncodingValue(RegT); - Binary |= (RegT << ARMII::RegRdShift); - Binary |= encodeNEONRn(MI, RegNOpIdx); - - unsigned LaneShift; - if ((Binary & (1 << 22)) != 0) - LaneShift = 0; // 8-bit elements - else if ((Binary & (1 << 5)) != 0) - LaneShift = 1; // 16-bit elements - else - LaneShift = 2; // 32-bit elements - - unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift; - unsigned Opc1 = Lane >> 2; - unsigned Opc2 = Lane & 3; - assert((Opc1 & 3) == 0 && "out-of-range lane number operand"); - Binary |= (Opc1 << 21); - Binary |= (Opc2 << 5); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) { - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; - - unsigned RegT = MI.getOperand(1).getReg(); - RegT = II->getRegisterInfo().getEncodingValue(RegT); - Binary |= (RegT << ARMII::RegRdShift); - Binary |= encodeNEONRn(MI, 0); - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) { - unsigned Binary = getBinaryCodeForInstr(MI); - // Destination register is encoded in Dd. 
- Binary |= encodeNEONRd(MI, 0); - // Immediate fields: Op, Cmode, I, Imm3, Imm4 - unsigned Imm = MI.getOperand(1).getImm(); - unsigned Op = (Imm >> 12) & 1; - unsigned Cmode = (Imm >> 8) & 0xf; - unsigned I = (Imm >> 7) & 1; - unsigned Imm3 = (Imm >> 4) & 0x7; - unsigned Imm4 = Imm & 0xf; - Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4; - if (IsThumb) - Binary = convertNEONDataProcToThumb(Binary); - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned Binary = getBinaryCodeForInstr(MI); - // Destination register is encoded in Dd; source register in Dm. - unsigned OpIdx = 0; - Binary |= encodeNEONRd(MI, OpIdx++); - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - Binary |= encodeNEONRm(MI, OpIdx); - if (IsThumb) - Binary = convertNEONDataProcToThumb(Binary); - // FIXME: This does not handle VDUPfdf or VDUPfqf. - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned Binary = getBinaryCodeForInstr(MI); - // Destination register is encoded in Dd; source registers in Dn and Dm. - unsigned OpIdx = 0; - Binary |= encodeNEONRd(MI, OpIdx++); - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - Binary |= encodeNEONRn(MI, OpIdx++); - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - Binary |= encodeNEONRm(MI, OpIdx); - if (IsThumb) - Binary = convertNEONDataProcToThumb(Binary); - // FIXME: This does not handle VMOVDneon or VMOVQ. 
- emitWordLE(Binary); -} - -#include "ARMGenCodeEmitter.inc" diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index ce264ee..29405eb 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -275,6 +275,7 @@ namespace { private: void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs); + bool BBHasFallthrough(MachineBasicBlock *MBB); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); unsigned getCPELogAlign(const MachineInstr *CPEMI); void scanFunctionJumpTables(); @@ -382,7 +383,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { << MCP->getConstants().size() << " CP entries, aligned to " << MCP->getConstantPoolAlignment() << " bytes *****\n"); - TII = (const ARMBaseInstrInfo*)MF->getTarget().getInstrInfo(); + TII = (const ARMBaseInstrInfo *)MF->getTarget() + .getSubtargetImpl() + ->getInstrInfo(); AFI = MF->getInfo<ARMFunctionInfo>(); STI = &MF->getTarget().getSubtarget<ARMSubtarget>(); @@ -529,7 +532,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { // identity mapping of CPI's to CPE's. const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants(); - const DataLayout &TD = *MF->getTarget().getDataLayout(); + const DataLayout &TD = *MF->getSubtarget().getDataLayout(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); assert(Size >= 4 && "Too small constant pool entry"); @@ -554,9 +557,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { InsPoint[a] = CPEMI; // Add a new CPEntry, but no corresponding CPUser yet. 
- std::vector<CPEntry> CPEs; - CPEs.push_back(CPEntry(CPEMI, i)); - CPEntries.push_back(CPEs); + CPEntries.emplace_back(1, CPEntry(CPEMI, i)); ++NumCPEs; DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = " << Size << ", align = " << Align <<'\n'); @@ -566,7 +567,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { /// BBHasFallthrough - Return true if the specified basic block can fallthrough /// into the block immediately after it. -static bool BBHasFallthrough(MachineBasicBlock *MBB) { +bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) { // Get the next machine basic block in the function. MachineFunction::iterator MBBI = MBB; // Can't fall off end of function. @@ -574,12 +575,15 @@ static bool BBHasFallthrough(MachineBasicBlock *MBB) { return false; MachineBasicBlock *NextBB = std::next(MBBI); - for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) - if (*I == NextBB) - return true; + if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end()) + return false; - return false; + // Try to analyze the end of the block. A potential fallthrough may already + // have an unconditional branch for whatever reason. + MachineBasicBlock *TBB, *FBB; + SmallVector<MachineOperand, 4> Cond; + bool TooDifficult = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond); + return TooDifficult || FBB == nullptr; } /// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI, @@ -1203,7 +1207,8 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, unsigned Growth; if (isWaterInRange(UserOffset, WaterBB, U, Growth) && (WaterBB->getNumber() < U.HighWaterMark->getNumber() || - NewWaterList.count(WaterBB)) && Growth < BestGrowth) { + NewWaterList.count(WaterBB) || WaterBB == U.MI->getParent()) && + Growth < BestGrowth) { // This is the least amount of required padding seen so far. 
BestGrowth = Growth; WaterIter = IP; @@ -1309,7 +1314,12 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // Back past any possible branches (allow for a conditional and a maximally // long unconditional). if (BaseInsertOffset + 8 >= UserBBI.postOffset()) { - BaseInsertOffset = UserBBI.postOffset() - UPad - 8; + // Ensure BaseInsertOffset is larger than the offset of the instruction + // following UserMI so that the loop which searches for the split point + // iterates at least once. + BaseInsertOffset = + std::max(UserBBI.postOffset() - UPad - 8, + UserOffset + TII->GetInstSizeInBytes(UserMI) + 1); DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset)); } unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad + @@ -1352,6 +1362,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, if (CC != ARMCC::AL) MI = LastIT; } + + // We really must not split an IT block. + DEBUG(unsigned PredReg; + assert(!isThumb || getITInstrPredicate(MI, PredReg) == ARMCC::AL)); + NewMBB = splitBlockBeforeInstr(MI); } diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index c7a8415..13bef54 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H -#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H +#ifndef LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H +#define LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/Support/Casting.h" diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 51d3dbb..2d80518 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -867,7 +867,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, if (RI.hasBasePointer(MF)) { int32_t NumBytes = 
AFI->getFramePtrSpillOffset(); unsigned FramePtr = RI.getFrameRegister(MF); - assert(MF.getTarget().getFrameLowering()->hasFP(MF) && + assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) && "base pointer without frame pointer?"); if (AFI->isThumb2Function()) { @@ -1343,8 +1343,9 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); - TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); - TRI = TM.getRegisterInfo(); + TII = static_cast<const ARMBaseInstrInfo *>( + TM.getSubtargetImpl()->getInstrInfo()); + TRI = TM.getSubtargetImpl()->getRegisterInfo(); STI = &TM.getSubtarget<ARMSubtarget>(); AFI = MF.getInfo<ARMFunctionInfo>(); diff --git a/lib/Target/ARM/ARMFPUName.def b/lib/Target/ARM/ARMFPUName.def index 1fef3b3..34ce85d 100644 --- a/lib/Target/ARM/ARMFPUName.def +++ b/lib/Target/ARM/ARMFPUName.def @@ -23,6 +23,7 @@ ARM_FPU_NAME("vfpv3", VFPV3) ARM_FPU_NAME("vfpv3-d16", VFPV3_D16) ARM_FPU_NAME("vfpv4", VFPV4) ARM_FPU_NAME("vfpv4-d16", VFPV4_D16) +ARM_FPU_NAME("fpv5-d16", FPV5_D16) ARM_FPU_NAME("fp-armv8", FP_ARMV8) ARM_FPU_NAME("neon", NEON) ARM_FPU_NAME("neon-vfpv4", NEON_VFPV4) diff --git a/lib/Target/ARM/ARMFPUName.h b/lib/Target/ARM/ARMFPUName.h index 2a64cce..86acffb 100644 --- a/lib/Target/ARM/ARMFPUName.h +++ b/lib/Target/ARM/ARMFPUName.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMFPUNAME_H -#define ARMFPUNAME_H +#ifndef LLVM_LIB_TARGET_ARM_ARMFPUNAME_H +#define LLVM_LIB_TARGET_ARM_ARMFPUNAME_H namespace llvm { namespace ARM { @@ -23,4 +23,4 @@ enum FPUKind { } // namespace ARM } // namespace llvm -#endif // ARMFPUNAME_H +#endif diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index e2d90cd..a5f635e 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -92,11 +92,11 @@ class ARMFastISel final : public 
FastISel { public: explicit ARMFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) - : FastISel(funcInfo, libInfo), - M(const_cast<Module&>(*funcInfo.Fn->getParent())), - TM(funcInfo.MF->getTarget()), - TII(*TM.getInstrInfo()), - TLI(*TM.getTargetLowering()) { + : FastISel(funcInfo, libInfo), + M(const_cast<Module &>(*funcInfo.Fn->getParent())), + TM(funcInfo.MF->getTarget()), + TII(*TM.getSubtargetImpl()->getInstrInfo()), + TLI(*TM.getSubtargetImpl()->getTargetLowering()) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); AFI = funcInfo.MF->getInfo<ARMFunctionInfo>(); isThumb2 = AFI->isThumbFunction(); @@ -105,39 +105,39 @@ class ARMFastISel final : public FastISel { // Code from FastISel.cpp. private: - unsigned FastEmitInst_r(unsigned MachineInstOpcode, + unsigned fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill); - unsigned FastEmitInst_rr(unsigned MachineInstOpcode, + unsigned fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); - unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, + unsigned fastEmitInst_rrr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill); - unsigned FastEmitInst_ri(unsigned MachineInstOpcode, + unsigned fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm); - unsigned FastEmitInst_rri(unsigned MachineInstOpcode, + unsigned fastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm); - unsigned FastEmitInst_i(unsigned MachineInstOpcode, + unsigned fastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm); // Backend specific FastISel code. 
private: - bool TargetSelectInstruction(const Instruction *I) override; - unsigned TargetMaterializeConstant(const Constant *C) override; - unsigned TargetMaterializeAlloca(const AllocaInst *AI) override; + bool fastSelectInstruction(const Instruction *I) override; + unsigned fastMaterializeConstant(const Constant *C) override; + unsigned fastMaterializeAlloca(const AllocaInst *AI) override; bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, const LoadInst *LI) override; - bool FastLowerArguments() override; + bool fastLowerArguments() override; private: #include "ARMGenFastISel.inc" @@ -189,7 +189,9 @@ class ARMFastISel final : public FastISel { unsigned ARMSelectCallOp(bool UseReg); unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT); - const TargetLowering *getTargetLowering() { return TM.getTargetLowering(); } + const TargetLowering *getTargetLowering() { + return TM.getSubtargetImpl()->getTargetLowering(); + } // Call handling routines. private: @@ -283,7 +285,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { return MIB; } -unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { unsigned ResultReg = createResultReg(RC); @@ -305,7 +307,7 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { @@ -333,7 +335,7 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, @@ -365,7 
+367,7 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm) { @@ -391,7 +393,7 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, @@ -421,7 +423,7 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); @@ -511,7 +513,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) { unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) - return false; + return 0; // If we can do this in a single instruction without a constant pool entry // do so now. @@ -534,7 +536,9 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { (ARM_AM::getSOImmVal(Imm) != -1); if (UseImm) { unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi; - unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + const TargetRegisterClass *RC = isThumb2 ? 
&ARM::rGPRRegClass : + &ARM::GPRRegClass; + unsigned ImmReg = createResultReg(RC); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) .addImm(Imm)); @@ -542,11 +546,16 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { } } + unsigned ResultReg = 0; + if (Subtarget->useMovt(*FuncInfo.MF)) + ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); + + if (ResultReg) + return ResultReg; + // Load from constant pool. For now 32-bit only. if (VT != MVT::i32) - return false; - - unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); + return 0; // MachineConstantPool wants an explicit alignment. unsigned Align = DL.getPrefTypeAlignment(C->getType()); @@ -555,21 +564,20 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { Align = DL.getTypeAllocSize(C->getType()); } unsigned Idx = MCP.getConstantPoolIndex(C, Align); - + ResultReg = createResultReg(TLI.getRegClassFor(VT)); if (isThumb2) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM::t2LDRpci), DestReg) - .addConstantPoolIndex(Idx)); + TII.get(ARM::t2LDRpci), ResultReg) + .addConstantPoolIndex(Idx)); else { // The extra immediate is for addrmode2. 
- DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0); + ResultReg = constrainOperandRegClass(TII.get(ARM::LDRcp), ResultReg, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM::LDRcp), DestReg) - .addConstantPoolIndex(Idx) - .addImm(0)); + TII.get(ARM::LDRcp), ResultReg) + .addConstantPoolIndex(Idx) + .addImm(0)); } - - return DestReg; + return ResultReg; } unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { @@ -679,7 +687,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { return DestReg; } -unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { +unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) { EVT CEVT = TLI.getValueType(C->getType(), true); // Only handle simple types. @@ -698,7 +706,7 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { // TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF); -unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { +unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) { // Don't handle dynamic allocas. if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; @@ -901,7 +909,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) { // Since the offset is too large for the load/store instruction // get the reg+offset into a register. if (needsLowering) { - Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg, + Addr.Base.Reg = fastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg, /*Op0IsKill*/false, Addr.Offset, MVT::i32); Addr.Offset = 0; } @@ -1074,7 +1082,7 @@ bool ARMFastISel::SelectLoad(const Instruction *I) { unsigned ResultReg; if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment())) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1276,7 +1284,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { unsigned BrOpc = isThumb2 ? 
ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc)) .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR); - FastEmitBranch(FBB, DbgLoc); + fastEmitBranch(FBB, DbgLoc); FuncInfo.MBB->addSuccessor(TBB); return true; } @@ -1301,7 +1309,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc)) .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); - FastEmitBranch(FBB, DbgLoc); + fastEmitBranch(FBB, DbgLoc); FuncInfo.MBB->addSuccessor(TBB); return true; } @@ -1309,7 +1317,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { dyn_cast<ConstantInt>(BI->getCondition())) { uint64_t Imm = CI->getZExtValue(); MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; - FastEmitBranch(Target, DbgLoc); + fastEmitBranch(Target, DbgLoc); return true; } @@ -1339,7 +1347,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc)) .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); - FastEmitBranch(FBB, DbgLoc); + fastEmitBranch(FBB, DbgLoc); FuncInfo.MBB->addSuccessor(TBB); return true; } @@ -1497,13 +1505,13 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned DestReg = createResultReg(RC); Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0); - unsigned ZeroReg = TargetMaterializeConstant(Zero); + unsigned ZeroReg = fastMaterializeConstant(Zero); // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR. 
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), DestReg) .addReg(ZeroReg).addImm(1) .addImm(ARMPred).addReg(ARM::CPSR); - UpdateValueMap(I, DestReg); + updateValueMap(I, DestReg); return true; } @@ -1522,7 +1530,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::VCVTDS), Result) .addReg(Op)); - UpdateValueMap(I, Result); + updateValueMap(I, Result); return true; } @@ -1541,7 +1549,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::VCVTSD), Result) .addReg(Op)); - UpdateValueMap(I, Result); + updateValueMap(I, Result); return true; } @@ -1585,7 +1593,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg).addReg(FP)); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1617,7 +1625,7 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) { unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg); if (IntReg == 0) return false; - UpdateValueMap(I, IntReg); + updateValueMap(I, IntReg); return true; } @@ -1693,7 +1701,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { .addImm(ARMCC::EQ) .addReg(ARM::CPSR); } - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1783,7 +1791,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(SrcReg1).addReg(SrcReg2)); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1825,7 +1833,7 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 
TII.get(Opc), ResultReg) .addReg(Op1).addReg(Op2)); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1883,7 +1891,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, unsigned &NumBytes, bool isVarArg) { SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, *Context); + CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context); CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false, isVarArg)); @@ -1941,6 +1949,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, // Process the args. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; + const Value *ArgVal = Args[VA.getValNo()]; unsigned Arg = ArgRegs[VA.getValNo()]; MVT ArgVT = ArgVTs[VA.getValNo()]; @@ -1967,7 +1976,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, break; } case CCValAssign::BCvt: { - unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg, + unsigned BC = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg, /*TODO: Kill=*/false); assert(BC != 0 && "Failed to emit a bitcast!"); Arg = BC; @@ -2001,6 +2010,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, } else { assert(VA.isMemLoc()); // Need to store on the stack. + + // Don't emit stores for undef values. + if (isa<UndefValue>(ArgVal)) + continue; + Address Addr; Addr.BaseType = Address::RegBase; Addr.Base.Reg = ARM::SP; @@ -2026,7 +2040,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, // Now the return value. if (RetVT != MVT::isVoid) { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg)); // Copy all of the result registers out of their specified physreg. 
@@ -2045,7 +2059,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, UsedRegs.push_back(RVLocs[1].getLocReg()); // Finally update the result. - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); } else { assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!"); MVT CopyVT = RVLocs[0].getValVT(); @@ -2063,7 +2077,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, UsedRegs.push_back(RVLocs[0].getLocReg()); // Finally update the result. - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); } } @@ -2087,7 +2101,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; - CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext()); + CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */, F.isVarArg())); @@ -2192,7 +2206,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { // Can't handle non-double multi-reg retvals. 
if (RetVT != MVT::isVoid && RetVT != MVT::i32) { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context); + CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false)); if (RVLocs.size() >= 2 && RetVT != MVT::f64) return false; @@ -2303,7 +2317,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 && RetVT != MVT::i16 && RetVT != MVT::i32) { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg)); if (RVLocs.size() >= 2 && RetVT != MVT::f64) return false; @@ -2487,7 +2501,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { } const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo()); + static_cast<const ARMBaseRegisterInfo *>( + TM.getSubtargetImpl()->getRegisterInfo()); unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); unsigned SrcReg = FramePtr; @@ -2505,7 +2520,7 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { .addReg(SrcReg).addImm(0)); SrcReg = DestReg; } - UpdateValueMap(&I, SrcReg); + updateValueMap(&I, SrcReg); return true; } case Intrinsic::memcpy: @@ -2583,7 +2598,7 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) { // Because the high bits are undefined, a truncate doesn't generate // any code. 
- UpdateValueMap(I, SrcReg); + updateValueMap(I, SrcReg); return true; } @@ -2745,7 +2760,7 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) { MVT DestVT = DestEVT.getSimpleVT(); unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt); if (ResultReg == 0) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -2800,12 +2815,12 @@ bool ARMFastISel::SelectShift(const Instruction *I, } AddOptionalDefs(MIB); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } // TODO: SoftFP support. -bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { +bool ARMFastISel::fastSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { case Instruction::Load: @@ -2983,7 +2998,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, return DestReg2; } -bool ARMFastISel::FastLowerArguments() { +bool ARMFastISel::fastLowerArguments() { if (!FuncInfo.CanLowerReturn) return false; @@ -3050,7 +3065,7 @@ bool ARMFastISel::FastLowerArguments() { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(DstReg, getKillRegState(true)); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); } return true; diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h index e191a3c..0c910ab 100644 --- a/lib/Target/ARM/ARMFeatures.h +++ b/lib/Target/ARM/ARMFeatures.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef TARGET_ARM_FEATURES_H -#define TARGET_ARM_FEATURES_H +#ifndef LLVM_LIB_TARGET_ARM_ARMFEATURES_H +#define LLVM_LIB_TARGET_ARM_ARMFEATURES_H #include "MCTargetDesc/ARMMCTargetDesc.h" diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index a67b360..80add7a 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -47,7 +47,7 @@ ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti) /// 
pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); // iOS requires FP not to be clobbered for backtracing purpose. if (STI.isTargetIOS()) @@ -137,12 +137,27 @@ static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, } static int sizeOfSPAdjustment(const MachineInstr *MI) { - assert(MI->getOpcode() == ARM::VSTMDDB_UPD); + int RegSize; + switch (MI->getOpcode()) { + case ARM::VSTMDDB_UPD: + RegSize = 8; + break; + case ARM::STMDB_UPD: + case ARM::t2STMDB_UPD: + RegSize = 4; + break; + case ARM::t2STR_PRE: + case ARM::STR_PRE_IMM: + return 4; + default: + llvm_unreachable("Unknown push or pop like instruction"); + } + int count = 0; // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ // pred) so the list starts at 4. 
for (int i = MI->getNumOperands() - 1; i >= 4; --i) - count += 8; + count += RegSize; return count; } @@ -154,6 +169,46 @@ static bool WindowsRequiresStackProbe(const MachineFunction &MF, return StackSizeInBytes >= 4096; } +namespace { +struct StackAdjustingInsts { + struct InstInfo { + MachineBasicBlock::iterator I; + unsigned SPAdjust; + bool BeforeFPSet; + }; + + SmallVector<InstInfo, 4> Insts; + + void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust, + bool BeforeFPSet = false) { + InstInfo Info = {I, SPAdjust, BeforeFPSet}; + Insts.push_back(Info); + } + + void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) { + auto Info = std::find_if(Insts.begin(), Insts.end(), + [&](InstInfo &Info) { return Info.I == I; }); + assert(Info != Insts.end() && "invalid sp adjusting instruction"); + Info->SPAdjust += ExtraBytes; + } + + void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB, + DebugLoc dl, const ARMBaseInstrInfo &TII, bool HasFP) { + unsigned CFAOffset = 0; + for (auto &Info : Insts) { + if (HasFP && !Info.BeforeFPSet) + return; + + CFAOffset -= Info.SPAdjust; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, std::next(Info.I), dl, + TII.get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex(CFIIndex); + } + } +}; +} + void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -163,20 +218,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MCContext &Context = MMI.getContext(); const TargetMachine &TM = MF.getTarget(); const MCRegisterInfo *MRI = Context.getRegisterInfo(); - const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo()); - const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( 
+ TM.getSubtargetImpl()->getRegisterInfo()); + const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>( + TM.getSubtargetImpl()->getInstrInfo()); assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = TM.getFrameLowering()->getStackAlignment(); + unsigned Align = + TM.getSubtargetImpl()->getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); unsigned FramePtr = RegInfo->getFrameRegister(MF); - int CFAOffset = 0; // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. @@ -189,15 +244,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; + StackAdjustingInsts DefCFAOffsetCandidates; + // Allocate the vararg register save area. 
if (ArgRegsSaveSize) { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, MachineInstr::FrameSetup); - CFAOffset -= ArgRegsSaveSize; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true); } if (!AFI->hasStackFrame() && @@ -205,11 +258,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (NumBytes - ArgRegsSaveSize != 0) { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize), MachineInstr::FrameSetup); - CFAOffset -= NumBytes - ArgRegsSaveSize; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), + NumBytes - ArgRegsSaveSize, true); } return; } @@ -252,21 +302,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } // Move past area 1. - MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push, - DPRCSPush; - if (GPRCS1Size > 0) + MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push; + if (GPRCS1Size > 0) { GPRCS1Push = LastPush = MBBI++; + DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true); + } // Determine starting offsets of spill areas. bool HasFP = hasFP(MF); - unsigned DPRCSOffset = NumBytes - (ArgRegsSaveSize + GPRCS1Size - + GPRCS2Size + DPRCSSize); - unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; - unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; + unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size; + unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; + unsigned DPRAlign = DPRCSSize ? 
std::min(8U, Align) : 4U; + unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign; + unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; int FramePtrOffsetInPush = 0; if (HasFP) { - FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI) - + GPRCS1Size + ArgRegsSaveSize; + FramePtrOffsetInPush = + MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize; AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); } @@ -275,16 +327,32 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); // Move past area 2. - if (GPRCS2Size > 0) + if (GPRCS2Size > 0) { GPRCS2Push = LastPush = MBBI++; + DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); + } + + // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our + // .cfi_offset operations will reflect that. + if (DPRGapSize) { + assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs"); + if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize)) + DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize); + else { + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize, + MachineInstr::FrameSetup); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize); + } + } // Move past area 3. if (DPRCSSize > 0) { - DPRCSPush = MBBI; // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. - while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) + while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) { + DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(MBBI)); LastPush = MBBI++; + } } // Move past the aligned DPRCS2 area. @@ -343,18 +411,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { NumBytes = 0; } - unsigned adjustedGPRCS1Size = GPRCS1Size; if (NumBytes) { // Adjust SP after all the callee-save spills. 
- if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) { - if (LastPush == GPRCS1Push) { - FramePtrOffsetInPush += NumBytes; - adjustedGPRCS1Size += NumBytes; - NumBytes = 0; - } - } else + if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) + DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes); + else { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, MachineInstr::FrameSetup); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes); + } if (HasFP && isARM) // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 @@ -368,13 +433,40 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setShouldRestoreSPFromFP(true); } - if (adjustedGPRCS1Size > 0) { - CFAOffset -= adjustedGPRCS1Size; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - MachineBasicBlock::iterator Pos = ++GPRCS1Push; - BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + // Set FP to point to the stack slot that contains the previous FP. + // For iOS, FP is R7, which has now been stored in spill area 1. + // Otherwise, if this is not iOS, all the callee-saved registers go + // into spill area 1, including the FP in R11. In either case, it + // is in area one and the adjustment needs to take place just after + // that push. 
+ if (HasFP) { + MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push); + unsigned PushSize = sizeOfSPAdjustment(GPRCS1Push); + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, + dl, TII, FramePtr, ARM::SP, + PushSize + FramePtrOffsetInPush, + MachineInstr::FrameSetup); + if (FramePtrOffsetInPush + PushSize != 0) { + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( + nullptr, MRI->getDwarfRegNum(FramePtr, true), + -(ArgRegsSaveSize - FramePtrOffsetInPush))); + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } else { + unsigned CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( + nullptr, MRI->getDwarfRegNum(FramePtr, true))); + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + } + + // Now that the prologue's actual instructions are finalised, we can insert + // the necessary DWARF cf instructions to describe the situation. Start by + // recording where each register ended up: + if (GPRCS1Size > 0) { + MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); + int CFIIndex; for (const auto &Entry : CSI) { unsigned Reg = Entry.getReg(); int FI = Entry.getFrameIdx(); @@ -405,41 +497,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } } - // Set FP to point to the stack slot that contains the previous FP. - // For iOS, FP is R7, which has now been stored in spill area 1. - // Otherwise, if this is not iOS, all the callee-saved registers go - // into spill area 1, including the FP in R11. In either case, it - // is in area one and the adjustment needs to take place just after - // that push. 
- if (HasFP) { - emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, GPRCS1Push, dl, TII, - FramePtr, ARM::SP, FramePtrOffsetInPush, - MachineInstr::FrameSetup); - if (FramePtrOffsetInPush) { - CFAOffset += FramePtrOffsetInPush; - unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( - nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); - BuildMI(MBB, GPRCS1Push, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - } else { - unsigned CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( - nullptr, MRI->getDwarfRegNum(FramePtr, true))); - BuildMI(MBB, GPRCS1Push, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - } - if (GPRCS2Size > 0) { - MachineBasicBlock::iterator Pos = ++GPRCS2Push; - if (!HasFP) { - CFAOffset -= GPRCS2Size; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } + MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); for (const auto &Entry : CSI) { unsigned Reg = Entry.getReg(); int FI = Entry.getFrameIdx(); @@ -465,17 +524,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (DPRCSSize > 0) { // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. 
- do { - MachineBasicBlock::iterator Push = DPRCSPush++; - if (!HasFP) { - CFAOffset -= sizeOfSPAdjustment(Push); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - } while (DPRCSPush->getOpcode() == ARM::VSTMDDB_UPD); - + MachineBasicBlock::iterator Pos = std::next(LastPush); for (const auto &Entry : CSI) { unsigned Reg = Entry.getReg(); int FI = Entry.getFrameIdx(); @@ -485,21 +534,17 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned Offset = MFI->getObjectOffset(FI); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); - BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } } } - if (NumBytes) { - if (!HasFP) { - CFAOffset -= NumBytes; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - } + // Now we can emit descriptions of where the canonical frame address was + // throughout the process. If we have a frame pointer, it takes over the job + // half-way through, so only the first few .cfi_def_cfa_offset instructions + // actually get emitted. + DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - @@ -507,6 +552,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); + AFI->setDPRCalleeSavedGapSize(DPRGapSize); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); // If we need dynamic stack realignment, do it here. 
Be paranoid and make @@ -574,14 +620,17 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); assert(!AFI->isThumb1OnlyFunction() && "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -609,6 +658,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, NumBytes -= (ArgRegsSaveSize + AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + + AFI->getDPRCalleeSavedGapSize() + AFI->getDPRCalleeSavedAreaSize()); // Reset SP based on frame pointer only if the stack frame extends beyond @@ -657,6 +707,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, while (MBBI->getOpcode() == ARM::VLDMDIA_UPD) MBBI++; } + if (AFI->getDPRCalleeSavedGapSize()) { + assert(AFI->getDPRCalleeSavedGapSize() == 4 && + "unexpected DPR alignment gap"); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize()); + } + if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; } @@ -717,8 +773,8 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg, int SPAdj) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARMBaseRegisterInfo *RegInfo = 
- static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); int FPOffset = Offset - AFI->getFramePtrSpillOffset(); @@ -803,7 +859,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned NumAlignedDPRCS2Regs, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -876,7 +932,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs) const { MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL = MI->getDebugLoc(); unsigned RetOpcode = MI->getOpcode(); @@ -966,7 +1022,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL = MI->getDebugLoc(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineFrameInfo &MFI = *MF.getFrameInfo(); // Mark the D-register spill slots as properly aligned. 
Since MFI computes @@ -1125,7 +1181,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL = MI->getDebugLoc(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); // Find the frame index assigned to d8. int D8SpillFI = 0; @@ -1340,12 +1396,15 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { return; // Don't bother if the default stack alignment is sufficiently high. - if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8) + if (MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment() >= 8) return; // Aligned spills require stack realignment. - const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); if (!RegInfo->canRealignStack(MF)) return; @@ -1384,10 +1443,10 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, unsigned NumGPRSpills = 0; SmallVector<unsigned, 4> UnspilledCS1GPRs; SmallVector<unsigned, 4> UnspilledCS2GPRs; - const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -1550,7 +1609,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // of GPRs, spill one extra callee save GPR so we won't 
have to pad between // the integer and double callee save areas. unsigned TargetAlign = getStackAlignment(); - if (TargetAlign == 8 && (NumGPRSpills & 1)) { + if (TargetAlign >= 8 && (NumGPRSpills & 1)) { if (CS1Spilled && !UnspilledCS1GPRs.empty()) { for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { unsigned Reg = UnspilledCS1GPRs[i]; @@ -1628,7 +1687,7 @@ void ARMFrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); if (!hasReservedCallFrame(MF)) { // If we have alloca, convert as follows: // ADJCALLSTACKDOWN -> sub, sp, sp, amount @@ -1746,7 +1805,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MCContext &Context = MMI.getContext(); const MCRegisterInfo *MRI = Context.getRegisterInfo(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL; diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 709afbc..a83b773 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARM_FRAMEINFO_H -#define ARM_FRAMEINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H +#define LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H #include "llvm/Target/TargetFrameLowering.h" diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index 0885c4e..0e4f81c 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -46,8 +46,8 @@ 
ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { const MCInstrDesc &LastMCID = LastMI->getDesc(); const TargetMachine &TM = MI->getParent()->getParent()->getTarget(); - const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); + const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>( + TM.getSubtargetImpl()->getInstrInfo()); // Skip over one non-VFP / NEON instruction. if (!LastMI->isBarrier() && diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h index a8198e2..ccf09db 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.h +++ b/lib/Target/ARM/ARMHazardRecognizer.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMHAZARDRECOGNIZER_H -#define ARMHAZARDRECOGNIZER_H +#ifndef LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H +#define LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" @@ -46,4 +46,4 @@ public: } // end namespace llvm -#endif // ARMHAZARDRECOGNIZER_H +#endif diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 38547cf..6941579 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -425,7 +425,7 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { return true; if (Use->isMachineOpcode()) { const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( - CurDAG->getTarget().getInstrInfo()); + CurDAG->getSubtarget().getInstrInfo()); const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); if (MCID.mayStore()) @@ -526,8 +526,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. 
int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -542,16 +541,15 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, } if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - int RHSC = (int)RHS->getZExtValue(); + int RHSC = (int)RHS->getSExtValue(); if (N.getOpcode() == ISD::SUB) RHSC = -RHSC; - if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) + if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -697,8 +695,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); @@ -718,8 +715,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -896,8 +892,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = 
CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32); @@ -911,8 +906,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -957,8 +951,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); @@ -975,8 +968,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } ARM_AM::AddrOpc AddSub = ARM_AM::add; @@ -1199,8 +1191,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm) { if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1217,8 +1208,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int 
FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1266,8 +1256,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1296,8 +1285,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1326,8 +1314,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1392,10 +1379,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, OffReg = OffReg.getOperand(0); else { ShAmt = 0; - ShOpcVal = ARM_AM::no_shift; } - } else { - ShOpcVal = ARM_AM::no_shift; } } @@ -1425,7 +1409,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy()); + Base = 
CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC / 4, MVT::i32); @@ -2361,6 +2345,25 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); } } + + if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { + unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); + unsigned LSB = 0; + if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && + !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) + return nullptr; + + if (LSB + Width > 32) + return nullptr; + + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0).getOperand(0), + CurDAG->getTargetConstant(LSB, MVT::i32), + CurDAG->getTargetConstant(Width - 1, MVT::i32), + getAL(CurDAG), Reg0 }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); + } + return nullptr; } @@ -2457,10 +2460,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } if (UseCP) { - SDValue CPIdx = - CurDAG->getTargetConstantPool(ConstantInt::get( - Type::getInt32Ty(*CurDAG->getContext()), Val), - getTargetLowering()->getPointerTy()); + SDValue CPIdx = CurDAG->getTargetConstantPool( + ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), + TLI->getPointerTy()); SDNode *ResNode; if (Subtarget->isThumb()) { @@ -2490,12 +2492,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::FrameIndex: { // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
int FI = cast<FrameIndexSDNode>(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); if (Subtarget->isThumb1Only()) { - SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), - getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops); + return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, + CurDAG->getTargetConstant(0, MVT::i32)); } else { unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? ARM::t2ADDri : ARM::ADDri); @@ -2509,6 +2509,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) return I; break; + case ISD::SIGN_EXTEND_INREG: case ISD::SRA: if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true)) return I; @@ -3393,7 +3394,6 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); Ops.push_back(T1.getValue(1)); CurDAG->UpdateNodeOperands(GU, Ops); - GU = T1.getNode(); } else { // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 4bfa5a8..0d0d81f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -70,9 +71,9 @@ namespace { class ARMCCState : public CCState { public: ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, - const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs, - LLVMContext &C, ParmContext PC) - : CCState(CC, isVarArg, MF, TM, locs, C) { + 
SmallVectorImpl<CCValAssign> &locs, LLVMContext &C, + ParmContext PC) + : CCState(CC, isVarArg, MF, locs, C) { assert(((PC == Call) || (PC == Prologue)) && "ARMCCState users must specify whether their context is call" "or prologue generation."); @@ -155,19 +156,11 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) { addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } -static TargetLoweringObjectFile *createTLOF(const Triple &TT) { - if (TT.isOSBinFormatMachO()) - return new TargetLoweringObjectFileMachO(); - if (TT.isOSWindows()) - return new TargetLoweringObjectFileCOFF(); - return new ARMElfTargetObjectFile(); -} - -ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) - : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) { +ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM) + : TargetLowering(TM) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); - RegInfo = TM.getRegisterInfo(); - Itins = TM.getInstrItineraryData(); + RegInfo = TM.getSubtargetImpl()->getRegisterInfo(); + Itins = TM.getSubtargetImpl()->getInstrItineraryData(); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); @@ -312,6 +305,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // Conversions between floating types. // RTABI chapter 4.1.2, Table 7 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Integer to floating-point conversions. @@ -387,6 +381,19 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); } + // The half <-> float conversion functions are always soft-float, but are + // needed for some targets which use a hard-float calling convention by + // default. 
+ if (Subtarget->isAAPCS_ABI()) { + setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS); + } else { + setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS); + setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS); + setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS); + } + if (Subtarget->isThumb1Only()) addRegisterClass(MVT::i32, &ARM::tGPRRegClass); else @@ -394,10 +401,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, &ARM::SPRRegClass); - if (!Subtarget->isFPOnlySP()) - addRegisterClass(MVT::f64, &ARM::DPRRegClass); - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); + addRegisterClass(MVT::f64, &ARM::DPRRegClass); } for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -579,11 +583,50 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->isThumb1Only()) setTargetDAGCombine(ISD::ADDC); + if (Subtarget->isFPOnlySP()) { + // When targetting a floating-point unit with only single-precision + // operations, f64 is legal for the few double-precision instructions which + // are present However, no double-precision operations other than moves, + // loads and stores are provided by the hardware. 
+ setOperationAction(ISD::FADD, MVT::f64, Expand); + setOperationAction(ISD::FSUB, MVT::f64, Expand); + setOperationAction(ISD::FMUL, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FDIV, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); + setOperationAction(ISD::FNEG, MVT::f64, Expand); + setOperationAction(ISD::FABS, MVT::f64, Expand); + setOperationAction(ISD::FSQRT, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FPOWI, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FLOG, MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP, MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); + } computeRegisterProperties(); - // ARM does not have f32 extending load. + // ARM does not have floating-point extending loads. setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); + + // ... or truncating stores + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); // ARM does not have i1 sign extending load. 
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); @@ -716,8 +759,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use - // the default expansion. - if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { + // the default expansion. If we are targeting a single threaded system, + // then set them all for expand so we can lower them later into their + // non-atomic form. + if (TM.Options.ThreadModel == ThreadModel::Single) + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); + else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { // ATOMIC_FENCE needs custom lowering; the others should have been expanded // to ldrex/strex loops already. setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); @@ -725,7 +772,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // On v8, we have particularly efficient implementations of atomic fences // if they can be combined with nearby atomic loads and stores. if (!Subtarget->hasV8Ops()) { - // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. + // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. setInsertFencesForAtomic(true); } } else { @@ -825,10 +872,17 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); } - // Special handling for half-precision FP. + + // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. + if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) { + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + } + + // fp16 is a special v7 extension that adds f16 <-> f32 conversions. 
if (!Subtarget->hasFP16()) { - setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); - setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); } } @@ -836,7 +890,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget->getTargetTriple().getOS() == Triple::IOS) { + if (Subtarget->getTargetTriple().isiOS()) { // For iOS, we don't want to the normal expansion of a libcall to // sincos. We want to issue a libcall to __sincos_stret. setOperationAction(ISD::FSINCOS, MVT::f64, Custom); @@ -844,6 +898,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } + // FP-ARMv8 implements a lot of rounding-like FP operations. + if (Subtarget->hasFPARMv8()) { + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FROUND, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f32, Legal); + if (!Subtarget->isFPOnlySP()) { + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + setOperationAction(ISD::FROUND, MVT::f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); + } + } // We have target-specific dag combine patterns for the following nodes: // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); @@ -1119,7 +1190,8 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { // Load are scheduled for latency even if there instruction itinerary // is not available. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); if (MCID.getNumDefs() == 0) @@ -1256,8 +1328,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext(), Call); + ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext(), Call); CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv, /* Return*/ true, isVarArg)); @@ -1417,8 +1489,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext(), Call); + ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext(), Call); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); @@ -1510,7 +1582,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // True if this byval aggregate will be split between registers // and memory. 
unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); - unsigned CurByValIdx = CCInfo.getInRegsParamsProceed(); + unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed(); if (CurByValIdx < ByValArgsCount) { @@ -1646,14 +1718,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); bool isStub = (isExt && Subtarget->isTargetMachO()) && getTargetMachine().getRelocationModel() != Reloc::Static; - isARMFunc = !Subtarget->isThumb() || isStub; + isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); // ARM call to a local ARM function is predicable. isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); // tBX takes a register source operand. if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(), - DAG.getTargetGlobalAddress(GV, dl, getPointerTy())); + DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), + 0, ARMII::MO_NONLAZY)); + Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, + MachinePointerInfo::getGOT(), false, false, true, 0); } else if (Subtarget->isTargetCOFF()) { assert(Subtarget->isTargetWindows() && "Windows is the only supported COFF target"); @@ -1679,7 +1754,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isDirect = true; bool isStub = Subtarget->isTargetMachO() && getTargetMachine().getRelocationModel() != Reloc::Static; - isARMFunc = !Subtarget->isThumb() || isStub; + isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); // tBX takes a register source operand. const char *Sym = S->getSymbol(); if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { @@ -1740,7 +1815,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Add a register mask operand representing the call-preserved registers. 
if (!isTailCall) { const uint32_t *Mask; - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo *TRI = + getTargetMachine().getSubtargetImpl()->getRegisterInfo(); const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI); if (isThisReturn) { // For 'this' returns, use the R0-preserving mask if applicable @@ -1940,17 +2016,30 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (Subtarget->isThumb1Only()) return false; + // Externally-defined functions with weak linkage should not be + // tail-called on ARM when the OS does not support dynamic + // pre-emption of symbols, as the AAELF spec requires normal calls + // to undefined weak functions to be replaced with a NOP or jump to the + // next instruction. The behaviour of branch instructions in this + // situation (as used for tail calls) is implementation-defined, so we + // cannot rely on the linker replacing the tail call with a return. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + if (GV->hasExternalWeakLinkage()) + return false; + } + // If the calling conventions do not match, then we'd better make sure the // results are returned in the same way as what the caller expects. 
if (!CCMatch) { SmallVector<CCValAssign, 16> RVLocs1; - ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), - getTargetMachine(), RVLocs1, *DAG.getContext(), Call); + ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1, + *DAG.getContext(), Call); CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); SmallVector<CCValAssign, 16> RVLocs2; - ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), - getTargetMachine(), RVLocs2, *DAG.getContext(), Call); + ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2, + *DAG.getContext(), Call); CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); if (RVLocs1.size() != RVLocs2.size()) @@ -1984,8 +2073,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // Check if stack adjustment is needed. For now, do not do this if any // argument is passed on the stack. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext(), Call); + ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext(), Call); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC, false, isVarArg)); if (CCInfo.getNextStackOffset()) { @@ -1995,7 +2084,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // the caller's fixed stack objects. 
MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { @@ -2038,7 +2128,7 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context); + CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true, isVarArg)); } @@ -2086,8 +2176,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SmallVector<CCValAssign, 16> RVLocs; // CCState - Info about the registers and stack slots. - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext(), Call); + ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext(), Call); // Analyze outgoing return values. CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, @@ -2098,6 +2188,10 @@ ARMTargetLowering::LowerReturn(SDValue Chain, RetOps.push_back(Chain); // Operand #0 = Chain (updated below) bool isLittleEndian = Subtarget->isLittle(); + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + AFI->setReturnRegsCount(RVLocs.size()); + // Copy the result values into the output registers. for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); @@ -2216,9 +2310,15 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (Copies.count(UseChain.getNode())) // Second CopyToReg Copy = *UI; - else + else { + // We are at the top of this chain. 
+ // If the copy has a glue operand, we conservatively assume it + // isn't safe to perform a tail call. + if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue) + return false; // First CopyToReg TCChain = UseChain; + } } } else if (Copy->getOpcode() == ISD::BITCAST) { // f32 returned in a single GPR. @@ -2227,6 +2327,10 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { Copy = *Copy->use_begin(); if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; + // If the copy has a glue operand, we conservatively assume it isn't safe to + // perform a tail call. + if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) + return false; TCChain = Copy->getOperand(0); } else { return false; @@ -2567,9 +2671,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. case Intrinsic::arm_rbit: { - assert(Op.getOperand(0).getValueType() == MVT::i32 && + assert(Op.getOperand(1).getValueType() == MVT::i32 && "RBIT intrinsic must have i32 type!"); - return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0)); + return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1)); } case Intrinsic::arm_thread_pointer: { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -2626,7 +2730,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1)); AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue()); - unsigned Domain = ARM_MB::ISH; + ARM_MB::MemBOpt Domain = ARM_MB::ISH; if (Subtarget->isMClass()) { // Only a full system barrier exists in the M-class architectures. Domain = ARM_MB::SY; @@ -2739,7 +2843,10 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, NumGPRs = (firstUnalloced <= 3) ? 
(4 - firstUnalloced) : 0; } - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); ArgRegsSize = NumGPRs * 4; // If parameter is split between stack and GPRs... @@ -2916,8 +3023,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue); + ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext(), Prologue); CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); @@ -2951,7 +3058,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, if (Flags.isByVal()) { unsigned ExtraArgRegsSize; unsigned ExtraArgRegsSaveSize; - computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(), + computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(), Flags.getByValSize(), ExtraArgRegsSize, ExtraArgRegsSaveSize); @@ -2966,7 +3073,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, } CCInfo.rewindByValRegsInfo(); lastInsIndex = -1; - if (isVarArg) { + if (isVarArg && MFI->hasVAStart()) { unsigned ExtraArgRegsSize; unsigned ExtraArgRegsSaveSize; computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0, @@ -3075,7 +3182,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Since they could be overwritten by lowering of arguments in case of // a tail call. 
if (Flags.isByVal()) { - unsigned CurByValIndex = CCInfo.getInRegsParamsProceed(); + unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed(); ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign()); int FrameIndex = StoreByValRegs( @@ -3108,7 +3215,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, } // varargs - if (isVarArg) + if (isVarArg && MFI->hasVAStart()) VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(), TotalArgRegsSaveSize); @@ -3130,6 +3237,18 @@ static bool isFloatingPointZero(SDValue Op) { if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) return CFP->getValueAPF().isPosZero(); } + } else if (Op->getOpcode() == ISD::BITCAST && + Op->getValueType(0) == MVT::f64) { + // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64) + // created by LowerConstantFP(). + SDValue BitcastOp = Op->getOperand(0); + if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) { + SDValue MoveOp = BitcastOp->getOperand(0); + if (MoveOp->getOpcode() == ISD::TargetConstant && + cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) { + return true; + } + } } return false; } @@ -3198,6 +3317,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, SDLoc dl) const { + assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64); SDValue Cmp; if (!isFloatingPointZero(RHS)) Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); @@ -3313,9 +3433,8 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); EVT VT = Op.getValueType(); - return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse, - ARMcc, CCR, OverflowCmp); - + return getCMOV(SDLoc(Op), VT, SelectTrue, SelectFalse, ARMcc, CCR, + OverflowCmp, DAG); } // Convert: @@ -3349,7 +3468,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, 
SelectionDAG &DAG) const { SDValue CCR = Cond.getOperand(3); SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); assert(True.getValueType() == VT); - return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); + return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG); } } } @@ -3419,6 +3538,32 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, } } +SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, + SDValue TrueVal, SDValue ARMcc, SDValue CCR, + SDValue Cmp, SelectionDAG &DAG) const { + if (Subtarget->isFPOnlySP() && VT == MVT::f64) { + FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), FalseVal); + TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), TrueVal); + + SDValue TrueLow = TrueVal.getValue(0); + SDValue TrueHigh = TrueVal.getValue(1); + SDValue FalseLow = FalseVal.getValue(0); + SDValue FalseHigh = FalseVal.getValue(1); + + SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow, + ARMcc, CCR, Cmp); + SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh, + ARMcc, CCR, duplicateCmp(Cmp, DAG)); + + return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High); + } else { + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, + Cmp); + } +} + SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); @@ -3428,6 +3573,18 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue FalseVal = Op.getOperand(3); SDLoc dl(Op); + if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { + DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, + dl); + + // If softenSetCCOperands only returned one value, we should compare it to + // zero. 
+ if (!RHS.getNode()) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + if (LHS.getValueType() == MVT::i32) { // Try to generate VSEL on ARMv8. // The VSEL instruction can't use all the usual ARM condition @@ -3452,8 +3609,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); - return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, - Cmp); + return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); } ARMCC::CondCodes CondCode, CondCode2; @@ -3468,12 +3624,18 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // select c, a, b // We only do this in unsafe-fp-math, because signed zeros and NaNs are // handled differently than the original code sequence. - if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal && - RHS == FalseVal) { - if (CC == ISD::SETOGT || CC == ISD::SETUGT) - return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); - if (CC == ISD::SETOLT || CC == ISD::SETULT) - return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); + if (getTargetMachine().Options.UnsafeFPMath) { + if (LHS == TrueVal && RHS == FalseVal) { + if (CC == ISD::SETOGT || CC == ISD::SETUGT) + return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); + if (CC == ISD::SETOLT || CC == ISD::SETULT) + return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); + } else if (LHS == FalseVal && RHS == TrueVal) { + if (CC == ISD::SETOLT || CC == ISD::SETULT) + return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); + if (CC == ISD::SETOGT || CC == ISD::SETUGT) + return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); + } } bool swpCmpOps = false; @@ -3492,14 +3654,12 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); SDValue Cmp = 
getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, - ARMcc, CCR, Cmp); + SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); if (CondCode2 != ARMCC::AL) { SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); - Result = DAG.getNode(ARMISD::CMOV, dl, VT, - Result, TrueVal, ARMcc2, CCR, Cmp2); + Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); } return Result; } @@ -3632,6 +3792,18 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(4); SDLoc dl(Op); + if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { + DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, + dl); + + // If softenSetCCOperands only returned one value, we should compare it to + // zero. 
+ if (!RHS.getNode()) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); @@ -3724,11 +3896,23 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); } -static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorFP_TO_INT(Op, DAG); + if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) { + RTLIB::Libcall LC; + if (Op.getOpcode() == ISD::FP_TO_SINT) + LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), + Op.getValueType()); + else + LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), + Op.getValueType()); + return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, + /*isSigned*/ false, SDLoc(Op)).first; + } + SDLoc dl(Op); unsigned Opc; @@ -3778,11 +3962,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(Opc, dl, VT, Op); } -static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorINT_TO_FP(Op, DAG); + if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) { + RTLIB::Libcall LC; + if (Op.getOpcode() == ISD::SINT_TO_FP) + LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), + Op.getValueType()); + else + LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), + Op.getValueType()); + return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, + /*isSigned*/ false, SDLoc(Op)).first; + } + SDLoc dl(Op); unsigned Opc; @@ -4291,7 +4487,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); SDLoc dl(Op); 
- if (Op.getOperand(1).getValueType().isFloatingPoint()) { + if (Op1.getValueType().isFloatingPoint()) { switch (SetCCOpcode) { default: llvm_unreachable("Illegal FP comparison"); case ISD::SETUNE: @@ -4555,6 +4751,11 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, bool IsDouble = Op.getValueType() == MVT::f64; ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); + // Use the default (constant pool) lowering for double constants when we have + // an SP-only FPU + if (IsDouble && Subtarget->isFPOnlySP()) + return SDValue(); + // Try splatting with a VMOV.f32... APFloat FPVal = CFP->getValueAPF(); int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); @@ -5733,7 +5934,7 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { // operation legalization where we can't create illegal types. return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), - LD->getMemoryVT(), LD->isVolatile(), + LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(), LD->isNonTemporal(), LD->getAlignment()); } @@ -6088,7 +6289,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Pair of floats / doubles used to pass the result. - StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); + StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr); // Create stack object for sret. 
const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy); @@ -6258,6 +6459,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) return LowerDYNAMIC_STACKALLOC(Op, DAG); llvm_unreachable("Don't know how to custom lower this!"); + case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); + case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); } } @@ -6294,7 +6497,8 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, void ARMTargetLowering:: SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); @@ -6377,9 +6581,9 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, .addReg(NewVReg2, RegState::Kill) .addReg(NewVReg3, RegState::Kill)); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5) - .addFrameIndex(FI) - .addImm(36)); // &jbuf[1] :: pc + BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5) + .addFrameIndex(FI) + .addImm(36); // &jbuf[1] :: pc AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) .addReg(NewVReg4, RegState::Kill) .addReg(NewVReg5, RegState::Kill) @@ -6409,7 +6613,8 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *ARMTargetLowering:: EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); @@ -6738,16 
+6943,14 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { for (std::vector<MachineBasicBlock*>::iterator I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { MachineBasicBlock *CurMBB = *I; - if (SeenMBBs.insert(CurMBB)) + if (SeenMBBs.insert(CurMBB).second) DispContBB->addSuccessor(CurMBB); } // N.B. the order the invoke BBs are processed in doesn't matter here. const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF); SmallVector<MachineBasicBlock*, 64> MBBLPads; - for (SmallPtrSet<MachineBasicBlock*, 64>::iterator - I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { - MachineBasicBlock *BB = *I; + for (MachineBasicBlock *BB : InvokeBBs) { // Remove the landing pad successor from the invoke block and replace it // with the new dispatch block. @@ -6926,7 +7129,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // This pseudo instruction has 3 operands: dst, src, size // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). // Otherwise, we will generate unrolled scalar copies. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = BB; ++It; @@ -7160,7 +7364,7 @@ MachineBasicBlock * ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, MachineBasicBlock *MBB) const { const TargetMachine &TM = getTargetMachine(); - const TargetInstrInfo &TII = *TM.getInstrInfo(); + const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); assert(Subtarget->isTargetWindows() && @@ -7216,8 +7420,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::R4, RegState::Kill))); + .addReg(ARM::SP).addReg(ARM::R4))); MI->eraseFromParent(); return MBB; @@ -7226,7 +7429,8 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); switch (MI->getOpcode()) { @@ -7479,12 +7683,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const { - if (!MI->hasPostISelHook()) { - assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && - "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); - return; - } - const MCInstrDesc *MCID = &MI->getDesc(); // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, // RSC. 
Coming out of isel, they have an implicit CPSR def, but the optional @@ -7496,8 +7694,8 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // Rename pseudo opcodes. unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); if (NewOpc) { - const ARMBaseInstrInfo *TII = - static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo()); + const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( + getTargetMachine().getSubtargetImpl()->getInstrInfo()); MCID = &TII->get(NewOpc); assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && @@ -8398,10 +8596,11 @@ static SDValue PerformBFICombine(SDNode *N, /// PerformVMOVRRDCombine - Target-specific dag combine xforms for /// ARMISD::VMOVRRD. static SDValue PerformVMOVRRDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // vmovrrd(vmovdrr x, y) -> x,y SDValue InDouble = N->getOperand(0); - if (InDouble.getOpcode() == ARMISD::VMOVDRR) + if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP()) return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); // vmovrrd(load f64) -> (load i32), (load i32) @@ -8432,8 +8631,6 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, if (DCI.DAG.getTargetLoweringInfo().isBigEndian()) std::swap (NewLD1, NewLD2); SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); - DCI.RemoveFromWorklist(LD); - DAG.DeleteNode(LD); return Result; } @@ -8596,7 +8793,7 @@ static SDValue PerformSTORECombine(SDNode *N, return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment(), - St->getTBAAInfo()); + St->getAAInfo()); } /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node @@ -8616,7 +8813,8 @@ static bool hasNormalLoadOperand(SDNode *N) { /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for /// ISD::BUILD_VECTOR. 
static SDValue PerformBUILD_VECTORCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI){ + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value // into a pair of GPRs, which is fine when the value is used as a scalar, @@ -8922,7 +9120,7 @@ static SDValue CombineBaseUpdate(SDNode *N, Tys[n] = VecTy; Tys[n++] = MVT::i32; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs+2)); + SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2)); SmallVector<SDValue, 8> Ops; Ops.push_back(N->getOperand(0)); // incoming chain Ops.push_back(N->getOperand(AddrOpIdx)); @@ -9001,7 +9199,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { for (n = 0; n < NumVecs; ++n) Tys[n] = VT; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumVecs+1)); + SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1)); SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, @@ -9631,10 +9829,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); - case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); + case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); case ISD::STORE: return PerformSTORECombine(N, DCI); - case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI); + case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget); case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); case ISD::VECTOR_SHUFFLE: 
return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); @@ -9686,8 +9884,10 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); } -bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, unsigned, - bool *Fast) const { +bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); @@ -9741,11 +9941,12 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || - (allowsUnalignedMemoryAccesses(MVT::v2f64, 0, &Fast) && Fast))) { + (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) { return MVT::v2f64; } else if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) || - (allowsUnalignedMemoryAccesses(MVT::f64, 0, &Fast) && Fast))) { + (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) && + Fast))) { return MVT::f64; } } @@ -10348,6 +10549,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return RCPair(0U, &ARM::hGPRRegClass); break; case 'r': + if (Subtarget->isThumb1Only()) + return RCPair(0U, &ARM::tGPRRegClass); return RCPair(0U, &ARM::GPRRegClass); case 'w': if (VT == MVT::Other) @@ -10552,7 +10755,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only"); unsigned Opcode = Op->getOpcode(); assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && - "Invalid opcode for Div/Rem lowering"); + "Invalid opcode for Div/Rem lowering"); bool isSigned = (Opcode == ISD::SDIVREM); EVT VT = Op->getValueType(0); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); @@ -10560,10 +10763,10 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, 
SelectionDAG &DAG) const { RTLIB::Libcall LC; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); - case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; - case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; - case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; - case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; } SDValue InChain = DAG.getEntryNode(); @@ -10583,7 +10786,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); - Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL); + Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr); SDLoc dl(Op); TargetLowering::CallLoweringInfo CLI(DAG); @@ -10611,7 +10814,7 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag); Flag = Chain.getValue(1); - SDVTList NodeTys = DAG.getVTList(MVT::i32, MVT::Glue); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag); SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32); @@ -10621,6 +10824,31 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const return DAG.getMergeValues(Ops, DL); } +SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() && + "Unexpected type for custom-lowering FP_EXTEND"); + + RTLIB::Libcall LC; + LC = 
RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); + + SDValue SrcVal = Op.getOperand(0); + return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, + /*isSigned*/ false, SDLoc(Op)).first; +} + +SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getOperand(0).getValueType() == MVT::f64 && + Subtarget->isFPOnlySP() && + "Unexpected type for custom-lowering FP_ROUND"); + + RTLIB::Libcall LC; + LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); + + SDValue SrcVal = Op.getOperand(0); + return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, + /*isSigned*/ false, SDLoc(Op)).first; +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. @@ -10648,7 +10876,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return false; if (VT == MVT::f32) return ARM_AM::getFP32Imm(Imm) != -1; - if (VT == MVT::f64) + if (VT == MVT::f64 && !Subtarget->isFPOnlySP()) return ARM_AM::getFP64Imm(Imm) != -1; return false; } @@ -10775,31 +11003,154 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return true; } -bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { - // Loads and stores less than 64-bits are already atomic; ones above that - // are doomed anyway, so defer to the default libcall and blame the OS when - // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit - // anything for those. 
- bool IsMClass = Subtarget->isMClass(); - if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); - return Size == 64 && !IsMClass; - } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass; +bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; } + +Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, + ARM_MB::MemBOpt Domain) const { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + + // First, if the target has no DMB, see what fallback we can use. + if (!Subtarget->hasDataBarrier()) { + // Some ARMv6 cpus can support data barriers with an mcr instruction. + // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get + // here. + if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) { + Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr); + Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0), + Builder.getInt32(0), Builder.getInt32(7), + Builder.getInt32(10), Builder.getInt32(5)}; + return Builder.CreateCall(MCR, args); + } else { + // Instead of using barriers, atomic accesses on these subtargets use + // libcalls. + llvm_unreachable("makeDMB on a target so old that it has no barriers"); + } + } else { + Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); + // Only a full system barrier exists in the M-class architectures. + Domain = Subtarget->isMClass() ? 
ARM_MB::SY : Domain; + Constant *CDomain = Builder.getInt32(Domain); + return Builder.CreateCall(DMB, CDomain); + } +} + +// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html +Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, + AtomicOrdering Ord, bool IsStore, + bool IsLoad) const { + if (!getInsertFencesForAtomic()) + return nullptr; + + switch (Ord) { + case NotAtomic: + case Unordered: + llvm_unreachable("Invalid fence: unordered/non-atomic"); + case Monotonic: + case Acquire: + return nullptr; // Nothing to do + case SequentiallyConsistent: + if (!IsStore) + return nullptr; // Nothing to do + /*FALLTHROUGH*/ + case Release: + case AcquireRelease: + if (Subtarget->isSwift()) + return makeDMB(Builder, ARM_MB::ISHST); + // FIXME: add a comment with a link to documentation justifying this. + else + return makeDMB(Builder, ARM_MB::ISH); + } + llvm_unreachable("Unknown fence ordering in emitLeadingFence"); +} + +Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, + AtomicOrdering Ord, bool IsStore, + bool IsLoad) const { + if (!getInsertFencesForAtomic()) + return nullptr; + + switch (Ord) { + case NotAtomic: + case Unordered: + llvm_unreachable("Invalid fence: unordered/not-atomic"); + case Monotonic: + case Release: + return nullptr; // Nothing to do + case Acquire: + case AcquireRelease: + case SequentiallyConsistent: + return makeDMB(Builder, ARM_MB::ISH); } + llvm_unreachable("Unknown fence ordering in emitTrailingFence"); +} + +// Loads and stores less than 64-bits are already atomic; ones above that +// are doomed anyway, so defer to the default libcall and blame the OS when +// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit +// anything for those. 
+bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { + unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); + return (Size == 64) && !Subtarget->isMClass(); +} + +// Loads and stores less than 64-bits are already atomic; ones above that +// are doomed anyway, so defer to the default libcall and blame the OS when +// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit +// anything for those. +// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that +// guarantee, see DDI0406C ARM architecture reference manual, +// sections A8.8.72-74 LDRD) +bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { + unsigned Size = LI->getType()->getPrimitiveSizeInBits(); + return (Size == 64) && !Subtarget->isMClass(); +} + +// For the real atomic operations, we have ldrex/strex up to 32 bits, +// and up to 64 bits on the non-M profiles +bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); + return Size <= (Subtarget->isMClass() ? 32U : 64U); +} + +// This has so far only been implemented for MachO. +bool ARMTargetLowering::useLoadStackGuardNode() const { + return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO; +} + +bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, + unsigned &Cost) const { + // If we do not have NEON, vector types are not natively supported. + if (!Subtarget->hasNEON()) + return false; - // For the real atomic operations, we have ldrex/strex up to 32 bits, - // and up to 64 bits on the non-M profiles - unsigned AtomicLimit = IsMClass ? 32 : 64; - return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit; + // Floating point values and vector values map to the same register file. 
+ // Therefore, although we could do a store extract of a vector type, this is + better to leave at float as we have more freedom in the addressing mode for + those. + if (VectorTy->isFPOrFPVectorTy()) + return false; + + // If the index is unknown at compile time, this is very expensive to lower + and it is not possible to combine the store with the extract. + if (!isa<ConstantInt>(Idx)) + return false; + + assert(VectorTy->isVectorTy() && "VectorTy is not a vector type"); + unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth(); + // We can do a store + vector extract on any vector that fits perfectly in a D + // or Q register. + if (BitWidth == 64 || BitWidth == 128) { + Cost = 0; + return true; + } + return false; } Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Type *ValTy = cast<PointerType>(Addr->getType())->getElementType(); - bool IsAcquire = - Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent; + bool IsAcquire = isAtLeastAcquire(Ord); // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd // intrinsic must return {i32, i32} and we have to recombine them into a @@ -10835,8 +11186,7 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - bool IsRelease = - Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent; + bool IsRelease = isAtLeastRelease(Ord); // Since the intrinsics must have legal type, the i64 intrinsics take two // parameters: "i32, i32".
We must marshal Val into the appropriate form @@ -10934,6 +11284,6 @@ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( HABaseType Base = HA_UNKNOWN; uint64_t Members = 0; bool result = isHomogeneousAggregate(Ty, Base, Members); - DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n"); + DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump()); return result; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 1ace0f3..89b0c31 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMISELLOWERING_H -#define ARMISELLOWERING_H +#ifndef LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H +#define LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -232,7 +232,7 @@ namespace llvm { class ARMTargetLowering : public TargetLowering { public: - explicit ARMTargetLowering(TargetMachine &TM); + explicit ARMTargetLowering(const TargetMachine &TM); unsigned getJumpTableEncoding() const override; @@ -266,11 +266,12 @@ namespace llvm { bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override; - /// allowsUnalignedMemoryAccesses - Returns true if the target allows + /// allowsMisalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses of the specified type. Returns whether it /// is "fast" by reference in the second argument. 
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace, - bool *Fast) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, + unsigned Align, + bool *Fast) const override; EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, @@ -391,12 +392,26 @@ namespace llvm { bool functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; + bool hasLoadLinkedStoreConditional() const override; + Instruction *makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const; Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override; Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override; - bool shouldExpandAtomicInIR(Instruction *Inst) const override; + Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, + bool IsStore, bool IsLoad) const override; + Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, + bool IsStore, bool IsLoad) const override; + + bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override; + bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; + bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + + bool useLoadStackGuardNode() const override; + + bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, + unsigned &Cost) const override; protected: std::pair<const TargetRegisterClass*, uint8_t> @@ -473,6 +488,10 @@ namespace llvm { SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; unsigned getRegisterByName(const char* 
RegName, EVT VT) const override; @@ -562,6 +581,9 @@ namespace llvm { bool mayBeEmittedAsTailCall(CallInst *CI) const override; + SDValue getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, SDValue TrueVal, + SDValue ARMcc, SDValue CCR, SDValue Cmp, + SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 59e9260..7d27cf3 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -203,6 +203,16 @@ def msr_mask : Operand<i32> { let ParserMatchClass = MSRMaskOperand; } +def BankedRegOperand : AsmOperandClass { + let Name = "BankedReg"; + let ParserMethod = "parseBankedRegOperand"; +} +def banked_reg : Operand<i32> { + let PrintMethod = "printBankedRegOperand"; + let DecoderMethod = "DecodeBankedReg"; + let ParserMatchClass = BankedRegOperand; +} + // Shift Right Immediate - A shift right immediate is encoded differently from // other shift immediates. 
The imm6 field is encoded like so: // diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index f235ac2..17d1ffa 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -90,6 +90,49 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } +void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const { + MachineFunction &MF = *MI->getParent()->getParent(); + const ARMSubtarget &Subtarget = MF.getTarget().getSubtarget<ARMSubtarget>(); + + if (!Subtarget.useMovt(MF)) { + if (RM == Reloc::PIC_) + expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12, RM); + else + expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_abs, ARM::LDRi12, RM); + return; + } + + if (RM != Reloc::PIC_) { + expandLoadStackGuardBase(MI, ARM::MOVi32imm, ARM::LDRi12, RM); + return; + } + + const GlobalValue *GV = + cast<GlobalValue>((*MI->memoperands_begin())->getValue()); + + if (!Subtarget.GVIsIndirectSymbol(GV, RM)) { + expandLoadStackGuardBase(MI, ARM::MOV_ga_pcrel, ARM::LDRi12, RM); + return; + } + + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Reg = MI->getOperand(0).getReg(); + MachineInstrBuilder MIB; + + MIB = BuildMI(MBB, MI, DL, get(ARM::MOV_ga_pcrel_ldr), Reg) + .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY); + unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; + MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( + MachinePointerInfo::getGOT(), Flag, 4, 4); + MIB.addMemOperand(MMO); + MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg); + MIB.addReg(Reg, RegState::Kill).addImm(0); + MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + AddDefaultPred(MIB); +} + namespace { /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC /// global base register for ARM ELF. 
@@ -113,8 +156,9 @@ namespace { ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create( *Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj); - unsigned Align = TM->getDataLayout() - ->getPrefTypeAlignment(Type::getInt32PtrTy(*Context)); + unsigned Align = + TM->getSubtargetImpl()->getDataLayout()->getPrefTypeAlignment( + Type::getInt32PtrTy(*Context)); unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align); MachineBasicBlock &FirstMBB = MF.front(); @@ -124,7 +168,7 @@ namespace { MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass); unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? ARM::t2LDRpci : ARM::LDRcp; - const TargetInstrInfo &TII = *TM->getInstrInfo(); + const TargetInstrInfo &TII = *TM->getSubtargetImpl()->getInstrInfo(); MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), TempReg) .addConstantPoolIndex(Idx); diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index b09958a..90f34ea 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMINSTRUCTIONINFO_H -#define ARMINSTRUCTIONINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMINSTRINFO_H +#define LLVM_LIB_TARGET_ARM_ARMINSTRINFO_H #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" @@ -37,6 +37,10 @@ public: /// always be able to get register info as well (through this method). 
/// const ARMRegisterInfo &getRegisterInfo() const override { return RI; } + +private: + void expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const override; }; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 2bb8976..3177114 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -241,6 +241,9 @@ def HasDB : Predicate<"Subtarget->hasDataBarrier()">, def HasMP : Predicate<"Subtarget->hasMPExtension()">, AssemblerPredicate<"FeatureMP", "mp-extensions">; +def HasVirtualization: Predicate<"false">, + AssemblerPredicate<"FeatureVirtualization", + "virtualization-extensions">; def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">, AssemblerPredicate<"FeatureTrustZone", "TrustZone">; @@ -633,6 +636,8 @@ def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> { let ParserMatchClass = Imm32AsmOperand; } +def imm8_or_16 : ImmLeaf<i32, [{ return Imm == 8 || Imm == 16;}]>; + /// imm1_7 predicate - Immediate in the range [1,7]. 
def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; } def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> { @@ -1961,7 +1966,7 @@ def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary, } def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", - []>, Requires<[IsARM, HasV7]> { + [(int_arm_dbg imm0_15:$opt)]>, Requires<[IsARM, HasV7]> { bits<4> opt; let Inst{27-4} = 0b001100100000111100001111; let Inst{3-0} = opt; @@ -2708,7 +2713,8 @@ multiclass AI2_stridx<bit isByte, string opc, def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode_imm12_pre:$addr), IndexModePre, StFrm, iii, - opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + opc, "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { bits<17> addr; let Inst{25} = 0; let Inst{23} = addr{12}; // U (add = ('U' == 1)) @@ -2720,7 +2726,8 @@ multiclass AI2_stridx<bit isByte, string opc, def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, ldst_so_reg:$addr), IndexModePre, StFrm, iir, - opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + opc, "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { bits<17> addr; let Inst{25} = 1; let Inst{23} = addr{12}; // U (add = ('U' == 1)) @@ -2733,7 +2740,7 @@ multiclass AI2_stridx<bit isByte, string opc, (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), IndexModePost, StFrm, iir, opc, "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb", []> { + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { // {12} isAdd // {11-0} imm12/Rm bits<14> offset; @@ -2751,7 +2758,7 @@ multiclass AI2_stridx<bit isByte, string opc, (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), IndexModePost, StFrm, iii, opc, "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb", []> { + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { // {12} isAdd // {11-0} imm12/Rm bits<14> offset; @@ -2828,7 +2835,8 @@ def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), 
def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre, StMiscFrm, IIC_iStore_bh_ru, - "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + "strh", "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { bits<14> addr; let Inst{23} = addr{8}; // U bit let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm @@ -2841,7 +2849,8 @@ def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb), def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addr_offset_none:$addr, am3offset:$offset), IndexModePost, StMiscFrm, IIC_iStore_bh_ru, - "strh", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", + "strh", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt, addr_offset_none:$addr, am3offset:$offset))]> { @@ -3417,7 +3426,8 @@ def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm), def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR), (SBCri GPR:$src, so_imm_not:$imm)>; def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR), - (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>; + (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>, + Requires<[IsARM, HasV6T2]>; // Note: These are implemented in C++ code, because they have to generate // ADD/SUBrs instructions, which use a complex pattern that a xform function @@ -3932,14 +3942,12 @@ multiclass AI_smul<string opc, PatFrag opnode> { def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (sra (opnode GPR:$Rn, - (sext_inreg GPR:$Rm, i16)), (i32 16)))]>, + []>, Requires<[IsARM, HasV5TE]>; def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (sra (opnode GPR:$Rn, - (sra GPR:$Rm, (i32 16))), (i32 16)))]>, + []>, Requires<[IsARM, HasV5TE]>; } @@ -3981,17 +3989,13 @@ multiclass AI_smla<string opc, PatFrag opnode> 
{ def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, - (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, - (sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>, + []>, Requires<[IsARM, HasV5TE, UseMulOps]>; def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, - (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, - (sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>, + []>, Requires<[IsARM, HasV5TE, UseMulOps]>; } } @@ -4111,7 +4115,7 @@ def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, // Misc. Arithmetic Instructions. // -def CLZ : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm), +def CLZ : AMiscA1I<0b00010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "clz", "\t$Rd, $Rm", [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>, Sched<[WriteALU]>; @@ -4629,7 +4633,7 @@ def : ARMPat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), class acquiring_load<PatFrag base> : PatFrag<(ops node:$ptr), (base node:$ptr), [{ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - return Ordering == Acquire || Ordering == SequentiallyConsistent; + return isAtLeastAcquire(Ordering); }]>; def atomic_load_acquire_8 : acquiring_load<atomic_load_8>; @@ -4639,7 +4643,7 @@ def atomic_load_acquire_32 : acquiring_load<atomic_load_32>; class releasing_store<PatFrag base> : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - return Ordering == Release || Ordering == SequentiallyConsistent; + return isAtLeastRelease(Ordering); }]>; def atomic_store_release_8 : releasing_store<atomic_store_8>; @@ -5060,12 +5064,31 @@ def MRSsys : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary, let Unpredictable{11-0} = 0b110100001111; } +// However, 
the MRS (banked register) system instruction (ARMv7VE) *does* have a +// separate encoding (distinguished by bit 5. +def MRSbanked : ABI<0b0001, (outs GPRnopc:$Rd), (ins banked_reg:$banked), + NoItinerary, "mrs", "\t$Rd, $banked", []>, + Requires<[IsARM, HasVirtualization]> { + bits<6> banked; + bits<4> Rd; + + let Inst{23} = 0; + let Inst{22} = banked{5}; // R bit + let Inst{21-20} = 0b10; + let Inst{19-16} = banked{3-0}; + let Inst{15-12} = Rd; + let Inst{11-9} = 0b001; + let Inst{8} = banked{4}; + let Inst{7-0} = 0b00000000; +} + // Move from ARM core register to Special Register // -// No need to have both system and application versions, the encodings are the -// same and the assembly parser has no way to distinguish between them. The mask -// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains -// the mask with the fields to be accessed in the special register. +// No need to have both system and application versions of MSR (immediate) or +// MSR (register), the encodings are the same and the assembly parser has no way +// to distinguish between them. The mask operand contains the special register +// (R Bit) in bit 4 and bits 3-0 contains the mask with the fields to be +// accessed in the special register. def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, "msr", "\t$mask, $Rn", []> { bits<5> mask; @@ -5093,6 +5116,25 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, let Inst{11-0} = a; } +// However, the MSR (banked register) system instruction (ARMv7VE) *does* have a +// separate encoding (distinguished by bit 5. 
+def MSRbanked : ABI<0b0001, (outs), (ins banked_reg:$banked, GPRnopc:$Rn), + NoItinerary, "msr", "\t$banked, $Rn", []>, + Requires<[IsARM, HasVirtualization]> { + bits<6> banked; + bits<4> Rn; + + let Inst{23} = 0; + let Inst{22} = banked{5}; // R bit + let Inst{21-20} = 0b10; + let Inst{19-16} = banked{3-0}; + let Inst{15-12} = 0b1111; + let Inst{11-9} = 0b001; + let Inst{8} = banked{4}; + let Inst{7-4} = 0b0000; + let Inst{3-0} = Rn; +} + // Dynamic stack allocation yields a _chkstk for Windows targets. These calls // are needed to probe the stack when allocating more than // 4k bytes in one go. Touching the stack at 4K increments is necessary to @@ -5278,11 +5320,6 @@ def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), (SMULTB GPR:$a, GPR:$b)>; def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), (SMULTB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), - (i32 16)), - (SMULWB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)), - (SMULWB GPR:$a, GPR:$b)>; def : ARMV5MOPat<(add GPR:$acc, (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), @@ -5305,13 +5342,6 @@ def : ARMV5MOPat<(add GPR:$acc, def : ARMV5MOPat<(add GPR:$acc, (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5MOPat<(add GPR:$acc, - (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), - (i32 16))), - (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5MOPat<(add GPR:$acc, - (sra (mul GPR:$a, sext_16_node:$b), (i32 16))), - (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; // Pre-v7 uses MCR for synchronization barriers. @@ -5591,3 +5621,8 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm", // is discarded. 
def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>, ComplexDeprecationPredicate<"IT">; + +let mayLoad = 1, mayStore =1, hasSideEffects = 1 in +def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn), + NoItinerary, + [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index c02bb3b..a0c627c 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -34,6 +34,14 @@ def nImmSplatI32 : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmSplatI32AsmOperand; } +def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; } +def nImmSplatNotI16 : Operand<i32> { + let ParserMatchClass = nImmSplatNotI16AsmOperand; +} +def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; } +def nImmSplatNotI32 : Operand<i32> { + let ParserMatchClass = nImmSplatNotI32AsmOperand; +} def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; } def nImmVMOVI32 : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; @@ -4376,7 +4384,7 @@ defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, "vqdmlsl", "s", null_frag>; -defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>; +defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>; def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), @@ -5429,7 +5437,7 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, IIC_VMOVSI, "vmov", "32", "$R, $V$lane", [(set GPR:$R, (extractelt (v2i32 DPR:$V), imm:$lane))]>, - Requires<[HasNEON, HasFastVGETLNi32]> { + Requires<[HasVFP2, HasFastVGETLNi32]> { let Inst{21} = lane{0}; } // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td @@ 
-5497,8 +5505,12 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), (ins DPR:$src1, GPR:$R, VectorIndex32:$lane), IIC_VMOVISL, "vmov", "32", "$V$lane, $R", [(set DPR:$V, (insertelt (v2i32 DPR:$src1), - GPR:$R, imm:$lane))]> { + GPR:$R, imm:$lane))]>, + Requires<[HasVFP2]> { let Inst{21} = lane{0}; + // This instruction is equivalent as + // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm) + let isInsertSubreg = 1; } } def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), @@ -6635,6 +6647,16 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +// ... immediates +def : NEONInstAlias<"vand${p}.i16 $Vd, $imm", + (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>; +def : NEONInstAlias<"vand${p}.i32 $Vd, $imm", + (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>; +def : NEONInstAlias<"vand${p}.i16 $Vd, $imm", + (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>; +def : NEONInstAlias<"vand${p}.i32 $Vd, $imm", + (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>; + // VLD1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index e17f73a..a867844 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -311,7 +311,7 @@ def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val", } def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end", - []>, T1Encoding<0b101101>, Deprecated<HasV8Ops> { + []>, T1Encoding<0b101101>, Requires<[IsNotMClass]>, Deprecated<HasV8Ops> { bits<1> end; // A8.6.156 let Inst{9-5} = 0b10010; @@ -360,6 +360,14 @@ def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm), let DecoderMethod = "DecodeThumbAddSpecialReg"; } +// Thumb1 frame lowering is rather fragile, we hope to be able to use +// tADDrSPi, but we may need to insert a sequence that clobbers CPSR. +def tADDframe : PseudoInst<(outs tGPR:$dst), (ins i32imm:$base, i32imm:$offset), + NoItinerary, []>, + Requires<[IsThumb, IsThumb1Only]> { + let Defs = [CPSR]; +} + // ADD sp, sp, #<imm7> def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), IIC_iALUi, "add", "\t$Rdn, $imm", []>, @@ -466,7 +474,7 @@ let isCall = 1, (outs), (ins pred:$p, t_blxtarget:$func), IIC_Br, "blx${p}\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsThumb, HasV5T]>, Sched<[WriteBrL]> { + Requires<[IsThumb, HasV5T, IsNotMClass]>, Sched<[WriteBrL]> { bits<24> func; let Inst{26} = func{23}; let Inst{25-16} = func{20-11}; @@ -1355,7 +1363,7 @@ def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>, Requires<[IsThumb]>; def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>, - Requires<[IsThumb, HasV5T]>; + Requires<[IsThumb, HasV5T, IsNotMClass]>; // Indirect calls to ARM routines def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>, diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 85e9351..807c252 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1262,15 
+1262,15 @@ defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR, // Loads with zero extension defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - GPR, UnOpFrag<(zextloadi16 node:$Src)>>; + GPRnopc, UnOpFrag<(zextloadi16 node:$Src)>>; defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - GPR, UnOpFrag<(zextloadi8 node:$Src)>>; + GPRnopc, UnOpFrag<(zextloadi8 node:$Src)>>; // Loads with sign extension defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - GPR, UnOpFrag<(sextloadi16 node:$Src)>>; + GPRnopc, UnOpFrag<(sextloadi16 node:$Src)>>; defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - GPR, UnOpFrag<(sextloadi8 node:$Src)>>; + GPRnopc, UnOpFrag<(sextloadi8 node:$Src)>>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword @@ -1973,6 +1973,16 @@ def t2SXTAH : T2I_exta_rrot<0b000, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">; +// A simple right-shift can also be used in most cases (the exception is the +// SXTH operations with a rotate of 24: there the non-contiguous bits are +// relevant). 
+def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)), + (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), i16)), + (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>, + Requires<[HasT2ExtractPack, IsThumb2]>; + // Zero extenders let AddedComplexity = 16 in { @@ -1999,8 +2009,16 @@ def t2UXTAB : T2I_exta_rrot<0b101, "uxtab", def t2UXTAH : T2I_exta_rrot<0b001, "uxtah", BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">; + +def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)), + (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), + (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>, + Requires<[HasT2ExtractPack, IsThumb2]>; } + //===----------------------------------------------------------------------===// // Arithmetic Instructions. 
// @@ -2708,8 +2726,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> { def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (sra (opnode rGPR:$Rn, - (sext_inreg rGPR:$Rm, i16)), (i32 16)))]>, + []>, Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -2721,8 +2738,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> { def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (sra (opnode rGPR:$Rn, - (sra rGPR:$Rm, (i32 16))), (i32 16)))]>, + []>, Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -2791,8 +2807,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def WB : T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, - (sext_inreg rGPR:$Rm, i16)), (i32 16))))]>, + []>, Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -2804,8 +2819,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def WT : T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, - (sra rGPR:$Rm, (i32 16))), (i32 16))))]>, + []>, Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -3291,7 +3305,8 @@ def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", "", - [], {?, ?, ?, ?}> { + [], {?, ?, ?, ?}>, + Requires<[IsThumb2, IsNotMClass]> { bits<4> Rt2; let Inst{11-8} = Rt2; } @@ -3367,7 +3382,8 @@ def t2STREXD : T2I_strex<0b0111, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr), 
AddrModeNone, 4, NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], - {?, ?, ?, ?}> { + {?, ?, ?, ?}>, + Requires<[IsThumb2, IsNotMClass]> { bits<4> Rt2; let Inst{11-8} = Rt2; } @@ -3614,7 +3630,7 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), // Branch and Exchange Jazelle -- for disassembly only // Rm = Inst{19-16} def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []>, - Sched<[WriteBr]> { + Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass, PreV8]> { bits<4> func; let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -3656,7 +3672,8 @@ let isBranch = 1, isTerminator = 1 in { // operands, create 3 versions of the same instruction. Once there's a clean // framework to represent optional operands, change this behavior. class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary, - !strconcat("cps", asm_op), []> { + !strconcat("cps", asm_op), []>, + Requires<[IsThumb2, IsNotMClass]> { bits<2> imod; bits<3> iflags; bits<5> mode; @@ -3702,7 +3719,8 @@ def : t2InstAlias<"sevl$p.w", (t2HINT 5, pred:$p)> { let Predicates = [IsThumb2, HasV8]; } -def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> { +def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", + [(int_arm_dbg imm0_15:$opt)]> { bits<4> opt; let Inst{31-20} = 0b111100111010; let Inst{19-16} = 0b1111; @@ -3739,7 +3757,8 @@ def t2DCPS3 : T2DCPS<0b11, "dcps3">; class T2SRS<bits<2> Op, bit W, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : T2I<oops, iops, itin, opc, asm, pattern> { + : T2I<oops, iops, itin, opc, asm, pattern>, + Requires<[IsThumb2,IsNotMClass]> { bits<5> mode; let Inst{31-25} = 0b1110100; let Inst{24-23} = Op; @@ -3770,7 +3789,8 @@ def : t2InstAlias<"srsia${p} $mode!", (t2SRSIA_UPD imm0_31:$mode, pred:$p)>; // Return From Exception is a system instruction. 
class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : T2I<oops, iops, itin, opc, asm, pattern> { + : T2I<oops, iops, itin, opc, asm, pattern>, + Requires<[IsThumb2,IsNotMClass]> { let Inst{31-20} = op31_20{11-0}; bits<4> Rn; @@ -3797,7 +3817,7 @@ let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary, "subs", "\tpc, lr, $imm", [(ARMintretflag imm0_255:$imm)]>, - Requires<[IsThumb2]> { + Requires<[IsThumb2,IsNotMClass]> { let Inst{31-8} = 0b111100111101111010001111; bits<8> imm; @@ -3941,10 +3961,10 @@ defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">; defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">; defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">; defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">; -defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8]>; -defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8]>; -defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8]>; -defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8]>; +defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8,IsThumb2]>; +defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8,IsThumb2]>; +defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8,IsThumb2]>; +defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8,IsThumb2]>; //===----------------------------------------------------------------------===// @@ -3960,7 +3980,7 @@ def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", bits<4> Rd; let Inst{31-12} = 0b11110011111011111000; let Inst{11-8} = Rd; - let Inst{7-0} = 0b0000; + let Inst{7-0} = 0b00000000; } def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>; @@ -3970,22 +3990,41 @@ def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", bits<4> Rd; let Inst{31-12} = 0b11110011111111111000; let Inst{11-8} = Rd; - let Inst{7-0} = 0b0000; + 
let Inst{7-0} = 0b00000000; +} + +def t2MRSbanked : T2I<(outs rGPR:$Rd), (ins banked_reg:$banked), + NoItinerary, "mrs", "\t$Rd, $banked", []>, + Requires<[IsThumb, HasVirtualization]> { + bits<6> banked; + bits<4> Rd; + + let Inst{31-21} = 0b11110011111; + let Inst{20} = banked{5}; // R bit + let Inst{19-16} = banked{3-0}; + let Inst{15-12} = 0b1000; + let Inst{11-8} = Rd; + let Inst{7-5} = 0b001; + let Inst{4} = banked{4}; + let Inst{3-0} = 0b0000; } + // M class MRS. // // This MRS has a mask field in bits 7-0 and can take more values than // the A/R class (a full msr_mask). -def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary, - "mrs", "\t$Rd, $mask", []>, +def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$SYSm), NoItinerary, + "mrs", "\t$Rd, $SYSm", []>, Requires<[IsThumb,IsMClass]> { bits<4> Rd; - bits<8> mask; + bits<8> SYSm; let Inst{31-12} = 0b11110011111011111000; let Inst{11-8} = Rd; - let Inst{19-16} = 0b1111; - let Inst{7-0} = mask; + let Inst{7-0} = SYSm; + + let Unpredictable{20-16} = 0b11111; + let Unpredictable{13} = 0b1; } @@ -4010,6 +4049,25 @@ def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn), let Inst{7-0} = 0; } +// However, the MSR (banked register) system instruction (ARMv7VE) *does* have a +// separate encoding (distinguished by bit 5. +def t2MSRbanked : T2I<(outs), (ins banked_reg:$banked, rGPR:$Rn), + NoItinerary, "msr", "\t$banked, $Rn", []>, + Requires<[IsThumb, HasVirtualization]> { + bits<6> banked; + bits<4> Rn; + + let Inst{31-21} = 0b11110011100; + let Inst{20} = banked{5}; // R bit + let Inst{19-16} = Rn; + let Inst{15-12} = 0b1000; + let Inst{11-8} = banked{3-0}; + let Inst{7-5} = 0b001; + let Inst{4} = banked{4}; + let Inst{3-0} = 0b0000; +} + + // M class MSR. 
// // Move from ARM core register to Special Register @@ -4022,7 +4080,13 @@ def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn), let Inst{20} = 0b0; let Inst{19-16} = Rn; let Inst{15-12} = 0b1000; - let Inst{11-0} = SYSm; + let Inst{11-10} = SYSm{11-10}; + let Inst{9-8} = 0b00; + let Inst{7-0} = SYSm{7-0}; + + let Unpredictable{20} = 0b1; + let Unpredictable{13} = 0b1; + let Unpredictable{9-8} = 0b11; } diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 1d7802a..d78f2ac 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -515,6 +515,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, let Inst{5} = Sm{0}; let Inst{15-12} = Dd{3-0}; let Inst{22} = Dd{4}; + + let Predicates = [HasVFP2, HasDPVFP]; } // Special case encoding: bits 11-8 is 0b1011. @@ -551,12 +553,6 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def : Pat<(f32_to_f16 SPR:$a), - (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; - -def : Pat<(f16_to_f32 GPR:$a), - (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; - def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; @@ -619,26 +615,42 @@ def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, let Inst{5} = Dm{4}; } -multiclass vcvt_inst<string opc, bits<2> rm> { +def : Pat<(fp_to_f16 SPR:$a), + (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; + +def : Pat<(fp_to_f16 (f64 DPR:$a)), + (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>; + +def : Pat<(f16_to_fp GPR:$a), + (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; + +def : Pat<(f64 (f16_to_fp GPR:$a)), + (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>; + +multiclass vcvt_inst<string opc, bits<2> rm, + SDPatternOperator node = null_frag> { let PostEncoderMethod = "", DecoderNamespace = 
"VFPV8" in { def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), - []>, Requires<[HasFPARMv8]> { + [(set SPR:$Sd, (arm_ftosi (node SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"), - []>, Requires<[HasFPARMv8]> { + [(set SPR:$Sd, (arm_ftoui (node SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"), - []>, Requires<[HasFPARMv8, HasDPVFP]> { + [(set SPR:$Sd, (arm_ftosi (f64 (node (f64 DPR:$Dm)))))]>, + Requires<[HasFPARMv8, HasDPVFP]> { bits<5> Dm; let Inst{17-16} = rm; @@ -652,7 +664,8 @@ multiclass vcvt_inst<string opc, bits<2> rm> { def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"), - []>, Requires<[HasFPARMv8, HasDPVFP]> { + [(set SPR:$Sd, (arm_ftoui (f64 (node (f64 DPR:$Dm)))))]>, + Requires<[HasFPARMv8, HasDPVFP]> { bits<5> Dm; let Inst{17-16} = rm; @@ -665,10 +678,10 @@ multiclass vcvt_inst<string opc, bits<2> rm> { } } -defm VCVTA : vcvt_inst<"a", 0b00>; +defm VCVTA : vcvt_inst<"a", 0b00, frnd>; defm VCVTN : vcvt_inst<"n", 0b01>; -defm VCVTP : vcvt_inst<"p", 0b10>; -defm VCVTM : vcvt_inst<"m", 0b11>; +defm VCVTP : vcvt_inst<"p", 0b10, fceil>; +defm VCVTM : vcvt_inst<"m", 0b11, ffloor>; def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), @@ -684,18 +697,20 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, let D = VFPNeonA8Domain; } -multiclass vrint_inst_zrx<string opc, bit op, bit op2> { +multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> { def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, 
!strconcat("vrint", opc), ".f32\t$Sd, $Sm", - []>, Requires<[HasFPARMv8]> { + [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { let Inst{7} = op2; let Inst{16} = op; } def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", - []>, Requires<[HasFPARMv8, HasDPVFP]> { + [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>, + Requires<[HasFPARMv8, HasDPVFP]> { let Inst{7} = op2; let Inst{16} = op; } @@ -708,22 +723,25 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2> { Requires<[HasFPARMv8,HasDPVFP]>; } -defm VRINTZ : vrint_inst_zrx<"z", 0, 1>; -defm VRINTR : vrint_inst_zrx<"r", 0, 0>; -defm VRINTX : vrint_inst_zrx<"x", 1, 0>; +defm VRINTZ : vrint_inst_zrx<"z", 0, 1, ftrunc>; +defm VRINTR : vrint_inst_zrx<"r", 0, 0, fnearbyint>; +defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint>; -multiclass vrint_inst_anpm<string opc, bits<2> rm> { +multiclass vrint_inst_anpm<string opc, bits<2> rm, + SDPatternOperator node = null_frag> { let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"), - []>, Requires<[HasFPARMv8]> { + [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"), - []>, Requires<[HasFPARMv8, HasDPVFP]> { + [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>, + Requires<[HasFPARMv8, HasDPVFP]> { let Inst{17-16} = rm; } } @@ -736,10 +754,10 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm> { Requires<[HasFPARMv8,HasDPVFP]>; } -defm VRINTA : vrint_inst_anpm<"a", 0b00>; +defm VRINTA : vrint_inst_anpm<"a", 0b00, frnd>; defm VRINTN : vrint_inst_anpm<"n", 0b01>; -defm VRINTP : vrint_inst_anpm<"p", 0b10>; -defm VRINTM : vrint_inst_anpm<"m", 0b11>; +defm VRINTP : 
vrint_inst_anpm<"p", 0b10, fceil>; +defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>; def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), @@ -830,6 +848,11 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011, // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; + + // This instruction is equivalent to + // $Rt = EXTRACT_SUBREG $Dm, ssub_0 + // $Rt2 = EXTRACT_SUBREG $Dm, ssub_1 + let isExtractSubreg = 1; } def VMOVRRS : AVConv3I<0b11000101, 0b1010, @@ -878,6 +901,10 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; + + // This instruction is equivalent to + // $Dm = REG_SEQUENCE $Rt, ssub_0, $Rt2, ssub_1 + let isRegSequence = 1; } let neverHasSideEffects = 1 in diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp deleted file mode 100644 index 6d1114d..0000000 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ /dev/null @@ -1,344 +0,0 @@ -//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the JIT interfaces for the ARM target. 
-// -//===----------------------------------------------------------------------===// - -#include "ARMJITInfo.h" -#include "ARMConstantPoolValue.h" -#include "ARMMachineFunctionInfo.h" -#include "ARMRelocations.h" -#include "MCTargetDesc/ARMBaseInfo.h" -#include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/raw_ostream.h" -#include <cstdlib> -using namespace llvm; - -#define DEBUG_TYPE "jit" - -void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction"); -} - -/// JITCompilerFunction - This contains the address of the JIT function used to -/// compile a function lazily. -static TargetJITInfo::JITCompilerFn JITCompilerFunction; - -// Get the ASMPREFIX for the current host. This is often '_'. -#ifndef __USER_LABEL_PREFIX__ -#define __USER_LABEL_PREFIX__ -#endif -#define GETASMPREFIX2(X) #X -#define GETASMPREFIX(X) GETASMPREFIX2(X) -#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__) - -// CompilationCallback stub - We can't use a C function with inline assembly in -// it, because the prolog/epilog inserted by GCC won't work for us. (We need -// to preserve more context and manipulate the stack directly). Instead, -// write our own wrapper, which does things our way, so we have complete -// control over register saving and restoring. -extern "C" { -#if defined(__arm__) - void ARMCompilationCallback(); - asm( - ".text\n" - ".align 2\n" - ".globl " ASMPREFIX "ARMCompilationCallback\n" - ASMPREFIX "ARMCompilationCallback:\n" - // Save caller saved registers since they may contain stuff - // for the real target function right now. We have to act as if this - // whole compilation callback doesn't exist as far as the caller is - // concerned, so we can't just preserve the callee saved regs. 
- "stmdb sp!, {r0, r1, r2, r3, lr}\n" -#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) - "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" -#endif - // The LR contains the address of the stub function on entry. - // pass it as the argument to the C part of the callback - "mov r0, lr\n" - "sub sp, sp, #4\n" - // Call the C portion of the callback - "bl " ASMPREFIX "ARMCompilationCallbackC\n" - "add sp, sp, #4\n" - // Restoring the LR to the return address of the function that invoked - // the stub and de-allocating the stack space for it requires us to - // swap the two saved LR values on the stack, as they're backwards - // for what we need since the pop instruction has a pre-determined - // order for the registers. - // +--------+ - // 0 | LR | Original return address - // +--------+ - // 1 | LR | Stub address (start of stub) - // 2-5 | R3..R0 | Saved registers (we need to preserve all regs) - // 6-20 | D0..D7 | Saved VFP registers - // +--------+ - // -#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) - // Restore VFP caller-saved registers. - "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" -#endif - // - // We need to exchange the values in slots 0 and 1 so we can - // return to the address in slot 1 with the address in slot 0 - // restored to the LR. - "ldr r0, [sp,#20]\n" - "ldr r1, [sp,#16]\n" - "str r1, [sp,#20]\n" - "str r0, [sp,#16]\n" - // Return to the (newly modified) stub to invoke the real function. - // The above twiddling of the saved return addresses allows us to - // deallocate everything, including the LR the stub saved, with two - // updating load instructions. - "ldmia sp!, {r0, r1, r2, r3, lr}\n" - "ldr pc, [sp], #4\n" - ); -#else // Not an ARM host - void ARMCompilationCallback() { - llvm_unreachable("Cannot call ARMCompilationCallback() on a non-ARM arch!"); - } -#endif -} - -/// ARMCompilationCallbackC - This is the target-specific function invoked -/// by the function stub when we did not know the real target of a call. 
-/// This function must locate the start of the stub or call site and pass -/// it into the JIT compiler function. -extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) { - // Get the address of the compiled code for this function. - intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr); - - // Rewrite the call target... so that we don't end up here every time we - // execute the call. We're replacing the first two instructions of the - // stub with: - // ldr pc, [pc,#-4] - // <addr> - if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) { - llvm_unreachable("ERROR: Unable to mark stub writable"); - } - *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4] - *(intptr_t *)(StubAddr+4) = NewVal; - if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) { - llvm_unreachable("ERROR: Unable to mark stub executable"); - } -} - -TargetJITInfo::LazyResolverFn -ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) { - JITCompilerFunction = F; - return ARMCompilationCallback; -} - -void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr, - JITCodeEmitter &JCE) { - uint8_t Buffer[4]; - uint8_t *Cur = Buffer; - MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)Ptr); - void *PtrAddr = JCE.allocIndirectGV( - GV, Buffer, sizeof(Buffer), /*Alignment=*/4); - addIndirectSymAddr(Ptr, (intptr_t)PtrAddr); - return PtrAddr; -} - -TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() { - // The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a - // 4-byte address. See emitFunctionStub for details. - StubLayout Result = {16, 4}; - return Result; -} - -void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE) { - void *Addr; - // If this is just a call to an external function, emit a branch instead of a - // call. The code is the same except for one bit of the last instruction. - if (Fn != (void*)(intptr_t)ARMCompilationCallback) { - // Branch to the corresponding function addr. 
- if (IsPIC) { - // The stub is 16-byte size and 4-aligned. - intptr_t LazyPtr = getIndirectSymAddr(Fn); - if (!LazyPtr) { - // In PIC mode, the function stub is loading a lazy-ptr. - LazyPtr= (intptr_t)emitGlobalValueIndirectSym((const GlobalValue*)F, Fn, JCE); - DEBUG(if (F) - errs() << "JIT: Indirect symbol emitted at [" << LazyPtr - << "] for GV '" << F->getName() << "'\n"; - else - errs() << "JIT: Stub emitted at [" << LazyPtr - << "] for external function at '" << Fn << "'\n"); - } - JCE.emitAlignment(4); - Addr = (void*)JCE.getCurrentPCValue(); - if (!sys::Memory::setRangeWritable(Addr, 16)) { - llvm_unreachable("ERROR: Unable to mark stub writable"); - } - JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4] - JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip - JCE.emitWordLE(0xe59cf000); // ldr pc, [ip] - JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8) - sys::Memory::InvalidateInstructionCache(Addr, 16); - if (!sys::Memory::setRangeExecutable(Addr, 16)) { - llvm_unreachable("ERROR: Unable to mark stub executable"); - } - } else { - // The stub is 8-byte size and 4-aligned. - JCE.emitAlignment(4); - Addr = (void*)JCE.getCurrentPCValue(); - if (!sys::Memory::setRangeWritable(Addr, 8)) { - llvm_unreachable("ERROR: Unable to mark stub writable"); - } - JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] - JCE.emitWordLE((intptr_t)Fn); // addr of function - sys::Memory::InvalidateInstructionCache(Addr, 8); - if (!sys::Memory::setRangeExecutable(Addr, 8)) { - llvm_unreachable("ERROR: Unable to mark stub executable"); - } - } - } else { - // The compilation callback will overwrite the first two words of this - // stub with indirect branch instructions targeting the compiled code. - // This stub sets the return address to restart the stub, so that - // the new branch will be invoked when we come back. - // - // Branch and link to the compilation callback. - // The stub is 16-byte size and 4-byte aligned. 
- JCE.emitAlignment(4); - Addr = (void*)JCE.getCurrentPCValue(); - if (!sys::Memory::setRangeWritable(Addr, 16)) { - llvm_unreachable("ERROR: Unable to mark stub writable"); - } - // Save LR so the callback can determine which stub called it. - // The compilation callback is responsible for popping this prior - // to returning. - JCE.emitWordLE(0xe92d4000); // push {lr} - // Set the return address to go back to the start of this stub. - JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12 - // Invoke the compilation callback. - JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] - // The address of the compilation callback. - JCE.emitWordLE((intptr_t)ARMCompilationCallback); - sys::Memory::InvalidateInstructionCache(Addr, 16); - if (!sys::Memory::setRangeExecutable(Addr, 16)) { - llvm_unreachable("ERROR: Unable to mark stub executable"); - } - } - - return Addr; -} - -intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const { - ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType(); - switch (RT) { - default: - return (intptr_t)(MR->getResultPointer()); - case ARM::reloc_arm_pic_jt: - // Destination address - jump table base. - return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal(); - case ARM::reloc_arm_jt_base: - // Jump table base address. - return getJumpTableBaseAddr(MR->getJumpTableIndex()); - case ARM::reloc_arm_cp_entry: - case ARM::reloc_arm_vfp_cp_entry: - // Constant pool entry address. 
- return getConstantPoolEntryAddr(MR->getConstantPoolIndex()); - case ARM::reloc_arm_machine_cp_entry: { - ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal(); - assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) && - "Can't handle this machine constant pool entry yet!"); - intptr_t Addr = (intptr_t)(MR->getResultPointer()); - Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment(); - return Addr; - } - } -} - -/// relocate - Before the JIT can run a block of code that has been emitted, -/// it must rewrite the code to contain the actual addresses of any -/// referenced global symbols. -void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase) { - for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { - void *RelocPos = (char*)Function + MR->getMachineCodeOffset(); - intptr_t ResultPtr = resolveRelocDestAddr(MR); - switch ((ARM::RelocationType)MR->getRelocationType()) { - case ARM::reloc_arm_cp_entry: - case ARM::reloc_arm_vfp_cp_entry: - case ARM::reloc_arm_relative: { - // It is necessary to calculate the correct PC relative value. We - // subtract the base addr from the target addr to form a byte offset. - ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; - // If the result is positive, set bit U(23) to 1. - if (ResultPtr >= 0) - *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift; - else { - // Otherwise, obtain the absolute value and set bit U(23) to 0. - *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift); - ResultPtr = - ResultPtr; - } - // Set the immed value calculated. - // VFP immediate offset is multiplied by 4. - if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry) - ResultPtr = ResultPtr >> 2; - *((intptr_t*)RelocPos) |= ResultPtr; - // Set register Rn to PC (which is register 15 on all architectures). - // FIXME: This avoids the need for register info in the JIT class. 
- *((intptr_t*)RelocPos) |= 15 << ARMII::RegRnShift; - break; - } - case ARM::reloc_arm_pic_jt: - case ARM::reloc_arm_machine_cp_entry: - case ARM::reloc_arm_absolute: { - // These addresses have already been resolved. - *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr; - break; - } - case ARM::reloc_arm_branch: { - // It is necessary to calculate the correct value of signed_immed_24 - // field. We subtract the base addr from the target addr to form a - // byte offset, which must be inside the range -33554432 and +33554428. - // Then, we set the signed_immed_24 field of the instruction to bits - // [25:2] of the byte offset. More details ARM-ARM p. A4-11. - ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; - ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2; - assert(ResultPtr >= -33554432 && ResultPtr <= 33554428); - *((intptr_t*)RelocPos) |= ResultPtr; - break; - } - case ARM::reloc_arm_jt_base: { - // JT base - (instruction addr + 8) - ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; - *((intptr_t*)RelocPos) |= ResultPtr; - break; - } - case ARM::reloc_arm_movw: { - ResultPtr = ResultPtr & 0xFFFF; - *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; - *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; - break; - } - case ARM::reloc_arm_movt: { - ResultPtr = (ResultPtr >> 16) & 0xFFFF; - *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; - *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; - break; - } - } - } -} - -void ARMJITInfo::Initialize(const MachineFunction &MF, bool isPIC) { - const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - ConstPoolId2AddrMap.resize(AFI->getNumPICLabels()); - JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); - IsPIC = isPIC; -} diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h deleted file mode 100644 index 27e2a20..0000000 --- a/lib/Target/ARM/ARMJITInfo.h +++ /dev/null @@ -1,177 +0,0 @@ -//===-- ARMJITInfo.h - ARM implementation of the JIT interface -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure 
-// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the ARMJITInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef ARMJITINFO_H -#define ARMJITINFO_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Target/TargetJITInfo.h" - -namespace llvm { - class ARMTargetMachine; - - class ARMJITInfo : public TargetJITInfo { - // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding - // CONSTPOOL_ENTRY addresses. - SmallVector<intptr_t, 16> ConstPoolId2AddrMap; - - // JumpTableId2AddrMap - A map from inline jumptable ids to the - // corresponding inline jump table bases. - SmallVector<intptr_t, 16> JumpTableId2AddrMap; - - // PCLabelMap - A map from PC labels to addresses. - DenseMap<unsigned, intptr_t> PCLabelMap; - - // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol) - // addresses to their indirect symbol addresses. - DenseMap<void*, intptr_t> Sym2IndirectSymMap; - - // IsPIC - True if the relocation model is PIC. This is used to determine - // how to codegen function stubs. - bool IsPIC; - - public: - explicit ARMJITInfo() : IsPIC(false) { useGOT = false; } - - /// replaceMachineCodeForFunction - Make it so that calling the function - /// whose machine code is at OLD turns into a call to NEW, perhaps by - /// overwriting OLD with a branch to NEW. This is used for self-modifying - /// code. 
- /// - void replaceMachineCodeForFunction(void *Old, void *New) override; - - /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object - /// to emit an indirect symbol which contains the address of the specified - /// ptr. - void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, - JITCodeEmitter &JCE) override; - - // getStubLayout - Returns the size and alignment of the largest call stub - // on ARM. - StubLayout getStubLayout() override; - - /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a - /// small native function that simply calls the function at the specified - /// address. - void *emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE) override; - - /// getLazyResolverFunction - Expose the lazy resolver to the JIT. - LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; - - /// relocate - Before the JIT can run a block of code that has been emitted, - /// it must rewrite the code to contain the actual addresses of any - /// referenced global symbols. - void relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase) override; - - /// hasCustomConstantPool - Allows a target to specify that constant - /// pool address resolution is handled by the target. - bool hasCustomConstantPool() const override { return true; } - - /// hasCustomJumpTables - Allows a target to specify that jumptables - /// are emitted by the target. - bool hasCustomJumpTables() const override { return true; } - - /// allocateSeparateGVMemory - If true, globals should be placed in - /// separately allocated heap memory rather than in the same - /// code memory allocated by JITCodeEmitter. - bool allocateSeparateGVMemory() const override { -#ifdef __APPLE__ - return true; -#else - return false; -#endif - } - - /// Initialize - Initialize internal stage for the function being JITted. 
- /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize - /// jump table ids to jump table bases map; remember if codegen relocation - /// model is PIC. - void Initialize(const MachineFunction &MF, bool isPIC); - - /// getConstantPoolEntryAddr - The ARM target puts all constant - /// pool entries into constant islands. This returns the address of the - /// constant pool entry of the specified index. - intptr_t getConstantPoolEntryAddr(unsigned CPI) const { - assert(CPI < ConstPoolId2AddrMap.size()); - return ConstPoolId2AddrMap[CPI]; - } - - /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address - /// where its associated value is stored. When relocations are processed, - /// this value will be used to resolve references to the constant. - void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) { - assert(CPI < ConstPoolId2AddrMap.size()); - ConstPoolId2AddrMap[CPI] = Addr; - } - - /// getJumpTableBaseAddr - The ARM target inline all jump tables within - /// text section of the function. This returns the address of the base of - /// the jump table of the specified index. - intptr_t getJumpTableBaseAddr(unsigned JTI) const { - assert(JTI < JumpTableId2AddrMap.size()); - return JumpTableId2AddrMap[JTI]; - } - - /// addJumpTableBaseAddr - Map a jump table index to the address where - /// the corresponding inline jump table is emitted. When relocations are - /// processed, this value will be used to resolve references to the - /// jump table. - void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) { - assert(JTI < JumpTableId2AddrMap.size()); - JumpTableId2AddrMap[JTI] = Addr; - } - - /// getPCLabelAddr - Retrieve the address of the PC label of the - /// specified id. - intptr_t getPCLabelAddr(unsigned Id) const { - DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id); - assert(I != PCLabelMap.end()); - return I->second; - } - - /// addPCLabelAddr - Remember the address of the specified PC label. 
- void addPCLabelAddr(unsigned Id, intptr_t Addr) { - PCLabelMap.insert(std::make_pair(Id, Addr)); - } - - /// getIndirectSymAddr - Retrieve the address of the indirect symbol of the - /// specified symbol located at address. Returns 0 if the indirect symbol - /// has not been emitted. - intptr_t getIndirectSymAddr(void *Addr) const { - DenseMap<void*,intptr_t>::const_iterator I= Sym2IndirectSymMap.find(Addr); - if (I != Sym2IndirectSymMap.end()) - return I->second; - return 0; - } - - /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to - /// its indirect symbol address. - void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) { - Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr)); - } - - private: - /// resolveRelocDestAddr - Resolve the resulting address of the relocation - /// if it's not already solved. Constantpool entries must be resolved by - /// ARM target. - intptr_t resolveRelocDestAddr(MachineRelocation *MR) const; - }; -} - -#endif diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index a03bcdb..c429ac1 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -144,6 +144,46 @@ namespace { char ARMLoadStoreOpt::ID = 0; } +static bool definesCPSR(const MachineInstr *MI) { + for (const auto &MO : MI->operands()) { + if (!MO.isReg()) + continue; + if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) + // If the instruction has live CPSR def, then it's not safe to fold it + // into load / store. 
+ return true; + } + + return false; +} + +static int getMemoryOpOffset(const MachineInstr *MI) { + int Opcode = MI->getOpcode(); + bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; + unsigned NumOperands = MI->getDesc().getNumOperands(); + unsigned OffField = MI->getOperand(NumOperands-3).getImm(); + + if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || + Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || + Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || + Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) + return OffField; + + // Thumb1 immediate offsets are scaled by 4 + if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi) + return OffField * 4; + + int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) + : ARM_AM::getAM5Offset(OffField) * 4; + ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField) + : ARM_AM::getAM5Op(OffField); + + if (Op == ARM_AM::sub) + return -Offset; + + return Offset; +} + static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Opcode) { default: llvm_unreachable("Unhandled opcode!"); @@ -335,40 +375,50 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, unsigned WordOffset, ARMCC::CondCodes Pred, unsigned PredReg) { assert(isThumb1 && "Can only update base register uses for Thumb1!"); - - // Start updating any instructions with immediate offsets. Insert a sub before + // Start updating any instructions with immediate offsets. Insert a SUB before // the first non-updateable instruction (if any). 
for (; MBBI != MBB.end(); ++MBBI) { + bool InsertSub = false; + unsigned Opc = MBBI->getOpcode(); + if (MBBI->readsRegister(Base)) { - unsigned Opc = MBBI->getOpcode(); int Offset; - bool InsertSub = false; + bool IsLoad = + Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi; + bool IsStore = + Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi; - if (Opc == ARM::tLDRi || Opc == ARM::tSTRi || - Opc == ARM::tLDRHi || Opc == ARM::tSTRHi || - Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) { + if (IsLoad || IsStore) { // Loads and stores with immediate offsets can be updated, but only if // the new offset isn't negative. // The MachineOperand containing the offset immediate is the last one // before predicates. MachineOperand &MO = MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); - // The offsets are scaled by 1, 2 or 4 depending on the Opcode + // The offsets are scaled by 1, 2 or 4 depending on the Opcode. Offset = MO.getImm() - WordOffset * getImmScale(Opc); - if (Offset >= 0) + + // If storing the base register, it needs to be reset first. + unsigned InstrSrcReg = MBBI->getOperand(0).getReg(); + + if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) MO.setImm(Offset); else InsertSub = true; - } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) { - // SUB/ADD using this register. Merge it with the update. - // If the merged offset is too large, insert a new sub instead. + } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && + !definesCPSR(MBBI)) { + // SUBS/ADDS using this register, with a dead def of the CPSR. + // Merge it with the update; if the merged offset is too large, + // insert a new sub instead. MachineOperand &MO = MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); Offset = (Opc == ARM::tSUBi8) ? 
MO.getImm() + WordOffset * 4 : MO.getImm() - WordOffset * 4 ; - if (TL->isLegalAddImmediate(Offset)) { + if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) { + // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if + // Offset == 0. MO.setImm(Offset); // The base register has now been reset, so exit early. return; @@ -381,13 +431,19 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, InsertSub = true; } - if (InsertSub) { - // An instruction above couldn't be updated, so insert a sub. - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base)) - .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) - .addImm(Pred).addReg(PredReg); - return; - } + } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) { + // Since SUBS sets the condition flags, we can't place the base reset + // after an instruction that has a live CPSR def. + // The base register might also contain an argument for a function call. + InsertSub = true; + } + + if (InsertSub) { + // An instruction above couldn't be updated, so insert a sub. + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) + .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4) + .addImm(Pred).addReg(PredReg); + return; } if (MBBI->killsRegister(Base)) @@ -395,15 +451,18 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, return; } - // The end of the block was reached. This means register liveness escapes the - // block, and it's necessary to insert a sub before the last instruction. - if (MBB.succ_size() > 0) - // But only insert the SUB if there is actually a successor block. - // FIXME: Check more carefully if register is live at this point, e.g. by - // also examining the successor block's register liveness information. - AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base)) - .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) + // End of block was reached. 
+ if (MBB.succ_size() > 0) { + // FIXME: Because of a bug, live registers are sometimes missing from + // the successor blocks' live-in sets. This means we can't trust that + // information and *always* have to reset at the end of a block. + // See PR21029. + if (MBBI != MBB.end()) --MBBI; + AddDefaultT1CC( + BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) + .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4) .addImm(Pred).addReg(PredReg); + } } /// MergeOps - Create and insert a LDM or STM with Base as base register and @@ -422,6 +481,28 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (NumRegs <= 1) return false; + // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. + // Compute liveness information for that register to make the decision. + bool SafeToClobberCPSR = !isThumb1 || + (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) == + MachineBasicBlock::LQR_Dead); + + bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. + + // Exception: If the base register is in the input reglist, Thumb1 LDM is + // non-writeback. + // It's also not possible to merge an STR of the base register in Thumb1. + if (isThumb1) + for (unsigned I = 0; I < NumRegs; ++I) + if (Base == Regs[I].first) { + if (Opcode == ARM::tLDRi) { + Writeback = false; + break; + } else if (Opcode == ARM::tSTRi) { + return false; + } + } + ARM_AM::AMSubMode Mode = ARM_AM::ia; // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); @@ -445,6 +526,11 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (NumRegs <= 2) return false; + // On Thumb1, it's not worth materializing a new base register without + // clobbering the CPSR (i.e. not using ADDS/SUBS). 
+ if (!SafeToClobberCPSR) + return false; + unsigned NewBase; if (isi32Load(Opcode)) { // If it is a load, then just use one of the destination register to @@ -459,13 +545,15 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, int BaseOpc = isThumb2 ? ARM::t2ADDri : + (isThumb1 && Offset < 8) ? ARM::tADDi3 : isThumb1 ? ARM::tADDi8 : ARM::ADDri; if (Offset < 0) { + Offset = - Offset; BaseOpc = isThumb2 ? ARM::t2SUBri : + (isThumb1 && Offset < 8) ? ARM::tSUBi3 : isThumb1 ? ARM::tSUBi8 : ARM::SUBri; - Offset = - Offset; } if (!TL->isLegalAddImmediate(Offset)) @@ -473,22 +561,28 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; // Probably not worth it then. if (isThumb1) { - if (Base != NewBase) { + // Thumb1: depending on immediate size, use either + // ADDS NewBase, Base, #imm3 + // or + // MOV NewBase, Base + // ADDS NewBase, #imm8. + if (Base != NewBase && Offset >= 8) { // Need to insert a MOV to the new base first. - // FIXME: If the immediate fits in 3 bits, use ADD instead. BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) .addReg(Base, getKillRegState(BaseKill)) .addImm(Pred).addReg(PredReg); + // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. + Base = NewBase; + BaseKill = false; } - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)) - .addReg(NewBase, getKillRegState(true)).addImm(Offset) + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) .addImm(Pred).addReg(PredReg); } else { BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) .addImm(Pred).addReg(PredReg).addReg(0); } - Base = NewBase; BaseKill = true; // New base is always killed straight away. 
} @@ -501,16 +595,16 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); if (!Opcode) return false; - bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. - - // Exception: If the base register is in the input reglist, Thumb1 LDM is - // non-writeback. Check for this. - if (Opcode == ARM::tLDMIA && isThumb1) - for (unsigned I = 0; I < NumRegs; ++I) - if (Base == Regs[I].first) { - Writeback = false; - break; - } + // Check if a Thumb1 LDM/STM merge is safe. This is the case if: + // - There is no writeback (LDM of base register), + // - the base register is killed by the merged instruction, + // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS + // to reset the base register. + // Otherwise, don't merge. + // It's safe to return here since the code to materialize a new base register + // above is also conditional on SafeToClobberCPSR. + if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) + return false; MachineInstrBuilder MIB; @@ -525,11 +619,11 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, MIB.addReg(Base, getDefRegState(true)) .addReg(Base, getKillRegState(BaseKill)); - // The base isn't dead after a merged instruction with writeback. Update - // future uses of the base with the added offset (if possible), or reset - // the base register as necessary. + // The base isn't dead after a merged instruction with writeback. + // Insert a sub instruction after the newly formed instruction to reset. if (!BaseKill) UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); + } else { // No writeback, simply build the MachineInstr. MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); @@ -700,6 +794,11 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, memOps[i].MBBI = Merges.back(); memOps[i].Position = insertPos; } + + // Update memOps offsets, since they may have been modified by MergeOps. 
+ for (auto &MemOp : memOps) { + MemOp.Offset = getMemoryOpOffset(MemOp.MBBI); + } } /// MergeLDR_STR - Merge a number of load / store instructions into one or more @@ -721,7 +820,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); unsigned Count = 1; unsigned Limit = ~0U; - + bool BaseKill = false; // vldm / vstm limit are 32 for S variants, 16 for D variants. switch (Opcode) { @@ -760,36 +859,28 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, ++Count; } else { // Can't merge this in. Try merge the earlier ones first. - MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, - Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges); + // We need to compute BaseKill here because the MemOps may have been + // reordered. + BaseKill = Loc->killsRegister(Base); + + MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base, + BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, MemOps, Merges); return; } - if (MemOps[i].Position > MemOps[insertAfter].Position) + if (MemOps[i].Position > MemOps[insertAfter].Position) { insertAfter = i; + Loc = MemOps[i].MBBI; + } } - bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; + BaseKill = Loc->killsRegister(Base); MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); } -static bool definesCPSR(MachineInstr *MI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) - // If the instruction has live CPSR def, then it's not safe to fold it - // into load / store. 
- return true; - } - - return false; -} - static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, unsigned Bytes, unsigned Limit, ARMCC::CondCodes Pred, unsigned PredReg) { @@ -1327,34 +1418,6 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { RS->forward(std::prev(Loc)); } -static int getMemoryOpOffset(const MachineInstr *MI) { - int Opcode = MI->getOpcode(); - bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; - unsigned NumOperands = MI->getDesc().getNumOperands(); - unsigned OffField = MI->getOperand(NumOperands-3).getImm(); - - if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || - Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || - Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || - Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) - return OffField; - - // Thumb1 immediate offsets are scaled by 4 - if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi) - return OffField * 4; - - int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) - : ARM_AM::getAM5Offset(OffField) * 4; - if (isAM3) { - if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub) - Offset = -Offset; - } else { - if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) - Offset = -Offset; - } - return Offset; -} - static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, @@ -1725,21 +1788,15 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); - TL = TM.getTargetLowering(); + TL = TM.getSubtargetImpl()->getTargetLowering(); AFI = Fn.getInfo<ARMFunctionInfo>(); - TII = TM.getInstrInfo(); - TRI = TM.getRegisterInfo(); + TII = TM.getSubtargetImpl()->getInstrInfo(); + TRI = TM.getSubtargetImpl()->getRegisterInfo(); STI = &TM.getSubtarget<ARMSubtarget>(); RS = new RegScavenger(); isThumb2 = AFI->isThumb2Function(); isThumb1 = AFI->isThumbFunction() && !isThumb2; - // FIXME: Temporarily disabling for 
Thumb-1 due to miscompiles - if (isThumb1) { - delete RS; - return false; - } - bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { @@ -1793,10 +1850,10 @@ namespace { } bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - TD = Fn.getTarget().getDataLayout(); - TII = Fn.getTarget().getInstrInfo(); - TRI = Fn.getTarget().getRegisterInfo(); - STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); + TD = Fn.getSubtarget().getDataLayout(); + TII = Fn.getSubtarget().getInstrInfo(); + TRI = Fn.getSubtarget().getRegisterInfo(); + STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); MRI = &Fn.getRegInfo(); MF = &Fn; @@ -1811,7 +1868,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, - SmallPtrSet<MachineInstr*, 4> &MemOps, + SmallPtrSetImpl<MachineInstr*> &MemOps, SmallSet<unsigned, 4> &MemRegs, const TargetRegisterInfo *TRI) { // Are there stores / loads / calls between them? 
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 44a9e34..4e67fa1 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMMACHINEFUNCTIONINFO_H -#define ARMMACHINEFUNCTIONINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H #include "ARMSubtarget.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" namespace llvm { @@ -47,6 +48,9 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// unsigned ArgRegsSaveSize; + /// ReturnRegsCount - Number of registers used up in the return. + unsigned ReturnRegsCount; + /// HasStackFrame - True if this function has a stack frame. Set by /// processFunctionBeforeCalleeSavedScan(). bool HasStackFrame; @@ -82,6 +86,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// areas. unsigned GPRCS1Size; unsigned GPRCS2Size; + unsigned DPRCSAlignGapSize; unsigned DPRCSSize; /// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in @@ -118,14 +123,19 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// being passed on the stack unsigned ArgumentStackSize; + /// CoalescedWeights - mapping of basic blocks to the rolling counter of + /// coalesced weights. 
+ DenseMap<const MachineBasicBlock*, unsigned> CoalescedWeights; + public: ARMFunctionInfo() : isThumb(false), hasThumb2(false), - ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), + ArgRegsSaveSize(0), ReturnRegsCount(0), HasStackFrame(false), + RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), - GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), + GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0), NumAlignedDPRCS2Regs(0), JumpTableUId(0), PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} @@ -146,6 +156,9 @@ public: } void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; } + unsigned getReturnRegsCount() const { return ReturnRegsCount; } + void setReturnRegsCount(unsigned s) { ReturnRegsCount = s; } + bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } @@ -171,10 +184,12 @@ public: unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; } unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; } + unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; } unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; } void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; } void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } + void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; } void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; } unsigned getArgumentStackSize() const { return ArgumentStackSize; } @@ -221,7 +236,16 @@ public: else return -1U; } + + DenseMap<const MachineBasicBlock*, unsigned>::iterator getCoalescedWeight( + MachineBasicBlock* MBB) { + auto It = CoalescedWeights.find(MBB); + if (It == CoalescedWeights.end()) { + It = CoalescedWeights.insert(std::make_pair(MBB, 0)).first; + } + return It; + } }; } // End llvm namespace -#endif // ARMMACHINEFUNCTIONINFO_H +#endif diff --git 
a/lib/Target/ARM/ARMPerfectShuffle.h b/lib/Target/ARM/ARMPerfectShuffle.h index efa22fb..3ff0bee 100644 --- a/lib/Target/ARM/ARMPerfectShuffle.h +++ b/lib/Target/ARM/ARMPerfectShuffle.h @@ -12,6 +12,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_ARM_ARMPERFECTSHUFFLE_H +#define LLVM_LIB_TARGET_ARM_ARMPERFECTSHUFFLE_H + // 31 entries have cost 0 // 242 entries have cost 1 // 1447 entries have cost 2 @@ -6584,3 +6587,5 @@ static const unsigned PerfectShuffleTable[6561+1] = { 835584U, // <u,u,u,u>: Cost 0 copy LHS 0 }; + +#endif diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index 3e6af3f..b623173 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMREGISTERINFO_H -#define ARMREGISTERINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMREGISTERINFO_H +#define LLVM_LIB_TARGET_ARM_ARMREGISTERINFO_H #include "ARMBaseRegisterInfo.h" diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h deleted file mode 100644 index 21877fd..0000000 --- a/lib/Target/ARM/ARMRelocations.h +++ /dev/null @@ -1,62 +0,0 @@ -//===-- ARMRelocations.h - ARM Code Relocations -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the ARM target-specific relocation types. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ARMRELOCATIONS_H -#define ARMRELOCATIONS_H - -#include "llvm/CodeGen/MachineRelocation.h" - -namespace llvm { - namespace ARM { - enum RelocationType { - // reloc_arm_absolute - Absolute relocation, just add the relocated value - // to the value already in memory. - reloc_arm_absolute, - - // reloc_arm_relative - PC relative relocation, add the relocated value to - // the value already in memory, after we adjust it for where the PC is. - reloc_arm_relative, - - // reloc_arm_cp_entry - PC relative relocation for constpool_entry's whose - // addresses are kept locally in a map. - reloc_arm_cp_entry, - - // reloc_arm_vfp_cp_entry - Same as reloc_arm_cp_entry except the offset - // should be divided by 4. - reloc_arm_vfp_cp_entry, - - // reloc_arm_machine_cp_entry - Relocation of a ARM machine constantpool - // entry. - reloc_arm_machine_cp_entry, - - // reloc_arm_jt_base - PC relative relocation for jump tables whose - // addresses are kept locally in a map. - reloc_arm_jt_base, - - // reloc_arm_pic_jt - PIC jump table entry relocation: dest bb - jt base. - reloc_arm_pic_jt, - - // reloc_arm_branch - Branch address relocation. - reloc_arm_branch, - - // reloc_arm_movt - MOVT immediate relocation. - reloc_arm_movt, - - // reloc_arm_movw - MOVW immediate relocation. 
- reloc_arm_movw - }; - } -} - -#endif - diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 3dcc0df..fa30ac3 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -157,7 +157,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, return SDValue(); const ARMTargetLowering &TLI = - *static_cast<const ARMTargetLowering*>(DAG.getTarget().getTargetLowering()); + *DAG.getTarget().getSubtarget<ARMSubtarget>().getTargetLowering(); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h index 13769dc..94b98e6 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMSELECTIONDAGINFO_H -#define ARMSELECTIONDAGINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMSELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_ARM_ARMSELECTIONDAGINFO_H #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Target/TargetSelectionDAGInfo.h" diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 0eb24ef..600f39d 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -15,9 +15,9 @@ #include "ARMFrameLowering.h" #include "ARMISelLowering.h" #include "ARMInstrInfo.h" -#include "ARMJITInfo.h" #include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" +#include "ARMMachineFunctionInfo.h" #include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" @@ -27,6 +27,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; @@ -47,11 +49,13 @@ static cl::opt<bool> UseFusedMulOps("arm-use-mulops", cl::init(true), 
cl::Hidden); +namespace { enum AlignMode { DefaultAlign, StrictAlign, NoStrictAlign }; +} static cl::opt<AlignMode> Align(cl::desc("Load/store alignment support"), @@ -98,11 +102,6 @@ static std::string computeDataLayout(ARMSubtarget &ST) { // Pointers are 32 bits and aligned to 32 bits. Ret += "-p:32:32"; - // On thumb, i16,i18 and i1 have natural aligment requirements, but we try to - // align to 32. - if (ST.isThumb()) - Ret += "-i1:8:32-i8:8:32-i16:16:32"; - // ABIs other than APCS have 64 bit integers with natural alignment. if (!ST.isAPCS_ABI()) Ret += "-i64:64"; @@ -119,10 +118,9 @@ static std::string computeDataLayout(ARMSubtarget &ST) { else Ret += "-v128:64:128"; - // On thumb and APCS, only try to align aggregates to 32 bits (the default is - // 64 bits). - if (ST.isThumb() || ST.isAPCS_ABI()) - Ret += "-a:0:32"; + // Try to align aggregates to 32 bits (the default is 64 bits, which has no + // particular hardware support on 32-bit ARM). + Ret += "-a:0:32"; // Integer registers are 32 bits. Ret += "-n32"; @@ -144,18 +142,18 @@ static std::string computeDataLayout(ARMSubtarget &ST) { ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { initializeEnvironment(); - resetSubtargetFeatures(CPU, FS); + initSubtargetFeatures(CPU, FS); return *this; } ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, TargetMachine &TM, - bool IsLittle, const TargetOptions &Options) + const std::string &FS, const TargetMachine &TM, + bool IsLittle) : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle), - TargetTriple(TT), Options(Options), TargetABI(ARM_ABI_UNKNOWN), + TargetTriple(TT), Options(TM.Options), TargetABI(ARM_ABI_UNKNOWN), DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))), - TSInfo(DL), JITInfo(), + TSInfo(DL), InstrInfo(isThumb1Only() ? 
(ARMBaseInstrInfo *)new Thumb1InstrInfo(*this) : !isThumb() @@ -188,7 +186,6 @@ void ARMSubtarget::initializeEnvironment() { InThumbMode = false; HasThumb2 = false; NoARM = false; - PostRAScheduler = false; IsR9Reserved = ReserveR9; UseMovt = false; SupportsTailCall = false; @@ -217,23 +214,7 @@ void ARMSubtarget::initializeEnvironment() { UseLong64 = false; } -void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { - AttributeSet FnAttrs = MF->getFunction()->getAttributes(); - Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, - "target-cpu"); - Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, - "target-features"); - std::string CPU = - !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; - std::string FS = - !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; - if (!FS.empty()) { - initializeEnvironment(); - resetSubtargetFeatures(CPU, FS); - } -} - -void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { +void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (CPUString.empty()) { if (isTargetIOS() && TargetTriple.getArchName().endswith("v7s")) // Default to the Swift CPU when targeting armv7s/thumbv7s. 
@@ -275,9 +256,8 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { TargetABI = ARM_ABI_AAPCS; break; default: - if ((isTargetIOS() && isMClass()) || - (TargetTriple.isOSBinFormatMachO() && - TargetTriple.getOS() == Triple::UnknownOS)) + if (TargetTriple.isOSBinFormatMachO() && + TargetTriple.getOS() == Triple::UnknownOS) TargetABI = ARM_ABI_AAPCS; else TargetABI = ARM_ABI_APCS; @@ -299,49 +279,39 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { UseMovt = hasV6T2Ops() && ArmUseMOVT; if (isTargetMachO()) { - IsR9Reserved = ReserveR9 | !HasV6Ops; + IsR9Reserved = ReserveR9 || !HasV6Ops; SupportsTailCall = !isTargetIOS() || !getTargetTriple().isOSVersionLT(5, 0); } else { IsR9Reserved = ReserveR9; SupportsTailCall = !isThumb1Only(); } - if (!isThumb() || hasThumb2()) - PostRAScheduler = true; - - switch (Align) { - case DefaultAlign: - // Assume pre-ARMv6 doesn't support unaligned accesses. - // - // ARMv6 may or may not support unaligned accesses depending on the - // SCTLR.U bit, which is architecture-specific. We assume ARMv6 - // Darwin and NetBSD targets support unaligned accesses, and others don't. - // - // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit - // which raises an alignment fault on unaligned accesses. Linux - // defaults this bit to 0 and handles it as a system-wide (not - // per-process) setting. It is therefore safe to assume that ARMv7+ - // Linux targets support unaligned accesses. The same goes for NaCl. - // - // The above behavior is consistent with GCC. - AllowsUnalignedMem = - (hasV7Ops() && (isTargetLinux() || isTargetNaCl() || - isTargetNetBSD())) || - (hasV6Ops() && (isTargetMachO() || isTargetNetBSD())); - // The one exception is cortex-m0, which despite being v6, does not - // support unaligned accesses. Rather than make the above boolean - // expression even more obtuse, just override the value here. 
- if (isThumb1Only() && isMClass()) - AllowsUnalignedMem = false; - break; - case StrictAlign: - AllowsUnalignedMem = false; - break; - case NoStrictAlign: - AllowsUnalignedMem = true; - break; + if (Align == DefaultAlign) { + // Assume pre-ARMv6 doesn't support unaligned accesses. + // + // ARMv6 may or may not support unaligned accesses depending on the + // SCTLR.U bit, which is architecture-specific. We assume ARMv6 + // Darwin and NetBSD targets support unaligned accesses, and others don't. + // + // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit + // which raises an alignment fault on unaligned accesses. Linux + // defaults this bit to 0 and handles it as a system-wide (not + // per-process) setting. It is therefore safe to assume that ARMv7+ + // Linux targets support unaligned accesses. The same goes for NaCl. + // + // The above behavior is consistent with GCC. + AllowsUnalignedMem = + (hasV7Ops() && (isTargetLinux() || isTargetNaCl() || + isTargetNetBSD())) || + (hasV6Ops() && (isTargetMachO() || isTargetNetBSD())); + } else { + AllowsUnalignedMem = !(Align == StrictAlign); } + // No v6M core supports unaligned memory access (v6M ARM ARM A3.2) + if (isV6M()) + AllowsUnalignedMem = false; + switch (IT) { case DefaultIT: RestrictIT = hasV8Ops() ? true : false; @@ -368,11 +338,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, if (RelocM == Reloc::Static) return false; - // Materializable GVs (in JIT lazy compilation mode) do not require an extra - // load from stub. - bool isDecl = GV->hasAvailableExternallyLinkage(); - if (GV->isDeclaration() && !GV->isMaterializable()) - isDecl = true; + bool isDecl = GV->isDeclarationForLinker(); if (!isTargetMachO()) { // Extra load is needed for all externally visible. 
@@ -415,33 +381,22 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, } unsigned ARMSubtarget::getMispredictionPenalty() const { - return SchedModel->MispredictPenalty; + return SchedModel.MispredictPenalty; } bool ARMSubtarget::hasSinCos() const { - return getTargetTriple().getOS() == Triple::IOS && - !getTargetTriple().isOSVersionLT(7, 0); + return getTargetTriple().isiOS() && !getTargetTriple().isOSVersionLT(7, 0); } -// Enable the PostMachineScheduler if the target selects it instead of -// PostRAScheduler. Currently only available on the command line via -// -misched-postra. +// This overrides the PostRAScheduler bit in the SchedModel for any CPU. bool ARMSubtarget::enablePostMachineScheduler() const { - return PostRAScheduler; + return (!isThumb() || hasThumb2()); } -bool ARMSubtarget::enableAtomicExpandLoadLinked() const { +bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier() && !isThumb1Only(); } -bool ARMSubtarget::enablePostRAScheduler( - CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtargetInfo::ANTIDEP_NONE; - return PostRAScheduler && OptLevel >= CodeGenOpt::Default; -} - bool ARMSubtarget::useMovt(const MachineFunction &MF) const { // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit // immediates as it is inherently position independent, and may be out of diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 8f6c165..d5ee009 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -11,20 +11,18 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMSUBTARGET_H -#define ARMSUBTARGET_H +#ifndef LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H +#define LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H #include "ARMFrameLowering.h" #include "ARMISelLowering.h" #include "ARMInstrInfo.h" -#include "ARMJITInfo.h" #include "ARMSelectionDAGInfo.h" 
#include "ARMSubtarget.h" #include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" -#include "ARMJITInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" @@ -44,7 +42,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, - CortexR5, Swift, CortexA53, CortexA57, Krait + CortexA17, CortexR5, Swift, CortexA53, CortexA57, Krait, }; enum ARMProcClassEnum { None, AClass, RClass, MClass @@ -105,9 +103,6 @@ protected: /// NoARM - True if subtarget does not support ARM mode execution. bool NoARM; - /// PostRAScheduler - True if using post-register-allocation scheduler. - bool PostRAScheduler; - /// IsR9Reserved - True if R9 is a not available as general purpose register. bool IsR9Reserved; @@ -191,7 +186,7 @@ protected: /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory /// accesses for some types. For details, see - /// ARMTargetLowering::allowsUnalignedMemoryAccesses(). + /// ARMTargetLowering::allowsMisalignedMemoryAccesses(). bool AllowsUnalignedMem; /// RestrictIT - If true, the subtarget disallows generation of deprecated IT @@ -225,7 +220,7 @@ protected: Triple TargetTriple; /// SchedModel - Processor specific instruction costs. - const MCSchedModel *SchedModel; + MCSchedModel SchedModel; /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; @@ -244,8 +239,7 @@ protected: /// of the specified triple. /// ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, TargetMachine &TM, bool IsLittle, - const TargetOptions &Options); + const std::string &FS, const TargetMachine &TM, bool IsLittle); /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. @@ -256,27 +250,30 @@ protected: /// subtarget options. 
Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - /// \brief Reset the features for the ARM target. - void resetSubtargetFeatures(const MachineFunction *MF) override; - /// initializeSubtargetDependencies - Initializes using a CPU and feature string /// so that we can use initializer lists for subtarget initialization. ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); - const DataLayout *getDataLayout() const { return &DL; } - const ARMSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } - ARMJITInfo *getJITInfo() { return &JITInfo; } - const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo.get(); } - const ARMTargetLowering *getTargetLowering() const { return &TLInfo; } - const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); } - const ARMBaseRegisterInfo *getRegisterInfo() const { + const DataLayout *getDataLayout() const override { return &DL; } + const ARMSelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + const ARMBaseInstrInfo *getInstrInfo() const override { + return InstrInfo.get(); + } + const ARMTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const ARMFrameLowering *getFrameLowering() const override { + return FrameLowering.get(); + } + const ARMBaseRegisterInfo *getRegisterInfo() const override { return &InstrInfo->getRegisterInfo(); } private: const DataLayout DL; ARMSelectionDAGInfo TSInfo; - ARMJITInfo JITInfo; // Either Thumb1InstrInfo or Thumb2InstrInfo. 
std::unique_ptr<ARMBaseInstrInfo> InstrInfo; ARMTargetLowering TLInfo; @@ -284,7 +281,7 @@ private: std::unique_ptr<ARMFrameLowering> FrameLowering; void initializeEnvironment(); - void resetSubtargetFeatures(StringRef CPU, StringRef FS); + void initSubtargetFeatures(StringRef CPU, StringRef FS); public: void computeIssueWidth(); @@ -411,6 +408,10 @@ public: bool isRClass() const { return ARMProcClass == RClass; } bool isAClass() const { return ARMProcClass == AClass; } + bool isV6M() const { + return isThumb1Only() && isMClass(); + } + bool isR9Reserved() const { return IsR9Reserved; } bool useMovt(const MachineFunction &MF) const; @@ -432,19 +433,16 @@ public: bool hasSinCos() const; /// True for some subtargets at > -O0. - bool enablePostMachineScheduler() const; - - /// enablePostRAScheduler - True at 'More' optimization. - bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const override; + bool enablePostMachineScheduler() const override; - // enableAtomicExpandLoadLinked - True if we need to expand our atomics. - bool enableAtomicExpandLoadLinked() const override; + // enableAtomicExpand- True if we need to expand our atomics. + bool enableAtomicExpand() const override; - /// getInstrItins - Return the instruction itineraies based on subtarget + /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. - const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } + const InstrItineraryData *getInstrItineraryData() const override { + return &InstrItins; + } /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every @@ -454,6 +452,7 @@ public: /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect /// symbol. 
bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const; + }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index d85194b..88d6c5e 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -13,7 +13,9 @@ #include "ARM.h" #include "ARMTargetMachine.h" #include "ARMFrameLowering.h" +#include "ARMTargetObjectFile.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" @@ -42,6 +44,13 @@ extern "C" void LLVMInitializeARMTarget() { RegisterTargetMachine<ThumbBETargetMachine> B(TheThumbBETarget); } +static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { + if (TT.isOSBinFormatMachO()) + return make_unique<TargetLoweringObjectFileMachO>(); + if (TT.isOSWindows()) + return make_unique<TargetLoweringObjectFileCOFF>(); + return make_unique<ARMElfTargetObjectFile>(); +} /// TargetMachine ctor - Create an ARM architecture model. /// @@ -51,7 +60,8 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, *this, isLittle, Options) { + TLOF(createTLOF(Triple(getTargetTriple()))), + Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) { // Default to triple-appropriate float ABI if (Options.FloatABIType == FloatABI::Default) @@ -59,6 +69,46 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, Subtarget.isTargetHardFloat() ? 
FloatABI::Hard : FloatABI::Soft; } +ARMBaseTargetMachine::~ARMBaseTargetMachine() {} + +const ARMSubtarget * +ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { + AttributeSet FnAttrs = F.getAttributes(); + Attribute CPUAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu"); + Attribute FSAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features"); + + std::string CPU = !CPUAttr.hasAttribute(Attribute::None) + ? CPUAttr.getValueAsString().str() + : TargetCPU; + std::string FS = !FSAttr.hasAttribute(Attribute::None) + ? FSAttr.getValueAsString().str() + : TargetFS; + + // FIXME: This is related to the code below to reset the target options, + // we need to know whether or not the soft float flag is set on the + // function before we can generate a subtarget. We also need to use + // it as a key for the subtarget since that can be the only difference + // between two functions. + Attribute SFAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "use-soft-float"); + bool SoftFloat = !SFAttr.hasAttribute(Attribute::None) + ? SFAttr.getValueAsString() == "true" + : Options.UseSoftFloat; + + auto &I = SubtargetMap[CPU + FS + (SoftFloat ? "use-soft-float=true" + : "use-soft-float=false")]; + if (!I) { + // This needs to be done before we create a new subtarget since any + // creation will depend on the TM and the code generation flags on the + // function that reside in TargetOptions. + resetTargetOptions(F); + I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle); + } + return I.get(); +} + void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) { // Add first the target-independent BasicTTI pass, then our ARM pass. 
This // allows the ARM pass to delegate to the target independent layer when @@ -158,7 +208,10 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { } void ARMPassConfig::addIRPasses() { - addPass(createAtomicExpandLoadLinkedPass(TM)); + if (TM->Options.ThreadModel == ThreadModel::Single) + addPass(createLowerAtomicPass()); + else + addPass(createAtomicExpandPass(TM)); // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. We can exploit existing control-flow in @@ -244,10 +297,3 @@ bool ARMPassConfig::addPreEmitPass() { return true; } - -bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, - JITCodeEmitter &JCE) { - // Machine code emitter pass for ARM. - PM.add(createARMJITCodeEmitterPass(*this, JCE)); - return false; -} diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index b72b1df..fba0ec2 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMTARGETMACHINE_H -#define ARMTARGETMACHINE_H +#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H +#define LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H #include "ARMInstrInfo.h" #include "ARMSubtarget.h" @@ -23,7 +23,11 @@ namespace llvm { class ARMBaseTargetMachine : public LLVMTargetMachine { protected: + std::unique_ptr<TargetLoweringObjectFile> TLOF; ARMSubtarget Subtarget; + bool isLittle; + mutable StringMap<std::unique_ptr<ARMSubtarget>> SubtargetMap; + public: ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -31,30 +35,10 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle); + ~ARMBaseTargetMachine() override; const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; } - const ARMBaseRegisterInfo *getRegisterInfo() const override { - return 
getSubtargetImpl()->getRegisterInfo(); - } - const ARMTargetLowering *getTargetLowering() const override { - return getSubtargetImpl()->getTargetLowering(); - } - const ARMSelectionDAGInfo *getSelectionDAGInfo() const override { - return getSubtargetImpl()->getSelectionDAGInfo(); - } - const ARMBaseInstrInfo *getInstrInfo() const override { - return getSubtargetImpl()->getInstrInfo(); - } - const ARMFrameLowering *getFrameLowering() const override { - return getSubtargetImpl()->getFrameLowering(); - } - const InstrItineraryData *getInstrItineraryData() const override { - return &getSubtargetImpl()->getInstrItineraryData(); - } - const DataLayout *getDataLayout() const override { - return getSubtargetImpl()->getDataLayout(); - } - ARMJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); } + const ARMSubtarget *getSubtargetImpl(const Function &F) const override; /// \brief Register ARM analysis passes with a pass manager. void addAnalysisPasses(PassManagerBase &PM) override; @@ -62,7 +46,9 @@ public: // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE) override; + TargetLoweringObjectFile *getObjFileLowering() const override { + return TLOF.get(); + } }; /// ARMTargetMachine - ARM target machine. 
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index c926421..98e8763 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H -#define LLVM_TARGET_ARM_TARGETOBJECTFILE_H +#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index a2ace62..ec834e8 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -49,7 +49,7 @@ public: ARMTTI(const ARMBaseTargetMachine *TM) : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), - TLI(TM->getTargetLowering()) { + TLI(TM->getSubtargetImpl()->getTargetLowering()) { initializeARMTTIPass(*PassRegistry::getPassRegistry()); } @@ -104,7 +104,7 @@ public: return 32; } - unsigned getMaximumUnrollFactor() const override { + unsigned getMaxInterleaveFactor() const override { // These are out of order CPUs: if (ST->isCortexA15() || ST->isSwift()) return 2; @@ -126,10 +126,11 @@ public: unsigned getAddressComputationCost(Type *Val, bool IsComplex) const override; - unsigned - getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind Op1Info = OK_AnyValue, - OperandValueKind Op2Info = OK_AnyValue) const override; + unsigned getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Op1Info = OK_AnyValue, + OperandValueKind Op2Info = OK_AnyValue, + OperandValueProperties Opd1PropInfo = OP_None, + OperandValueProperties Opd2PropInfo = OP_None) const override; unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const override; @@ -389,6 +390,13 @@ unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy, 
ValTy->getScalarSizeInBits() <= 32) return 3; + // Cross-class copies are expensive on many microarchitectures, + // so assume they are expensive by default. + if ((Opcode == Instruction::InsertElement || + Opcode == Instruction::ExtractElement) && + ValTy->getVectorElementType()->isIntegerTy()) + return 3; + return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index); } @@ -497,9 +505,10 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); } -unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind Op1Info, - OperandValueKind Op2Info) const { +unsigned ARMTTI::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Op1Info, + OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo, + OperandValueProperties Opd2PropInfo) const { int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); @@ -555,8 +564,8 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, if (Idx != -1) return LT.first * CostTbl[Idx].Cost; - unsigned Cost = - TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); + unsigned Cost = TargetTransformInfo::getArithmeticInstrCost( + Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo); // This is somewhat of a hack. The problem that we are facing is that SROA // creates a sequence of shift, and, or instructions to construct values. 
diff --git a/lib/Target/ARM/Android.mk b/lib/Target/ARM/Android.mk index 095955b..55a5775 100644 --- a/lib/Target/ARM/Android.mk +++ b/lib/Target/ARM/Android.mk @@ -19,7 +19,6 @@ arm_codegen_SRC_FILES := \ ARMAsmPrinter.cpp \ ARMBaseInstrInfo.cpp \ ARMBaseRegisterInfo.cpp \ - ARMCodeEmitter.cpp \ ARMConstantIslandPass.cpp \ ARMConstantPoolValue.cpp \ ARMExpandPseudoInsts.cpp \ @@ -29,7 +28,6 @@ arm_codegen_SRC_FILES := \ ARMISelDAGToDAG.cpp \ ARMISelLowering.cpp \ ARMInstrInfo.cpp \ - ARMJITInfo.cpp \ ARMLoadStoreOptimizer.cpp \ ARMMCInstLower.cpp \ ARMMachineFunctionInfo.cpp \ diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index b62706c..9cc89bd 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -129,12 +129,13 @@ public: class ARMAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; - MCAsmParser &Parser; const MCInstrInfo &MII; const MCRegisterInfo *MRI; UnwindContext UC; ARMTargetStreamer &getTargetStreamer() { + assert(getParser().getStreamer().getTargetStreamer() && + "do not have a target streamer"); MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); return static_cast<ARMTargetStreamer &>(TS); } @@ -173,20 +174,16 @@ class ARMAsmParser : public MCTargetAsmParser { ITState.CurPosition = ~0U; // Done with the IT block after this. 
} - - MCAsmParser &getParser() const { return Parser; } - MCAsmLexer &getLexer() const { return Parser.getLexer(); } - void Note(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) { - return Parser.Note(L, Msg, Ranges); + return getParser().Note(L, Msg, Ranges); } bool Warning(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) { - return Parser.Warning(L, Msg, Ranges); + return getParser().Warning(L, Msg, Ranges); } bool Error(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) { - return Parser.Error(L, Msg, Ranges); + return getParser().Error(L, Msg, Ranges); } int tryParseRegister(); @@ -265,9 +262,15 @@ class ARMAsmParser : public MCTargetAsmParser { bool hasARM() const { return !(STI.getFeatureBits() & ARM::FeatureNoARM); } + bool hasThumb2DSP() const { + return STI.getFeatureBits() & ARM::FeatureDSPThumb2; + } + bool hasD16() const { + return STI.getFeatureBits() & ARM::FeatureD16; + } void SwitchMode() { - unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); + uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); setAvailableFeatures(FB); } bool isMClass() const { @@ -290,6 +293,7 @@ class ARMAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseInstSyncBarrierOptOperand(OperandVector &); OperandMatchResultTy parseProcIFlagsOperand(OperandVector &); OperandMatchResultTy parseMSRMaskOperand(OperandVector &); + OperandMatchResultTy parseBankedRegOperand(OperandVector &); OperandMatchResultTy parsePKHImm(OperandVector &O, StringRef Op, int Low, int High); OperandMatchResultTy parsePKHLSLImm(OperandVector &O) { @@ -329,10 +333,9 @@ public: }; - ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &MII, - const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(MII), UC(_Parser) { + ARMAsmParser(MCSubtargetInfo & _STI, MCAsmParser & _Parser, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(), 
STI(_STI), MII(MII), UC(_Parser) { MCAsmParserExtension::Initialize(_Parser); // Cache the MCRegisterInfo. @@ -359,7 +362,7 @@ public: bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, - unsigned &ErrorInfo, + uint64_t &ErrorInfo, bool MatchingInlineAsm) override; void onLabelParsed(MCSymbol *Symbol) override; }; @@ -383,6 +386,7 @@ class ARMOperand : public MCParsedAsmOperand { k_Memory, k_PostIndexRegister, k_MSRMask, + k_BankedReg, k_ProcIFlags, k_VectorIndex, k_Register, @@ -435,6 +439,10 @@ class ARMOperand : public MCParsedAsmOperand { unsigned Val; }; + struct BankedRegOp { + unsigned Val; + }; + struct TokOp { const char *Data; unsigned Length; @@ -517,6 +525,7 @@ class ARMOperand : public MCParsedAsmOperand { struct ITMaskOp ITMask; struct IFlagsOp IFlags; struct MMaskOp MMask; + struct BankedRegOp BankedReg; struct TokOp Tok; struct RegOp Reg; struct VectorListOp VectorList; @@ -585,6 +594,9 @@ public: case k_MSRMask: MMask = o.MMask; break; + case k_BankedReg: + BankedReg = o.BankedReg; + break; case k_ProcIFlags: IFlags = o.IFlags; break; @@ -679,6 +691,11 @@ public: return MMask.Val; } + unsigned getBankedReg() const { + assert(Kind == k_BankedReg && "Invalid access!"); + return BankedReg.Val; + } + bool isCoprocNum() const { return Kind == k_CoprocNum; } bool isCoprocReg() const { return Kind == k_CoprocReg; } bool isCoprocOption() const { return Kind == k_CoprocOption; } @@ -1384,6 +1401,7 @@ public: } bool isMSRMask() const { return Kind == k_MSRMask; } + bool isBankedReg() const { return Kind == k_BankedReg; } bool isProcIFlags() const { return Kind == k_ProcIFlags; } // NEON operands. @@ -1601,9 +1619,18 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. 
if (!CE) return false; - int64_t Value = CE->getValue(); - // i16 value in the range [0,255] or [0x0100, 0xff00] - return (Value >= 0 && Value < 256) || (Value >= 0x0100 && Value <= 0xff00); + unsigned Value = CE->getValue(); + return ARM_AM::isNEONi16splat(Value); + } + + bool isNEONi16splatNot() const { + if (!isImm()) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // Must be a constant. + if (!CE) return false; + unsigned Value = CE->getValue(); + return ARM_AM::isNEONi16splat(~Value & 0xffff); } bool isNEONi32splat() const { @@ -1614,12 +1641,18 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. if (!CE) return false; - int64_t Value = CE->getValue(); - // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X. - return (Value >= 0 && Value < 256) || - (Value >= 0x0100 && Value <= 0xff00) || - (Value >= 0x010000 && Value <= 0xff0000) || - (Value >= 0x01000000 && Value <= 0xff000000); + unsigned Value = CE->getValue(); + return ARM_AM::isNEONi32splat(Value); + } + + bool isNEONi32splatNot() const { + if (!isImm()) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // Must be a constant. + if (!CE) return false; + unsigned Value = CE->getValue(); + return ARM_AM::isNEONi32splat(~Value); } bool isNEONByteReplicate(unsigned NumBytes) const { @@ -1655,6 +1688,7 @@ public: int64_t Value = CE->getValue(); // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X, // for VMOV/VMVN only, 00Xf or 0Xff are also accepted. + // FIXME: This is probably wrong and a copy and paste from previous example return (Value >= 0 && Value < 256) || (Value >= 0x0100 && Value <= 0xff00) || (Value >= 0x010000 && Value <= 0xff0000) || @@ -1670,6 +1704,7 @@ public: int64_t Value = ~CE->getValue(); // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X, // for VMOV/VMVN only, 00Xf or 0Xff are also accepted. 
+ // FIXME: This is probably wrong and a copy and paste from previous example return (Value >= 0 && Value < 256) || (Value >= 0x0100 && Value <= 0xff00) || (Value >= 0x010000 && Value <= 0xff0000) || @@ -2334,6 +2369,11 @@ public: Inst.addOperand(MCOperand::CreateImm(unsigned(getMSRMask()))); } + void addBankedRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(unsigned(getBankedReg()))); + } + void addProcIFlagsOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags()))); @@ -2378,10 +2418,16 @@ public: // The immediate encodes the type of constant as well as the value. const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); unsigned Value = CE->getValue(); - if (Value >= 256) - Value = (Value >> 8) | 0xa00; - else - Value |= 0x800; + Value = ARM_AM::encodeNEONi16splat(Value); + Inst.addOperand(MCOperand::CreateImm(Value)); + } + + void addNEONi16splatNotOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Value = CE->getValue(); + Value = ARM_AM::encodeNEONi16splat(~Value & 0xffff); Inst.addOperand(MCOperand::CreateImm(Value)); } @@ -2390,12 +2436,16 @@ public: // The immediate encodes the type of constant as well as the value. 
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); unsigned Value = CE->getValue(); - if (Value >= 256 && Value <= 0xff00) - Value = (Value >> 8) | 0x200; - else if (Value > 0xffff && Value <= 0xff0000) - Value = (Value >> 16) | 0x400; - else if (Value > 0xffffff) - Value = (Value >> 24) | 0x600; + Value = ARM_AM::encodeNEONi32splat(Value); + Inst.addOperand(MCOperand::CreateImm(Value)); + } + + void addNEONi32splatNotOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Value = CE->getValue(); + Value = ARM_AM::encodeNEONi32splat(~Value); Inst.addOperand(MCOperand::CreateImm(Value)); } @@ -2736,6 +2786,14 @@ public: Op->EndLoc = S; return Op; } + + static std::unique_ptr<ARMOperand> CreateBankedReg(unsigned Reg, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_BankedReg); + Op->BankedReg.Val = Reg; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } }; } // end anonymous namespace. @@ -2769,6 +2827,9 @@ void ARMOperand::print(raw_ostream &OS) const { case k_MSRMask: OS << "<mask: " << getMSRMask() << ">"; break; + case k_BankedReg: + OS << "<banked reg: " << getBankedReg() << ">"; + break; case k_Immediate: getImm()->print(OS); break; @@ -2871,8 +2932,9 @@ static unsigned MatchRegisterName(StringRef Name); bool ARMAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { - StartLoc = Parser.getTok().getLoc(); - EndLoc = Parser.getTok().getEndLoc(); + const AsmToken &Tok = getParser().getTok(); + StartLoc = Tok.getLoc(); + EndLoc = Tok.getEndLoc(); RegNo = tryParseRegister(); return (RegNo == (unsigned)-1); @@ -2883,6 +2945,7 @@ bool ARMAsmParser::ParseRegister(unsigned &RegNo, /// returned. Otherwise return -1. 
/// int ARMAsmParser::tryParseRegister() { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return -1; @@ -2924,6 +2987,10 @@ int ARMAsmParser::tryParseRegister() { return Entry->getValue(); } + // Some FPUs only have 16 D registers, so D16-D31 are invalid + if (hasD16() && RegNum >= ARM::D16 && RegNum <= ARM::D31) + return -1; + Parser.Lex(); // Eat identifier token. return RegNum; @@ -2935,6 +3002,7 @@ int ARMAsmParser::tryParseRegister() { // consumed in the process of trying to parse the shifter (i.e., when it is // indeed a shifter operand, but malformed). int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -3037,6 +3105,7 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) { /// TODO this is likely to change to allow different register types and or to /// parse for a specific register type. bool ARMAsmParser::tryParseRegisterWithWriteBack(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); const AsmToken &RegTok = Parser.getTok(); int RegNo = tryParseRegister(); if (RegNo == -1) @@ -3118,9 +3187,10 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { return -1; switch (Name[1]) { default: return -1; - // p10 and p11 are invalid for coproc instructions (reserved for FP/NEON) - case '0': return CoprocOp == 'p'? -1: 10; - case '1': return CoprocOp == 'p'? -1: 11; + // CP10 and CP11 are VFP/NEON and so vector instructions should be used. + // However, old cores (v5/v6) did use them in that way. + case '0': return 10; + case '1': return 11; case '2': return 12; case '3': return 13; case '4': return 14; @@ -3132,6 +3202,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { /// parseITCondCode - Try to parse a condition code for an IT instruction. 
ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseITCondCode(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) @@ -3169,6 +3240,7 @@ ARMAsmParser::parseITCondCode(OperandVector &Operands) { /// number, the token is eaten and the operand is added to the operand list. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -3177,6 +3249,9 @@ ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) { int Num = MatchCoprocessorOperandName(Tok.getString(), 'p'); if (Num == -1) return MatchOperand_NoMatch; + // ARMv7 and v8 don't allow cp10/cp11 due to VFP/NEON specific instructions + if ((hasV7Ops() || hasV8Ops()) && (Num == 10 || Num == 11)) + return MatchOperand_NoMatch; Parser.Lex(); // Eat identifier token. Operands.push_back(ARMOperand::CreateCoprocNum(Num, S)); @@ -3188,6 +3263,7 @@ ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) { /// number, the token is eaten and the operand is added to the operand list. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -3206,6 +3282,7 @@ ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) { /// coproc_option : '{' imm0_255 '}' ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseCoprocOptionOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); // If this isn't a '{', this isn't a coprocessor immediate operand. @@ -3283,6 +3360,7 @@ static unsigned getDRegFromQReg(unsigned QReg) { /// Parse a register list. 
bool ARMAsmParser::parseRegisterList(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Curly Brace"); SMLoc S = Parser.getTok().getLoc(); @@ -3414,6 +3492,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) { // Helper function to parse the lane index for vector lists. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) { + MCAsmParser &Parser = getParser(); Index = 0; // Always return a defined index value. if (Parser.getTok().is(AsmToken::LBrac)) { Parser.Lex(); // Eat the '['. @@ -3465,6 +3544,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) { // parse a vector register list ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseVectorList(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); VectorLaneTy LaneKind; unsigned LaneIndex; SMLoc S = Parser.getTok().getLoc(); @@ -3716,6 +3796,7 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) { /// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); unsigned Opt; @@ -3787,6 +3868,7 @@ ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) { /// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); unsigned Opt; @@ -3838,6 +3920,7 @@ ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) { /// parseProcIFlagsOperand - Try to parse iflags from CPS instruction. 
ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) @@ -3872,6 +3955,7 @@ ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) { /// parseMSRMaskOperand - Try to parse mask flags from MSR instruction. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) @@ -3892,9 +3976,6 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { // should really only be allowed when writing a special register. Note // they get dropped in the MRS instruction reading a special register as // the SYSm field is only 8 bits. - // - // FIXME: the _g and _nzcvqg versions are only allowed if the processor - // includes the DSP extension but that is not checked. .Case("apsr", 0x800) .Case("apsr_nzcvq", 0x800) .Case("apsr_g", 0x400) @@ -3926,6 +4007,11 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { if (FlagsVal == ~0U) return MatchOperand_NoMatch; + if (!hasThumb2DSP() && (FlagsVal & 0x400)) + // The _g and _nzcvqg versions are only valid if the DSP extension is + // available. + return MatchOperand_NoMatch; + if (!hasV7Ops() && FlagsVal >= 0x811 && FlagsVal <= 0x813) // basepri, basepri_max and faultmask only valid for V7m. return MatchOperand_NoMatch; @@ -3998,9 +4084,67 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { return MatchOperand_Success; } +/// parseBankedRegOperand - Try to parse a banked register (e.g. "lr_irq") for +/// use in the MRS/MSR instructions added to support virtualization. 
+ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseBankedRegOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + SMLoc S = Parser.getTok().getLoc(); + const AsmToken &Tok = Parser.getTok(); + if (!Tok.is(AsmToken::Identifier)) + return MatchOperand_NoMatch; + StringRef RegName = Tok.getString(); + + // The values here come from B9.2.3 of the ARM ARM, where bits 4-0 are SysM + // and bit 5 is R. + unsigned Encoding = StringSwitch<unsigned>(RegName.lower()) + .Case("r8_usr", 0x00) + .Case("r9_usr", 0x01) + .Case("r10_usr", 0x02) + .Case("r11_usr", 0x03) + .Case("r12_usr", 0x04) + .Case("sp_usr", 0x05) + .Case("lr_usr", 0x06) + .Case("r8_fiq", 0x08) + .Case("r9_fiq", 0x09) + .Case("r10_fiq", 0x0a) + .Case("r11_fiq", 0x0b) + .Case("r12_fiq", 0x0c) + .Case("sp_fiq", 0x0d) + .Case("lr_fiq", 0x0e) + .Case("lr_irq", 0x10) + .Case("sp_irq", 0x11) + .Case("lr_svc", 0x12) + .Case("sp_svc", 0x13) + .Case("lr_abt", 0x14) + .Case("sp_abt", 0x15) + .Case("lr_und", 0x16) + .Case("sp_und", 0x17) + .Case("lr_mon", 0x1c) + .Case("sp_mon", 0x1d) + .Case("elr_hyp", 0x1e) + .Case("sp_hyp", 0x1f) + .Case("spsr_fiq", 0x2e) + .Case("spsr_irq", 0x30) + .Case("spsr_svc", 0x32) + .Case("spsr_abt", 0x34) + .Case("spsr_und", 0x36) + .Case("spsr_mon", 0x3c) + .Case("spsr_hyp", 0x3e) + .Default(~0U); + + if (Encoding == ~0U) + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat identifier token. 
+ Operands.push_back(ARMOperand::CreateBankedReg(Encoding, S)); + return MatchOperand_Success; +} + ARMAsmParser::OperandMatchResultTy ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low, int High) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) { Error(Parser.getTok().getLoc(), Op + " operand expected."); @@ -4048,6 +4192,7 @@ ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low, ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseSetEndImm(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) { @@ -4077,6 +4222,7 @@ ARMAsmParser::parseSetEndImm(OperandVector &Operands) { /// n == 32 encoded as n == 0. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseShifterImm(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) { @@ -4147,6 +4293,7 @@ ARMAsmParser::parseShifterImm(OperandVector &Operands) { /// ror #n 'n' in {0, 8, 16, 24} ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseRotImm(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) @@ -4193,6 +4340,7 @@ ARMAsmParser::parseRotImm(OperandVector &Operands) { ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseBitfield(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); // The bitfield descriptor is really two operands, the LSB and the width. 
if (Parser.getTok().isNot(AsmToken::Hash) && @@ -4269,6 +4417,7 @@ ARMAsmParser::parsePostIdxReg(OperandVector &Operands) { // This method must return MatchOperand_NoMatch without consuming any tokens // in the case where there is no match, as other alternatives take other // parse methods. + MCAsmParser &Parser = getParser(); AsmToken Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); bool haveEaten = false; @@ -4321,6 +4470,7 @@ ARMAsmParser::parseAM3Offset(OperandVector &Operands) { // This method must return MatchOperand_NoMatch without consuming any tokens // in the case where there is no match, as other alternatives take other // parse methods. + MCAsmParser &Parser = getParser(); AsmToken Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); @@ -4458,6 +4608,7 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst, /// Parse an ARM memory expression, return false if successful else return true /// or an error. The first token must be a '[' when called. bool ARMAsmParser::parseMemory(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S, E; assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a Left Bracket"); @@ -4649,6 +4800,7 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) { /// return true if it parses a shift otherwise it returns false. bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, unsigned &Amount) { + MCAsmParser &Parser = getParser(); SMLoc Loc = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -4709,6 +4861,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, /// parseFPImm - A floating point immediate expression operand. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseFPImm(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); // Anything that can accept a floating point constant as an operand // needs to go through here, as the regular parseExpression is // integer only. 
@@ -4789,6 +4942,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) { /// Parse a arm instruction operand. For now this parses the operand regardless /// of the mnemonic. bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { + MCAsmParser &Parser = getParser(); SMLoc S, E; // Check if the current operand has a custom associated parser, if so, try to @@ -4921,6 +5075,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { // parsePrefix - Parse ARM 16-bit relocations expression prefix, i.e. // :lower16: and :upper16:. bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { + MCAsmParser &Parser = getParser(); RefKind = ARMMCExpr::VK_ARM_None; // consume an optional '#' (GNU compatibility) @@ -5271,7 +5426,7 @@ static bool isDataTypeToken(StringRef Tok) { static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) { return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm"); } -static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features, +static void applyMnemonicAliases(StringRef &Mnemonic, uint64_t Features, unsigned VariantID); static bool RequiresVFPRegListValidation(StringRef Inst, @@ -5296,6 +5451,7 @@ static bool RequiresVFPRegListValidation(StringRef Inst, /// Parse an arm instruction mnemonic followed by its operands. bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { + MCAsmParser &Parser = getParser(); // FIXME: Can this be done via tablegen in some fashion? bool RequireVFPRegisterListCheck; bool AcceptSinglePrecisionOnly; @@ -5309,7 +5465,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // The generic tblgen'erated code does this later, at the start of // MatchInstructionImpl(), but that's too late for aliases that include // any sort of suffix. 
- unsigned AvailableFeatures = getAvailableFeatures(); + uint64_t AvailableFeatures = getAvailableFeatures(); unsigned AssemblerDialect = getParser().getAssemblerDialect(); applyMnemonicAliases(Name, AvailableFeatures, AssemblerDialect); @@ -5415,6 +5571,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Operands.push_back(ARMOperand::CreateImm( MCConstantExpr::Create(ProcessorIMod, getContext()), NameLoc, NameLoc)); + } else if (Mnemonic == "cps" && isMClass()) { + return Error(NameLoc, "instruction 'cps' requires effect for M-class"); } // Add the remaining tokens in the mnemonic. @@ -5546,6 +5704,48 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } } + // If first 2 operands of a 3 operand instruction are the same + // then transform to 2 operand version of the same instruction + // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1' + // FIXME: We would really like to be able to tablegen'erate this. + if (isThumbOne() && Operands.size() == 6 && + (Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" || + Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" || + Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" || + Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic")) { + ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]); + ARMOperand &Op4 = static_cast<ARMOperand &>(*Operands[4]); + ARMOperand &Op5 = static_cast<ARMOperand &>(*Operands[5]); + + // If both registers are the same then remove one of them from + // the operand list. + if (Op3.isReg() && Op4.isReg() && Op3.getReg() == Op4.getReg()) { + // If 3rd operand (variable Op5) is a register and the instruction is adds/sub + // then do not transform as the backend already handles this instruction + // correctly. 
+ if (!Op5.isReg() || !((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub")) { + Operands.erase(Operands.begin() + 3); + if (Mnemonic == "add" && !CarrySetting) { + // Special case for 'add' (not 'adds') instruction must + // remove the CCOut operand as well. + Operands.erase(Operands.begin() + 1); + } + } + } + } + + // If instruction is 'add' and first two register operands + // use SP register, then remove one of the SP registers from + // the instruction. + // FIXME: We would really like to be able to tablegen'erate this. + if (isThumbOne() && Operands.size() == 5 && Mnemonic == "add" && !CarrySetting) { + ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]); + ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]); + if (Op2.isReg() && Op3.isReg() && Op2.getReg() == ARM::SP && Op3.getReg() == ARM::SP) { + Operands.erase(Operands.begin() + 2); + } + } + // GNU Assembler extension (compatibility) if ((Mnemonic == "ldrd" || Mnemonic == "strd")) { ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]); @@ -5728,6 +5928,48 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, "source operands must be sequential"); return false; } + case ARM::STR_PRE_IMM: + case ARM::STR_PRE_REG: + case ARM::STR_POST_IMM: + case ARM::STR_POST_REG: + case ARM::STRH_PRE: + case ARM::STRH_POST: + case ARM::STRB_PRE_IMM: + case ARM::STRB_PRE_REG: + case ARM::STRB_POST_IMM: + case ARM::STRB_POST_REG: { + // Rt must be different from Rn. 
+ const unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg()); + const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(2).getReg()); + + if (Rt == Rn) + return Error(Operands[3]->getStartLoc(), + "source register and base register can't be identical"); + return false; + } + case ARM::LDR_PRE_IMM: + case ARM::LDR_PRE_REG: + case ARM::LDR_POST_IMM: + case ARM::LDR_POST_REG: + case ARM::LDRH_PRE: + case ARM::LDRH_POST: + case ARM::LDRSH_PRE: + case ARM::LDRSH_POST: + case ARM::LDRB_PRE_IMM: + case ARM::LDRB_PRE_REG: + case ARM::LDRB_POST_IMM: + case ARM::LDRB_POST_REG: + case ARM::LDRSB_PRE: + case ARM::LDRSB_POST: { + // Rt must be different from Rn. + const unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg()); + const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(2).getReg()); + + if (Rt == Rn) + return Error(Operands[3]->getStartLoc(), + "destination register and base register can't be identical"); + return false; + } case ARM::SBFX: case ARM::UBFX: { // Width must be in range [1, 32-lsb]. @@ -5764,7 +6006,9 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, return Error(Operands[3]->getStartLoc(), "writeback operator '!' not allowed when base register " "in register list"); - + if (listContainsReg(Inst, 3 + HasWritebackToken, ARM::SP)) + return Error(Operands[3 + HasWritebackToken]->getStartLoc(), + "SP not allowed in register list"); break; } case ARM::LDMIA_UPD: @@ -5775,7 +6019,19 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, // UNPREDICTABLE on v7 upwards. Goodness knows what they did before. 
if (!hasV7Ops()) break; - // Fallthrough + if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg())) + return Error(Operands.back()->getStartLoc(), + "writeback register not allowed in register list"); + break; + case ARM::t2LDMIA: + case ARM::t2LDMDB: + case ARM::t2STMIA: + case ARM::t2STMDB: { + if (listContainsReg(Inst, 3, ARM::SP)) + return Error(Operands.back()->getStartLoc(), + "SP not allowed in register list"); + break; + } case ARM::t2LDMIA_UPD: case ARM::t2LDMDB_UPD: case ARM::t2STMIA_UPD: @@ -5783,6 +6039,10 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg())) return Error(Operands.back()->getStartLoc(), "writeback register not allowed in register list"); + + if (listContainsReg(Inst, 4, ARM::SP)) + return Error(Operands.back()->getStartLoc(), + "SP not allowed in register list"); break; } case ARM::sysLDMIA_UPD: @@ -5851,6 +6111,9 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, return Error(Operands[4]->getStartLoc(), "writeback operator '!' not allowed when base register " "in register list"); + if (listContainsReg(Inst, 4, ARM::SP) && !inITBlock()) + return Error(Operands.back()->getStartLoc(), + "SP not allowed in register list"); break; } case ARM::tADDrSP: { @@ -8010,7 +8273,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { } // Some high-register supporting Thumb1 encodings only allow both registers // to be from r0-r7 when in Thumb2. 
- else if (Opc == ARM::tADDhirr && isThumbOne() && + else if (Opc == ARM::tADDhirr && isThumbOne() && !hasV6MOps() && isARMLowRegister(Inst.getOperand(1).getReg()) && isARMLowRegister(Inst.getOperand(2).getReg())) return Match_RequiresThumb2; @@ -8028,10 +8291,10 @@ template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) { } } -static const char *getSubtargetFeatureName(unsigned Val); +static const char *getSubtargetFeatureName(uint64_t Val); bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, - MCStreamer &Out, unsigned &ErrorInfo, + MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { MCInst Inst; unsigned MatchResult; @@ -8085,7 +8348,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // Special case the error message for the very common case where only // a single subtarget feature is missing (Thumb vs. ARM, e.g.). std::string Msg = "instruction requires:"; - unsigned Mask = 1; + uint64_t Mask = 1; for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) { if (ErrorInfo & Mask) { Msg += " "; @@ -8097,7 +8360,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } case Match_InvalidOperand: { SMLoc ErrorLoc = IDLoc; - if (ErrorInfo != ~0U) { + if (ErrorInfo != ~0ULL) { if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); @@ -8174,6 +8437,7 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { const MCObjectFileInfo::Environment Format = getContext().getObjectFileInfo()->getObjectFileType(); bool IsMachO = Format == MCObjectFileInfo::IsMachO; + bool IsCOFF = Format == MCObjectFileInfo::IsCOFF; StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") @@ -8225,7 +8489,7 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { else if (IDVal == ".thumb_set") return parseDirectiveThumbSet(DirectiveID.getLoc()); - if (!IsMachO) { + if (!IsMachO && !IsCOFF) { if (IDVal == ".arch") return 
parseDirectiveArch(DirectiveID.getLoc()); else if (IDVal == ".cpu") @@ -8256,6 +8520,7 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { /// ::= .short expression [, expression]* /// ::= .word expression [, expression]* bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) { + MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; @@ -8285,6 +8550,7 @@ bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) { /// parseDirectiveThumb /// ::= .thumb bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { + MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::EndOfStatement)) { Error(L, "unexpected token in directive"); return false; @@ -8306,6 +8572,7 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { /// parseDirectiveARM /// ::= .arm bool ARMAsmParser::parseDirectiveARM(SMLoc L) { + MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::EndOfStatement)) { Error(L, "unexpected token in directive"); return false; @@ -8334,12 +8601,13 @@ void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) { /// parseDirectiveThumbFunc /// ::= .thumbfunc symbol_name bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); + MCAsmParser &Parser = getParser(); + const auto Format = getContext().getObjectFileInfo()->getObjectFileType(); + bool IsMachO = Format == MCObjectFileInfo::IsMachO; // Darwin asm has (optionally) function name after .thumb_func direction // ELF doesn't - if (isMachO) { + if (IsMachO) { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::EndOfStatement)) { if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) { @@ -8356,7 +8624,8 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { } if (getLexer().isNot(AsmToken::EndOfStatement)) { - Error(L, "unexpected token in directive"); + Error(Parser.getTok().getLoc(), 
"unexpected token in directive"); + Parser.eatToEndOfStatement(); return false; } @@ -8367,6 +8636,7 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { /// parseDirectiveSyntax /// ::= .syntax unified | divided bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) { Error(L, "unexpected token in .syntax directive"); @@ -8398,6 +8668,7 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) { /// parseDirectiveCode /// ::= .code 16 | 32 bool ARMAsmParser::parseDirectiveCode(SMLoc L) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Integer)) { Error(L, "unexpected token in .code directive"); @@ -8442,6 +8713,7 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) { /// parseDirectiveReq /// ::= name .req registername bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { + MCAsmParser &Parser = getParser(); Parser.Lex(); // Eat the '.req' token. 
unsigned Reg; SMLoc SRegLoc, ERegLoc; @@ -8460,7 +8732,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { Parser.Lex(); // Consume the EndOfStatement - if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg) { + if (!RegisterReqs.insert(std::make_pair(Name, Reg)).second) { Error(SRegLoc, "redefinition of '" + Name + "' does not match original."); return false; } @@ -8471,6 +8743,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { /// parseDirectiveUneq /// ::= .unreq registername bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { + MCAsmParser &Parser = getParser(); if (Parser.getTok().isNot(AsmToken::Identifier)) { Parser.eatToEndOfStatement(); Error(L, "unexpected input in .unreq directive."); @@ -8507,6 +8780,7 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) { /// ::= .eabi_attribute int, int [, "str"] /// ::= .eabi_attribute Tag_name, int [, "str"] bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { + MCAsmParser &Parser = getParser(); int64_t Tag; SMLoc TagLoc; TagLoc = Parser.getTok().getLoc(); @@ -8617,6 +8891,32 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { return false; } +// FIXME: This is duplicated in getARMFPUFeatures() in +// tools/clang/lib/Driver/Tools.cpp +static const struct { + const unsigned Fpu; + const uint64_t Enabled; + const uint64_t Disabled; +} Fpus[] = { + {ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON}, + {ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON}, + {ARM::VFPV3, ARM::FeatureVFP3, ARM::FeatureNEON}, + {ARM::VFPV3_D16, ARM::FeatureVFP3 | ARM::FeatureD16, ARM::FeatureNEON}, + {ARM::VFPV4, ARM::FeatureVFP4, ARM::FeatureNEON}, + {ARM::VFPV4_D16, ARM::FeatureVFP4 | ARM::FeatureD16, ARM::FeatureNEON}, + {ARM::FPV5_D16, ARM::FeatureFPARMv8 | ARM::FeatureD16, + ARM::FeatureNEON | ARM::FeatureCrypto}, + {ARM::FP_ARMV8, ARM::FeatureFPARMv8, + ARM::FeatureNEON | ARM::FeatureCrypto}, + {ARM::NEON, ARM::FeatureNEON, 0}, + {ARM::NEON_VFPV4, ARM::FeatureVFP4 | ARM::FeatureNEON, 0}, + 
{ARM::NEON_FP_ARMV8, ARM::FeatureFPARMv8 | ARM::FeatureNEON, + ARM::FeatureCrypto}, + {ARM::CRYPTO_NEON_FP_ARMV8, + ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto, 0}, + {ARM::SOFTVFP, 0, 0}, +}; + /// parseDirectiveFPU /// ::= .fpu str bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { @@ -8632,6 +8932,18 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { return false; } + for (const auto &Fpu : Fpus) { + if (Fpu.Fpu != ID) + continue; + + // Need to toggle features that should be on but are off and that + // should off but are on. + uint64_t Toggle = (Fpu.Enabled & ~STI.getFeatureBits()) | + (Fpu.Disabled & STI.getFeatureBits()); + setAvailableFeatures(ComputeAvailableFeatures(STI.ToggleFeature(Toggle))); + break; + } + getTargetStreamer().emitFPU(ID); return false; } @@ -8698,6 +9010,7 @@ bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) { /// parseDirectivePersonality /// ::= .personality name bool ARMAsmParser::parseDirectivePersonality(SMLoc L) { + MCAsmParser &Parser = getParser(); bool HasExistingPersonality = UC.hasPersonality(); UC.recordPersonality(L); @@ -8761,6 +9074,7 @@ bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) { /// parseDirectiveSetFP /// ::= .setfp fpreg, spreg [, offset] bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { + MCAsmParser &Parser = getParser(); // Check the ordering of unwind directives if (!UC.hasFnStart()) { Error(L, ".fnstart must precede .setfp directive"); @@ -8838,6 +9152,7 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { /// parseDirective /// ::= .pad offset bool ARMAsmParser::parseDirectivePad(SMLoc L) { + MCAsmParser &Parser = getParser(); // Check the ordering of unwind directives if (!UC.hasFnStart()) { Error(L, ".fnstart must precede .pad directive"); @@ -8912,6 +9227,7 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) { /// ::= .inst.n opcode [, ...] /// ::= .inst.w opcode [, ...] 
bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) { + MCAsmParser &Parser = getParser(); int Width; if (isThumb()) { @@ -9008,7 +9324,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) { } if (!Section) { - getStreamer().InitSections(); + getStreamer().InitSections(false); Section = getStreamer().getCurrentSection().first; } @@ -9024,6 +9340,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) { /// parseDirectivePersonalityIndex /// ::= .personalityindex index bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) { + MCAsmParser &Parser = getParser(); bool HasExistingPersonality = UC.hasPersonality(); UC.recordPersonalityIndex(L); @@ -9079,6 +9396,7 @@ bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) { /// parseDirectiveUnwindRaw /// ::= .unwind_raw offset, opcode [, opcode...] bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) { + MCAsmParser &Parser = getParser(); if (!UC.hasFnStart()) { Parser.eatToEndOfStatement(); Error(L, ".fnstart must precede .unwind_raw directives"); @@ -9160,6 +9478,8 @@ bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) { /// parseDirectiveTLSDescSeq /// ::= .tlsdescseq tls-variable bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) { + MCAsmParser &Parser = getParser(); + if (getLexer().isNot(AsmToken::Identifier)) { TokError("expected variable after '.tlsdescseq' directive"); Parser.eatToEndOfStatement(); @@ -9184,6 +9504,7 @@ bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) { /// parseDirectiveMovSP /// ::= .movsp reg [, #offset] bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) { + MCAsmParser &Parser = getParser(); if (!UC.hasFnStart()) { Parser.eatToEndOfStatement(); Error(L, ".fnstart must precede .movsp directives"); @@ -9247,6 +9568,7 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) { /// parseDirectiveObjectArch /// ::= .object_arch name bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) { + MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::Identifier)) { 
Error(getLexer().getLoc(), "unexpected token"); Parser.eatToEndOfStatement(); @@ -9303,6 +9625,8 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) { /// parseDirectiveThumbSet /// ::= .thumb_set name, value bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) { + MCAsmParser &Parser = getParser(); + StringRef Name; if (Parser.parseIdentifier(Name)) { TokError("expected identifier after '.thumb_set'"); @@ -9349,8 +9673,8 @@ extern "C" void LLVMInitializeARMAsmParser() { #define GET_MATCHER_IMPLEMENTATION #include "ARMGenAsmMatcher.inc" -static const struct ExtMapEntry { - const char *Extension; +static const struct { + const char *Name; const unsigned ArchCheck; const uint64_t Features; } Extensions[] = { @@ -9381,46 +9705,47 @@ static const struct ExtMapEntry { /// parseDirectiveArchExtension /// ::= .arch_extension [no]feature bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { + MCAsmParser &Parser = getParser(); + if (getLexer().isNot(AsmToken::Identifier)) { Error(getLexer().getLoc(), "unexpected token"); Parser.eatToEndOfStatement(); return false; } - StringRef Extension = Parser.getTok().getString(); + StringRef Name = Parser.getTok().getString(); SMLoc ExtLoc = Parser.getTok().getLoc(); getLexer().Lex(); bool EnableFeature = true; - if (Extension.startswith_lower("no")) { + if (Name.startswith_lower("no")) { EnableFeature = false; - Extension = Extension.substr(2); + Name = Name.substr(2); } - for (unsigned EI = 0, EE = array_lengthof(Extensions); EI != EE; ++EI) { - if (Extensions[EI].Extension != Extension) + for (const auto &Extension : Extensions) { + if (Extension.Name != Name) continue; - unsigned FB = getAvailableFeatures(); - if ((FB & Extensions[EI].ArchCheck) != Extensions[EI].ArchCheck) { - Error(ExtLoc, "architectural extension '" + Extension + "' is not " + if (!Extension.Features) + report_fatal_error("unsupported architectural extension: " + Name); + + if ((getAvailableFeatures() & Extension.ArchCheck) != Extension.ArchCheck) { + 
Error(ExtLoc, "architectural extension '" + Name + "' is not " "allowed for the current base architecture"); return false; } - if (!Extensions[EI].Features) - report_fatal_error("unsupported architectural extension: " + Extension); - - if (EnableFeature) - FB |= ComputeAvailableFeatures(Extensions[EI].Features); - else - FB &= ~ComputeAvailableFeatures(Extensions[EI].Features); - - setAvailableFeatures(FB); + uint64_t ToggleFeatures = EnableFeature + ? (~STI.getFeatureBits() & Extension.Features) + : ( STI.getFeatureBits() & Extension.Features); + uint64_t Features = + ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures)); + setAvailableFeatures(Features); return false; } - Error(ExtLoc, "unknown architectural extension: " + Extension); + Error(ExtLoc, "unknown architectural extension: " + Name); Parser.eatToEndOfStatement(); return false; } diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 9b5fa75..2530640 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -2,8 +2,7 @@ set(LLVM_TARGET_DEFINITIONS ARM.td) tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info) tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info) -tablegen(LLVM ARMGenCodeEmitter.inc -gen-emitter) -tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter) +tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM ARMGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM ARMGenAsmMatcher.inc -gen-asm-matcher) @@ -19,7 +18,6 @@ add_llvm_target(ARMCodeGen ARMAsmPrinter.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp - ARMCodeEmitter.cpp ARMConstantIslandPass.cpp ARMConstantPoolValue.cpp ARMExpandPseudoInsts.cpp @@ -29,7 +27,6 @@ add_llvm_target(ARMCodeGen ARMISelDAGToDAG.cpp ARMISelLowering.cpp ARMInstrInfo.cpp - ARMJITInfo.cpp ARMLoadStoreOptimizer.cpp ARMMCInstLower.cpp ARMMachineFunctionInfo.cpp diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp 
b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 4d4038d..ef65418 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -20,7 +20,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include <vector> @@ -85,42 +84,34 @@ namespace { } namespace { -/// ARMDisassembler - ARM disassembler for all ARM platforms. +/// ARM disassembler for all ARM platforms. class ARMDisassembler : public MCDisassembler { public: - /// Constructor - Initializes the disassembler. - /// ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) { } - ~ARMDisassembler() { - } + ~ARMDisassembler() {} - /// getInstruction - See MCDisassembler. - DecodeStatus getInstruction(MCInst &instr, uint64_t &size, - const MemoryObject ®ion, uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const override; + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const override; }; -/// ThumbDisassembler - Thumb disassembler for all Thumb platforms. +/// Thumb disassembler for all Thumb platforms. class ThumbDisassembler : public MCDisassembler { public: - /// Constructor - Initializes the disassembler. - /// ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) { } - ~ThumbDisassembler() { - } + ~ThumbDisassembler() {} - /// getInstruction - See MCDisassembler. 
- DecodeStatus getInstruction(MCInst &instr, uint64_t &size, - const MemoryObject ®ion, uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const override; + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const override; private: mutable ITStatus ITBlock; @@ -281,6 +272,8 @@ static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, @@ -413,103 +406,99 @@ static MCDisassembler *createThumbDisassembler(const Target &T, } DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, - const MemoryObject &Region, - uint64_t Address, - raw_ostream &os, - raw_ostream &cs) const { - CommentStream = &cs; - - uint8_t bytes[4]; + ArrayRef<uint8_t> Bytes, + uint64_t Address, raw_ostream &OS, + raw_ostream &CS) const { + CommentStream = &CS; assert(!(STI.getFeatureBits() & ARM::ModeThumb) && - "Asked to disassemble an ARM instruction but Subtarget is in Thumb mode!"); + "Asked to disassemble an ARM instruction but Subtarget is in Thumb " + "mode!"); // We want to read exactly 4 bytes of data. - if (Region.readBytes(Address, 4, bytes) == -1) { + if (Bytes.size() < 4) { Size = 0; return MCDisassembler::Fail; } // Encoded as a small-endian 32-bit word in the stream. - uint32_t insn = (bytes[3] << 24) | - (bytes[2] << 16) | - (bytes[1] << 8) | - (bytes[0] << 0); + uint32_t Insn = + (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0); // Calling the auto-generated decoder function. 
- DecodeStatus result = decodeInstruction(DecoderTableARM32, MI, insn, - Address, this, STI); - if (result != MCDisassembler::Fail) { + DecodeStatus Result = + decodeInstruction(DecoderTableARM32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } // VFP and NEON instructions, similarly, are shared between ARM // and Thumb modes. MI.clear(); - result = decodeInstruction(DecoderTableVFP32, MI, insn, Address, this, STI); - if (result != MCDisassembler::Fail) { + Result = decodeInstruction(DecoderTableVFP32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableVFPV832, MI, insn, Address, this, STI); - if (result != MCDisassembler::Fail) { + Result = decodeInstruction(DecoderTableVFPV832, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableNEONData32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; // Add a fake predicate operand, because we share these instruction // definitions with Thumb2 where these instructions are predicable. if (!DecodePredicateOperand(MI, 0xE, Address, this)) return MCDisassembler::Fail; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableNEONLoadStore32, MI, insn, Address, + Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, Insn, Address, this, STI); - if (result != MCDisassembler::Fail) { + if (Result != MCDisassembler::Fail) { Size = 4; // Add a fake predicate operand, because we share these instruction // definitions with Thumb2 where these instructions are predicable. 
if (!DecodePredicateOperand(MI, 0xE, Address, this)) return MCDisassembler::Fail; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableNEONDup32, MI, insn, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableNEONDup32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; // Add a fake predicate operand, because we share these instruction // definitions with Thumb2 where these instructions are predicable. if (!DecodePredicateOperand(MI, 0xE, Address, this)) return MCDisassembler::Fail; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTablev8NEON32, MI, insn, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTablev8NEON32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTablev8Crypto32, MI, insn, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTablev8Crypto32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } MI.clear(); @@ -681,55 +670,53 @@ void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const { } DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, - const MemoryObject &Region, + ArrayRef<uint8_t> Bytes, uint64_t Address, - raw_ostream &os, - raw_ostream &cs) const { - CommentStream = &cs; - - uint8_t bytes[4]; + raw_ostream &OS, + raw_ostream &CS) const { + CommentStream = &CS; assert((STI.getFeatureBits() & ARM::ModeThumb) && "Asked to disassemble in Thumb mode but Subtarget is in ARM mode!"); // We want to read exactly 2 bytes of data. 
- if (Region.readBytes(Address, 2, bytes) == -1) { + if (Bytes.size() < 2) { Size = 0; return MCDisassembler::Fail; } - uint16_t insn16 = (bytes[1] << 8) | bytes[0]; - DecodeStatus result = decodeInstruction(DecoderTableThumb16, MI, insn16, - Address, this, STI); - if (result != MCDisassembler::Fail) { + uint16_t Insn16 = (Bytes[1] << 8) | Bytes[0]; + DecodeStatus Result = + decodeInstruction(DecoderTableThumb16, MI, Insn16, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 2; - Check(result, AddThumbPredicate(MI)); - return result; + Check(Result, AddThumbPredicate(MI)); + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableThumbSBit16, MI, insn16, - Address, this, STI); - if (result) { + Result = decodeInstruction(DecoderTableThumbSBit16, MI, Insn16, Address, this, + STI); + if (Result) { Size = 2; bool InITBlock = ITBlock.instrInITBlock(); - Check(result, AddThumbPredicate(MI)); + Check(Result, AddThumbPredicate(MI)); AddThumb1SBit(MI, InITBlock); - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableThumb216, MI, insn16, - Address, this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableThumb216, MI, Insn16, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 2; // Nested IT blocks are UNPREDICTABLE. Must be checked before we add // the Thumb predicate. if (MI.getOpcode() == ARM::t2IT && ITBlock.instrInITBlock()) - result = MCDisassembler::SoftFail; + Result = MCDisassembler::SoftFail; - Check(result, AddThumbPredicate(MI)); + Check(Result, AddThumbPredicate(MI)); // If we find an IT instruction, we need to parse its condition // code and mask operands so that we can apply them correctly @@ -741,115 +728,115 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, ITBlock.setITState(Firstcond, Mask); } - return result; + return Result; } // We want to read exactly 4 bytes of data. 
- if (Region.readBytes(Address, 4, bytes) == -1) { + if (Bytes.size() < 4) { Size = 0; return MCDisassembler::Fail; } - uint32_t insn32 = (bytes[3] << 8) | - (bytes[2] << 0) | - (bytes[1] << 24) | - (bytes[0] << 16); + uint32_t Insn32 = + (Bytes[3] << 8) | (Bytes[2] << 0) | (Bytes[1] << 24) | (Bytes[0] << 16); MI.clear(); - result = decodeInstruction(DecoderTableThumb32, MI, insn32, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableThumb32, MI, Insn32, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; bool InITBlock = ITBlock.instrInITBlock(); - Check(result, AddThumbPredicate(MI)); + Check(Result, AddThumbPredicate(MI)); AddThumb1SBit(MI, InITBlock); - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableThumb232, MI, insn32, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableThumb232, MI, Insn32, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - Check(result, AddThumbPredicate(MI)); - return result; + Check(Result, AddThumbPredicate(MI)); + return Result; } - if (fieldFromInstruction(insn32, 28, 4) == 0xE) { + if (fieldFromInstruction(Insn32, 28, 4) == 0xE) { MI.clear(); - result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableVFP32, MI, Insn32, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; UpdateThumbVFPPredicate(MI); - return result; + return Result; } } MI.clear(); - result = decodeInstruction(DecoderTableVFPV832, MI, insn32, Address, this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableVFPV832, MI, Insn32, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } - if (fieldFromInstruction(insn32, 28, 4) == 0xE) { + if (fieldFromInstruction(Insn32, 28, 4) 
== 0xE) { MI.clear(); - result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = decodeInstruction(DecoderTableNEONDup32, MI, Insn32, Address, this, + STI); + if (Result != MCDisassembler::Fail) { Size = 4; - Check(result, AddThumbPredicate(MI)); - return result; + Check(Result, AddThumbPredicate(MI)); + return Result; } } - if (fieldFromInstruction(insn32, 24, 8) == 0xF9) { + if (fieldFromInstruction(Insn32, 24, 8) == 0xF9) { MI.clear(); - uint32_t NEONLdStInsn = insn32; + uint32_t NEONLdStInsn = Insn32; NEONLdStInsn &= 0xF0FFFFFF; NEONLdStInsn |= 0x04000000; - result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn, + Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn, Address, this, STI); - if (result != MCDisassembler::Fail) { + if (Result != MCDisassembler::Fail) { Size = 4; - Check(result, AddThumbPredicate(MI)); - return result; + Check(Result, AddThumbPredicate(MI)); + return Result; } } - if (fieldFromInstruction(insn32, 24, 4) == 0xF) { + if (fieldFromInstruction(Insn32, 24, 4) == 0xF) { MI.clear(); - uint32_t NEONDataInsn = insn32; + uint32_t NEONDataInsn = Insn32; NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24 NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 NEONDataInsn |= 0x12000000; // Set bits 28 and 25 - result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn, + Result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn, Address, this, STI); - if (result != MCDisassembler::Fail) { + if (Result != MCDisassembler::Fail) { Size = 4; - Check(result, AddThumbPredicate(MI)); - return result; + Check(Result, AddThumbPredicate(MI)); + return Result; } MI.clear(); - uint32_t NEONCryptoInsn = insn32; + uint32_t NEONCryptoInsn = Insn32; NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24 NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 NEONCryptoInsn |= 0x12000000; // 
Set bits 28 and 25 - result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn, + Result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn, Address, this, STI); - if (result != MCDisassembler::Fail) { + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } MI.clear(); - uint32_t NEONv8Insn = insn32; + uint32_t NEONv8Insn = Insn32; NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26 - result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address, + Result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address, this, STI); - if (result != MCDisassembler::Fail) { + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } } @@ -1015,7 +1002,11 @@ static const uint16_t DPRDecoderTable[] = { static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > 31) + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + bool hasD16 = featureBits & ARM::FeatureD16; + + if (RegNo > 31 || (hasD16 && RegNo > 15)) return MCDisassembler::Fail; unsigned Register = DPRDecoderTable[RegNo]; @@ -2973,11 +2964,9 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, if (size == 0x3) { if (align == 0) return MCDisassembler::Fail; - size = 4; align = 16; } else { if (size == 2) { - size = 1 << size; align *= 8; } else { size = 1 << size; @@ -3267,6 +3256,11 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, unsigned Rt = fieldFromInstruction(Insn, 12, 4); unsigned Rn = fieldFromInstruction(Insn, 16, 4); + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + bool hasMP = featureBits & ARM::FeatureMP; + bool hasV7Ops = featureBits & ARM::HasV7Ops; + if (Rn == 15) { switch (Inst.getOpcode()) { case ARM::t2LDRBs: @@ -3302,11 +3296,10 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, case 
ARM::t2LDRSHs: return MCDisassembler::Fail; case ARM::t2LDRHs: - // FIXME: this instruction is only available with MP extensions, - // this should be checked first but we don't have access to the - // feature bits here. Inst.setOpcode(ARM::t2PLDWs); break; + case ARM::t2LDRSBs: + Inst.setOpcode(ARM::t2PLIs); default: break; } @@ -3314,8 +3307,14 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, switch (Inst.getOpcode()) { case ARM::t2PLDs: - case ARM::t2PLDWs: + break; case ARM::t2PLIs: + if (!hasV7Ops) + return MCDisassembler::Fail; + break; + case ARM::t2PLDWs: + if (!hasV7Ops || !hasMP) + return MCDisassembler::Fail; break; default: if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) @@ -3341,6 +3340,12 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, unsigned imm = fieldFromInstruction(Insn, 0, 8); imm |= (U << 8); imm |= (Rn << 9); + unsigned add = fieldFromInstruction(Insn, 9, 1); + + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + bool hasMP = featureBits & ARM::FeatureMP; + bool hasV7Ops = featureBits & ARM::HasV7Ops; if (Rn == 15) { switch (Inst.getOpcode()) { @@ -3375,6 +3380,13 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, switch (Inst.getOpcode()) { case ARM::t2LDRSHi8: return MCDisassembler::Fail; + case ARM::t2LDRHi8: + if (!add) + Inst.setOpcode(ARM::t2PLDWi8); + break; + case ARM::t2LDRSBi8: + Inst.setOpcode(ARM::t2PLIi8); + break; default: break; } @@ -3382,9 +3394,15 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, switch (Inst.getOpcode()) { case ARM::t2PLDi8: + break; case ARM::t2PLIi8: - case ARM::t2PLDWi8: + if (!hasV7Ops) + return MCDisassembler::Fail; break; + case ARM::t2PLDWi8: + if (!hasV7Ops || !hasMP) + return MCDisassembler::Fail; + break; default: if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; @@ -3404,6 +3422,11 @@ static DecodeStatus 
DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, unsigned imm = fieldFromInstruction(Insn, 0, 12); imm |= (Rn << 13); + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + bool hasMP = (featureBits & ARM::FeatureMP); + bool hasV7Ops = (featureBits & ARM::HasV7Ops); + if (Rn == 15) { switch (Inst.getOpcode()) { case ARM::t2LDRi12: @@ -3438,7 +3461,10 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, case ARM::t2LDRSHi12: return MCDisassembler::Fail; case ARM::t2LDRHi12: - Inst.setOpcode(ARM::t2PLDi12); + Inst.setOpcode(ARM::t2PLDWi12); + break; + case ARM::t2LDRSBi12: + Inst.setOpcode(ARM::t2PLIi12); break; default: break; @@ -3447,9 +3473,15 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, switch (Inst.getOpcode()) { case ARM::t2PLDi12: - case ARM::t2PLDWi12: + break; case ARM::t2PLIi12: + if (!hasV7Ops) + return MCDisassembler::Fail; break; + case ARM::t2PLDWi12: + if (!hasV7Ops || !hasMP) + return MCDisassembler::Fail; + break; default: if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; @@ -3507,6 +3539,10 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, unsigned U = fieldFromInstruction(Insn, 23, 1); int imm = fieldFromInstruction(Insn, 0, 12); + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + bool hasV7Ops = (featureBits & ARM::HasV7Ops); + if (Rt == 15) { switch (Inst.getOpcode()) { case ARM::t2LDRBpci: @@ -3525,7 +3561,10 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, switch(Inst.getOpcode()) { case ARM::t2PLDpci: + break; case ARM::t2PLIpci: + if (!hasV7Ops) + return MCDisassembler::Fail; break; default: if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) @@ -3974,7 +4013,85 @@ static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val, static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, uint64_t 
Address, const void *Decoder) { - if (!Val) return MCDisassembler::Fail; + DecodeStatus S = MCDisassembler::Success; + uint64_t FeatureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + if (FeatureBits & ARM::FeatureMClass) { + unsigned ValLow = Val & 0xff; + + // Validate the SYSm value first. + switch (ValLow) { + case 0: // apsr + case 1: // iapsr + case 2: // eapsr + case 3: // xpsr + case 5: // ipsr + case 6: // epsr + case 7: // iepsr + case 8: // msp + case 9: // psp + case 16: // primask + case 20: // control + break; + case 17: // basepri + case 18: // basepri_max + case 19: // faultmask + if (!(FeatureBits & ARM::HasV7Ops)) + // Values basepri, basepri_max and faultmask are only valid for v7m. + return MCDisassembler::Fail; + break; + default: + return MCDisassembler::Fail; + } + + if (Inst.getOpcode() == ARM::t2MSR_M) { + unsigned Mask = fieldFromInstruction(Val, 10, 2); + if (!(FeatureBits & ARM::HasV7Ops)) { + // The ARMv6-M MSR bits {11-10} can be only 0b10, other values are + // unpredictable. + if (Mask != 2) + S = MCDisassembler::SoftFail; + } + else { + // The ARMv7-M architecture stores an additional 2-bit mask value in + // MSR bits {11-10}. The mask is used only with apsr, iapsr, eapsr and + // xpsr, it has to be 0b10 in other cases. Bit mask{1} indicates if + // the NZCVQ bits should be moved by the instruction. Bit mask{0} + // indicates the move for the GE{3:0} bits, the mask{0} bit can be set + // only if the processor includes the DSP extension. 
+ if (Mask == 0 || (Mask != 2 && ValLow > 3) || + (!(FeatureBits & ARM::FeatureDSPThumb2) && (Mask & 1))) + S = MCDisassembler::SoftFail; + } + } + } else { + // A/R class + if (Val == 0) + return MCDisassembler::Fail; + } + Inst.addOperand(MCOperand::CreateImm(Val)); + return S; +} + +static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + + unsigned R = fieldFromInstruction(Val, 5, 1); + unsigned SysM = fieldFromInstruction(Val, 0, 5); + + // The table of encodings for these banked registers comes from B9.2.3 of the + // ARM ARM. There are patterns, but nothing regular enough to make this logic + // neater. So by fiat, these values are UNPREDICTABLE: + if (!R) { + if (SysM == 0x7 || SysM == 0xf || SysM == 0x18 || SysM == 0x19 || + SysM == 0x1a || SysM == 0x1b) + return MCDisassembler::SoftFail; + } else { + if (SysM != 0xe && SysM != 0x10 && SysM != 0x12 && SysM != 0x14 && + SysM != 0x16 && SysM != 0x1c && SysM != 0x1e) + return MCDisassembler::SoftFail; + } + Inst.addOperand(MCOperand::CreateImm(Val)); return MCDisassembler::Success; } diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt index 52d8338..a64a8a9 100644 --- a/lib/Target/ARM/Disassembler/LLVMBuild.txt +++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = ARMDisassembler parent = ARM -required_libraries = ARMDesc ARMInfo MC Support +required_libraries = ARMDesc ARMInfo MCDisassembler Support add_to_library_groups = ARM diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 228fb57..0570084 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -503,30 +503,6 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, // Addressing Mode #3 //===--------------------------------------------------------------------===// -void 
ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); - const MCOperand &MO3 = MI->getOperand(Op+2); - - O << markup("<mem:") << "["; - printRegName(O, MO1.getReg()); - O << "], " << markup(">"); - - if (MO2.getReg()) { - O << (char)ARM_AM::getAM3Op(MO3.getImm()); - printRegName(O, MO2.getReg()); - return; - } - - unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); - O << markup("<imm:") - << '#' - << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) - << ImmOffs - << markup(">"); -} - void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O, bool AlwaysPrintImm0) { @@ -568,13 +544,9 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, return; } - const MCOperand &MO3 = MI->getOperand(Op+2); - unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm()); - - if (IdxMode == ARMII::IndexModePost) { - printAM3PostIndexOp(MI, Op, O); - return; - } + assert(ARM_AM::getAM3IdxMode(MI->getOperand(Op + 2).getImm()) != + ARMII::IndexModePost && + "unexpected idxmode"); printAM3PreOrOffsetIndexOp(MI, Op, O, AlwaysPrintImm0); } @@ -807,52 +779,56 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, const MCOperand &Op = MI->getOperand(OpNum); unsigned SpecRegRBit = Op.getImm() >> 4; unsigned Mask = Op.getImm() & 0xf; + uint64_t FeatureBits = getAvailableFeatures(); - if (getAvailableFeatures() & ARM::FeatureMClass) { + if (FeatureBits & ARM::FeatureMClass) { unsigned SYSm = Op.getImm(); unsigned Opcode = MI->getOpcode(); - // For reads of the special registers ignore the "mask encoding" bits - // which are only for writes. - if (Opcode == ARM::t2MRS_M) - SYSm &= 0xff; + + // For writes, handle extended mask bits if the DSP extension is present. 
+ if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::FeatureDSPThumb2)) { + switch (SYSm) { + case 0x400: O << "apsr_g"; return; + case 0xc00: O << "apsr_nzcvqg"; return; + case 0x401: O << "iapsr_g"; return; + case 0xc01: O << "iapsr_nzcvqg"; return; + case 0x402: O << "eapsr_g"; return; + case 0xc02: O << "eapsr_nzcvqg"; return; + case 0x403: O << "xpsr_g"; return; + case 0xc03: O << "xpsr_nzcvqg"; return; + } + } + + // Handle the basic 8-bit mask. + SYSm &= 0xff; + + if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::HasV7Ops)) { + // ARMv7-M deprecates using MSR APSR without a _<bits> qualifier as an + // alias for MSR APSR_nzcvq. + switch (SYSm) { + case 0: O << "apsr_nzcvq"; return; + case 1: O << "iapsr_nzcvq"; return; + case 2: O << "eapsr_nzcvq"; return; + case 3: O << "xpsr_nzcvq"; return; + } + } + switch (SYSm) { default: llvm_unreachable("Unexpected mask value!"); - case 0: - case 0x800: O << "apsr"; return; // with _nzcvq bits is an alias for aspr - case 0x400: O << "apsr_g"; return; - case 0xc00: O << "apsr_nzcvqg"; return; - case 1: - case 0x801: O << "iapsr"; return; // with _nzcvq bits is an alias for iapsr - case 0x401: O << "iapsr_g"; return; - case 0xc01: O << "iapsr_nzcvqg"; return; - case 2: - case 0x802: O << "eapsr"; return; // with _nzcvq bits is an alias for eapsr - case 0x402: O << "eapsr_g"; return; - case 0xc02: O << "eapsr_nzcvqg"; return; - case 3: - case 0x803: O << "xpsr"; return; // with _nzcvq bits is an alias for xpsr - case 0x403: O << "xpsr_g"; return; - case 0xc03: O << "xpsr_nzcvqg"; return; - case 5: - case 0x805: O << "ipsr"; return; - case 6: - case 0x806: O << "epsr"; return; - case 7: - case 0x807: O << "iepsr"; return; - case 8: - case 0x808: O << "msp"; return; - case 9: - case 0x809: O << "psp"; return; - case 0x10: - case 0x810: O << "primask"; return; - case 0x11: - case 0x811: O << "basepri"; return; - case 0x12: - case 0x812: O << "basepri_max"; return; - case 0x13: - case 0x813: O << "faultmask"; return; - case 
0x14: - case 0x814: O << "control"; return; + case 0: O << "apsr"; return; + case 1: O << "iapsr"; return; + case 2: O << "eapsr"; return; + case 3: O << "xpsr"; return; + case 5: O << "ipsr"; return; + case 6: O << "epsr"; return; + case 7: O << "iepsr"; return; + case 8: O << "msp"; return; + case 9: O << "psp"; return; + case 16: O << "primask"; return; + case 17: O << "basepri"; return; + case 18: O << "basepri_max"; return; + case 19: O << "faultmask"; return; + case 20: O << "control"; return; } } @@ -882,6 +858,42 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, } } +void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + uint32_t Banked = MI->getOperand(OpNum).getImm(); + uint32_t R = (Banked & 0x20) >> 5; + uint32_t SysM = Banked & 0x1f; + + // Nothing much we can do about this, the encodings are specified in B9.2.3 of + // the ARM ARM v7C, and are all over the shop. + if (R) { + O << "SPSR_"; + + switch(SysM) { + case 0x0e: O << "fiq"; return; + case 0x10: O << "irq"; return; + case 0x12: O << "svc"; return; + case 0x14: O << "abt"; return; + case 0x16: O << "und"; return; + case 0x1c: O << "mon"; return; + case 0x1e: O << "hyp"; return; + default: llvm_unreachable("Invalid banked SPSR register"); + } + } + + assert(!R && "should have dealt with SPSR regs"); + const char *RegNames[] = { + "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr", "", + "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq", "lr_fiq", "", + "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt", "sp_abt", "lr_und", "sp_und", + "", "", "", "", "lr_mon", "sp_mon", "elr_hyp", "sp_hyp" + }; + const char *Name = RegNames[SysM]; + assert(Name[0] && "invalid banked register operand"); + + O << Name; +} + void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); diff --git 
a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index f671fe4..09fd536 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMINSTPRINTER_H -#define ARMINSTPRINTER_H +#ifndef LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H +#define LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -51,7 +51,6 @@ public: void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O); void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O, bool AlwaysPrintImm0); void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum, @@ -117,6 +116,7 @@ public: void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printBankedRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index b6c85c2..f0eed9b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H -#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H +#define 
LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -575,6 +575,53 @@ namespace ARM_AM { return Val; } + // Generic validation for single-byte immediate (0X00, 00X0, etc). + static inline bool isNEONBytesplat(unsigned Value, unsigned Size) { + assert(Size >= 1 && Size <= 4 && "Invalid size"); + unsigned count = 0; + for (unsigned i = 0; i < Size; ++i) { + if (Value & 0xff) count++; + Value >>= 8; + } + return count == 1; + } + + /// Checks if Value is a correct immediate for instructions like VBIC/VORR. + static inline bool isNEONi16splat(unsigned Value) { + if (Value > 0xffff) + return false; + // i16 value with set bits only in one byte X0 or 0X. + return Value == 0 || isNEONBytesplat(Value, 2); + } + + // Encode NEON 16 bits Splat immediate for instructions like VBIC/VORR + static inline unsigned encodeNEONi16splat(unsigned Value) { + assert(isNEONi16splat(Value) && "Invalid NEON splat value"); + if (Value >= 0x100) + Value = (Value >> 8) | 0xa00; + else + Value |= 0x800; + return Value; + } + + /// Checks if Value is a correct immediate for instructions like VBIC/VORR. + static inline bool isNEONi32splat(unsigned Value) { + // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X. + return Value == 0 || isNEONBytesplat(Value, 4); + } + + /// Encode NEON 32 bits Splat immediate for instructions like VBIC/VORR. 
+ static inline unsigned encodeNEONi32splat(unsigned Value) { + assert(isNEONi32splat(Value) && "Invalid NEON splat value"); + if (Value >= 0x100 && Value <= 0xff00) + Value = (Value >> 8) | 0x200; + else if (Value > 0xffff && Value <= 0xff0000) + Value = (Value >> 16) | 0x400; + else if (Value > 0xffffff) + Value = (Value >> 24) | 0x600; + return Value; + } + AMSubMode getLoadStoreMultipleSubMode(int Opcode); //===--------------------------------------------------------------------===// diff --git a/lib/Target/ARM/MCTargetDesc/ARMArchName.h b/lib/Target/ARM/MCTargetDesc/ARMArchName.h index 34b9fc1..bc05673 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMArchName.h +++ b/lib/Target/ARM/MCTargetDesc/ARMArchName.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMARCHNAME_H -#define ARMARCHNAME_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMARCHNAME_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMARCHNAME_H namespace llvm { namespace ARM { @@ -24,4 +24,4 @@ enum ArchKind { } // namespace ARM } // namespace llvm -#endif // ARMARCHNAME_H +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 7acd9cc..0b2e3b0 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -9,6 +9,10 @@ #include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMAsmBackend.h" +#include "MCTargetDesc/ARMAsmBackendDarwin.h" +#include "MCTargetDesc/ARMAsmBackendELF.h" +#include "MCTargetDesc/ARMAsmBackendWinCOFF.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "llvm/ADT/StringSwitch.h" @@ -35,164 +39,136 @@ namespace { class ARMELFObjectWriter : public MCELFObjectTargetWriter { public: ARMELFObjectWriter(uint8_t OSABI) - : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM, - /*HasRelocationAddend*/ false) {} + : 
MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM, + /*HasRelocationAddend*/ false) {} }; -class ARMAsmBackend : public MCAsmBackend { - const MCSubtargetInfo* STI; - bool isThumbMode; // Currently emitting Thumb code. - bool IsLittleEndian; // Big or little endian. -public: - ARMAsmBackend(const Target &T, const StringRef TT, bool IsLittle) - : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")), - isThumbMode(TT.startswith("thumb")), IsLittleEndian(IsLittle) {} - - ~ARMAsmBackend() { - delete STI; - } - - unsigned getNumFixupKinds() const override { - return ARM::NumTargetFixupKinds; - } - - bool hasNOP() const { - return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0; - } +const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = { + // This table *must* be in the order that the fixup_* kinds are defined in + // ARMFixupKinds.h. + // + // Name Offset (bits) Size (bits) Flags + {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_ldst_pcrel_12", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_pcrel_10", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_thumb_adr_pcrel_10", 0, 8, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_adr_pcrel_12", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + 
{"fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_cp", 0, 8, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel}, + // movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 + // - 19. + {"fixup_arm_movt_hi16", 0, 20, 0}, + {"fixup_arm_movw_lo16", 0, 20, 0}, + {"fixup_t2_movt_hi16", 0, 20, 0}, + {"fixup_t2_movw_lo16", 0, 20, 0}, + }; + const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = { + // This table *must* be in the order that the fixup_* kinds are defined in + // ARMFixupKinds.h. 
+ // + // Name Offset (bits) Size (bits) Flags + {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_ldst_pcrel_12", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_pcrel_10", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_thumb_adr_pcrel_10", 8, 8, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_adr_pcrel_12", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_condbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_uncondbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_uncondbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_condbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_blx", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_cp", 8, 8, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_thumb_bcc", 8, 8, MCFixupKindInfo::FKF_IsPCRel}, + // movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 + // - 19. 
+ {"fixup_arm_movt_hi16", 12, 20, 0}, + {"fixup_arm_movw_lo16", 12, 20, 0}, + {"fixup_t2_movt_hi16", 12, 20, 0}, + {"fixup_t2_movw_lo16", 12, 20, 0}, + }; + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return (IsLittleEndian ? InfosLE : InfosBE)[Kind - FirstTargetFixupKind]; +} - const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { - const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = { -// This table *must* be in the order that the fixup_* kinds are defined in -// ARMFixupKinds.h. -// -// Name Offset (bits) Size (bits) Flags -{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel 
}, -{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel }, -// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19. -{ "fixup_arm_movt_hi16", 0, 20, 0 }, -{ "fixup_arm_movw_lo16", 0, 20, 0 }, -{ "fixup_t2_movt_hi16", 0, 20, 0 }, -{ "fixup_t2_movw_lo16", 0, 20, 0 }, - }; - const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = { -// This table *must* be in the order that the fixup_* kinds are defined in -// ARMFixupKinds.h. -// -// Name Offset (bits) Size (bits) Flags -{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_thumb_adr_pcrel_10",8, 8, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_condbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_uncondbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_uncondbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_condbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_blx", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ 
"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_cp", 8, 8, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_thumb_bcc", 8, 8, MCFixupKindInfo::FKF_IsPCRel }, -// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19. -{ "fixup_arm_movt_hi16", 12, 20, 0 }, -{ "fixup_arm_movw_lo16", 12, 20, 0 }, -{ "fixup_t2_movt_hi16", 12, 20, 0 }, -{ "fixup_t2_movw_lo16", 12, 20, 0 }, - }; - - if (Kind < FirstTargetFixupKind) - return MCAsmBackend::getFixupKindInfo(Kind); - - assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && - "Invalid kind!"); - return (IsLittleEndian ? InfosLE : InfosBE)[Kind - FirstTargetFixupKind]; - } - - /// processFixupValue - Target hook to process the literal value of a fixup - /// if necessary. - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; - - - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel) const override; - - bool mayNeedRelaxation(const MCInst &Inst) const override; - - bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const override; - - void relaxInstruction(const MCInst &Inst, MCInst &Res) const override; - - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; - - void handleAssemblerFlag(MCAssemblerFlag Flag) override { - switch (Flag) { - default: break; - case MCAF_Code16: - setIsThumb(true); - break; - case MCAF_Code32: - setIsThumb(false); - break; - } +void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) { + switch (Flag) { + default: + break; + case MCAF_Code16: + setIsThumb(true); + break; + case 
MCAF_Code32: + setIsThumb(false); + break; } - - unsigned getPointerSize() const { return 4; } - bool isThumb() const { return isThumbMode; } - void setIsThumb(bool it) { isThumbMode = it; } - bool isLittle() const { return IsLittleEndian; } -}; +} } // end anonymous namespace static unsigned getRelaxedOpcode(unsigned Op) { switch (Op) { - default: return Op; - case ARM::tBcc: return ARM::t2Bcc; - case ARM::tLDRpci: return ARM::t2LDRpci; - case ARM::tADR: return ARM::t2ADR; - case ARM::tB: return ARM::t2B; - case ARM::tCBZ: return ARM::tHINT; - case ARM::tCBNZ: return ARM::tHINT; + default: + return Op; + case ARM::tBcc: + return ARM::t2Bcc; + case ARM::tLDRpci: + return ARM::t2LDRpci; + case ARM::tADR: + return ARM::t2ADR; + case ARM::tB: + return ARM::t2B; + case ARM::tCBZ: + return ARM::tHINT; + case ARM::tCBNZ: + return ARM::tHINT; } } @@ -202,8 +178,7 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { return false; } -bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, - uint64_t Value, +bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { switch ((unsigned)Fixup.getKind()) { @@ -265,7 +240,7 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { Res.addOperand(MCOperand::CreateImm(14)); Res.addOperand(MCOperand::CreateReg(0)); return; - } + } // The rest of instructions we're relaxing have the same operands. // We just need to update to the proper opcode. 
@@ -276,11 +251,11 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8 const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP - const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0 + const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0 const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP if (isThumb()) { - const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding - : Thumb1_16bitNopEncoding; + const uint16_t nopEncoding = + hasNOP() ? Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding; uint64_t NumNops = Count / 2; for (uint64_t i = 0; i != NumNops; ++i) OW->Write16(nopEncoding); @@ -289,18 +264,26 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } // ARM mode - const uint32_t nopEncoding = hasNOP() ? ARMv6T2_NopEncoding - : ARMv4_NopEncoding; + const uint32_t nopEncoding = + hasNOP() ? ARMv6T2_NopEncoding : ARMv4_NopEncoding; uint64_t NumNops = Count / 4; for (uint64_t i = 0; i != NumNops; ++i) OW->Write32(nopEncoding); // FIXME: should this function return false when unable to write exactly // 'Count' bytes with NOP encodings? 
switch (Count % 4) { - default: break; // No leftover bytes to write - case 1: OW->Write8(0); break; - case 2: OW->Write16(0); break; - case 3: OW->Write16(0); OW->Write8(0xa0); break; + default: + break; // No leftover bytes to write + case 1: + OW->Write8(0); + break; + case 2: + OW->Write16(0); + break; + case 3: + OW->Write16(0); + OW->Write8(0xa0); + break; } return true; @@ -313,8 +296,7 @@ static uint32_t swapHalfWords(uint32_t Value, bool IsLittleEndian) { uint32_t Swapped = (Value & 0xFFFF0000) >> 16; Swapped |= (Value & 0x0000FFFF) << 16; return Swapped; - } - else + } else return Value; } @@ -351,7 +333,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_movt_hi16: if (!IsPCRel) Value >>= 16; - // Fallthrough + // Fallthrough case ARM::fixup_arm_movw_lo16: { unsigned Hi4 = (Value & 0xF000) >> 12; unsigned Lo12 = Value & 0x0FFF; @@ -363,7 +345,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_t2_movt_hi16: if (!IsPCRel) Value >>= 16; - // Fallthrough + // Fallthrough case ARM::fixup_t2_movw_lo16: { unsigned Hi4 = (Value & 0xF000) >> 12; unsigned i = (Value & 0x800) >> 11; @@ -379,7 +361,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_ldst_pcrel_12: // ARM PC-relative values are offset by 8. Value -= 4; - // FALLTHROUGH + // FALLTHROUGH case ARM::fixup_t2_ldst_pcrel_12: { // Offset by 4, adjusted by two due to the half-word ordering of thumb. Value -= 4; @@ -438,7 +420,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_blx: // These values don't encode the low two bits since they're always zero. // Offset by 8 just as above. 
- if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) + if (const MCSymbolRefExpr *SRE = + dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL) return 0; return 0xffffff & ((Value - 8) >> 2); @@ -447,17 +430,17 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, Value >>= 1; // Low bit is not encoded. uint32_t out = 0; - bool I = Value & 0x800000; + bool I = Value & 0x800000; bool J1 = Value & 0x400000; bool J2 = Value & 0x200000; J1 ^= I; J2 ^= I; - out |= I << 26; // S bit - out |= !J1 << 13; // J1 bit - out |= !J2 << 11; // J2 bit - out |= (Value & 0x1FF800) << 5; // imm6 field - out |= (Value & 0x0007FF); // imm11 field + out |= I << 26; // S bit + out |= !J1 << 13; // J1 bit + out |= !J2 << 11; // J2 bit + out |= (Value & 0x1FF800) << 5; // imm6 field + out |= (Value & 0x0007FF); // imm11 field return swapHalfWords(out, IsLittleEndian); } @@ -498,7 +481,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits); uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | - (uint16_t)imm11Bits); + (uint16_t)imm11Bits); return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian); } case ARM::fixup_arm_thumb_blx: { @@ -515,7 +498,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // Note that the halfwords are stored high first, low second; so we need // to transpose the fixup value here to map properly. 
uint32_t offset = (Value - 2) >> 2; - if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) + if (const MCSymbolRefExpr *SRE = + dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL) offset = 0; uint32_t signBit = (offset & 0x400000) >> 22; @@ -528,7 +512,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits); uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | - ((uint16_t)imm10LBits) << 1); + ((uint16_t)imm10LBits) << 1); return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian); } case ARM::fixup_arm_thumb_cp: @@ -564,7 +548,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_pcrel_10: Value = Value - 4; // ARM fixups offset by an additional word and don't // need to adjust for the half-word ordering. - // Fall through. + // Fall through. case ARM::fixup_t2_pcrel_10: { // Offset by 4, adjusted by two due to the half-word ordering of thumb. Value = Value - 4; @@ -735,7 +719,8 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, bool IsPCRel) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian); - if (!Value) return; // Doesn't change encoding. + if (!Value) + return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); @@ -757,80 +742,36 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, } } -namespace { -// FIXME: This should be in a separate file. 
-class ARMWinCOFFAsmBackend : public ARMAsmBackend { -public: - ARMWinCOFFAsmBackend(const Target &T, const StringRef &Triple) - : ARMAsmBackend(T, Triple, true) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { - return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false); - } -}; - -// FIXME: This should be in a separate file. -// ELF is an ELF of course... -class ELFARMAsmBackend : public ARMAsmBackend { -public: - uint8_t OSABI; - ELFARMAsmBackend(const Target &T, const StringRef TT, - uint8_t OSABI, bool IsLittle) - : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) { } - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { - return createARMELFObjectWriter(OS, OSABI, isLittle()); - } -}; - -// FIXME: This should be in a separate file. -class DarwinARMAsmBackend : public ARMAsmBackend { -public: - const MachO::CPUSubTypeARM Subtype; - DarwinARMAsmBackend(const Target &T, const StringRef TT, - MachO::CPUSubTypeARM st) - : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) { - HasDataInCodeSupport = true; - } - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { - return createARMMachObjectWriter(OS, /*Is64Bit=*/false, - MachO::CPU_TYPE_ARM, - Subtype); - } -}; - -} // end anonymous namespace - MCAsmBackend *llvm::createARMAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU, - bool isLittle) { + const MCRegisterInfo &MRI, StringRef TT, + StringRef CPU, bool isLittle) { Triple TheTriple(TT); switch (TheTriple.getObjectFormat()) { - default: llvm_unreachable("unsupported object format"); + default: + llvm_unreachable("unsupported object format"); case Triple::MachO: { MachO::CPUSubTypeARM CS = - StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName()) - .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T) - .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ) - .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6) - .Cases("armv6m", "thumbv6m", 
MachO::CPU_SUBTYPE_ARM_V6M) - .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM) - .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K) - .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M) - .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S) - .Default(MachO::CPU_SUBTYPE_ARM_V7); - - return new DarwinARMAsmBackend(T, TT, CS); + StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName()) + .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T) + .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ) + .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6) + .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M) + .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM) + .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K) + .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M) + .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S) + .Default(MachO::CPU_SUBTYPE_ARM_V7); + + return new ARMAsmBackendDarwin(T, TT, CS); } case Triple::COFF: assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); - return new ARMWinCOFFAsmBackend(T, TT); + return new ARMAsmBackendWinCOFF(T, TT); case Triple::ELF: assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target"); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); - return new ELFARMAsmBackend(T, TT, OSABI, isLittle); + return new ARMAsmBackendELF(T, TT, OSABI, isLittle); } } @@ -847,14 +788,13 @@ MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T, } MCAsmBackend *llvm::createThumbLEAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU) { + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { return createARMAsmBackend(T, MRI, TT, CPU, true); } MCAsmBackend *llvm::createThumbBEAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU) { + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { return createARMAsmBackend(T, MRI, TT, CPU, false); } - diff 
--git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h new file mode 100644 index 0000000..f4f1082 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -0,0 +1,69 @@ +//===-- ARMAsmBackend.h - ARM Assembler Backend -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKEND_H +#define LLVM_LIB_TARGET_ARM_ARMASMBACKEND_H + +#include "MCTargetDesc/ARMFixupKinds.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCSubtargetInfo.h" + +using namespace llvm; + +namespace { + +class ARMAsmBackend : public MCAsmBackend { + const MCSubtargetInfo *STI; + bool isThumbMode; // Currently emitting Thumb code. + bool IsLittleEndian; // Big or little endian. +public: + ARMAsmBackend(const Target &T, StringRef TT, bool IsLittle) + : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")), + isThumbMode(TT.startswith("thumb")), IsLittleEndian(IsLittle) {} + + ~ARMAsmBackend() override { delete STI; } + + unsigned getNumFixupKinds() const override { + return ARM::NumTargetFixupKinds; + } + + bool hasNOP() const { return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0; } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; + + /// processFixupValue - Target hook to process the literal value of a fixup + /// if necessary. 
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + const MCValue &Target, uint64_t &Value, + bool &IsResolved) override; + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value, bool IsPCRel) const override; + + bool mayNeedRelaxation(const MCInst &Inst) const override; + + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override; + + void relaxInstruction(const MCInst &Inst, MCInst &Res) const override; + + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; + + void handleAssemblerFlag(MCAssemblerFlag Flag) override; + + unsigned getPointerSize() const { return 4; } + bool isThumb() const { return isThumbMode; } + void setIsThumb(bool it) { isThumbMode = it; } + bool isLittle() const { return IsLittleEndian; } +}; +} // end anonymous namespace + +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h new file mode 100644 index 0000000..3bd7ab7 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -0,0 +1,33 @@ +//===-- ARMAsmBackendDarwin.h ARM Asm Backend Darwin ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H +#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H + +#include "llvm/Support/MachO.h" + +using namespace llvm; + +namespace { +class ARMAsmBackendDarwin : public ARMAsmBackend { +public: + const MachO::CPUSubTypeARM Subtype; + ARMAsmBackendDarwin(const Target &T, StringRef TT, MachO::CPUSubTypeARM st) + : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) { + HasDataInCodeSupport = true; + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM, + Subtype); + } +}; +} + +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h new file mode 100644 index 0000000..4efd325 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h @@ -0,0 +1,27 @@ +//===-- ARMAsmBackendELF.h ARM Asm Backend ELF -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_ELFARMASMBACKEND_H +#define LLVM_LIB_TARGET_ARM_ELFARMASMBACKEND_H + +using namespace llvm; +namespace { +class ARMAsmBackendELF : public ARMAsmBackend { +public: + uint8_t OSABI; + ARMAsmBackendELF(const Target &T, StringRef TT, uint8_t OSABI, bool IsLittle) + : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {} + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createARMELFObjectWriter(OS, OSABI, isLittle()); + } +}; +} + +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h new file mode 100644 index 0000000..33be347 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h @@ -0,0 +1,26 @@ +//===-- ARMAsmBackendWinCOFF.h - ARM Asm Backend WinCOFF --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H +#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H + +using namespace llvm; + +namespace { +class ARMAsmBackendWinCOFF : public ARMAsmBackend { +public: + ARMAsmBackendWinCOFF(const Target &T, StringRef Triple) + : ARMAsmBackend(T, Triple, true) {} + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false); + } +}; +} + +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index 1686d76..4289a73 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -14,8 +14,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMBASEINFO_H -#define ARMBASEINFO_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMBASEINFO_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMBASEINFO_H #include "ARMMCTargetDesc.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 1c84263..f24b419 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -37,7 +37,8 @@ namespace { unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; - bool needsRelocateWithSymbol(unsigned Type) const override; + bool needsRelocateWithSymbol(const MCSymbolData &SD, + unsigned Type) const override; }; } @@ -48,7 +49,8 @@ ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI) ARMELFObjectWriter::~ARMELFObjectWriter() {} -bool ARMELFObjectWriter::needsRelocateWithSymbol(unsigned Type) const { +bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, + unsigned Type) const { // FIXME: This is extremelly conservative. 
This really needs to use a // whitelist with a clear explanation for why each realocation needs to // point to the symbol, not to the section. @@ -100,7 +102,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case ARM::fixup_arm_uncondbl: switch (Modifier) { case MCSymbolRefExpr::VK_PLT: - Type = ELF::R_ARM_PLT32; + Type = ELF::R_ARM_CALL; break; case MCSymbolRefExpr::VK_ARM_TLSCALL: Type = ELF::R_ARM_TLS_CALL; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 7b5d8b0..24ee537 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -848,6 +848,14 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { /* OverwriteExisting= */ false); break; + // FPV5_D16 is identical to FP_ARMV8 except for the number of D registers, so + // uses the FP_ARMV8_D16 build attribute. + case ARM::FPV5_D16: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPARMv8B, + /* OverwriteExisting= */ false); + break; + case ARM::NEON: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv3A, @@ -1339,10 +1347,9 @@ MCStreamer *createARMNullStreamer(MCContext &Ctx) { return S; } - MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack, - bool IsThumb) { +MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool IsThumb) { ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb); new ARMTargetELFStreamer(*S); // FIXME: This should eventually end up somewhere else where more @@ -1352,8 +1359,6 @@ MCStreamer *createARMNullStreamer(MCContext &Ctx) { if (RelaxAll) S->getAssembler().setRelaxAll(true); - if (NoExecStack) - S->getAssembler().setNoExecStack(true); return S; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h 
b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index bfd9e33..46ba571 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ARM_ARMFIXUPKINDS_H -#define LLVM_ARM_ARMFIXUPKINDS_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMFIXUPKINDS_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMFIXUPKINDS_H #include "llvm/MC/MCFixup.h" diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 7a19208..1d82099 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -55,7 +55,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) { Code16Directive = ".code\t16"; Code32Directive = ".code\t32"; - HasLEB128 = true; SupportsDebugInformation = true; // Exceptions handling @@ -103,7 +102,6 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() { Code32Directive = ".code\t32"; PrivateGlobalPrefix = ".L"; - HasLEB128 = true; SupportsDebugInformation = true; ExceptionsType = ExceptionHandling::None; UseParensForSymbolVariant = true; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index 51cfa0a..f1fef41 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ARMTARGETASMINFO_H -#define LLVM_ARMTARGETASMINFO_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCASMINFO_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCASMINFO_H #include "llvm/MC/MCAsmInfoCOFF.h" #include "llvm/MC/MCAsmInfoDarwin.h" diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index e545e3c..68d32b2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ 
-35,12 +35,6 @@ void ARMMCExpr::PrintImpl(raw_ostream &OS) const { OS << ')'; } -bool -ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const { - return false; -} - void ARMMCExpr::visitUsedExpr(MCStreamer &Streamer) const { Streamer.visitUsedExpr(*getSubExpr()); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index c5c0b10..06bf6c9 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMMCEXPR_H -#define ARMMCEXPR_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCEXPR_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCEXPR_H #include "llvm/MC/MCExpr.h" @@ -58,8 +58,11 @@ public: void PrintImpl(raw_ostream &OS) const override; bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const override; - void visitUsedExpr(MCStreamer &Streamer) const override; + const MCAsmLayout *Layout, + const MCFixup *Fixup) const override { + return false; + } + void visitUsedExpr(MCStreamer &Streamer) const override; const MCSection *FindAssociatedSection() const override { return getSubExpr()->FindAssociatedSection(); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 2b3855d..98190ba 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -84,93 +84,89 @@ static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { Triple triple(TT); - // Set the boolean corresponding to the current target triple, or the default - // if one cannot be determined, to true. - unsigned Len = TT.size(); - unsigned Idx = 0; - - // FIXME: Enhance Triple helper class to extract ARM version. 
bool isThumb = triple.getArch() == Triple::thumb || triple.getArch() == Triple::thumbeb; - if (Len >= 5 && TT.substr(0, 4) == "armv") - Idx = 4; - else if (Len >= 7 && TT.substr(0, 6) == "armebv") - Idx = 6; - else if (Len >= 7 && TT.substr(0, 6) == "thumbv") - Idx = 6; - else if (Len >= 9 && TT.substr(0, 8) == "thumbebv") - Idx = 8; bool NoCPU = CPU == "generic" || CPU.empty(); std::string ARMArchFeature; - if (Idx) { - unsigned SubVer = TT[Idx]; - if (SubVer == '8') { - if (NoCPU) - // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, - // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, - // FeatureT2XtPk, FeatureCrypto, FeatureCRC - ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm," - "+trustzone,+t2xtpk,+crypto,+crc"; - else - // Use CPU to figure out the exact features - ARMArchFeature = "+v8"; - } else if (SubVer == '7') { - if (Len >= Idx+2 && TT[Idx+1] == 'm') { - isThumb = true; - if (NoCPU) - // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass - ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - } else if (Len >= Idx+3 && TT[Idx+1] == 'e'&& TT[Idx+2] == 'm') { - if (NoCPU) - // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, - // FeatureT2XtPk, FeatureMClass - ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk,+mclass"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - } else if (Len >= Idx+2 && TT[Idx+1] == 's') { - if (NoCPU) - // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS - // Swift - ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+ras"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - } else { - // v7 CPUs have lots of different feature sets. If no CPU is specified, - // then assume v7a (e.g. cortex-a8) feature set. 
Otherwise, return - // the "minimum" feature set and use CPU string to figure out the exact - // features. - if (NoCPU) - // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk - ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - } - } else if (SubVer == '6') { - if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') - ARMArchFeature = "+v6t2"; - else if (Len >= Idx+2 && TT[Idx+1] == 'm') { - isThumb = true; - if (NoCPU) - // v6m: FeatureNoARM, FeatureMClass - ARMArchFeature = "+v6m,+noarm,+mclass"; - else - ARMArchFeature = "+v6"; - } else - ARMArchFeature = "+v6"; - } else if (SubVer == '5') { - if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') - ARMArchFeature = "+v5te"; - else - ARMArchFeature = "+v5t"; - } else if (SubVer == '4' && Len >= Idx+2 && TT[Idx+1] == 't') - ARMArchFeature = "+v4t"; + switch (triple.getSubArch()) { + default: + llvm_unreachable("invalid sub-architecture for ARM"); + case Triple::ARMSubArch_v8: + if (NoCPU) + // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, + // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, + // FeatureT2XtPk, FeatureCrypto, FeatureCRC + ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm," + "+trustzone,+t2xtpk,+crypto,+crc"; + else + // Use CPU to figure out the exact features + ARMArchFeature = "+v8"; + break; + case Triple::ARMSubArch_v7m: + isThumb = true; + if (NoCPU) + // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass + ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass"; + else + // Use CPU to figure out the exact features. + ARMArchFeature = "+v7"; + break; + case Triple::ARMSubArch_v7em: + if (NoCPU) + // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, + // FeatureT2XtPk, FeatureMClass + ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk,+mclass"; + else + // Use CPU to figure out the exact features. 
+ ARMArchFeature = "+v7"; + break; + case Triple::ARMSubArch_v7s: + if (NoCPU) + // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS + // Swift + ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+ras"; + else + // Use CPU to figure out the exact features. + ARMArchFeature = "+v7"; + break; + case Triple::ARMSubArch_v7: + // v7 CPUs have lots of different feature sets. If no CPU is specified, + // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return + // the "minimum" feature set and use CPU string to figure out the exact + // features. + if (NoCPU) + // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk + ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk"; + else + // Use CPU to figure out the exact features. + ARMArchFeature = "+v7"; + break; + case Triple::ARMSubArch_v6t2: + ARMArchFeature = "+v6t2"; + break; + case Triple::ARMSubArch_v6m: + isThumb = true; + if (NoCPU) + // v6m: FeatureNoARM, FeatureMClass + ARMArchFeature = "+v6m,+noarm,+mclass"; + else + ARMArchFeature = "+v6"; + break; + case Triple::ARMSubArch_v6: + ARMArchFeature = "+v6"; + break; + case Triple::ARMSubArch_v5te: + ARMArchFeature = "+v5te"; + break; + case Triple::ARMSubArch_v5: + ARMArchFeature = "+v5t"; + break; + case Triple::ARMSubArch_v4t: + ARMArchFeature = "+v4t"; + break; + case Triple::NoSubArch: + break; } if (isThumb) { @@ -221,31 +217,14 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); MCAsmInfo *MAI; - switch (TheTriple.getOS()) { - case llvm::Triple::Darwin: - case llvm::Triple::IOS: - case llvm::Triple::MacOSX: + if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO()) MAI = new ARMMCAsmInfoDarwin(TT); - break; - case llvm::Triple::Win32: - switch (TheTriple.getEnvironment()) { - case llvm::Triple::Itanium: - MAI = new ARMCOFFMCAsmInfoGNU(); - break; - case llvm::Triple::MSVC: - MAI = new ARMCOFFMCAsmInfoMicrosoft(); - break; - default: - llvm_unreachable("invalid environment"); - } - 
break; - default: - if (TheTriple.isOSBinFormatMachO()) - MAI = new ARMMCAsmInfoDarwin(TT); - else - MAI = new ARMELFMCAsmInfo(TT); - break; - } + else if (TheTriple.isWindowsItaniumEnvironment()) + MAI = new ARMCOFFMCAsmInfoGNU(); + else if (TheTriple.isWindowsMSVCEnvironment()) + MAI = new ARMCOFFMCAsmInfoMicrosoft(); + else + MAI = new ARMELFMCAsmInfo(TT); unsigned Reg = MRI.getDwarfRegNum(ARM::SP, true); MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(nullptr, Reg, 0)); @@ -269,11 +248,8 @@ static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, // This is duplicated code. Refactor this. static MCStreamer *createMCStreamer(const Target &T, StringRef TT, MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, - bool RelaxAll, - bool NoExecStack) { + raw_ostream &OS, MCCodeEmitter *Emitter, + const MCSubtargetInfo &STI, bool RelaxAll) { Triple TheTriple(TT); switch (TheTriple.getObjectFormat()) { @@ -287,7 +263,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); return createARMWinCOFFStreamer(Ctx, MAB, *Emitter, OS); case Triple::ELF: - return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack, + return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, TheTriple.getArch() == Triple::thumb); } } @@ -362,8 +338,10 @@ extern "C" void LLVMInitializeARMTargetMC() { // Register the MC codegen info. 
TargetRegistry::RegisterMCCodeGenInfo(TheARMLETarget, createARMMCCodeGenInfo); TargetRegistry::RegisterMCCodeGenInfo(TheARMBETarget, createARMMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget, createARMMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget, createARMMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget, + createARMMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget, + createARMMCCodeGenInfo); // Register the MC instruction info. TargetRegistry::RegisterMCInstrInfo(TheARMLETarget, createARMMCInstrInfo); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 5326e56..a6c20d5 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMMCTARGETDESC_H -#define ARMMCTARGETDESC_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H #include "llvm/Support/DataTypes.h" #include <string> diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 186776a..7da5003 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -428,7 +428,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // For external relocations, make sure to offset the fixup value to // compensate for the addend of the symbol address, if it was // undefined. This occurs with weak definitions, for example. - if (!SD->Symbol->isUndefined()) + if (!SD->getSymbol().isUndefined()) FixedValue -= Layout.getSymbolOffset(SD); } else { // The index is the section ordinal (1-based). 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index ad3f1ca..8acd7af 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -28,7 +28,7 @@ ARMTargetStreamer::~ARMTargetStreamer() {} // The constant pool handling is shared by all ARMTargetStreamer // implementations. const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr) { - return ConstantPools->addEntry(Streamer, Expr); + return ConstantPools->addEntry(Streamer, Expr, 4); } void ARMTargetStreamer::emitCurrentConstantPool() { diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h index cd58759..e0c113e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h +++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARM_UNWIND_OP_ASM_H -#define ARM_UNWIND_OP_ASM_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H #include "llvm/ADT/SmallVector.h" #include "llvm/Support/ARMEHABI.h" @@ -90,4 +90,4 @@ private: } // namespace llvm -#endif // ARM_UNWIND_OP_ASM_H +#endif diff --git a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt index 2a7fe61..db8fc92 100644 --- a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = ARMDesc parent = ARM -required_libraries = ARMAsmPrinter ARMInfo MC Support +required_libraries = ARMAsmPrinter ARMInfo MC MCDisassembler Support add_to_library_groups = ARM diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index f6d24e9..35fe9b3 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -378,8 +378,8 @@ bool 
MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { } bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { - TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); - TRI = Fn.getTarget().getRegisterInfo(); + TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo()); + TRI = Fn.getSubtarget().getRegisterInfo(); MRI = &Fn.getRegInfo(); const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); isLikeA9 = STI->isLikeA9() || STI->isSwift(); diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index f069535..c1601a3 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -15,7 +15,7 @@ TARGET = ARM BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \ ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \ ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \ - ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ + ARMGenCallingConv.inc \ ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \ ARMGenMCPseudoLowering.inc ARMGenDisassemblerTables.inc diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index baa97a7..6deab4f 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -52,9 +52,9 @@ void Thumb1FrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const Thumb1InstrInfo &TII = - *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); - const Thumb1RegisterInfo *RegInfo = - static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); + *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo()); + const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); if (!hasReservedCallFrame(MF)) { // If we have alloca, convert as follows: // ADJCALLSTACKDOWN -> sub, sp, sp, amount @@ -89,12 +89,15 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction 
&MF) const { ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - const Thumb1RegisterInfo *RegInfo = - static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); + const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); const Thumb1InstrInfo &TII = - *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo()); - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); assert(NumBytes >= ArgRegsSaveSize && @@ -321,12 +324,15 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const Thumb1RegisterInfo *RegInfo = - static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); + const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); const Thumb1InstrInfo &TII = - *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo()); - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); assert((unsigned)NumBytes >= ArgRegsSaveSize && @@ -382,28 +388,65 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, } } - if (ArgRegsSaveSize) { - // Unlike T2 and ARM mode, the T1 pop 
instruction cannot restore - // to LR, and we can't pop the value directly to the PC since - // we need to update the SP after popping the value. Therefore, we - // pop the old LR into R3 as a temporary. - + bool IsV4PopReturn = false; + for (const CalleeSavedInfo &CSI : MFI->getCalleeSavedInfo()) + if (CSI.getReg() == ARM::LR) + IsV4PopReturn = true; + IsV4PopReturn &= STI.hasV4TOps() && !STI.hasV5TOps(); + + // Unlike T2 and ARM mode, the T1 pop instruction cannot restore + // to LR, and we can't pop the value directly to the PC since + // we need to update the SP after popping the value. So instead + // we have to emit: + // POP {r3} + // ADD sp, #offset + // BX r3 + // If this would clobber a return value, then generate this sequence instead: + // MOV ip, r3 + // POP {r3} + // ADD sp, #offset + // MOV lr, r3 + // MOV r3, ip + // BX lr + if (ArgRegsSaveSize || IsV4PopReturn) { // Get the last instruction, tBX_RET MBBI = MBB.getLastNonDebugInstr(); assert (MBBI->getOpcode() == ARM::tBX_RET); - // Epilogue for vararg functions: pop LR to R3 and branch off it. - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) - .addReg(ARM::R3, RegState::Define); - - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); - - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) - .addReg(ARM::R3, RegState::Kill); - AddDefaultPred(MIB); - MIB.copyImplicitOps(&*MBBI); - // erase the old tBX_RET instruction - MBB.erase(MBBI); + DebugLoc dl = MBBI->getDebugLoc(); + + if (AFI->getReturnRegsCount() <= 3) { + // Epilogue: pop saved LR to R3 and branch off it. 
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) + .addReg(ARM::R3, RegState::Define); + + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); + + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) + .addReg(ARM::R3, RegState::Kill); + AddDefaultPred(MIB); + MIB.copyImplicitOps(&*MBBI); + // erase the old tBX_RET instruction + MBB.erase(MBBI); + } else { + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::R12, RegState::Define) + .addReg(ARM::R3, RegState::Kill)); + + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) + .addReg(ARM::R3, RegState::Define); + + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); + + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::LR, RegState::Define) + .addReg(ARM::R3, RegState::Kill)); + + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::R3, RegState::Define) + .addReg(ARM::R12, RegState::Kill)); + // Keep the tBX_RET instruction + } } } @@ -417,7 +460,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, DebugLoc DL; MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -456,7 +499,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = MI->getDebugLoc(); @@ -470,6 +513,9 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // Special epilogue for vararg functions. 
See emitEpilogue if (isVarArg) continue; + // ARMv4T requires BX, see emitEpilogue + if (STI.hasV4TOps() && !STI.hasV5TOps()) + continue; Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); MIB.copyImplicitOps(&*MI); diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h index a227f8e..b785b28 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.h +++ b/lib/Target/ARM/Thumb1FrameLowering.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ARM_THUMB1FRAMELOWERING_H -#define LLVM_ARM_THUMB1FRAMELOWERING_H +#ifndef LLVM_LIB_TARGET_ARM_THUMB1FRAMELOWERING_H +#define LLVM_LIB_TARGET_ARM_THUMB1FRAMELOWERING_H #include "ARMFrameLowering.h" #include "Thumb1InstrInfo.h" diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 68cbb5c..8ea912e 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "ARMSubtarget.h" #include "Thumb1InstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -41,10 +42,30 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc))); + // Need to check the arch. 
+ MachineFunction &MF = *MBB.getParent(); + const ARMSubtarget &st = MF.getTarget().getSubtarget<ARMSubtarget>(); + assert(ARM::GPRRegClass.contains(DestReg, SrcReg) && "Thumb1 can only copy GPR registers"); + + if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg) + || !ARM::tGPRRegClass.contains(DestReg)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc))); + else { + // FIXME: The performance consequences of this are going to be atrocious. + // Some things to try that should be better: + // * 'mov hi, $src; mov $dst, hi', with hi as either r10 or r11 + // * 'movs $dst, $src' if cpsr isn't live + // See: http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-August/075998.html + + // 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPUSH))) + .addReg(SrcReg, getKillRegState(KillSrc)); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPOP))) + .addReg(DestReg, getDefRegState(true)); + } } void Thumb1InstrInfo:: @@ -101,3 +122,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } } + +void +Thumb1InstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const { + if (RM == Reloc::PIC_) + expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::tLDRi, RM); + else + expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_abs, ARM::tLDRi, RM); +} diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index c5845b7..9fba760 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef THUMB1INSTRUCTIONINFO_H -#define THUMB1INSTRUCTIONINFO_H +#ifndef LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H +#define LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H #include "ARMBaseInstrInfo.h" #include "Thumb1RegisterInfo.h" @@ -54,7 +54,10 @@ 
public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; +private: + void expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const override; }; } -#endif // THUMB1INSTRUCTIONINFO_H +#endif diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index f907b14..c10c809 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -66,8 +66,12 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, int Val, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { + assert((isARMLowRegister(DestReg) || + isVirtualRegister(DestReg)) && + "Thumb1 does not have ldr to high register"); + MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); @@ -106,15 +110,15 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, NumBytes = -NumBytes; } unsigned LdReg = DestReg; - if (DestReg == ARM::SP) { + if (DestReg == ARM::SP) assert(BaseReg == ARM::SP && "Unexpected!"); + if (!isARMLowRegister(DestReg) && !MRI.isVirtualRegister(DestReg)) LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); - } - if (NumBytes <= 255 && NumBytes >= 0) + if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) { AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) .addImm(NumBytes).setMIFlags(MIFlags); - else if (NumBytes < 0 && NumBytes >= -255) { + } else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) { AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) .addImm(NumBytes).setMIFlags(MIFlags); AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg)) @@ -124,7 +128,8 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, ARMCC::AL, 
0, MIFlags); // Emit add / sub. - int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr); + int Opc = (isSub) ? ARM::tSUBrr : ((isHigh || !CanChangeCC) ? ARM::tADDhirr + : ARM::tADDrr); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); if (Opc != ARM::tADDhirr) @@ -136,32 +141,10 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, AddDefaultPred(MIB); } -/// calcNumMI - Returns the number of instructions required to materialize -/// the specific add / sub r, c instruction. -static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, - unsigned NumBits, unsigned Scale) { - unsigned NumMIs = 0; - unsigned Chunk = ((1 << NumBits) - 1) * Scale; - - if (Opc == ARM::tADDrSPi) { - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - NumMIs++; - NumBits = 8; - Scale = 1; // Followed by a number of tADDi8. - Chunk = ((1 << NumBits) - 1) * Scale; - } - - NumMIs += Bytes / Chunk; - if ((Bytes % Chunk) != 0) - NumMIs++; - if (ExtraOpc) - NumMIs++; - return NumMIs; -} - /// emitThumbRegPlusImmediate - Emits a series of instructions to materialize -/// a destreg = basereg + immediate in Thumb code. +/// a destreg = basereg + immediate in Thumb code. Tries a series of ADDs or +/// SUBs first, and uses a constant pool value if the instruction sequence would +/// be too long. This is allowed to modify the condition flags. 
void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, @@ -172,151 +155,146 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, bool isSub = NumBytes < 0; unsigned Bytes = (unsigned)NumBytes; if (isSub) Bytes = -NumBytes; - bool isMul4 = (Bytes & 3) == 0; - bool isTwoAddr = false; - bool DstNotEqBase = false; - unsigned NumBits = 1; - unsigned Scale = 1; - int Opc = 0; + + int CopyOpc = 0; + unsigned CopyBits = 0; + unsigned CopyScale = 1; + bool CopyNeedsCC = false; int ExtraOpc = 0; - bool NeedCC = false; - - if (DestReg == BaseReg && BaseReg == ARM::SP) { - assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); - NumBits = 7; - Scale = 4; - Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; - isTwoAddr = true; - } else if (!isSub && BaseReg == ARM::SP) { - // r1 = add sp, 403 - // => - // r1 = add sp, 100 * 4 - // r1 = add r1, 3 - if (!isMul4) { - Bytes &= ~3; - ExtraOpc = ARM::tADDi3; + unsigned ExtraBits = 0; + unsigned ExtraScale = 1; + bool ExtraNeedsCC = false; + + // Strategy: + // We need to select two types of instruction, maximizing the available + // immediate range of each. The instructions we use will depend on whether + // DestReg and BaseReg are low, high or the stack pointer. + // * CopyOpc - DestReg = BaseReg + imm + // This will be emitted once if DestReg != BaseReg, and never if + // DestReg == BaseReg. + // * ExtraOpc - DestReg = DestReg + imm + // This will be emitted as many times as necessary to add the + // full immediate. + // If the immediate ranges of these instructions are not large enough to cover + // NumBytes with a reasonable number of instructions, we fall back to using a + // value loaded from a constant pool. 
+ if (DestReg == ARM::SP) { + if (BaseReg == ARM::SP) { + // sp -> sp + // Already in right reg, no copy needed + } else { + // low -> sp or high -> sp + CopyOpc = ARM::tMOVr; + CopyBits = 0; } - NumBits = 8; - Scale = 4; - Opc = ARM::tADDrSPi; - } else { - // sp = sub sp, c - // r1 = sub sp, c - // r8 = sub sp, c - if (DestReg != BaseReg) - DstNotEqBase = true; - NumBits = 8; - if (DestReg == ARM::SP) { - Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; - assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); - NumBits = 7; - Scale = 4; + ExtraOpc = isSub ? ARM::tSUBspi : ARM::tADDspi; + ExtraBits = 7; + ExtraScale = 4; + } else if (isARMLowRegister(DestReg)) { + if (BaseReg == ARM::SP) { + // sp -> low + assert(!isSub && "Thumb1 does not have tSUBrSPi"); + CopyOpc = ARM::tADDrSPi; + CopyBits = 8; + CopyScale = 4; + } else if (DestReg == BaseReg) { + // low -> same low + // Already in right reg, no copy needed + } else if (isARMLowRegister(BaseReg)) { + // low -> different low + CopyOpc = isSub ? ARM::tSUBi3 : ARM::tADDi3; + CopyBits = 3; + CopyNeedsCC = true; } else { - Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; - NumBits = 8; - NeedCC = true; + // high -> low + CopyOpc = ARM::tMOVr; + CopyBits = 0; } - isTwoAddr = true; + ExtraOpc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + ExtraBits = 8; + ExtraNeedsCC = true; + } else /* DestReg is high */ { + if (DestReg == BaseReg) { + // high -> same high + // Already in right reg, no copy needed + } else { + // {low,high,sp} -> high + CopyOpc = ARM::tMOVr; + CopyBits = 0; + } + ExtraOpc = 0; } - unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale); + // We could handle an unaligned immediate with an unaligned copy instruction + // and an aligned extra instruction, but this case is not currently needed. 
+ assert(((Bytes & 3) == 0 || ExtraScale == 1) && + "Unaligned offset, but all instructions require alignment"); + + unsigned CopyRange = ((1 << CopyBits) - 1) * CopyScale; + // If we would emit the copy with an immediate of 0, just use tMOVr. + if (CopyOpc && Bytes < CopyScale) { + CopyOpc = ARM::tMOVr; + CopyBits = 0; + CopyScale = 1; + CopyNeedsCC = false; + CopyRange = 0; + } + unsigned ExtraRange = ((1 << ExtraBits) - 1) * ExtraScale; // per instruction + unsigned RequiredCopyInstrs = CopyOpc ? 1 : 0; + unsigned RangeAfterCopy = (CopyRange > Bytes) ? 0 : (Bytes - CopyRange); + + // We could handle this case when the copy instruction does not require an + // aligned immediate, but we do not currently do this. + assert(RangeAfterCopy % ExtraScale == 0 && + "Extra instruction requires immediate to be aligned"); + + unsigned RequiredExtraInstrs; + if (ExtraRange) + RequiredExtraInstrs = RoundUpToAlignment(RangeAfterCopy, ExtraRange) / ExtraRange; + else if (RangeAfterCopy > 0) + // We need an extra instruction but none is available + RequiredExtraInstrs = 1000000; + else + RequiredExtraInstrs = 0; + unsigned RequiredInstrs = RequiredCopyInstrs + RequiredExtraInstrs; unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2; - if (NumMIs > Threshold) { - // This will expand into too many instructions. Load the immediate from a - // constpool entry. + + // Use a constant pool, if the sequence of ADDs/SUBs is too expensive. + if (RequiredInstrs > Threshold) { emitThumbRegPlusImmInReg(MBB, MBBI, dl, DestReg, BaseReg, NumBytes, true, TII, MRI, MIFlags); return; } - if (DstNotEqBase) { - if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) { - // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7) - unsigned Chunk = (1 << 3) - 1; - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - const MCInstrDesc &MCID = TII.get(isSub ? 
ARM::tSUBi3 : ARM::tADDi3); - const MachineInstrBuilder MIB = - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg) - .setMIFlags(MIFlags)); - AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal)); - } else { - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) - .addReg(BaseReg, RegState::Kill)) - .setMIFlags(MIFlags); + // Emit zero or one copy instructions + if (CopyOpc) { + unsigned CopyImm = std::min(Bytes, CopyRange) / CopyScale; + Bytes -= CopyImm * CopyScale; + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(CopyOpc), DestReg); + if (CopyNeedsCC) + MIB = AddDefaultT1CC(MIB); + MIB.addReg(BaseReg, RegState::Kill); + if (CopyOpc != ARM::tMOVr) { + MIB.addImm(CopyImm); } + AddDefaultPred(MIB.setMIFlags(MIFlags)); + BaseReg = DestReg; } - unsigned Chunk = ((1 << NumBits) - 1) * Scale; + // Emit zero or more in-place add/sub instructions while (Bytes) { - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - ThisVal /= Scale; - // Build the new tADD / tSUB. - if (isTwoAddr) { - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); - if (NeedCC) - MIB = AddDefaultT1CC(MIB); - MIB.addReg(DestReg).addImm(ThisVal); - MIB = AddDefaultPred(MIB); - MIB.setMIFlags(MIFlags); - } else { - bool isKill = BaseReg != ARM::SP; - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); - if (NeedCC) - MIB = AddDefaultT1CC(MIB); - MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); - MIB = AddDefaultPred(MIB); - MIB.setMIFlags(MIFlags); - - BaseReg = DestReg; - if (Opc == ARM::tADDrSPi) { - // r4 = add sp, imm - // r4 = add r4, imm - // ... - NumBits = 8; - Scale = 1; - Chunk = ((1 << NumBits) - 1) * Scale; - Opc = isSub ? 
ARM::tSUBi8 : ARM::tADDi8; - NeedCC = isTwoAddr = true; - } - } - } - - if (ExtraOpc) { - const MCInstrDesc &MCID = TII.get(ExtraOpc); - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)) - .addReg(DestReg, RegState::Kill) - .addImm(((unsigned)NumBytes) & 3) - .setMIFlags(MIFlags)); - } -} + unsigned ExtraImm = std::min(Bytes, ExtraRange) / ExtraScale; + Bytes -= ExtraImm * ExtraScale; -/// emitThumbConstant - Emit a series of instructions to materialize a -/// constant. -static void emitThumbConstant(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, int Imm, - const TargetInstrInfo &TII, - const Thumb1RegisterInfo& MRI, - DebugLoc dl) { - bool isSub = Imm < 0; - if (isSub) Imm = -Imm; - - int Chunk = (1 << 8) - 1; - int ThisVal = (Imm > Chunk) ? Chunk : Imm; - Imm -= ThisVal; - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), - DestReg)) - .addImm(ThisVal)); - if (Imm > 0) - emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI); - if (isSub) { - const MCInstrDesc &MCID = TII.get(ARM::tRSB); - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)) - .addReg(DestReg, RegState::Kill)); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg); + if (ExtraNeedsCC) + MIB = AddDefaultT1CC(MIB); + MIB.addReg(BaseReg).addImm(ExtraImm); + MIB = AddDefaultPred(MIB); + MIB.setMIFlags(MIFlags); } } @@ -352,86 +330,13 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, const MCInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); - if (Opcode == ARM::tADDrSPi) { + if (Opcode == ARM::tADDframe) { Offset += MI.getOperand(FrameRegIdx+1).getImm(); - - // Can't use tADDrSPi if it's based off the frame pointer. 
- unsigned NumBits = 0; - unsigned Scale = 1; - if (FrameReg != ARM::SP) { - Opcode = ARM::tADDi3; - NumBits = 3; - } else { - NumBits = 8; - Scale = 4; - assert((Offset & 3) == 0 && - "Thumb add/sub sp, #imm immediate must be multiple of 4!"); - } - - unsigned PredReg; - if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) { - // Turn it into a move. - MI.setDesc(TII.get(ARM::tMOVr)); - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - // Remove offset - MI.RemoveOperand(FrameRegIdx+1); - return true; - } - - // Common case: small offset, fits into instruction. - unsigned Mask = (1 << NumBits) - 1; - if (((Offset / Scale) & ~Mask) == 0) { - // Replace the FrameIndex with sp / fp - if (Opcode == ARM::tADDi3) { - MI.setDesc(TII.get(Opcode)); - removeOperands(MI, FrameRegIdx); - AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg) - .addImm(Offset / Scale)); - } else { - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale); - } - return true; - } - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned Bytes = (Offset > 0) ? Offset : -Offset; - unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale); - // MI would expand into a large number of instructions. Don't try to - // simplify the immediate. 
- if (NumMIs > 2) { - emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII, - *this); - MBB.erase(II); - return true; - } - if (Offset > 0) { - // Translate r0 = add sp, imm to - // r0 = add sp, 255*4 - // r0 = add r0, (imm - 255*4) - if (Opcode == ARM::tADDi3) { - MI.setDesc(TII.get(Opcode)); - removeOperands(MI, FrameRegIdx); - AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask)); - } else { - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask); - } - Offset = (Offset - Mask * Scale); - MachineBasicBlock::iterator NII = std::next(II); - emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII, - *this); - } else { - // Translate r0 = add sp, -imm to - // r0 = -imm (this is then translated into a series of instructions) - // r0 = add r0, sp - emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl); - - MI.setDesc(TII.get(ARM::tADDhirr)); - MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true); - MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false); - } + emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII, + *this); + MBB.erase(II); return true; } else { if (AddrMode != ARMII::AddrModeT1_s) @@ -485,8 +390,11 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const { const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>( - MI.getParent()->getParent()->getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MI.getParent() + ->getParent() + ->getTarget() + .getSubtargetImpl() + ->getInstrInfo()); int Off = Offset; // ARM doesn't need the general 64-bit offsets unsigned i = 0; @@ -512,7 +420,7 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, // off the frame pointer (if, for example, there are alloca() calls in // the function, the offset will be 
negative. Use R12 instead since that's // a call clobbered register that we know won't be used in Thumb1 mode. - const TargetInstrInfo &TII = *MBB.getParent()->getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo(); DebugLoc DL; AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr)) .addReg(ARM::R12, RegState::Define) @@ -559,7 +467,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc dl = MI.getDebugLoc(); MachineInstrBuilder MIB(*MBB.getParent(), &MI); @@ -570,7 +478,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MF.getFrameInfo()->getStackSize() + SPAdj; if (MF.getFrameInfo()->hasVarSizedObjects()) { - assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) && + assert(SPAdj == 0 && MF.getSubtarget().getFrameLowering()->hasFP(MF) && "Unexpected"); // There are alloca()'s in this function, must reference off the frame // pointer or base pointer instead. @@ -587,7 +495,10 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // when !hasReservedCallFrame(). 
#ifndef NDEBUG if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){ - assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) && + assert(MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->hasReservedCallFrame(MF) && "Cannot use SP to access the emergency spill slot in " "functions without a reserved call frame"); assert(!MF.getFrameInfo()->hasVarSizedObjects() && diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 0c0abbe..5feaf52 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef THUMB1REGISTERINFO_H -#define THUMB1REGISTERINFO_H +#ifndef LLVM_LIB_TARGET_ARM_THUMB1REGISTERINFO_H +#define LLVM_LIB_TARGET_ARM_THUMB1REGISTERINFO_H #include "ARMBaseRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -60,4 +60,4 @@ public: }; } -#endif // THUMB1REGISTERINFO_H +#endif diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index edb9ff3..fdcb522 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -188,7 +188,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { true/*isImp*/, false/*isKill*/)); MachineInstr *LastITMI = MI; - MachineBasicBlock::iterator InsertPos = MIB; + MachineBasicBlock::iterator InsertPos = MIB.getInstr(); ++MBBI; // Form IT block. 
@@ -255,8 +255,9 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); AFI = Fn.getInfo<ARMFunctionInfo>(); - TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo()); - TRI = TM.getRegisterInfo(); + TII = static_cast<const Thumb2InstrInfo *>( + TM.getSubtargetImpl()->getInstrInfo()); + TRI = TM.getSubtargetImpl()->getRegisterInfo(); restrictIT = TM.getSubtarget<ARMSubtarget>().restrictIT(); if (!AFI->isThumbFunction()) diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index a9df006..91973e1 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -209,6 +209,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI); } +void +Thumb2InstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const { + if (RM == Reloc::PIC_) + expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12, RM); + else + expandLoadStackGuardBase(MI, ARM::t2MOVi32imm, ARM::t2LDRi12, RM); +} + void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 34d45d3..46a1f6d 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef THUMB2INSTRUCTIONINFO_H -#define THUMB2INSTRUCTIONINFO_H +#ifndef LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H +#define LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H #include "ARMBaseInstrInfo.h" #include "Thumb2RegisterInfo.h" @@ -61,6 +61,10 @@ public: /// always be able to get register info as well (through this method). 
/// const Thumb2RegisterInfo &getRegisterInfo() const override { return RI; } + +private: + void expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const override; }; /// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical @@ -71,4 +75,4 @@ ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg); } -#endif // THUMB2INSTRUCTIONINFO_H +#endif diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 782d81f..0d5d85a 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -40,7 +40,7 @@ Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h index 8a33e6c..1dd94cc 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.h +++ b/lib/Target/ARM/Thumb2RegisterInfo.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef THUMB2REGISTERINFO_H -#define THUMB2REGISTERINFO_H +#ifndef LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H +#define LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H #include "ARMBaseRegisterInfo.h" @@ -35,4 +35,4 @@ public: }; } -#endif // THUMB2REGISTERINFO_H +#endif diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 09debe7..c51eb8b 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -335,7 +335,7 @@ static bool VerifyLowRegs(MachineInstr *MI) { bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == 
ARM::t2LDMIA || Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD || Opc == ARM::t2LDMDB_UPD); - bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD); + bool isLROk = (Opc == ARM::t2STMDB_UPD); bool isSPOk = isPCOk || isLROk; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); @@ -384,7 +384,6 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, if (MI->getOperand(1).getReg() == ARM::SP) { Opc = Entry.NarrowOpc2; ImmLimit = Entry.Imm2Limit; - HasOffReg = false; } Scale = 4; @@ -1003,7 +1002,8 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); - TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo()); + TII = static_cast<const Thumb2InstrInfo *>( + TM.getSubtargetImpl()->getInstrInfo()); STI = &TM.getSubtarget<ARMSubtarget>(); // Optimizing / minimizing size? |