From 3ef1c8759a20167457eb7fd82ebcaffe7ccaa1d1 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Fri, 10 Sep 2010 01:29:16 +0000 Subject: Teach if-converter to be more careful with predicating instructions that would take multiple cycles to decode. For the current if-converter clients (actually only ARM), the instructions that are predicated on false are not nops. They would still take machine cycles to decode. Micro-coded instructions such as LDM / STM can potentially take multiple cycles to decode. If-converter should not treat them as non-micro-coded simple instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113570 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARM.td | 15 ++++++++++--- lib/Target/ARM/ARMBaseInstrInfo.cpp | 40 +++++++++++++++++++++++---------- lib/Target/ARM/ARMBaseInstrInfo.h | 2 +- lib/Target/ARM/ARMISelLowering.cpp | 4 ++-- lib/Target/ARM/ARMISelLowering.h | 2 ++ lib/Target/ARM/ARMSubtarget.cpp | 3 ++- lib/Target/ARM/ARMSubtarget.h | 10 +++++++++ lib/Target/ARM/ARMTargetMachine.h | 4 ++-- lib/Target/ARM/Thumb2HazardRecognizer.h | 2 +- lib/Target/ARM/Thumb2InstrInfo.cpp | 2 +- lib/Target/ARM/Thumb2InstrInfo.h | 2 +- 11 files changed, 62 insertions(+), 24 deletions(-) (limited to 'lib/Target/ARM') diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index d6a8f19..f3693e3 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -91,6 +91,15 @@ def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M", include "ARMSchedule.td" +// ARM processor families. 
+def ProcOthers : SubtargetFeature<"others", "ARMProcFamily", "Others", + "One of the other ARM processor families">; +def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", + "Cortex-A8 ARM processors", + [FeatureSlowFPBrcc, FeatureNEONForFP]>; +def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", + "Cortex-A9 ARM processors">; + class ProcNoItin Features> : Processor; @@ -150,10 +159,10 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2]>; // V7 Processors. def : Processor<"cortex-a8", CortexA8Itineraries, - [ArchV7A, FeatureHasSlowVMLx, - FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2XtPk]>; + [ArchV7A, ProcA8, + FeatureHasSlowVMLx, FeatureT2XtPk]>; def : Processor<"cortex-a9", CortexA9Itineraries, - [ArchV7A, FeatureT2XtPk]>; + [ArchV7A, ProcA9, FeatureT2XtPk]>; // V7M Processors. def : ProcNoItin<"cortex-m3", [ArchV7M]>; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index c824b8b..e7b35c6 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1415,13 +1415,13 @@ ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const { unsigned ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI, - const InstrItineraryData &ItinData) const { - if (ItinData.isEmpty()) + const InstrItineraryData *ItinData) const { + if (!ItinData || ItinData->isEmpty()) return 1; const TargetInstrDesc &Desc = MI->getDesc(); unsigned Class = Desc.getSchedClass(); - unsigned UOps = ItinData.Itineratries[Class].NumMicroOps; + unsigned UOps = ItinData->Itineratries[Class].NumMicroOps; if (UOps) return UOps; @@ -1430,16 +1430,19 @@ ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI, default: llvm_unreachable("Unexpected multi-uops instruction!"); break; + case ARM::VLDMQ: case ARM::VSTMQ: return 2; // The number of uOps for load / store multiple are determined by the number // registers. 
+ // On Cortex-A8, each odd / even pair of register loads / stores - // (e.g. r5 + r6) can be completed on the same cycle. The minimum is - // 2. For VFP / NEON load / store multiple, the formula is + // On Cortex-A8, each pair of register loads / stores can be scheduled on the + // same cycle. The scheduling for the first load / store must be done + // separately by assuming the address is not 64-bit aligned. + // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address + // is not 64-bit aligned, then AGU would take an extra cycle. + // For VFP / NEON load / store multiple, the formula is // (#reg / 2) + (#reg % 2) + 1. - // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). case ARM::VLDMD: case ARM::VLDMS: case ARM::VLDMD_UPD: @@ -1467,11 +1470,24 @@ ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI, case ARM::t2LDM_UPD: case ARM::t2STM: case ARM::t2STM_UPD: { - // FIXME: Distinquish between Cortex-A8 / Cortex-A9 and other processor - // families. - unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands(); - UOps = (NumRegs / 2) + (NumRegs % 2); - return (UOps > 2) ? UOps : 2; + unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1; + if (Subtarget.isCortexA8()) { + // 4 registers would be issued: 1, 2, 1. + // 5 registers would be issued: 1, 2, 2. + return 1 + (NumRegs / 2); + } else if (Subtarget.isCortexA9()) { + UOps = (NumRegs / 2); + // If there is an odd number of registers or if it's not 64-bit aligned, + // then it takes an extra AGU (Address Generation Unit) cycle. + if ((NumRegs % 2) || + !MI->hasOneMemOperand() || + (*MI->memoperands_begin())->getAlignment() < 8) + ++UOps; + return UOps; + } else { + // Assume the worst. 
+ return NumRegs; + } } } } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index b3abdee..f471b67 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -350,7 +350,7 @@ public: MachineInstr *CmpInstr) const; virtual unsigned getNumMicroOps(const MachineInstr *MI, - const InstrItineraryData &ItinData) const; + const InstrItineraryData *ItinData) const; }; static inline diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index d4198a5..637c6e3 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -177,6 +177,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) : TargetLowering(TM, createTLOF(TM)) { Subtarget = &TM.getSubtarget(); RegInfo = TM.getRegisterInfo(); + Itins = TM.getInstrItineraryData(); if (Subtarget->isTargetDarwin()) { // Uses VFP for Thumb libfuncs if available. @@ -749,8 +750,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { if (TID.mayLoad()) return Sched::Latency; - const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData(); - if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2) + if (!Itins->isEmpty() && Itins->getStageLatency(TID.getSchedClass()) > 2) return Sched::Latency; return Sched::RegPressure; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index ba9ea7f..58b8b9e 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -301,6 +301,8 @@ namespace llvm { const TargetRegisterInfo *RegInfo; + const InstrItineraryData *Itins; + /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created. 
/// unsigned ARMPCLabelIndex; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index cb539f4..8a4052b 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -30,6 +30,7 @@ UseMOVT("arm-use-movt", ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, bool isT) : ARMArchVersion(V4) + , ARMProcFamily(Others) , ARMFPUType(None) , UseNEONForSinglePrecisionFP(false) , SlowVMLx(false) @@ -50,7 +51,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , CPUString("generic") , TargetType(isELF) // Default to ELF unless otherwise specified. , TargetABI(ARM_ABI_APCS) { - // default to soft float ABI + // Default to soft float ABI if (FloatABIType == FloatABI::Default) FloatABIType = FloatABI::Soft; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 67e5803..34f571f 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -29,6 +29,10 @@ protected: V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M }; + enum ARMProcFamilyEnum { + Others, CortexA8, CortexA9 + }; + enum ARMFPEnum { None, VFPv2, VFPv3, NEON }; @@ -42,6 +46,9 @@ protected: /// V6, V6T2, V7A, V7M. ARMArchEnum ARMArchVersion; + /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. + ARMProcFamilyEnum ARMProcFamily; + /// ARMFPUType - Floating Point Unit type. 
ARMFPEnum ARMFPUType; @@ -143,6 +150,9 @@ protected: bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; } bool hasV7Ops() const { return ARMArchVersion >= V7A; } + bool isCortexA8() const { return ARMProcFamily == CortexA8; } + bool isCortexA9() const { return ARMProcFamily == CortexA9; } + bool hasARMOps() const { return !NoARM; } bool hasVFP2() const { return ARMFPUType >= VFPv2; } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 17e5425..9b375d7 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -45,8 +45,8 @@ public: virtual const ARMFrameInfo *getFrameInfo() const { return &FrameInfo; } virtual ARMJITInfo *getJITInfo() { return &JITInfo; } virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual const InstrItineraryData getInstrItineraryData() const { - return InstrItins; + virtual const InstrItineraryData *getInstrItineraryData() const { + return &InstrItins; } // Pass Pipeline Configuration diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.h b/lib/Target/ARM/Thumb2HazardRecognizer.h index 4726658..aa4411f 100644 --- a/lib/Target/ARM/Thumb2HazardRecognizer.h +++ b/lib/Target/ARM/Thumb2HazardRecognizer.h @@ -26,7 +26,7 @@ class Thumb2HazardRecognizer : public PostRAHazardRecognizer { MachineInstr *ITBlockMIs[4]; public: - Thumb2HazardRecognizer(const InstrItineraryData &ItinData) : + Thumb2HazardRecognizer(const InstrItineraryData *ItinData) : PostRAHazardRecognizer(ItinData) {} virtual HazardType getHazardType(SUnit *SU); diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 442f41d..962b312 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -194,7 +194,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } ScheduleHazardRecognizer *Thumb2InstrInfo:: -CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const { 
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const { return (ScheduleHazardRecognizer *)new Thumb2HazardRecognizer(II); } diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 3a9f8b1..b66be8e 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -72,7 +72,7 @@ public: const Thumb2RegisterInfo &getRegisterInfo() const { return RI; } ScheduleHazardRecognizer * - CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const; + CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const; }; /// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical -- cgit v1.1