diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/CodeGen/MachineLICM.cpp | 88 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 20 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.h | 5 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 35 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.h | 5 |
5 files changed, 101 insertions, 52 deletions
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 607e8f1..829fae6 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -43,11 +43,6 @@ using namespace llvm; -static cl::opt<bool> -TrackRegPressure("rp-aware-machine-licm", - cl::desc("Register pressure aware machine LICM"), - cl::init(false), cl::Hidden); - STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); STATISTIC(NumLowRP, @@ -124,6 +119,7 @@ namespace { RegSeen.clear(); RegPressure.clear(); RegLimit.clear(); + BackTrace.clear(); for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI) CI->second.clear(); @@ -171,9 +167,10 @@ namespace { /// bool IsLoopInvariantInst(MachineInstr &I); - /// ComputeOperandLatency - Compute operand latency between a def of 'Reg' - /// and an use in the current loop. - int ComputeOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg); + /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' + /// and an use in the current loop, return true if the target considered + /// it 'high'. + bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg); /// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check /// if hoisting an instruction of the given cost matrix can cause high @@ -556,28 +553,24 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) { if (!Preheader) return; - if (TrackRegPressure) { - if (IsHeader) { - // Compute registers which are liveout of preheader. - RegSeen.clear(); - BackTrace.clear(); - InitRegPressure(Preheader); - } - - // Remember livein register pressure. - BackTrace.push_back(RegPressure); + if (IsHeader) { + // Compute registers which are liveout of preheader. + RegSeen.clear(); + BackTrace.clear(); + InitRegPressure(Preheader); } + // Remember livein register pressure. + BackTrace.push_back(RegPressure); + for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); MII != E; ) { MachineBasicBlock::iterator NextMII = MII; ++NextMII; MachineInstr *MI = &*MII; - if (TrackRegPressure) - UpdateRegPressureBefore(MI); + UpdateRegPressureBefore(MI); Hoist(MI, Preheader); - if (TrackRegPressure) - UpdateRegPressureAfter(MI); + UpdateRegPressureAfter(MI); MII = NextMII; } @@ -591,8 +584,7 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) { HoistRegion(Children[I]); } - if (TrackRegPressure) - BackTrace.pop_back(); + BackTrace.pop_back(); } /// InitRegPressure - Find all virtual register references that are liveout of @@ -788,15 +780,14 @@ bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) { } } -/// ComputeOperandLatency - Compute operand latency between a def of 'Reg' -/// and an use in the current loop. -int MachineLICM::ComputeOperandLatency(MachineInstr &MI, - unsigned DefIdx, unsigned Reg) { +/// HasHighOperandLatency - Compute operand latency between a def of 'Reg' +/// and an use in the current loop, return true if the target considered +/// it 'high'. +bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, + unsigned DefIdx, unsigned Reg) { if (MRI->use_nodbg_empty(Reg)) - // No use? Return arbitrary large number! - return 300; + return false; - int Latency = -1; for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; ++I) { MachineInstr *UseMI = &*I; @@ -810,18 +801,15 @@ int MachineLICM::ComputeOperandLatency(MachineInstr &MI, if (MOReg != Reg) continue; - int UseCycle = TII->getOperandLatency(InstrItins, &MI, DefIdx, UseMI, i); - Latency = std::max(Latency, UseCycle); + if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i)) + return true; } - if (Latency != -1) - break; + // Only look at the first in loop use. + break; } - if (Latency == -1) - Latency = InstrItins->getOperandCycle(MI.getDesc().getSchedClass(), DefIdx); - - return Latency; + return false; } /// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check @@ -855,19 +843,19 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (MI.isImplicitDef()) return true; - // FIXME: For now, only hoist re-materilizable instructions. LICM will - // increase register pressure. We want to make sure it doesn't increase - // spilling. + // If the instruction is cheap, only hoist if it is re-materilizable. LICM + // will increase register pressure. It's probably not worth it if the + // instruction is cheap. // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting // these tend to help performance in low register pressure situation. The // trade off is it may cause spill in high pressure situation. It will end up // adding a store in the loop preheader. But the reload is no more expensive. // The side benefit is these loads are frequently CSE'ed. - if (!TrackRegPressure || MI.getDesc().isAsCheapAsAMove()) { - if (!TII->isTriviallyReMaterializable(&MI, AA) && - !isLoadFromConstantMemory(&MI)) + if (MI.getDesc().isAsCheapAsAMove()) { + if (!TII->isTriviallyReMaterializable(&MI, AA)) return false; } else { + // Estimate register pressure to determine whether to LICM the instruction. // In low register pressure situation, we can be more aggressive about // hoisting. Also, favors hoisting long latency instructions even in // moderately high pressure situation. @@ -880,13 +868,9 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; if (MO.isDef()) { - if (InstrItins && !InstrItins->isEmpty()) { - int Cycle = ComputeOperandLatency(MI, i, Reg); - if (Cycle > 3) { - // FIXME: Target specific high latency limit? - ++NumHighLatency; - return true; - } + if (HasHighOperandLatency(MI, i, Reg)) { + ++NumHighLatency; + return true; } const TargetRegisterClass *RC = MRI->getRegClass(Reg); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index aca292a..0b5b243 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1925,3 +1925,23 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign, UseTID, UseIdx, UseAlign); } + +bool ARMBaseInstrInfo:: +hasHighOperandLatency(const InstrItineraryData *ItinData, + const MachineRegisterInfo *MRI, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const { + unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; + unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask; + if (Subtarget.isCortexA8() && + (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) + // CortexA8 VFP instructions are not pipelined. + return true; + + // Hoist VFP / NEON instructions with 4 or higher latency. + int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + if (Latency <= 3) + return false; + return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || + UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; +} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 36be336..b3a8329 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -377,6 +377,11 @@ private: unsigned DefIdx, unsigned DefAlign, const TargetInstrDesc &UseTID, unsigned UseIdx, unsigned UseAlign) const; + + bool hasHighOperandLatency(const InstrItineraryData *ItinData, + const MachineRegisterInfo *MRI, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const; }; static inline diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 40ef3db..79d9872 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3152,6 +3152,41 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { NopInst.setOpcode(X86::NOOP); } +bool X86InstrInfo:: +hasHighOperandLatency(const InstrItineraryData *ItinData, + const MachineRegisterInfo *MRI, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const { + switch (DefMI->getOpcode()) { + default: return false; + case X86::DIVSDrm: + case X86::DIVSDrm_Int: + case X86::DIVSDrr: + case X86::DIVSDrr_Int: + case X86::DIVSSrm: + case X86::DIVSSrm_Int: + case X86::DIVSSrr: + case X86::DIVSSrr_Int: + case X86::SQRTPDm: + case X86::SQRTPDm_Int: + case X86::SQRTPDr: + case X86::SQRTPDr_Int: + case X86::SQRTPSm: + case X86::SQRTPSm_Int: + case X86::SQRTPSr: + case X86::SQRTPSr_Int: + case X86::SQRTSDm: + case X86::SQRTSDm_Int: + case X86::SQRTSDr: + case X86::SQRTSDr_Int: + case X86::SQRTSSm: + case X86::SQRTSSm_Int: + case X86::SQRTSSr: + case X86::SQRTSSr_Int: + return true; + } +} + namespace { /// CGBR - Create Global Base Reg pass. This initializes the PIC /// global base register for x86-32. diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index e43cfac..5060ad8 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -864,6 +864,11 @@ public: unsigned OpNum, const SmallVectorImpl<MachineOperand> &MOs, unsigned Size, unsigned Alignment) const; + + bool hasHighOperandLatency(const InstrItineraryData *ItinData, + const MachineRegisterInfo *MRI, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const; private: MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, |