aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/CodeGen/MachineLICM.cpp88
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp20
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h5
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp35
-rw-r--r--lib/Target/X86/X86InstrInfo.h5
5 files changed, 101 insertions, 52 deletions
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 607e8f1..829fae6 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -43,11 +43,6 @@
using namespace llvm;
-static cl::opt<bool>
-TrackRegPressure("rp-aware-machine-licm",
- cl::desc("Register pressure aware machine LICM"),
- cl::init(false), cl::Hidden);
-
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
@@ -124,6 +119,7 @@ namespace {
RegSeen.clear();
RegPressure.clear();
RegLimit.clear();
+ BackTrace.clear();
for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
CI->second.clear();
@@ -171,9 +167,10 @@ namespace {
///
bool IsLoopInvariantInst(MachineInstr &I);
- /// ComputeOperandLatency - Compute operand latency between a def of 'Reg'
- /// and an use in the current loop.
- int ComputeOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg);
+ /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+ /// and an use in the current loop, return true if the target considered
+ /// it 'high'.
+ bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg);
/// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check
/// if hoisting an instruction of the given cost matrix can cause high
@@ -556,28 +553,24 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
if (!Preheader)
return;
- if (TrackRegPressure) {
- if (IsHeader) {
- // Compute registers which are liveout of preheader.
- RegSeen.clear();
- BackTrace.clear();
- InitRegPressure(Preheader);
- }
-
- // Remember livein register pressure.
- BackTrace.push_back(RegPressure);
+ if (IsHeader) {
+ // Compute registers which are liveout of preheader.
+ RegSeen.clear();
+ BackTrace.clear();
+ InitRegPressure(Preheader);
}
+ // Remember livein register pressure.
+ BackTrace.push_back(RegPressure);
+
for (MachineBasicBlock::iterator
MII = BB->begin(), E = BB->end(); MII != E; ) {
MachineBasicBlock::iterator NextMII = MII; ++NextMII;
MachineInstr *MI = &*MII;
- if (TrackRegPressure)
- UpdateRegPressureBefore(MI);
+ UpdateRegPressureBefore(MI);
Hoist(MI, Preheader);
- if (TrackRegPressure)
- UpdateRegPressureAfter(MI);
+ UpdateRegPressureAfter(MI);
MII = NextMII;
}
@@ -591,8 +584,7 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
HoistRegion(Children[I]);
}
- if (TrackRegPressure)
- BackTrace.pop_back();
+ BackTrace.pop_back();
}
/// InitRegPressure - Find all virtual register references that are liveout of
@@ -788,15 +780,14 @@ bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) {
}
}
-/// ComputeOperandLatency - Compute operand latency between a def of 'Reg'
-/// and an use in the current loop.
-int MachineLICM::ComputeOperandLatency(MachineInstr &MI,
- unsigned DefIdx, unsigned Reg) {
+/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+/// and an use in the current loop, return true if the target considered
+/// it 'high'.
+bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
+ unsigned DefIdx, unsigned Reg) {
if (MRI->use_nodbg_empty(Reg))
- // No use? Return arbitrary large number!
- return 300;
+ return false;
- int Latency = -1;
for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
E = MRI->use_nodbg_end(); I != E; ++I) {
MachineInstr *UseMI = &*I;
@@ -810,18 +801,15 @@ int MachineLICM::ComputeOperandLatency(MachineInstr &MI,
if (MOReg != Reg)
continue;
- int UseCycle = TII->getOperandLatency(InstrItins, &MI, DefIdx, UseMI, i);
- Latency = std::max(Latency, UseCycle);
+ if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
+ return true;
}
- if (Latency != -1)
- break;
+ // Only look at the first in loop use.
+ break;
}
- if (Latency == -1)
- Latency = InstrItins->getOperandCycle(MI.getDesc().getSchedClass(), DefIdx);
-
- return Latency;
+ return false;
}
/// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check
@@ -855,19 +843,19 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
if (MI.isImplicitDef())
return true;
- // FIXME: For now, only hoist re-materilizable instructions. LICM will
- // increase register pressure. We want to make sure it doesn't increase
- // spilling.
+ // If the instruction is cheap, only hoist if it is re-materilizable. LICM
+ // will increase register pressure. It's probably not worth it if the
+ // instruction is cheap.
// Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
// these tend to help performance in low register pressure situation. The
// trade off is it may cause spill in high pressure situation. It will end up
// adding a store in the loop preheader. But the reload is no more expensive.
// The side benefit is these loads are frequently CSE'ed.
- if (!TrackRegPressure || MI.getDesc().isAsCheapAsAMove()) {
- if (!TII->isTriviallyReMaterializable(&MI, AA) &&
- !isLoadFromConstantMemory(&MI))
+ if (MI.getDesc().isAsCheapAsAMove()) {
+ if (!TII->isTriviallyReMaterializable(&MI, AA))
return false;
} else {
+ // Estimate register pressure to determine whether to LICM the instruction.
// In low register pressure situation, we can be more aggressive about
// hoisting. Also, favors hoisting long latency instructions even in
// moderately high pressure situation.
@@ -880,13 +868,9 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
if (MO.isDef()) {
- if (InstrItins && !InstrItins->isEmpty()) {
- int Cycle = ComputeOperandLatency(MI, i, Reg);
- if (Cycle > 3) {
- // FIXME: Target specific high latency limit?
- ++NumHighLatency;
- return true;
- }
+ if (HasHighOperandLatency(MI, i, Reg)) {
+ ++NumHighLatency;
+ return true;
}
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index aca292a..0b5b243 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1925,3 +1925,23 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
UseTID, UseIdx, UseAlign);
}
+
+bool ARMBaseInstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (Subtarget.isCortexA8() &&
+ (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
+ // CortexA8 VFP instructions are not pipelined.
+ return true;
+
+ // Hoist VFP / NEON instructions with 4 or higher latency.
+ int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ if (Latency <= 3)
+ return false;
+ return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
+ UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
+}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 36be336..b3a8329 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -377,6 +377,11 @@ private:
unsigned DefIdx, unsigned DefAlign,
const TargetInstrDesc &UseTID,
unsigned UseIdx, unsigned UseAlign) const;
+
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
};
static inline
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 40ef3db..79d9872 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3152,6 +3152,41 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
+bool X86InstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ switch (DefMI->getOpcode()) {
+ default: return false;
+ case X86::DIVSDrm:
+ case X86::DIVSDrm_Int:
+ case X86::DIVSDrr:
+ case X86::DIVSDrr_Int:
+ case X86::DIVSSrm:
+ case X86::DIVSSrm_Int:
+ case X86::DIVSSrr:
+ case X86::DIVSSrr_Int:
+ case X86::SQRTPDm:
+ case X86::SQRTPDm_Int:
+ case X86::SQRTPDr:
+ case X86::SQRTPDr_Int:
+ case X86::SQRTPSm:
+ case X86::SQRTPSm_Int:
+ case X86::SQRTPSr:
+ case X86::SQRTPSr_Int:
+ case X86::SQRTSDm:
+ case X86::SQRTSDm_Int:
+ case X86::SQRTSDr:
+ case X86::SQRTSDr_Int:
+ case X86::SQRTSSm:
+ case X86::SQRTSSm_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ return true;
+ }
+}
+
namespace {
/// CGBR - Create Global Base Reg pass. This initializes the PIC
/// global base register for x86-32.
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index e43cfac..5060ad8 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -864,6 +864,11 @@ public:
unsigned OpNum,
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Alignment) const;
+
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,