aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Trick <atrick@apple.com>2012-06-05 21:11:27 +0000
committerAndrew Trick <atrick@apple.com>2012-06-05 21:11:27 +0000
commitb7e0289fb320c8440ba5eed121a8b932dbd806a2 (patch)
treed2b65520c1191a79fa7dbccaf1947a82ede9d1ca
parent1d72dadddbd3ec9a393dbaadda4c459ab1c4aeb1 (diff)
downloadexternal_llvm-b7e0289fb320c8440ba5eed121a8b932dbd806a2.zip
external_llvm-b7e0289fb320c8440ba5eed121a8b932dbd806a2.tar.gz
external_llvm-b7e0289fb320c8440ba5eed121a8b932dbd806a2.tar.bz2
misched: API for minimum vs. expected latency.
Minimum latency determines per-cycle scheduling groups. Expected latency determines critical path and cost. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158021 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/CodeGen/ScheduleDAG.h15
-rw-r--r--include/llvm/CodeGen/ScheduleDAGInstrs.h10
-rw-r--r--include/llvm/MC/MCInstrItineraries.h8
-rw-r--r--include/llvm/Target/TargetInstrInfo.h40
-rw-r--r--lib/CodeGen/MachineScheduler.cpp112
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp79
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h6
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp2
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp19
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h5
-rw-r--r--lib/Target/TargetInstrInfo.cpp121
11 files changed, 277 insertions, 140 deletions
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index f4de693..3dd3c0c 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -272,6 +272,9 @@ namespace llvm {
unsigned Depth; // Node depth.
unsigned Height; // Node height.
public:
+ unsigned TopReadyCycle; // Cycle relative to start when node is ready.
+ unsigned BotReadyCycle; // Cycle relative to end when node is ready.
+
const TargetRegisterClass *CopyDstRC; // Is a special copy node if not null.
const TargetRegisterClass *CopySrcRC;
@@ -287,7 +290,7 @@ namespace llvm {
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
- CopyDstRC(NULL), CopySrcRC(NULL) {}
+ TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
/// SUnit - Construct an SUnit for post-regalloc scheduling to represent
/// a MachineInstr.
@@ -301,7 +304,7 @@ namespace llvm {
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
- CopyDstRC(NULL), CopySrcRC(NULL) {}
+ TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
/// SUnit - Construct a placeholder SUnit.
SUnit()
@@ -314,7 +317,7 @@ namespace llvm {
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
- CopyDstRC(NULL), CopySrcRC(NULL) {}
+ TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
/// setNode - Assign the representative SDNode for this SUnit.
/// This may be used during pre-regalloc scheduling.
@@ -552,12 +555,6 @@ namespace llvm {
///
virtual void computeLatency(SUnit *SU) = 0;
- /// ComputeOperandLatency - Override dependence edge latency using
- /// operand use/def information
- ///
- virtual void computeOperandLatency(SUnit *, SUnit *,
- SDep&) const { }
-
/// ForceUnitLatencies - Return true if all scheduling edges should be given
/// a latency value of one. The default is to return false; schedulers may
/// override this as needed.
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 968cc56..874f9f1 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -291,11 +291,15 @@ namespace llvm {
///
virtual void computeLatency(SUnit *SU);
- /// computeOperandLatency - Override dependence edge latency using
+ /// computeOperandLatency - Return dependence edge latency using
/// operand use/def information
///
- virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
- SDep& dep) const;
+ /// FindMin may be set to get the minimum vs. expected latency. Minimum
+ /// latency is used for scheduling groups, while expected latency is for
+ /// instruction cost and critical path.
+ virtual unsigned computeOperandLatency(SUnit *Def, SUnit *Use,
+ const SDep& dep,
+ bool FindMin = false) const;
/// schedule - Order nodes according to selected style, filling
/// in the Sequence member.
diff --git a/include/llvm/MC/MCInstrItineraries.h b/include/llvm/MC/MCInstrItineraries.h
index 1994895..62e1914 100644
--- a/include/llvm/MC/MCInstrItineraries.h
+++ b/include/llvm/MC/MCInstrItineraries.h
@@ -214,6 +214,12 @@ public:
/// class. The latency is the maximum completion time for any stage
/// in the itinerary.
///
+ /// InstrStages override the itinerary's MinLatency property. In fact, if the
+ /// stage latencies, which may be zero, are less than MinLatency,
+ /// getStageLatency returns a value less than MinLatency.
+ ///
+ /// If no stages exist, MinLatency is used. If MinLatency is invalid (<0),
+ /// then it defaults to one cycle.
unsigned getStageLatency(unsigned ItinClassIndx) const {
// If the target doesn't provide itinerary information, use a simple
// non-zero default value for all instructions. Some target's provide a
@@ -222,7 +228,7 @@ public:
// stage). This is different from beginStage == endStage != 0, which could
// be used for zero-latency pseudo ops.
if (isEmpty() || Itineraries[ItinClassIndx].FirstStage == 0)
- return 1;
+ return (Props.MinLatency < 0) ? 1 : Props.MinLatency;
// Calculate the maximum completion time for any stage.
unsigned Latency = 0, StartCycle = 0;
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 896c152..693166f 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -668,18 +668,36 @@ public:
return Opcode <= TargetOpcode::COPY;
}
+ virtual int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const = 0;
+
/// getOperandLatency - Compute and return the use operand latency of a given
/// pair of def and use.
/// In most cases, the static scheduling itinerary was enough to determine the
/// operand latency. But it may not be possible for instructions with variable
/// number of defs / uses.
+ ///
+ /// This is a raw interface to the itinerary that may be directly overriden by
+ /// a target. Use computeOperandLatency to get the best estimate of latency.
virtual int getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const;
-
- virtual int getOperandLatency(const InstrItineraryData *ItinData,
- SDNode *DefNode, unsigned DefIdx,
- SDNode *UseNode, unsigned UseIdx) const = 0;
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const;
+
+ /// computeOperandLatency - Compute and return the latency of the given data
+ /// dependent def and use. DefMI must be a valid def. UseMI may be NULL for
+ /// an unknown use. If the subtarget allows, this may or may not need to call
+ /// getOperandLatency().
+ ///
+ /// FindMin may be set to get the minimum vs. expected latency. Minimum
+ /// latency is used for scheduling groups, while expected latency is for
+ /// instruction cost and critical path.
+ unsigned computeOperandLatency(const InstrItineraryData *ItinData,
+ const TargetRegisterInfo *TRI,
+ const MachineInstr *DefMI,
+ const MachineInstr *UseMI,
+ unsigned Reg, bool FindMin) const;
/// getOutputLatency - Compute and return the output dependency latency of a
/// a given pair of defs which both target the same register. This is usually
@@ -693,13 +711,17 @@ public:
/// getInstrLatency - Compute the instruction latency of a given instruction.
/// If the instruction has higher cost when predicated, it's returned via
/// PredCost.
- virtual int getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
- unsigned *PredCost = 0) const;
+ virtual unsigned getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = 0) const;
virtual int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const = 0;
+ /// Return the default expected latency for a def based on it's opcode.
+ unsigned defaultDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI) const;
+
/// isHighLatencyDef - Return true if this opcode has high latency to its
/// result.
virtual bool isHighLatencyDef(int opc) const { return false; }
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index c318d84..aa59177 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -21,8 +21,9 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -394,6 +395,12 @@ public:
return RegionCriticalPSets;
}
+ /// getIssueWidth - Return the max instructions per scheduling group.
+ ///
+ unsigned getIssueWidth() const {
+ return InstrItins ? InstrItins->Props.IssueWidth : 1;
+ }
+
protected:
void initRegPressure();
void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
@@ -787,13 +794,16 @@ class ConvergingScheduler : public MachineSchedStrategy {
/// MinReadyCycle - Cycle of the soonest available instruction.
unsigned MinReadyCycle;
+ // Remember the greatest min operand latency.
+ unsigned MaxMinLatency;
+
/// Pending queues extend the ready queues with the same ID and the
/// PendingFlag set.
SchedBoundary(unsigned ID, const Twine &Name):
Available(ID, Name+".A"),
Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0),
- MinReadyCycle(UINT_MAX) {}
+ MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
~SchedBoundary() { delete HazardRec; }
@@ -805,6 +815,8 @@ class ConvergingScheduler : public MachineSchedStrategy {
void bumpCycle();
+ void bumpNode(SUnit *SU, unsigned IssueWidth);
+
void releasePending();
void removeReady(SUnit *SU);
@@ -868,25 +880,53 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
}
void ConvergingScheduler::releaseTopNode(SUnit *SU) {
- Top.releaseNode(SU, SU->getDepth());
+ if (SU->isScheduled)
+ return;
+
+ for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
+ unsigned Latency =
+ DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true);
+#ifndef NDEBUG
+ Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency);
+#endif
+ if (SU->TopReadyCycle < PredReadyCycle + Latency)
+ SU->TopReadyCycle = PredReadyCycle + Latency;
+ }
+ Top.releaseNode(SU, SU->TopReadyCycle);
}
void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
- Bot.releaseNode(SU, SU->getHeight());
+ if (SU->isScheduled)
+ return;
+
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+ unsigned Latency =
+ DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true);
+#ifndef NDEBUG
+ Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency);
+#endif
+ if (SU->BotReadyCycle < SuccReadyCycle + Latency)
+ SU->BotReadyCycle = SuccReadyCycle + Latency;
+ }
+ Bot.releaseNode(SU, SU->BotReadyCycle);
}
void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
unsigned ReadyCycle) {
- if (SU->isScheduled)
- return;
-
if (ReadyCycle < MinReadyCycle)
MinReadyCycle = ReadyCycle;
// Check for interlocks first. For the purpose of other heuristics, an
// instruction that cannot issue appears as if it's not in the ReadyQueue.
- if (HazardRec->isEnabled()
- && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard)
+ if (ReadyCycle > CurrCycle
+ || (HazardRec->isEnabled() && (HazardRec->getHazardType(SU)
+ != ScheduleHazardRecognizer::NoHazard)))
Pending.push(SU);
else
Available.push(SU);
@@ -900,10 +940,11 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
if (!HazardRec->isEnabled()) {
- // Bypass lots of virtual calls in case of long latency.
+ // Bypass HazardRec virtual calls.
CurrCycle = NextCycle;
}
else {
+ // Bypass getHazardType calls in case of long latency.
for (; CurrCycle != NextCycle; ++CurrCycle) {
if (isTop())
HazardRec->AdvanceCycle();
@@ -917,6 +958,26 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
<< CurrCycle << '\n');
}
+/// Move the boundary of scheduled code by one SUnit.
+void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU,
+ unsigned IssueWidth) {
+ // Update the reservation table.
+ if (HazardRec->isEnabled()) {
+ if (!isTop() && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+ HazardRec->EmitInstruction(SU);
+ }
+ // Check the instruction group size limit.
+ ++IssueCount;
+ if (IssueCount == IssueWidth) {
+ DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+ bumpCycle();
+ }
+}
+
/// Release pending ready nodes in to the available queue. This makes them
/// visible to heuristics.
void ConvergingScheduler::SchedBoundary::releasePending() {
@@ -928,7 +989,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {
// so, add them to the available queue.
for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
SUnit *SU = *(Pending.begin()+i);
- unsigned ReadyCycle = isTop() ? SU->getHeight() : SU->getDepth();
+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
if (ReadyCycle < MinReadyCycle)
MinReadyCycle = ReadyCycle;
@@ -965,7 +1026,8 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
releasePending();
for (unsigned i = 0; Available.empty(); ++i) {
- assert(i <= HazardRec->getMaxLookAhead() && "permanent hazard"); (void)i;
+ assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+ "permanent hazard"); (void)i;
bumpCycle();
releasePending();
}
@@ -1205,27 +1267,15 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
-/// it's state based on the current cycle before MachineSchedStrategy.
+/// it's state based on the current cycle before MachineSchedStrategy does.
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
- // Update the reservation table.
- if (IsTopNode && Top.HazardRec->isEnabled()) {
- Top.HazardRec->EmitInstruction(SU);
- if (Top.HazardRec->atIssueLimit()) {
- DEBUG(dbgs() << "*** Max instrs at cycle " << Top.CurrCycle << '\n');
- Top.bumpCycle();
- }
+ if (IsTopNode) {
+ SU->TopReadyCycle = Top.CurrCycle;
+ Top.bumpNode(SU, DAG->getIssueWidth());
}
- else if (Bot.HazardRec->isEnabled()) {
- if (SU->isCall) {
- // Calls are scheduled with their preceding instructions. For bottom-up
- // scheduling, clear the pipeline state before emitting.
- Bot.HazardRec->Reset();
- }
- Bot.HazardRec->EmitInstruction(SU);
- if (Bot.HazardRec->atIssueLimit()) {
- DEBUG(dbgs() << "*** Max instrs at cycle " << Bot.CurrCycle << '\n');
- Bot.bumpCycle();
- }
+ else {
+ SU->BotReadyCycle = Bot.CurrCycle;
+ Bot.bumpNode(SU, DAG->getIssueWidth());
}
}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 773a29d..875e012 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -271,10 +271,12 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
// Adjust the dependence latency using operand def/use
// information (if any), and then allow the target to
// perform its own adjustments.
- const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
+ SDep dep(SU, SDep::Data, LDataLatency, *Alias);
if (!UnitLatencies) {
- computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
- ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+ unsigned Latency = computeOperandLatency(SU, UseSU, dep);
+ dep.setLatency(Latency);
+
+ ST.adjustSchedDependency(SU, UseSU, dep);
}
UseSU->addPred(dep);
}
@@ -461,11 +463,13 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
// Create a data dependence.
//
// TODO: Handle "special" address latencies cleanly.
- const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
+ SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg);
if (!UnitLatencies) {
// Adjust the dependence latency using operand def/use information, then
// allow the target to perform its own adjustments.
- computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+ unsigned Latency = computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+ dep.setLatency(Latency);
+
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
}
@@ -970,8 +974,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
}
void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
- // Compute the latency for the node.
- if (!InstrItins || InstrItins->isEmpty()) {
+ // Compute the latency for the node. We only provide a default for missing
+ // itineraries. Empty itineraries still have latency properties.
+ if (!InstrItins) {
SU->Latency = 1;
// Simplistic target-independent heuristic: assume that loads take
@@ -983,63 +988,15 @@ void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
}
}
-void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
- SDep& dep) const {
- if (!InstrItins || InstrItins->isEmpty())
- return;
-
+unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
+ const SDep& dep,
+ bool FindMin) const {
// For a data dependency with a known register...
if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
- return;
+ return 1;
- const unsigned Reg = dep.getReg();
-
- // ... find the definition of the register in the defining
- // instruction
- MachineInstr *DefMI = Def->getInstr();
- int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
- if (DefIdx != -1) {
- const MachineOperand &MO = DefMI->getOperand(DefIdx);
- if (MO.isReg() && MO.isImplicit() &&
- DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
- // This is an implicit def, getOperandLatency() won't return the correct
- // latency. e.g.
- // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
- // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
- // What we want is to compute latency between def of %D6/%D7 and use of
- // %Q3 instead.
- unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
- if (DefMI->getOperand(Op2).isReg())
- DefIdx = Op2;
- }
- MachineInstr *UseMI = Use->getInstr();
- // For all uses of the register, calculate the maxmimum latency
- int Latency = -1;
- if (UseMI) {
- for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = UseMI->getOperand(i);
- if (!MO.isReg() || !MO.isUse())
- continue;
- unsigned MOReg = MO.getReg();
- if (MOReg != Reg)
- continue;
-
- int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
- UseMI, i);
- Latency = std::max(Latency, UseCycle);
- }
- } else {
- // UseMI is null, then it must be a scheduling barrier.
- if (!InstrItins || InstrItins->isEmpty())
- return;
- unsigned DefClass = DefMI->getDesc().getSchedClass();
- Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
- }
-
- // If we found a latency, then replace the existing dependence latency.
- if (Latency >= 0)
- dep.setLatency(Latency);
- }
+ return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(),
+ Use->getInstr(), dep.getReg(), FindMin);
}
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 75940ec..5384576 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -98,12 +98,6 @@ namespace llvm {
///
virtual void computeLatency(SUnit *SU);
- /// computeOperandLatency - Override dependence edge latency using
- /// operand use/def information
- ///
- virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
- SDep& dep) const { }
-
virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
unsigned OpIdx, SDep& dep) const;
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 5218aa1..ec2b577 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1046,7 +1046,7 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
return true; // Below MI
unsigned DefDist = DDI->second;
assert(Dist > DefDist && "Visited def already?");
- if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist))
+ if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist))
return true;
}
return false;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 398e0c9..bcc4f9c 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2567,12 +2567,13 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const {
if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
- DefMI->isRegSequence() || DefMI->isImplicitDef())
+ DefMI->isRegSequence() || DefMI->isImplicitDef()) {
return 1;
-
+ }
if (!ItinData || ItinData->isEmpty())
return DefMI->mayLoad() ? 3 : 1;
@@ -2983,14 +2984,16 @@ ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
DepMI->getNumOperands());
}
-int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
- unsigned *PredCost) const {
+unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
if (MI->isCopyLike() || MI->isInsertSubreg() ||
MI->isRegSequence() || MI->isImplicitDef())
return 1;
- if (!ItinData || ItinData->isEmpty())
+ // Be sure to call getStageLatency for an empty itinerary in case it has a
+ // valid MinLatency property.
+ if (!ItinData)
return 1;
if (MI->isBundle()) {
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 2fe8507..8217f23 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -249,8 +249,9 @@ private:
const MCInstrDesc &UseMCID,
unsigned UseIdx, unsigned UseAlign) const;
- int getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI, unsigned *PredCost = 0) const;
+ unsigned getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = 0) const;
int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const;
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index 6088ba5..ae38732 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -61,22 +61,125 @@ TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
return 1;
}
+/// Return the default expected latency for a def based on it's opcode.
+unsigned TargetInstrInfo::defaultDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI) const {
+ if (DefMI->mayLoad())
+ return ItinData->Props.LoadLatency;
+ if (isHighLatencyDef(DefMI->getOpcode()))
+ return ItinData->Props.HighLatency;
+ return 1;
+}
+
+/// Both DefMI and UseMI must be valid. By default, call directly to the
+/// itinerary. This may be overriden by the target.
int
TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
- if (!ItinData || ItinData->isEmpty())
- return -1;
-
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const {
unsigned DefClass = DefMI->getDesc().getSchedClass();
unsigned UseClass = UseMI->getDesc().getSchedClass();
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}
-int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
- unsigned *PredCost) const {
- if (!ItinData || ItinData->isEmpty())
+/// computeOperandLatency - Compute and return the latency of the given data
+/// dependent def and use. DefMI must be a valid def. UseMI may be NULL for an
+/// unknown use. Depending on the subtarget's itinerary properties, this may or
+/// may not need to call getOperandLatency().
+///
+/// FindMin may be set to get the minimum vs. expected latency. Minimum
+/// latency is used for scheduling groups, while expected latency is for
+/// instruction cost and critical path.
+///
+/// For most subtargets, we don't need DefIdx or UseIdx to compute min latency.
+/// DefMI must be a valid definition, but UseMI may be NULL for an unknown use.
+unsigned TargetInstrInfo::
+computeOperandLatency(const InstrItineraryData *ItinData,
+ const TargetRegisterInfo *TRI,
+ const MachineInstr *DefMI, const MachineInstr *UseMI,
+ unsigned Reg, bool FindMin) const {
+
+ // Default to one cycle for missing itinerary. Empty itineraries still have
+ // a properties. We have one hard-coded exception for loads, to preserve
+ // existing behavior.
+ if (!ItinData)
+ return DefMI->mayLoad() ? 2 : 1;
+
+ // Return a latency based on the itinerary properties and defining instruction
+ // if possible. Some common subtargets don't require per-operand latency,
+ // especially for minimum latencies.
+ if (FindMin) {
+ // If MinLatency is valid, call getInstrLatency. This uses Stage latency if
+ // it exists before defaulting to MinLatency.
+ if (ItinData->Props.MinLatency >= 0)
+ return getInstrLatency(ItinData, DefMI);
+
+ // If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
+ // For empty itineraries, short-cirtuit the check and default to one cycle.
+ if (ItinData->isEmpty())
+ return 1;
+ }
+ else if(ItinData->isEmpty())
+ return defaultDefLatency(ItinData, DefMI);
+
+ // ...operand lookup required
+
+ // Find the definition of the register in the defining instruction.
+ int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
+ if (DefIdx != -1) {
+ const MachineOperand &MO = DefMI->getOperand(DefIdx);
+ if (MO.isReg() && MO.isImplicit() &&
+ DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
+ // This is an implicit def, getOperandLatency() won't return the correct
+ // latency. e.g.
+ // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
+ // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
+ // What we want is to compute latency between def of %D6/%D7 and use of
+ // %Q3 instead.
+ unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ if (DefMI->getOperand(Op2).isReg())
+ DefIdx = Op2;
+ }
+ // For all uses of the register, calculate the maxmimum latency
+ int OperLatency = -1;
+
+ // UseMI is null, then it must be a scheduling barrier.
+ if (!UseMI) {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
+ }
+ else {
+ for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg != Reg)
+ continue;
+
+ int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, i);
+ OperLatency = std::max(OperLatency, UseCycle);
+ }
+ }
+ // If we found an operand latency, we're done.
+ if (OperLatency >= 0)
+ return OperLatency;
+ }
+ // No operand latency was found.
+ unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
+ // Expected latency is the max of the stage latency and itinerary props.
+ if (!FindMin)
+ InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI));
+ return InstrLatency;
+}
+
+unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ // Default to one cycle for no itinerary. However, an "empty" itinerary may
+ // still have a MinLatency property, which getStageLatency checks.
+ if (!ItinData)
return 1;
return ItinData->getStageLatency(MI->getDesc().getSchedClass());