aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/llvm/CodeGen/ScheduleDAG.h11
-rw-r--r--lib/CodeGen/ScheduleDAG.cpp6
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp173
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp74
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h32
5 files changed, 176 insertions, 120 deletions
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index d11562c..3864ffd 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -248,6 +248,7 @@ namespace llvm {
unsigned NumSuccs; // # of SDep::Data sucss.
unsigned NumPredsLeft; // # of preds not scheduled.
unsigned NumSuccsLeft; // # of succs not scheduled.
+ unsigned short NumRegDefsLeft; // # of reg defs with no scheduled use.
unsigned short Latency; // Node latency.
bool isCall : 1; // Is a function call.
bool isTwoAddress : 1; // Is a two-address instruction.
@@ -276,7 +277,7 @@ namespace llvm {
SUnit(SDNode *node, unsigned nodenum)
: Node(node), Instr(0), OrigNode(0), NodeNum(nodenum),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
- NumSuccsLeft(0), Latency(0),
+ NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
isCall(false), isTwoAddress(false), isCommutable(false),
hasPhysRegDefs(false), hasPhysRegClobbers(false),
isPending(false), isAvailable(false), isScheduled(false),
@@ -290,7 +291,7 @@ namespace llvm {
SUnit(MachineInstr *instr, unsigned nodenum)
: Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
- NumSuccsLeft(0), Latency(0),
+ NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
isCall(false), isTwoAddress(false), isCommutable(false),
hasPhysRegDefs(false), hasPhysRegClobbers(false),
isPending(false), isAvailable(false), isScheduled(false),
@@ -303,7 +304,7 @@ namespace llvm {
SUnit()
: Node(0), Instr(0), OrigNode(0), NodeNum(~0u),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
- NumSuccsLeft(0), Latency(0),
+ NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
isCall(false), isTwoAddress(false), isCommutable(false),
hasPhysRegDefs(false), hasPhysRegClobbers(false),
isPending(false), isAvailable(false), isScheduled(false),
@@ -347,7 +348,7 @@ namespace llvm {
/// addPred - This adds the specified edge as a pred of the current node if
/// not already. It also adds the current node as a successor of the
/// specified node.
- void addPred(const SDep &D);
+ bool addPred(const SDep &D);
/// removePred - This removes the specified edge as a pred of the current
/// node if it exists. It also removes the current node as a successor of
@@ -442,6 +443,8 @@ namespace llvm {
bool hasReadyFilter() const { return HasReadyFilter; }
+ virtual bool tracksRegPressure() const { return false; }
+
virtual bool isReady(SUnit *) const {
assert(!HasReadyFilter && "The ready filter must override isReady()");
return true;
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 83f2dd0..3388889 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -75,12 +75,12 @@ void ScheduleDAG::Run(MachineBasicBlock *bb,
/// addPred - This adds the specified edge as a pred of the current node if
/// not already. It also adds the current node as a successor of the
/// specified node.
-void SUnit::addPred(const SDep &D) {
+bool SUnit::addPred(const SDep &D) {
// If this node already has this depenence, don't add a redundant one.
for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end();
I != E; ++I)
if (*I == D)
- return;
+ return false;
// Now add a corresponding succ to N.
SDep P = D;
P.setSUnit(this);
@@ -106,6 +106,7 @@ void SUnit::addPred(const SDep &D) {
this->setDepthDirty();
N->setHeightDirty();
}
+ return true;
}
/// removePred - This removes the specified edge as a pred of the current
@@ -285,6 +286,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
dbgs() << " # preds left : " << NumPredsLeft << "\n";
dbgs() << " # succs left : " << NumSuccsLeft << "\n";
+ dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
dbgs() << " Latency : " << Latency << "\n";
dbgs() << " Depth : " << Depth << "\n";
dbgs() << " Height : " << Height << "\n";
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index ad83580..0b548b2 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -706,6 +706,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
} else {
LoadSU = CreateNewSUnit(LoadNode);
LoadNode->setNodeId(LoadSU->NodeNum);
+
+ InitNumRegDefsLeft(LoadSU);
ComputeLatency(LoadSU);
}
@@ -722,6 +724,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
}
if (TID.isCommutable())
NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
ComputeLatency(NewSU);
// Record all the edges to and from the old SU, by category.
@@ -772,6 +776,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
RemovePred(SuccDep, D);
D.setSUnit(NewSU);
AddPred(SuccDep, D);
+ // Balance register pressure.
+ if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
+ && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+ --NewSU->NumRegDefsLeft;
}
for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
SDep D = ChainSuccs[i];
@@ -1436,6 +1444,8 @@ public:
SU->NodeQueueId = 0;
}
+ bool tracksRegPressure() const { return TracksRegPressure; }
+
void dumpRegPressure() const;
bool HighRegPressure(const SUnit *SU) const;
@@ -1571,7 +1581,8 @@ void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
// Add pseudo dependency edges for two-address nodes.
AddPseudoTwoAddrDeps();
// Reroute edges to nodes with multiple uses.
- PrescheduleNodesWithMultipleUses();
+ if (!TracksRegPressure)
+ PrescheduleNodesWithMultipleUses();
// Calculate node priorities.
CalculateSethiUllmanNumbers();
}
@@ -1642,64 +1653,20 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
if (I->isCtrl())
continue;
SUnit *PredSU = I->getSUnit();
- // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
- // counts data deps. To be more precise, we could maintain a
- // NumDataSuccsLeft count.
- if (PredSU->NumSuccsLeft != PredSU->Succs.size()) {
- DEBUG(dbgs() << " SU(" << PredSU->NodeNum << ") live across SU("
- << SU->NodeNum << ")\n");
- continue;
- }
- const SDNode *PN = PredSU->getNode();
- if (!PN->isMachineOpcode()) {
- if (PN->getOpcode() == ISD::CopyFromReg) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- }
- continue;
- }
- unsigned POpc = PN->getMachineOpcode();
- if (POpc == TargetOpcode::IMPLICIT_DEF)
- continue;
- if (POpc == TargetOpcode::EXTRACT_SUBREG) {
- EVT VT = PN->getOperand(0).getValueType();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- // Check if this increases register pressure of the specific register
- // class to the point where it would cause spills.
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- continue;
- } else if (POpc == TargetOpcode::INSERT_SUBREG ||
- POpc == TargetOpcode::SUBREG_TO_REG) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- // Check if this increases register pressure of the specific register
- // class to the point where it would cause spills.
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
continue;
}
- unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = PN->getValueType(i);
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ EVT VT = RegDefPos.GetValue();
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- if (RegPressure[RCId] >= RegLimit[RCId])
- return true; // Reg pressure already high.
unsigned Cost = TLI->getRepRegClassCostFor(VT);
- if (!PN->hasAnyUseOfValue(i))
- continue;
- // Check if this increases register pressure of the specific register
- // class to the point where it would cause spills.
if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
return true;
}
}
-
return false;
}
@@ -1725,80 +1692,64 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) {
if (!TracksRegPressure)
return;
- const SDNode *N = SU->getNode();
- if (!N->isMachineOpcode()) {
- if (N->getOpcode() != ISD::CopyToReg)
- return;
- } else {
- unsigned Opc = N->getMachineOpcode();
- if (Opc == TargetOpcode::EXTRACT_SUBREG ||
- Opc == TargetOpcode::INSERT_SUBREG ||
- Opc == TargetOpcode::SUBREG_TO_REG ||
- Opc == TargetOpcode::REG_SEQUENCE ||
- Opc == TargetOpcode::IMPLICIT_DEF)
- return;
- }
-
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl())
continue;
SUnit *PredSU = I->getSUnit();
- // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
- // counts data deps.
- if (PredSU->NumSuccsLeft != PredSU->Succs.size())
- continue;
- const SDNode *PN = PredSU->getNode();
- if (!PN->isMachineOpcode()) {
- if (PN->getOpcode() == ISD::CopyFromReg) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
- continue;
- }
- unsigned POpc = PN->getMachineOpcode();
- if (POpc == TargetOpcode::IMPLICIT_DEF)
- continue;
- if (POpc == TargetOpcode::EXTRACT_SUBREG) {
- EVT VT = PN->getOperand(0).getValueType();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- } else if (POpc == TargetOpcode::INSERT_SUBREG ||
- POpc == TargetOpcode::SUBREG_TO_REG) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
continue;
}
- unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = PN->getValueType(i);
- if (!PN->hasAnyUseOfValue(i))
+ // FIXME: The ScheduleDAG currently loses information about which of a
+ // node's values is consumed by each dependence. Consequently, if the node
+ // defines multiple register classes, we don't know which to pressurize
+ // here. Instead the following loop consumes the register defs in an
+ // arbitrary order. At least it handles the common case of clustered loads
+ // to the same class. For precise liveness, each SDep needs to indicate the
+ // result number. But that tightly couples the ScheduleDAG with the
+ // SelectionDAG making updates tricky. A simpler hack would be to attach a
+ // value type or register class to SDep.
+ //
+ // The most important aspect of register tracking is balancing the increase
+ // here with the reduction further below. Note that this SU may use multiple
+ // defs in PredSU. The can't be determined here, but we've already
+ // compensated by reducing NumRegDefsLeft in PredSU during
+ // ScheduleDAGSDNodes::AddSchedEdges.
+ --PredSU->NumRegDefsLeft;
+ unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs)
continue;
+ EVT VT = RegDefPos.GetValue();
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ break;
}
}
- // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
- // may transfer data dependencies to CopyToReg.
- if (SU->NumSuccs && N->isMachineOpcode()) {
- unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = N->getValueType(i);
- if (!N->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
- // Register pressure tracking is imprecise. This can happen.
- RegPressure[RCId] = 0;
- else
- RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ // We should have this assert, but there may be dead SDNodes that never
+ // materialize as SUnits, so they don't appear to generate liveness.
+ //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
+ int SkipRegDefs = (int)SU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs > 0)
+ continue;
+ EVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) {
+ // Register pressure tracking is imprecise. This can happen. But we try
+ // hard not to let it happen because it likely results in poor scheduling.
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
+ RegPressure[RCId] = 0;
+ }
+ else {
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
}
}
-
dumpRegPressure();
}
@@ -2314,7 +2265,7 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
if (PredSU->NumSuccs == 1)
continue;
// Avoid prescheduling to copies from virtual registers, which don't behave
- // like other nodes from the perspective of scheduling // heuristics.
+ // like other nodes from the perspective of scheduling heuristics.
if (SDNode *N = SU->getNode())
if (N->getOpcode() == ISD::CopyFromReg &&
TargetRegisterInfo::isVirtualRegister
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index da3966c..477c1ff 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -332,6 +332,9 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
+ // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
+ InitNumRegDefsLeft(NodeSUnit);
+
// Assign the Latency field of NodeSUnit using target-provided information.
ComputeLatency(NodeSUnit);
}
@@ -407,7 +410,13 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
- SU->addPred(dep);
+ if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) {
+ // Multiple register uses are combined in the same SUnit. For example,
+ // we could have a set of glued nodes with all their defs consumed by
+ // another set of glued nodes. Register pressure tracking sees this as
+ // a single use, so to keep pressure balanced we reduce the defs.
+ --OpSU->NumRegDefsLeft;
+ }
}
}
}
@@ -426,6 +435,69 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
AddSchedEdges();
}
+// Initialize NumNodeDefs for the current Node's opcode.
+void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+ if (!Node->isMachineOpcode()) {
+ if (Node->getOpcode() == ISD::CopyFromReg)
+ NodeNumDefs = 1;
+ else
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned POpc = Node->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF) {
+ // No register need be allocated for this.
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
+ // Some instructions define regs that are not represented in the selection DAG
+ // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
+ NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
+ DefIdx = 0;
+}
+
+// Construct a RegDefIter for this SUnit and find the first valid value.
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
+ const ScheduleDAGSDNodes *SD)
+ : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
+ InitNodeNumDefs();
+ Advance();
+}
+
+// Advance to the next valid value defined by the SUnit.
+void ScheduleDAGSDNodes::RegDefIter::Advance() {
+ for (;Node;) { // Visit all glued nodes.
+ for (;DefIdx < NodeNumDefs; ++DefIdx) {
+ if (!Node->hasAnyUseOfValue(DefIdx))
+ continue;
+ if (Node->isMachineOpcode() &&
+ Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) {
+ // Propagate the incoming (full-register) type. I doubt it's needed.
+ ValueType = Node->getOperand(0).getValueType();
+ }
+ else {
+ ValueType = Node->getValueType(DefIdx);
+ }
+ ++DefIdx;
+ return; // Found a normal regdef.
+ }
+ Node = Node->getGluedNode();
+ if (Node == NULL) {
+ return; // No values left to visit.
+ }
+ InitNodeNumDefs();
+ }
+}
+
+void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
+ assert(SU->NumRegDefsLeft == 0 && "expect a new node");
+ for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
+ assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
+ ++SU->NumRegDefsLeft;
+ }
+}
+
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
// Check to see if the scheduler cares about latencies.
if (ForceUnitLatencies()) {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 00b3556..cc7310e 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -20,7 +20,7 @@
namespace llvm {
/// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
- ///
+ ///
/// Edges between SUnits are initially based on edges in the SelectionDAG,
/// and additional edges can be added by the schedulers as heuristics.
/// SDNodes such as Constants, Registers, and a few others that are not
@@ -73,13 +73,17 @@ namespace llvm {
/// predecessors / successors info nor the temporary scheduling states.
///
SUnit *Clone(SUnit *N);
-
+
/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
virtual void BuildSchedGraph(AliasAnalysis *AA);
+ /// InitNumRegDefsLeft - Determine the # of regs defined by this node.
+ ///
+ void InitNumRegDefsLeft(SUnit *SU);
+
/// ComputeLatency - Compute node latency.
///
virtual void ComputeLatency(SUnit *SU);
@@ -106,6 +110,30 @@ namespace llvm {
virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+ /// RegDefIter - In place iteration over the values defined by an
+ /// SUnit. This does not need copies of the iterator or any other STLisms.
+ /// The iterator creates itself, rather than being provided by the SchedDAG.
+ class RegDefIter {
+ const ScheduleDAGSDNodes *SchedDAG;
+ const SDNode *Node;
+ unsigned DefIdx;
+ unsigned NodeNumDefs;
+ EVT ValueType;
+ public:
+ RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
+
+ bool IsValid() const { return Node != NULL; }
+
+ EVT GetValue() const {
+ assert(IsValid() && "bad iterator");
+ return ValueType;
+ }
+
+ void Advance();
+ private:
+ void InitNodeNumDefs();
+ };
+
private:
/// ClusterNeighboringLoads - Cluster loads from "near" addresses into
/// combined SUnits.