diff options
author | Andrew Trick <atrick@apple.com> | 2011-02-04 03:18:17 +0000 |
---|---|---|
committer | Andrew Trick <atrick@apple.com> | 2011-02-04 03:18:17 +0000 |
commit | 92e946630d5f9bb092853b93501387dd216899b9 (patch) | |
tree | d8efcc7cbac11a9c8560e53ccb8a036a1063ecbf | |
parent | f827cd717b63fd3f1d9ad16e69087c65e9af8c82 (diff) | |
download | external_llvm-92e946630d5f9bb092853b93501387dd216899b9.zip external_llvm-92e946630d5f9bb092853b93501387dd216899b9.tar.gz external_llvm-92e946630d5f9bb092853b93501387dd216899b9.tar.bz2 |
Introducing a new method of tracking register pressure. We can't
precisely track pressure on a selection DAG, but we can at least keep
it balanced. This design accounts for various interesting aspects of
selection DAGS: register and subregister copies, glued nodes, dead
nodes, unused registers, etc.
Added SUnit::NumRegDefsLeft and ScheduleDAGSDNodes::RegDefIter.
Note: I disabled PrescheduleNodesWithMultipleUses when register
pressure is enabled, based on no evidence other than I don't think it
makes sense to have both enabled.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124853 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/CodeGen/ScheduleDAG.h | 11 | ||||
-rw-r--r-- | lib/CodeGen/ScheduleDAG.cpp | 6 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 173 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 74 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 32 |
5 files changed, 176 insertions, 120 deletions
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index d11562c..3864ffd 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -248,6 +248,7 @@ namespace llvm { unsigned NumSuccs; // # of SDep::Data sucss. unsigned NumPredsLeft; // # of preds not scheduled. unsigned NumSuccsLeft; // # of succs not scheduled. + unsigned short NumRegDefsLeft; // # of reg defs with no scheduled use. unsigned short Latency; // Node latency. bool isCall : 1; // Is a function call. bool isTwoAddress : 1; // Is a two-address instruction. @@ -276,7 +277,7 @@ namespace llvm { SUnit(SDNode *node, unsigned nodenum) : Node(node), Instr(0), OrigNode(0), NodeNum(nodenum), NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), - NumSuccsLeft(0), Latency(0), + NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), isCall(false), isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false), isAvailable(false), isScheduled(false), @@ -290,7 +291,7 @@ namespace llvm { SUnit(MachineInstr *instr, unsigned nodenum) : Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum), NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), - NumSuccsLeft(0), Latency(0), + NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), isCall(false), isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false), isAvailable(false), isScheduled(false), @@ -303,7 +304,7 @@ namespace llvm { SUnit() : Node(0), Instr(0), OrigNode(0), NodeNum(~0u), NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), - NumSuccsLeft(0), Latency(0), + NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), isCall(false), isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false), isAvailable(false), isScheduled(false), @@ -347,7 +348,7 @@ namespace llvm { /// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. - void addPred(const SDep &D); + bool addPred(const SDep &D); /// removePred - This removes the specified edge as a pred of the current /// node if it exists. It also removes the current node as a successor of @@ -442,6 +443,8 @@ namespace llvm { bool hasReadyFilter() const { return HasReadyFilter; } + virtual bool tracksRegPressure() const { return false; } + virtual bool isReady(SUnit *) const { assert(!HasReadyFilter && "The ready filter must override isReady()"); return true; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 83f2dd0..3388889 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -75,12 +75,12 @@ void ScheduleDAG::Run(MachineBasicBlock *bb, /// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. -void SUnit::addPred(const SDep &D) { +bool SUnit::addPred(const SDep &D) { // If this node already has this depenence, don't add a redundant one. for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) if (*I == D) - return; + return false; // Now add a corresponding succ to N. SDep P = D; P.setSUnit(this); @@ -106,6 +106,7 @@ void SUnit::addPred(const SDep &D) { this->setDepthDirty(); N->setHeightDirty(); } + return true; } /// removePred - This removes the specified edge as a pred of the current @@ -285,6 +286,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { dbgs() << " # preds left : " << NumPredsLeft << "\n"; dbgs() << " # succs left : " << NumSuccsLeft << "\n"; + dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n"; dbgs() << " Latency : " << Latency << "\n"; dbgs() << " Depth : " << Depth << "\n"; dbgs() << " Height : " << Height << "\n"; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index ad83580..0b548b2 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -706,6 +706,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } else { LoadSU = CreateNewSUnit(LoadNode); LoadNode->setNodeId(LoadSU->NodeNum); + + InitNumRegDefsLeft(LoadSU); ComputeLatency(LoadSU); } @@ -722,6 +724,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } if (TID.isCommutable()) NewSU->isCommutable = true; + + InitNumRegDefsLeft(NewSU); ComputeLatency(NewSU); // Record all the edges to and from the old SU, by category. @@ -772,6 +776,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { RemovePred(SuccDep, D); D.setSUnit(NewSU); AddPred(SuccDep, D); + // Balance register pressure. + if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled + && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) + --NewSU->NumRegDefsLeft; } for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { SDep D = ChainSuccs[i]; @@ -1436,6 +1444,8 @@ public: SU->NodeQueueId = 0; } + bool tracksRegPressure() const { return TracksRegPressure; } + void dumpRegPressure() const; bool HighRegPressure(const SUnit *SU) const; @@ -1571,7 +1581,8 @@ void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { // Add pseudo dependency edges for two-address nodes. AddPseudoTwoAddrDeps(); // Reroute edges to nodes with multiple uses. - PrescheduleNodesWithMultipleUses(); + if (!TracksRegPressure) + PrescheduleNodesWithMultipleUses(); // Calculate node priorities. CalculateSethiUllmanNumbers(); } @@ -1642,64 +1653,20 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { if (I->isCtrl()) continue; SUnit *PredSU = I->getSUnit(); - // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only - // counts data deps. To be more precise, we could maintain a - // NumDataSuccsLeft count. - if (PredSU->NumSuccsLeft != PredSU->Succs.size()) { - DEBUG(dbgs() << " SU(" << PredSU->NodeNum << ") live across SU(" - << SU->NodeNum << ")\n"); - continue; - } - const SDNode *PN = PredSU->getNode(); - if (!PN->isMachineOpcode()) { - if (PN->getOpcode() == ISD::CopyFromReg) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); - if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) - return true; - } - continue; - } - unsigned POpc = PN->getMachineOpcode(); - if (POpc == TargetOpcode::IMPLICIT_DEF) - continue; - if (POpc == TargetOpcode::EXTRACT_SUBREG) { - EVT VT = PN->getOperand(0).getValueType(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); - // Check if this increases register pressure of the specific register - // class to the point where it would cause spills. - if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) - return true; - continue; - } else if (POpc == TargetOpcode::INSERT_SUBREG || - POpc == TargetOpcode::SUBREG_TO_REG) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); - // Check if this increases register pressure of the specific register - // class to the point where it would cause spills. - if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) - return true; + // NumRegDefsLeft is zero when enough uses of this node have been scheduled + // to cover the number of registers defined (they are all live). + if (PredSU->NumRegDefsLeft == 0) { continue; } - unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); - for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = PN->getValueType(i); + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance()) { + EVT VT = RegDefPos.GetValue(); unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] >= RegLimit[RCId]) - return true; // Reg pressure already high. unsigned Cost = TLI->getRepRegClassCostFor(VT); - if (!PN->hasAnyUseOfValue(i)) - continue; - // Check if this increases register pressure of the specific register - // class to the point where it would cause spills. if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) return true; } } - return false; } @@ -1725,80 +1692,64 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { if (!TracksRegPressure) return; - const SDNode *N = SU->getNode(); - if (!N->isMachineOpcode()) { - if (N->getOpcode() != ISD::CopyToReg) - return; - } else { - unsigned Opc = N->getMachineOpcode(); - if (Opc == TargetOpcode::EXTRACT_SUBREG || - Opc == TargetOpcode::INSERT_SUBREG || - Opc == TargetOpcode::SUBREG_TO_REG || - Opc == TargetOpcode::REG_SEQUENCE || - Opc == TargetOpcode::IMPLICIT_DEF) - return; - } - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) continue; SUnit *PredSU = I->getSUnit(); - // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only - // counts data deps. - if (PredSU->NumSuccsLeft != PredSU->Succs.size()) - continue; - const SDNode *PN = PredSU->getNode(); - if (!PN->isMachineOpcode()) { - if (PN->getOpcode() == ISD::CopyFromReg) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - } - continue; - } - unsigned POpc = PN->getMachineOpcode(); - if (POpc == TargetOpcode::IMPLICIT_DEF) - continue; - if (POpc == TargetOpcode::EXTRACT_SUBREG) { - EVT VT = PN->getOperand(0).getValueType(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } else if (POpc == TargetOpcode::INSERT_SUBREG || - POpc == TargetOpcode::SUBREG_TO_REG) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + // NumRegDefsLeft is zero when enough uses of this node have been scheduled + // to cover the number of registers defined (they are all live). + if (PredSU->NumRegDefsLeft == 0) { continue; } - unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); - for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = PN->getValueType(i); - if (!PN->hasAnyUseOfValue(i)) + // FIXME: The ScheduleDAG currently loses information about which of a + // node's values is consumed by each dependence. Consequently, if the node + // defines multiple register classes, we don't know which to pressurize + // here. Instead the following loop consumes the register defs in an + // arbitrary order. At least it handles the common case of clustered loads + // to the same class. For precise liveness, each SDep needs to indicate the + // result number. But that tightly couples the ScheduleDAG with the + // SelectionDAG making updates tricky. A simpler hack would be to attach a + // value type or register class to SDep. + // + // The most important aspect of register tracking is balancing the increase + // here with the reduction further below. Note that this SU may use multiple + // defs in PredSU. The can't be determined here, but we've already + // compensated by reducing NumRegDefsLeft in PredSU during + // ScheduleDAGSDNodes::AddSchedEdges. + --PredSU->NumRegDefsLeft; + unsigned SkipRegDefs = PredSU->NumRegDefsLeft; + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { + if (SkipRegDefs) continue; + EVT VT = RegDefPos.GetValue(); unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + break; } } - // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() - // may transfer data dependencies to CopyToReg. - if (SU->NumSuccs && N->isMachineOpcode()) { - unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = N->getValueType(i); - if (!N->hasAnyUseOfValue(i)) - continue; - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) - // Register pressure tracking is imprecise. This can happen. - RegPressure[RCId] = 0; - else - RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + // We should have this assert, but there may be dead SDNodes that never + // materialize as SUnits, so they don't appear to generate liveness. + //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); + int SkipRegDefs = (int)SU->NumRegDefsLeft; + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { + if (SkipRegDefs > 0) + continue; + EVT VT = RegDefPos.GetValue(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) { + // Register pressure tracking is imprecise. This can happen. But we try + // hard not to let it happen because it likely results in poor scheduling. + DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); + RegPressure[RCId] = 0; + } + else { + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); } } - dumpRegPressure(); } @@ -2314,7 +2265,7 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { if (PredSU->NumSuccs == 1) continue; // Avoid prescheduling to copies from virtual registers, which don't behave - // like other nodes from the perspective of scheduling // heuristics. + // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU->getNode()) if (N->getOpcode() == ISD::CopyFromReg && TargetRegisterInfo::isVirtualRegister diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index da3966c..477c1ff 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -332,6 +332,9 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); + // Compute NumRegDefsLeft. This must be done before AddSchedEdges. + InitNumRegDefsLeft(NodeSUnit); + // Assign the Latency field of NodeSUnit using target-provided information. ComputeLatency(NodeSUnit); } @@ -407,7 +410,13 @@ void ScheduleDAGSDNodes::AddSchedEdges() { ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } - SU->addPred(dep); + if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) { + // Multiple register uses are combined in the same SUnit. For example, + // we could have a set of glued nodes with all their defs consumed by + // another set of glued nodes. Register pressure tracking sees this as + // a single use, so to keep pressure balanced we reduce the defs. + --OpSU->NumRegDefsLeft; + } } } } @@ -426,6 +435,69 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { AddSchedEdges(); } +// Initialize NumNodeDefs for the current Node's opcode. +void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() { + if (!Node->isMachineOpcode()) { + if (Node->getOpcode() == ISD::CopyFromReg) + NodeNumDefs = 1; + else + NodeNumDefs = 0; + return; + } + unsigned POpc = Node->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) { + // No register need be allocated for this. + NodeNumDefs = 0; + return; + } + unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs(); + // Some instructions define regs that are not represented in the selection DAG + // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues. + NodeNumDefs = std::min(Node->getNumValues(), NRegDefs); + DefIdx = 0; +} + +// Construct a RegDefIter for this SUnit and find the first valid value. +ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU, + const ScheduleDAGSDNodes *SD) + : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) { + InitNodeNumDefs(); + Advance(); +} + +// Advance to the next valid value defined by the SUnit. +void ScheduleDAGSDNodes::RegDefIter::Advance() { + for (;Node;) { // Visit all glued nodes. + for (;DefIdx < NodeNumDefs; ++DefIdx) { + if (!Node->hasAnyUseOfValue(DefIdx)) + continue; + if (Node->isMachineOpcode() && + Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) { + // Propagate the incoming (full-register) type. I doubt it's needed. + ValueType = Node->getOperand(0).getValueType(); + } + else { + ValueType = Node->getValueType(DefIdx); + } + ++DefIdx; + return; // Found a normal regdef. + } + Node = Node->getGluedNode(); + if (Node == NULL) { + return; // No values left to visit. + } + InitNodeNumDefs(); + } +} + +void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { + assert(SU->NumRegDefsLeft == 0 && "expect a new node"); + for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) { + assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected"); + ++SU->NumRegDefsLeft; + } +} + void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { // Check to see if the scheduler cares about latencies. if (ForceUnitLatencies()) { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 00b3556..cc7310e 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -20,7 +20,7 @@ namespace llvm { /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. - /// + /// /// Edges between SUnits are initially based on edges in the SelectionDAG, /// and additional edges can be added by the schedulers as heuristics. /// SDNodes such as Constants, Registers, and a few others that are not @@ -73,13 +73,17 @@ namespace llvm { /// predecessors / successors info nor the temporary scheduling states. /// SUnit *Clone(SUnit *N); - + /// BuildSchedGraph - Build the SUnit graph from the selection dag that we /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. virtual void BuildSchedGraph(AliasAnalysis *AA); + /// InitNumRegDefsLeft - Determine the # of regs defined by this node. + /// + void InitNumRegDefsLeft(SUnit *SU); + /// ComputeLatency - Compute node latency. /// virtual void ComputeLatency(SUnit *SU); @@ -106,6 +110,30 @@ namespace llvm { virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; + /// RegDefIter - In place iteration over the values defined by an + /// SUnit. This does not need copies of the iterator or any other STLisms. + /// The iterator creates itself, rather than being provided by the SchedDAG. + class RegDefIter { + const ScheduleDAGSDNodes *SchedDAG; + const SDNode *Node; + unsigned DefIdx; + unsigned NodeNumDefs; + EVT ValueType; + public: + RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD); + + bool IsValid() const { return Node != NULL; } + + EVT GetValue() const { + assert(IsValid() && "bad iterator"); + return ValueType; + } + + void Advance(); + private: + void InitNodeNumDefs(); + }; + private: /// ClusterNeighboringLoads - Cluster loads from "near" addresses into /// combined SUnits. |